Project

General

Profile

Statistics
| Branch: | Revision:

cool / src / owl / OWLFunctionalParser.ml @ 7c4d2eb4

History | View | Annotate | Download (4.01 KB)

1

    
2
module L = List
3

    
4
open CoolUtils
5
open Str
6
open Stream
7

    
8

    
9
(* types *)
10
type annotation = string * int * int (* file and line number and column*)
11

    
12
exception ParseError of annotation * string
13
exception OWLParseError of string
14

    
15
type 'a tree =
16
    | Leaf of 'a
17
    | Node of (('a tree) list)
18

    
19
type 'a annotated = 'a * annotation
20

    
21
(* implementation *)
22

    
23
let stream_map f stream =
24
    let next i =
25
      try Some (f (Stream.next stream))
26
      with Stream.Failure -> None in
27
    Stream.from next
28

    
29
let stream_fold f stream init =
30
    let result = ref init in
31
    Stream.iter
32
      (fun x -> result := f x !result)
33
      stream;
34
    !result;;
35

    
36
let stream_combine stream1 stream2 =
37
    let rec next i =
38
      try Some (Stream.next stream1, Stream.next stream2)
39
      with Stream.Failure -> None in
40
    Stream.from next;;
41

    
42
let line_stream_of_string string =
43
    Stream.of_list (Str.split (Str.regexp "\n") string);;
44

    
45
let list_of_stream stream =
46
    let cons a b = a::b in
47
    L.rev (stream_fold cons stream [])
48

    
49
let tokens_of_string_stream (lines:string Stream.t) : string annotated list =
50
    let lastline = ref 0 in
51
    let nextline () =
52
        lastline := 1 + !lastline;
53
        !lastline
54
    in
55
    let tokenize l =
56
        let col = ref 0 in
57
        let linenr = nextline () in
58
        let annotation s =
59
            let c = !col in
60
            col := !col + (String.length s);
61
            ("",linenr, c)
62
        in
63
        let extract x = match x with
64
                        | (Delim a) -> a
65
                        | (Text  a) -> a
66
        in
67
        let words = L.map extract (Str.full_split (regexp "[ \t+]") l) in
68
        let tokens = L.concat (L.map (full_split (regexp "[()]")) words) in
69
        L.map (fun t -> (t, annotation t)) (L.map extract tokens)
70
    in
71
    L.concat (list_of_stream (stream_map tokenize lines))
72

    
73
let string_of_annotation (file,line,col) =
74
    file ^ ":" ^ (string_of_int line) ^ ":" ^ (string_of_int col)
75

    
76
let string_of_annotated str_of_a (obj,anno) : string =
77
    string_of_annotation anno ^ " " ^ str_of_a obj
78

    
79
let filter_comment line =
80
    let comment_reg = "//.*$" in
81
    let lst = full_split (regexp "<[^>]*>") line in
82
    let lastidx = (List.length lst) - 1 in
83
    let f i a =
84
        match a with
85
        | Delim a -> a
86
        | Text a -> if not (i = lastidx) then a
87
                    else replace_first (regexp comment_reg) "" a
88
    in
89
    String.concat "" (L.mapi f lst)
90

    
91

    
92
let tree_of_tokens lst : string annotated tree =
93
    let stream:string annotated Stream.t = Stream.of_list lst in
94
    (* returns the next tree in our forest *)
95
    let rec tree_of_token_stream _ : string annotated tree option =
96
        match peek stream with
97
        | Some ("(", a1) ->
98
            junk stream;
99
            (* assemble another Node by creating children to that node *)
100
            (* until a ")" appears *)
101
            let composenode _ : string annotated tree option = 
102
                match peek stream with
103
                | Some (")", _) -> (junk stream ; None)
104
                | Some (_,_) -> tree_of_token_stream ()
105
                | None -> raise (ParseError (a1, "Unmatched ("))
106
            in
107
            Some (Node (list_of_stream (Stream.from composenode)))
108
        | Some annostr -> junk stream ; Some (Leaf annostr)
109
        | None -> None
110
    in
111
    let forest = list_of_stream (Stream.from (fun _ -> tree_of_token_stream ())) in
112
    Node forest
113

    
114
let string_of_tree str_of_a atree : string =
115
    let indent str = "  "^str in
116
    let rec lines_of_tree t = match t with
117
        | (Leaf a) -> [str_of_a a]
118
        | (Node lst) -> L.map indent (L.concat (L.map lines_of_tree lst))
119
    in
120
    String.concat "\n" (lines_of_tree atree)
121

    
122
let filter_whitespace =
123
    (* only keep those strings that aren't whitespace only *)
124
    L.filter (fun (s,_) -> not (string_match (regexp "[ ]*$") s 0))
125

    
126
let tree_of_string str =
127
    let lns = stream_map filter_comment (line_stream_of_string str) in
128
    tree_of_tokens (filter_whitespace (tokens_of_string_stream lns))
129

    
130
let ontology_of_tree tree = []
131

    
132
let parse str =
133
    let tree = tree_of_string str in
134
    ontology_of_tree tree
135