Project

General

Profile

Statistics
| Branch: | Revision:

cool / src / owl / OWLFunctionalParser.ml @ 4611a507

History | View | Annotate | Download (10.6 KB)

1

    
2
module L = List
3

    
4
open CoolUtils
5
open Str
6
open Stream
7

    
8

    
9
(* types *)
10
type annotation = string * int * int (* file and line number and column*)
11

    
12
exception ParseError of annotation * string
13
exception OWLParseError of string
14

    
15
type 'a tree =
16
    | Leaf of 'a
17
    | Node of (('a tree) list)
18

    
19
type 'a intree = (* tree carrying values in leafes and inner nodes *)
20
    | INode of 'a * ('a intree list)
21

    
22
type 'a annotated = 'a * annotation
23

    
24
(* implementation *)
25

    
26
let stream_map f stream =
27
    let next i =
28
      try Some (f (Stream.next stream))
29
      with Stream.Failure -> None in
30
    Stream.from next
31

    
32
let stream_fold f stream init =
33
    let result = ref init in
34
    Stream.iter
35
      (fun x -> result := f x !result)
36
      stream;
37
    !result;;
38

    
39
let stream_combine stream1 stream2 =
40
    let rec next i =
41
      try Some (Stream.next stream1, Stream.next stream2)
42
      with Stream.Failure -> None in
43
    Stream.from next;;
44

    
45
let line_stream_of_string string =
46
    Stream.of_list (Str.split (Str.regexp "\n") string);;
47

    
48
let list_of_stream stream =
49
    let cons a b = a::b in
50
    L.rev (stream_fold cons stream [])
51

    
52
let rec intree_flatten (INode (v,sub)) : 'a list =
53
    v :: L.concat (L.map intree_flatten sub)
54

    
55
let tokens_of_string_stream (lines:string Stream.t) : string annotated list =
56
    let lastline = ref 0 in
57
    let nextline () =
58
        lastline := 1 + !lastline;
59
        !lastline
60
    in
61
    let tokenize l =
62
        let col = ref 1 in
63
        let linenr = nextline () in
64
        let annotation s =
65
            let c = !col in
66
            col := !col + (String.length s);
67
            ("",linenr, c)
68
        in
69
        let extract x = match x with
70
                        | (Delim a) -> a
71
                        | (Text  a) -> a
72
        in
73
        let words = L.map extract (Str.full_split (regexp "[ \t+]") l) in
74
        let tokens = L.concat (L.map (full_split (regexp "[()]")) words) in
75
        L.map (fun t -> (t, annotation t)) (L.map extract tokens)
76
    in
77
    L.concat (list_of_stream (stream_map tokenize lines))
78

    
79
let string_of_annotation (file,line,col) =
80
    file ^ ":" ^ (string_of_int line) ^ ":" ^ (string_of_int col)
81

    
82
let string_of_annotated str_of_a (obj,anno) : string =
83
    string_of_annotation anno ^ " " ^ str_of_a obj
84

    
85
let filter_comment line =
86
    let comment_reg = "//.*$" in
87
    let lst = full_split (regexp "<[^>]*>") line in
88
    let lastidx = (List.length lst) - 1 in
89
    let f i a =
90
        match a with
91
        | Delim a -> a
92
        | Text a -> if not (i = lastidx) then a
93
                    else replace_first (regexp comment_reg) "" a
94
    in
95
    String.concat "" (L.mapi f lst)
96

    
97

    
98
let tree_of_tokens lst : string annotated tree =
99
    let stream:string annotated Stream.t = Stream.of_list lst in
100
    (* returns the next tree in our forest *)
101
    let rec tree_of_token_stream _ : string annotated tree option =
102
        match peek stream with
103
        | Some ("(", a1) ->
104
            junk stream;
105
            (* assemble another Node by creating children to that node *)
106
            (* until a ")" appears *)
107
            let composenode _ : string annotated tree option = 
108
                match peek stream with
109
                | Some (")", _) -> (junk stream ; None)
110
                | Some (_,_) -> tree_of_token_stream ()
111
                | None -> raise (ParseError (a1, "Unmatched ("))
112
            in
113
            Some (Node (list_of_stream (Stream.from composenode)))
114
        | Some annostr -> junk stream ; Some (Leaf annostr)
115
        | None -> None
116
    in
117
    let forest = list_of_stream (Stream.from (fun _ -> tree_of_token_stream ())) in
118
    Node forest
119

    
120
let string_of_tree str_of_a atree : string =
121
    let rec lines_of_tree t = 
122
        let firstchild = ref true in
123
        let indent str =
124
            if !firstchild
125
            then (firstchild := false; "---+"^str)
126
            else  "   |"^str
127
        in
128
        match t with
129
        | (Leaf a) -> ["-" ^ str_of_a a]
130
        | (Node lst) -> (L.map indent (L.concat (L.map lines_of_tree lst)))
131
    in
132
    String.concat "\n" (lines_of_tree atree)
133

    
134
let string_of_intree str_of_a atree : string =
135
    let rec lines_of_tree (INode (a,lst)) = 
136
        let indent str = "  |"^str in
137
        ("--+ "^str_of_a a)::
138
        (L.map indent (L.concat (L.map lines_of_tree lst)))
139
    in
140
    String.concat "\n" (lines_of_tree atree)
141

    
142

    
143
let filter_whitespace =
144
    (* only keep those strings that aren't whitespace only *)
145
    L.filter (fun (s,_) -> not (string_match (regexp "[ ]*$") s 0))
146

    
147
let tree_of_string str =
148
    let lns = stream_map filter_comment (line_stream_of_string str) in
149
    tree_of_tokens (filter_whitespace (tokens_of_string_stream lns))
150

    
151
let ensure_node (t: string annotated tree) : string annotated tree list =
152
    match t with
153
    | (Leaf (s,a)) -> raise (ParseError (a, "Expected \"(\" but got "^s))
154
    | (Node lst) -> lst
155

    
156
let rec intree_list_of_tree tree =
157
    let nextINode (stream: 'a tree Stream.t): ('a intree option) =
158
        match peek stream with
159
        | Some (Node lst) -> None
160
        | Some (Leaf a) ->
161
            junk stream;
162
            let lst = match peek stream with
163
                | None
164
                | Some (Leaf _) -> []
165
                | Some (Node lst) -> (
166
                    junk stream;
167
                    intree_list_of_tree (Node lst)
168
                )
169
            in Some (INode (a,lst))
170
        | None -> None
171
    in
172
    match tree with
173
    | (Leaf a) -> [INode (a,[])]
174
    | (Node lst) ->
175
        let rec intree_list_of_tree_stream stream =
176
            match nextINode stream with
177
            | Some n -> (n::(intree_list_of_tree_stream stream))
178
            | None -> []
179
        in
180
        intree_list_of_tree_stream (Stream.of_list lst)
181

    
182
let rec tree_of_intree it =
183
    let tree_list_of_intree (INode (x,lst)) =
184
        (Leaf x)::(L.map tree_of_intree lst)
185
    in
186
    Node (tree_list_of_intree it)
187

    
188
let parse_uri (str:string) : OWL.uri =
189
    Str.replace_first (Str.regexp "<\\(.*\\)>") "\\1" str
190

    
191
let parse_prefix a lst : OWL.prefix =
192
    match lst with
193
    | [INode ((str,a),[])] -> (
194
        match split (regexp "=") str with
195
        | [l;r] -> (l,parse_uri r)
196
        | _ -> raise (ParseError (a,("Invalid Prefix-Definition")))
197
        )
198
    | _ -> raise (ParseError (a,("Missing Prefix-definition")))
199

    
200
let rec parse_role (INode ((str,a),sub)) : OWL.role =
201
    if ((String.contains str ':') && (sub = [])) then OWL.ROLE str
202
    else match str, sub with
203
    | "ObjectInverseOf",[r] -> OWL.INVERSE (parse_role r)
204
    | _,_ -> raise (ParseError (a, "Unknown role construct \""^str^"\""))
205

    
206
let rec parse_class_exp (INode ((name,anno), sub): string annotated intree) : OWL.class_exp =
207
    let pr = parse_role in
208
    let pc = parse_class_exp in
209
    if (String.contains name ':') then
210
        match name with
211
        | "owl:Thing" -> OWL.THING
212
        | "owl:Nothing" -> OWL.NOTHING
213
        | _ -> (OWL.NAMED name)
214
    else match name, sub with
215
         | "ObjectSomeValuesFrom", [role; obj] -> OWL.SOME (pr role, pc obj)
216
         | "ObjectAllValuesFrom", [role; obj] -> OWL.ALL (pr role, pc obj)
217
         | "ObjectIntersectionOf", _ -> OWL.AND (L.map pc sub)
218
         | "ObjectUnionOf", _ -> OWL.OR (L.map pc sub)
219
         | "ObjectComplementOf", [c] -> OWL.NOT (pc c)
220
         | "ObjectMinCardinality", _
221
         | "ObjectMaxCardinality", _
222
         | "ObjectExactCardinality", _
223
         | "ObjectOneOf", _
224
         | "ObjectHasValue", _
225
         | "ObjectHasSelf", _
226
         | "DataSomeValuesFrom", _
227
         | "DataAllValuesFrom", _
228
         | "DataHasValue", _
229
         | "DataMinCardinality", _
230
         | "DataMaxCardinality", _
231
         | "DataExactCardinality", _ -> raise (ParseError (anno, name^" not implemented yet."))
232
         | _,_ ->
233
           raise (ParseError(anno, "Unknown class expression name \""^name^"\" or wrong parameter count"))
234

    
235
let parse_axiom (ax: string annotated intree) : OWL.axiom =
236
    let INode ((name,anno), sub) = ax in
237
    let pce = parse_class_exp in
238
    match name with
239
    | "Annotation" -> let sub = L.map intree_flatten sub in (* flatten structure *)
240
                      let sub = L.concat sub in             (* flatten more...*)
241
                      let sub = L.map (fun (s,_) -> s) sub in (* drop annotations *)
242
                      OWL.ANNOTATION (String.concat " " sub)
243
    | "Declaration" -> (match sub with
244
                        | [(INode ((name,anno),[INode ((child,_),[])]))] ->
245
                            (match name with
246
                             | "Class" -> OWL.DECLARATION (OWL.CLASS child)
247
                             | _ -> raise (ParseError (anno, "Unknown declaration name \""^name^"\"")))
248
                        | _ ->  raise (ParseError (anno, name^" expects exactly one parameters")))
249
    | "SubClassOf"  -> (match sub with
250
                        | [csub;csuper] -> OWL.SUBCLASS (pce csub,parse_class_exp csuper)
251
                        | _ -> raise (ParseError (anno, name^" expects exactly two parameters")))
252
    | "EquivalentClasses" -> OWL.EQUIVALENTCLASSES (L.map pce sub)
253
    | "DisjointClasses" -> OWL.DISJOINTCLASSES (L.map pce sub)
254
    | "DisjointUnion" -> (match sub with
255
                          | (union::parts) -> OWL.DISJOINTUNION ((pce union),(L.map pce parts))
256
                          | _ -> raise (ParseError (anno, name^" expects at least one parameter")))
257
    | _ -> raise (ParseError (anno, "Unknown axiom name \""^name^"\""))
258

    
259
let parse_ontology a (axioms:string annotated intree list) : OWL.ontology =
260
    (* the first element of axioms, i.e. of the children of the ontology node
261
    is just the ontology name/uri *)
262
    match axioms with
263
    | (INode ((ontoname,_), [])::axioms) -> (parse_uri ontoname, L.map parse_axiom axioms)
264
    | _ -> raise (ParseError (a,("Missing Ontology name")))
265

    
266
let ap (lref:'a list ref) (el:'a): unit = (* append to 'a list ref *)
267
        (lref := ((!lref) @ [el]))
268

    
269
let parse_intree_list (strAnIts: string annotated intree list) =
270
    let prefs : OWL.prefixes ref = ref [] in
271
    let ontos : OWL.ontology list ref = ref [] in
272
    foreach_l strAnIts (fun f -> match f with
273
    | INode (("Prefix",   a), lst) -> (ap prefs (parse_prefix a lst))
274
    | INode (("Ontology", a), axioms) -> (ap ontos (parse_ontology a axioms))
275
    | INode ((n, a), _) -> raise (ParseError (a,(
276
        "Unknown toplevel element \""^n^"\""))));
277
    (!prefs, !ontos)
278

    
279
let parse str =
280
    try (let tree = tree_of_string str in
281
         let itlist = intree_list_of_tree tree in
282
         parse_intree_list itlist
283
    ) with ParseError ((file,line,col), msg) -> ( (* beautify error message *)
284
        let stream = line_stream_of_string str in
285
        repeat (line - 1) (fun () -> Stream.junk stream);
286
        let content = Stream.next stream in
287
        let indent = ref "" in
288
        repeat (col - 1) (fun () -> indent := " " ^ !indent);
289
        let msg = msg ^"\n"^ content ^"\n"^ !indent ^"^-- here" in
290
        raise (ParseError((file,line,col), msg))
291
    )
292