%:- auto_table.

:- export
	transform/2,
	transform_file/2,
	tag/3,
	tagbag/3,
	strip_nodes/2.

:- import
	load_xml_structure/3 from sgml.

% transform(+Items, -Results): list case.
% Maps transform/2 over a list, element by element. The empty list maps to
% the empty list.
transform([], []).

% Only the first translation of each item is kept: the cut discards any
% alternative solutions of transform(Item, Out) before moving on.
transform([Item|Items], [Out|Outs]) :-
	transform(Item, Out), !,
	transform(Items, Outs).

% transform_file(+Source, -Result)
%
% Source may be:
%   - a list of sources      -> Result is the list of their results;
%   - files(FileList)        -> Result is corpus(DocList);
%   - an atom naming a file  -> the file is parsed with load_xml_structure/3
%                               and its single top-level term is passed to
%                               transform/2.
transform_file([], []).

% Only the first result for each list element is kept (cut after the call).
transform_file([File|Files], [Doc|Docs]) :-
	transform_file(File, Doc), !,
	transform_file(Files, Docs).

transform_file(files(FileList), corpus(DocList)) :-
	transform_file(FileList, DocList).

transform_file(File, Doc) :-
	atom(File),
	% Where [] is an atom, the end of a file list would otherwise reach this
	% clause on backtracking and be treated as a file name.
	File \== [],
	load_xml_structure(file(File), [Term], _Warn), !, %never, ever load structure twice.
	transform(Term, Doc).

% transform(+GateDocumentElement, -document(FeatureMap, Content, Annotations))
%
% Translates a 'GateDocument' element. A 'GateDocumentFeatures' element and a
% 'TextWithNodes' element found inside it are translated into FeatureMap and
% Content; all 'AnnotationSet' elements are collected and translated as a
% list into Annotations. tagbag/3 is built on bagof/3, so this clause fails
% if the document contains no 'AnnotationSet' element.
transform(element('GateDocument', Attrs, Children), document(FeatureMap, Content, Annotations)) :-
	Document = element('GateDocument', Attrs, Children),
	tag('GateDocumentFeatures', Document, FeaturesElement),
	transform(FeaturesElement, FeatureMap),
	tag('TextWithNodes', Document, TextElement),
	transform(TextElement, Content),
	tagbag('AnnotationSet', Document, SetElements),
	transform(SetElements, Annotations).

% transform(+GateDocumentFeaturesElement, -featuremap([Features]))
%
% Collects every 'Feature' element below the 'GateDocumentFeatures' element
% and translates them as a list. Fails when there are no 'Feature' elements,
% because tagbag/3 is built on bagof/3.
% NOTE(review): the translated list is wrapped in an extra list here
% (featuremap([Features])), whereas the 'Annotation' clause produces
% featuremap(Features) -- confirm the nesting is intended.
transform(element('GateDocumentFeatures', Attrs, Children), featuremap([Features])) :-
	FeaturesElement = element('GateDocumentFeatures', Attrs, Children),
	tagbag('Feature', FeaturesElement, FeatureElements),
	transform(FeatureElements, Features).

% transform(+FeatureElement, -feature(Name, Value))
%
% Name and Value are the content (third argument) of a 'Name' element and a
% 'Value' element found inside the 'Feature' element.
transform(element('Feature', Attrs, Children), feature(Name, Value)) :-
	Feature = element('Feature', Attrs, Children),
	tag('Name', Feature, element('Name', _, Name)),
	tag('Value', Feature, element('Value', _, Value)).

% transform(+TextWithNodesElement, -content(Content))
%
% Content is whatever strip_nodes/2 produces for the 'TextWithNodes' element.
transform(element('TextWithNodes', Attrs, Children), content(Content)) :-
	TextElement = element('TextWithNodes', Attrs, Children),
	strip_nodes(TextElement, Content).

% transform(+AnnotationSetElement, -annotationset(Annotations))
%
% Collects every 'Annotation' element below the 'AnnotationSet' element and
% translates them as a list. An annotation set without any 'Annotation'
% element yields annotationset([]) instead of failing: tagbag/3 is built on
% bagof/3, which fails when there are no solutions.
transform(element('AnnotationSet', A, B), annotationset(Annotations)) :-
	(   tagbag('Annotation', element('AnnotationSet', A, B), AnnotationElements)
	->  true
	;   AnnotationElements = []
	),
	transform(AnnotationElements, Annotations).

% transform(+AnnotationElement, -annotation(Id, Type, Start, End, featuremap(Features)))
%
% Translates an 'Annotation' element whose attribute list is exactly
% ['Type' = Type, 'StartNode' = Start, 'EndNode' = End], in that order.
% Features is the translated list of 'Feature' elements found in the
% annotation's content, or [] when there are none.
% NOTE(review): the annotation id is the constant 10000 and any other
% attribute list (extra attributes, different order) does not match this
% clause -- confirm both are intended.
transform(element('Annotation', ['Type' = Type, 'StartNode' = Start, 'EndNode' = End], B), annotation(10000, Type, Start, End, featuremap(Features))) :-
	% Search the content list directly so the bagof/3 goal inside tagbag/3
	% has no unbound variable in it. The if-then-else commits to the
	% collected features, so backtracking cannot produce a second answer
	% with an empty feature map.
	(   tagbag('Feature', B, FeatureElements)
	->  true
	;   FeatureElements = []
	),
	transform(FeatureElements, Features).

% tagbag(+Tag, +Term, -Bag)
%
% Bag is the list of all solutions of tag(Tag, Term, Match), collected with
% bagof/3. Fails when tag/3 has no solution, since bagof/3 never returns an
% empty list.
tagbag(Tag, Term, Bag) :-
	bagof(Match, tag(Tag, Term, Match), Bag).

% tag(?Tag, +Term, -Element)
%
% Enumerates, on backtracking, every element(Tag, Attributes, Content) term
% found in Term. Term is either an element/3 term or a list of content
% items. A matching element is reported first, then its content is searched
% as well, so nested elements with the same tag are also found.

% The element itself carries the wanted tag.
tag(Tag, element(Tag,Attributes,Content), element(Tag,Attributes,Content)).

% Look inside the content of any element.
tag(Tag, element(_,_,Inside), Answer) :-
	tag(Tag, Inside, Answer).

% For a list, search the first item and then the rest of the list.
tag(Tag, [Head|_], Answer) :-
	tag(Tag, Head, Answer).

tag(Tag, [_|Tail], Answer) :-
	tag(Tag, Tail, Answer).

% strip_nodes(+Input, -Output)
%
% Input is either a 'TextWithNodes' element or a list of content items.
% Output is the content list with every element('Node', [id = Id], _)
% replaced by node(Id); all other items are copied unchanged.
strip_nodes(element('TextWithNodes',_,List), Output) :-
	strip_nodes(List, Output).

strip_nodes([], []).

% Commit as soon as the item is recognised as a node. This drops the choice
% point for the catch-all clause below before recursing, instead of holding
% it until the rest of the list has been processed.
strip_nodes([element('Node',[id = Integer],_)|TailNode], [node(Integer)|TailOut]) :- !, %never treat node as word
	strip_nodes(TailNode, TailOut).

strip_nodes([Word|TailNode], [Word|TailOut]) :-
	strip_nodes(TailNode, TailOut).

