%:- auto_table.

% Public interface of this module.  The list must be a single
% comma-separated sequence terminated by exactly one full stop.
:- export
	transform/2,
	tag/3,
	tagbag/3,
	strip_nodes/2,
	text/2.
/*	annotations/2.*/
/*	'GateDocument'/2,
	'GateDocumentFeatures'/2,
	'TextWithNodes'/2.*/

:- import
	load_xml_structure/3 from sgml.

% transform(+Source, -Result)
%
% Converts GATE XML into Prolog terms.  Source may be:
%   - a list of sources            -> list of results, element by element
%   - files(FileList)              -> corpus(DocList)
%   - an atom naming an XML file   -> transform of the parsed file content
%   - element('GateDocument',..)   -> document(FeatureMap, Content, Annotations)
%   - element('GateDocumentFeatures',..) -> featuremap(Features)
%   - element('Feature',..)        -> feature(Name, Value)
%   - element('TextWithNodes',..)  -> content(Content), see strip_nodes/2
%   - element('AnnotationSet',..)  -> list of annotation/5 terms
%   - element('Annotation',..)     -> annotation(Id, Type, Start, End, FeatureMap)

transform([], []).

transform([H|T], [Head|Tail]) :-
	transform(H, Head),
	transform(T, Tail).

transform(files(FileList), corpus(DocList)) :-
	transform(FileList, DocList).

transform(File, Doc) :-
	atom(File),
	% '[]' is an atom in XSB; without this guard, backtracking out of the
	% empty-list clause would try to load a file called '[]'.
	File \== [],
	% Only load_xml_structure/3 is imported; the third argument is ignored
	% here exactly as in tagbag/3 and text/2.
	load_xml_structure(file(File), X, _),
	transform(X, Doc).

transform(element('GateDocument', A, B), document(FeatureMap, Content, Annotations)) :-
	Document = element('GateDocument', A, B),
	tag('GateDocumentFeatures', Document, GateDocumentFeatures),
	transform(GateDocumentFeatures, FeatureMap),
	tag('TextWithNodes', Document, TextWithNodes),
	transform(TextWithNodes, Content),
	% Collect the sets from the already parsed document.  tagbag/3 cannot be
	% used here because it expects a file name and loads that file itself.
	% findall/3 also yields [] for a document without annotation sets.
	findall(Set, tag('AnnotationSet', Document, Set), AnnotationSets),
	transform(AnnotationSets, Annotations).

transform(element('GateDocumentFeatures', _, FeatureElements), featuremap(Features)) :-
	% Features is already a list, so it is not wrapped in another list.
	feature_list(FeatureElements, Features).

transform(element('Feature', _, B), feature(Name, Value)) :-
	% Name and Value are bound to the content lists of the 'Name' and
	% 'Value' child elements, not to bare atoms.
	tag('Name', B, element('Name', _, Name)),
	tag('Value', B, element('Value', _, Value)).

transform(element('TextWithNodes', A, B), content(Content)) :-
	strip_nodes(element('TextWithNodes', A, B), Content).

transform(element('AnnotationSet', _, AnnotationElements), Annotations) :-
	% Only 'Annotation' elements are transformed; any other content node
	% (such as text between elements) would otherwise reach the file clause.
	findall(E, tag('Annotation', AnnotationElements, E), Elements),
	transform(Elements, Annotations).

transform(element('Annotation', A, B), annotation(Id, Type, Start, End, featuremap(Features))) :-
	% NOTE(review): the attribute names below follow the GATE XML
	% serialisation ('Id', 'Type', 'StartNode', 'EndNode') - confirm against
	% the documents actually processed.  The values are passed through
	% exactly as the parser delivers them.
	attribute_value('Id', A, Id),
	attribute_value('Type', A, Type),
	attribute_value('StartNode', A, Start),
	attribute_value('EndNode', A, End),
	feature_list(B, Features).

% feature_list(+Content, -Features)
% Transforms every 'Feature' element found in Content into feature/2 terms,
% skipping all other nodes.  Yields [] when Content holds no such element.
feature_list(Content, Features) :-
	findall(E, tag('Feature', Content, E), Elements),
	transform(Elements, Features).

% attribute_value(+Name, +Attributes, -Value)
% Looks up the first Name = Value pair in an attribute list; fails when the
% attribute is absent.
attribute_value(Name, [Name = Value|_], Value) :- !.
attribute_value(Name, [_|Rest], Value) :-
	attribute_value(Name, Rest, Value).


/*transform(files([]), corpus([])).

transform(files([File|Files]), corpus([Doc,Docs])) :-
	transform(File, Doc),
	transform(files(Files), Docs).

transform(File, Doc) :-
	atom(File),
	load_xml_structure(file(File), X),
	transform(X, Doc).

transform(element('GateDocument', A, B), document(FeatureMap, Content, Annotations)) :-
	tag('GateDocumentFeatures', element('GateDocument', A, B), GateDocumentFeatures),
	transform(GateDocumentFeatures, FeatureMap),
	tag('TextWithNodes', element('GateDocument', A, B), TextWithNodes),
	transform(TextWithNodes, Content),
	tagbag('AnnotationSet', element('GateDocument', A, B), AnnontationSets),
	transform(annotationsets(AnnotationSets), Annotations).

transform(element('GateDocumentFeatures', A, [FeatureElement,FeatureElements]), featuremap([Feature,Features])) :-
	transform(FeatureElement, Feature),
	trasnform(element('GateDocumentFeatures', A, FeatureElements), Features).

transform(element('TextWithNodes', A, B), content(Content)) :-
	strip_nodes(element('TextWithNodes', A, B), Content).

transform(annotationsets([AnnotationSet|AnnotationSets]), [Annotation|Annotations]) :-
	transform(AnnotationSet, Annotation),
	trasnform(annotationsets(AnnotationSets), Annotations).

transform(element('AnnotationSet', A, [AnnotationSet|AnnotationSets]), annotationset([Annotation|Annotations])) :-
	transform(AnnotationSet, Annotation),
	transform(element('AnnotationSet' A, [AnnotationSets]), AnnotationSets).

transform(element('Annotation', A, B), annotation(Id, Type, Start, End, FeatureMap))*/

%annotations(FileName, Annotations) :-
%	tagbag('Annotation', FileName, Annotations).

% tagbag(+Tag, +FileName, -Bag)
% Parses the XML file FileName and binds Bag to the list of all
% element(Tag, Attributes, Content) terms that tag/3 finds in it.
% Because bagof/3 is used, the call fails when no such element exists.
% The third result of load_xml_structure/3 is discarded.
tagbag(Tag, FileName, Bag) :-
	Source = file(FileName),
	load_xml_structure(Source, Parsed, _),
	bagof(Element, tag(Tag, Parsed, Element), Bag).

% text(+FileName, -StrippedNodeText)
% Parses the XML file named by the atom FileName, locates a
% 'TextWithNodes' element via tag/3 and returns its content as processed
% by strip_nodes/2.  Fails if FileName is not an atom.
text(FileName, StrippedNodeText) :-
	atom(FileName),
	load_xml_structure(file(FileName), Parsed, _Ignored),
	tag('TextWithNodes', Parsed, TextElement),
	strip_nodes(TextElement, StrippedNodeText).

% tag(+Tag, +Tree, -Element)
% Enumerates, on backtracking, every element(Tag, Attributes, Content)
% term inside Tree.  Tree is either an element/3 term or a list of nodes.
% An element is reported before matching elements nested in its content,
% and list members are searched left to right.  Nodes that are neither
% element/3 terms nor non-empty lists produce no solutions.

% The node itself carries the requested tag.
tag(Name, element(Name, Attrs, Kids), element(Name, Attrs, Kids)).

% Descend into the content of any element.
tag(Name, element(_, _, Kids), Found) :-
	tag(Name, Kids, Found).

% Search the first member of a list ...
tag(Name, [First|_], Found) :-
	tag(Name, First, Found).

% ... and then the remaining members.
tag(Name, [_|Rest], Found) :-
	tag(Name, Rest, Found).

% strip_nodes(+Input, -Output)
% Input is either a 'TextWithNodes' element or its content list.
% Output is the content list with every item of the form
% element('Node', [id = Id], _) replaced by node(Id); all other items
% are copied unchanged.

% Unwrap the enclosing element and process its content.
strip_nodes(element('TextWithNodes', _, Items), Stripped) :-
	strip_nodes(Items, Stripped).

strip_nodes([], []).

% A node marker whose only attribute is id becomes node(Id).
strip_nodes([element('Node', [id = Id], _)|Rest], [node(Id)|StrippedRest]) :-
	strip_nodes(Rest, StrippedRest), !. % commit: a node marker is never kept as a word

% Anything else is passed through as it is.
strip_nodes([Item|Rest], [Item|StrippedRest]) :-
	strip_nodes(Rest, StrippedRest).

/*'GateDocument'(element('GateDocument',B,C), element('GateDocument',B,C)).

'GateDocument'(element(_,_,X), Y) :-
	'GateDocument'(X, Y).

'GateDocument'([H|T], X) :-
	'GateDocument'(H, X);
	'GateDocument'(T, X).

'GateDocumentFeatures'(element('GateDocumentFeatures',B,C), element('GateDocumentFeatures',B,C)).

'GateDocumentFeatures'(element(_,_,X), Y) :-
	'GateDocumentFeatures'(X, Y).

'GateDocumentFeatures'([H|T], X) :-
	'GateDocumentFeatures'(H, X);
	'GateDocumentFeatures'(T, X).

'TextWithNodes'(element('TextWithNodes',B,C), element('TextWithNodes',B,C)).

'TextWithNodes'(element(_,_,X), Y) :-
	'TextWithNodes'(X, Y).

'TextWithNodes'([H|T], X) :-
	'TextWithNodes'(H, X);
	'TextWithNodes'(T, X).*/

