https://github.com/RichardMoot/GrailLight
Raw File
Tip revision: 67fbacd0e365d9008c021016377c0cfe1f1e309d authored by Richard Moot on 27 April 2021, 15:02:38 UTC
Update Supertag.tcl
Tip revision: 67fbacd
read_trees.pl
#!/Applications/SWI-Prolog.app/Contents/MacOS/swipl -q -g read_trees -f

:- use_module(m2const, [compute_penalties1/2]).

% = read_trees
%
% read trees from FileName in Stanford parser output such as the following
%
% (NP (Det the) (ADJ interesting) (N idea))
%
% and converts them into GrailLight crosses/4 declarations.

:- dynamic user:crosses/4, user:constituent/4.

read_trees :-
	current_prolog_flag(os_argv, Argv),
        append(_, [A|Av], Argv),
	file_base_name(A, 'read_trees.pl'),
	!,
        read_files_trees(Av),
	halt.

read_files_trees(Files) :-
	user:abolish(crosses/4),
	user:retractall(crosses(_,_,_,_)),
	read_files_trees(Files, 0, _),
	tell('parser_crosses.pl'),
	user:listing(crosses/4),
	told.

read_files_trees([], N, N).
read_files_trees([F|Fs], N0, N) :-
	read_trees(F, N0, N1),
	read_files_trees(Fs, N1, N).


read_trees(FileName) :-
	read_trees(FileName, 0, _).

read_trees(FileName, N0, N) :-
	see(FileName),
	read_all_trees(N0, N),
	seen.

read_all_trees(N, N) :-
	read_spaces,
	peek_char(end_of_file),
	!.
read_all_trees(N0, Last) :-
	read_tree(Tree),
	N is N0 + 1,
	format('~D. ~@', [N, portray_clause(Tree)]),
	/* compute constituents */
	user:abolish(constituent/4),
	user:retractall(constituent(_,_,_,_)),
	tree_length(Tree, N, 0, Length),
	/* compute crosses declarations */
	compute_penalties1(N, Length),
	nl,
	user:retractall(crosses(N,_,_,0)),
	/* just in case, clean up choice points */
	!,
	read_all_trees(N, Last).

tree_length(word(_), _, N0, N) :-
	N is N0 + 1.
tree_length(tree(Label, List), Sent, N0, N) :-
	tree_length_list(List, Sent, N0, N),
	/* assert only non-trivial constituents */
   (
	N > N0 + 1
   ->
        user:assert(constituent(Sent, Label, N0, N))
   ;
        true
   ).

tree_length_list([], _, N, N).
tree_length_list([T|Ts], Sent, N0, N) :-
	tree_length(T, Sent, N0, N1),
	tree_length_list(Ts, Sent, N1, N).

read_tree(Tree) :-
	get_char(C),
	read_tree(C, Tree).

read_tree('(', nil) :-
	peek_char(')'),
	!,
	/* remove close bracket from input */
	get_char(_).
read_tree('(', tree(Label, Ds)) :-
	read_label(Label),
	!,
	read_spaces,
	read_daughters(Ds).
read_tree(C, W) :-
	read_word(C, W),
	!.

read_daughters(Ds) :-
	get_char(C),
	read_daughters(C, Ds).

read_daughters(')', []) :-
	!.
read_daughters(C, [W|Ws]) :-
	read_tree(C, W),
	read_spaces,
	!,
	read_daughters(Ws).

read_label(Label) :-
	get_char(C),
	read_label(C, LabelL, []),
	atom_chars1(Label, LabelL).

read_label(C) -->
	{char_type(C, upper)},
	!,
	{char_type(CL, to_lower(C))},
	[CL],
	{get_char(C2)},
	read_label(C2).
read_label(C) -->
	{char_type(C, lower)},
	!,
	[C],
	{get_char(C2)},
	read_label(C2).
read_label(C) -->
	{char_type(C, punct)},
	!,
	[p],
	[u],
	[n],
	{get_char(C2)},
	read_label(C2).

read_label(' ') -->
	[].
read_label('\n') -->
	[].
read_label(end_of_file) -->
	[].


read_word(Label) :-
	get_char(C),
	read_word(C, Label).

read_word(C, Label) :-
	read_word(C, LabelL, []),
	atom_chars1(Label0, LabelL),
	/* if atom contains "_", split it into multiple words */
	atomic_list_concat([A|As], '_', Label0),
	return_word(As, A, Label).

return_word([], A, word(A)).
return_word([A|As], A0, tree(w, [word(A0)|Words])) :-
	return_word_list([A|As], Words).

return_word_list([], []).
return_word_list([A|As0], [word(A)|As]) :-
	return_word_list(As0, As).


read_word(')') -->
	!,
	[].
read_word(' ') -->
	!,
	[].
read_word('\n') -->
	!,
	[].
read_word(end_of_file) -->
	!,
	[].
read_word(C, W0, W) :-
	char_type(C, graph),
	!,
	W0 = [C|W1],
  (
	peek_char(')')
  ->
	 W1 = W
  ;

	get_char(C2),
	read_word(C2, W1, W)
   ).

% = read_spaces/0
%
% ignore all spaces; read from input until next get_char produces a non-space character

read_spaces :-
	peek_char(C),
	char_type(C, space),
	!,
	get_char(C),
	read_spaces.
read_spaces.

% tree pretty printer

portray_tree(Tree) :-
	format('~N'),
	portray_tree(Tree, 0).

portray_tree(word(Word), _Tab) :-
	format('word(~p)', [Word]).
portray_tree(tree(Label, Daughters), Tab0) :-
	format('tree(~p, [', [Label]),
	Tab is Tab0 + 3,
	portray_tree_list(Daughters, Tab).


portray_tree_list([], _) :-
	format('])', []).
portray_tree_list([T|Ts], Tab) :-
	format('~N'),
	tab(Tab),
	portray_tree_list1(Ts, T, Tab).

portray_tree_list1([], T, Tab) :-
	portray_tree(T, Tab),
	format('~N', []),
	tab(Tab),
	format('])', []).
portray_tree_list1([T|Ts], T0, Tab) :-
	portray_tree(T0, Tab),
	format(',~n', []),
	tab(Tab),
	portray_tree_list1(Ts, T, Tab).


atom_chars1(nil, []) :-
	!.
atom_chars1(C, L) :-
	atom_chars(C, L).
back to top