https://github.com/RichardMoot/GrailLight
Tip revision: 67fbacd0e365d9008c021016377c0cfe1f1e309d authored by Richard Moot on 27 April 2021, 15:02:38 UTC
Update Supertag.tcl
Update Supertag.tcl
Tip revision: 67fbacd
read_trees.pl
#!/Applications/SWI-Prolog.app/Contents/MacOS/swipl -q -g read_trees -f
:- use_module(m2const, [compute_penalties1/2]).
% = read_trees
%
% read trees from FileName in Stanford parser output such as the following
%
% (NP (Det the) (ADJ interesting) (N idea))
%
% and converts them into GrailLight crosses/4 declarations.
:- dynamic user:crosses/4, user:constituent/4.
read_trees :-
current_prolog_flag(os_argv, Argv),
append(_, [A|Av], Argv),
file_base_name(A, 'read_trees.pl'),
!,
read_files_trees(Av),
halt.
read_files_trees(Files) :-
user:abolish(crosses/4),
user:retractall(crosses(_,_,_,_)),
read_files_trees(Files, 0, _),
tell('parser_crosses.pl'),
user:listing(crosses/4),
told.
read_files_trees([], N, N).
read_files_trees([F|Fs], N0, N) :-
read_trees(F, N0, N1),
read_files_trees(Fs, N1, N).
read_trees(FileName) :-
read_trees(FileName, 0, _).
read_trees(FileName, N0, N) :-
see(FileName),
read_all_trees(N0, N),
seen.
read_all_trees(N, N) :-
read_spaces,
peek_char(end_of_file),
!.
read_all_trees(N0, Last) :-
read_tree(Tree),
N is N0 + 1,
format('~D. ~@', [N, portray_clause(Tree)]),
/* compute constituents */
user:abolish(constituent/4),
user:retractall(constituent(_,_,_,_)),
tree_length(Tree, N, 0, Length),
/* compute crosses declarations */
compute_penalties1(N, Length),
nl,
user:retractall(crosses(N,_,_,0)),
/* just in case, clean up choice points */
!,
read_all_trees(N, Last).
tree_length(word(_), _, N0, N) :-
N is N0 + 1.
tree_length(tree(Label, List), Sent, N0, N) :-
tree_length_list(List, Sent, N0, N),
/* assert only non-trivial constituents */
(
N > N0 + 1
->
user:assert(constituent(Sent, Label, N0, N))
;
true
).
tree_length_list([], _, N, N).
tree_length_list([T|Ts], Sent, N0, N) :-
tree_length(T, Sent, N0, N1),
tree_length_list(Ts, Sent, N1, N).
read_tree(Tree) :-
get_char(C),
read_tree(C, Tree).
read_tree('(', nil) :-
peek_char(')'),
!,
/* remove close bracket from input */
get_char(_).
read_tree('(', tree(Label, Ds)) :-
read_label(Label),
!,
read_spaces,
read_daughters(Ds).
read_tree(C, W) :-
read_word(C, W),
!.
read_daughters(Ds) :-
get_char(C),
read_daughters(C, Ds).
read_daughters(')', []) :-
!.
read_daughters(C, [W|Ws]) :-
read_tree(C, W),
read_spaces,
!,
read_daughters(Ws).
read_label(Label) :-
get_char(C),
read_label(C, LabelL, []),
atom_chars1(Label, LabelL).
read_label(C) -->
{char_type(C, upper)},
!,
{char_type(CL, to_lower(C))},
[CL],
{get_char(C2)},
read_label(C2).
read_label(C) -->
{char_type(C, lower)},
!,
[C],
{get_char(C2)},
read_label(C2).
read_label(C) -->
{char_type(C, punct)},
!,
[p],
[u],
[n],
{get_char(C2)},
read_label(C2).
read_label(' ') -->
[].
read_label('\n') -->
[].
read_label(end_of_file) -->
[].
read_word(Label) :-
get_char(C),
read_word(C, Label).
read_word(C, Label) :-
read_word(C, LabelL, []),
atom_chars1(Label0, LabelL),
/* if atom contains "_", split it into multiple words */
atomic_list_concat([A|As], '_', Label0),
return_word(As, A, Label).
return_word([], A, word(A)).
return_word([A|As], A0, tree(w, [word(A0)|Words])) :-
return_word_list([A|As], Words).
return_word_list([], []).
return_word_list([A|As0], [word(A)|As]) :-
return_word_list(As0, As).
read_word(')') -->
!,
[].
read_word(' ') -->
!,
[].
read_word('\n') -->
!,
[].
read_word(end_of_file) -->
!,
[].
read_word(C, W0, W) :-
char_type(C, graph),
!,
W0 = [C|W1],
(
peek_char(')')
->
W1 = W
;
get_char(C2),
read_word(C2, W1, W)
).
% = read_spaces/0
%
% ignore all spaces; read from input until next get_char produces a non-space character
read_spaces :-
peek_char(C),
char_type(C, space),
!,
get_char(C),
read_spaces.
read_spaces.
% tree pretty printer
portray_tree(Tree) :-
format('~N'),
portray_tree(Tree, 0).
portray_tree(word(Word), _Tab) :-
format('word(~p)', [Word]).
portray_tree(tree(Label, Daughters), Tab0) :-
format('tree(~p, [', [Label]),
Tab is Tab0 + 3,
portray_tree_list(Daughters, Tab).
portray_tree_list([], _) :-
format('])', []).
portray_tree_list([T|Ts], Tab) :-
format('~N'),
tab(Tab),
portray_tree_list1(Ts, T, Tab).
portray_tree_list1([], T, Tab) :-
portray_tree(T, Tab),
format('~N', []),
tab(Tab),
format('])', []).
portray_tree_list1([T|Ts], T0, Tab) :-
portray_tree(T0, Tab),
format(',~n', []),
tab(Tab),
portray_tree_list1(Ts, T, Tab).
atom_chars1(nil, []) :-
!.
atom_chars1(C, L) :-
atom_chars(C, L).