https://gricad-gitlab.univ-grenoble-alpes.fr/coavouxm/flaubertagger.git
Tip revision: c939ca9fac094ac3c379256ef3d3d4d14a5a4bf1 authored by m on 23 February 2024, 16:44:50 UTC
up
up
Tip revision: c939ca9
patch_ftb_train.sh
# Apply a fixed list of sed substitutions to the French SPMRL training
# CoNLL file and show the affected lines before and after.
#
# Usage: patch_ftb_train.sh [corpus [out]]
#   corpus  file to read    (default: train.French.gold.conll.orig under
#                            ~/data/FRENCH_SPMRL/gold/conll/train)
#   out     file to write   (default: train.French.gold.conll, same directory)
#
# Output: the input lines matching each checked pattern, two blank lines,
# then the lines of the patched file that still match those patterns.
# Exit status: 0 when the patched file was written, 1 otherwise.
#
# NOTE(review): the separators inside the patterns below are single spaces;
# if the corpus columns are tab-separated the patterns will not match --
# confirm against the data.
default_dir=~/data/FRENCH_SPMRL/gold/conll/train
corpus=${1:-$default_dir/train.French.gold.conll.orig}
out=${2:-$default_dir/train.French.gold.conll}

# Print every line of file $1 matching one of the checked patterns,
# grouped by pattern in the order listed.
report_suspicious_lines() {
  grep -- "p=f" "$1"
  grep -- "7 7 7 7" "$1"
  grep -- "UNK UNK" "$1"
  grep -- " X X" "$1"
  grep -- "PC PC m" "$1"
}

# Write a patched copy of file $1 to file $2.
# All substitutions run in one sed pass, in the listed order on each line.
# The result goes to "${2}_tmp" first and is moved over $2 only when sed
# succeeds, so a failed run never truncates or half-writes an existing $2.
# Returns 0 on success, 1 on failure (scratch file removed).
patch_corpus() {
  if sed \
    -e 's/p=f/p=2/' \
    -e 's/7 7 7 7/7 7 D DET/g' \
    -e 's/demi demi X X/demi demi A ADJ/g' \
    -e 's/après après PC PC/après après P P/' \
    -e 's/du du UNK UNK/du du P+D P+D/' \
    -e 's/New New UNK UNK/New New ET ET/g' \
    -e 's/York York UNK UNK/York York ET ET/g' \
    -- "$1" > "${2}_tmp"; then
    mv -f -- "${2}_tmp" "$2" || return 1
  else
    rm -f -- "${2}_tmp"
    return 1
  fi
}

# Report, patch, report again. Refuses to start when the corpus is unreadable.
main() {
  if [ ! -r "$corpus" ]; then
    printf 'patch_ftb_train: cannot read corpus: %s\n' "$corpus" >&2
    return 1
  fi
  report_suspicious_lines "$corpus"
  patch_corpus "$corpus" "$out" || return 1
  printf '\n\n'
  report_suspicious_lines "$out"
  # grep exits 1 when nothing matches, which is the expected outcome here;
  # do not let that become the script's exit status.
  return 0
}

main