Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://github.com/nino-cunei/oldbabylonian
10 April 2019, 10:45:51 UTC
  • Code
  • Branches (11)
  • Releases (0)
  • Visits
    • Branches
    • Releases
    • HEAD
    • refs/heads/master
    • refs/tags/v0.2
    • refs/tags/v0.3
    • refs/tags/v0.4
    • refs/tags/v1.0
    • refs/tags/v1.0.1
    • refs/tags/v1.1
    • refs/tags/v1.2
    • refs/tags/v1.3
    • refs/tags/v1.4
    No releases to show
  • 6fe789d
  • /
  • programs
  • /
  • tfFromATF.py
Raw File Download
Take a new snapshot of a software origin

If the archived software origin currently browsed is not synchronized with its upstream version (for instance when new commits have been issued), you can explicitly request Software Heritage to take a new snapshot of it.

Use the form below to proceed. Once a request has been submitted and accepted, it will be processed as soon as possible. You can then check its processing state by visiting this dedicated page.
swh spinner

Processing "take a new snapshot" request ...

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
  • revision
  • snapshot
origin badgecontent badge Iframe embedding
swh:1:cnt:deabf2ea2306b645b67f7369a06c1e5eb3acbdf4
origin badgedirectory badge Iframe embedding
swh:1:dir:9287bc84340ee2f1ec0e4d777e59e51908046a3d
origin badgerevision badge
swh:1:rev:20173f788d445e60e4bc40891f2fee26044119a1
origin badgesnapshot badge
swh:1:snp:9091ca8d749e2b01a10b40227cf5a226e41c8da5

This interface makes it possible to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
  • revision
  • snapshot
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Tip revision: 20173f788d445e60e4bc40891f2fee26044119a1 authored by Dirk Roorda on 28 March 2019, 07:29:10 UTC
docs analysis
Tip revision: 20173f7
tfFromATF.py
import sys
import os
import re
import collections
from unicodedata import name as uname
from shutil import rmtree
from glob import glob
from tf.fabric import Fabric
from tf.convert.walker import CV

# LOCATIONS

# Root of the local repository clones: ~/github/<org>/<repo>
BASE = os.path.expanduser('~/github')
ORG = 'Nino-cunei'
REPO = 'oldbabylonian'
VERSION_SRC = '0.3'  # version of the ATF source transcriptions to read
VERSION_TF = '1.0.4'  # version of the Text-Fabric dataset to produce
REPO_DIR = f'{BASE}/{ORG}/{REPO}'

# Input: CDLI transcription files, organized per source version
TRANS_DIR = f'{REPO_DIR}/sources/cdli/transcriptions'
# Directory holding the character mapping file (see getMapping)
CHAR_DIR = f'{REPO_DIR}/characters'

# TSV file mapping ascii tokens to their unicode representation
MAPPING_FILE = 'mapping.tsv'
MAPPING_PATH = f'{CHAR_DIR}/{MAPPING_FILE}'

IN_DIR = f'{TRANS_DIR}/{VERSION_SRC}'

# Output: Text-Fabric feature files, organized per TF version
TF_DIR = f'{REPO_DIR}/tf'
OUT_DIR = f'{TF_DIR}/{VERSION_TF}'

#  CHARACTERS

# Ascii tokens for unknown signs and ellipsis: these are legitimately absent
# from the character mapping, so uni() does not report them as unmapped.
UNMAPPABLE = {'x', 'X', 'n', 'N', '...'}

# Single internal characters that stand in for multi-character ATF notations
# during processing (see transEsc / transUnEsc).
prime = "'"  # prime mark, e.g. on line/column numbers
ellips = '…'  # internal form of '...'
liga = '␣'  # internal form of the x in x(...) constructions
adjacent = '⁼'  # NOTE(review): not referenced in the visible part of this file — confirm use
excl = '¡'  # internal form of the ! in !(...) constructions

# Two-character ascii notations for emphatic consonants and their
# one-character internal stand-ins (distinct from the transUni targets).
emphatic = {
    's,': 'ş',
    't,': 'ţ',
}

# Characters marking unknown signs/numerals
unknownStr = 'xXnN'
unknownSet = set(unknownStr)

# Letters that may occur in readings; note that 'x' is deliberately absent
# (presumably because x marks an unknown sign — see unknownStr).
lowerLetterStr = 'abcdefghijklmnopqrstuvwyz' + ''.join(emphatic.values())
# Uppercase variant is computed before the prime is appended below,
# so the prime only counts as a lowercase letter.
upperLetterStr = lowerLetterStr.upper()
lowerLetterStr += prime


# Internal stand-in for the fraction slash between digits
div = '÷'
digitStr = f'0123456789{div}'

# A digit, a slash, a digit: a fraction in ATF notation
divRe = re.compile(r'''([0-9])/([0-9])''')


def divRepl(match):
  """Regex replacement: rewrite a digit/digit fraction with the internal ÷."""
  (numerator, denominator) = match.group(1, 2)
  return numerator + div + denominator


# Internal characters that act as operators on a following grapheme:
# ␣ (from x(...)) and ¡ (from !(...))
graphemeStr = f'{liga}{excl}'
# Operators that may occur inside compound graphemes, e.g. |GA2.AN|
operatorStr = '.+/:'
operatorSet = set(operatorStr)


# Flag characters that may trail a sign, with the feature name each sets
flagging = {
    '*': 'collated',
    '!': 'remarkable',
    '?': 'question',
    '#': 'damage',
}
flagStr = ''.join(flagging)

# Cluster notations: (internal begin, internal end, ATF begin, ATF end, type).
# ATF bracket pairs are replaced by unambiguous single internal characters
# so that later regexes need not deal with multi-character brackets
# (see transEsc / transUnEsc).
clusterChars = (
    ('◀', '▶', '{', '}', 'det'),
    ('∈', '∋', '(', ')', 'uncertain'),
    ('〖', '〗', '[', ']', 'missing'),
    ('«', '»', '<<', '>>', 'excised'),
    ('⊂', '⊃', '<', '>', 'supplied'),
    ('┌', '┐', '_', '_', 'langalt'),
)

# Derived lookup tables over clusterChars:
clusterCharsB = {x[0] for x in clusterChars}  # internal begin chars
clusterCharsE = {x[1] for x in clusterChars}  # internal end chars
clusterCharsA = {x[0] for x in clusterChars} | {x[1] for x in clusterChars}  # all internal
clusterCharsO = {x[2] for x in clusterChars} | {x[3] for x in clusterChars}  # all ATF originals
clusterType = {x[0]: x[4] for x in clusterChars}  # internal begin -> cluster type
clusterAtfE = {x[0]: x[1] for x in clusterChars}  # internal begin -> internal end
clusterAtfB = {x[1]: x[0] for x in clusterChars}  # internal end -> internal begin
clusterAtf = {x[0]: x[2] for x in clusterChars}  # internal char -> ATF original
clusterAtf.update({x[1]: x[3] for x in clusterChars})
clusterAtfInv = {co: ca for (ca, co) in clusterAtf.items()}  # ATF original -> internal

# Pattern for a sign reading: a letter followed by letters/digits/primes,
# or an ellipsis, or an unknown marker; optionally trailed by flags.
readingPat = (
    f'(?:(?:[{lowerLetterStr}{upperLetterStr}]'
    f'[{lowerLetterStr}{upperLetterStr}{digitStr}{prime}]*'
    f')|{ellips}|[{unknownStr}])'
    f'[{flagStr}]*'
)
# Pattern for a grapheme: uppercase letters/digits/operators,
# optionally wrapped in | | (compound graphemes).
graphemePat = (
    r'\|?'
    f'[{upperLetterStr}]'
    f'[{upperLetterStr}{digitStr}{operatorStr}]*'
    r'\|?'
)


def makeClusterEscRepl(cab, cae):
  """Build a regex replacement callable that brackets content with cab/cae.

  The returned function expects a match whose group 2 is the inner content
  (group 1, the full bracketed text, is discarded) and yields the content
  wrapped in the internal begin/end characters.
  """
  def repl(match):
    inner = match.group(2)
    return cab + inner + cae

  return repl


# For cluster notations whose ATF begin and end brackets are the same
# character (e.g. _ ... _ for langalt), a plain replace cannot tell begin
# from end; build a regex plus replacement function per such notation.
clusterEscRe = {}
clusterEscRepl = {}

for (escBegin, escEnd, atfBegin, atfEnd, _typ) in clusterChars:
  if atfBegin != atfEnd:
    continue
  pattern = f'''({re.escape(atfBegin)}(.*?){re.escape(atfEnd)})'''
  clusterEscRe[escBegin] = re.compile(pattern)
  clusterEscRepl[escBegin] = makeClusterEscRepl(escBegin, escEnd)


def clusterCheck(text):
  """Return all original ATF cluster bracket characters still present in text."""
  remaining = clusterORe.findall(text)
  return remaining


def transEsc(text):
  """Escape multi-character ATF notations into single internal characters.

  Fractions, ellipses, x(...)/!(...) operators, emphatic consonants and
  cluster brackets are each replaced by an unambiguous one-character form,
  so that downstream regexes can work character-wise. Inverted by transUnEsc.
  """
  text = divRe.sub(divRepl, text)
  # simple one-for-one token substitutions
  for (atfToken, internal) in (
      ('...', ellips),
      ('x(', f'{liga}('),
      ('!(', f'{excl}('),
  ):
    text = text.replace(atfToken, internal)
  for (exp, abb) in emphatic.items():
    text = text.replace(exp, abb)
  # cluster brackets: identical begin/end brackets need the regex route
  for (cab, cae, cob, coe, ctp) in clusterChars:
    if cob != coe:
      text = text.replace(cob, cab).replace(coe, cae)
    else:
      text = clusterEscRe[cab].sub(clusterEscRepl[cab], text)
  return text


def transUnEsc(text):
  """Invert transEsc: restore the original ATF notation from internal characters."""
  for (cab, cae, cob, coe, ctp) in clusterChars:
    text = text.replace(cab, cob)
    text = text.replace(cae, coe)
  for (exp, abb) in emphatic.items():
    text = text.replace(abb, exp)
  # single internal characters back to their ATF tokens
  for (internal, atfToken) in (
      (excl, '!'),
      (liga, 'x'),
      (ellips, '...'),
      (div, '/'),
  ):
    text = text.replace(internal, atfToken)
  return text


# Escaped character-class bodies for the internal cluster characters
clusterA = re.escape(''.join(clusterCharsA))
clusterB = re.escape(''.join(clusterCharsB))
clusterE = re.escape(''.join(clusterCharsE))
clusterO = re.escape(''.join(clusterCharsO))
# Reusable regex fragments
inside = r'''(?:\s+)'''
outside = r'''\s*'''
spaceB = r'''(?:\s+|^)'''
spaceE = r'''(?:\s+|$)'''
bO = r'\('
bC = r'\)'

# Any character outside the sane ATF repertoire
# NOTE(review): '$' occurs twice in this character class — harmless but
# probably unintended.
insaneRe = re.compile(r'''[^0-9a-zA-Z$(){}\[\]<>.,:=$#&@"'?!/+*| _-]''')
# A transcription line: line number, dot, whitespace, content
transRe = re.compile(r'''^([0-9a-zA-Z']+)\.\s+(.+)$''')
# A translation meta line: tr.<lang>: <text>
translationRe = re.compile(r'''^tr\.([^:]+):\s*(.*)''')
# Document id: collection, volume number, optional document number/note
collectionRe = re.compile(r'''^(\S+)\s+([0-9]+)\s*,?\s*([^&+]*)(?:[&+]|$)''')
# Inline comment in escaped form: ∈$ ... $∋ (i.e. ($ ... $) before transEsc)
# NOTE(review): the trailing '' after the raw string is a no-op concatenation
# of an empty string — harmless but probably a typo.
commentRe = re.compile(r'∈\$(.*?)\$∋''')
# Numerals: repeat (n or number, possibly a ÷-fraction) plus a reading,
# in escaped (∈ ∋) or bracketed ( ) form respectively
numeralBackRe = re.compile(f'''(n|(?:[0-9]+(?:{div}[0-9]+)?))∈([^∋]+)∋''')
numeralRe = re.compile(f'''(n|(?:[0-9]+(?:{div}[0-9]+)?)){bO}({readingPat}){bC}''')
# Operator constructions: ␣(...) or ¡(...) after a reading
withGraphemeBackRe = re.compile(f'''([{graphemeStr}])∈([^∋]+)∋''')
withGraphemeRe = re.compile(f'''({readingPat})([{graphemeStr}]){bO}({graphemePat}){bC}''')
numeral2Re = re.compile(r'''([0-9]+∈[^∋]+∋)''')
# Leftover original ATF cluster brackets (used by clusterCheck)
clusterORe = re.compile(f'[{clusterO}]')
# A token consisting solely of internal cluster characters
clusterTermRe = re.compile(f'^[{clusterA}]*$')
# Spacing anomalies around cluster begin/end characters
cSpaceBRe = re.compile(f'{outside}([{clusterB}]){inside}')
cSpaceERe = re.compile(f'{inside}([{clusterE}]){outside}')
# Hyphen anomalies at word edges and cluster edges
wHyphenBRe = re.compile(f'{spaceB}([{clusterB}]*)-')
wHyphenERe = re.compile(f'-([{clusterE}]*){spaceE}')
cHyphenBRe = re.compile(f'([{clusterB}]+)-')
cHyphenERe = re.compile(f'-([{clusterE}]+)')
# A flag wrongly enclosed between cluster characters
cFlagRe = re.compile(f'[{clusterA}]([{flagStr}]+)[{clusterA}]')
# A placeholder produced by commentRepl: ├n┤
inlineCommentRe = re.compile(r'''^├[^┤]*┤$''')

# Ascii transliteration tokens and their unicode representations,
# applied in order by nice() and inverted (lossily) by makeAscii().
# NOTE(review): "s'", "S'" and "'" all map to ':' — the reverse direction
# cannot distinguish them; confirm this is acceptable.
transUni = {
    'h,': 'ḫ',
    'H,': 'Ḫ',
    'j,': 'ŋ',
    'J,': 'Ŋ',
    's,': 'ṣ',
    'S,': 'Ṣ',
    "s'": ':',
    "S'": ':',
    't,': 'ṭ',
    'T,': 'Ṭ',
    'sz': 'š',
    'SZ': 'Š',
    'x2': 'ₓ',
    'X2': 'ₓ',
    "'": ':',
    # digits become subscripts (sign index numbers)
    '0': '₀',
    '1': '₁',
    '2': '₂',
    '3': '₃',
    '4': '₄',
    '5': '₅',
    '6': '₆',
    '7': '₇',
    '8': '₈',
    '9': '₉',
}


def nice(text):
  """Replace ascii transliteration tokens in text by their unicode forms.

  Replacements follow the (insertion) order of transUni.
  """
  for (asciiToken, uniChar) in transUni.items():
    text = text.replace(asciiToken, uniChar)
  return text


def makeAscii(text):
  """Replace unicode transliteration characters in text by their ascii tokens.

  NOTE(review): the inverse of nice() is lossy and order-dependent — several
  ascii tokens map to the same unicode character (e.g. ':'), and the first
  matching transUni entry wins.
  """
  for (asciiToken, uniChar) in transUni.items():
    text = text.replace(uniChar, asciiToken)
  return text


# CDLI metadata labels mapped to (TF feature name, value type).
# Entries typed 'int' would be added to intFeatures (none currently are).
META_FIELDS = {
    'Author(s)': ('author', 'str'),
    'Publication date': ('pubdate', 'str'),
    'Collection': ('museumname', 'str'),
    'Museum no.': ('museumcode', 'str'),
    'Excavation no.': ('excavation', 'str'),
    'Period': ('period', 'str'),
    'Material': ('material', 'str'),
    'Genre': ('genre', 'str'),
    'Sub-genre': ('subgenre', 'str'),
    'ATF source': ('transcriber', 'str'),
    'UCLA Library ARK': ('ARK', 'str'),
}


# TF CONFIGURATION

# The slot (smallest) node type of the dataset
slotType = 'sign'

# Dataset-level metadata, attached to all features
# NOTE(review): 'et. al.' should read 'et al.' — runtime metadata string,
# left unchanged here.
generic = {
    'name': 'AbB Old Babylonian Cuneiform',
    'editor': 'Cale Johnson et. al.',
    'institute': 'CDL',
    'converters': 'Cale Johnson, Dirk Roorda',
}

# Text formats and sectioning configuration for the otext feature
otext = {
    'fmt:text-orig-full': '{atfpre}{atf}{atfpost}{after}',
    'fmt:text-orig-plain': '{sym}{afterr}',
    'fmt:text-orig-rich': '{symr}{afterr}',
    'fmt:text-orig-unicode': '{symu}{afteru}',
    'sectionFeatures': 'pnumber,face,lnno',
    'sectionTypes': 'document,face,line',
}

# Features whose values are integers: explicit numeric features, all flag
# features, all cluster-type features, and any META_FIELDS entry typed 'int'
# (currently none).
intFeatures = (
    set('''
        ln
        col
        primeln
        primecol
        repeat
        srcLnNum
        trans
        volume
    '''.strip().split()) |
    set(flagging.values()) |
    set(clusterType.values()) |
    {x[1][0] for x in META_FIELDS.items() if x[1][1] == 'int'}
)

# Per-feature metadata (descriptions) for the TF dataset.
# Fixed: the 'operatorr'/'operatoru' entries had the f-prefix on the *key*
# ( f'description' ) while the value was a plain string, so the literal text
# "{liga}" appeared in the description instead of the ␣ character.
featureMeta = {
    'after': {
        'description': 'what comes after a sign or word (- or space)',
    },
    'afterr': {
        'description': (
            'what comes after a sign or word (- or space); '
            'between adjacent signs a ␣ is inserted'
        ),
    },
    'afteru': {
        'description': 'what comes after a sign when represented as unicode (space)',
    },
    'atf': {
        'description': (
            'full atf of a sign (without cluster chars)'
            ' or word (including cluster chars)'
        ),
    },
    'atfpost': {
        'description': 'atf of cluster closings at sign',
    },
    'atfpre': {
        'description': 'atf of cluster openings at sign',
    },
    'col': {
        'description': 'ATF column number',
    },
    'collated': {
        'description': 'whether a sign is collated (*)',
    },
    'collection': {
        'description': 'collection of a document',
    },
    'comment': {
        'description': '$ comment to line or inline comment to slot ($ and $)',
    },
    'damage': {
        'description': 'whether a sign is damaged',
    },
    'det': {
        'description': 'whether a sign is a determinative gloss - between braces { }',
    },
    'docnote': {
        'description': 'additional remarks in the document identification',
    },
    'docnumber': {
        'description': 'number of a document within a collection-volume',
    },
    'excised': {
        'description': 'whether a sign is excised - between double angle brackets << >>',
    },
    'face': {
        'description': 'full name of a face including the enclosing object',
    },
    'flags': {
        'description': 'sequence of flags after a sign',
    },
    'fraction': {
        'description': 'fraction of a numeral',
    },
    'grapheme': {
        'description': 'grapheme of a sign',
    },
    'graphemer': {
        'description': 'grapheme of a sign using non-ascii characters',
    },
    'graphemeu': {
        'description': 'grapheme of a sign using cuneiform unicode characters',
    },
    'lang': {
        'description': 'language of a document',
    },
    'langalt': {
        'description': (
            '1 if a sign is in the alternate language (i.e. Sumerian)'
            ' - between underscores _ _'
        ),
    },
    'ln': {
        'description': 'ATF line number of a numbered line, without prime',
    },
    'lnc': {
        'description': 'ATF line identification of a comment line ($)',
    },
    'lnno': {
        'description': 'ATF line number, may be $ or #, with prime; column number prepended',
    },
    'missing': {
        'description': 'whether a sign is missing - between square brackets [ ]',
    },
    'object': {
        'description': 'name of an object of a document',
    },
    'operator': {
        'description': 'the ! or x in a !() or x() construction',
    },
    'operatorr': {
        # the ␣ below is the internal liga character for x(...)
        'description': 'the ! or x in a !() or x() construction, represented as =, ␣',
    },
    'operatoru': {
        'description': 'the ! or x in a !() or x() construction, represented as =, ␣',
    },
    'pnumber': {
        'description': 'P number of a document',
    },
    'primecol': {
        'description': 'whether a prime is present on a column number',
    },
    'primeln': {
        'description': 'whether a prime is present on a line number',
    },
    'question': {
        'description': 'whether a sign has the question flag (?)',
    },
    'reading': {
        'description': 'reading of a sign',
    },
    'readingr': {
        'description': 'reading of a sign using non-ascii characters',
    },
    'readingu': {
        'description': 'reading of a sign using cuneiform unicode characters',
    },
    'remarks': {
        'description': '# comment to line',
    },
    'remarkable': {
        'description': 'whether a sign is remarkable (!)',
    },
    'repeat': {
        'description': 'repeat of a numeral; the value n (unknown) is represented as -1',
    },
    'sym': {
        'description': 'essential part of a sign or of a word',
    },
    'symr': {
        'description': 'essential part of a sign or of a word using non-ascii characters',
    },
    'symu': {
        'description': 'essential part of a sign or of a word using cuneiform unicode characters',
    },
    'srcfile': {
        'description': 'source file name of a document',
    },
    'srcLn': {
        'description': 'full line in source file',
    },
    'srcLnNum': {
        'description': 'line number in source file',
    },
    'supplied': {
        'description': 'whether a sign is supplied - between angle brackets < >',
    },
    'trans': {
        'description': 'whether a line has a translation',
    },
    'translation@en': {
        'description': 'translation of line in language en = English',
    },
    'type': {
        'description': 'name of a type of cluster or kind of sign',
    },
    'uncertain': {
        'description': 'whether a sign is uncertain - between brackets ( )',
    },
    'volume': {
        'description': 'volume of a document within a collection',
    },
    'author': {
        'description': 'author from metadata field "Author(s)"',
    },
    'pubdate': {
        'description': 'publication date from metadata field "Publication date"',
    },
    'museumname': {
        'description': 'museum name from metadata field "Collection"',
    },
    'museumcode': {
        'description': 'museum code from metadata field "Museum no."',
    },
    'excavation': {
        'description': 'excavation number from metadata field "Excavation no."',
    },
    'period': {
        'description': 'period indication from metadata field "Period"',
    },
    'material': {
        'description': 'material indication from metadata field "Material"',
    },
    'genre': {
        'description': 'genre from metadata field "Genre"',
    },
    'subgenre': {
        # fixed copy-paste: previously read 'genre from metadata field "Sub-genre"'
        'description': 'sub-genre from metadata field "Sub-genre"',
    },
    'transcriber': {
        'description': 'person who did the encoding into ATF from metadata field "ATF source"',
    },
    'ARK': {
        'description': 'persistent identifier of type ARK from metadata field "UCLA Library ARK"',
    },
}


# ATF INTERPRETATION

# Reverse of transUni, keyed by the uppercased unicode character.
# NOTE(review): upper() collapses case pairs (e.g. 'ṣ'/'Ṣ' both key to 'Ṣ')
# and three tokens map to ':', so later entries overwrite earlier ones —
# confirm this is the intended lookup.
transAscii = {rout.upper(): rin for (rin, rout) in transUni.items()}

VAR_OBJ = 'object'
DEFAULT_OBJ = 'tablet'

# Recognized object types in @ lines
OBJECTS = set('''
    tablet
    envelope
    case
'''.strip().split())

# Recognized face names in @ lines.
# NOTE(review): .split() splits on any whitespace, so multi-word faces
# ('left edge', 'surface a', 'seal 1') become separate one-word entries
# ('left', 'edge', 'surface', 'a', ...). processAtSpec tests only the first
# word of the line against this set, so matching works by accident —
# confirm before changing.
FACES = set('''
    obverse
    reverse
    left edge
    upper edge
    lower edge
    bottom
    surface a
    seal 1
'''.strip().split())

# Known misspellings of face names and their corrections
FACES_CORRECTION = {
    'overse': 'obverse',
    'obverrse': 'obverse',
}

# Known misspellings of the column keyword and their corrections
COL_CORRECTION = {
    'second': 'column',
}

# Literal $ comments accepted without a warning (beyond COMMENT_RE matches);
# this block is split per line, unlike OBJECTS/FACES above.
COMMENTS = '''
    (uninscribed)
    (needs to be added)
'''
COMMENTS = {c.strip() for c in COMMENTS.strip('\n').split('\n')}

# Verbose regex describing the recognized phrasings of $ comment lines,
# e.g. 'about 3 lines broken', 'rest blank', 'single ruling', 'beginning
# broken off', or anything starting with 'reading'. Comments not matching
# this pattern (and not in COMMENTS) trigger a warning in commentInsert.
COMMENT_PATTERN = r'''
    (?:
      ^
      (?:
          (?: maybe)?
          (?:
              (?:
                  (?:at \s+ least)
                  | about
              )?
              \s*
              (?:
                  (?:
                    [0-9]+
                    (?:-[0-9]+)?
                  )
                  | one | two | three | four | five | six | seven | eight | nine | ten

              )
              \s+
              lines?
          )
          | rest | obverse | reverse | seal | lower edge |
          (?:
              beginning
              (?: \s+ lines?)?
          )
          |
          (?: blank \s+ space)
          | single | double
      )?
      \s*
      (?:
          (?:
              broken
              (?:\s+ off)?
          )
          | blank | illegible | unreadable | uninscribed | destroyed | missing | erased | effaced
          | ruling | impression |
          (?: not \s+ inscribed) |
          (?: of \s+ traces)
      )?
      $
    )
    |
    (?:
      ^
      reading
    )
'''
# re.X: whitespace and structure in the pattern above are non-significant
COMMENT_RE = re.compile(COMMENT_PATTERN, re.X)


def bracketBackRepl(match):
  """Regex replacement: rewrite a two-group match as group1(group2)."""
  (head, inner) = match.group(1, 2)
  return head + '(' + inner + ')'


def wHyphenBRepl(match):
  """Regex replacement: drop a word-initial hyphen, keeping group 1 after a space."""
  return ' ' + match.group(1)


def wHyphenERepl(match):
  """Regex replacement: drop a word-final hyphen, keeping group 1 before a space."""
  return match.group(1) + ' '


def cHyphenBRepl(match):
  """Regex replacement: move a hyphen in front of the matched cluster openers."""
  return '-' + match.group(1)


def cHyphenERepl(match):
  """Regex replacement: move a hyphen behind the matched cluster closers."""
  return match.group(1) + '-'


def insaneRepl(match):
  """Regex replacement: highlight an offending character by wrapping it in ┣ ┫."""
  offending = match.group(0)
  return '┣' + offending + '┫'


def cSpaceBRepl(match):
  """Regex replacement: normalize spacing to a single space before group 1."""
  return f' {match.group(1)}'


def cSpaceERepl(match):
  """Regex replacement: normalize spacing to a single space after group 1."""
  return f'{match.group(1)} '


# Inline comments collected for the current line, consumed elsewhere;
# cleared before each line is processed.
commentNotes = []


def commentRepl(match):
  """Regex replacement: stash an inline comment and emit a numbered ├n┤ placeholder."""
  noteIndex = len(commentNotes)
  commentNotes.append(match.group(1).strip())
  return f'├{noteIndex}┤'


# ERROR HANDLING

def showDiags(diags, kind, batch=20):
  """Print a report of collected diagnostics.

  diags maps a diagnostic message to a dict of source file -> set of
  (lineNum, line, doc, sore) tuples. At most `batch` occurrences are shown
  per source; `kind` is a label (e.g. ERROR/WARNING) prefixed to each message.
  """
  if not diags:
    print('No diags')
    return

  for (diag, srcs) in sorted(diags.items()):
    print(f'{kind} {diag}')
    for (src, data) in sorted(srcs.items()):
      print(f'\t{src} ({len(data)}x)')
      occurrences = sorted(data)
      for (lnNum, srcLine, doc, sore) in occurrences[:batch]:
        prefix = f'"{sore}" in ' if sore is not None else ''
        print(f'\t\t{lnNum} in {doc}: {prefix}{srcLine}')
      if len(occurrences) > batch:
        print('\t\t + more')


# SET UP CONVERSION

def getMapping():
  """Read the ascii-token -> unicode character mapping from MAPPING_PATH.

  The file is a TSV with the ascii token in the first column and the unicode
  representation in the remainder of the line. Prints the number of tokens
  read and returns the mapping dict.
  """
  mapping = {}
  with open(MAPPING_PATH) as fh:
    for ln in fh:
      (token, uniChars) = ln.strip().split('\t', 1)
      mapping[token] = uniChars

  print(f'{len(mapping)} tokens in the character mapping')
  return mapping


def getSources():
  """Return the base names (without extension) of all .txt files in IN_DIR."""
  names = []
  for path in glob(f'{IN_DIR}/*.txt'):
    (base, _ext) = os.path.splitext(os.path.basename(path))
    names.append(base)
  return tuple(names)


def getConverter():
  """Create a Text-Fabric walker-conversion object targeting OUT_DIR."""
  fabric = Fabric(locations=OUT_DIR)
  return CV(fabric)


def checkSane(line):
  """Detect and strip characters outside the sane ATF repertoire.

  Returns (report, markedLine, cleanLine): `report` names each offending
  character (hex code point and unicode name), `markedLine` highlights them
  with ┣ ┫, and `cleanLine` has them removed. When the line is clean, the
  report is empty and both lines equal the input.
  """
  offending = insaneRe.findall(line)
  if not offending:
    return ('', line, line)

  reports = []
  for c in sorted(offending):
    try:
      charName = uname(c)
    except ValueError:
      # control and unassigned characters have no unicode name
      charName = '??'
    reports.append(f"┣{c}┫ = {ord(c):>04x} = {charName}")
  insaneRep = '; '.join(reports)
  lineMsg = insaneRe.sub(insaneRepl, line)
  line = insaneRe.sub('', line)
  return (insaneRep, lineMsg, line)


def convert():
  """Run the walker conversion; wipe and recreate OUT_DIR first when generating.

  NOTE(review): `generateTf` is not defined in this part of the file —
  presumably set elsewhere (e.g. in a main section); confirm.
  Returns the result of cv.walk (success flag).
  """
  if generateTf:
    if os.path.exists(OUT_DIR):
      rmtree(OUT_DIR)
    os.makedirs(OUT_DIR, exist_ok=True)

  converter = getConverter()

  return converter.walk(
      director,
      slotType,
      otext=otext,
      generic=generic,
      intFeatures=intFeatures,
      featureMeta=featureMeta,
      generateTf=generateTf,
  )


# DIRECTOR

def director(cv):

  sources = getSources()
  mapping = getMapping()
  unmapped = collections.Counter()

  curDocument = None
  recentObject = None
  curFace = None
  recentColumn = None
  recentComment = 0
  curLine = None
  recentTrans = None
  curCluster = collections.defaultdict(list)
  clusterStatus = {typ: False for typ in clusterType}
  curSign = None
  skip = False
  curMeta = {}

  i = 0
  pNum = None

  pNums = {}

  warnings = collections.defaultdict(lambda: collections.defaultdict(set))
  errors = collections.defaultdict(lambda: collections.defaultdict(set))

  # sub director: setting up a document node

  def uni(asciiStr):
    if asciiStr is None:
      return ''
    uniChars = mapping.get(asciiStr, None)
    if uniChars is None:
      if asciiStr not in UNMAPPABLE:
        unmapped[asciiStr] += 1
      uniChars = asciiStr
    return uniChars

  def documentStart():
    # we build nodes for documents, faces, lines
    # the node is stored in the cur-variables
    # we remember the latest object and column specs
    # object and column is stored in the recent variables
    nonlocal curDocument
    nonlocal pNum
    nonlocal skip

    documentEnd()

    identifiers = line[1:].split('=')
    pNum = identifiers[0].strip()
    docNum = identifiers[-1].strip()

    other = pNums.get(pNum, None)
    if other is not None:
      (otherSrc, otherI) = other
      rep = f'{pNum} also in {otherSrc}:{otherI}'
      errors[f'document: duplicate pnums'][src].add((i, line, pNum, rep))
      skip = True
      return

    curDocument = cv.node('document')
    pNums[pNum] = (src, i)

    sys.stderr.write(f'{src:<15} : {i:>4} : {pNum:<20}\r')

    if curMeta:
      cv.feature(curDocument, **curMeta)
      curMeta.clear()

    cv.feature(
        curDocument,
        pnumber=pNum,
        srcfile=src,
        srcLnNum=i,
        srcLn=line,
    )
    skip = False

    docnumber = None
    docnote = None
    match = collectionRe.match(docNum)

    if not match:
      warnings[f'document: malformed collection volume, number'][src].add(
          (i, line, pNum, docNum)
      )
      docnote = docNum
    else:
      collection = match.group(1)
      volume = match.group(2)
      docnumber = match.group(3).strip()
      docnote = None
      if docnumber:
        docnumber = docnumber.replace('pl. ', '').strip()
        docnumParts = docnumber.split(',', 1)
        if len(docnumParts) == 1:
          docnote = None
        else:
          docnumber = docnumParts[0].strip()
          docnote = docnumParts[1].strip()

      if ' ' in docnumber:
        warnings[f'document: unusual number'][src].add(
            (i, line, pNum, docnumber)
        )
        docnote = docnumber
        docnumber = None
      cv.feature(curDocument, collection=collection, volume=volume)

    if docnumber:
      cv.feature(curDocument, docnumber=docnumber)
    if docnote:
      cv.feature(curDocument, docnote=docnote)

  # sub director: terminating a document node

  def documentEnd():
    nonlocal curDocument
    nonlocal recentObject

    if curDocument is None:
      return

    faceEnd()
    recentObject = None
    cv.terminate(curDocument)
    if not cv.linked(curDocument):
      errors[f'document: empty'][src].add((i, line, pNum, None))
    curDocument = None

  # sub director: processing an # metadata line

  def processMeta():
    lineInfo = line[1:].strip()
    if not curDocument:
      errors[f'meta: outside document'][src].add((i, line, pNum, lineInfo))
      return
    if len(line) > 1 and line[1] == ' ':
      commentInsert(meta=True)
      return
    match = translationRe.match(lineInfo)
    if match:
      lang = match.group(1)
      trans = match.group(2)
      if not curLine:
        errors[f'meta: translation outside line'][src].add((i, line, pNum, lineInfo))
        return
      cv.feature(curLine, **{'trans': 1, f'translation@{lang}': trans})
      return

    if lineInfo.startswith('atf:l'):
      errors[f'meta: no space after atf:'][src].add((i, line, pNum, None))
      lineInfo = 'atf: l' + lineInfo[5:]
    fields = lineInfo.split(maxsplit=1)
    if fields[0] == 'atf:':
      infoFields = fields[1].split(maxsplit=1)
      if len(infoFields) != 2:
        errors[f'meta: invalid'][src].add((i, line, pNum, fields[1]))
        return
      (key, value) = infoFields
      value = value.strip()
      if value.startswith('='):
        newValue = value[1:].strip()
        errors[f'meta: spurious ='][src].add((i, line, pNum, f'"{value}" => "{newValue}"'))
        value = newValue
      cv.feature(curDocument, **{key: value})
    else:
      errors[f'meta: unknown kind'][src].add((i, line, pNum, fields[0]))
      return

  # sub director: processing an @ specifier

  def processAtSpec():
    lineInfo = line[1:].strip()
    fields = lineInfo.split(maxsplit=1)
    typ = fields[0]
    subType = fields[1] if len(fields) == 2 else None

    if typ == 'column' or typ in COL_CORRECTION:
      if typ in COL_CORRECTION:
        typCorr = COL_CORRECTION[typ]
        errors[f'structure: column correction'][src].add((i, line, pNum, f'{typ} => {typCorr}'))
        typ = typCorr
      columnSet(subType)
    elif typ == 'object':
      objectSet(subType)
    elif typ in OBJECTS:
      objectSet(lineInfo)
    elif typ in FACES or typ in FACES_CORRECTION:
      if typ in FACES_CORRECTION:
        faceCorr = FACES_CORRECTION[typ]
        errors[f'structure: face correction'][src].add((i, line, pNum, f'{typ} => {faceCorr}'))
        faceStart(faceCorr)
      else:
        faceStart(lineInfo)
    else:
      errors[f'structure: unrecognized @'][src].add((i, line, pNum, lineInfo))

  # sub director: setting the object type

  def objectSet(typ):
    nonlocal recentObject
    nonlocal recentColumn
    nonlocal recentComment

    if typ is None:
      errors[f'structure: object without type'][src].add((i, line, pNum, None))

    faceEnd()
    recentColumn = None
    recentComment = 0
    recentObject = typ

  # sub director: setting up a face node

  def faceStart(faceName):
    nonlocal curFace
    nonlocal recentObject

    faceEnd()
    curFace = cv.node('face')

    if recentObject is None:
      errors[f'structure: object missing'][src].add((i, line, pNum, faceName))
      recentObject = DEFAULT_OBJ

    objSpec = recentObject if recentObject and recentObject != DEFAULT_OBJ else ''
    sep = ' - ' if objSpec and faceName else ''
    faceSpec = f'{objSpec}{sep}{faceName or ""}'
    cv.feature(
        curFace,
        object=recentObject,
        face=faceSpec,
        srcfile=src,
        srcLnNum=i,
        srcLn=line,
    )

  def faceEnd():
    nonlocal recentColumn
    nonlocal recentComment
    nonlocal curFace

    if curFace is None:
      return

    lineEnd()
    recentColumn = None
    recentComment = 0
    cv.terminate(curFace)
    if not cv.linked(curFace):
      errors[f'structure: face empty'][src].add((i, line, pNum, None))
    curFace = None

  # sub director: setting the column number

  def columnSet(number):
    nonlocal recentColumn
    nonlocal recentComment

    if number is None:
      errors[f'structure: column without number'][src].add((i, line, pNum, None))

    lineEnd()
    recentColumn = number
    recentComment = 0

  # sub director: setting up a comment line

  # comments are $ lines.
  # We interpret a comment line as a line with one empty slot.
  # The comment it self is a feature of the line node.

  def commentInsert(meta=False):
    nonlocal recentComment
    nonlocal curLine

    comment = line[1:].strip()
    if not meta and comment not in COMMENTS and not COMMENT_RE.match(comment):
      warnings[f'comment: unrecognized'][src].add((i, line, pNum, comment))

    if meta:
      if transLine is None:
        errors[f'comment: # line without preceding transcription line'][src].add(
            (i, line, pNum, comment)
        )
      else:
        prevRemarks = cv.get('remarks', transLine)
        combinedRemarks = f'{prevRemarks}\n{comment}' if prevRemarks else comment
        cv.feature(transLine, remarks=combinedRemarks)
    else:
      lineEnd()
      lnno = f'${chr(ord("a") + recentComment)}'
      recentComment += 1
      if recentColumn:
        lnno = f'{recentColumn}:{lnno}'
      curLine = cv.node('line')
      emptySlot = cv.slot()
      commentRep = f'$ {comment}'
      cv.feature(
          emptySlot,
          type='commentline',
          comment=comment,
          atf=commentRep,
          sym=commentRep,
          symr=commentRep,
          symu=commentRep,
      )
      cv.feature(
          curLine,
          lnc='$',
          lnno=lnno,
          srcfile=src,
          srcLnNum=i,
          srcLn=line,
      )

    if recentColumn is not None:
      cv.feature(curLine, col=recentColumn)

    cv.terminate(curLine)
    curLine = None

  # sub director: setting up a line node

  def lineStart(ln):
    nonlocal curLine
    nonlocal recentTrans

    lineEnd()
    curLine = cv.node('line')

    lnno = ln
    if recentColumn:
      lnno = f'{recentColumn}:{ln}'
    cv.feature(curLine, lnno=lnno)

    if recentColumn is not None:
      hasPrimeCol = "'" in recentColumn
      col = recentColumn.replace("'", '') if hasPrimeCol else recentColumn
      cv.feature(curLine, col=col)

      if hasPrimeCol:
        cv.feature(curLine, primecol=1)

    hasPrimeLn = "'" in ln
    if hasPrimeLn:
      ln = ln.replace("'", '')

    cv.feature(
        curLine,
        ln=ln,
        srcfile=src,
        srcLnNum=i,
        srcLn=line,
    )
    if hasPrimeLn:
      cv.feature(curLine, primeln=1)

    recentTrans = recentTrans.strip() + ' '

    commentNotes.clear()
    recentTrans = commentRe.sub(commentRepl, recentTrans)

    for (cab, cae, cob, coe, ctp) in clusterChars:
      bCount = recentTrans.count(cab)
      eCount = recentTrans.count(cae)
      if bCount != eCount:
        errors[f'cluster: unbalanced {cob} {coe}'][src].add(
            (i, line, pNum, f'{bCount} vs {eCount}')
        )

    changed = False
    if cSpaceBRe.search(recentTrans):
      recentTrans = cSpaceBRe.sub(cSpaceBRepl, recentTrans)
      changed = True

    if cSpaceERe.search(recentTrans):
      recentTrans = cSpaceERe.sub(cSpaceERepl, recentTrans)
      changed = True

    recentTrans = recentTrans.strip()

    if changed:
      errors[f'cluster: space near edge'][src].add((i, line, pNum, transUnEsc(recentTrans)))

  def lineEnd():
    nonlocal curLine

    if curLine is None:
      return

    cv.terminate(curLine)
    if not cv.linked(curLine):
      errors[f'line: empty'][src].add((i, line, pNum, None))
    curLine = None

  # sub director: adding data to a line node
  # this is itself a complicated generator with sub gens

  def lineData():
    nonlocal curLine
    nonlocal recentTrans

    curWord = None

    for typ in clusterStatus:
      clusterStatus[typ] = False

    if wHyphenBRe.search(recentTrans):
      errors[f'line: words starting with -'][src].add((i, line, pNum, None))
      recentTrans = wHyphenBRe.sub(wHyphenBRepl, recentTrans)
    if wHyphenERe.search(recentTrans):
      errors[f'line: words ending with -'][src].add((i, line, pNum, None))
      recentTrans = wHyphenERe.sub(wHyphenERepl, recentTrans)
    if cHyphenBRe.search(recentTrans):
      errors[f'line: clusters starting with -'][src].add((i, line, pNum, None))
      recentTrans = cHyphenBRe.sub(cHyphenBRepl, recentTrans)
    if cHyphenERe.search(recentTrans):
      errors[f'line: clusters ending with -'][src].add((i, line, pNum, None))
      recentTrans = cHyphenERe.sub(cHyphenERepl, recentTrans)

    words = recentTrans.split()

    # subsub director: processing cluster chars

    def clusterChar(before):
      """Consume cluster brackets (and interleaved flags) from one edge of `part`.

      With before=True the left edge of `part` is eaten, otherwise the right
      edge.  Opening brackets start a new cluster node; closing brackets
      terminate all open nodes of that bracket kind.  Flag characters found
      among the brackets are peeled off and re-attached to the sign side of
      `part`, so they end up adjacent to the sign material.

      Returns the consumed bracket characters, in source order.
      """
      nonlocal part

      brackets = ''

      # a flag fully enclosed in cluster chars is a transcription error
      if cFlagRe.search(part):
        errors[f'cluster: flag enclosed in cluster chars'][src].add(
            (i, line, pNum, transUnEsc(part))
        )

      flags = ''
      while part:
        refChar = part[0] if before else part[-1]
        if refChar in flagging:
          # collect flags; note that for before=False they accumulate reversed
          flags += refChar
        else:
          if refChar not in clusterCharsA:
            break
          if refChar in clusterCharsB:
            # opening bracket: start a cluster node of its type
            cab = refChar
            cob = clusterAtf[cab]
            # NOTE(review): cob is unused in this branch
            ctp = clusterType[cab]
            if before:
              brackets += cab
            else:
              brackets = cab + brackets

            clusterStatus[ctp] = True

            cNode = cv.node('cluster')

            curCluster[cab].append(cNode)

            cv.feature(cNode, type=ctp)
          elif refChar in clusterCharsE:
            # closing bracket: terminate all open nodes of the matching kind
            cae = refChar
            cab = clusterAtfB[cae]
            coe = clusterAtf[cae]
            cob = clusterAtf[cab]
            ctp = clusterType[cab]
            if before:
              brackets += cae
            else:
              brackets = cae + brackets

            clusterStatus[ctp] = False

            for cNode in curCluster[cab]:
              cv.terminate(cNode)
              if not cv.linked(cNode):
                errors[f'cluster: empty {cob} {coe}'][src].add((i, line, pNum, None))
            del curCluster[cab]
        part = part[1:] if before else part[0:-1]

      # re-attach the peeled flags next to the sign material;
      # flags[::-1] undoes the reversed accumulation of the before=False case
      if before:
        part = flags + part
      else:
        part += flags[::-1]

      return brackets

    # subsub director: finishing off  all clusters on a line

    def clusterEndMakeSure():
      """Terminate every cluster still open at line end; report empty ones."""
      for (openChar, nodes) in curCluster.items():
        cob = clusterAtf[openChar]
        coe = clusterAtf[clusterAtfE[openChar]]
        for node in nodes:
          cv.terminate(node)
          if cv.linked(node):
            continue
          errors[f'cluster: empty {cob} {coe}'][src].add((i, line, pNum, None))
      curCluster.clear()

    # subsub director: setting up a sign node

    def signStart():
      nonlocal curSign

      curSign = cv.slot()

      for typ in clusterStatus:
        if clusterStatus[typ]:
          cv.feature(curSign, **{typ: 1})

  # sub director: adding data to a sign node

    def doFlags():
      nonlocal part

      lPart = len(part)
      flags = ''
      for i in range(lPart):
        refChar = part[-1]
        if refChar in flagging:
          mf = flagging[refChar]
          cv.feature(curSign, **{mf: 1})
          part = part[0:-1]
          flags = refChar + flags
        else:
          break
      return flags

    def signData(clusterBefore, clusterAfter, after, afterr):
      """Classify `part` as a sign and set all its features on the current slot.

      clusterBefore/clusterAfter are the bracket strings consumed from the
      part's edges; after/afterr are the inter-sign separators (plain and
      "readable" variants).  Distinguishes comments, numerals, complex
      (reading x grapheme) signs, ellipses, unknowns, plain readings and
      graphemes, falling through to 'other' with an error otherwise.

      Returns (sym, symR, symU): plain, readable and unicode symbols,
      each possibly None for empty/comment signs.
      """
      nonlocal curSign
      nonlocal part

      sym = None
      symR = None
      symU = None
      origPart = part

      # a bare '-' separator carries no unicode representation
      afteru = None if after == '-' else after

      if after:
        cv.feature(curSign, after=after)
      if afterr:
        cv.feature(curSign, afterr=afterr)
      if afteru:
        cv.feature(curSign, afteru=afteru)

      if clusterBefore:
        cv.feature(
            curSign,
            atfpre=transUnEsc(clusterBefore),
        )
      if clusterAfter:
        cv.feature(
            curSign,
            atfpost=transUnEsc(clusterAfter),
        )

      # nothing left between the brackets: record an empty sign
      if not part:
        cv.feature(curSign, type='empty')
        errors['sign: empty (in cluster)'][src].add((i, line, pNum, transUnEsc(origPart)))
        return (sym, symR, symU)

      # ├N┤ is an escaped inline comment: N indexes into commentNotes
      if part.startswith('├') and part.endswith('┤'):
        commentIndex = int(part[1:-1])
        comment = commentNotes[commentIndex]
        commentRep = f'($ {comment} $)'
        cv.feature(
            curSign,
            type='comment',
            comment=comment,
            atf=commentRep,
            sym=commentRep,
            symr=commentRep,
            symu=commentRep,
        )
        # NOTE(review): sym is still None here, so symR/symU become None too;
        # presumably deliberate (comments contribute nothing to word syms) —
        # confirm against the caller's f-string concatenation.
        symR = sym
        symU = sym
        return (sym, symR, symU)

      reading = None
      readingR = None
      readingU = None
      grapheme = None
      graphemeR = None
      graphemeU = None

      partRep = transUnEsc(part)
      cv.feature(curSign, atf=partRep)

      # strip and record trailing flags, then recompute representations
      flags = doFlags()
      partRep = transUnEsc(part)
      partRepR = nice(partRep)
      if flags:
        cv.feature(curSign, flags=flags)

      fallenThrough = False

      # single-pass loop used as a structured goto: each matched case
      # `break`s past the remaining ones
      for x in [1]:
        match = numeralRe.match(part)
        if match:
          quantity = match.group(1)
          qpart = match.group(2)
          qpartRep = transUnEsc(qpart)
          qpartRepR = nice(qpartRep)
          qpartRepU = uni(qpartRep)
          if qpartRep.islower():
            reading = qpartRep
            readingR = qpartRepR
            readingU = qpartRepU
          else:
            grapheme = qpartRep
            graphemeR = qpartRepR
            graphemeU = qpartRepU

          # three numeral shapes: unknown quantity 'n', fraction, repeat count
          if quantity == 'n':
            fraction = None
            repeat = -1
            sym = f'n({qpartRep})'
            symR = f'n({qpartRepR})'
            symU = f'n({qpartRepU})'
            cv.feature(curSign, repeat=repeat)
          elif div in quantity:
            fraction = transUnEsc(quantity)
            repeat = None
            sym = f'{fraction}({qpartRep})'
            symR = f'{fraction}({qpartRepR})'
            partRep = transUnEsc(part)
            partRepU = uni(partRep)
            symU = partRepU
            cv.feature(curSign, fraction=fraction)
          else:
            repeat = int(quantity)
            fraction = None
            sym = f'{repeat}({partRep})'
            symR = f'{repeat}({partRepR})'
            partRep = transUnEsc(part)
            partRepU = uni(partRep)
            symU = partRepU
            cv.feature(curSign, repeat=repeat)

          cv.feature(
              curSign,
              type='numeral',
              sym=sym,
              symr=symR,
              symu=symU,
          )
          break

        # reading-with-grapheme, e.g. reading!GRAPHEME or reading(GRAPHEME)
        match = withGraphemeRe.search(part)
        if match:
          part = match.group(1)
          operator = match.group(2)
          grapheme = match.group(3)
          flags = doFlags()
          if flags:
            cv.feature(curSign, flags=flags)

          partRep = transUnEsc(part)
          partRepR = nice(partRep)
          partRepU = uni(partRep)
          grapheme = transUnEsc(grapheme)
          graphemeR = nice(grapheme)
          graphemeU = uni(grapheme)
          operator = transUnEsc(operator)

          reading = partRep
          readingR = partRepR
          readingU = partRepU
          # map the raw operator to its display forms ('!' -> '=', 'x' -> liga)
          op = '=' if operator == '!' else liga if operator == 'x' else operator
          opR = op.replace('x', 'ₓ')
          sym = f'{reading}{operator}{grapheme}'
          symR = f'{readingR}{op}{graphemeR}'
          symU = f'{readingU}{op}{graphemeU}'

          cv.feature(
              curSign,
              type='complex',
              operator=operator,
              operatorr=opR,
              operatoru=op,
              sym=sym,
              symr=symR,
              symu=symU,
          )
          break

        partRepU = uni(partRep)

        if part == '':
          errors['sign: empty (after flags)'][src].add((i, line, pNum, transUnEsc(origPart)))
          cv.feature(curSign, type='empty')
          break

        if part == ellips:
          cv.feature(curSign, type='ellipsis')
          grapheme = partRep
          graphemeR = partRepR
          graphemeU = partRepU
          sym = '...'
          symR = ellips
          symU = ellips
          break

        if part in unknownSet:
          cv.feature(curSign, type='unknown')
          if partRep.islower():
            reading = partRep
            readingR = partRepR
            readingU = partRepU
          else:
            grapheme = partRep
            graphemeR = partRepR
            graphemeU = partRepU
          break

        # all-lowercase = reading, all-uppercase = grapheme
        if part.islower():
          reading = partRep
          readingR = partRepR
          readingU = partRepU
          cv.feature(curSign, type='reading')
          break

        if part.isupper():
          grapheme = partRep
          graphemeR = partRepR
          graphemeU = partRepU
          cv.feature(curSign, type='grapheme')
          break

        fallenThrough = True

      # nothing matched: classify as 'other' and report
      if fallenThrough:
        grapheme = partRep
        graphemeR = partRepR
        graphemeU = partRepU
        cv.feature(curSign, type='other')
        msg = 'mixed case' if part.isalnum() else 'strange grapheme'
        errors[f'sign: {msg}'][src].add((i, line, pNum, transUnEsc(origPart)))

      if part != '':
        if sym is None:
          sym = partRep
          symR = partRepR
          symU = partRepU
        if sym:
          cv.feature(curSign, sym=sym, symr=symR, symu=symU)

        # NOTE(review): clusterClasses is computed but never used below;
        # looks like dead code (cv.get appears side-effect free) — confirm
        # before removing.
        clusterClasses = []
        for (cab, cae, cob, coe, ctp) in clusterChars:
          if cv.get(ctp, curSign):
            clusterClasses.append(ctp)
        clusterClasses = ' '.join(clusterClasses)

        if reading:
          cv.feature(curSign, reading=reading, readingr=readingR, readingu=readingU)
        if grapheme:
          cv.feature(curSign, grapheme=grapheme, graphemer=graphemeR, graphemeu=graphemeU)

      return (sym, symR, symU)

    def getParts(word):
      """Split a word into sign parts.

      Returns a list of (part, after) 2-tuples, where `part` is the raw
      material of one sign (possibly with cluster brackets attached) and
      `after` is the separator/operator text that followed it ('' when the
      next part is directly adjacent).  Malformed input (dangling separators,
      misplaced flags, leading leftover material) is reported via `errors`
      and repaired as well as possible.
      """
      origWord = word

      parts = []
      curPart = ''
      inSign = False
      endSign = False
      endPart = False

      while word:
        # first try the multi-char tokens: x, ellipsis, numeral, with-grapheme
        inCase = True
        if word.startswith('x'):
          c = 'x'
          word = word[1:]
        elif word.startswith(ellips):
          c = ellips
          # NOTE(review): assumes ellips is a single character — confirm
          word = word[1:]
        else:
          match = numeralRe.match(word) or withGraphemeRe.match(word)
          if match:
            c = match.group(0)
            lc = len(c)
            word = word[lc:]
          else:
            inCase = False
        if inCase:
          # a whole token was consumed: it starts or extends a sign
          if endPart or endSign:
            parts.append((curPart, ''))
            curPart = c
            endPart = False
            endSign = False
          else:
            curPart += c
          inSign = True
          endSign = True
          continue

        # otherwise classify the single next character
        c = word[0]
        if c == '-' or c in operatorSet:
          # separator/operator: closes the current part
          if inSign or len(parts) == 0:
            parts.append((curPart, c))
          else:
            # separator with no sign before it: merge into the previous part
            (prevPart, prevAfter) = parts[-1]
            parts[-1] = (prevPart + curPart, prevAfter + c)
            errors[f'sign: {c} after no sign'][src].add(
                (i, line, pNum, transUnEsc(curPart))
            )
          curPart = ''
          inSign = False
          endSign = False
          endPart = False
        elif c in clusterCharsB:
          # opening bracket: attaches to the part it opens
          if inSign:
            parts.append((curPart, ''))
            curPart = c
            inSign = False
            endSign = False
            endPart = False
          else:
            curPart += c
        elif c in clusterCharsE:
          # closing bracket: attaches to the part it closes
          curPart += c
          if inSign:
            endPart = True
        elif c in flagging:
          # flags must directly follow sign material
          if inSign and not endPart:
            curPart += c
          elif not inSign and not endPart:
            errors[f'sign: flag not attached to sign (ignored)'][src].add(
                (i, line, pNum, transUnEsc(curPart))
            )
          elif inSign:
            errors[f'sign: flag attached to cluster (applied to sign instead)'][src].add(
                (i, line, pNum, transUnEsc(curPart))
            )
            curPart += c
          else:
            errors[f'sign: flag after cluster chars (ignored)'][src].add(
                (i, line, pNum, transUnEsc(curPart))
            )
        else:
          # ordinary sign character
          if endPart or endSign:
            parts.append((curPart, ''))
            curPart = c
            endSign = False
            endPart = False
          else:
            curPart += c
          inSign = True
        word = word[1:]

      # flush trailing material
      if curPart:
        if inSign:
          parts.append((curPart, ''))
        else:
          if len(parts):
            # BUG FIX: the original did `parts[-1] += ((curPart, ''))`, which
            # concatenates tuples into a 4-tuple and crashes the 2-tuple
            # unpack in the caller.  Fold the trailing (non-sign) material
            # into the last part's text instead, keeping its `after` intact.
            (prevPart, prevAfter) = parts[-1]
            parts[-1] = (prevPart + curPart, prevAfter)
          else:
            errors[f'sign: empty (in word)'][src].add(
                (i, line, pNum, f'{transUnEsc(curPart)} in {transUnEsc(origWord)}')
            )
            parts = [(curPart, '')]
      return parts

    # the outer loop of the lineData sub generator:
    # one word node per whitespace-separated chunk; each word is split into
    # sign parts, and each part becomes one slot with cluster/after features.

    lWords = len(words)

    for (w, word) in enumerate(words):
      curWord = cv.node('word')
      if not inlineCommentRe.match(word):
        cv.feature(curWord, atf=transUnEsc(word))

      parts = getParts(word)
      lParts = len(parts)
      sym = ''
      symR = ''
      symU = ''

      after = None

      for p in range(len(parts)):
        (part, afterPart) = parts[p]

        # consume brackets from both edges, with the sign slot opened between
        cAtfStart = clusterChar(True)
        signStart()
        cAtfEnd = clusterChar(False)
        # the last part of a non-final word is followed by a space
        after = afterPart + (
            ' ' if p == lParts - 1 and w != lWords - 1 else ''
        )
        afterr = adjacent if p < lParts - 1 and afterPart == '' else after
        afteru = afterPart.replace('-', '')
        (symPart, symPartR, symPartU) = signData(cAtfStart, cAtfEnd, after, afterr)
        # NOTE(review): signData may return None syms (empty/comment signs),
        # which these f-strings render as the literal 'None' — confirm that
        # such parts cannot reach this concatenation.
        sym += f'{symPart}{after or adjacent}'
        symR += f'{symPartR}{after}'
        symU += f'{symPartU}{afteru}'
      if sym:
        cv.feature(
            curWord,
            sym=sym.strip(f'{adjacent} -'),
            symr=symR.strip(' -'),
            symu=symU.strip(' '),
        )
      if after:
        cv.feature(curWord, after=after)

      cv.terminate(curWord)
      if not cv.linked(curWord):
        errors[f'word: empty'][src].add((i, line, pNum, None))
      curWord = None

    # terminating all unfinished clusters

    clusterEndMakeSure()

  # the outer loop of the corpus generator

  # Main dispatch loop of the enclosing director: read every ATF source
  # file line by line and route each line to the appropriate sub-generator
  # (document/face/line start, meta, structure, comments, transliteration).
  for src in sorted(sources):
    path = f'{IN_DIR}/{src}.txt'
    print(f'Reading source {src}')

    transLine = None

    with open(path) as fh:
      i = 0  # 1-based line counter within the source, used in all diagnostics
      for line in fh:
        i += 1

        # NOTE(review): lines from file iteration always contain '\n', so
        # this pre-strip emptiness check rarely fires — likely defensive
        if not line:
          continue

        line = line.strip()

        if not line:
          continue

        # capitalized "Key: value" lines are document metadata
        if line[0].isupper():
          metaParts = line.split(':', 1)
          if len(metaParts) == 1:
            continue
          (metaKey, metaValue) = metaParts
          metaFeature = META_FIELDS.get(metaKey, None)
          if not metaFeature:
            continue
          metaValue = metaValue.strip()
          if not metaValue:
            continue
          curMeta[metaFeature[0]] = metaValue
          continue

        # '&P...' starts a new document; any other '&' is treated as a comment
        isDoc = line.startswith('&')

        if isDoc:
          if len(line) > 1 and line[1] == 'P':
            transLine = None
            documentStart()
          else:
            errors[f'atf: stray & replaced by $'][src].add((i, line, pNum, None))
            commentInsert()

        isMeta = line.startswith('#')

        # sanity-check characters on everything except '#tr.' translations
        if not isMeta or not line.startswith('#tr.'):
          (msg, lineMsg, line) = checkSane(line)
          if msg:
            errors[f'atf: illegal character(s)'][src].add((i, lineMsg, pNum, msg))

        if isDoc:
          continue

        # `skip` is set elsewhere to suppress processing of unwanted documents
        if skip:
          continue

        if isMeta:
          processMeta()
          continue

        # '@' lines are structural specs (faces, columns, ...)
        isStruct = line.startswith('@')

        if isStruct:
          processAtSpec()
          continue

        # content before any explicit face gets a default face
        if curFace is None:
          faceStart(None)

        isComment = line.startswith('$')

        if isComment:
          commentInsert()
          continue

        # everything else must be a numbered transliteration line
        isNumbered = transRe.match(line)
        if isNumbered:
          ln = isNumbered.group(1)
          recentTrans = isNumbered.group(2)

        else:
          errors[f'line: not numbered'][src].add((i, line, pNum, None))
          # NOTE(review): these two assignments are dead — the `continue`
          # below skips the line entirely
          ln = ''
          recentTrans = line
          continue

        # escape cluster characters, then verify none slipped through raw
        recentTrans = transEsc(recentTrans)
        cos = clusterCheck(recentTrans)
        if cos:
          cosRep = ' '.join(sorted(set(cos)))
          errors[f'cluster: not escaped {cosRep}'][src].add((i, line, pNum, None))

        # undo the escaping inside numeral and with-grapheme brackets
        recentTrans = numeralBackRe.sub(bracketBackRepl, recentTrans)
        recentTrans = withGraphemeBackRe.sub(bracketBackRepl, recentTrans)

        lineStart(ln)
        transLine = curLine
        lineData()

      documentEnd()

      print(f'{src:<15} : {i:>4} : {pNum:<20}\r')

  print(f'\n{len(pNums)} documents in corpus')

  # report tokens that could not be mapped, most frequent first
  if unmapped:
    total = 0
    print(f'WARNING: {len(unmapped)} unmapped tokens')
    for (token, amount) in sorted(
        unmapped.items(),
        key=lambda x: (-x[1], x[0]),
    ):
      total += amount
      print(f'\t{token:<15} {amount:>5} x')
    print(f'\t{"Total unmapped":<15} {total:>5} x')
  if warnings:
    showDiags(warnings, 'WARNING')
  if errors:
    showDiags(errors, 'ERROR')


# TF LOADING (to test the generated TF)

def loadTf():
  """Smoke-test the generated TF: load every feature and print basic stats."""
  tf = Fabric(locations=[OUT_DIR])
  features = tf.explore(silent=True, show=True)
  api = tf.load(features['nodes'] + features['edges'], silent=False)
  if not api:
    return
  print(f'max node = {api.F.otype.maxNode}')
  for (label, feature) in (
      ('readings', api.F.reading),
      ('grapheme', api.F.grapheme),
  ):
    print(f'Frequency of {label}')
    print(feature.freqList()[0:20])


# MAIN

# Loading the generated TF is skipped when the first argument is '-notf';
# sys.argv[1:2] is the first argument as a (possibly empty) list.
generateTf = '-notf' not in sys.argv[1:2]

print(f'ATF to TF converter for {REPO}')
print(f'ATF source version = {VERSION_SRC}')
print(f'TF  target version = {VERSION_TF}')
good = convert()

if good and generateTf:
  loadTf()

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API