https://github.com/annotation/text-fabric
Raw File
Tip revision: df244d89dd1006fb3ce6bea78e144271347c6bf7 authored by Dirk Roorda on 31 May 2018, 13:51:07 UTC
New minor release 4.3.2
Tip revision: df244d8
cunei.py
import os
import re
import operator
from glob import glob
from shutil import copyfile
from functools import reduce
from IPython.display import display, Markdown, HTML

from tf.fabric import Fabric

SOURCE = 'uruk'
SOURCE_FULL = 'Uruk IV-III'
VERSION = '1.0'
CORPUS = f'tf/{SOURCE}/{VERSION}'
CORPUS_FULL = f'{SOURCE_FULL} (v{VERSION})'
SOURCE_DIR = 'sources/cdli'
IMAGE_DIR = f'{SOURCE_DIR}/images'
TEMP_DIR = '_temp'
REPORT_DIR = 'reports'

LIMIT_SHOW = 100
LIMIT_TABLE = 2000

PHOTO_TO = '{}/tablets/photos'
PHOTO_EXT = 'jpg'

TABLET_TO = '{}/tablets/lineart'
IDEO_TO = '{}/ideographs/lineart'
LINEART_EXT = 'jpg'

LOCAL_DIR = 'cdli-imagery'

URL_GH = 'https://github.com'
URL_NB = 'http://nbviewer.jupyter.org/github'

URL_FORMAT = dict(
    tablet=dict(
        photo=f'https://cdli.ucla.edu/dl/photo/{{}}_d.{PHOTO_EXT}',
        lineart=f'https://cdli.ucla.edu/dl/lineart/{{}}_l.{LINEART_EXT}',
        main=(
            'https://cdli.ucla.edu/search/search_results.php?'
            'SearchMode=Text&ObjectID={}'
        ),
    ),
    ideograph=dict(
        lineart=(
            'https://cdli.ucla.edu/tools/SignLists'
            f'/protocuneiform/archsigns/{{}}.{LINEART_EXT}'
        ),
        main=(
            'https://cdli.ucla.edu/tools/SignLists'
            '/protocuneiform/archsigns.html'
        ),
    ),
)

FLAGS = (
    ('damage', '#'),
    ('remarkable', '!'),
    ('written', ('!(', ')')),
    ('uncertain', '?'),
)

OUTER_QUAD_TYPES = {'sign', 'quad'}

CLUSTER_BEGIN = {'[': ']', '<': '>', '(': ')'}
CLUSTER_END = {y: x for (x, y) in CLUSTER_BEGIN.items()}
CLUSTER_KIND = {'[': 'uncertain', '(': 'properName', '<': 'supplied'}
CLUSTER_BRACKETS = dict((name, (bOpen, CLUSTER_BEGIN[bOpen]))
                        for (bOpen, name) in CLUSTER_KIND.items())

FLEX_STYLE = (
    'display: flex;'
    'flex-flow: row nowrap;'
    'justify-content: flex-start;'
    'align-items: center;'
    'align-content: flex-start;'
)
CAPTION_STYLE = dict(
    top=(
        'display: flex;'
        'flex-flow: column-reverse nowrap;'
        'justify-content: space-between;'
        'align-items: center;'
        'align-content: space-between;'
    ),
    bottom=(
        'display: flex;'
        'flex-flow: column nowrap;'
        'justify-content: space-between;'
        'align-items: center;'
        'align-content: space-between;'
    ),
    left=(
        'display: flex;'
        'flex-flow: row-reverse nowrap;'
        'justify-content: space-between;'
        'align-items: center;'
        'align-content: space-between;'
    ),
    right=(
        'display: flex;'
        'flex-flow: row nowrap;'
        'justify-content: space-between;'
        'align-items: center;'
        'align-content: space-between;'
    ),
)

ITEM_STYLE = ('padding: 0.5rem;')

SIZING = {'height', 'width'}

COMMENT_TYPES = set(
    '''
    tablet
    face
    column
    line
    case
'''.strip().split()
)

CLUSTER_TYPES = dict(
    uncertain='?',
    properName='=',
    supplied='&gt;',
)

ATF_TYPES = set('''
    sign
    quad
    cluster
'''.strip().split())

CSS = '''
<style>
.pnum {
    font-family: sans-serif;
    font-size: small;
    font-weight: bold;
    color: #444444;
}
.nd {
    font-family: monospace;
    font-size: x-small;
    color: #999999;
}
.meta {
    display: flex;
    justify-content: flex-start;
    align-items: flex-start;
    align-content: flex-start;
    flex-flow: row nowrap;
}
.features,.comments {
    display: flex;
    justify-content: flex-start;
    align-items: flex-start;
    align-content: flex-start;
    flex-flow: column nowrap;
}
.children {
    display: flex;
    justify-content: flex-start;
    align-items: flex-start;
    align-content: flex-start;
    border: 0;
    background-color: #ffffff;
}
.children.tablet,.children.face {
    flex-flow: row nowrap;
}
.children.column {
    align-items: stretch;
    flex-flow: column nowrap;
}
.children.line,.children.case {
    align-items: stretch;
    flex-flow: row nowrap;
}
.children.caseh,.children.trminalh {
    flex-flow: row nowrap;
}
.children.casev,.children.trminalv {
    flex-flow: column nowrap;
}
.children.trminal {
    flex-flow: row nowrap;
}
.children.cluster {
    flex-flow: row wrap;
}
.children.quad {
    flex-flow: row wrap;
}
.children.sign {
    flex-flow: column nowrap;
}
.contnr {
    width: fit-content;
}
.contnr.tablet,.contnr.face,.contnr.column,
.contnr.line,.contnr.case,.contnr.trminal,
.contnr.comment,
.contnr.cluster,
.contnr.quad,.contnr.sign {
    display: flex;
    justify-content: flex-start;
    align-items: flex-start;
    align-content: flex-start;
    flex-flow: column nowrap;
    background: #ffffff none repeat scroll 0 0;
    padding:  0.5em 0.1em 0.1em 0.1em;
    margin: 0.8em 0.1em 0.1em 0.1em;
    border-radius: 0.2em;
    border-style: solid;
    border-width: 0.2em;
    font-size: small;
}
.contnr.tablet,.contnr.face,.contnr.column {
    border-color: #bb8800;
}
.contnr.line,.contnr.case,.contnr.trminal {
    border-color: #0088bb;
}
.contnr.cluster {
    flex-flow: row wrap;
    border: 0;
}
.contnr.sign,.contnr.quad {
    border-color: #bbbbbb;
}
.contnr.comment {
    background-color: #ffddaa;
    border: 0.2em solid #eecc99;
    border-radius: 0.3em;
}
.contnr.hl {
    background-color: #ffee66;
}
.lbl.tablet,.lbl.face,.lbl.column,
.lbl.line,.lbl.case,.lbl.trminal,
.lbl.comment,
.lbl.cluster,
.lbl.quad,.lbl.sign {
    margin-top: -1.2em;
    margin-left: 1em;
    background: #ffffff none repeat scroll 0 0;
    padding: 0 0.3em;
    border-style: solid;
    font-size: small;
    color: #0000cc;
    display: block;
}
.lbl.tablet,.lbl.face,.lbl.column {
    border-color: #bb8800;
    border-width: 0.2em;
    border-radius: 0.2em;
    color: #bb8800;
}
.lbl.line,.lbl.case,.lbl.trminal {
    border-color: #0088bb;
    border-width: 0.2em;
    border-radius: 0.2em;
    color: #0088bb;
}
.lbl.comment {
    border-color: #eecc99;
    border-width: 0.2em;
    border-radius: 0.2em;
    color: #eecc99;
    background-color: #ffddaa none repeat scroll 0 0;
}
.lbl.clusterB,.lbl.clusterE {
    padding:  0.5em 0.1em 0.1em 0.1em;
    margin: 0.8em 0.1em 0.1em 0.1em;
    color: #888844;
    font-size: x-small;
}
.lbl.clusterB {
    border-left: 0.3em solid #cccc99;
    border-right: 0;
    border-top: 0;
    border-bottom: 0;
    border-radius: 1em;
}
.lbl.clusterE {
    border-left: 0;
    border-right: 0.3em solid #cccc99;
    border-top: 0;
    border-bottom: 0;
    border-radius: 1em;
}
.lbl.quad,.lbl.sign {
    border-color: #bbbbbb;
    border-width: 0.1em;
    border-radius: 0.1em;
    color: #bbbbbb;
}
.op {
    padding:  0.5em 0.1em 0.1em 0.1em;
    margin: 0.8em 0.1em 0.1em 0.1em;
    font-family: monospace;
    font-size: x-large;
    font-weight: bold;
}
.name {
    font-family: monospace;
    font-size: medium;
    color: #0000bb;
}
.period {
    font-family: monospace;
    font-size: medium;
    font-weight: bold;
    color: #0000bb;
}
.excavation {
    font-family: monospace;
    font-size: medium;
    font-style: italic;
    color: #779900;
}
.text {
    font-family: sans-serif;
    font-size: x-small;
    color: #000000;
}
.srcLn {
    font-family: monospace;
    font-size: medium;
    color: #000000;
}
.srcLnNum {
    font-family: monospace;
    font-size: x-small;
    color: #0000bb;
}
</style>
'''


def dm(md):
    display(Markdown(md))


def _outLink(text, href, title=None):
    titleAtt = '' if title is None else f' title="{title}"'
    return f'<a target="_blank" href="{href}"{titleAtt}>{text}</a>'


def _wrapLink(piece, objectType, kind, identifier, pos='bottom', caption=None):
    title = (
        'to CDLI main page for this item'
        if kind == 'main' else f'to higher resolution {kind} on CDLI'
    )
    url = URL_FORMAT.get(objectType, {}).get(kind, '').format(identifier)

    result = _outLink(piece, url, title=title) if url else piece
    if caption:
        result = (
            f'<div style="{CAPTION_STYLE[pos]}">'
            f'<div>{result}</div>'
            f'<div>{caption}</div>'
            '</div>'
        )
    return result


class Cunei(object):
    def __init__(self, repoBase, repoRel, name):
        repoBase = os.path.expanduser(repoBase)
        repo = f'{repoBase}/{repoRel}'
        self.repo = repo
        self.version = VERSION
        self.sourceDir = f'{repo}/{SOURCE_DIR}'
        self.imageDir = f'{repo}/{IMAGE_DIR}'
        self._imagery = {}
        self.corpus = f'{repo}/{CORPUS}'
        self.corpusFull = CORPUS_FULL
        TF = Fabric(locations=[self.corpus], modules=[''], silent=True)
        api = TF.load('', silent=True)
        allFeatures = TF.explore(silent=True, show=True)
        loadableFeatures = allFeatures['nodes'] + allFeatures['edges']
        TF.load(loadableFeatures, add=True, silent=True)
        self.api = api
        self._getImagery()
        self.cwd = os.getcwd()
        cwdPat = re.compile(f'^.*/github/([^/]+)/([^/]+)((?:/.+)?)$', re.I)
        cwdRel = cwdPat.findall(self.cwd)
        if cwdRel:
            (thisOrg, thisRepo, thisPath) = cwdRel[0]
            onlineTail = (
                f'{thisOrg}/{thisRepo}'
                f'/blob/master{thisPath}/{name}.ipynb'
            )
        else:
            cwdRel = None
        nbLink = (
            None
            if name is None or cwdRel is None else f'{URL_NB}/{onlineTail}'
        )
        ghLink = (
            None
            if name is None or cwdRel is None else f'{URL_GH}/{onlineTail}'
        )
        docLink = f'https://github.com/{repoRel}/blob/master/docs'
        extraLink = f'https://dans-labs.github.io/text-fabric/Api/Cunei/'
        dataLink = _outLink(
            self.corpusFull, f'{docLink}/about.md', 'provenance of this corpus'
        )
        featureLink = _outLink(
            'Feature docs', f'{docLink}/transcription.md',
            'feature documentation'
        )
        cuneiLink = _outLink('Cunei API', extraLink, 'cunei api documentation')
        tfLink = _outLink(
            f'Text-Fabric API {api.TF.version}',
            'https://dans-labs.github.io/text-fabric/Api/General/',
            'text-fabric-api'
        )
        tfsLink = _outLink(
            'Search Reference',
            (
                'https://dans-labs.github.io/text-fabric/Api/General/'
                '#search-templates'
            ),
            'Search Templates Introduction and Reference'
        )
        dm(
            '**Documentation:**'
            f' {dataLink} {featureLink} {cuneiLink} {tfLink} {tfsLink}'
        )
        if nbLink:
            dm(
                f'''
This notebook online:
{_outLink('NBViewer', nbLink)}
{_outLink('GitHub', ghLink)}
'''
            )
        thisRepoDir = (
            None if cwdRel is None else f'{repoBase}/{thisOrg}/{thisRepo}'
        )
        self.tempDir = (
            None if cwdRel is None else f'{thisRepoDir}/{TEMP_DIR}'
        )
        self.reportDir = (
            None if cwdRel is None else f'{thisRepoDir}/{REPORT_DIR}'
        )
        for cdir in (self.tempDir, self.reportDir):
            if cdir:
                os.makedirs(cdir, exist_ok=True)

        self._loadCSS()

    def getSource(self, node, nodeType=None, lineNumbers=False):
        api = self.api
        F = api.F
        L = api.L
        sourceLines = []
        lineNumber = ''
        if lineNumbers:
            lineNo = F.srcLnNum.v(node)
            lineNumber = f'{lineNo:>5} ' if lineNo else ''
        sourceLine = F.srcLn.v(node)
        if sourceLine:
            sourceLines.append(f'{lineNumber}{sourceLine}')
        for child in L.d(node, nodeType):
            sourceLine = F.srcLn.v(child)
            lineNumber = ''
            if sourceLine:
                if lineNumbers:
                    lineNumber = f'{F.srcLnNum.v(child):>5}: '
                sourceLines.append(f'{lineNumber}{sourceLine}')
        return sourceLines

    def atfFromSign(self, n, flags=False):
        F = self.api.F
        Fs = self.api.Fs
        if F.otype.v(n) != 'sign':
            return '«no sign»'

        grapheme = F.grapheme.v(n)
        if grapheme == '…':
            grapheme = '...'
        primeN = F.prime.v(n)
        prime = ("'" * primeN) if primeN else ''

        variantValue = F.variant.v(n)
        variant = f'~{variantValue}' if variantValue else ''

        modifierValue = F.modifier.v(n)
        modifier = f'@{modifierValue}' if modifierValue else ''
        modifierInnerValue = F.modifierInner.v(n)
        modifierInner = f'@{modifierInnerValue}' if modifierInnerValue else ''

        modifierFirst = F.modifierFirst.v(n)

        repeat = F.repeat.v(n)
        if repeat is None:
            varmod = (
                f'{modifier}{variant}'
                if modifierFirst else f'{variant}{modifier}'
            )
            result = f'{grapheme}{prime}{varmod}'
        else:
            if repeat == -1:
                repeat = 'N'
            varmod = (
                f'{modifierInner}{variant}'
                if modifierFirst else f'{variant}{modifierInner}'
            )
            result = f'{repeat}({grapheme}{prime}{varmod}){modifier}'

        if flags:
            for (flag, char) in FLAGS:
                value = Fs(flag).v(n)
                if value:
                    if type(char) is tuple:
                        result += f'{char[0]}{value}{char[1]}'
                    else:
                        result += char

        return result

    def atfFromQuad(self, n, flags=False, outer=True):
        api = self.api
        E = api.E
        F = api.F
        Fs = api.Fs
        if F.otype.v(n) != 'quad':
            return '«no quad»'

        children = E.sub.f(n)
        if not children or len(children) < 2:
            return f'«quad with less than two sub-quads»'
        result = ''
        for child in children:
            nextChildren = E.op.f(child)
            if nextChildren:
                op = nextChildren[0][1]
            else:
                op = ''
            childType = F.otype.v(child)

            thisResult = (
                self.atfFromQuad(child, flags=flags, outer=False) if
                childType == 'quad' else self.atfFromSign(child, flags=flags)
            )
            result += f'{thisResult}{op}'

        variant = F.variantOuter.v(n)
        variantStr = f'~{variant}' if variant else ''

        flagStr = ''
        if flags:
            for (flag, char) in FLAGS:
                value = Fs(flag).v(n)
                if value:
                    if type(char) is tuple:
                        flagStr += f'{char[0]}{value}{char[1]}'
                    else:
                        flagStr += char

        if variant:
            if flagStr:
                if outer:
                    result = f'|({result}){variantStr}|{flagStr}'
                else:
                    result = f'(({result}){variantStr}){flagStr}'
            else:
                if outer:
                    result = f'|({result}){variantStr}|'
                else:
                    result = f'({result}){variantStr}'
        else:
            if flagStr:
                if outer:
                    result = f'|{result}|{flagStr}'
                else:
                    result = f'({result}){flagStr}'
            else:
                if outer:
                    result = f'|{result}|'
                else:
                    result = f'({result})'

        return result

    def atfFromOuterQuad(self, n, flags=False):
        api = self.api
        F = api.F
        nodeType = F.otype.v(n)
        if nodeType == 'sign':
            return self.atfFromSign(n, flags=flags)
        elif nodeType == 'quad':
            return self.atfFromQuad(n, flags=flags, outer=True)
        else:
            return '«no outer quad»'

    def atfFromCluster(self, n, seen=None):
        api = self.api
        F = api.F
        E = api.E
        if F.otype.v(n) != 'cluster':
            return '«no cluster»'

        typ = F.type.v(n)
        (bOpen, bClose) = CLUSTER_BRACKETS[typ]
        if bClose == ')':
            bClose = ')a'
        children = api.sortNodes(E.sub.f(n))

        if seen is None:
            seen = set()
        result = []
        for child in children:
            if child in seen:
                continue
            childType = F.otype.v(child)

            thisResult = (
                self.atfFromCluster(child, seen=seen) if childType == 'cluster'
                else self.atfFromQuad(child, flags=True) if childType == 'quad'
                else self.atfFromSign(child, flags=True)
                if childType == 'sign' else None
            )
            seen.add(child)
            if thisResult is None:
                dm(
                    f'TF: child of cluster has type {childType}:'
                    ' should not happen'
                )
            result.append(thisResult)
        return f'{bOpen}{" ".join(result)}{bClose}'

    def getOuterQuads(self, n):
        api = self.api
        F = api.F
        E = api.E
        L = api.L
        return [
            quad for quad in L.d(n)
            if (
                F.otype.v(quad) in OUTER_QUAD_TYPES and
                all(F.otype.v(parent) != 'quad' for parent in E.sub.t(quad))
            )
        ]

    def lineFromNode(self, n):
        api = self.api
        F = api.F
        L = api.L
        caseOrLineUp = [m for m in L.u(n) if F.terminal.v(m)]
        return caseOrLineUp[0] if caseOrLineUp else None

    def nodeFromCase(self, passage):
        api = self.api
        F = api.F
        L = api.L
        T = api.T
        section = passage[0:2]
        caseNum = passage[2].replace('.', '')
        column = T.nodeFromSection(section)
        if column is None:
            return None
        casesOrLines = [
            c for c in L.d(column)
            if F.terminal.v(c) and F.number.v(c) == caseNum
        ]
        if not casesOrLines:
            return None
        return casesOrLines[0]

    def caseFromNode(self, n):
        api = self.api
        F = api.F
        T = api.T
        L = api.L
        section = T.sectionFromNode(n)
        if section is None:
            return None
        nodeType = F.otype.v(n)
        if nodeType in {'sign', 'quad', 'cluster', 'case'}:
            if nodeType == 'case':
                caseNumber = F.number.v(n)
            else:
                caseOrLine = [m for m in L.u(n) if F.terminal.v(m)][0]
                caseNumber = F.number.v(caseOrLine)
            return (section[0], section[1], caseNumber)
        else:
            return section

    # this is a slow implementation!
    def _casesByLevelM(self, lev, terminal=True):
        api = self.api
        E = api.E
        F = api.F
        if lev == 0:
            results = F.otype.s('line')
        else:
            parents = self._casesByLevelM(lev - 1, terminal=False)
            results = reduce(
                operator.add,
                [
                    tuple(s for s in E.sub.f(p) if F.otype.v(s) == 'case')
                    for p in parents
                ],
                (),
            )
        return (
            tuple(r for r in results if F.terminal.v(r))
            if terminal else results
        )

    # this is a fast implementation!
    def _casesByLevelS(self, lev, terminal=True):
        api = self.api
        S = api.S
        sortNodes = api.sortNodes
        query = ''
        for i in range(lev + 1):
            extra = (' terminal' if i == lev and terminal else '')
            nodeType = 'line' if i == 0 else 'case'
            query += ('  ' * i) + f'w{i}:{nodeType}{extra}\n'
        for i in range(lev):
            query += f'w{i} -sub> w{i+1}\n'
        results = list(S.search(query))
        return sortNodes(tuple(r[-1] for r in results))

    def casesByLevel(self, lev, terminal=True):
        api = self.api
        F = api.F
        return ((
            tuple(c for c in F.otype.s('line') if F.terminal.v(c))
            if terminal else F.otype.s('line')
        ) if lev == 0 else (
            tuple(c for c in F.depth.s(lev) if F.terminal.v(c))
            if terminal else F.depth.s(lev)
        ))

    def lineart(self, ns, key=None, asLink=False, withCaption=None, **options):
        return self._getImages(
            ns,
            kind='lineart',
            key=key,
            asLink=asLink,
            withCaption=withCaption,
            **options
        )

    def photo(self, ns, key=None, asLink=False, withCaption=None, **options):
        return self._getImages(
            ns,
            kind='photo',
            key=key,
            asLink=asLink,
            withCaption=withCaption,
            **options
        )

    def imagery(self, objectType, kind):
        return set(self._imagery.get(objectType, {}).get(kind, {}))

    def cdli(self, n, linkText=None, asString=False):
        (nType, objectType, identifier) = self._imageClass(n)
        if linkText is None:
            linkText = identifier
        result = _wrapLink(linkText, objectType, 'main', identifier)
        if asString:
            return result
        else:
            display(HTML(result))

    def tabletLink(self, t, text=None, asString=False):
        api = self.api
        L = api.L
        F = api.F
        if type(t) is str:
            pNum = t
        else:
            n = t if F.otype.v(t) == 'tablet' else L.u(t, otype='tablet')[0]
            pNum = F.catalogId.v(n)

        title = ('to CDLI main page for this tablet')
        linkText = pNum if text is None else text
        url = URL_FORMAT['tablet']['main'].format(pNum)

        result = _outLink(linkText, url, title=title)
        if asString:
            return result
        display(HTML(result))

    def plain(
        self,
        n,
        linked=True,
        lineart=True,
        withNodes=False,
        lineNumbers=False,
        asString=False,
    ):
        api = self.api
        F = api.F

        nType = F.otype.v(n)
        markdown = ''
        nodeRep = f' *{n}* ' if withNodes else ''

        if nType in ATF_TYPES:
            isSign = nType == 'sign'
            isQuad = nType == 'quad'
            rep = (
                self.atfFromSign(n) if isSign else self.atfFromQuad(n)
                if isQuad else self.atfFromCluster(n)
            )
            if linked:
                rep = self.tabletLink(n, text=rep, asString=True)
            theLineart = ''
            if lineart:
                if isSign or isQuad:
                    width = '2em' if isSign else '4em'
                    height = '4em' if isSign else '6em'
                    theLineart = self._getImages(
                        n,
                        kind='lineart',
                        width=width,
                        height=height,
                        asString=True,
                        withCaption=False,
                        warning=False
                    )
                    theLineart = f' {theLineart}'
            markdown = (
                f'{rep}{nodeRep}{theLineart}'
            ) if theLineart else f'{rep}{nodeRep}'
        elif nType == 'comment':
            rep = F.type.v(n)
            if linked:
                rep = self.tabletLink(n, text=rep, asString=True)
            markdown = f'{rep}{nodeRep}: {F.text.v(n)}'
        elif nType == 'line' or nType == 'case':
            rep = f'{nType} {F.number.v(n)}'
            if linked:
                rep = self.tabletLink(n, text=rep, asString=True)
            theLine = ''
            if lineNumbers:
                if F.terminal.v(n):
                    theLine = f' @{F.srcLnNum.v(n)} '
            markdown = f'{rep}{nodeRep}{theLine}'
        elif nType == 'column':
            rep = f'{nType} {F.number.v(n)}'
            if linked:
                rep = self.tabletLink(n, text=rep, asString=True)
            theLine = ''
            if lineNumbers:
                theLine = f' @{F.srcLnNum.v(n)} '
            markdown = f'{rep}{nodeRep}{theLine}'
        elif nType == 'face':
            rep = f'{nType} {F.type.v(n)}'
            if linked:
                rep = self.tabletLink(n, text=rep, asString=True)
            theLine = ''
            if lineNumbers:
                theLine = f' @{F.srcLnNum.v(n)} '
            markdown = f'{rep}{nodeRep}{theLine}'
        elif nType == 'tablet':
            rep = f'{nType} {F.catalogId.v(n)}'
            if linked:
                rep = self.tabletLink(n, text=rep, asString=True)
            theLine = ''
            if lineNumbers:
                theLine = f' @{F.srcLnNum.v(n)} '
            markdown = f'{rep}{nodeRep}{theLine}'

        if asString:
            return markdown
        dm(markdown)

    def plainTuple(
        self,
        ns,
        seqNumber,
        linked=1,
        lineart=True,
        withNodes=False,
        lineNumbers=False,
        asString=False,
    ):
        markdown = [str(seqNumber)]
        for (i, n) in enumerate(ns):
            markdown.append(
                self.plain(
                    n,
                    linked=i == linked - 1,
                    lineart=lineart,
                    withNodes=withNodes,
                    lineNumbers=lineNumbers,
                    asString=True,
                ).replace('|', '&#124;')
            )
        markdown = '|'.join(markdown)
        if asString:
            return markdown
        api = self.api
        F = api.F
        head = ['n | ' + (' | '.join(F.otype.v(n) for n in ns))]
        head.append(' | '.join('---' for n in range(len(ns) + 1)))
        head.append(markdown)

        dm('\n'.join(head))

    def table(
        self,
        results,
        start=None,
        end=None,
        linked=1,
        lineart=True,
        withNodes=False,
        lineNumbers=False,
        asString=False,
    ):
        api = self.api
        F = api.F

        collected = []
        if start is None:
            start = 1
        i = -1
        rest = 0
        if not hasattr(results, '__len__'):
            if end is None or end - start + 1 > LIMIT_SHOW:
                end = start - 1 + LIMIT_SHOW
            for result in results:
                i += 1
                if i < start - 1:
                    continue
                if i >= end:
                    break
                collected.append((i + 1, result))
        else:
            typeResults = type(results)
            if typeResults is set or typeResults is frozenset:
                results = sorted(results)
            if end is None or end > len(results):
                end = len(results)
            rest = 0
            if end - (start - 1) > LIMIT_TABLE:
                rest = end - (start - 1) - LIMIT_TABLE
                end = start - 1 + LIMIT_TABLE
            for i in range(start - 1, end):
                collected.append((i + 1, results[i]))

        if len(collected) == 0:
            return
        (firstSeq, firstResult) = collected[0]
        nColumns = len(firstResult)
        markdown = ['n | ' + (' | '.join(F.otype.v(n) for n in firstResult))]
        markdown.append(' | '.join('---' for n in range(nColumns + 1)))
        for (seqNumber, ns) in collected:
            markdown.append(
                self.plainTuple(
                    ns,
                    seqNumber,
                    linked=linked,
                    lineart=lineart,
                    withNodes=withNodes,
                    lineNumbers=lineNumbers,
                    asString=True,
                )
            )
        markdown = '\n'.join(markdown)
        if asString:
            return markdown
        dm(markdown)
        if rest:
            dm(
                f'**{rest} more results skipped**'
                f' because we show a maximum of'
                f' {LIMIT_TABLE} results at a time'
            )

    def pretty(
        self,
        n,
        lineart=True,
        withNodes=False,
        lineNumbers=False,
        suppress=set(),
        highlights={},
    ):
        html = []
        if type(highlights) is set:
            highlights = {m: '' for m in highlights}
        self._pretty(
            n,
            True,
            html,
            seen=set(),
            lineart=lineart,
            withNodes=withNodes,
            lineNumbers=lineNumbers,
            suppress=suppress,
            highlights=highlights,
        )
        htmlStr = '\n'.join(html)
        display(HTML(htmlStr))

    def prettyTuple(
        self,
        ns,
        seqNumber,
        item='Result',
        lineart=True,
        withNodes=False,
        lineNumbers=False,
        suppress=set(),
        colorMap=None,
        highlights=None,
    ):
        api = self.api
        L = api.L
        F = api.F
        sortNodes = api.sortNodes
        tablets = set()
        newHighlights = {}
        if type(highlights) is set:
            highlights = {m: '' for m in highlights}
        if highlights:
            newHighlights.update(highlights)
        for (i, n) in enumerate(ns):
            thisHighlight = None
            if highlights is not None:
                thisHighlight = highlights.get(n, None)
            elif colorMap is not None:
                thisHighlight = colorMap.get(i + 1, None)
            else:
                thisHighlight = ''

            nType = F.otype.v(n)
            if nType == 'tablet':
                tablets.add(n)
            else:
                t = L.u(n, otype='tablet')[0]
                tablets.add(t)
                if thisHighlight is not None:
                    newHighlights[n] = thisHighlight
        dm(f'''
##### {item} {seqNumber}
''')
        for t in sortNodes(tablets):
            self.pretty(
                t,
                lineart=lineart,
                withNodes=withNodes,
                lineNumbers=lineNumbers,
                suppress=suppress,
                highlights=newHighlights,
            )

    def show(
        self,
        results,
        condensed=True,
        start=None,
        end=None,
        lineart=True,
        withNodes=False,
        lineNumbers=False,
        suppress=set(),
        colorMap=None,
        highlights=None,
    ):
        newHighlights = None
        if type(highlights) is set:
            highlights = {m: '' for m in highlights}
        if highlights:
            newHighlights = {}
            newHighlights.update(highlights)
        if condensed:
            if colorMap is not None:
                if newHighlights is None:
                    newHighlights = {}
                for ns in results:
                    for (i, n) in enumerate(ns):
                        thisHighlight = None
                        if highlights is not None:
                            thisHighlight = highlights.get(n, None)
                        elif colorMap is not None:
                            thisHighlight = colorMap.get(i + 1, None)
                        else:
                            thisHighlight = ''
                        newHighlights[n] = thisHighlight

            results = self._condense(results)

        if start is None:
            start = 1
        i = -1
        if not hasattr(results, '__len__'):
            if end is None or end - start + 1 > LIMIT_SHOW:
                end = start - 1 + LIMIT_SHOW
            for result in results:
                i += 1
                if i < start - 1:
                    continue
                if i >= end:
                    break
                self.prettyTuple(
                    result,
                    i + 1,
                    item='Tablet' if condensed else 'Result',
                    lineart=lineart,
                    withNodes=withNodes,
                    lineNumbers=lineNumbers,
                    suppress=suppress,
                    colorMap=colorMap,
                    highlights=newHighlights,
                )
        else:
            if end is None or end > len(results):
                end = len(results)
            rest = 0
            if end - (start - 1) > LIMIT_SHOW:
                rest = end - (start - 1) - LIMIT_SHOW
                end = start - 1 + LIMIT_SHOW
            for i in range(start - 1, end):
                self.prettyTuple(
                    results[i],
                    i + 1,
                    item='Tablet' if condensed else 'Result',
                    lineart=lineart,
                    withNodes=withNodes,
                    lineNumbers=lineNumbers,
                    suppress=suppress,
                    colorMap=colorMap,
                    highlights=newHighlights,
                )
            if rest:
                dm(
                    f'**{rest} more results skipped**'
                    f' because we show a maximum of'
                    f' {LIMIT_SHOW} results at a time'
                )

    def search(self, query, silent=False, sets=None, shallow=False):
        api = self.api
        S = api.S
        results = S.search(query, sets=sets, shallow=shallow)
        if not shallow:
            results = sorted(results)
        nResults = len(results)
        plural = '' if nResults == 1 else 's'
        if not silent:
            print(f'{nResults} result{plural}')
        return results

    def _condense(self, results):
        api = self.api
        F = api.F
        L = api.L
        sortNodes = api.sortNodes
        tablets = {}
        for ns in results:
            for n in ns:
                if F.otype.v(n) == 'tablet':
                    tablets.setdefault(n, set())
                else:
                    t = L.u(n, otype='tablet')[0]
                    tablets.setdefault(t, set()).add(n)
        return tuple((t, ) + tuple(tablets[t]) for t in sortNodes(tablets))

    def _pretty(
        self,
        n,
        outer,
        html,
        lineart=True,
        withNodes=False,
        lineNumbers=False,
        seen=set(),
        suppress=set(),
        highlights={},
    ):
        api = self.api
        L = api.L
        F = api.F
        E = api.E
        sortNodes = api.sortNodes
        hl = highlights.get(n, None)
        hlClass = ''
        hlStyle = ''
        if hl == '':
            hlClass = ' hl'
        else:
            hlStyle = f' style="background-color: {hl};"'

        nType = F.otype.v(n)
        className = nType
        nodePart = (f'<span class="nd">{n}</span>' if withNodes else '')
        heading = ''
        featurePart = ''
        commentsPart = self._getComments(
            n, withNodes=withNodes, lineNumbers=lineNumbers
        ) if nType in COMMENT_TYPES else ''
        children = ()

        if nType == 'tablet':
            heading = F.catalogId.v(n)
            heading += ' '
            heading += self._getFeatures(
                n,
                suppress,
                ('name', 'period', 'excavation'),
                plain=True,
            )
            children = L.d(n, otype='face')
        elif nType == 'face':
            heading = F.type.v(n)
            featurePart = self._getFeatures(
                n,
                suppress,
                ('identifier', 'fragment'),
            )
            children = L.d(n, otype='column')
        elif nType == 'column':
            heading = F.number.v(n)
            if F.prime.v(n):
                heading += "'"
            children = L.d(n, otype='line')
        elif nType == 'line' or nType == 'case':
            heading = F.number.v(n)
            if F.prime.v(n):
                heading += "'"
            if F.terminal.v(n):
                className = 'trminal'
                theseFeats = ('srcLnNum', ) if lineNumbers else ()
                featurePart = self._getFeatures(
                    n,
                    suppress,
                    theseFeats,
                )
                children = sortNodes(
                    set(L.d(n, otype='cluster'))
                    | set(L.d(n, otype='quad'))
                    | set(L.d(n, otype='sign'))
                )
            else:
                children = E.sub.f(n)
        elif nType == 'comment':
            heading = F.type.v(n)
            featurePart = self._getFeatures(
                n,
                suppress,
                ('text', ),
            )
        elif nType == 'cluster':
            seen.add(n)
            heading = CLUSTER_TYPES.get(F.type.v(n), '')
            children = sortNodes(
                set(L.d(n, otype='cluster'))
                | set(L.d(n, otype='quad'))
                | set(L.d(n, otype='sign'))
            )
        elif nType == 'quad':
            seen.add(n)
            children = E.sub.f(n)
        elif nType == 'sign':
            featurePart = self._getAtf(n)
            seen.add(n)
            if not outer and F.type.v(n) == 'empty':
                return

        if outer:
            typePart = self.tabletLink(
                n, text=f'{nType} {heading}', asString=True
            )
        else:
            typePart = heading

        isCluster = nType == 'cluster'
        extra = 'B' if isCluster else ''
        label = f'''
    <div class="lbl {className}{extra}">
        {typePart}
        {nodePart}
    </div>
''' if typePart or nodePart else ''

        if isCluster:
            if outer:
                html.append(
                    f'<div class="contnr {className}{hlClass}"{hlStyle}>'
                )
            html.append(label)
            if outer:
                html.append(f'<div class="children {className}">')
        else:
            html.append(
                f'''
<div class="contnr {className}{hlClass}"{hlStyle}>
    {label}
    <div class="meta">
        {featurePart}
        {commentsPart}
    </div>
'''
            )
        if lineart:
            isQuad = nType == 'quad'
            isSign = nType == 'sign'
            if isQuad or isSign:
                isOuter = outer or (
                    all(F.otype.v(parent) != 'quad' for parent in E.sub.t(n))
                )
                if isOuter:
                    width = '2em' if isSign else '4em'
                    height = '4em' if isSign else '6em'
                    theLineart = self._getImages(
                        n,
                        kind='lineart',
                        width=width,
                        height=height,
                        asString=True,
                        withCaption=False,
                        warning=False
                    )
                    if theLineart:
                        html.append(f'<div>{theLineart}</div>')
        caseDir = ''
        if not isCluster:
            if children:
                if nType == 'case':
                    depth = F.depth.v(n)
                    caseDir = 'v' if depth & 1 else 'h'
                html.append(
                    f'''
    <div class="children {className}{caseDir}">
'''
                )

        for ch in children:
            if ch not in seen:
                self._pretty(
                    ch,
                    False,
                    html,
                    seen=seen,
                    lineart=lineart,
                    withNodes=withNodes,
                    lineNumbers=lineNumbers,
                    suppress=suppress,
                    highlights=highlights,
                )
                if nType == 'quad':
                    nextChildren = E.op.f(ch)
                    if nextChildren:
                        op = nextChildren[0][1]
                        html.append(f'<div class="op">{op}</div>')
        if isCluster:
            html.append(
                f'''
    <div class="lbl {className}E{hlClass}"{hlStyle}>
        {typePart}
        {nodePart}
    </div>
'''
            )
            if outer:
                html.append('</div></div>')
        else:
            if children:
                html.append('''
    </div>
''')
            html.append('''
</div>
''')

    def _getFeatures(self, n, suppress, features, plain=False):
        api = self.api
        Fs = api.Fs
        featurePart = '' if plain else '<div class="features">'
        for name in features:
            if 'name' not in suppress:
                value = Fs(name).v(n)
                if value is not None:
                    featurePart += (
                        f' <span class="{name}">{Fs(name).v(n)}</span>'
                    )
        if not plain:
            featurePart += '</div>'
        return featurePart

    def _getComments(self, n, withNodes=False, lineNumbers=False):
        api = self.api
        E = api.E
        cns = E.comments.f(n)
        if len(cns):
            html = ['<div class="comments">']
            for c in cns:
                self._pretty(
                    c,
                    False,
                    html,
                    lineart=False,
                    withNodes=withNodes,
                    lineNumbers=lineNumbers,
                )
            html.append('</div>')
            commentsPart = ''.join(html)
        else:
            commentsPart = ''
        return commentsPart

    def _getAtf(self, n):
        atf = self.atfFromSign(n, flags=True)
        featurePart = f' <span class="srcLn">{atf}</span>'
        return featurePart

    def _loadCSS(self):
        display(HTML(CSS))

    def _imageClass(self, n):
        api = self.api
        F = api.F
        if type(n) is str:
            identifier = n
            if n == '':
                identifier = None
                objectType = None
                nType = None
            elif len(n) == 1:
                objectType = 'ideograph'
                nType = 'sign/quad'
            else:
                if n[0] == 'P' and n[1:].isdigit():
                    objectType = 'tablet'
                    nType = 'tablet'
                else:
                    objectType = 'ideograph'
                    nType = 'sign/quad'
        else:
            nType = F.otype.v(n)
            if nType in OUTER_QUAD_TYPES:
                identifier = self.atfFromOuterQuad(n)
                objectType = 'ideograph'
            elif nType == 'tablet':
                identifier = F.catalogId.v(n)
                objectType = 'tablet'
            else:
                identifier = None
                objectType = None
        return (nType, objectType, identifier)

    def _getImages(
        self,
        ns,
        kind=None,
        key=None,
        asLink=False,
        withCaption=None,
        warning=True,
        asString=False,
        **options
    ):
        if type(ns) is int or type(ns) is str:
            ns = [ns]
        result = []
        attStr = ' '.join(
            f'{opt}="{value}"' for (opt, value) in options.items()
            if opt not in SIZING
        )
        cssProps = {}
        for (opt, value) in options.items():
            if opt in SIZING:
                if type(value) is int:
                    force = False
                    realValue = f'{value}px'
                else:
                    if value.startswith('!'):
                        force = True
                        realValue = value[1:]
                    else:
                        force = False
                        realValue = value
                    if realValue.isdecimal():
                        realValue += 'px'
                cssProps[f'max-{opt}'] = realValue
                if force:
                    cssProps[f'min-{opt}'] = realValue
        cssStr = ' '.join(
            f'{opt}: {value};' for (opt, value) in cssProps.items()
        )
        if withCaption is None:
            withCaption = None if asLink else 'bottom'
        for n in ns:
            caption = None
            (nType, objectType, identifier) = self._imageClass(n)
            if objectType:
                imageBase = self._imagery.get(objectType, {}).get(kind, {})
                images = imageBase.get(identifier, None)
                if withCaption:
                    caption = _wrapLink(
                        identifier, objectType, 'main', identifier
                    )
                if images is None:
                    thisImage = (
                        f'<span><b>no {kind}</b> for {objectType}'
                        f' <code>{identifier}</code></span>'
                    ) if warning else ''
                else:
                    image = images.get(key or '', None)
                    if image is None:
                        thisImage = (
                            '<span><b>try</b>'
                            ' key=<code>'
                            f'{"</code> <code>".join(sorted(images.keys()))}'
                            '</code></span>'
                        ) if warning else ''
                    else:
                        if asLink:
                            thisImage = identifier
                        else:
                            theImage = self._useImage(
                                image, kind, key or '', n
                            )
                            thisImage = (
                                f'<img src="{theImage}"'
                                f' style="display: inline;{cssStr}"'
                                f' {attStr} />'
                            )
                thisResult = _wrapLink(
                    thisImage,
                    objectType,
                    kind,
                    identifier,
                    pos=withCaption,
                    caption=caption
                ) if thisImage else None
            else:
                thisResult = (
                    f'<span><b>no {kind}</b> for'
                    f' <code>{nType}</code>s</span>'
                ) if warning else ''
            result.append(thisResult)
        if not warning:
            result = [image for image in result if image]
        if not result:
            return ''
        if asString:
            return ''.join(result)
        resultStr = f'</div><div style="{ITEM_STYLE}">'.join(result)
        html = (
            f'<div style="{FLEX_STYLE}">'
            f'<div style="{ITEM_STYLE}">'
            f'{resultStr}</div></div>'
        ).replace('\n', '')
        display(HTML(html))
        if not warning:
            return True

    def _useImage(self, image, kind, key, node):
        api = self.api
        F = api.F
        (imageDir, imageName) = os.path.split(image)
        (base, ext) = os.path.splitext(imageName)
        localDir = f'{self.cwd}/{LOCAL_DIR}'
        if not os.path.exists(localDir):
            os.makedirs(localDir, exist_ok=True)
        if type(node) is int:
            nType = F.otype.v(node)
            if nType == 'tablet':
                nodeRep = F.catalogId.v(node)
            elif nType in OUTER_QUAD_TYPES:
                nodeRep = self.atfFromOuterQuad(node)
            else:
                nodeRep = str(node)
        else:
            nodeRep = node
        nodeRep = (
            nodeRep.lower().replace('|', 'q').replace('~', '-')
            .replace('@', '(a)').replace('&', '(e)')
            .replace('+', '(p)').replace('.', '(d)')
        )
        keyRep = '' if key == '' else f'-{key}'
        localImageName = f'{kind}-{nodeRep}{keyRep}{ext}'
        localImagePath = f'{localDir}/{localImageName}'
        if (
            not os.path.exists(localImagePath) or
            os.path.getmtime(image) > os.path.getmtime(localImagePath)
        ):
            copyfile(image, localImagePath)
        return f'{LOCAL_DIR}/{localImageName}'

    def _getImagery(self):
        for (dirFmt, ext, kind, objectType) in (
            (IDEO_TO, LINEART_EXT, 'lineart', 'ideograph'),
            (TABLET_TO, LINEART_EXT, 'lineart', 'tablet'),
            (PHOTO_TO, PHOTO_EXT, 'photo', 'tablet'),
        ):
            srcDir = dirFmt.format(self.imageDir)
            filePaths = glob(f'{srcDir}/*.{ext}')
            images = {}
            idPat = re.compile('P[0-9]+')
            for filePath in filePaths:
                (fileDir, fileName) = os.path.split(filePath)
                (base, thisExt) = os.path.splitext(fileName)
                if kind == 'lineart' and objectType == 'tablet':
                    ids = idPat.findall(base)
                    if not ids:
                        print(f'skipped non-{objectType} "{fileName}"')
                        continue
                    identifier = ids[0]
                    key = base.replace('_l', '').replace(identifier, '')
                else:
                    identifier = base
                    key = ''
                images.setdefault(identifier, {})[key] = filePath
            self._imagery.setdefault(objectType, {})[kind] = images
            print(f'Found {len(images)} {objectType} {kind}s')
back to top