Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

  • 9287bc8
  • /
  • glyphs.ipynb
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
content badge Iframe embedding
swh:1:cnt:baa58ce961e58a2110df420f9feb85919173b850
directory badge Iframe embedding
swh:1:dir:9287bc84340ee2f1ec0e4d777e59e51908046a3d

This interface makes it possible to generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
glyphs.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import collections\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASE = os.path.expanduser('~/github')\n",
    "ORG = 'Nino-cunei'\n",
    "REPO = 'oldbabylonian'\n",
    "VERSION = '0.2'\n",
    "\n",
    "REPO_PATH = f'{BASE}/{ORG}/{REPO}'\n",
    "MAP_FILE_T = f'{REPO_PATH}/sources/writing/signs.txt'\n",
    "MAP_FILE_P = f'{REPO_PATH}/sources/writing/signs.p'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "columns 0 and 1 are equal\n",
      "columns 0 and 2 are equal\n",
      "Written data to /Users/dirk/github/Nino-cunei/oldbabylonian/sources/writing/signs.txt\n",
      "headers = ['value', 'form', 'character', 'language']\n",
      "Data has 9907 rows\n",
      "Data has languages {'sux'}\n",
      "found 446 duplicate values:\n",
      "\t...Ac ...ingara /cumun/ 1(car)u) 1/3(dic@c) 1/4 2/3(dic@c) 4(dic@v) 4(dic@v@c) KWU127~a and more\n",
      "found 1311 duplicate characters:\n",
      "\tβ–‘β–‘ π’€€ π’€€π’€€ π’€€π’€­ 𒀀𒁺 π’€€π’‚” 𒀀𒂔𒇲 π’€€π’ƒΌ π’€€π’„  π’€€π’„© and more\n",
      "found 75 duplicate value,forms:\n",
      "\t4(dic@v),LIMMU 4(dic@v@c),LIMMU KWU127~a,|ZI&ZI| LAK469~a,|ZI&ZI| MZL101~a,|ZI&ZI| arata,|LAMΓ—(KUR.RU)| at,AD at2,GIRβ‚‚@g bat,BAD bit,Eβ‚‚ and more\n",
      "found 75 duplicate value,form,languages:\n",
      "\t4(dic@v),LIMMU,sux 4(dic@v@c),LIMMU,sux KWU127~a,|ZI&ZI|,sux LAK469~a,|ZI&ZI|,sux MZL101~a,|ZI&ZI|,sux arata,|LAMΓ—(KUR.RU)|,sux at,AD,sux at2,GIRβ‚‚@g,sux bat,BAD,sux bit,Eβ‚‚,sux and more\n",
      "rows with language = \"sux\"\n",
      "\tfound 446 duplicate values:\n",
      "\t\t...Ac ...ingara /cumun/ 1(car)u) 1/3(dic@c) 1/4 2/3(dic@c) 4(dic@v) 4(dic@v@c) KWU127~a and more\n",
      "['LAK797', 'A', 'π’€€', 'sux']\n",
      "['MZL839', 'A', 'π’€€', 'sux']\n",
      "[')u4', 'A', 'π’€€', 'sux']\n",
      "['a', 'A', 'π’€€', 'sux']\n",
      "['aia2', 'A', 'π’€€', 'sux']\n",
      "['aya2', 'A', 'π’€€', 'sux']\n",
      "['barx', 'A', 'π’€€', 'sux']\n",
      "['bunijx', 'A', 'π’€€', 'sux']\n",
      "['burx', 'A', 'π’€€', 'sux']\n",
      "['dur5', 'A', 'π’€€', 'sux']\n",
      "\n",
      "... 9887 rows ...\n",
      "\n",
      "['1/8', 'F₃', 'π’‘Ÿ', 'sux']\n",
      "['1/4', 'Fβ‚„', 'π’‘ ', 'sux']\n",
      "['1/4(iku)', 'Fβ‚„', 'π’‘ ', 'sux']\n",
      "['1/6', 'Fβ‚…', 'π’‘‘', 'sux']\n",
      "['1/4', 'F₆', 'π’‘’', 'sux']\n",
      "['/', 'P₁', 'π’‘°', 'sux']\n",
      "[':', 'Pβ‚‚', 'π’‘±', 'sux']\n",
      "[':\"', 'P₃', 'π’‘²', 'sux']\n",
      "[':.', 'Pβ‚„', 'π’‘³', 'sux']\n",
      "['::', 'Pβ‚…', '\\ue100', 'sux']\n"
     ]
    }
   ],
   "source": [
    "def checkSignMapData(path):\n",
    "  '''Validate the pickled sign map at `path`, export it as text and report on it.\n",
    "\n",
    "  The pickle is treated as a sequence of tables that are expected to be\n",
    "  identical. The first table is a list of rows: row 0 holds the headers and\n",
    "  every row is a list of strings whose fourth field is the language.\n",
    "\n",
    "  Side effects: writes the first table as tab-separated text to MAP_FILE_T\n",
    "  and prints uniqueness diagnostics plus a sample of the rows.\n",
    "\n",
    "  Returns the list of data rows (header row excluded), or None when the\n",
    "  tables differ or contain no data.\n",
    "  '''\n",
    "  # NOTE(review): pickle.load can execute arbitrary code; only feed it trusted files.\n",
    "  with open(path, 'rb') as fh:\n",
    "    pData = pickle.load(fh)\n",
    "\n",
    "  def compare(col1, col2):\n",
    "    '''Report whether tables col1 and col2 of the pickle hold the same rows.'''\n",
    "    p1Data = pData[col1]\n",
    "    p2Data = pData[col2]\n",
    "    for (i, (row1, row2)) in enumerate(zip(p1Data, p2Data)):\n",
    "      if row1 != row2:\n",
    "        print(f'row {i} is different')\n",
    "        print(row1)\n",
    "        print(row2)\n",
    "        return False\n",
    "    # zip stops at the shorter table, so a length mismatch must be checked separately\n",
    "    if len(p1Data) != len(p2Data):\n",
    "      print(f'columns {col1} and {col2} differ in length')\n",
    "      return False\n",
    "    print(f'columns {col1} and {col2} are equal')\n",
    "    return True\n",
    "\n",
    "  # compare every table against the first one; keep going so all differences get reported\n",
    "  good = True\n",
    "  for c in range(1, len(pData)):\n",
    "    if not compare(0, c):\n",
    "      good = False\n",
    "\n",
    "  if not good or not len(pData):\n",
    "    print('No data delivered')\n",
    "    return None\n",
    "\n",
    "  table = pData[0]\n",
    "\n",
    "  if not len(table):\n",
    "    print('Data is empty')\n",
    "    return None\n",
    "\n",
    "  # explicit encoding: the rows contain non-ASCII characters\n",
    "  with open(MAP_FILE_T, 'w', encoding='utf8') as tfh:\n",
    "    for row in table:\n",
    "      rowStr = '\\t'.join(row)\n",
    "      tfh.write(f'{rowStr}\\n')\n",
    "  print(f'Written data to {MAP_FILE_T}')\n",
    "\n",
    "  # split off the header row without mutating the unpickled table\n",
    "  headers = table[0]\n",
    "  data = table[1:]\n",
    "  lData = len(data)\n",
    "  print(f'headers = {headers}')\n",
    "\n",
    "  if not lData:\n",
    "    print('No rows')\n",
    "    return None\n",
    "\n",
    "  # number of duplicates listed per check and of rows shown at each end of the sample\n",
    "  batch = 10\n",
    "\n",
    "  langs = {row[3] for row in data}\n",
    "  print(f'Data has {lData} rows')\n",
    "  print(f'Data has languages {langs}')\n",
    "\n",
    "  def checkUnique(cols, per=None):\n",
    "    '''Print the values that occur more than once in column(s) `cols`.\n",
    "\n",
    "    `cols` is a column index or a tuple of indices whose fields are joined\n",
    "    with a comma. With `per` set to a column index, the check is done\n",
    "    separately for each distinct value found in that column.\n",
    "    '''\n",
    "    if isinstance(cols, int):\n",
    "      cols = (cols,)\n",
    "    colNames = ','.join(headers[col] for col in cols)\n",
    "\n",
    "    if per is None:\n",
    "      indent = ''\n",
    "      chunks = {None: data}\n",
    "    else:\n",
    "      indent = '\\t'\n",
    "      chunks = collections.defaultdict(list)\n",
    "      for row in data:\n",
    "        chunks[row[per]].append(row)\n",
    "\n",
    "    for (perVal, rows) in sorted(chunks.items()):\n",
    "      if perVal is not None:\n",
    "        print(f'rows with {headers[per]} = \"{perVal}\"')\n",
    "      # start from empty sets for each group, otherwise duplicates leak between groups\n",
    "      values = set()\n",
    "      duplicates = set()\n",
    "      for row in rows:\n",
    "        value = ','.join(row[col] for col in cols)\n",
    "        dest = duplicates if value in values else values\n",
    "        dest.add(value)\n",
    "\n",
    "      if duplicates:\n",
    "        lDups = len(duplicates)\n",
    "        print(f'{indent}found {lDups} duplicate {colNames}s:')\n",
    "        rest = '' if lDups <= batch else ' and more'\n",
    "        dupStr = ' '.join(sorted(duplicates)[0:batch])\n",
    "        print(f'{indent}\\t{dupStr}{rest}')\n",
    "      else:\n",
    "        print(f'{indent}no duplicate {colNames}s')\n",
    "\n",
    "  checkUnique(0)\n",
    "  checkUnique(2)\n",
    "  checkUnique((0, 1))\n",
    "  checkUnique((0, 1, 3))\n",
    "  checkUnique(0, per=3)\n",
    "\n",
    "  if lData <= 2 * batch:\n",
    "    # too few rows for a head/tail split: show each row exactly once\n",
    "    for row in data:\n",
    "      print(row)\n",
    "  else:\n",
    "    for row in data[0:batch]:\n",
    "      print(row)\n",
    "    print(f'\\n... {lData - 2 * batch} rows ...\\n')\n",
    "    for row in data[-batch:]:\n",
    "      print(row)\n",
    "\n",
    "  return data\n",
    "\n",
    "# MAP_FILE_P is the pickle path set in the configuration cell\n",
    "data = checkSignMapData(MAP_FILE_P)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "def makeMap(data):\n",
    "  pass\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import collections\n",
    "from unicodedata import name as uname"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "cuneiBlocks = {\n",
    "  'Cuneiform': ('12000', '123FF'),\n",
    "  'Cuneiform Numbers and Punctuation': ('12400', '1247F'),\n",
    "  'Early Dynastic Cuneiform': ('12480', '1254F'),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cNumber = dict(\n",
    "  one=1,\n",
    "  two=2,\n",
    "  three=3,\n",
    "  four=4,\n",
    "  five=5,\n",
    "  six=6,\n",
    "  seven=7,\n",
    "  eight=8,\n",
    "  nine=9,\n",
    ")\n",
    "\n",
    "numericGlyphs = set('''\n",
    "  ash\n",
    "  ash9\n",
    "  ban2\n",
    "  buru\n",
    "  dish\n",
    "  eshe3\n",
    "  esh16\n",
    "  esh21\n",
    "  gesh2\n",
    "  geshu\n",
    "  ilimmu\n",
    "  ilimmu3\n",
    "  ilimmu4\n",
    "  imin\n",
    "  imin3\n",
    "  limmu\n",
    "  limmu4\n",
    "  shar2\n",
    "  sharu\n",
    "  u\n",
    "  ussu\n",
    "  ussu3\n",
    "'''.strip().split())\n",
    "\n",
    "fractions = dict(\n",
    "  half=2,\n",
    "  third=3,\n",
    "  thirds=3,\n",
    "  quarter=4,\n",
    "  sixths=6,\n",
    "  eighth=8,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1360 positions; 1234 cuneiform characters\n",
      "126 skipped positions\n",
      "number: 65\n",
      "12400 = 𒐀 = ash <= CUNEIFORM NUMERIC SIGN TWO ASH\n",
      "12401 = 𒐁 = ash <= CUNEIFORM NUMERIC SIGN THREE ASH\n",
      "12402 = 𒐂 = ash <= CUNEIFORM NUMERIC SIGN FOUR ASH\n",
      "12403 = 𒐃 = ash <= CUNEIFORM NUMERIC SIGN FIVE ASH\n",
      "12404 = 𒐄 = ash <= CUNEIFORM NUMERIC SIGN SIX ASH\n",
      "12405 = 𒐅 = ash <= CUNEIFORM NUMERIC SIGN SEVEN ASH\n",
      "12406 = 𒐆 = ash <= CUNEIFORM NUMERIC SIGN EIGHT ASH\n",
      "12407 = 𒐇 = ash <= CUNEIFORM NUMERIC SIGN NINE ASH\n",
      "12408 = π’ˆ = dish <= CUNEIFORM NUMERIC SIGN THREE DISH\n",
      "12409 = 𒐉 = dish <= CUNEIFORM NUMERIC SIGN FOUR DISH\n",
      "1240a = π’Š = dish <= CUNEIFORM NUMERIC SIGN FIVE DISH\n",
      "1240b = 𒐋 = dish <= CUNEIFORM NUMERIC SIGN SIX DISH\n",
      "1240c = π’Œ = dish <= CUNEIFORM NUMERIC SIGN SEVEN DISH\n",
      "1240d = 𒐍 = dish <= CUNEIFORM NUMERIC SIGN EIGHT DISH\n",
      "1240e = π’Ž = dish <= CUNEIFORM NUMERIC SIGN NINE DISH\n",
      "1240f = 𒐏 = u <= CUNEIFORM NUMERIC SIGN FOUR U\n",
      "12410 = 𒐐 = u <= CUNEIFORM NUMERIC SIGN FIVE U\n",
      "12411 = 𒐑 = u <= CUNEIFORM NUMERIC SIGN SIX U\n",
      "12412 = 𒐒 = u <= CUNEIFORM NUMERIC SIGN SEVEN U\n",
      "12413 = 𒐓 = u <= CUNEIFORM NUMERIC SIGN EIGHT U\n",
      "numberSpecial: 10\n",
      "12432 = 𒐲 = shar2 times gal plus dish <= CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS DISH\n",
      "12433 = 𒐳 = shar2 times gal plus min <= CUNEIFORM NUMERIC SIGN SHAR2 TIMES GAL PLUS MIN\n",
      "12456 = π’‘– = nigidamin <= CUNEIFORM NUMERIC SIGN NIGIDAMIN\n",
      "12457 = π’‘— = nigidaesh <= CUNEIFORM NUMERIC SIGN NIGIDAESH\n",
      "12461 = π’‘‘ = old assyrian one sixth <= CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE SIXTH\n",
      "12462 = π’‘’ = old assyrian one quarter <= CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER\n",
      "12465 = π’‘₯ = elamite one third <= CUNEIFORM NUMERIC SIGN ELAMITE ONE THIRD\n",
      "12466 = 𒑦 = elamite two thirds <= CUNEIFORM NUMERIC SIGN ELAMITE TWO THIRDS\n",
      "12467 = π’‘§ = elamite forty <= CUNEIFORM NUMERIC SIGN ELAMITE FORTY\n",
      "12468 = 𒑨 = elamite fifty <= CUNEIFORM NUMERIC SIGN ELAMITE FIFTY\n",
      "numberVar: 29\n",
      "12425 = π’₯ = shar2 ~ <= CUNEIFORM NUMERIC SIGN THREE SHAR2 VARIANT FORM\n",
      "1242f = 𒐯 = sharu ~ <= CUNEIFORM NUMERIC SIGN THREE SHARU VARIANT FORM\n",
      "12437 = 𒐷 = buru ~ <= CUNEIFORM NUMERIC SIGN THREE BURU VARIANT FORM\n",
      "1243a = 𒐺 = esh16 ~ <= CUNEIFORM NUMERIC SIGN THREE VARIANT FORM ESH16\n",
      "1243b = 𒐻 = esh21 ~ <= CUNEIFORM NUMERIC SIGN THREE VARIANT FORM ESH21\n",
      "1243c = 𒐼 = limmu ~ <= CUNEIFORM NUMERIC SIGN FOUR VARIANT FORM LIMMU\n",
      "1243d = 𒐽 = limmu4 ~ <= CUNEIFORM NUMERIC SIGN FOUR VARIANT FORM LIMMU4\n",
      "1243e = 𒐾 = limmu ~a <= CUNEIFORM NUMERIC SIGN FOUR VARIANT FORM LIMMU A\n",
      "1243f = 𒐿 = limmu ~b <= CUNEIFORM NUMERIC SIGN FOUR VARIANT FORM LIMMU B\n",
      "12440 = π’‘€ = ash9 ~ <= CUNEIFORM NUMERIC SIGN SIX VARIANT FORM ASH9\n",
      "12441 = 𒑁 = imin3 ~ <= CUNEIFORM NUMERIC SIGN SEVEN VARIANT FORM IMIN3\n",
      "12442 = π’‘‚ = imin ~a <= CUNEIFORM NUMERIC SIGN SEVEN VARIANT FORM IMIN A\n",
      "12443 = 𒑃 = imin ~b <= CUNEIFORM NUMERIC SIGN SEVEN VARIANT FORM IMIN B\n",
      "12444 = π’‘„ = ussu ~ <= CUNEIFORM NUMERIC SIGN EIGHT VARIANT FORM USSU\n",
      "12445 = π’‘… = ussu3 ~ <= CUNEIFORM NUMERIC SIGN EIGHT VARIANT FORM USSU3\n",
      "12446 = 𒑆 = ilimmu ~ <= CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU\n",
      "12447 = 𒑇 = ilimmu3 ~ <= CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU3\n",
      "12448 = π’‘ˆ = ilimmu4 ~ <= CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU4\n",
      "12449 = 𒑉 = ilimmu ~a <= CUNEIFORM NUMERIC SIGN NINE VARIANT FORM ILIMMU A\n",
      "12453 = π’‘“ = ban2 ~ <= CUNEIFORM NUMERIC SIGN FOUR BAN2 VARIANT FORM\n",
      "odd: 7\n",
      "1245a = π’‘š = third dish <= CUNEIFORM NUMERIC SIGN ONE THIRD DISH\n",
      "1245b = π’‘› = thirds dish <= CUNEIFORM NUMERIC SIGN TWO THIRDS DISH\n",
      "1245c = π’‘œ = sixths dish <= CUNEIFORM NUMERIC SIGN FIVE SIXTHS DISH\n",
      "1245f = π’‘Ÿ = eighth ash <= CUNEIFORM NUMERIC SIGN ONE EIGHTH ASH\n",
      "12460 = π’‘  = quarter ash <= CUNEIFORM NUMERIC SIGN ONE QUARTER ASH\n",
      "12463 = π’‘£ = quarter gur <= CUNEIFORM NUMERIC SIGN ONE QUARTER GUR\n",
      "12464 = π’‘€ = half gur <= CUNEIFORM NUMERIC SIGN ONE HALF GUR\n",
      "punct: 5\n",
      "12470 = π’‘° = old assyrian word divider <= CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER\n",
      "12471 = π’‘± = vertical colon <= CUNEIFORM PUNCTUATION SIGN VERTICAL COLON\n",
      "12472 = π’‘² = diagonal colon <= CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON\n",
      "12473 = π’‘³ = diagonal tricolon <= CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON\n",
      "12474 = π’‘΄ = diagonal quadcolon <= CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON\n",
      "sign: 1118\n",
      "12000 = π’€€ = a <= CUNEIFORM SIGN A\n",
      "12001 = 𒀁 = a times a <= CUNEIFORM SIGN A TIMES A\n",
      "12002 = 𒀂 = a times bad <= CUNEIFORM SIGN A TIMES BAD\n",
      "12003 = 𒀃 = a times gan2 tenu <= CUNEIFORM SIGN A TIMES GAN2 TENU\n",
      "12004 = 𒀄 = a times ha <= CUNEIFORM SIGN A TIMES HA\n",
      "12005 = π’€… = a times igi <= CUNEIFORM SIGN A TIMES IGI\n",
      "12006 = 𒀆 = a times lagar gunu <= CUNEIFORM SIGN A TIMES LAGAR GUNU\n",
      "12007 = 𒀇 = a times mush <= CUNEIFORM SIGN A TIMES MUSH\n",
      "12008 = π’€ˆ = a times sag <= CUNEIFORM SIGN A TIMES SAG\n",
      "12009 = 𒀉 = a2 <= CUNEIFORM SIGN A2\n",
      "1200a = π’€Š = ab <= CUNEIFORM SIGN AB\n",
      "1200b = 𒀋 = ab times ash2 <= CUNEIFORM SIGN AB TIMES ASH2\n",
      "1200c = π’€Œ = ab times dun3 gunu <= CUNEIFORM SIGN AB TIMES DUN3 GUNU\n",
      "1200d = 𒀍 = ab times gal <= CUNEIFORM SIGN AB TIMES GAL\n",
      "1200e = π’€Ž = ab times gan2 tenu <= CUNEIFORM SIGN AB TIMES GAN2 TENU\n",
      "1200f = 𒀏 = ab times ha <= CUNEIFORM SIGN AB TIMES HA\n",
      "12010 = 𒀐 = ab times igi gunu <= CUNEIFORM SIGN AB TIMES IGI GUNU\n",
      "12011 = 𒀑 = ab times imin <= CUNEIFORM SIGN AB TIMES IMIN\n",
      "12012 = π’€’ = ab times lagab <= CUNEIFORM SIGN AB TIMES LAGAB\n",
      "12013 = 𒀓 = ab times shesh <= CUNEIFORM SIGN AB TIMES SHESH\n"
     ]
    }
   ],
   "source": [
    "def classifyGlyphName(name):\n",
    "  '''Turn a Unicode character name starting with CUNEIFORM into (kind, shortName).\n",
    "\n",
    "  kind is one of sign, number, numberVar, numberSpecial, punct, odd, other.\n",
    "  shortName is the lower-cased name with the leading CUNEIFORM dropped, as\n",
    "  well as the words consumed while determining the kind; variants get a\n",
    "  trailing ~, ~a or ~b. Reads the module-level cNumber and numericGlyphs.\n",
    "  '''\n",
    "  parts = name.lower().split()[1:]\n",
    "  if not parts:\n",
    "    return ('other', '')\n",
    "\n",
    "  head = parts[0]\n",
    "  if head == 'sign':\n",
    "    return ('sign', ' '.join(parts[1:]))\n",
    "  if head == 'punctuation':\n",
    "    if parts[1:2] == ['sign']:\n",
    "      return ('punct', ' '.join(parts[2:]))\n",
    "    return ('odd', ' '.join(parts[1:]))\n",
    "  if head != 'numeric':\n",
    "    return ('other', ' '.join(parts))\n",
    "\n",
    "  # from here on: names of the form NUMERIC ...\n",
    "  parts = parts[1:]\n",
    "  if parts[0:1] != ['sign']:\n",
    "    return ('odd', ' '.join(parts))\n",
    "  parts = parts[1:]\n",
    "\n",
    "  # without a leading number word (one .. nine) the whole rest is kept as the short name\n",
    "  if not parts or parts[0] not in cNumber:\n",
    "    return ('numberSpecial', ' '.join(parts))\n",
    "\n",
    "  # the number word itself is not part of the short name\n",
    "  parts = parts[1:]\n",
    "  kind = 'number'\n",
    "  variant = ''\n",
    "  if 'variant' in parts and 'form' in parts:\n",
    "    variant = '~'\n",
    "    kind = 'numberVar'\n",
    "    parts.remove('variant')\n",
    "    parts.remove('form')\n",
    "\n",
    "  # anything but exactly one known numeric glyph name needs a closer look\n",
    "  if len(parts) != 1 or parts[0] not in numericGlyphs:\n",
    "    if len(parts) == 2 and parts[1] in {'a', 'b'}:\n",
    "      # a trailing A or B marks a lettered variant\n",
    "      variant = f'~{parts[1]}'\n",
    "      kind = 'numberVar'\n",
    "      parts = parts[0:-1]\n",
    "    elif not (len(parts) == 2 and parts[1] == 'tenu'):\n",
    "      # a two-word name ending in tenu keeps its kind, everything else is odd\n",
    "      kind = 'odd'\n",
    "\n",
    "  if variant:\n",
    "    parts.append(variant)\n",
    "  return (kind, ' '.join(parts))\n",
    "\n",
    "\n",
    "pos = 0\n",
    "nChars = 0\n",
    "\n",
    "# code points in the blocks that have no Unicode name\n",
    "noUni = []\n",
    "\n",
    "# kind => list of (code point, short name)\n",
    "glyphs = collections.defaultdict(list)\n",
    "\n",
    "for (cuneiBlock, (start, end)) in cuneiBlocks.items():\n",
    "  for u in range(int(start, 16), int(end, 16) + 1):\n",
    "    pos += 1\n",
    "    name = uname(chr(u), None)\n",
    "    if name is None:\n",
    "      noUni.append(u)\n",
    "      continue\n",
    "    nChars += 1\n",
    "    if not name.startswith('CUNEIFORM '):\n",
    "      # store the same (code point, name) shape as the other kinds and do not classify further\n",
    "      glyphs['no'].append((u, name.lower()))\n",
    "      continue\n",
    "    (kind, shortName) = classifyGlyphName(name)\n",
    "    glyphs[kind].append((u, shortName))\n",
    "\n",
    "print(f'{pos} positions; {nChars} cuneiform characters')\n",
    "if pos - nChars:\n",
    "  print(f'{pos - nChars} skipped positions')\n",
    "\n",
    "# per kind: the count followed by a sample of at most 20 glyphs\n",
    "for (kind, unis) in sorted(glyphs.items()):\n",
    "  print(f'{kind}: {len(unis)}')\n",
    "  for (u, shortName) in unis[0:20]:\n",
    "    c = chr(u)\n",
    "    print(f'{u:>03x} = {c} = {shortName} <= {uname(c)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'LATIN SMALL LETTER A'"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "uname('a')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API — Content policy — Contact — JavaScript license information — Web API