Revision - eb69908 - bpo-34454: Clean up datetime.fromisoformat surrogate handling [...] - origin: https://github.com/python/cpython

visit type:

https://github.com/python/cpython

18 August 2024, 09:59:52 UTC

Revision eb69908b5cfdf568e01356f96d01cd146979cfe7 authored by Paul Ganssle on 22 October 2018, 16:32:52 UTC, committed by Miss Islington (bot) on 22 October 2018, 19:37:55 UTC

bpo-34454: Clean up datetime.fromisoformat surrogate handling (GH-8959)

* Use _PyUnicode_Copy in sanitize_isoformat_str

* Use repr in fromisoformat error message

This reverses commit 67b74a98b2 per Serhiy Storchaka's suggestion:

     I suggested to use %R in the error message because including the raw
     string can be confusing in the case of empty string, or string
     containing trailing whitespaces, invisible or unprintable characters.

We agree that it is better to change both the C and pure Python versions
to use repr.

* Retain non-sanitized dtstr for error printing

This does not create an extra string, it just holds on to a reference to
the original input string for purposes of creating the error message.

* PEP 7 fixes to from_isoformat

* Separate handling of Unicode and other errors

In the initial implementation, encoding errors and all other errors
alike would raise an error indicating an invalid format, which would
not be accurate for errors like MemoryError.

* Drop needs_decref from _sanitize_isoformat_str

Instead _sanitize_isoformat_str returns a new reference, even to the
original string.
(cherry picked from commit 3df85404d4bf420db3362eeae1345f2cad948a71)

Co-authored-by: Paul Ganssle <pganssle@users.noreply.github.com>

1 parent 7f34d55

Files
Changes

Permalinks

Tip revision: eb69908b5cfdf568e01356f96d01cd146979cfe7 authored by Paul Ganssle on 22 October 2018, 16:32:52 UTC
bpo-34454: Clean up datetime.fromisoformat surrogate handling (GH-8959)

Tip revision: eb69908

highlight.py

#!/usr/bin/env python3
'''Add syntax highlighting to Python source code'''

__author__ = 'Raymond Hettinger'

import builtins
import functools
import html as html_module
import keyword
import re
import tokenize

#### Analyze Python Source #################################

def is_builtin(s):
    'Report whether s names an attribute of the builtins module'
    missing = object()
    return getattr(builtins, s, missing) is not missing

def combine_range(lines, start, end):
    'Return the text between two (row, col) positions, together with end'
    first_row, first_col = start
    last_row, last_col = end
    if first_row != last_row:
        # Tail of the first line, any whole lines in between, head of the last
        head = lines[first_row - 1][first_col:]
        middle = lines[first_row:last_row - 1]
        tail = lines[last_row - 1][:last_col]
        return ''.join([head, *middle, tail]), end
    return lines[first_row - 1][first_col:last_col], end

def analyze_python(source):
    '''Tokenize Python source and label the pieces worth highlighting.
       Yields (category, text) pairs; the category is '' for text that
       received no classification.
    '''
    lines = source.splitlines(True)
    lines.append('')
    readline = functools.partial(next, iter(lines), '')
    last_type, last_str = tokenize.COMMENT, ''
    emitted_upto = (1, 0)
    for token in tokenize.generate_tokens(readline):
        cur_type, cur_str, begin, finish = token[:4]
        category = ''
        if cur_type == tokenize.COMMENT:
            category = 'comment'
        elif cur_type == tokenize.OP:
            # Brackets and punctuation stay unclassified
            if cur_str[:1] not in '{}[](),.:;@':
                category = 'operator'
        elif cur_type == tokenize.STRING:
            # A string right after an INDENT or starting in column 0
            # is labelled as a docstring
            if last_type == tokenize.INDENT or begin[1] == 0:
                category = 'docstring'
            else:
                category = 'string'
        elif cur_type == tokenize.NAME:
            if cur_str in ('def', 'class', 'import', 'from'):
                category = 'definition'
            elif last_str in ('def', 'class'):
                category = 'defname'
            elif keyword.iskeyword(cur_str):
                category = 'keyword'
            elif is_builtin(cur_str) and last_str != '.':
                category = 'builtin'
        if category:
            # Flush the unclassified text before this token, then the token
            gap, emitted_upto = combine_range(lines, emitted_upto, begin)
            yield '', gap
            emitted_upto = finish
            yield category, cur_str
        last_type, last_str = cur_type, cur_str
    # Whatever follows the last classified token goes out unclassified
    remainder, emitted_upto = combine_range(lines, emitted_upto, finish)
    yield '', remainder

#### Raw Output  ###########################################

def raw_highlight(classified_text):
    'Render every (kind, text) pair as one line: kind label, then repr of text'
    return ''.join(
        '%15s:  %r\n' % (kind or 'plain', text)
        for kind, text in classified_text
    )

#### ANSI Output ###########################################

# Default color table for ansi_highlight(): maps each token category to an
# (opener, closer) pair of ANSI escape sequences that are written before and
# after the text of that category.  Every closer here is '\033[0m'.
default_ansi = {
    'comment': ('\033[0;31m', '\033[0m'),
    'string': ('\033[0;32m', '\033[0m'),
    'docstring': ('\033[0;32m', '\033[0m'),
    'keyword': ('\033[0;33m', '\033[0m'),
    'builtin': ('\033[0;35m', '\033[0m'),
    'definition': ('\033[0;33m', '\033[0m'),
    'defname': ('\033[0;34m', '\033[0m'),
    'operator': ('\033[0;33m', '\033[0m'),
}

def ansi_highlight(classified_text, colors=default_ansi):
    'Wrap each classified chunk in the ANSI escape pair listed for its kind'
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    uncolored = ('', '')
    pieces = []
    for kind, text in classified_text:
        # Kinds missing from the table are emitted without any escapes
        start, stop = colors.get(kind, uncolored)
        pieces.extend((start, text, stop))
    return ''.join(pieces)

#### HTML Output ###########################################

def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
    'Build an HTML fragment: escaped text, with classified chunks inside spans'
    fragments = [opener]
    for kind, text in classified_text:
        escaped = html_module.escape(text)
        if kind:
            # The kind doubles as the CSS class name of the span
            escaped = '<span class="%s">' % kind + escaped + '</span>'
        fragments.append(escaped)
    fragments.append(closer)
    return ''.join(fragments)

# Default stylesheet for build_html_page(): maps a CSS class selector to its
# declaration block.  The class names correspond to the kinds that
# html_highlight() writes into <span class="..."> elements.
# build_html_page() emits one "selector declaration" line per entry.
default_css = {
    '.comment': '{color: crimson;}',
    '.string':  '{color: forestgreen;}',
    '.docstring': '{color: forestgreen; font-style:italic;}',
    '.keyword': '{color: darkorange;}',
    '.builtin': '{color: purple;}',
    '.definition': '{color: darkorange; font-weight:bold;}',
    '.defname': '{color: blue;}',
    '.operator': '{color: brown;}',
}

# Default page template for build_html_page(), filled in with str.format().
# Replacement fields: {title} (page title), {css} (stylesheet rules) and
# {body} (the highlighted HTML fragment).
default_html = '''\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
          "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title> {title} </title>
<style type="text/css">
{css}
</style>
</head>
<body>
{body}
</body>
</html>
'''

def build_html_page(classified_text, title='python',
                    css=default_css, html=default_html):
    'Render classified text as a standalone HTML page with an embedded stylesheet'
    # One "selector declaration" line per stylesheet entry
    stylesheet = '\n'.join('%s %s' % rule for rule in css.items())
    fragment = html_highlight(classified_text)
    safe_title = html_module.escape(title)
    return html.format(title=safe_title, css=stylesheet, body=fragment)

#### LaTeX Output ##########################################

# Default macro table for latex_highlight(): maps each token category to the
# body of a one-argument LaTeX macro.  latex_highlight() turns every entry
# into a line of the form \newcommand{\py<category>}[1]{<body>}.
default_latex_commands = {
    'comment': r'{\color{red}#1}',
    'string': r'{\color{ForestGreen}#1}',
    'docstring': r'{\emph{\color{ForestGreen}#1}}',
    'keyword': r'{\color{orange}#1}',
    'builtin': r'{\color{purple}#1}',
    'definition': r'{\color{orange}#1}',
    'defname': r'{\color{blue}#1}',
    'operator': r'{\color{brown}#1}',
}

# Default value of latex_highlight()'s document parameter: a %-format
# template with the keys %(macros)s, %(title)s and %(body)s.  The body is
# placed inside an alltt environment.
default_latex_document = r'''
\documentclass{article}
\usepackage{alltt}
\usepackage{upquote}
\usepackage{color}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage[cm]{fullpage}
%(macros)s
\begin{document}
\center{\LARGE{%(title)s}}
\begin{alltt}
%(body)s
\end{alltt}
\end{document}
'''

def alltt_escape(s):
    'Escape backslashes and braces in s with their LaTeX replacements'
    replacements = {'\\': r'\textbackslash{}', '{': r'\{', '}': r'\}'}

    def substitute(match):
        return replacements[match.group()]

    return re.sub(r'[\\{}]', substitute, s)

def latex_highlight(classified_text, title = 'python',
                    commands = default_latex_commands,
                    document = default_latex_document):
    '''Create a complete LaTeX document with colorized source code.

       classified_text is an iterable of (kind, text) pairs; text with
       an empty kind is emitted without a macro wrapper.
       title is inserted into the document as given (it is not escaped).
       commands maps each kind to the body of its one-argument macro.
       document is a %-format template with the keys macros, title and
       body.  Returns the filled-in document as a string.
    '''
    macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    result = []
    for kind, text in classified_text:
        if kind:
            result.append(r'\py%s{' % kind)
        result.append(alltt_escape(text))
        if kind:
            result.append('}')
    # Fill the template the caller passed in, not the module-level default
    return document % dict(title=title, macros=macros, body=''.join(result))


if __name__ == '__main__':
    # Command-line driver: classify SOURCEFILE, encode it in the requested
    # output format, then write it to stdout or open it in a browser.
    import argparse
    import os.path
    import sys
    import textwrap
    import webbrowser

    parser = argparse.ArgumentParser(
            description = 'Add syntax highlighting to Python source code',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog = textwrap.dedent('''
                examples:

                  # Show syntax highlighted code in the terminal window
                  $ ./highlight.py myfile.py

                  # Colorize myfile.py and display in a browser
                  $ ./highlight.py -b myfile.py

                  # Create an HTML section to embed in an existing webpage
                  ./highlight.py -s myfile.py

                  # Create a complete HTML file
                  $ ./highlight.py -c myfile.py > myfile.html

                  # Create a PDF using LaTeX
                  $ ./highlight.py -l myfile.py | pdflatex

            '''))
    parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
            help = 'file containing Python sourcecode')
    parser.add_argument('-b', '--browser', action = 'store_true',
            help = 'launch a browser to show results')
    parser.add_argument('-c', '--complete', action = 'store_true',
            help = 'build a complete html webpage')
    parser.add_argument('-l', '--latex', action = 'store_true',
            help = 'build a LaTeX document')
    parser.add_argument('-r', '--raw', action = 'store_true',
            help = 'raw parse of categorized text')
    parser.add_argument('-s', '--section', action = 'store_true',
            help = 'show an HTML section rather than a complete webpage')
    args = parser.parse_args()

    if args.section and (args.browser or args.complete):
        parser.error('The -s/--section option is incompatible with '
                     'the -b/--browser or -c/--complete options')

    sourcefile = args.sourcefile
    # tokenize.open() decodes the file the way Python itself would: from its
    # BOM or coding cookie, falling back to UTF-8, instead of relying on the
    # locale's preferred encoding.
    with tokenize.open(sourcefile) as f:
        source = f.read()
    classified_text = analyze_python(source)

    # The first matching option decides the output format; ANSI is the default
    if args.raw:
        encoded = raw_highlight(classified_text)
    elif args.complete or args.browser:
        encoded = build_html_page(classified_text, title=sourcefile)
    elif args.section:
        encoded = html_highlight(classified_text)
    elif args.latex:
        encoded = latex_highlight(classified_text, title=sourcefile)
    else:
        encoded = ansi_highlight(classified_text)

    if args.browser:
        # The page is written to the current directory, named after the source
        htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
        # The default page template declares charset=UTF-8, so the file is
        # written as UTF-8 rather than in the locale's encoding.
        with open(htmlfile, 'w', encoding='utf-8') as f:
            f.write(encoded)
        webbrowser.open('file://' + os.path.abspath(htmlfile))
    else:
        sys.stdout.write(encoded)

Showing with 0 additions and 0 deletions (0 / 0 diffs computed)

Computing file changes ...