
__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'
import re
from codecs import BOM_UTF8, lookup
from blib2to3.pgen2.token import *
import sys
from . import token
# Public API: every name in the `token` module that does not start with an
# underscore, plus this module's own entry points.
# NOTE(review): 'untokenize' is exported here but no definition is visible in
# this file -- confirm it is provided elsewhere, otherwise a star-import of
# this module will fail.
__all__ = ([x for x in dir(token) if (x[0] != '_')] + ['tokenize', 'generate_tokens', 'untokenize'])
# The module object was only needed to build __all__; drop it from the namespace.
del token
# Compatibility shim: on an interpreter that has no `bytes` builtin, alias it
# to `str` so that `bytes()` calls further down still work.
try:
    bytes
except NameError:
    bytes = str

def group(*choices):
    """Return a regex capturing group that matches any one of *choices*.

    The alternatives are joined with ``|`` and wrapped in parentheses; with
    no arguments the result is the empty group ``()``.
    """
    alternatives = '|'.join(choices)
    return '(%s)' % alternatives

def any(*choices):
    """Return a regex matching zero or more repetitions of any of *choices*.

    Note that this name shadows the ``any`` builtin inside this module.
    """
    alternation = group(*choices)
    return '%s*' % alternation

def maybe(*choices):
    """Return a regex matching at most one occurrence of any of *choices*."""
    alternation = group(*choices)
    return '%s?' % alternation

def _combinations(*l):
    """Return the set of one- and two-item concatenations of the arguments.

    Each argument is paired with every argument and with the empty string;
    a pair is kept only when its two parts differ case-insensitively, so an
    item is never combined with itself or with its other-case twin.
    """
    suffixes = l + ('',)
    combos = set()
    for head in l:
        lowered_head = head.lower()
        for tail in suffixes:
            if lowered_head != tail.lower():
                combos.add(head + tail)
    return combos
# --- Regular-expression building blocks for the tokenizer -------------------
# Each name below is a regex *source string*; they are composed with
# group()/any()/maybe() and compiled further down.

# Runs of space, form feed and tab; a comment running to the end of the line.
Whitespace = '[ \\f\\t]*'
Comment = '#[^\\r\\n]*'
# Skippable material: whitespace, backslash-newline continuations, and an
# optional trailing comment.
Ignore = ((Whitespace + any(('\\\\\\r?\\n' + Whitespace))) + maybe(Comment))
# A name is a run of word characters; DollarName is the same with a leading '$'.
Name = '\\w+'
DollarName = '\\$\\w+'
# Integer literals. Underscore digit separators are accepted, and the hex,
# octal and decimal forms also accept an optional l/L suffix.
Binnumber = '0[bB]_?[01]+(?:_[01]+)*'
Hexnumber = '0[xX]_?[\\da-fA-F]+(?:_[\\da-fA-F]+)*[lL]?'
# The 'o'/'O' marker is optional, so a bare leading zero form also matches.
Octnumber = '0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?'
Decnumber = group('[1-9]\\d*(?:_\\d+)*[lL]?', '0[lL]?')
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
# Floating-point and imaginary literals.
Exponent = '[eE][-+]?\\d+(?:_\\d+)*'
Pointfloat = (group('\\d+(?:_\\d+)*\\.(?:\\d+(?:_\\d+)*)?', '\\.\\d+(?:_\\d+)*') + maybe(Exponent))
Expfloat = ('\\d+(?:_\\d+)*' + Exponent)
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group('\\d+(?:_\\d+)*[jJ]', (Floatnumber + '[jJ]'))
# Alternatives are tried in order: imaginary, then float, then integer.
Number = group(Imagnumber, Floatnumber, Intnumber)
# Tails of string literals: each matches the remainder of a string, up to and
# including the closing quote(s), allowing backslash escapes.
Single = "[^'\\\\]*(?:\\\\.[^'\\\\]*)*'"
Double = '[^"\\\\]*(?:\\\\.[^"\\\\]*)*"'
Single3 = "[^'\\\\]*(?:(?:\\\\.|'(?!''))[^'\\\\]*)*'''"
Double3 = '[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""'
# Optional string prefix: one of u/r/b/f, or the two-letter combinations listed.
_litprefix = '(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?'
# Opening of a triple-quoted string (prefix plus three quotes).
Triple = group((_litprefix + "'''"), (_litprefix + '"""'))
# A complete single-line string in either quote style.
String = group((_litprefix + "'[^\\n'\\\\]*(?:\\\\.[^\\n'\\\\]*)*'"), (_litprefix + '"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*"'))
# Operators (longest forms listed before the single-character class),
# brackets, and other punctuation including newline.
Operator = group('\\*\\*=?', '>>=?', '<<=?', '<>', '!=', '//=?', '->', '[+\\-*/%&@|^=<>]=?', '~')
Bracket = '[][(){}]'
Special = group('\\r?\\n', '[:;.,`@]')
Funny = group(Operator, Bracket, Special)
PlainToken = group(Number, Funny, String, Name, DollarName)
Token = (Ignore + PlainToken)
# A single-quoted string that either closes on this line or ends with a
# backslash-newline (i.e. continues on the next line).
ContStr = group(((_litprefix + "'[^\\n'\\\\]*(?:\\\\.[^\\n'\\\\]*)*") + group("'", '\\\\\\r?\\n')), ((_litprefix + '"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*') + group('"', '\\\\\\r?\\n')))
PseudoExtras = group('\\\\\\r?\\n', Comment, Triple)
# Leading whitespace followed by one capturing group holding the token text.
PseudoToken = (Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name, DollarName))
# --- Compiled patterns ------------------------------------------------------
tokenprog = re.compile(Token, re.UNICODE)
pseudoprog = re.compile(PseudoToken, re.UNICODE)
single3prog = re.compile(Single3)
double3prog = re.compile(Double3)
# Every accepted spelling of a string prefix (case and order variants).
_strprefixes = ((_combinations('r', 'R', 'f', 'F') | _combinations('r', 'R', 'b', 'B')) | {'u', 'U', 'ur', 'uR', 'Ur', 'UR'})
# Maps a string opener (quote characters, optionally prefixed) to the compiled
# pattern that matches the rest of that string.
endprogs = {"'": re.compile(Single), '"': re.compile(Double), "'''": single3prog, '"""': double3prog}
endprogs.update({(prefix + "'''"): single3prog for prefix in _strprefixes})
endprogs.update({(prefix + '"""'): double3prog for prefix in _strprefixes})
# Bare prefixes map to None so that a lookup chained with `or` falls through
# to the entry for the following quote character.
endprogs.update({prefix: None for prefix in _strprefixes})
# All valid openers of triple-quoted and of single-quoted strings.
triple_quoted = (({"'''", '"""'} | {(prefix + "'''") for prefix in _strprefixes}) | {(prefix + '"""') for prefix in _strprefixes})
single_quoted = (({"'", '"'} | {(prefix + "'") for prefix in _strprefixes}) | {(prefix + '"') for prefix in _strprefixes})
# Tab stop width used when computing indentation columns.
tabsize = 8

class TokenError(Exception):
    """Raised when the input ends in the middle of a token.

    The tokenizer raises it with a message and a ``(row, col)`` position when
    end of input is reached inside a multi-line string or statement.
    """

class StopTokenizing(Exception):
    """Signal that tokenization should stop early.

    ``tokenize()`` catches this exception and returns quietly, so a
    ``tokeneater`` callback can raise it to end the run.
    """

def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line):
    """Print one token as ``srow,scol-erow,ecol:<TAB>TYPE<TAB>repr(token)``.

    type: token type constant, looked up in ``tok_name`` for display.
    token: the token text.
    xxx_todo_changeme: ``(row, col)`` where the token starts.
    xxx_todo_changeme1: ``(row, col)`` where the token ends.
    line: the source line; accepted to match the tokeneater signature but
        not used here.
    """
    start_row, start_col = xxx_todo_changeme
    end_row, end_col = xxx_todo_changeme1
    span = '%d,%d-%d,%d' % (start_row, start_col, end_row, end_col)
    print('%s:\t%s\t%s' % (span, tok_name[type], repr(token)))

def tokenize(readline, tokeneater=printtoken):
    """Tokenize the input read via *readline*, feeding each token to *tokeneater*.

    readline: callable returning the next line of input on each call.
    tokeneater: callable invoked with the five fields of every token;
        defaults to ``printtoken``.

    A ``StopTokenizing`` raised while processing is swallowed, which ends the
    run without an error.
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        # Deliberate early exit requested during tokenization.
        return

def tokenize_loop(readline, tokeneater):
    """Call *tokeneater* once per token produced by ``generate_tokens(readline)``.

    Each token tuple is unpacked into positional arguments. Unlike
    ``tokenize()``, exceptions are not caught here.
    """
    for fields in generate_tokens(readline):
        tokeneater(*fields)
# Version-dependent helpers: `isidentifier` tests whether a string is a valid
# identifier, and `ASCII` is the regex flag restricting \w and friends to ASCII
# (0, i.e. no flag, where `re.ASCII` is unavailable).
if sys.version_info > (3,):
    isidentifier = str.isidentifier
    ASCII = re.ASCII
else:
    IDENTIFIER_RE = re.compile(r'^[^\d\W]\w*$', re.UNICODE)

    def isidentifier(s):
        """Return True if *s* looks like an identifier (regex fallback)."""
        return IDENTIFIER_RE.match(s) is not None
    ASCII = 0
# Matches a comment line carrying an encoding declaration; group 1 is the
# declared encoding name.
cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', ASCII)
# Matches (on bytes) a line that is empty, whitespace-only, or comment-only.
blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', ASCII)

def _get_normal_name(orig_enc):
    """Canonicalise common spellings of the UTF-8 and Latin-1 encoding names.

    Only the first 12 characters are inspected, lower-cased and with
    underscores turned into hyphens. Returns ``'utf-8'`` or ``'iso-8859-1'``
    for a recognised spelling (optionally followed by ``-suffix``); any other
    name is returned unchanged.
    """
    head = orig_enc[:12].lower().replace('_', '-')
    if head == 'utf-8' or head.startswith('utf-8-'):
        return 'utf-8'
    for alias in ('latin-1', 'iso-8859-1', 'iso-latin-1'):
        if head == alias or head.startswith(alias + '-'):
            return 'iso-8859-1'
    return orig_enc

def detect_encoding(readline):
    """Work out the source encoding from a UTF-8 BOM and/or a coding cookie.

    readline: callable returning the next line of the source as bytes. It is
        called at most twice; a ``StopIteration`` it raises is treated as
        end of input.

    Returns ``(encoding, lines)`` where *lines* is the list of lines that
    were read (with any BOM stripped from the first one). The encoding is
    the one named by a cookie on the first line, or on the second line when
    the first is blank or a comment; otherwise ``'utf-8'``, or
    ``'utf-8-sig'`` when a BOM was present.

    Raises SyntaxError if the cookie names an unknown codec, or if a BOM is
    present together with a cookie for a non-UTF-8 codec.
    """
    bom_found = False
    default = 'utf-8'

    def read_or_stop():
        # Treat an exhausted source as an empty line.
        try:
            return readline()
        except StopIteration:
            return bytes()

    def find_cookie(line):
        # Return the encoding declared on `line`, or None if there is none.
        try:
            text = line.decode('ascii')
        except UnicodeDecodeError:
            return None
        found = cookie_re.match(text)
        if found is None:
            return None
        encoding = _get_normal_name(found.group(1))
        try:
            codec = lookup(encoding)
        except LookupError:
            raise SyntaxError(('unknown encoding: ' + encoding))
        if not bom_found:
            return encoding
        # A BOM commits the file to UTF-8; any other declared codec is an error.
        if codec.name != 'utf-8':
            raise SyntaxError('encoding problem: utf-8')
        return encoding + '-sig'

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[len(BOM_UTF8):]
        default = 'utf-8-sig'
    if not first:
        return (default, [])

    consumed = [first]
    declared = find_cookie(first)
    if declared:
        return (declared, consumed)
    # Only look at the second line when the first holds no code.
    if blank_re.match(first) is None:
        return (default, consumed)

    second = read_or_stop()
    if not second:
        return (default, consumed)
    consumed.append(second)
    return ((find_cookie(second) or default), consumed)

def generate_tokens(readline):
    """Generate tokens for the source text read line by line via *readline*.

    readline: callable returning the next line of input on each call. An
        empty string, or a ``StopIteration`` raised by the callable, marks
        the end of input.

    Yields 5-tuples ``(type, string, start, end, line)``: the token type
    constant, the token text, the ``(row, col)`` where it starts and ends,
    and the physical line (or accumulated lines, for a multi-line string) it
    came from. After the input is exhausted, one DEDENT per still-open
    indentation level and a final ENDMARKER are produced.

    Raises:
        TokenError: input ends inside a multi-line string or a multi-line
            (bracketed or backslash-continued) statement.
        IndentationError: a line dedents to a column that matches no
            enclosing indentation level.
    """
    # lnum: current line number; parenlev: bracket nesting depth;
    # continued: set after a backslash line continuation.
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    # contstr: text of a string literal that is still open across lines;
    # needcont: set when that string is single-quoted and so must be
    # continued with backslash-newline.
    (contstr, needcont) = ('', 0)
    contline = None
    # Stack of indentation columns currently open.
    indents = [0]
    # A pending 'async' NAME token, held back until the following token shows
    # whether it should be emitted as ASYNC or as a plain NAME.
    stashed = None
    # State for recognising async/await as keywords inside `async def` bodies.
    async_def = False
    async_def_indent = 0
    async_def_nl = False
    while 1:
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum = (lnum + 1)
        (pos, max) = (0, len(line))
        if contstr:
            # Inside a string literal that started on an earlier line.
            if (not line):
                raise TokenError('EOF in multi-line string', strstart)
            endmatch = endprog.match(line)
            if endmatch:
                # The string closes on this line; emit it and keep scanning
                # the remainder of the line below.
                pos = end = endmatch.end(0)
                (yield (STRING, (contstr + line[:end]), strstart, (lnum, end), (contline + line)))
                (contstr, needcont) = ('', 0)
                contline = None
            elif (needcont and (line[(- 2):] != '\\\n') and (line[(- 3):] != '\\\r\n')):
                # A single-quoted string line that neither closes nor ends in
                # backslash-newline: report the whole thing as an error token.
                (yield (ERRORTOKEN, (contstr + line), strstart, (lnum, len(line)), contline))
                contstr = ''
                contline = None
                continue
            else:
                # Still inside the string; accumulate and read the next line.
                contstr = (contstr + line)
                contline = (contline + line)
                continue
        elif ((parenlev == 0) and (not continued)):
            # Start of a new logical line: measure indentation.
            if (not line):
                break
            column = 0
            while (pos < max):
                if (line[pos] == ' '):
                    column = (column + 1)
                elif (line[pos] == '\t'):
                    # Advance to the next tab stop.
                    column = (((column // tabsize) + 1) * tabsize)
                elif (line[pos] == '\x0c'):
                    # A form feed resets the column count.
                    column = 0
                else:
                    break
                pos = (pos + 1)
            if (pos == max):
                # The line consisted only of space/tab/form-feed characters
                # (not even a newline); stop reading input.
                break
            if stashed:
                (yield stashed)
                stashed = None
            if (line[pos] in '\r\n'):
                # Blank line: emit NL without touching indentation.
                (yield (NL, line[pos:], (lnum, pos), (lnum, len(line)), line))
                continue
            if (line[pos] == '#'):
                # Comment-only line: emit COMMENT followed by NL, also without
                # touching indentation.
                comment_token = line[pos:].rstrip('\r\n')
                nl_pos = (pos + len(comment_token))
                (yield (COMMENT, comment_token, (lnum, pos), (lnum, (pos + len(comment_token))), line))
                (yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line))
                continue
            if (column > indents[(- 1)]):
                indents.append(column)
                (yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line))
            while (column < indents[(- 1)]):
                # Pop one indentation level per DEDENT until the column matches.
                if (column not in indents):
                    raise IndentationError('unindent does not match any outer indentation level', ('<tokenize>', lnum, pos, line))
                indents = indents[:(- 1)]
                if (async_def and (async_def_indent >= indents[(- 1)])):
                    # Dedented back to (or past) the level of the `async def`.
                    async_def = False
                    async_def_nl = False
                    async_def_indent = 0
                (yield (DEDENT, '', (lnum, pos), (lnum, pos), line))
            if (async_def and async_def_nl and (async_def_indent >= indents[(- 1)])):
                # A NEWLINE was seen after the `async def` and this line is not
                # indented deeper than it: the async context is over.
                async_def = False
                async_def_nl = False
                async_def_indent = 0
        else:
            # Continuation of a bracketed or backslash-continued statement.
            if (not line):
                raise TokenError('EOF in multi-line statement', (lnum, 0))
            continued = 0
        while (pos < max):
            # Scan the tokens on the rest of this line.
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:
                # Group 1 is the token itself, after any leading whitespace.
                (start, end) = pseudomatch.span(1)
                (spos, epos, pos) = ((lnum, start), (lnum, end), end)
                (token, initial) = (line[start:end], line[start])
                if ((initial in numchars) or ((initial == '.') and (token != '.'))):
                    # Number literal (including ones that start with a dot).
                    (yield (NUMBER, token, spos, epos, line))
                elif (initial in '\r\n'):
                    # End of line: NL inside brackets, NEWLINE otherwise.
                    newline = NEWLINE
                    if (parenlev > 0):
                        newline = NL
                    elif async_def:
                        async_def_nl = True
                    if stashed:
                        (yield stashed)
                        stashed = None
                    (yield (newline, token, spos, epos, line))
                elif (initial == '#'):
                    assert (not token.endswith('\n'))
                    if stashed:
                        (yield stashed)
                        stashed = None
                    (yield (COMMENT, token, spos, epos, line))
                elif (token in triple_quoted):
                    # Opening of a triple-quoted string; look for its end on
                    # this same line.
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        if stashed:
                            (yield stashed)
                            stashed = None
                        (yield (STRING, token, spos, (lnum, pos), line))
                    else:
                        # String continues on following lines; remember where
                        # it started and what has been read so far.
                        strstart = (lnum, start)
                        contstr = line[start:]
                        contline = line
                        break
                elif ((initial in single_quoted) or (token[:2] in single_quoted) or (token[:3] in single_quoted)):
                    # Single-quoted string, possibly with a 1- or 2-letter prefix.
                    if (token[(- 1)] == '\n'):
                        # Ends in backslash-newline: continues on the next line.
                        strstart = (lnum, start)
                        # Prefix entries in endprogs are None, so this picks
                        # the pattern for the first actual quote character.
                        endprog = (endprogs[initial] or endprogs[token[1]] or endprogs[token[2]])
                        (contstr, needcont) = (line[start:], 1)
                        contline = line
                        break
                    else:
                        if stashed:
                            (yield stashed)
                            stashed = None
                        (yield (STRING, token, spos, epos, line))
                elif isidentifier(initial):
                    # Name or keyword.
                    if (token in ('async', 'await')):
                        if async_def:
                            # Inside an `async def`, these are real keywords.
                            (yield ((ASYNC if (token == 'async') else AWAIT), token, spos, epos, line))
                            continue
                    tok = (NAME, token, spos, epos, line)
                    if ((token == 'async') and (not stashed)):
                        # Hold 'async' back until the next token is known.
                        stashed = tok
                        continue
                    if (token in ('def', 'for')):
                        if (stashed and (stashed[0] == NAME) and (stashed[1] == 'async')):
                            # 'async def' / 'async for': emit the held-back
                            # token as ASYNC; only 'def' opens an async context.
                            if (token == 'def'):
                                async_def = True
                                async_def_indent = indents[(- 1)]
                            (yield (ASYNC, stashed[1], stashed[2], stashed[3], stashed[4]))
                            stashed = None
                    if stashed:
                        (yield stashed)
                        stashed = None
                    (yield tok)
                elif (initial == '\\'):
                    # Backslash line continuation.
                    if stashed:
                        (yield stashed)
                        stashed = None
                    (yield (NL, token, spos, (lnum, pos), line))
                    continued = 1
                elif (initial == '$'):
                    if stashed:
                        (yield stashed)
                        stashed = None
                    (yield (DOLLARNAME, token, spos, epos, line))
                else:
                    # Operator or bracket; track bracket nesting depth.
                    if (initial in '([{'):
                        parenlev = (parenlev + 1)
                    elif (initial in ')]}'):
                        parenlev = (parenlev - 1)
                    if stashed:
                        (yield stashed)
                        stashed = None
                    (yield (OP, token, spos, epos, line))
            else:
                # Nothing matched here: emit the single character as an error
                # token and move on.
                (yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, (pos + 1)), line))
                pos = (pos + 1)
    # End of input: flush any held-back token, close open indentation levels,
    # then signal the end.
    if stashed:
        (yield stashed)
        stashed = None
    for indent in indents[1:]:
        (yield (DEDENT, '', (lnum, 0), (lnum, 0), ''))
    (yield (ENDMARKER, '', (lnum, 0), (lnum, 0), ''))
# Command-line driver: print the tokens of the file named by the first
# argument, or of standard input when no argument is given.
if __name__ == '__main__':
    import sys
    if len(sys.argv) > 1:
        # Use a context manager so the file is closed even if tokenizing
        # raises; previously the handle was opened and never closed.
        with open(sys.argv[1]) as source:
            tokenize(source.readline)
    else:
        tokenize(sys.stdin.readline)
