
import ast
import sys
from types import ModuleType, GetSetDescriptorType
import hashlib
import os
from semmle.python import ast
from semmle.python.passes._pass import Pass
from semmle.util import unicode, str_to_unicode, get_analysis_major_version
from semmle.python.passes.ast_pass import iter_fields
from semmle.cmdline import is_legal_module_name
"""
The QL library depends on a reasonable one-to-one correspondence
between DB entities and Python objects. However, since QL has only
one notion of equality, but Python has two (`__eq__` and `is`) we need to be careful.
What we want to do is to treat objects like builtin functions and classes as using
reference equality and numbers and strings as using value equality.

In practice this is impossible as we want to distinguish `True` from `1` from `1.0`
even though all these values are equal. However, we want to get as close as possible.

"""
__all__ = ['ObjectPass']

# AST node classes gathered under OBJECT_TYPES (not referenced by the code
# visible in this part of the file).
OBJECT_TYPES = set([
    ast.ClassExpr,
    ast.Call,
    ast.FunctionExpr,
    ast.Tuple,
    ast.Str,
    ast.Num,
    ast.List,
    ast.ListComp,
    ast.Module,
    ast.Dict,
    ast.Ellipsis,
    ast.Lambda,
])
# 0x02070000 is sys.hexversion for Python 2.7.
if sys.hexversion >= 0x02070000:
    OBJECT_TYPES.update([ast.DictComp, ast.SetComp, ast.Set])

# Types whose instances are labelled and named by value.
NUMERIC_TYPES = set([int, float, bool])

# 0x03000000 is sys.hexversion for Python 3.0.
if sys.hexversion >= 0x03000000:
    # On Python 3 the text type is simply `str`.
    unicode = str
    BUILTINS_NAME = 'builtins'
else:
    NUMERIC_TYPES.add(long)
    BUILTINS_NAME = b'__builtin__'

# AST node classes that _walk_py hands to _write_literal.
LITERALS = (ast.Num, ast.Str)

class _CObject(object):
    """Wrapper that compares and hashes an arbitrary object by identity.

    Two wrappers are equal exactly when they wrap the very same object
    (``is``), regardless of how the wrapped objects compare with ``==``.
    The hash is the ``id`` of the wrapped object, so wrappers can be used
    as dictionary keys even for unhashable objects.
    """
    __slots__ = ['obj']

    def __init__(self, obj):
        self.obj = obj

    def __eq__(self, other):
        # Anything that is not a _CObject is never equal to one.
        return isinstance(other, _CObject) and self.obj is other.obj

    def __ne__(self, other):
        # Explicit inverse of __eq__ (Python 2 does not derive it).
        return not self.__eq__(other)

    def __hash__(self):
        return id(self.obj)

class ObjectPass(Pass):

    def extract(self, ast, path, writer):
        """Write the objects found in a Python AST plus candidate module names.

        ``ast`` is the tree passed on to ``_extract_py`` (inside this method the
        parameter shadows the module-level ``ast`` import), ``path`` is the
        file path from which possible module names are derived, and ``writer``
        receives everything that is written.
        """
        self.writer = writer
        try:
            self._extract_py(ast)
            self._extract_possible_module_names(path)
        finally:
            # Always detach the writer, even if extraction raised.
            self.writer = None

    def _extract_possible_module_names(self, path):
        """Write every dotted name that ``path`` could be imported as.

        The extension is dropped and path separators become dots. Leading
        components are discarded until at most three dots remain, then each
        remaining suffix is tried in turn (``c.d.e.f``, ``d.e.f``, ``e.f``,
        ``f``). Suffixes accepted by ``is_legal_module_name`` are written
        together with their enclosing package names.
        """
        stem = os.path.splitext(path)[0]
        candidate = stem.replace(os.sep, '.')
        # Only the trailing four components are ever considered.
        while candidate.count('.') > 3:
            candidate = candidate.split('.', 1)[1]
        while True:
            if is_legal_module_name(candidate):
                self._write_module_and_package_names(candidate)
            _head, dot, tail = candidate.partition('.')
            if not dot:
                # No dot left: the last single-component name has been tried.
                return
            candidate = tail

    def _write_module_and_package_names(self, module_name):
        """Write ``module_name`` and each of its enclosing package names.

        For ``'a.b.c'`` the strings ``'a.b.c'``, ``'a.b'`` and ``'a'`` are
        written, in that order.
        """
        current = module_name
        while True:
            self._write_c_object(current, None, False)
            if '.' not in current:
                break
            # Strip the last component to get the enclosing package.
            current = current.rsplit('.', 1)[0]

    def extract_builtin(self, module, writer):
        """Write ``module`` through ``_extract_c`` using ``writer``.

        The writer is attached to ``self`` only for the duration of the call.
        """
        self.writer = writer
        try:
            self._extract_c(module)
        finally:
            # Always detach the writer, even if extraction raised.
            self.writer = None

    def _extract_c(self, mod):
        """Write ``mod`` and everything ``_write_c_object`` reaches from it.

        The address-label bookkeeping (``next_address_label`` and
        ``address_labels``) is initialised before writing. ``address_labels``
        is cleared again afterwards, also when writing raises, mirroring how
        ``extract`` and ``extract_builtin`` detach the writer in ``finally``.
        """
        self.next_address_label = 0
        self.address_labels = {}
        try:
            self._write_c_object(mod, None, False)
        finally:
            self.address_labels = None

    def _write_str(self, s):
        """Write the string ``s`` as an object with no default label."""
        # Exact type check: instances of str subclasses are rejected as well.
        assert (type(s) is str)
        self._write_c_object(s, None, False)

    def _write_c_object(self, obj, label, write_special, string_prefix=''):
        """Write ``obj`` and the objects reachable from it; return its id.

        ``label`` is the fallback label handed to ``get_label_for_object``.
        When ``write_special`` is false, objects listed in ``SPECIAL_OBJECTS``
        only get an id and nothing else is written for them.
        ``string_prefix`` is the prefix of the string literal being written
        (if any); it selects between the bytes and unicode flavours.

        For a new object the type is written first (recursively), then the
        object, type and source tuples, then members (modules and classes),
        the super class (classes other than ``object``), items (lists and
        tuples), attributes of getset descriptors, and finally a name.
        Objects the writer has already seen are not written again.
        """
        major_version = get_analysis_major_version()
        recorded_type = type(obj)
        # When analysing Python 2, a str without a 'u' prefix is recorded as bytes.
        if recorded_type == str and major_version == 2 and 'u' not in string_prefix:
            recorded_type = bytes
        wrapped = _CObject(obj)
        if self.writer.has_written(wrapped):
            return self.writer.get_node_id(wrapped)
        obj_label = self.get_label_for_object(obj, label, recorded_type)
        obj_id = self.writer.get_labelled_id(wrapped, obj_label)
        if not write_special and wrapped in SPECIAL_OBJECTS:
            return obj_id
        type_id = self._write_c_object(recorded_type, None, write_special)
        emit = self.writer.write_tuple
        emit('py_cobjects', 'r', obj_id)
        emit('py_cobjecttypes', 'rr', obj_id, type_id)
        emit('py_cobject_sources', 'rd', obj_id, 0)

        # Members of modules and classes, in sorted order.
        if isinstance(obj, (ModuleType, type)):
            for member_name, member in sorted(obj.__dict__.items()):
                if (obj, member_name) in BLACKLIST:
                    continue
                member_label = obj_label + ('$%d' % major_version) + str_to_unicode(member_name)
                member_id = self._write_c_object(member, member_label, write_special)
                emit('py_cmembers_versioned', 'rsrs', obj_id, member_name, member_id, major_version)
            if isinstance(obj, type) and obj is not object:
                # The next class in the MRO is recorded as the '.super.' member.
                base_id = self._write_c_object(obj.__mro__[1], None, write_special)
                emit('py_cmembers_versioned', 'rsrs', obj_id, '.super.', base_id, major_version)

        # Items of lists and tuples, by position.
        if isinstance(obj, (list, tuple)):
            for position, element in enumerate(obj):
                element_label = obj_label + '$' + unicode(position)
                element_id = self._write_c_object(element, element_label, write_special)
                emit('py_citems', 'rdr', obj_id, position, element_id)

        # Attributes of getset descriptors, except '__name__'.
        if type(obj) is GetSetDescriptorType:
            for attr in type(obj).__dict__:
                if attr != '__name__' and hasattr(obj, attr):
                    attr_label = obj_label + ('$%d' % major_version) + str_to_unicode(attr)
                    attr_id = self._write_c_object(getattr(obj, attr), attr_label, write_special)
                    emit('py_cmembers_versioned', 'rsrs', obj_id, attr, attr_id, major_version)

        # Work out the name tuple; exactly one is written per object.
        if hasattr(obj, '__name__'):
            if isinstance(obj, type):
                shown = qualified_type_name(obj)
            elif isinstance(obj, ModuleType) and obj.__name__ == 'io':
                shown = '_io'
            elif obj is EXEC:
                shown = 'exec'
            else:
                shown = obj.__name__
            column_kinds = 'rs'
        elif type(obj) in NUMERIC_TYPES:
            column_kinds, shown = 'rq', obj
        elif type(obj) is str:
            # An explicit prefix wins; otherwise the analysed version decides.
            if 'b' in string_prefix:
                flavour = 'b'
            elif 'u' in string_prefix or major_version != 2:
                flavour = 'u'
            else:
                flavour = 'b'
            column_kinds, shown = 'rs', flavour + "'" + obj + "'"
        elif type(obj) is bytes:
            # latin-1 maps every byte to exactly one character.
            column_kinds, shown = 'rs', "b'" + obj.decode('latin-1') + "'"
        elif type(obj) is type(None):
            column_kinds, shown = 'rs', 'None'
        else:
            column_kinds, shown = 'rs', 'object'
        emit('py_cobjectnames', column_kinds, obj_id, shown)
        return obj_id

    def write_special_objects(self, writer):
        """Write every entry of ``SPECIAL_OBJECTS`` plus two artificial objects.

        Each object is written with ``write_special`` enabled and is then
        recorded in ``py_special_objects`` under its name. The two trailing
        ``object()`` instances are fresh objects written under explicit
        labels (``$_semmle_unknown_type`` and ``$_semmle_undefined_value``).

        The writer and the address-label table are detached in ``finally``,
        so they are released even when a write raises, in the same way as
        ``extract`` and ``extract_builtin`` release the writer.
        """
        self.writer = writer
        self.next_address_label = 0
        self.address_labels = {}

        def write(obj, name, label=None):
            obj_id = self._write_c_object(obj, label, True)
            self.writer.write_tuple('py_special_objects', 'rs', obj_id, name)

        try:
            for (wrapped, name) in SPECIAL_OBJECTS.items():
                # Keys are _CObject wrappers; write the object they wrap.
                write(wrapped.obj, name)
            write(object(), '_semmle_unknown_type', '$_semmle_unknown_type')
            write(object(), '_semmle_undefined_value', '$_semmle_undefined_value')
        finally:
            self.writer = None
            self.address_labels = None

    def get_label_for_object(self, obj, default_label, obj_type):
        """Return the label to register ``obj`` under, or ``default_label``.

        Labels are chosen as follows:

        * ``None`` and the empty tuple get fixed labels;
        * ``str`` and ``bytes`` values get the SHA-1 of their bytes (text is
          UTF-8 encoded first), prefixed with ``C_unicode$`` when ``obj_type``
          is ``str`` and ``C_bytes$`` otherwise;
        * values whose type is in ``NUMERIC_TYPES`` get type name plus ``repr``;
        * classes get their qualified type name, modules their ``__name__``;
        * built-in functions whose ``__module__`` is a string get
          ``<module>.<name>``, with the builtins module always spelled
          ``builtins``.

        Everything else, and any object for which the class check itself
        raises, gets ``default_label``.
        """
        if (obj is None):
            return 'C_None'
        t = type(obj)
        t_name = str_to_unicode(t.__name__)
        if ((t is tuple) and (len(obj) == 0)):
            return 'C_EmptyTuple'
        if (obj_type is str):
            prefix = 'C_unicode$'
        else:
            prefix = 'C_bytes$'
        if (t is str):
            obj = obj.encode('utf8', errors='replace')
            return (prefix + hashlib.sha1(obj).hexdigest())
        if (t is bytes):
            return (prefix + hashlib.sha1(obj).hexdigest())
        if (t in NUMERIC_TYPES):
            return ((('C_' + t_name) + '$') + str_to_unicode(repr(obj)))
        try:
            if isinstance(obj, type):
                return ((('C_' + t_name) + '$') + qualified_type_name(obj))
        except Exception:
            # The isinstance check or name lookup raised: fall back to the default.
            return default_label
        if (t is ModuleType):
            return ((('C_' + t_name) + '$') + str_to_unicode(obj.__name__))
        if (t is type(len)):
            mod_name = obj.__module__
            # __module__ may be something other than a string (e.g. None).
            if isinstance(mod_name, str):
                if (mod_name == BUILTINS_NAME):
                    # Use one spelling of the builtins module name.
                    mod_name = 'builtins'
                # Label every built-in function that has a string module
                # name, not only those living in the builtins module.
                return ((('C_' + t_name) + '$') + str_to_unicode(((mod_name + '.') + obj.__name__)))
        return default_label

    def _extract_py(self, ast):
        """Write the literal objects found in ``ast`` by walking it with ``_walk_py``."""
        self._walk_py(ast)

    def _write_literal(self, node):
        """Write the object(s) denoted by a literal node.

        A ``Num`` node contributes its value ``node.n``. Any other node is
        treated as a string with text ``node.s`` and an optional ``prefix``
        attribute: unless the prefix contains ``u`` the latin-1 encoded bytes
        are written, and unless it contains ``b`` the text itself is written.
        An unprefixed string therefore yields both.
        """
        if isinstance(node, ast.Num):
            self._write_c_object(node.n, None, False)
            return
        prefix = getattr(node, 'prefix', '')
        could_be_bytes = 'u' not in prefix
        could_be_text = 'b' not in prefix
        if could_be_bytes:
            try:
                self._write_c_object(node.s.encode('latin-1'), None, False, string_prefix=prefix)
            except UnicodeEncodeError:
                # Text that is not latin-1 encodable is skipped as a byte string.
                pass
        if could_be_text:
            self._write_c_object(node.s, None, False, string_prefix=prefix)

    def _walk_py(self, node):
        """Recursively visit ``node`` and write every literal found.

        Literal nodes (see ``LITERALS``) are written and not descended into.
        For any other ``AstBase`` node each field value is visited, and lists
        are visited element by element. Anything else is ignored.
        """
        if isinstance(node, ast.AstBase):
            if isinstance(node, LITERALS):
                self._write_literal(node)
                return
            for (_first, _second, value) in iter_fields(node):
                self._walk_py(value)
            return
        if isinstance(node, list):
            for element in node:
                self._walk_py(element)

# Throwaway function: only its type is used, as the 'FunctionType' entry
# of SPECIAL_OBJECTS.
def a_function():
    pass

# Throwaway generator function: the type of the generator it returns is
# used as the 'generator' entry of SPECIAL_OBJECTS.
def a_generator_function():
    (yield None)

# Throwaway class: the type of its bound method `C().meth` is used as the
# 'MethodType' entry of SPECIAL_OBJECTS.
class C(object):

    def meth(self):
        pass
# _write_c_object names the object that is identical to EXEC 'exec'.
# NOTE(review): `[].append` is a freshly created bound method, so presumably
# no extracted object is ever identical to it -- confirm whether this is a
# placeholder for the real `exec` builtin.
EXEC = [].append

# Objects that are written under a fixed, well-known name.
SPECIAL_OBJECTS = {
    type(a_function): 'FunctionType',
    type(len): 'BuiltinFunctionType',
    classmethod: 'ClassMethod',
    staticmethod: 'StaticMethod',
    type(sys): 'ModuleType',
    type(a_generator_function()): 'generator',
    None: 'None',
    type(None): 'NoneType',
    True: 'True',
    False: 'False',
    bool: 'bool',
    sys: 'sys',
    Exception: 'Exception',
    BaseException: 'BaseException',
    TypeError: 'TypeError',
    AttributeError: 'AttributeError',
    KeyError: 'KeyError',
    int: 'int',
    float: 'float',
    object: 'object',
    type: 'type',
    tuple: 'tuple',
    dict: 'dict',
    list: 'list',
    set: 'set',
    locals: 'locals',
    globals: 'globals',
    property: 'property',
    type(list.append): 'MethodDescriptorType',
    super: 'super',
    type(C().meth): 'MethodType',
    object(): '_1',
    object(): '_2',
    b'2': 'b2',
    b'3': 'b3',
    '2': 'u2',
    '3': 'u3',
    __import__(BUILTINS_NAME): 'builtin_module',
    str: 'unicode',
    bytes: 'bytes',
}

# Re-key the table by identity so that lookups use `is` rather than `==`.
tmp = {}
for (obj, name) in SPECIAL_OBJECTS.items():
    tmp[_CObject(obj)] = name
SPECIAL_OBJECTS = tmp
del tmp

# (owner, attribute name) pairs that _write_c_object skips when writing members.
BLACKLIST = set([
    (sys, 'exc_value'),
    (sys, 'exc_type'),
    (sys, 'exc_traceback'),
    (__import__(BUILTINS_NAME), '_'),
])

def qualified_type_name(cls):
    """Return the name under which the class ``cls`` is recorded.

    ``bytes`` and ``unicode`` always map to ``'bytes'`` and ``'unicode'``.
    Classes from the builtins module (or from a module called
    ``exceptions``) use their bare ``__name__``; all other classes use
    ``<module>.<name>``.
    """
    if cls is bytes:
        return 'bytes'
    if cls is unicode:
        return 'unicode'
    module = cls.__module__
    if module == BUILTINS_NAME or module == 'exceptions':
        return str_to_unicode(cls.__name__)
    return str_to_unicode(module + '.' + cls.__name__)
