
from __future__ import print_function, division
import sys
import os
import subprocess
import csv
if (sys.version_info < (3,)):
    from urlparse import urlparse
    from urllib import url2pathname
else:
    from urllib.parse import urlparse
    from urllib.request import url2pathname
from buildtools import discover
from buildtools import install
from buildtools.version import executable, extractor_executable
# Environment variable holding newline-separated paths; each becomes a '-R' option.
INCLUDE_TAG = 'LGTM_INDEX_INCLUDE'
# Environment variable holding newline-separated paths; each becomes a '-Y' option.
EXCLUDE_TAG = 'LGTM_INDEX_EXCLUDE'
# Environment variable holding newline-separated entries; each becomes a '--filter' option.
FILTER_TAG = 'LGTM_INDEX_FILTERS'
# Environment variable holding newline-separated paths; each becomes a '-p' option.
PATH_TAG = 'LGTM_INDEX_IMPORT_PATH'
# Environment variable naming a CSV file whose rows are read as (kind, url) pairs.
REPO_FOLDERS_TAG = 'LGTM_REPOSITORY_FOLDERS_CSV'
# Row kinds from that CSV file whose URL path is added as a '-Y' option.
REPO_EXCLUDE_KINDS = ('metadata', 'external')

def trap_cache():
    """Return the path of the ``trap_cache`` entry inside the workspace.

    The workspace is read from the ``LGTM_WORKSPACE`` environment variable.

    Raises:
        KeyError: if ``LGTM_WORKSPACE`` is not set.
    """
    workspace = os.environ['LGTM_WORKSPACE']
    return os.path.join(workspace, 'trap_cache')

def split_into_options(lines, opt):
    """Turn newline-separated text into a flat list of ``opt, value`` pairs.

    Args:
        lines: A string whose entries are separated by ``'\\n'``.
        opt: The option flag to emit before every non-blank entry.

    Returns:
        A list ``[opt, entry1, opt, entry2, ...]``. Entries are stripped of
        surrounding whitespace and blank entries are dropped.
    """
    opts = []
    for entry in (raw.strip() for raw in lines.split('\n')):
        if not entry:
            continue
        opts.extend((opt, entry))
    return opts

def get_include_options():
    """Return the ``-R`` options for this run.

    When the environment variable named by ``INCLUDE_TAG`` is set, each of its
    non-blank lines becomes one ``-R`` option. Otherwise a single ``-R``
    option pointing at ``LGTM_SRC`` is returned.

    Raises:
        KeyError: if neither variable is set.
    """
    if INCLUDE_TAG not in os.environ:
        # No explicit include list: fall back to the source root.
        return ['-R', os.environ['LGTM_SRC']]
    return split_into_options(os.environ[INCLUDE_TAG], '-R')

def get_exclude_options():
    """Return the ``-Y`` options for this run.

    Options come from two sources:

    * the environment variable named by ``EXCLUDE_TAG``: every non-blank line
      becomes one ``-Y`` option;
    * the CSV file named by the environment variable ``REPO_FOLDERS_TAG``:
      after the first (header) row, each row is read as ``kind, url``, and
      rows whose kind is listed in ``REPO_EXCLUDE_KINDS`` contribute the
      local path derived from the URL.

    Rows with fewer than two fields (including blank lines) are skipped, and
    any fields after the first two are ignored. A URL that cannot be converted
    to a path is reported on stdout and skipped.

    Returns:
        A flat list ``['-Y', path1, '-Y', path2, ...]``.
    """
    options = []
    if EXCLUDE_TAG in os.environ:
        options.extend(split_into_options(os.environ[EXCLUDE_TAG], '-Y'))
    if REPO_FOLDERS_TAG not in os.environ:
        return options
    with open(os.environ[REPO_FOLDERS_TAG]) as csv_file:
        csv_reader = csv.reader(csv_file)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)
        for row in csv_reader:
            # csv.reader yields [] for blank lines; tolerate those and any
            # other short row instead of failing on tuple unpacking.
            if len(row) < 2:
                continue
            kind, url = row[0], row[1]
            if kind not in REPO_EXCLUDE_KINDS:
                continue
            try:
                path = url2pathname(urlparse(url).path)
            except Exception:
                # Best effort: report and carry on, but let KeyboardInterrupt
                # and SystemExit propagate (a bare except would hide them).
                print("Unable to parse '" + url + "' as file url.")
            else:
                options.append('-Y')
                options.append(path)
    return options

def get_filter_options():
    """Return the ``--filter`` options for this run.

    Each non-blank line of the environment variable named by ``FILTER_TAG``
    becomes one ``--filter`` option; an empty list is returned when the
    variable is not set.
    """
    if FILTER_TAG not in os.environ:
        return []
    return split_into_options(os.environ[FILTER_TAG], '--filter')

def get_path_options(version):
    """Return the ``-p`` options for this run.

    The result always ends with a ``-p`` option for the value of
    ``install.get_library(version)``. When the environment variable named by
    ``PATH_TAG`` is set, one ``-p`` option per non-blank line of its value is
    placed in front of that entry.

    Args:
        version: Passed through to ``install.get_library``.
    """
    library_option = ['-p', install.get_library(version)]
    if PATH_TAG not in os.environ:
        return library_option
    user_options = split_into_options(os.environ[PATH_TAG], '-p')
    return user_options + library_option

def get_stdlib():
    """Return the directory that contains the running interpreter's ``os`` module."""
    parent_dir, _ = os.path.split(os.__file__)
    return parent_dir

def exclude_pip_21_3_build_dir_options():
    """Return ``-Y`` options for directories that look like pip in-tree build output.

    Every included directory (the values of the ``-R`` options) is walked
    recursively. A directory is reported when it is named ``build``, has a
    ``lib`` subdirectory, and contains nothing but directories. The walk does
    not descend into:

    * directories already covered by a ``-Y`` option,
    * directories that contain an ``__init__.py``,
    * directories that were reported,
    * symbolic links,
    * directories that cannot be listed (a note is printed and the walk
      continues, so this best-effort heuristic never aborts the run).

    Setting ``CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_PIP_BUILD_DIR_EXCLUDE``
    to a non-empty value disables the check.

    Returns:
        A flat list ``['-Y', dir1, '-Y', dir2, ...]``, possibly empty.
    """
    if os.environ.get('CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_PIP_BUILD_DIR_EXCLUDE'):
        return []
    # Options come as flag/value pairs, so every second item is a path.
    include_dirs = set(get_include_options()[1::2])
    exclude_dirs = set(os.path.abspath(path) for path in get_exclude_options()[1::2])
    to_exclude = []

    def walk_dir(dirpath):
        if os.path.abspath(dirpath) in exclude_dirs:
            return
        try:
            contents = os.listdir(dirpath)
        except OSError as err:
            # Unreadable, missing, or not a directory: skip it rather than
            # abort the whole extraction.
            print('Unable to list directory {}: {}'.format(dirpath, err))
            return
        # Checked before the per-entry isdir() calls so that packages are
        # rejected without touching their contents.
        if '__init__.py' in contents:
            return
        paths = [os.path.join(dirpath, name) for name in contents]
        subdirs = [path for path in paths if os.path.isdir(path)]
        subdir_names = [os.path.basename(path) for path in subdirs]
        # `subdirs == paths` holds only when every entry is a directory.
        if (os.path.basename(dirpath) == 'build'
                and 'lib' in subdir_names
                and subdirs == paths):
            to_exclude.append(dirpath)
            return
        for subdir in subdirs:
            if not os.path.islink(subdir):
                walk_dir(subdir)

    for top in include_dirs:
        walk_dir(top)

    options = []
    if to_exclude:
        print('Excluding the following directories from extraction, since they look like in-tree build directories generated by pip: {}'.format(to_exclude))
        print('You can disable this behavior by setting the environment variable CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_PIP_BUILD_DIR_EXCLUDE=1')
        for dirpath in to_exclude:
            options.append('-Y')
            options.append(dirpath)
    return options

def exclude_venvs_options():
    """Return ``-Y`` options for directories that look like virtual environments.

    Every included directory (the values of the ``-R`` options) is walked
    recursively. A directory is reported when it has either

    * a ``Lib`` subdirectory and ``Lib/site-packages`` exists, or
    * a ``lib`` subdirectory containing an entry whose name starts with
      ``python`` and which has a ``site-packages`` entry inside it.

    ``Lib`` takes precedence: when it is present, ``lib`` is not inspected.
    The walk does not descend into directories already covered by a ``-Y``
    option, reported directories, or symbolic links. Directories that cannot
    be listed are skipped (a note is printed) so that this best-effort
    heuristic never aborts the run.

    Setting ``CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_VENV_EXCLUDE`` to a
    non-empty value disables the check.

    Returns:
        A flat list ``['-Y', dir1, '-Y', dir2, ...]``, possibly empty.
    """
    if os.environ.get('CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_VENV_EXCLUDE'):
        return []
    # Options come as flag/value pairs, so every second item is a path.
    include_dirs = set(get_include_options()[1::2])
    exclude_dirs = set(os.path.abspath(path) for path in get_exclude_options()[1::2])
    to_exclude = []

    def walk_dir(dirpath):
        if os.path.abspath(dirpath) in exclude_dirs:
            return
        try:
            contents = os.listdir(dirpath)
        except OSError as err:
            # Unreadable, missing, or not a directory: skip it rather than
            # abort the whole extraction.
            print('Unable to list directory {}: {}'.format(dirpath, err))
            return
        paths = [os.path.join(dirpath, name) for name in contents]
        subdirs = [path for path in paths if os.path.isdir(path)]
        subdir_names = [os.path.basename(path) for path in subdirs]
        if 'Lib' in subdir_names:
            has_site_packages_folder = os.path.exists(
                os.path.join(dirpath, 'Lib', 'site-packages'))
        elif 'lib' in subdir_names:
            lib_path = os.path.join(dirpath, 'lib')
            try:
                lib_contents = os.listdir(lib_path)
            except OSError:
                # Treated as "no site-packages"; if `lib` is a real directory
                # the recursive walk below reports the listing failure.
                lib_contents = []
            # any() over an empty sequence is already False, so no separate
            # emptiness check is needed.
            has_site_packages_folder = any(
                os.path.exists(os.path.join(lib_path, name, 'site-packages'))
                for name in lib_contents
                if name.startswith('python'))
        else:
            has_site_packages_folder = False
        if has_site_packages_folder:
            to_exclude.append(dirpath)
            return
        for subdir in subdirs:
            if not os.path.islink(subdir):
                walk_dir(subdir)

    for top in include_dirs:
        walk_dir(top)

    options = []
    if to_exclude:
        print('Excluding the following directories from extraction, since they look like virtual environments: {}'.format(to_exclude))
        print('You can disable this behavior by setting the environment variable CODEQL_EXTRACTOR_PYTHON_DISABLE_AUTOMATIC_VENV_EXCLUDE=1')
        for dirpath in to_exclude:
            options.append('-Y')
            options.append(dirpath)
    return options

def extractor_options(version):
    """Assemble the full option list passed to the tracer script.

    The list starts with the fixed options ``-v``, ``-z all`` and
    ``-c <trap cache>``, followed in order by the path, include, exclude and
    filter options and then the automatic pip-build-dir and virtual
    environment exclusions.

    Args:
        version: Passed through to ``get_path_options``.
    """
    options = ['-v', '-z', 'all', '-c', trap_cache()]
    # The tuple is evaluated left to right, so the helpers run (and print)
    # in the same order in which their results are appended.
    option_groups = (
        get_path_options(version),
        get_include_options(),
        get_exclude_options(),
        get_filter_options(),
        exclude_pip_21_3_build_dir_options(),
        exclude_venvs_options(),
    )
    for group in option_groups:
        options.extend(group)
    return options

def site_flag(version):
    """Return ``['-S']`` when the interpreter can run with ``-S``, else ``[]``.

    An empty list is returned straight away when
    ``CODEQL_EXTRACTOR_PYTHON_ENABLE_SITE`` is set to a non-empty value.
    Otherwise the interpreter command from ``executable(version)`` is run with
    ``-S -c "import gzip"``; any failure, including a non-zero exit status,
    prints a note and yields ``[]``.

    Args:
        version: Passed through to ``executable``.
    """
    if os.environ.get('CODEQL_EXTRACTOR_PYTHON_ENABLE_SITE'):
        return []
    try:
        probe_command = executable(version) + ['-S', '-c', 'import gzip']
        subprocess.check_call(probe_command)
    except Exception:
        # CalledProcessError is a subclass of Exception, so this one clause
        # covers both a failing probe and a failure to start it.
        print('Running without -S')
        return []
    else:
        return ['-S']

def get_analysis_version(major_version):
    """Return the version string to analyse the code as.

    The value of ``CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION`` wins when that
    variable is set (even if empty). Otherwise the result is ``'2.7.18'`` for
    ``major_version == 2`` and ``'3.11'`` for anything else.
    """
    override = os.environ.get('CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION')
    if override is not None:
        return override
    return '2.7.18' if major_version == 2 else '3.11'

def main():
    """Build the extractor command line and run it as a subprocess.

    The command is the extractor executable, an optional ``-S`` flag, the
    ``python_tracer.py`` script under ``$SEMMLE_DIST/tools`` and the options
    from ``extractor_options``. The child process gets a copy of the current
    environment with ``CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION`` set.

    Raises:
        KeyError: if ``SEMMLE_DIST`` is not set.
        subprocess.CalledProcessError: if the extractor exits with a non-zero
            status.
    """
    version = discover.get_version()
    dist_root = os.environ['SEMMLE_DIST']
    tracer = os.path.join(dist_root, 'tools', 'python_tracer.py')

    command = extractor_executable() + site_flag(3)
    command = command + [tracer]
    command = command + extractor_options(version)

    rendered = ' '.join(command)
    print('Calling ' + rendered)
    # Flush our own output before the child process starts writing.
    for stream in (sys.stdout, sys.stderr):
        stream.flush()

    child_env = os.environ.copy()
    child_env.update({'CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION': get_analysis_version(version)})
    subprocess.check_call(command, env=child_env)
# Run main() only when this file is executed as a script, not when it is imported.
if (__name__ == '__main__'):
    main()
