"""Gradio app that drives a Mixtral-8x7B agent loop.

The model is prompted with the templates imported from ``agent``; each reply
is parsed for an ``action: NAME action_input=VALUE`` line, and the named
action is dispatched through ``NAME_TO_FUNC`` until the model emits
``action: COMPLETE``.
"""
import datetime
import json
import os
import random
import urllib.request

import bs4
import gradio as gr
import lxml
import requests
from huggingface_hub import HfApi, InferenceClient

from agent import (
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    COMPRESS_HISTORY_PROMPT,
    FINDER,
    LOG_PROMPT,
    LOG_RESPONSE,
    PREFIX,
    TASK_PROMPT,
)

api = HfApi()

client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)

VERBOSE = True
# History longer than this many lines is summarised before the next action.
MAX_HISTORY = 100
# Maximum number of items (characters for text) sent to the model per chunk.
MAX_DATA = 20000
# Seconds to wait for a scraped page before giving up.
REQUEST_TIMEOUT = 30


def parse_action(string: str):
    """Split an ``action: NAME action_input=VALUE`` line into its parts.

    Args:
        string: A line that starts with ``action:``.

    Returns:
        ``(name, value)``. ``value`` is ``None`` when the line carries no
        ``action_input=`` part; otherwise surrounding quotes are stripped.
    """
    print("PARSING:")
    print(string)
    assert string.startswith("action:")
    idx = string.find("action_input=")
    print(idx)
    if idx == -1:
        print("idx == -1")
        # 8 == len("action: ")
        name = string[8:]
        print(name)
        return name, None
    # idx - 1 drops the space separating the name from "action_input=".
    name = string[8:idx - 1]
    # 13 == len("action_input=")
    value = string[idx + 13:].strip("'").strip('"')
    print("last return:")
    print(name)
    print(value)
    return name, value


def format_prompt(message, history):
    """Render (user, bot) pairs plus a new message in ``[INST]`` format."""
    turns = "".join(
        f"[INST] {user_prompt} [/INST] {bot_response} "
        for user_prompt, bot_response in history
    )
    return f"{turns}[INST] {message} [/INST]"


def call_search(purpose, task, history, action_input):
    """Search Hugging Face Hub models using ``action_input`` as the filter.

    Returns:
        ``(next_action, None, history, task)``. On success ``history`` is
        replaced by an observation listing the matching models and the next
        action is ``MAIN``; on empty input or any error the next action is
        ``UPDATE-TASK`` with a syntax hint as the observation.
    """
    print(action_input)
    print("trying")
    syntax_hint = (
        "observation: I need to trigger a search using the following syntax:\n"
        "action: SEARCH action_input=URL\n"
    )
    if action_input is None or action_input == "":
        return "UPDATE-TASK", None, syntax_hint, task
    try:
        # str.strip returns a new string; the original discarded the result.
        query = action_input.strip('""')
        models = list(api.list_models(filter=f"{query}"))
        # Guard the debug print: indexing an empty result used to raise and
        # turn "no matches" into a bogus syntax-error observation.
        if models:
            print(f'THIS_OBJ :: {models[0]}')
        return_list = [
            {
                "id": model.id,
                "author": model.author,
                "created_at": model.created_at,
                "last_modified": model.last_modified,
                "private": model.private,
                "gated": model.gated,
                "disabled": model.disabled,
                "downloads": model.downloads,
                "likes": model.likes,
                "library_name": model.library_name,
                "tags": model.tags,
                "pipeline_tag": model.pipeline_tag,
            }
            for model in models
        ]
        rl = len(return_list)
        print(rl)
        # Rough token estimate (spaces and commas); only logged.
        c = sum(1 for ch in str(return_list) if ch in " ,")
        print(c)
        # NOTE(review): this compares the number of models, not the text
        # size, against MAX_DATA -- confirm that is the intended threshold.
        if rl > MAX_DATA:
            print("compressing...")
            return_list = compress_data(rl, purpose, task, return_list)
        history = "observation: the search results are:\n {}\n".format(return_list)
        return "MAIN", None, history, task
    except Exception as e:
        # Best-effort tool: report the failure to the agent, don't crash.
        print(e)
        return "UPDATE-TASK", None, syntax_hint, task


def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    purpose,
    **prompt_kwargs,
):
    """Format a prompt, stream a completion from the model and return it.

    Args:
        prompt_template: Template filled with ``prompt_kwargs``.
        stop_tokens: Accepted for interface compatibility.
            NOTE(review): not forwarded to the client -- confirm whether
            generation is meant to stop on these.
        max_tokens: Passed as ``max_new_tokens``.
        seed: Sampling seed.
        purpose: Inserted into ``PREFIX`` together with the current time.

    Returns:
        The concatenated text of all streamed tokens.
    """
    timestamp = datetime.datetime.now()
    print(seed)
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )
    content = PREFIX.format(
        timestamp=timestamp,
        purpose=purpose,
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    stream = client.text_generation(
        content,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    resp = "".join(response.token.text for response in stream)

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp


def compress_data(c, purpose, task, history):
    """Summarise ``history`` in ``MAX_DATA``-sized chunks.

    Each chunk is sent to the model together with the previous chunk's
    summary as running ``knowledge``; the last summary is returned.

    Args:
        c: Length of ``history`` (number of sliceable items).
        purpose: Overall purpose passed to the prompt.
        task: Current task; wrapped into a compile-this-data instruction.
        history: Sliceable data (string or list) to compress.

    Returns:
        ``"observation: <summary>\\n"``.
    """
    seed = random.randint(1, 1000000000)
    print(c)
    total = int(c)
    # Integer ceiling division. The original derived the chunk size through
    # a float round-trip (c / (c / MAX_DATA)), which could truncate to
    # MAX_DATA - 1 and drop the tail, and divided by zero when c == 0.
    # Always run at least one pass so a summary exists.
    divi = max(1, -(-total // MAX_DATA))
    chunk = MAX_DATA
    print(f'chunk:: {chunk}')
    print(f'divr:: {total / MAX_DATA}')
    print(f'divi:: {divi}')
    print(f'e:: {chunk}')
    new_history = ""
    task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
    resp = ""
    for s in range(0, divi * chunk, chunk):
        e = s + chunk
        print(f's:e :: {s}:{e}')
        hist = history[s:e]
        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=2048,
            seed=seed,
            purpose=purpose,
            task=task,
            knowledge=new_history,
            history=hist,
        )
        # Carry the running summary into the next chunk's prompt.
        new_history = resp
        print(resp)

    print("final" + resp)
    return "observation: {}\n".format(resp)


def compress_history(purpose, task, history):
    """Ask the model to summarise ``history`` into a single observation."""
    resp = run_gpt(
        COMPRESS_HISTORY_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=512,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    return "observation: {}\n".format(resp)


def call_main(purpose, task, history, action_input):
    """Ask the model for its next step and parse the first action it names.

    Returns:
        ``(action_name, action_input, history, task)``. Every non-empty
        reply line up to and including the first ``action: `` line is
        appended to ``history``. If the reply has no action line the next
        action is ``MAIN`` again.
    """
    resp = run_gpt(
        FINDER,
        stop_tokens=["observation:", "task:"],
        max_tokens=2048,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    )
    lines = resp.strip().strip("\n").split("\n")
    for line in lines:
        if line == "":
            continue
        if line.startswith("action: COMPLETE"):
            print("COMPLETE called")
            return "COMPLETE", None, history, task
        if line.startswith("action: "):
            action_name, action_input = parse_action(line)
            print(f'ACTION::{action_name} -- INPUT :: {action_input}')
            history += "{}\n".format(line)
            return action_name, action_input, history, task
        # Thoughts and any other free text are recorded exactly once. The
        # original appended "thought: " lines twice (once in a dedicated
        # branch, again in the trailing else).
        history += "{}\n".format(line)
    # The original tested the string literal "VERBOSE", which is always true.
    if VERBOSE:
        print(history)
    return "MAIN", None, history, task


def call_set_task(purpose, task, history, action_input):
    """Ask the model for an updated task and record it in ``history``."""
    task = run_gpt(
        TASK_PROMPT,
        stop_tokens=[],
        max_tokens=1024,
        seed=random.randint(1, 1000000000),
        purpose=purpose,
        task=task,
        history=history,
    ).strip("\n")
    history += "observation: task has been updated to: {}\n".format(task)
    return "MAIN", None, history, task


###########################################################
def search_all(url):
    """Placeholder; currently returns an empty string for any ``url``."""
    source = ""
    return source


def find_all(purpose, task, history, url):
    """Fetch ``url``, extract its text and summarise it with the model.

    Returns:
        ``(next_action, None, history, task)`` with ``next_action`` always
        ``MAIN``. On success ``history`` is replaced by an observation
        holding the compressed page text and ``task`` becomes
        ``"complete?"``. On empty input or any error a syntax hint is
        appended to ``history`` instead.
    """
    print(url)
    print(f"trying URL:: {url}")
    syntax_hint = (
        "observation: I need to trigger a search using the following syntax:\n"
        "action: SCRAPE_WEBSITE action_input=URL\n"
    )
    if url is None or url == "":
        history += syntax_hint
        return "MAIN", None, history, task
    try:
        # A timeout keeps one unresponsive host from hanging the agent loop.
        source = requests.get(url, timeout=REQUEST_TIMEOUT)
        soup = bs4.BeautifulSoup(source.content, 'lxml')
        # Debug output about the <title>. Pages without one used to raise
        # AttributeError here and be reported as a syntax problem.
        title = soup.title
        print(title)
        if title is not None:
            print(title.name)
            print(title.string)
            print(title.parent.name)
        print([tag.name for tag in soup.find_all()])
        rawp = (f'RAW TEXT RETURNED: {soup.text}')
        q = ("a", "p", "span", "content", "article")
        # Structured view of selected tags; only logged below.
        out = [
            [{
                p.name: p.string,
                "parent": p.parent.name,
                "previous": p.previous,
                "first-child": [b.name for b in p.children],
                "content": p,
            }]
            for p in soup.find_all(q)
        ]
        rl = len(rawp)
        print(rl)
        rawp = compress_data(rl, purpose, task, rawp)
        print(rawp)
        print(f'out:: {out}')
        history = "observation: the search results are:\n {}\n".format(rawp)
        task = "complete?"
        return "MAIN", None, history, task
    except Exception as e:
        # Best-effort tool: report the failure to the agent, don't crash.
        print(e)
        history += syntax_hint
        return "MAIN", None, history, task


#################################

# Maps the action names the model may emit to their handlers.
NAME_TO_FUNC = {
    "MAIN": call_main,
    "UPDATE-TASK": call_set_task,
    "SEARCH_ENGINE": find_all,
    "SCRAPE_WEBSITE": find_all,
}


def run_action(purpose, task, history, action_name, action_input):
    """Dispatch one action and return ``(name, input, history, task)``.

    ``COMPLETE`` short-circuits. History longer than ``MAX_HISTORY`` lines
    is compressed first. Unknown action names produce an error observation
    and hand control back to ``MAIN``.
    """
    if action_name == "COMPLETE":
        print("Complete - Exiting")
        return "COMPLETE", None, history, task

    # compress the history when it is long
    if len(history.split("\n")) > MAX_HISTORY:
        if VERBOSE:
            print("COMPRESSING HISTORY")
        history = compress_history(purpose, task, history)

    handler = NAME_TO_FUNC.get(action_name)
    if handler is None:
        history += "observation: The TOOL I tried to use returned an error, I need to select a tool from: (UPDATE-TASK, SEARCH_ENGINE, SCRAPE_WEBSITE, COMPLETE)\n"
        return "MAIN", None, history, task
    print(f"RUN: {action_name} ACTION_INPUT: {action_input}")
    return handler(purpose, task, history, action_input)


def run(purpose, history, data=None, file=None, url=None, pdf_url=None, pdf_batch=None):
    """Gradio event handler: run the agent loop, yielding after each action.

    Args:
        purpose: Text from the instructions box.
        history: Current chatbot value (list of (user, bot) pairs) or empty.
        data, file, url, pdf_url, pdf_batch: Values of the remaining input
            widgets; accepted but not used by the loop.

    Yields:
        ``(None, [(purpose, history)], None)`` for the three output widgets.
    """
    history = format_prompt(purpose, history) if history else ""
    # The first step is always a SEARCH_ENGINE call with no input.
    action_name = "SEARCH_ENGINE"
    action_input = None
    task = "Use search engine tool to search for more information"
    while True:
        print("")
        print("")
        print("---")
        print("purpose:", purpose)
        print("task:", task)
        print("---")
        print("---")

        action_name, action_input, history, task = run_action(
            purpose,
            task,
            history,
            action_name,
            action_input,
        )
        yield None, [(purpose, history)], None
        if action_name == "COMPLETE":
            return


def clear_fn():
    """Reset the instructions box and the chatbot."""
    return "", [(None, None)]


with gr.Blocks() as app:
    gr.HTML("""

Mixtral 8x7B TLDR Summarizer + Web

Summarize Data of unlimited length

""")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            button = gr.Button()

    with gr.Row():
        stop_button = gr.Button("Stop")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        with gr.Tab("Text"):
            data = gr.Textbox(label="Input Data (paste text)", lines=6)
        with gr.Tab("File"):
            file = gr.Files(label="Input File (.pdf .txt)")
        with gr.Tab("Raw HTML"):
            url = gr.Textbox(label="URL")
        with gr.Tab("PDF URL"):
            pdf_url = gr.Textbox(label="PDF URL")
        with gr.Tab("PDF Batch"):
            pdf_batch = gr.Textbox(label="PDF Batch (comma separated)")
    e_box = gr.Textbox()
    clear_btn.click(clear_fn, None, [prompt, chatbot])
    go = button.click(run, [prompt, chatbot, data, file, url, pdf_url, pdf_batch], [prompt, chatbot, e_box])
    # The stop button cancels the running generator event.
    stop_button.click(None, None, None, cancels=[go])
app.launch(server_port=7861, show_api=False, share=False)