In [None]:
# Notebook setup: third-party/stdlib imports, autoreload, import path and environment.
import pandas as pd 
import numpy as np
import os

# Reload edited modules automatically so changes to the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
# Put the parent of the current working directory on the import path
# (presumably so the `climateqa` package resolves from there — confirm the notebook's location).
sys.path.append(os.path.dirname(os.getcwd()))

from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
load_dotenv()

In [None]:
from climateqa.engine.llm import get_llm
from climateqa.engine.vectorstore import get_vectorstore
from climateqa.engine.embeddings import get_embeddings_function
from climateqa.engine.reranker import get_reranker
from climateqa.engine.graph import make_graph_agent, display_graph


## LLM

In [None]:
from climateqa.engine.llm import get_llm
    
# Build the chat model through the project factory and send a one-off prompt as a smoke test;
# the reply is shown as the cell output.
llm = get_llm(provider="openai")
llm.invoke("Say Hello !")


## Retriever 

In [None]:
from climateqa.engine.vectorstore import get_vectorstore
from climateqa.engine.embeddings import get_embeddings_function


# Sample question reused by the retrieval and reranking cells below.
question = "What is the impact of climate change on the environment?"

# Embedding function shared by the vector stores created in this notebook.
embeddings_function = get_embeddings_function()

# Vector store bound to the "climateqa-ipx" Azure Search index.
vectorstore_ipcc = get_vectorstore(
    provider="azure_search",
    embeddings=embeddings_function,
    index_name="climateqa-ipx",
)

# Similarity search for the sample question; the hits are displayed as the cell output.
docs_question = vectorstore_ipcc.search(
    query=question,
    search_type="similarity",
)
docs_question

In [None]:
# Optional filter passed to the search: restrict on the "source" field to OWID.
sources_owid = ["OWID"]
filters = dict(source=sources_owid)

# Vector store for the graphs index ("climateqa-owid"), created with text_key="description".
vectorstore_graphs = get_vectorstore(
    provider="azure_search",
    embeddings=embeddings_function,
    index_name="climateqa-owid",
    text_key="description",
)

# Scored search (k=5) for the sample question with the filter applied; shown as the cell output.
owid_graphs = vectorstore_graphs.similarity_search_with_score(
    query=question,
    filter=filters,
    k=5,
)
owid_graphs

## Reranker

In [None]:
from climateqa.engine.reranker import get_reranker
from climateqa.engine.reranker import rerank_docs

# Load the "nano" reranker and run `rerank_docs` on the documents retrieved for `question`;
# the result is displayed as the cell output.
reranker = get_reranker("nano")
reranked_docs_question = rerank_docs(reranker,docs_question,question)
reranked_docs_question

# Graph

In [None]:
from climateqa.engine.graph import make_graph_agent, display_graph

# Re-create every dependency of the agent in this cell (same calls as in the sections above).
llm = get_llm(provider="openai")
embeddings_function = get_embeddings_function()

# One Azure Search vector store per index handed to the agent.
vectorstore_ipcc = get_vectorstore(
    provider="azure_search",
    embeddings=embeddings_function,
    index_name="climateqa-ipx",
)
vectorstore_graphs = get_vectorstore(
    provider="azure_search",
    embeddings=embeddings_function,
    index_name="climateqa-owid",
    text_key="description",
)
vectorstore_region = get_vectorstore(
    provider="azure_search",
    embeddings=embeddings_function,
    index_name="climateqa-v2",
)
reranker = get_reranker("nano")

# Assemble the graph agent and render its structure.
app = make_graph_agent(
    llm=llm,
    vectorstore_ipcc=vectorstore_ipcc,
    vectorstore_graphs=vectorstore_graphs,
    vectorstore_region=vectorstore_region,
    reranker=reranker,
)
display_graph(app)

In [None]:
from climateqa.engine.graph import search 

from climateqa.engine.chains.intent_categorization import make_intent_categorization_node


from climateqa.engine.chains.answer_chitchat import make_chitchat_node
from climateqa.engine.chains.answer_ai_impact import make_ai_impact_node
from climateqa.engine.chains.query_transformation import make_query_transform_node
from climateqa.engine.chains.translation import make_translation_node
from climateqa.engine.chains.retrieve_documents import make_IPx_retriever_node, make_POC_retriever_node
from climateqa.engine.chains.answer_rag import make_rag_node
from climateqa.engine.chains.graph_retriever import make_graph_retriever_node
from climateqa.engine.chains.chitchat_categorization import make_chitchat_intent_categorization_node
from climateqa.engine.chains.prompts import audience_prompts
from climateqa.engine.graph import route_intent


In [None]:
# Input state used to step through the graph nodes one at a time.
inial_state = dict(
    # Alternative question: "What is the impact of climate change on the environment?"
    user_input="Quel est l'impact du changement climatique sur Bordeaux ?",
    audience=audience_prompts["general"],
    # Optional key, currently disabled: "sources_input": ["IPCC"]
    relevant_content_sources_selection=["Figures (IPCC/IPBES)", "POC region"],
    search_only=False,
    reports=[],
)

# Working copy updated by the following cells (shallow copy: the list values are shared with inial_state).
state = dict(inial_state)

In [None]:
# Build the intent-categorization node and merge what it returns into the working state.
# Note: the node is called on the untouched `inial_state`, not on `state`.
cat_node = make_intent_categorization_node(llm)
state.update(cat_node(inial_state))
state

In [None]:
# Ask the router which branch the current state would take (computed once and reused below).
intent = route_intent(state)

# When the router picks the translation branch, run the translation node and merge its
# output into the working state, as the other step-by-step cells do with their nodes.
# The previous version called the node but dropped its return value, so the translation
# never reached `state`.
# NOTE(review): assumes the node returns a dict of state updates like the other nodes — confirm.
if intent == "translate_query":
    state.update(make_translation_node(llm)(state))

In [None]:
# Run the query-transformation node on the current state, merge what it returns, and display the state.
state.update(make_query_transform_node(llm)(state))
state

In [None]:
# Build the IPx retriever node and await it on the current state; the second positional
# argument is an empty dict. The result is displayed as the cell output.
docs = await make_IPx_retriever_node(vectorstore_ipcc, reranker, llm)(state, {})
docs

In [None]:
from climateqa.engine.chains.graph_retriever import make_graph_retriever_node

node_retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)

state = {'questions_list': [
  {'question': 'What are the consequences of climate change on the environment and economy of Bordeaux?',
   'sources': ['OWID'],
   'index': 'IPx',
   'source_type': 'IPx'}]}

docs = await node_retrieve_graphs(state)
docs


In [None]:
# Build the POC retriever node.
# NOTE(review): it is given `vectorstore_ipcc` rather than `vectorstore_region` — confirm this is intended.
retriever_node = make_POC_retriever_node(vectorstore_ipcc, reranker, llm)
# retriever_node

In [None]:
# Stream the retriever node again and again while the working state still lists questions.
# NOTE(review): the loop only ends if the streamed updates eventually empty "questions_list" —
# confirm the node consumes that list, otherwise this cell never terminates.
new_state = state.copy()
# Every partial update yielded by the stream, in arrival order.
evolutions_states = []
while len(new_state["questions_list"])>0:    
    async for temp_state in retriever_node.astream(new_state):
        evolutions_states.append(temp_state)
        # Merge the partial update so the next check of the loop condition sees it.
        new_state.update(temp_state)
        print(temp_state)

In [None]:
# Build the RAG answer node, await it on the accumulated state (second argument: empty dict),
# and merge what it returns back into that state.
answer_rag = await make_rag_node(llm)(new_state,{})
new_state.update(answer_rag)

# Stream events of the whole chain

In [None]:

from climateqa.engine.graph import make_graph_agent, display_graph
from climateqa.engine.chains.prompts import audience_prompts


# Input for a full run of the agent.
inial_state = dict(
    # Other questions that were tried:
    #   "What is the impact of climate change on the environment?"
    #   "What is the impact of climate  in Bordeaux"
    user_input="What will be the precipitation in Bordeaux in 2050?",
    audience=audience_prompts["general"],
    sources_input=["IPCC"],
    # Alternative selection: ["Figures (IPCC/IPBES)", "POC region"]
    relevant_content_sources_selection=[],
    search_only=False,
    reports=[],
)

# Build the agent from the LLM, vector stores and reranker already present in the kernel.
app = make_graph_agent(
    llm=llm,
    vectorstore_ipcc=vectorstore_ipcc,
    vectorstore_graphs=vectorstore_graphs,
    vectorstore_region=vectorstore_region,
    reranker=reranker,
)

# Display the input state for reference.
inial_state

In [None]:
event_list = app.astream_events(inial_state, version = "v1")
static_event_list = []
async for event in event_list:
    static_event_list.append(event)

In [None]:
# Show the last event captured from the stream.
static_event_list[-1]

In [None]:
# Get the answer at the end
from climateqa.handle_stream_events import stream_answer
event_list = app.astream_events(inial_state, version = "v1")
history = []
start_streaming = False
answer_message_content = ""
async for event in event_list:

    if "langgraph_node" in event["metadata"]:
        node = event["metadata"]["langgraph_node"]

        if (event["name"] != "transform_query" and 
                      event["event"] == "on_chat_model_stream" and
                      node in ["answer_rag","answer_rag_no_docs", "answer_search", "answer_chitchat"]):
                    history, start_streaming, answer_message_content = stream_answer(
                        history, event, start_streaming, answer_message_content
                    )

# Test event logs


In [None]:
# Fully spelled-out input state (no lookup in `audience_prompts`) used for the event-log checks.
inial_state = {
    "user_input": "What is the impact of climate  in Bordeaux",
    "audience": "the general public who know the basics in science and climate change and want to learn more about it without technical terms. Still use references to passages.",
    "sources_input": ["IPCC"],
    "relevant_content_sources_selection": ["Figures (IPCC/IPBES)", "POC region"],
    "search_only": False,
    "reports": [],
}

In [None]:
# Get the answer at the end
from climateqa.handle_stream_events import stream_answer
app = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore_ipcc, vectorstore_graphs=vectorstore_graphs, vectorstore_region=vectorstore_region, reranker=reranker)

event_list = app.astream_events(inial_state, version = "v1")
history = []
start_streaming = False
answer_message_content = ""
static_event_list = []
async for event in event_list:
    static_event_list.append(event)

In [None]:
# Put the captured events into a DataFrame so they can be filtered by column.
df_static_events = pd.DataFrame(static_event_list)

In [None]:
# Preview the first rows of the events table.
df_static_events.head()

In [None]:
# List the distinct values of the "name" column.
df_static_events["name"].unique()

In [None]:
# Names of the retrieval steps to look at.
retrieval_names = ["retrieve_documents", "retrieve_local_data", "retrieve_POC_docs_node", "retrieve_IPx_docs"]

# Row masks: "on_chain_end" events, and events whose name is one of the retrieval steps.
is_chain_end = df_static_events["event"] == "on_chain_end"
is_retrieval = df_static_events["name"].isin(retrieval_names)
# Optional extra mask, currently disabled:
# df_static_events["data"].apply(lambda x: x["output"] is not None)

selected_events = df_static_events[is_chain_end & is_retrieval]
selected_events

In [None]:
# selected_events[selected_events["data"].apply(lambda x : "output" in x and x["output"] is not None)]
# Pull the "documents" entry out of each selected event's output.
# NOTE(review): this raises if an event's "output" is None or has no "documents" key; the
# commented-out filter above looks like the intended guard — confirm whether it is needed.
selected_events["data"].apply(lambda x : x["output"]["documents"])

In [None]:
# Row masks: "on_chain_end" events, and events named "answer_search".
is_chain_end = df_static_events["event"] == "on_chain_end"
is_answer_search = df_static_events["name"].isin(["answer_search"])
# Optional extra mask, currently disabled:
# df_static_events["data"].apply(lambda x: x["output"] is not None)

# Note: this replaces the `selected_events` built for the retrieval steps.
selected_events = df_static_events[is_chain_end & is_answer_search]
selected_events["metadata"]

In [None]:
# "related_contents" entry of the input recorded on the first selected event.
selected_events["data"].iloc[0]["input"]["related_contents"]

In [None]:
# Output recorded on the third selected event (requires at least three selected rows).
selected_events["data"].apply(lambda x : x["output"]).iloc[2]

In [None]:
# Values stored in the "data" entry of the first selected event.
selected_events.iloc[0]["data"].values()

In [None]:
# Values stored in the "data" entry of the second selected event.
selected_events.iloc[1]["data"].values()

In [None]:
# Same values as a plain list, for the first selected event.
list(selected_events.iloc[0]["data"].values())

In [None]:
# Same values as a plain list, for the second selected event.
list(selected_events.iloc[1]["data"].values())

In [None]:
# Same values as a plain list, for the third selected event.
list(selected_events.iloc[2]["data"].values())

In [None]:
# Same values as a plain list, for the fourth selected event.
list(selected_events.iloc[3]["data"].values())

In [None]:
# import json

# print(json.dumps(list(selected_events.iloc[1]["data"].values()), indent=4))



In [None]:

# `json` is imported in this cell because the notebook's only other `import json` sits in a
# later cell, so a fresh Restart & Run All used to fail here with a NameError.
import json

# Pretty-print the first value stored in the "data" entry of the second selected event.
data_values = selected_events.iloc[1]["data"].values()
# `default=str` is a fallback for values the json module cannot encode natively; it is only
# consulted for such objects, so plain dict/list/str/number payloads print exactly as before.
formatted_data = json.dumps(list(data_values)[0], indent=4, default=str)
print(formatted_data)

In [None]:
from pprint import pprint
import json
# Values stored in the "data" entry of the third selected event.
selected_events.iloc[2]["data"].values()

In [None]:
# Values stored in the "data" entry of the fourth selected event.
selected_events.iloc[3]["data"].values()

In [None]:
# First captured event whose name is "retrieve_POC_docs_node" (raises if there is none).
df_static_events[df_static_events["name"] == "retrieve_POC_docs_node"].iloc[0]