Add app and requirements
- app.py +74 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,74 @@
# Following https://python.langchain.com/docs/tutorials/chatbot/
# Missing: trimming, streaming with memory, use multiple threads

from langchain_mistralai import ChatMistralAI
from langchain_core.rate_limiters import InMemoryRateLimiter
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
import gradio as gr

# Prompt template
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an AI assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Rate limiter
rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,  # MistralAI free tier is heavily rate-limited: 0.1 req/s is one request every 10 seconds
    check_every_n_seconds=0.01,  # Wake up every 10 ms to check whether a request is allowed
    max_bucket_size=10,  # Controls the maximum burst size
)

model = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)

# Define a new graph
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    chain = prompt | model
    response = chain.invoke(state)
    return {"messages": response}


# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

# Config with a fixed thread id (all sessions share this conversation memory)
config = {"configurable": {"thread_id": "abc345"}}


def handle_prompt(query, history):
    input_messages = [HumanMessage(query)]
    try:
        # Stream output (not wired in yet, see the to-do note at the top):
        # out = ""
        # for chunk, metadata in app.stream(
        #     {"messages": input_messages},
        #     config,
        #     stream_mode="messages",
        # ):
        #     if isinstance(chunk, AIMessage):  # Filter to just model responses
        #         out += chunk.content
        #         yield out
        output = app.invoke({"messages": input_messages}, config)
        return output["messages"][-1].content
    except Exception:
        # Most likely cause on the free tier is hitting the request rate limit
        raise gr.Error("Requests rate limit exceeded")


description = "A MistralAI powered chatbot, using Langchain and deployed with Gradio."

demo = gr.ChatInterface(
    handle_prompt,
    type="messages",
    title="Medieval ChatBot",
    theme=gr.themes.Citrus(),
    description=description,
)

demo.launch()
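The commented-out block above and the to-do note at the top both point at streaming with memory. A minimal sketch of how handle_prompt could be rewritten as a generator, assuming the same app, config, and gr.ChatInterface wiring as in the diff (ChatInterface accepts generator functions for incremental display):

# Sketch only: streaming variant of handle_prompt, reusing app/config from above.
def handle_prompt(query, history):
    input_messages = [HumanMessage(query)]
    out = ""
    # stream_mode="messages" yields (message_chunk, metadata) pairs as tokens arrive
    for chunk, metadata in app.stream(
        {"messages": input_messages}, config, stream_mode="messages"
    ):
        if isinstance(chunk, AIMessage):  # keep only model output chunks
            out += chunk.content
            yield out  # yield the growing answer so Gradio updates the chat incrementally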
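The to-do note also mentions trimming. The linked tutorial handles that with trim_messages from langchain_core.messages; a minimal sketch (the token budget is an illustrative assumption) would slot into call_model roughly like this:

from langchain_core.messages import trim_messages

# Sketch only: cap the history before it reaches the prompt; max_tokens is illustrative.
trimmer = trim_messages(
    max_tokens=512,       # token budget for the retained history (assumed value)
    strategy="last",      # keep the most recent messages
    token_counter=model,  # let the chat model count tokens
    include_system=True,  # always keep the system prompt
    start_on="human",     # trimmed history should start with a human turn
)


def call_model(state: MessagesState):
    trimmed = trimmer.invoke(state["messages"])
    response = (prompt | model).invoke({"messages": trimmed})
    return {"messages": response}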
requirements.txt
ADDED
@@ -0,0 +1,4 @@
langchain-core
langgraph>0.2.27
langchain_mistralai
gradio
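One detail the diff does not cover: ChatMistralAI needs a Mistral API key, which it reads from the environment (MISTRAL_API_KEY); on a Space that would normally be added as a secret rather than committed. A small guard near the top of app.py, as a sketch, makes a missing key fail loudly:

import os

# Sketch only: fail fast with a clear message if the key is missing.
if "MISTRAL_API_KEY" not in os.environ:
    raise RuntimeError("Set MISTRAL_API_KEY (as a Space secret, or export it locally).")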