import asyncio
import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import requests
import uuid
import time
import json
from typing import List, Optional, Dict, Any, AsyncGenerator

# --- OpenAI-Compatible Data Models ---
# These models define the structure that Cursor expects to send and receive.
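#
# For illustration only (not part of the adapter): a typical request body that
# these models are expected to parse might look like the following. The exact
# fields Cursor sends can vary; this sketch just mirrors the OpenAI
# chat-completions shape modelled below.
#
#   {
#     "model": "aether_golem",
#     "messages": [{"role": "user", "content": "Explain this function."}],
#     "temperature": 0.7,
#     "stream": true
#   }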

class Model(BaseModel):
    id: str
    object: str = "model"
    owned_by: str = "user"
    permission: List[Dict[str, Any]] = []


class ModelList(BaseModel):
    object: str = "list"
    data: List[Model]


class ChatMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.7
    stream: Optional[bool] = False
    tools: Optional[List[Dict[str, Any]]] = None
    tool_choice: Optional[Any] = None

# --- Models for Non-Streaming Response ---
class ChatCompletionChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: str = "stop"


class ChatCompletionResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
    object: str = "chat.completion"
    # Unix timestamp (seconds) recorded when the response object is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionChoice]


# --- Models for Streaming Response ---
class DeltaMessage(BaseModel):
    role: Optional[str] = None
    content: Optional[str] = None


class ChatCompletionStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[str] = None


class ChatCompletionStreamResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionStreamChoice]

# --- FastAPI Application ---
app = FastAPI()

# --- Add CORS Middleware ---
# This allows the Cursor IDE (running on a different origin)
# to make requests to our adapter server.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],       # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],       # Allows all methods (GET, POST, OPTIONS, etc.)
    allow_headers=["*"],       # Allows all headers
)

GOLEM_SERVER_URL = "http://localhost:5000/generate"
MODEL_NAME = "aether_golem"
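
# For reference (inferred from the payload construction and response handling
# below, not a formal spec): the Golem server's /generate endpoint is expected
# to accept a JSON body with "prompt", "sessionId", "temperature",
# "golemActivated", "activationPhrases", and "sefirotSettings", and to reply
# with a JSON object containing at least "direct_response" and, optionally,
# "aether_analysis".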

# The route path assumes the standard OpenAI-style /v1 prefix that Cursor's
# OpenAI-compatible clients expect.
@app.get("/v1/models")
async def list_models():
    """
    This endpoint provides a list of available models to Cursor.
    It's required for Cursor to recognize our custom Golem model.
    """
    return ModelList(
        data=[
            Model(id=MODEL_NAME)
        ]
    )
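
# For illustration only: given the models above, the JSON returned to Cursor
# looks roughly like
#   {"object": "list", "data": [{"id": "aether_golem", "object": "model",
#    "owned_by": "user", "permission": []}]}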

async def stream_golem_response(golem_response_content: str, model: str) -> AsyncGenerator[str, None]:
    """
    Simulates a streaming response by breaking the Golem's full text response
    into word-by-word chunks, formatted as Server-Sent Events (SSEs).
    """
    # First, send a chunk with the role
    role_chunk = ChatCompletionStreamResponse(
        model=model,
        choices=[ChatCompletionStreamChoice(index=0, delta=DeltaMessage(role="assistant"), finish_reason=None)]
    )
    yield f"data: {role_chunk.json()}\n\n"

    # Then, stream the content word by word
    words = golem_response_content.split(" ")
    for word in words:
        if not word:
            continue
        # Add a space before each word to reconstruct the sentence
        content_chunk = ChatCompletionStreamResponse(
            model=model,
            choices=[ChatCompletionStreamChoice(index=0, delta=DeltaMessage(content=f" {word}"), finish_reason=None)]
        )
        yield f"data: {content_chunk.json()}\n\n"
        # Use asyncio.sleep (not time.sleep) so the small "typing" delay
        # doesn't block the event loop.
        await asyncio.sleep(0.05)

    # Finally, send the stop signal
    stop_chunk = ChatCompletionStreamResponse(
        model=model,
        choices=[ChatCompletionStreamChoice(index=0, delta=DeltaMessage(), finish_reason="stop")]
    )
    yield f"data: {stop_chunk.json()}\n\n"
    yield "data: [DONE]\n\n"
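
# For illustration only: each yielded string above is one SSE event on the
# wire, e.g.
#   data: {"id": "chatcmpl-...", "object": "chat.completion.chunk",
#          "created": 1700000000, "model": "aether_golem",
#          "choices": [{"index": 0, "delta": {"content": " hello"},
#                       "finish_reason": null}]}
# followed by a blank line, and the stream terminates with "data: [DONE]".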

# The route path assumes the standard OpenAI-style /v1 prefix.
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    """
    This endpoint mimics OpenAI's chat completions endpoint.
    It translates requests from Cursor to the Golem server and back.
    """
    print(f"Received request: {request.dict()}")

    # 1. Extract the user's prompt from the incoming request.
    #    We'll just take the last message as the prompt.
    user_prompt = ""
    if request.messages:
        user_prompt = request.messages[-1].content

    if not user_prompt:
        return {"error": "No prompt found in the request."}

    # 2. Construct the request for our Golem server.
    golem_payload = {
        "prompt": user_prompt,
        "sessionId": f"cursor-session-{uuid.uuid4()}",
        "temperature": request.temperature,
        "golemActivated": True,  # We assume activation for this endpoint
        "activationPhrases": [],
        "sefirotSettings": {}
    }
    print(f"Sending to Golem: {golem_payload}")

    try:
        # 3. Send the request to the Golem server.
        #    Note: requests.post is a blocking call, so it holds the event
        #    loop for the duration of the Golem generation.
        response = requests.post(GOLEM_SERVER_URL, json=golem_payload)
        response.raise_for_status()  # Raise an exception for bad status codes
        golem_data = response.json()
        print(f"Received from Golem: {golem_data}")

        # 4. Extract the response and format it for Cursor (OpenAI standard).
        direct_response = golem_data.get("direct_response", "No direct response found.")
        aether_analysis = golem_data.get("aether_analysis", "")

        # Combine the direct response with the aether analysis for a richer reply.
        full_content = f"{direct_response}\n\n--- Aether Analysis ---\n{aether_analysis}"

        # 5. Create the OpenAI-compatible response object.
        #    If the client requested a stream, return a StreamingResponse.
        if request.stream:
            return StreamingResponse(
                stream_golem_response(full_content, request.model),
                media_type="text/event-stream"
            )

        # Otherwise, return a regular JSON response.
        chat_message = ChatMessage(role="assistant", content=full_content)
        choice = ChatCompletionChoice(index=0, message=chat_message)
        chat_response = ChatCompletionResponse(model=request.model, choices=[choice])
        return chat_response

    except requests.exceptions.RequestException as e:
        print(f"Error contacting Golem server: {e}")
        return {"error": f"Failed to connect to Golem server at {GOLEM_SERVER_URL}"}
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return {"error": "An internal error occurred in the adapter."}

if __name__ == "__main__":
    # To run this adapter: uvicorn golem_cursor_adapter:app --reload --port 8001
    print("Starting Golem Cursor Adapter Server...")
    print("Run with: uvicorn home.chezy.golem_cursor_adapter:app --reload --port 8001")
    uvicorn.run(app, host="0.0.0.0", port=8001)
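
# --- Illustrative smoke test (not part of the adapter) ---
# Assuming the adapter is running on port 8001 and the Golem server on port
# 5000, a minimal client like the sketch below (run from a separate script,
# not from this module) should get a completion back:
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8001/v1/chat/completions",
#       json={
#           "model": "aether_golem",
#           "messages": [{"role": "user", "content": "Hello, Golem."}],
#           "stream": False,
#       },
#   )
#   print(resp.json()["choices"][0]["message"]["content"])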