import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import requests
import asyncio
import uuid
import time
from typing import List, Optional, Dict, Any, AsyncGenerator

# --- OpenAI-Compatible Data Models ---
# These models define the structure that Cursor expects to send and receive.

class Model(BaseModel):
    id: str
    object: str = "model"
    owned_by: str = "user"
    permission: List[Dict[str, Any]] = []

class ModelList(BaseModel):
    object: str = "list"
    data: List[Model]

class ChatMessage(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.7
    stream: Optional[bool] = False
    # Accepted but unused; Cursor may include tool definitions in its requests.
    tools: Optional[List[Dict[str, Any]]] = None
    tool_choice: Optional[Any] = None

# --- Models for Non-Streaming Response ---

class ChatCompletionChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: str = "stop"

class ChatCompletionResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
    object: str = "chat.completion"
    # Unix timestamp recorded when the response object is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionChoice]

# --- Models for Streaming Response ---

class DeltaMessage(BaseModel):
    role: Optional[str] = None
    content: Optional[str] = None

class ChatCompletionStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[str] = None

class ChatCompletionStreamResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionStreamChoice]

# --- FastAPI Application ---
app = FastAPI()

# --- Add CORS Middleware ---
# This allows the Cursor IDE (running on a different origin)
# to make requests to our adapter server.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],      # Allow all origins
    allow_credentials=True,
    allow_methods=["*"],      # Allow all methods (GET, POST, OPTIONS, etc.)
    allow_headers=["*"],      # Allow all headers
)

GOLEM_SERVER_URL = "http://localhost:5000/generate"
MODEL_NAME = "aether_golem"

@app.get("/v1/models")
async def list_models():
    """
    This endpoint provides the list of available models to Cursor.
    It's required for Cursor to recognize our custom Golem model.
    """
    return ModelList(data=[Model(id=MODEL_NAME)])

async def stream_golem_response(golem_response_content: str, model: str) -> AsyncGenerator[str, None]:
    """
    Simulates a streaming response by breaking the Golem's full text response
    into word-by-word chunks, formatted as Server-Sent Events (SSE).
    """
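    # Each yielded chunk is one SSE event: a "data: <json>" line followed by a
    # blank line, matching OpenAI's streaming wire format. A content chunk
    # looks roughly like this (illustrative values, not a literal capture):
    #   data: {"object": "chat.completion.chunk", "choices": [{"index": 0, "delta": {"content": " word"}, "finish_reason": null}]}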
""" # First, send a chunk with the role role_chunk = ChatCompletionStreamResponse( model=model, choices=[ChatCompletionStreamChoice(index=0, delta=DeltaMessage(role="assistant"), finish_reason=None)] ) yield f"data: {role_chunk.json()}\n\n" # Then, stream the content word by word words = golem_response_content.split(" ") for word in words: if not word: continue # Add a space before each word to reconstruct the sentence content_chunk = ChatCompletionStreamResponse( model=model, choices=[ChatCompletionStreamChoice(index=0, delta=DeltaMessage(content=f" {word}"), finish_reason=None)] ) yield f"data: {content_chunk.json()}\n\n" time.sleep(0.05) # Small delay to simulate typing # Finally, send the stop signal stop_chunk = ChatCompletionStreamResponse( model=model, choices=[ChatCompletionStreamChoice(index=0, delta=DeltaMessage(), finish_reason="stop")] ) yield f"data: {stop_chunk.json()}\n\n" yield "data: [DONE]\n\n" @app.post("/v1/chat/completions") async def chat_completions(request: ChatCompletionRequest): """ This endpoint mimics OpenAI's chat completions endpoint. It translates requests from Cursor to the Golem server and back. """ print(f"Received request: {request.dict()}") # 1. Extract the user's prompt from the incoming request. # We'll just take the last message as the prompt. user_prompt = "" if request.messages: user_prompt = request.messages[-1].content if not user_prompt: return {"error": "No prompt found in the request."} # 2. Construct the request for our Golem server. golem_payload = { "prompt": user_prompt, "sessionId": f"cursor-session-{uuid.uuid4()}", "temperature": request.temperature, "golemActivated": True, # We assume activation for this endpoint "activationPhrases": [], "sefirotSettings": {} } print(f"Sending to Golem: {golem_payload}") try: # 3. Send the request to the Golem server. response = requests.post(GOLEM_SERVER_URL, json=golem_payload) response.raise_for_status() # Raise an exception for bad status codes golem_data = response.json() print(f"Received from Golem: {golem_data}") # 4. Extract the response and format it for Cursor (OpenAI standard). direct_response = golem_data.get("direct_response", "No direct response found.") aether_analysis = golem_data.get("aether_analysis", "") # Combine the direct response with the aether analysis for a richer reply. full_content = f"{direct_response}\n\n--- Aether Analysis ---\n{aether_analysis}" # 5. Create the OpenAI-compatible response object. # If the client requested a stream, return a StreamingResponse. if request.stream: return StreamingResponse( stream_golem_response(full_content, request.model), media_type="text/event-stream" ) # Otherwise, return a regular JSON response. chat_message = ChatMessage(role="assistant", content=full_content) choice = ChatCompletionChoice(index=0, message=chat_message) chat_response = ChatCompletionResponse(model=request.model, choices=[choice]) return chat_response except requests.exceptions.RequestException as e: print(f"Error contacting Golem server: {e}") return {"error": f"Failed to connect to Golem server at {GOLEM_SERVER_URL}"} except Exception as e: print(f"An unexpected error occurred: {e}") return {"error": "An internal error occurred in the adapter."} if __name__ == "__main__": # To run this adapter: uvicorn golem_cursor_adapter:app --reload --port 8001 print("Starting Golem Cursor Adapter Server...") print("Run with: uvicorn home.chezy.golem_cursor_adapter:app --reload --port 8001") uvicorn.run(app, host="0.0.0.0", port=8001)