import gradio as gr
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load dataset directly from sklearn (safe for Hugging Face Spaces)
def load_data():
    """Fetch the California housing dataset and return it as one DataFrame.

    The dataset is requested in pandas form (``as_frame=True``) with
    ``/tmp`` passed as scikit-learn's ``data_home`` directory. The returned
    frame is the loader's combined ``frame`` attribute.
    """
    return fetch_california_housing(as_frame=True, data_home="/tmp").frame

# Train model once at startup
def train_model():
    """Fit a linear regression on the housing data and score it.

    The ``MedHouseVal`` column is the regression target; every other column
    of the frame returned by ``load_data`` is used as a feature. 20% of the
    rows are held out (fixed ``random_state=42``) for evaluation.

    Returns:
        A ``(model, mse)`` tuple: the fitted ``LinearRegression`` and its
        mean squared error on the held-out rows.
    """
    frame = load_data()
    target = frame["MedHouseVal"]
    features = frame.drop("MedHouseVal", axis=1)

    # Fixed seed keeps the split, and therefore the reported MSE, reproducible.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    regressor = LinearRegression().fit(features_train, target_train)
    held_out_predictions = regressor.predict(features_test)
    return regressor, mean_squared_error(target_test, held_out_predictions)

# Train at import time so every call to `predict` reuses the same fitted
# model; `mse` is interpolated into the interface description below.
model, mse = train_model()

# Prediction function
def predict(MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude):
    """Predict a median house value from the eight housing features.

    Each parameter is named after the training column it feeds, and the
    values are passed to the module-level ``model`` in that same order.

    Args:
        MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup,
        Latitude, Longitude: Numeric feature values for one observation.

    Returns:
        A display string with the model output multiplied by 100,000 and
        formatted as dollars with two decimals.

    Raises:
        ValueError: If any feature value is ``None`` (for example, a number
            field that was left blank in the UI).
    """
    # Dict insertion order fixes the column order of the frame built below.
    features = {
        "MedInc": MedInc,
        "HouseAge": HouseAge,
        "AveRooms": AveRooms,
        "AveBedrms": AveBedrms,
        "Population": Population,
        "AveOccup": AveOccup,
        "Latitude": Latitude,
        "Longitude": Longitude,
    }

    # Fail with a readable message instead of an opaque conversion error
    # from NumPy/scikit-learn when a field has no value.
    missing = [name for name, value in features.items() if value is None]
    if missing:
        raise ValueError(f"Please provide a value for: {', '.join(missing)}")

    # The model was fitted on a DataFrame with named columns; predicting on a
    # bare array makes scikit-learn warn about missing feature names on every
    # call, so pass a one-row frame carrying the same column names.
    input_frame = pd.DataFrame([features], dtype=float)
    prediction = model.predict(input_frame)[0]
    return f"Estimated Median House Value: ${prediction * 100000:.2f}"

# Gradio UI
# One numeric field per model feature, listed in the same order as the
# parameters of `predict`.
inputs = [
    gr.Number(label=caption)
    for caption in (
        "Median Income",
        "House Age",
        "Average Rooms",
        "Average Bedrooms",
        "Population",
        "Average Occupancy",
        "Latitude",
        "Longitude",
    )
]

# Text-output interface around `predict`; the description reports the
# held-out MSE computed at startup.
iface = gr.Interface(
    title="🏠 California House Price Predictor",
    description=f"Trained on California housing data. Model MSE: {mse:.4f}",
    fn=predict,
    inputs=inputs,
    outputs="text",
)

iface.launch()