mahesh1209's picture
Update app.py
cc78537 verified
raw
history blame
2.65 kB
import gradio as gr
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import pipeline
import tempfile
import os
# Load and clean Titanic dataset
TITANIC_CSV_URL = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"


def load_and_clean_data(source=TITANIC_CSV_URL):
    """Load the Titanic CSV and return a cleaned DataFrame.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, local path, or
            file-like object). Defaults to the public Titanic dataset URL.

    Returns:
        A DataFrame in which missing ``Age`` values are replaced by the
        column median, missing ``Embarked`` values by the most frequent
        value, and the ``Cabin``, ``Ticket`` and ``Name`` columns are removed.

    Raises:
        KeyError: If an expected column is absent from the CSV.
    """
    df = pd.read_csv(source)
    # Assign the filled column back instead of calling
    # ``df[col].fillna(..., inplace=True)``: that chained in-place form acts
    # on an intermediate Series, triggers a FutureWarning on pandas 2.x, and
    # leaves ``df`` unchanged once Copy-on-Write is active.
    df['Age'] = df['Age'].fillna(df['Age'].median())
    df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
    return df.drop(columns=['Cabin', 'Ticket', 'Name'])
# Generate EDA plots
def generate_plot_images(df):
    """Render each EDA plot to a temporary PNG file and return the paths.

    Args:
        df: DataFrame passed unchanged to every plotting helper.

    Returns:
        A list of file paths, one per plot, in drawing order (survival count
        first, then survival by gender). The files are created with
        ``delete=False`` and are not removed by this function.
    """
    temp_files = []
    for plot_func in (plot_survival_count, plot_survival_by_gender):
        fig, ax = plt.subplots()
        try:
            plot_func(df, ax)
            # Only the unique path is needed; leaving the ``with`` block
            # closes the handle so no descriptor leaks and savefig can reopen
            # the file on platforms that forbid a second open.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                image_path = tmp.name
            fig.savefig(image_path)
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
        temp_files.append(image_path)
    return temp_files
def plot_survival_count(df, ax):
    """Draw a count plot of the ``Survived`` column of *df* onto *ax*."""
    sns.countplot(data=df, x='Survived', ax=ax)
def plot_survival_by_gender(df, ax):
    """Draw a count plot of ``Sex`` on *ax*, with bars split by ``Survived``."""
    sns.countplot(data=df, x='Sex', hue='Survived', ax=ax)
# Summarizer pipeline
# Built once at module level so every call to summarize_eda reuses the same object.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def summarize_eda(df):
    """Return a summarizer-generated description of the cleaned dataset.

    A short fixed report is built from the row count and the most frequent
    ``Embarked`` value of *df*, then passed to the module-level ``summarizer``.

    Returns:
        The ``summary_text`` of the first summarizer result, or the string
        ``"Summarization failed: <message>"`` if any step raises.
    """
    try:
        n_rows = len(df)
        top_port = df['Embarked'].mode()[0]
        # The report begins with a newline and ends with one.
        report = (
            "\n"
            f"Titanic dataset has {n_rows} rows. Missing values in Age and Embarked were filled.\n"
            "Columns Cabin, Ticket, and Name were dropped. Females had higher survival rates.\n"
            f"Most passengers embarked from {top_port}.\n"
        )
        results = summarizer(report, max_length=100, min_length=30, do_sample=False)
        first_result = results[0]
        return first_result['summary_text']
    except Exception as err:
        return "Summarization failed: " + str(err)
# Markdown fallback logic
def safe_markdown(df):
    """Return the first rows of *df* as text.

    Markdown rendering is attempted first; if it raises for any reason, the
    plain ``to_string`` rendering of the same rows is returned instead.
    """
    preview = df.head()
    try:
        rendered = preview.to_markdown()
    except Exception:
        rendered = preview.to_string()
    return rendered
# Main app logic
def run_titanic_eda():
    """Run the whole pipeline and return the four values shown in the UI.

    Returns:
        A 4-tuple ``(table_text, first_plot_path, second_plot_path, summary)``.
        If any step raises, the tuple is
        ``("Data load failed", None, None, "Error: <message>")`` instead.
    """
    try:
        cleaned = load_and_clean_data()
        image_paths = generate_plot_images(cleaned)
        llm_summary = summarize_eda(cleaned)
        table_text = safe_markdown(cleaned)
        return table_text, image_paths[0], image_paths[1], llm_summary
    except Exception as err:
        return "Data load failed", None, None, "Error: " + str(err)
# Gradio interface
# The function takes no inputs; its four return values fill the four output
# components below in order.
demo = gr.Interface(
    fn=run_titanic_eda,
    inputs=[],
    outputs=[
        gr.Markdown(label="Cleaned Data Sample"),
        gr.Image(label="Survival Count"),
        gr.Image(label="Survival by Gender"),
        gr.Textbox(label="LLM Summary of EDA"),
    ],
    title="🚒 Titanic EDA + LLM Summary",
    # The em dash was previously stored as mis-decoded bytes and showed up as
    # garbage characters in the page description.
    description="Cleaned dataset, EDA plots, and LLM-generated summary — all in one page.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()