mahesh1209 committed on
Commit
840f5fb
·
verified ·
1 Parent(s): e39ce96

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ from transformers import pipeline
6
+ import tempfile
7
+ import os
8
+
9
def load_and_clean_data(url="https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"):
    """Load the Titanic CSV and return a cleaned DataFrame.

    Cleaning steps:
      * missing ``Age`` values are replaced by the column median;
      * missing ``Embarked`` values are replaced by the most frequent value;
      * the ``Cabin``, ``Ticket`` and ``Name`` columns are dropped.

    Args:
        url: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object). Defaults to the public Titanic dataset.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    df = pd.read_csv(url)

    # Assign the result back instead of ``df[col].fillna(..., inplace=True)``:
    # the chained in-place form operates on a temporary under pandas
    # Copy-on-Write and is deprecated in pandas 2.x.
    df["Age"] = df["Age"].fillna(df["Age"].median())

    # ``mode()`` is empty when the column holds only NaN; skip the fill in
    # that case rather than failing on ``[0]``.
    embarked_mode = df["Embarked"].mode()
    if not embarked_mode.empty:
        df["Embarked"] = df["Embarked"].fillna(embarked_mode[0])

    return df.drop(columns=["Cabin", "Ticket", "Name"])
16
+
17
def generate_plot_images(df):
    """Render each EDA plot to a temporary PNG file.

    One figure is drawn per plotting function, in this order:
    ``plot_survival_count`` then ``plot_survival_by_gender``.

    Args:
        df: DataFrame passed through to each plotting function.

    Returns:
        A list of PNG file paths, one per plot, in the order above. The files
        are created with ``delete=False`` and are not removed here.
    """
    temp_files = []
    for plot_func in (plot_survival_count, plot_survival_by_gender):
        # Reserve a unique path and close the handle straight away so no file
        # descriptor is leaked and savefig can reopen the path on any platform.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            image_path = tmp.name

        fig, ax = plt.subplots()
        try:
            plot_func(df, ax)
            fig.savefig(image_path)
        except Exception:
            # Do not leave an empty or partial image behind on failure.
            if os.path.exists(image_path):
                os.remove(image_path)
            raise
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)

        temp_files.append(image_path)
    return temp_files
27
+
28
def plot_survival_count(df, ax):
    """Draw a count plot of the ``Survived`` column of *df* onto *ax*."""
    sns.countplot(
        data=df,
        x='Survived',
        ax=ax,
    )
30
+
31
def plot_survival_by_gender(df, ax):
    """Draw a count plot of ``Sex`` split by ``Survived`` onto *ax*."""
    sns.countplot(
        data=df,
        x='Sex',
        hue='Survived',
        ax=ax,
    )
33
+
34
# Module-level summarization pipeline, built once at import time and reused
# by summarize_eda().
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
35
+
36
def summarize_eda(df):
    """Return a model-generated summary of the cleaning and EDA steps.

    A short description is built from *df* (its row count and the most
    frequent ``Embarked`` value) and passed to the module-level
    ``summarizer`` pipeline.

    Args:
        df: Cleaned DataFrame; must contain an ``Embarked`` column.

    Returns:
        The summary text, or a ``"Summarization failed: ..."`` message if
        anything raises while building the prompt or running the model.
    """
    try:
        # Build the prompt from explicit sentences so the model input does not
        # carry the leading/trailing newlines and layout whitespace that a
        # triple-quoted literal would embed.
        sentences = (
            f"Titanic dataset has {len(df)} rows.",
            "Missing values in Age and Embarked were filled.",
            "Columns Cabin, Ticket, and Name were dropped.",
            "Females had higher survival rates.",
            f"Most passengers embarked from {df['Embarked'].mode()[0]}.",
        )
        text = " ".join(sentences)
        result = summarizer(text, max_length=100, min_length=30, do_sample=False)
        return result[0]['summary_text']
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"Summarization failed: {str(e)}"
47
+
48
def run_titanic_eda():
    """Run the full pipeline and return the values for the four UI outputs.

    Returns:
        A 4-tuple ``(table_markdown, plot_path_1, plot_path_2, summary)``.
        On failure the first element is a short status message, both plot
        slots are ``None`` and the last element is ``"Error: ..."``.
    """
    # Handle the load stage separately so "Data load failed" is only reported
    # when loading or cleaning is what actually failed.
    try:
        df = load_and_clean_data()
    except Exception as e:
        return "Data load failed", None, None, f"Error: {str(e)}"

    try:
        survival_plot, gender_plot = generate_plot_images(df)
        summary = summarize_eda(df)
        table = df.head().to_markdown()
    except Exception as e:
        # Plotting, summarisation or markdown rendering failed, not the load.
        return "EDA failed", None, None, f"Error: {str(e)}"

    return table, survival_plot, gender_plot, summary
56
+
57
# Gradio front end: no inputs; one run of run_titanic_eda() fills the four
# output components below, in order.
demo = gr.Interface(
    title="🚢 Titanic EDA + LLM Summary",
    description="Cleaned dataset, EDA plots, and LLM-generated summary — all in one page.",
    fn=run_titanic_eda,
    inputs=[],
    outputs=[
        gr.Markdown(label="Cleaned Data Sample"),
        gr.Image(label="Survival Count"),
        gr.Image(label="Survival by Gender"),
        gr.Textbox(label="LLM Summary of EDA"),
    ],
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()