Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.cluster import KMeans | |
| from sklearn.preprocessing import MultiLabelBinarizer | |
| import gradio as gr | |
| from transformers import pipeline | |
# ⚡ Lightweight summarization pipeline, built once at module import.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
# 🧺 Synthetic transaction data: each inner list is the items of one basket.
transactions = [
    ["Milk", "Bread"],
    ["Milk", "Diapers", "Beer"],
    ["Bread", "Diapers", "Eggs"],
    ["Milk", "Bread", "Diapers", "Beer"],
    ["Bread", "Eggs"],
    ["Milk", "Eggs"],
    ["Beer", "Diapers"],
    ["Milk", "Bread", "Eggs"],
    ["Bread", "Diapers"],
    ["Milk", "Beer"],
]

# One row per basket; TransactionID is a 1-based running number.
df = pd.DataFrame(
    {
        "TransactionID": range(1, len(transactions) + 1),
        "Items": transactions,
    }
)
# One-hot encode items: one indicator column per distinct item label,
# one row per transaction (row order follows df).
mlb = MultiLabelBinarizer()
encoded = mlb.fit_transform(df["Items"])
encoded_df = pd.DataFrame(data=encoded, columns=mlb.classes_)
# 🧠 Clustering + LLM summary
def cluster_and_summarize(n_clusters):
    """Cluster the one-hot encoded transactions and summarize each cluster.

    Args:
        n_clusters: Number of k-means clusters to form. Any real number is
            accepted and rounded to the nearest integer, because a UI
            slider without an integer step can deliver a fractional value
            that both ``KMeans`` and ``range`` reject.

    Returns:
        A single string with one paragraph per cluster, separated by blank
        lines. Each paragraph is the summarizer's output for a sentence
        listing up to five of the cluster's most frequent items together
        with their counts.
    """
    # Coerce first so every later use (KMeans, range) sees a real int.
    n_clusters = int(round(n_clusters))

    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init="auto")
    labels = kmeans.fit_predict(encoded_df)
    # Still published on the module-level frame, as before, for any code
    # that reads df["Cluster"] after a call.
    df["Cluster"] = labels

    summaries = []
    for i in range(n_clusters):
        # Select by the local labels rather than re-reading the shared
        # column, so the result only depends on this call's clustering.
        cluster_items = df.loc[labels == i, "Items"].explode()
        top_items = cluster_items.value_counts().head(5)
        item_list = ", ".join(
            f"{item} ({count})" for item, count in top_items.items()
        )
        raw_summary = (
            f"Cluster {i} contains transactions with top items: {item_list}"
        )
        llm_output = summarizer(
            raw_summary, max_length=50, min_length=10, do_sample=False
        )[0]["summary_text"]
        summaries.append(f"🧠 Cluster {i}: {llm_output}")
    return "\n\n".join(summaries)
# Gradio UI
demo = gr.Interface(
    fn=cluster_and_summarize,
    # step=1 keeps the slider on whole numbers; a cluster count must be an
    # integer, and without an explicit step the slider can emit fractions.
    inputs=gr.Slider(2, 5, value=3, step=1, label="Number of Clusters"),
    outputs="text",
    # NOTE(review): the leading "π" looks like a mis-decoded emoji —
    # restore the intended character from the original file.
    title="π Market Basket Clustering + LLM Summary",
    description="Fast, error-free clustering of synthetic transactions with LLM-powered summaries.",
    cache_examples=False,
)
demo.launch()