Spaces:
Build error
Build error
import os

import numpy as np
import pandas as pd
import requests
import torch
from datasets import load_dataset
from sentence_transformers.util import semantic_search
# Required packages (install before running):
#   pip install datasets
#   pip install retry
#   pip install -U sentence-transformers
# FAQ corpus: the list position of each question is the ``corpus_id`` used
# when search hits are mapped back to text at the end of the script.
texts = [
    "How do I get a replacement Medicare card?",
    "What is the monthly premium for Medicare Part B?",
    "How do I terminate my Medicare Part B (medical insurance)?",
    "How do I sign up for Medicare?",
    "Can I sign up for Medicare Part B if I am working and have health insurance through an employer?",
    "How do I sign up for Medicare Part B if I already have Part A?",
    "What are Medicare late enrollment penalties?",
    "What is Medicare and who can get it?",
    "How can I get help with my Medicare Part A and Part B premiums?",
    "What are the different parts of Medicare?",
    "Will my Medicare premiums be higher because of my higher income?",
    "What is TRICARE ?",
    "Should I sign up for Medicare Part B if I have Veterans' Benefits?",
]
# Sentence-embedding model served by the Hugging Face Inference API.
model_id = "sentence-transformers/all-MiniLM-L6-v2"

# SECURITY: never commit an access token. It is read from the HF_TOKEN
# environment variable (e.g. a Space secret); any token that was previously
# hard-coded in this file must be considered leaked and be revoked.
hf_token = os.environ.get("HF_TOKEN")

# Direct feature-extraction endpoint for the chosen model.
api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"

# Without a token the request is sent anonymously; NOTE(review): anonymous
# calls may be rate-limited or rejected by the API - confirm for deployment.
headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
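# Alternative: let the API block until the model has finished loading.
# def query(texts):
#     response = requests.post(api_url, headers=headers, json={"inputs": texts, "options":{"wait_for_model":True}})
#     return response.json()
# Optional retry decorator for query() (requires the `retry` package):
# @retry(tries=3, delay=10)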
def query(texts):
    """Request embeddings for ``texts`` from the Hugging Face Inference API.

    Args:
        texts: List of strings sent as the ``"inputs"`` field of the request.

    Returns:
        The decoded JSON payload when it is a list (expected to hold one
        embedding vector per input string).

    Raises:
        RuntimeError: If the API answers with an error payload (for example
            while the model is still loading) or with any other payload
            that is not a list.
        requests.exceptions.RequestException: On network failures, including
            the timeout below.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.post(
        api_url, headers=headers, json={"inputs": texts}, timeout=60
    )
    result = response.json()
    if isinstance(result, list):
        return result
    # Look for the "error" key anywhere in the payload, not only in first
    # position, and surface the API's own message instead of guessing.
    if isinstance(result, dict) and "error" in result:
        raise RuntimeError(
            f"Inference API error: {result['error']} "
            "(if the model is currently loading, please re-run the query)."
        )
    # Fail loudly rather than returning None to the caller.
    raise RuntimeError(f"Unexpected Inference API response: {result!r}")
# Embed the FAQ corpus through the Inference API and cache the vectors on
# disk. Building the frame from dict(inputs=texts) would only store the raw
# question strings, so the corpus has to go through query() first.
output = query(texts)
print("output done")
embeddings = pd.DataFrame(output)
embeddings.to_csv("embeddings.csv", index=False)
print("embeddings done")
# Load the corpus embeddings that were uploaded as a Hugging Face dataset
# and convert its "train" split into a float tensor.
faqs_embeddings = load_dataset('ITESM/embedded_faqs_medicare')
train_split = faqs_embeddings["train"]
embedding_matrix = train_split.to_pandas().to_numpy()
dataset_embeddings = torch.from_numpy(embedding_matrix).to(torch.float)
print("dataset_embeddings done")
# Alternative: read the locally saved CSV instead of the Hub dataset
# (assumes embeddings.csv holds numeric embeddings - TODO confirm):
# embeddings_new = pd.read_csv("embeddings.csv")
# dataset_embeddings = torch.from_numpy(embeddings_new.to_numpy()).to(torch.float)
# Embed the user's question with the same model as the corpus.
question = ["How can Medicare help me?"]
output = query(question)
print("output done")
query_embeddings = torch.FloatTensor(output)
print(f"The size of our embedded dataset is {dataset_embeddings.shape} and of our embedded query is {query_embeddings.shape}.")

# Retrieve the 5 best matches; hits[0] holds the results for the single
# query, and each hit's 'corpus_id' is used as an index into ``texts``.
hits = semantic_search(query_embeddings, dataset_embeddings, top_k=5)
print([texts[match['corpus_id']] for match in hits[0]])