dmr76
/

mmx_classifier_microblog_ENv02

@@ -51,32 +51,26 @@ import pandas as pd, numpy as np, warnings, torch, re
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from bs4 import BeautifulSoup
 warnings.filterwarnings("ignore", category=UserWarning, module='bs4')
 # Helper Functions
 def clean_and_parse_tweet(tweet):
     """Normalize a raw tweet into plain, single-spaced text.

     Links are replaced by the placeholder " URL ", HTML markup is removed with
     BeautifulSoup, newlines (real ones or a literal backslash followed by "n")
     become spaces, a run of "." / ":" at the very start of the text is dropped,
     the result is trimmed and runs of spaces are collapsed to one.

     Args:
         tweet: Raw tweet text.

     Returns:
         The cleaned text, or None when the parsed markup contains the
         substring "filename" or when HTML parsing leaves no text at all.
     """
     tweet = re.sub(r"https?://\S+|www\.\S+", " URL ", tweet)
     # Parse once and reuse the tree for both the "filename" check and the
     # text extraction (the markup used to be parsed twice).
     soup = BeautifulSoup(tweet, "html.parser")
     if "filename" in str(soup):
         return None
     text = soup.get_text()
     if not text:
         return None
     text = re.sub(r"\\n+|\n+", " ", text)
     # The leading-punctuation strip runs before trimming, as it always did.
     text = re.sub(r'^[.:]+', '', text).strip()
     return re.sub(r" +", " ", text)
 def predict_tweet(tweet, model, tokenizer, device, threshold=0.5):
     """Score one tweet and return the marketing-mix labels above a threshold.

     Args:
         tweet: Cleaned tweet text to classify.
         model: Sequence-classification model; its ``config.id2label`` mapping
             is used to translate output indices into label names.
         tokenizer: Tokenizer matching ``model``.
         device: Device the tokenized inputs are moved to (should be the
             device the model lives on).
         threshold: Minimum sigmoid probability for a label to be reported.

     Returns:
         A tuple ``(probs, labels)``: ``probs`` is the array of per-class
         sigmoid probabilities for the tweet, ``labels`` the names among
         'Product', 'Place', 'Price', 'Promotion' whose probability is at
         least ``threshold``.

     Raises:
         ValueError: If ``tweet`` is None.
     """
     if tweet is None:
         raise ValueError("tweet is None; there is no text to classify")
     # Read the label map from the model that was passed in rather than from a
     # module-level global, so the function works with any model instance.
     id2label = model.config.id2label
     reported_labels = {'Product', 'Place', 'Price', 'Promotion'}
     inputs = tokenizer(tweet, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
     # Inference only: skip autograd bookkeeping.
     with torch.no_grad():
         probs = torch.sigmoid(model(**inputs).logits).cpu().numpy()[0]
     return probs, [id2label[i] for i, p in enumerate(probs) if id2label[i] in reported_labels and p >= threshold]
 # Setup
 # Pick the compute device: Apple MPS first, then CUDA, otherwise CPU.
 if torch.backends.mps.is_built() and torch.backends.mps.is_available():
     device = "mps"
 elif torch.cuda.is_available():
     device = "cuda"
 else:
     device = "cpu"
 synxp = "dmr76/mmx_classifier_microblog_ENv02"
 model = AutoModelForSequenceClassification.from_pretrained(synxp).to(device)
 tokenizer = AutoTokenizer.from_pretrained(synxp)
 # Kept at module level because other code in this file may read it.
 id2label = model.config.id2label
 # ---->>> Define your Tweet  <<<----
 tweet = "Best cushioning ever!!! 🤗🤗🤗  my zoom vomeros are the bomb🏃🏽‍♀️💨!!!  \n @nike #run #training https://randomurl.ai"
 # Clean and Predict
 cleaned_tweet = clean_and_parse_tweet(tweet)
 # The cleaner returns None for tweets it rejects; fail with a clear message
 # instead of an obscure tokenizer error.
 if cleaned_tweet is None:
     raise ValueError("The tweet could not be cleaned into usable text; nothing to classify.")
 probs, labels = predict_tweet(cleaned_tweet, model, tokenizer, device)
 # Print Labels and Probabilities
 print("Please don't forget to cite the paper: https://ssrn.com/abstract=4542949 if you use this code")
 print(labels, probs)

 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from bs4 import BeautifulSoup
 warnings.filterwarnings("ignore", category=UserWarning, module='bs4')
 # Helper Functions
 def clean_and_parse_tweet(tweet):
     """Normalize a raw tweet into plain, single-spaced text.

     Links are replaced by the placeholder " URL ", HTML markup is removed with
     BeautifulSoup, newlines (real ones or a literal backslash followed by "n")
     become spaces, a run of "." / ":" at the very start of the text is dropped,
     the result is trimmed and runs of spaces are collapsed to one.

     Args:
         tweet: Raw tweet text.

     Returns:
         The cleaned text, or None when the parsed markup contains the
         substring "filename" or when HTML parsing leaves no text at all.
     """
     tweet = re.sub(r"https?://\S+|www\.\S+", " URL ", tweet)
     # Parse once and reuse the tree for both the "filename" check and the
     # text extraction (the markup used to be parsed twice).
     soup = BeautifulSoup(tweet, "html.parser")
     if "filename" in str(soup):
         return None
     text = soup.get_text()
     if not text:
         return None
     text = re.sub(r"\\n+|\n+", " ", text)
     # The leading-punctuation strip runs before trimming, as it always did.
     text = re.sub(r'^[.:]+', '', text).strip()
     return re.sub(r" +", " ", text)
 def predict_tweet(tweet, model, tokenizer, device, threshold=0.5):
     """Score one tweet and return the marketing-mix labels above a threshold.

     Args:
         tweet: Cleaned tweet text to classify.
         model: Sequence-classification model; its ``config.id2label`` mapping
             is used to translate output indices into label names.
         tokenizer: Tokenizer matching ``model``.
         device: Device the tokenized inputs are moved to (should be the
             device the model lives on).
         threshold: Minimum sigmoid probability for a label to be reported.

     Returns:
         A tuple ``(probs, labels)``: ``probs`` is the array of per-class
         sigmoid probabilities for the tweet, ``labels`` the names among
         'Product', 'Place', 'Price', 'Promotion' whose probability is at
         least ``threshold``.

     Raises:
         ValueError: If ``tweet`` is None.
     """
     if tweet is None:
         raise ValueError("tweet is None; there is no text to classify")
     # Read the label map from the model that was passed in rather than from a
     # module-level global, so the function works with any model instance.
     id2label = model.config.id2label
     reported_labels = {'Product', 'Place', 'Price', 'Promotion'}
     inputs = tokenizer(tweet, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
     # Inference only: skip autograd bookkeeping.
     with torch.no_grad():
         probs = torch.sigmoid(model(**inputs).logits).cpu().numpy()[0]
     return probs, [id2label[i] for i, p in enumerate(probs) if id2label[i] in reported_labels and p >= threshold]
 # Setup
 # Pick the compute device: Apple MPS first, then CUDA, otherwise CPU.
 if torch.backends.mps.is_built() and torch.backends.mps.is_available():
     device = "mps"
 elif torch.cuda.is_available():
     device = "cuda"
 else:
     device = "cpu"
 synxp = "dmr76/mmx_classifier_microblog_ENv02"
 model = AutoModelForSequenceClassification.from_pretrained(synxp).to(device)
 tokenizer = AutoTokenizer.from_pretrained(synxp)
 # Kept at module level because other code in this file may read it.
 id2label = model.config.id2label
 # ---->>> Define your Tweet  <<<----
 tweet = "Best cushioning ever!!! 🤗🤗🤗  my zoom vomeros are the bomb🏃🏽‍♀️💨!!!  \n @nike #run #training https://randomurl.ai"
 # Clean and Predict
 cleaned_tweet = clean_and_parse_tweet(tweet)
 # The cleaner returns None for tweets it rejects; fail with a clear message
 # instead of an obscure tokenizer error.
 if cleaned_tweet is None:
     raise ValueError("The tweet could not be cleaned into usable text; nothing to classify.")
 probs, labels = predict_tweet(cleaned_tweet, model, tokenizer, device)
 # Print Labels and Probabilities
 print("Please don't forget to cite the paper: https://ssrn.com/abstract=4542949 if you use this code")
 print(labels, probs)