from enum import Enum

import evaluate
import torch
from tqdm import tqdm


class AssertionType(Enum):
    """Label set for assertion classification."""
    PRESENT = 0
    ABSENT = 1
    POSSIBLE = 2


class EntityWithAssertion:
    """Pairs an extracted entity with its assertion type."""

    def __init__(self, entity: str, assertion_type: AssertionType):
        self.entity = entity
        self.assertion_type = assertion_type

    def __repr__(self) -> str:
        return f"{self.assertion_type.name}: {self.entity}"


def classify_assertions_in_sentences(sentences, model, tokenizer, batch_size=32):
    """Classify sentences in mini-batches and return the predicted label ids as one tensor."""
    predictions = []
    for i in tqdm(range(0, len(sentences), batch_size)):
        # Tokenize the batch and move it to the model's device (the original hard-coded "cuda").
        batch = tokenizer(sentences[i:i + batch_size], return_tensors="pt", padding=True, truncation=True).to(model.device)
        with torch.no_grad():
            outputs = model(**batch)
        # Highest-scoring class id for each sentence in the batch.
        predicted_labels = torch.argmax(outputs.logits, dim=1)
        predictions.append(predicted_labels)
    return torch.cat(predictions)
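
# Usage sketch (assumes a fine-tuned sequence-classification checkpoint loaded with the
# transformers library; the path below is a placeholder, not a real model id):
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
# tokenizer = AutoTokenizer.from_pretrained("path/to/assertion-model")
# model = AutoModelForSequenceClassification.from_pretrained("path/to/assertion-model").eval()
# preds = classify_assertions_in_sentences(["No acute fracture is seen."], model, tokenizer)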


def input_classification(model, tokenizer, x: str = None, all_classes: bool = False):
    """Classify a single sentence; with all_classes=True, return every class probability."""
    if x is None:
        x = input("Write your sentence and press Enter to continue: ")
    tokenized_x = tokenizer(x, return_tensors="pt", padding=True, truncation=True).to(model.device)
    with torch.no_grad():
        outputs = model(**tokenized_x)
    if all_classes:
        # Map each class name to its softmax probability.
        probabilities = torch.softmax(outputs.logits, dim=1)[0]
        return {model.config.id2label[i]: float(p) for i, p in enumerate(probabilities)}
    predicted_label = torch.argmax(outputs.logits, dim=1)
    return model.config.id2label[int(predicted_label)]
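
# Usage sketch (label names assume the model's id2label mirrors AssertionType;
# the example sentence and scores are illustrative only):
# input_classification(model, tokenizer, "The patient denies chest pain.")
# -> "ABSENT"
# input_classification(model, tokenizer, "The patient denies chest pain.", all_classes=True)
# -> {"PRESENT": 0.02, "ABSENT": 0.95, "POSSIBLE": 0.03}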


def compute_results(y, y_hat):
    """Compute macro-F1, micro-F1 and accuracy given reference labels y and predictions y_hat."""
    metric_f1 = evaluate.load("f1")
    metric_acc = evaluate.load("accuracy")
    return {
        "macro-f1": metric_f1.compute(predictions=y_hat, references=y, average="macro")["f1"],
        "micro-f1": metric_f1.compute(predictions=y_hat, references=y, average="micro")["f1"],
        "accuracy": metric_acc.compute(predictions=y_hat, references=y)["accuracy"],
    }
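
# End-to-end evaluation sketch (gold labels assumed to follow the AssertionType ids;
# the sentences and labels below are illustrative only):
# sentences = ["No evidence of pleural effusion.", "Patient reports severe headache."]
# gold = [AssertionType.ABSENT.value, AssertionType.PRESENT.value]
# preds = classify_assertions_in_sentences(sentences, model, tokenizer)
# compute_results(gold, preds.cpu().tolist())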