Spaces:

lvwerra
/

bary_score

Build error

App Files Files Community

lvwerra HF Staff committed on Jun 9, 2022

Commit

85f9580

1 Parent(s): 1e59200

add logic

Browse files

Files changed (3) hide show

bary_score.py +9 -18
requirements.txt +4 -1
score.py +255 -0

bary_score.py CHANGED Viewed

@@ -16,6 +16,8 @@
 import evaluate
 import datasets
 # TODO: Add BibTeX citation
 _CITATION = """\
@@ -53,10 +55,6 @@ Examples:
     {'accuracy': 1.0}
 """
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class BaryScore(evaluate.EvaluationModule):
     """TODO: Short description of my evaluation module."""
@@ -71,8 +69,8 @@ class BaryScore(evaluate.EvaluationModule):
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
@@ -81,15 +79,8 @@ class BaryScore(evaluate.EvaluationModule):
             reference_urls=["http://path.to.reference.url/new_module"]
         )
-    def _download_and_prepare(self, dl_manager):
-        """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
-    def _compute(self, predictions, references):
-        """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
-        return {
-            "accuracy": accuracy,
-        }

 import evaluate
 import datasets
+from score import BaryScoreMetric
 # TODO: Add BibTeX citation
 _CITATION = """\
     {'accuracy': 1.0}
 """
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class BaryScore(evaluate.EvaluationModule):
     """TODO: Short description of my evaluation module."""
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
+                'predictions': datasets.Value('string'),
+                'references': datasets.Value('string'),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
             reference_urls=["http://path.to.reference.url/new_module"]
         )
+    def _compute(self, predictions, references, model_name="bert-base-uncased", last_layers=5, use_idfs=True, sinkhorn_ref=0.01):
+        """Compute BaryScore values for the predictions against the references.
+
+        :param predictions: hypothesis sentences (strings)
+        :param references: reference sentences (strings), aligned with predictions
+        :param model_name: forwarded to BaryScoreMetric
+        :param last_layers: forwarded to BaryScoreMetric
+        :param use_idfs: forwarded to BaryScoreMetric
+        :param sinkhorn_ref: forwarded to BaryScoreMetric
+        :return: the result of BaryScoreMetric.evaluate_batch for the batch
+        """
+        metric_call = BaryScoreMetric(model_name=model_name, last_layers=last_layers, use_idfs=use_idfs, sinkhorn_ref=sinkhorn_ref)
+        # Keyword arguments make the roles explicit: predictions are the hypotheses,
+        # references are the references (positionally they were passed the other way round).
+        metric_call.prepare_idfs(hyps=predictions, refs=references)
+        return metric_call.evaluate_batch(batch_hyps=predictions, batch_refs=references)

requirements.txt CHANGED Viewed

@@ -1,2 +1,5 @@
 evaluate==0.1.0
-datasets~=2.0

 evaluate==0.1.0
+datasets~=2.0
+POT
+transformers
+torch

score.py ADDED Viewed

	@@ -0,0 +1,255 @@

+from __future__ import absolute_import, division, print_function
+import numpy as np
+import torch
+from tqdm import tqdm
+import ot
+from math import log
+from collections import defaultdict, Counter
+from transformers import AutoModelForMaskedLM, AutoTokenizer
+class BaryScoreMetric:
+    def __init__(self, model_name="bert-base-uncased", last_layers=5, use_idfs=True, sinkhorn_ref=0.01):
+        """
+        Set up the BaryScore metric: load the pretrained model and pick the hidden layers to use.
+        :param model_name: model name or path from the HuggingFace library
+        :param last_layers: how many of the final hidden-state layers are used
+        :param use_idfs: if true use idf weights, else use uniform weights
+        :param sinkhorn_ref: weight of the KL in the SD (stored on the instance)
+        """
+        self.model_name = model_name
+        self.load_tokenizer_and_model()
+        # The range below indexes num_hidden_layers + 1 entries and keeps the last `last_layers` of them.
+        hidden_state_count = self.model.config.num_hidden_layers + 1
+        first_layer = hidden_state_count - last_layers
+        assert first_layer > 0
+        self.layers_to_consider = range(first_layer, hidden_state_count)
+        self.use_idfs = use_idfs
+        self.sinkhorn_ref = sinkhorn_ref
+        self.idfs = []
+        if torch.cuda.is_available():
+            device_name = 'cuda'
+        else:
+            device_name = 'cpu'
+        self.device = torch.device(device_name)
+    def prepare_idfs(self, hyps, refs):
+        """
+        Compute the idf dictionaries of the hypotheses and of the references and cache them on the instance.
+        :param hyps: hypothesis list of string sentences; has to be computed at corpus level
+        :param refs: reference list of string sentences; has to be computed at corpus level
+        :return: tuple (hypothesis idf dictionary, reference idf dictionary)
+        """
+        t_hyps = self.tokenizer(hyps)['input_ids']
+        t_refs = self.tokenizer(refs)['input_ids']
+        idf_dict_hyp = self.ref_list_to_idf(t_hyps)
+        idf_dict_ref = self.ref_list_to_idf(t_refs)
+        # Cache in (hypothesis, reference) order: evaluate_batch unpacks self.model_ids as
+        # `idf_hyps, idf_ref`, and this is also the order returned below. Storing the pair
+        # the other way round handed each side the other side's idf table.
+        self.model_ids = (idf_dict_hyp, idf_dict_ref)
+        return idf_dict_hyp, idf_dict_ref
+    def ref_list_to_idf(self, input_refs):
+        """
+        Build an idf table from a list of tokenized sentences.
+        :param input_refs: list of token-id sequences, one per sentence
+        :return: idf dictionary (defaultdict) mapping a token id to log((num_docs + 1) / (count + 1)),
+                 where count is the number of sentences containing the id; ids never seen map to
+                 log(num_docs + 1)
+        """
+        num_docs = len(input_refs)
+        idf_count = Counter()
+        for token_ids in input_refs:
+            # set(): a token counts once per sentence, however often it occurs in it.
+            # Updating per sentence avoids the quadratic cost of concatenating lists with sum().
+            idf_count.update(set(token_ids))
+        idf_dict = defaultdict(lambda: log((num_docs + 1) / (1)))
+        idf_dict.update({idx: log((num_docs + 1) / (c + 1)) for (idx, c) in idf_count.items()})
+        return idf_dict
+    def load_tokenizer_and_model(self):
+        """
+        Load the tokenizer and the masked-LM model named by self.model_name and attach them
+        to the instance as self.tokenizer and self.model. The model is configured to output
+        its hidden states and is switched to eval mode.
+        """
+        checkpoint = '{}'.format(self.model_name)
+        loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+        masked_lm = AutoModelForMaskedLM.from_pretrained(checkpoint)
+        masked_lm.config.output_hidden_states = True
+        masked_lm.eval()
+        self.tokenizer, self.model = loaded_tokenizer, masked_lm
+    def evaluate_batch(self, batch_hyps, batch_refs, idf_hyps=None, idf_ref=None):
+        """
+        Compute the BaryScore values of every aligned (hypothesis, reference) sentence pair.
+        :param batch_hyps: hypothesis list of string sentences (a single string is treated as a one-element list)
+        :param batch_refs: reference list of string sentences (a single string is treated as a one-element list)
+        :param idf_hyps: idfs of hypothesis computed at corpus level; when idf weights are used and both idf
+                         arguments are None, the dictionaries cached by prepare_idfs are used
+        :param idf_ref: idfs of references computed at corpus level
+        :return: dictionary mapping each 'baryscore_<name>' key to the list of per-sentence values
+                 (empty dictionary for an empty batch)
+        """
+        if isinstance(batch_hyps, str):
+            batch_hyps = [batch_hyps]
+        if isinstance(batch_refs, str):
+            batch_refs = [batch_refs]
+        nb_sentences = len(batch_refs)
+        assert len(batch_hyps) == len(batch_refs)
+        if nb_sentences == 0:
+            # Nothing to score: return early instead of failing later on an undefined per-sentence dict.
+            return {}
+        # The cached idf tables are only needed when idf weighting is enabled.
+        if self.use_idfs and (idf_hyps is None) and (idf_ref is None):
+            idf_hyps, idf_ref = self.model_ids
+        model = self.model.to(self.device)
+        baryscores = []
+        with torch.no_grad():
+            ###############################################
+            ## Extract Embeddings From Pretrained Models ##
+            ###############################################
+            batch_refs = self.tokenizer(batch_refs, return_tensors='pt', padding=True, truncation=True).to(self.device)
+            batch_refs_embeddings_ = model(**batch_refs)[-1]
+            batch_hyps = self.tokenizer(batch_hyps, return_tensors='pt', padding=True, truncation=True).to(self.device)
+            batch_hyps_embeddings_ = model(**batch_hyps)[-1]
+            # Keep only the selected layers, stack them on a new leading axis and L2-normalise each vector.
+            batch_refs_embeddings = torch.cat(
+                [batch_refs_embeddings_[i].unsqueeze(0) for i in list(self.layers_to_consider)])
+            batch_refs_embeddings.div_(torch.norm(batch_refs_embeddings, dim=-1).unsqueeze(-1))
+            batch_hyps_embeddings = torch.cat(
+                [batch_hyps_embeddings_[i].unsqueeze(0) for i in list(self.layers_to_consider)])
+            batch_hyps_embeddings.div_(torch.norm(batch_hyps_embeddings, dim=-1).unsqueeze(-1))
+            ref_tokens_id = batch_refs['input_ids'].cpu().tolist()
+            hyp_tokens_id = batch_hyps['input_ids'].cpu().tolist()
+            pad_token = self.tokenizer.pad_token
+            ####################################
+            ## Unbatched BaryScore Prediction ##
+            ####################################
+            for index_sentence in tqdm(range(nb_sentences), 'BaryScore Progress'):
+                ref_ids_i = ref_tokens_id[index_sentence]
+                hyp_ids_i = hyp_tokens_id[index_sentence]
+                # Number of non-padding tokens; the first n positions of each row are kept,
+                # exactly as the original index lists [0, ..., n - 1] did.
+                n_ref = sum(1 for tok in self.tokenizer.convert_ids_to_tokens(ref_ids_i, skip_special_tokens=False)
+                            if tok != pad_token)
+                n_hyp = sum(1 for tok in self.tokenizer.convert_ids_to_tokens(hyp_ids_i, skip_special_tokens=False)
+                            if tok != pad_token)
+                # One float64 array of shape (tokens, dim) per selected layer.
+                ref_embedding_i = batch_refs_embeddings[:, index_sentence, :n_ref, :]
+                hyp_embedding_i = batch_hyps_embeddings[:, index_sentence, :n_hyp, :]
+                measures_locations_ref = list(ref_embedding_i.cpu().numpy().astype(np.float64))
+                measures_locations_hyps = list(hyp_embedding_i.cpu().numpy().astype(np.float64))
+                baryscore = None
+                if self.use_idfs:
+                    #########################
+                    ## Use TF-IDF weights  ##
+                    #########################
+                    # Look up with plain Python ints. Iterating a tensor yields 0-d tensors, which
+                    # hash by object identity and therefore never match the integer keys of the idf
+                    # dictionaries: every token silently received the default idf.
+                    ref_idf_i = [idf_ref[i] for i in ref_ids_i[:n_ref]]
+                    hyp_idf_i = [idf_hyps[i] for i in hyp_ids_i[:n_hyp]]
+                    # All-zero idf weights (e.g. every token occurs in every sentence of a tiny corpus)
+                    # cannot be normalised; fall back to uniform weights in that case.
+                    if sum(ref_idf_i) > 0 and sum(hyp_idf_i) > 0:
+                        baryscore = self.baryscore(measures_locations_ref, measures_locations_hyps, ref_idf_i,
+                                                   hyp_idf_i)
+                if baryscore is None:
+                    #####################
+                    ## Uniform Weights ##
+                    #####################
+                    baryscore = self.baryscore(measures_locations_ref, measures_locations_hyps, None, None)
+                baryscores.append({'baryscore_{}'.format(key): value for key, value in baryscore.items()})
+        # Turn the list of per-sentence dictionaries into one dictionary of per-sentence lists.
+        baryscores_dic = {}
+        for k in baryscores[0]:
+            baryscores_dic[k] = [score[k] for score in baryscores]
+        return baryscores_dic
+    def baryscore(self, measures_locations_ref, measures_locations_hyps, weights_refs, weights_hyps):
+        """
+        Compare the Wasserstein barycenters of the reference and of the hypothesis measures.
+        Also records self.n_layers and self.d_bert, which w_barycenter reads.
+        :param measures_locations_ref: input measure reference locations (sequence of 2-D arrays)
+        :param measures_locations_hyps: input measure hypothesis locations (sequence of 2-D arrays)
+        :param weights_refs: references weights in the Wasserstein Barycenters, or None for uniform weights
+        :param weights_hyps: hypothesis weights in the Wasserstein Barycenters, or None for uniform weights
+        :return: dictionary with the key "W" (ot.emd2 cost between the two barycenters) and one
+                 "SD_<reg>" key per Sinkhorn regularisation value
+        """
+        if weights_hyps is not None or weights_refs is not None:
+            # Either both weight vectors are given or neither.
+            assert weights_refs is not None
+            assert weights_hyps is not None
+            # Sum once per vector instead of once per element.
+            total_hyps = sum(weights_hyps)
+            total_refs = sum(weights_refs)
+            weights_hyps = np.array([i / total_hyps for i in weights_hyps]).astype(np.float64)
+            weights_refs = np.array([i / total_refs for i in weights_refs]).astype(np.float64)
+        self.n_layers = len(measures_locations_ref)
+        self.d_bert = measures_locations_ref[0].shape[1]
+        ####################################
+        ## Compute Wasserstein Barycenter ##
+        ####################################
+        bary_ref = self.w_barycenter(measures_locations_ref, weights_refs)
+        bary_hyp = self.w_barycenter(measures_locations_hyps, weights_hyps)
+        #################################################
+        ## Compute Wasserstein and Sinkhorn Divergence ##
+        #################################################
+        C = ot.dist(bary_ref, bary_hyp)
+        # Uniform mass on the support points of each barycenter.
+        weights_first_barycenter = np.zeros((C.shape[0])) + 1 / C.shape[0]
+        weights_second_barycenter = np.zeros((C.shape[1])) + 1 / C.shape[1]
+        wasserstein_distance = ot.emd2(weights_first_barycenter, weights_second_barycenter, C,
+                                       log=True)[0]
+        dic_results = {
+            "W": wasserstein_distance,
+        }
+        # Each regularisation value appears once; the value 1 used to be listed twice, which only
+        # recomputed and overwrote the same "SD_1" entry.
+        for reg in [10, 1, 5, 0.1, 0.5, 0.01, 0.001]:
+            wasserstein_sinkhorn = ot.bregman.sinkhorn2(weights_first_barycenter, weights_second_barycenter, C,
+                                                        reg=reg, numItermax=10000).tolist()
+            if isinstance(wasserstein_sinkhorn, list):
+                wasserstein_sinkhorn = wasserstein_sinkhorn[0]  # for POT==0.7.0
+            dic_results['SD_{}'.format(reg)] = wasserstein_sinkhorn
+        return dic_results
+    def w_barycenter(self, measures_locations, weights):
+        """
+        Compute the free-support barycenter of the given discrete measures.
+        Reads self.d_bert and self.n_layers, which baryscore sets before calling this method.
+        :param measures_locations: location of the discrete input measures; the row count of the
+                                   first one fixes the number of barycenter support points
+        :param weights: weights of the input measures (the same vector is used for every measure),
+                        or None for uniform weights
+        :return: barycenter distribution as returned by ot.lp.free_support_barycenter
+        """
+        n_points = measures_locations[0].shape[0]
+        X_init = np.zeros((n_points, self.d_bert)).astype(np.float64)
+        if weights is not None:
+            shared_weights = weights / sum(weights)
+        else:
+            shared_weights = np.array([1 / n_points] * n_points)
+        measures_weights = [shared_weights] * self.n_layers
+        # Uniform mass on the barycenter support points.
+        b = np.array([1 / n_points] * n_points).astype(np.float64)
+        return ot.lp.free_support_barycenter(measures_locations, measures_weights, X_init,
+                                             b=b, numItermax=1000, verbose=False)
+    @property
+    def supports_multi_ref(self):
+        """
+        Tell whether several references per hypothesis are accepted.
+        :return: always False, BaryScore does not support multi ref
+        """
+        return False
+if __name__ == '__main__':
+    # Example showing how to use the BaryScore.
+    metric_call = BaryScoreMetric(use_idfs=False)
+    ref = [
+        'I like my cakes very much',
+        'I hate these cakes!']
+    hypothesis = ['I like my cakes very much',
+                  'I like my cakes very much']
+    # Keyword arguments put each list in the role named by the method signatures
+    # (positionally the two lists were passed the other way round).
+    metric_call.prepare_idfs(hyps=hypothesis, refs=ref)
+    final_preds = metric_call.evaluate_batch(batch_hyps=hypothesis, batch_refs=ref)
+    print(final_preds)