Spaces:

mteb
/

leaderboard

Running on CPU Upgrade

App Files Files Community

171

Tom Aarsen committed on Apr 2, 2024

Commit

5613607

1 Parent(s): e82960d

Use separate proprietary models list

Browse files

Files changed (1) hide show

app.py +33 -2

app.py CHANGED Viewed

@@ -874,11 +874,42 @@ EXTERNAL_MODEL_TO_SIZE = {
     "text2vec-large-chinese": 326,
     "unsup-simcse-bert-base-uncased": 110,
     "use-cmlm-multilingual": 472,
-    # "voyage-lite-02-instruct": 613, # <- Removed as we use unknown sizes to mark API models
     "xlm-roberta-base": 279,
     "xlm-roberta-large": 560,
 }
 MODELS_TO_SKIP = {
     "baseplate/instructor-large-1", # Duplicate
     "radames/e5-large", # Duplicate
@@ -1996,7 +2027,7 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
                 if model_type == "Open":
                     masks.append(df["Model Size (Million Parameters)"] != "")
                 elif model_type == "Proprietary":
-                    masks.append(df["Model Size (Million Parameters)"] == "")
                 elif model_type == "Sentence Transformers":
                     masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
             if masks:

     "text2vec-large-chinese": 326,
     "unsup-simcse-bert-base-uncased": 110,
     "use-cmlm-multilingual": 472,
+    "voyage-lite-02-instruct": 1220,
     "xlm-roberta-base": 279,
     "xlm-roberta-large": 560,
 }
+PROPRIETARY_MODELS = {  # raw model names that the "Proprietary" filter should match
+    "Cohere-embed-multilingual-v3.0",
+    "Cohere-embed-multilingual-light-v3.0",
+    "Baichuan-text-embedding",
+    "mistral-embed",
+    "OpenSearch-text-hybrid",
+    "text-embedding-3-small",
+    "text-embedding-3-large",
+    "text-embedding-3-large-256",
+    "text-embedding-ada-002",
+    "text-similarity-ada-001",
+    "text-similarity-babbage-001",
+    "text-similarity-curie-001",
+    "text-similarity-davinci-001",
+    "text-search-ada-doc-001",
+    "text-search-ada-query-001",
+    "text-search-ada-001",
+    "text-search-curie-001",
+    "text-search-babbage-001",
+    "text-search-davinci-001",
+    "titan-embed-text-v1",
+    "voyage-2",
+    "voyage-code-2",
+    "voyage-lite-01-instruct",
+    "voyage-lite-02-instruct",
+}
+PROPRIETARY_MODELS = {  # rebinds the same name: each raw name is replaced by its make_clickable_model() result
+    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
+    for model in PROPRIETARY_MODELS  # iterates the raw-name set bound just above; names without a known link fall back to the leaderboard URL
+}  # NOTE(review): presumably this form matches the values stored in df["Model"], which the filter tests with .isin() — confirm
 MODELS_TO_SKIP = {
     "baseplate/instructor-large-1", # Duplicate
     "radames/e5-large", # Duplicate
                 if model_type == "Open":
                     masks.append(df["Model Size (Million Parameters)"] != "")
                 elif model_type == "Proprietary":
+                    masks.append(df["Model"].isin(PROPRIETARY_MODELS))
                 elif model_type == "Sentence Transformers":
                     masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
             if masks: