Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Tom Aarsen
committed on
Commit
·
5613607
1
Parent(s):
e82960d
Use separate proprietary models list
Browse files
app.py
CHANGED
|
@@ -874,11 +874,42 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
| 874 |
"text2vec-large-chinese": 326,
|
| 875 |
"unsup-simcse-bert-base-uncased": 110,
|
| 876 |
"use-cmlm-multilingual": 472,
|
| 877 |
-
|
| 878 |
"xlm-roberta-base": 279,
|
| 879 |
"xlm-roberta-large": 560,
|
| 880 |
}
|
| 881 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 882 |
MODELS_TO_SKIP = {
|
| 883 |
"baseplate/instructor-large-1", # Duplicate
|
| 884 |
"radames/e5-large", # Duplicate
|
|
@@ -1996,7 +2027,7 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
|
| 1996 |
if model_type == "Open":
|
| 1997 |
masks.append(df["Model Size (Million Parameters)"] != "")
|
| 1998 |
elif model_type == "Proprietary":
|
| 1999 |
-
masks.append(df["Model Size (Million Parameters)"] == "")
|
| 2000 |
elif model_type == "Sentence Transformers":
|
| 2001 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
| 2002 |
if masks:
|
|
|
|
| 874 |
"text2vec-large-chinese": 326,
|
| 875 |
"unsup-simcse-bert-base-uncased": 110,
|
| 876 |
"use-cmlm-multilingual": 472,
|
| 877 |
+
"voyage-lite-02-instruct": 1220,
|
| 878 |
"xlm-roberta-base": 279,
|
| 879 |
"xlm-roberta-large": 560,
|
| 880 |
}
|
| 881 |
|
| 882 |
+
PROPRIETARY_MODELS = {
|
| 883 |
+
"Cohere-embed-multilingual-v3.0",
|
| 884 |
+
"Cohere-embed-multilingual-light-v3.0",
|
| 885 |
+
"Baichuan-text-embedding",
|
| 886 |
+
"mistral-embed",
|
| 887 |
+
"OpenSearch-text-hybrid",
|
| 888 |
+
"text-embedding-3-small",
|
| 889 |
+
"text-embedding-3-large",
|
| 890 |
+
"text-embedding-3-large-256",
|
| 891 |
+
"text-embedding-ada-002",
|
| 892 |
+
"text-similarity-ada-001",
|
| 893 |
+
"text-similarity-babbage-001",
|
| 894 |
+
"text-similarity-curie-001",
|
| 895 |
+
"text-similarity-davinci-001",
|
| 896 |
+
"text-search-ada-doc-001",
|
| 897 |
+
"text-search-ada-query-001",
|
| 898 |
+
"text-search-ada-001",
|
| 899 |
+
"text-search-curie-001",
|
| 900 |
+
"text-search-babbage-001",
|
| 901 |
+
"text-search-davinci-001",
|
| 902 |
+
"titan-embed-text-v1",
|
| 903 |
+
"voyage-2",
|
| 904 |
+
"voyage-code-2",
|
| 905 |
+
"voyage-lite-01-instruct",
|
| 906 |
+
"voyage-lite-02-instruct",
|
| 907 |
+
}
|
| 908 |
+
PROPRIETARY_MODELS = {
|
| 909 |
+
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
|
| 910 |
+
for model in PROPRIETARY_MODELS
|
| 911 |
+
}
|
| 912 |
+
|
| 913 |
MODELS_TO_SKIP = {
|
| 914 |
"baseplate/instructor-large-1", # Duplicate
|
| 915 |
"radames/e5-large", # Duplicate
|
|
|
|
| 2027 |
if model_type == "Open":
|
| 2028 |
masks.append(df["Model Size (Million Parameters)"] != "")
|
| 2029 |
elif model_type == "Proprietary":
|
| 2030 |
+
masks.append(df["Model"].isin(PROPRIETARY_MODELS))
|
| 2031 |
elif model_type == "Sentence Transformers":
|
| 2032 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
| 2033 |
if masks:
|