pj-mathematician's picture
Add files using upload-large-folder tool
70af7bb verified
metadata
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - generated_from_trainer
  - dataset_size:124788
  - loss:GISTEmbedLoss
base_model: BAAI/bge-small-en-v1.5
widget:
  - source_sentence: 其他机械、设备和有形货物租赁服务代表
    sentences:
      - 其他机械和设备租赁服务工作人员
      - 电子和电信设备及零部件物流经理
      - 工业主厨
  - source_sentence: 公交车司机
    sentences:
      - 表演灯光设计师
      - 乙烯基地板安装工
      - 国际巴士司机
  - source_sentence: online communication manager
    sentences:
      - trades union official
      - social media manager
      - budget manager
  - source_sentence: Projektmanagerin
    sentences:
      - Projektmanager/Projektmanagerin
      - Category-Manager
      - Infanterist
  - source_sentence: Volksvertreter
    sentences:
      - Parlamentarier
      - Oberbürgermeister
      - Konsul
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
  - cosine_accuracy@1
  - cosine_accuracy@20
  - cosine_accuracy@50
  - cosine_accuracy@100
  - cosine_accuracy@150
  - cosine_accuracy@200
  - cosine_precision@1
  - cosine_precision@20
  - cosine_precision@50
  - cosine_precision@100
  - cosine_precision@150
  - cosine_precision@200
  - cosine_recall@1
  - cosine_recall@20
  - cosine_recall@50
  - cosine_recall@100
  - cosine_recall@150
  - cosine_recall@200
  - cosine_ndcg@1
  - cosine_ndcg@20
  - cosine_ndcg@50
  - cosine_ndcg@100
  - cosine_ndcg@150
  - cosine_ndcg@200
  - cosine_mrr@1
  - cosine_mrr@20
  - cosine_mrr@50
  - cosine_mrr@100
  - cosine_mrr@150
  - cosine_mrr@200
  - cosine_map@1
  - cosine_map@20
  - cosine_map@50
  - cosine_map@100
  - cosine_map@150
  - cosine_map@200
  - cosine_map@500
model-index:
  - name: SentenceTransformer based on BAAI/bge-small-en-v1.5
    results:
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full en
          type: full_en
        metrics:
          - type: cosine_accuracy@1
            value: 0.6571428571428571
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.9904761904761905
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.9904761904761905
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.9904761904761905
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.9904761904761905
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9904761904761905
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.6571428571428571
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.5047619047619047
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.30857142857142855
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.18666666666666668
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.13269841269841268
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.1029047619047619
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.0680237860830842
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.539060339827615
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.7269844521994231
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.8337131628681403
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.879935375805825
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.9050529457831012
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.6571428571428571
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.686462471196106
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.7052824081502371
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.7601614355798527
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.7798476891938094
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.7898871141566125
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.6571428571428571
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.8095238095238095
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.8095238095238095
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.8095238095238095
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.8095238095238095
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.8095238095238095
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.6571428571428571
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.5451065538458748
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.5347802076206865
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.567702602098158
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.5756725358487015
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.5789669196636947
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.5832808543489026
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full es
          type: full_es
        metrics:
          - type: cosine_accuracy@1
            value: 0.11351351351351352
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 1
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 1
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 1
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 1
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 1
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.11351351351351352
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.4913513513513514
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.316972972972973
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.19843243243243244
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.146990990990991
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.11778378378378378
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.002992884071419607
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.32341666838263944
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.4630260221149236
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.5419804526017848
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.5826718468403144
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.6149262657286421
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.11351351351351352
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.5389058089458943
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.5002442028172164
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.5138591255215345
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.5346372349516221
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.5502474315848075
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.11351351351351352
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.5444744744744745
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.5444744744744745
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.5444744744744745
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.5444744744744745
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.5444744744744745
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.11351351351351352
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.40352984921129137
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.3418539578142162
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.339373689987275
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.3478760829213016
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.3533435915341769
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.363222785830563
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full de
          type: full_de
        metrics:
          - type: cosine_accuracy@1
            value: 0.2955665024630542
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.9211822660098522
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.9655172413793104
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.9753694581280788
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.9852216748768473
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9852216748768473
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.2955665024630542
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.424384236453202
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.28167487684729065
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.17995073891625615
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.13589490968801315
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.1108128078817734
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.01108543831680986
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.2600945586038909
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.3844030994839744
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.4672649807153451
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.5171228717670064
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.5533299912627624
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.2955665024630542
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.4593107411252075
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.42313178566078624
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.4367043857530601
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.4621847371016286
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.48019099347834654
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.2955665024630542
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.4892678749821603
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.49065090899064223
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.49080251743966435
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.4908799208299932
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.4908799208299932
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.2955665024630542
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.3228620941051522
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.2644260812747752
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.2576011230547815
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.2666548881846307
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.27224102651692533
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.28312561300678324
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: full zh
          type: full_zh
        metrics:
          - type: cosine_accuracy@1
            value: 0.3300970873786408
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.7184466019417476
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.8155339805825242
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.8932038834951457
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.9223300970873787
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9320388349514563
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.3300970873786408
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.16796116504854372
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.09262135922330093
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.05815533980582525
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.04563106796116505
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.03771844660194174
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.02573649124630195
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.17402459309945448
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.23816219248808224
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.28291725637657983
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.32619122038725784
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.3543394793587958
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.3300970873786408
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.23956118764265208
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.2341910409667355
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.2559822552765659
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.27344655996496936
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.28432223965649855
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.3300970873786408
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.43064643766798927
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.43374387043765733
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.4348781442268605
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.4351279925655956
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.4351822313246128
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.3300970873786408
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.14301006319225865
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.12425793473074002
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.12962575663735706
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.13242860022521366
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.13374255185989983
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.13779434547799502
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: mix es
          type: mix_es
        metrics:
          - type: cosine_accuracy@1
            value: 0.40977639105564223
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.7618304732189287
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.8512740509620385
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.9105564222568903
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.9381175247009881
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9542381695267811
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.40977639105564223
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.0890015600624025
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.04168486739469579
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.022854914196567863
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.01585370081469925
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.012220488819552783
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.15567317930812472
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.6574783943738703
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.7691404799049105
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.8454015303469281
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.8795148948815096
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.9035051878265606
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.40977639105564223
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.5094055696124096
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.5398029704628499
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.5563939454831869
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.5630335952477792
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.5674217099859529
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.40977639105564223
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.4963374711503733
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.49930745416180927
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.5001571935146001
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.5003842041203103
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.5004783417497985
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.40977639105564223
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.4236549905504724
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.4311498037279026
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.43327838927965695
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.4338451382952763
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.4341307997461715
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.4345995592976099
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: mix de
          type: mix_de
        metrics:
          - type: cosine_accuracy@1
            value: 0.2912116484659386
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.6526261050442018
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.7550702028081123
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.8460738429537181
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.8876755070202809
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.9173166926677067
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.2912116484659386
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.07308892355694228
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.03583983359334374
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.02058242329693188
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.014609117698041255
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.011515860634425378
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.10977639105564223
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.5342520367481365
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.6529207834980065
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.7505633558675681
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.7989166233315999
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.8393482405962905
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.2912116484659386
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.39027078330836906
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.4224011615840446
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.4438393956774872
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.45327900259303716
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.4606831999024183
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.2912116484659386
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.37544207546115405
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.37870409367323543
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.37999194359776256
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.3803335431113417
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.3805079454038972
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.2912116484659386
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.3075927383942124
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.31502827814698436
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.31767149302992986
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.31842095656425334
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.3189017921904424
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.31963709557315734
            name: Cosine Map@500
      - task:
          type: information-retrieval
          name: Information Retrieval
        dataset:
          name: mix zh
          type: mix_zh
        metrics:
          - type: cosine_accuracy@1
            value: 0.09498956158663883
            name: Cosine Accuracy@1
          - type: cosine_accuracy@20
            value: 0.35281837160751567
            name: Cosine Accuracy@20
          - type: cosine_accuracy@50
            value: 0.48851774530271397
            name: Cosine Accuracy@50
          - type: cosine_accuracy@100
            value: 0.5960334029227558
            name: Cosine Accuracy@100
          - type: cosine_accuracy@150
            value: 0.657098121085595
            name: Cosine Accuracy@150
          - type: cosine_accuracy@200
            value: 0.7025052192066806
            name: Cosine Accuracy@200
          - type: cosine_precision@1
            value: 0.09498956158663883
            name: Cosine Precision@1
          - type: cosine_precision@20
            value: 0.03102818371607516
            name: Cosine Precision@20
          - type: cosine_precision@50
            value: 0.018528183716075158
            name: Cosine Precision@50
          - type: cosine_precision@100
            value: 0.011550104384133612
            name: Cosine Precision@100
          - type: cosine_precision@150
            value: 0.008601252609603338
            name: Cosine Precision@150
          - type: cosine_precision@200
            value: 0.007074634655532359
            name: Cosine Precision@200
          - type: cosine_recall@1
            value: 0.03218510786360473
            name: Cosine Recall@1
          - type: cosine_recall@20
            value: 0.20682473406899293
            name: Cosine Recall@20
          - type: cosine_recall@50
            value: 0.30616239188786165
            name: Cosine Recall@50
          - type: cosine_recall@100
            value: 0.38175970109686186
            name: Cosine Recall@100
          - type: cosine_recall@150
            value: 0.4266063558339132
            name: Cosine Recall@150
          - type: cosine_recall@200
            value: 0.4677598005103224
            name: Cosine Recall@200
          - type: cosine_ndcg@1
            value: 0.09498956158663883
            name: Cosine Ndcg@1
          - type: cosine_ndcg@20
            value: 0.13726194438538974
            name: Cosine Ndcg@20
          - type: cosine_ndcg@50
            value: 0.16515347653846224
            name: Cosine Ndcg@50
          - type: cosine_ndcg@100
            value: 0.18245718935168395
            name: Cosine Ndcg@100
          - type: cosine_ndcg@150
            value: 0.1915123607890909
            name: Cosine Ndcg@150
          - type: cosine_ndcg@200
            value: 0.1993072789458329
            name: Cosine Ndcg@200
          - type: cosine_mrr@1
            value: 0.09498956158663883
            name: Cosine Mrr@1
          - type: cosine_mrr@20
            value: 0.15082760305134044
            name: Cosine Mrr@20
          - type: cosine_mrr@50
            value: 0.1552139914541245
            name: Cosine Mrr@50
          - type: cosine_mrr@100
            value: 0.1567682757261486
            name: Cosine Mrr@100
          - type: cosine_mrr@150
            value: 0.1572599746321091
            name: Cosine Mrr@150
          - type: cosine_mrr@200
            value: 0.15752063728764779
            name: Cosine Mrr@200
          - type: cosine_map@1
            value: 0.09498956158663883
            name: Cosine Map@1
          - type: cosine_map@20
            value: 0.08696228866764828
            name: Cosine Map@20
          - type: cosine_map@50
            value: 0.0925585898977933
            name: Cosine Map@50
          - type: cosine_map@100
            value: 0.09443690504503688
            name: Cosine Map@100
          - type: cosine_map@150
            value: 0.09508196706389692
            name: Cosine Map@150
          - type: cosine_map@200
            value: 0.09552658777692054
            name: Cosine Map@200
          - type: cosine_map@500
            value: 0.09647934265199021
            name: Cosine Map@500

SentenceTransformer based on BAAI/bge-small-en-v1.5

This is a sentence-transformers model fine-tuned from BAAI/bge-small-en-v1.5 on the full_en, full_de, full_es, full_zh and mix datasets. It maps sentences and paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: BAAI/bge-small-en-v1.5
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 384 dimensions
  • Similarity Function: Cosine Similarity
  • Training Datasets:
    • full_en
    • full_de
    • full_es
    • full_zh
    • mix

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'Volksvertreter',
    'Parlamentarier',
    'Oberbürgermeister',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Information Retrieval

Metric full_en full_es full_de full_zh mix_es mix_de mix_zh
cosine_accuracy@1 0.6571 0.1135 0.2956 0.3301 0.4098 0.2912 0.095
cosine_accuracy@20 0.9905 1.0 0.9212 0.7184 0.7618 0.6526 0.3528
cosine_accuracy@50 0.9905 1.0 0.9655 0.8155 0.8513 0.7551 0.4885
cosine_accuracy@100 0.9905 1.0 0.9754 0.8932 0.9106 0.8461 0.596
cosine_accuracy@150 0.9905 1.0 0.9852 0.9223 0.9381 0.8877 0.6571
cosine_accuracy@200 0.9905 1.0 0.9852 0.932 0.9542 0.9173 0.7025
cosine_precision@1 0.6571 0.1135 0.2956 0.3301 0.4098 0.2912 0.095
cosine_precision@20 0.5048 0.4914 0.4244 0.168 0.089 0.0731 0.031
cosine_precision@50 0.3086 0.317 0.2817 0.0926 0.0417 0.0358 0.0185
cosine_precision@100 0.1867 0.1984 0.18 0.0582 0.0229 0.0206 0.0116
cosine_precision@150 0.1327 0.147 0.1359 0.0456 0.0159 0.0146 0.0086
cosine_precision@200 0.1029 0.1178 0.1108 0.0377 0.0122 0.0115 0.0071
cosine_recall@1 0.068 0.003 0.0111 0.0257 0.1557 0.1098 0.0322
cosine_recall@20 0.5391 0.3234 0.2601 0.174 0.6575 0.5343 0.2068
cosine_recall@50 0.727 0.463 0.3844 0.2382 0.7691 0.6529 0.3062
cosine_recall@100 0.8337 0.542 0.4673 0.2829 0.8454 0.7506 0.3818
cosine_recall@150 0.8799 0.5827 0.5171 0.3262 0.8795 0.7989 0.4266
cosine_recall@200 0.9051 0.6149 0.5533 0.3543 0.9035 0.8393 0.4678
cosine_ndcg@1 0.6571 0.1135 0.2956 0.3301 0.4098 0.2912 0.095
cosine_ndcg@20 0.6865 0.5389 0.4593 0.2396 0.5094 0.3903 0.1373
cosine_ndcg@50 0.7053 0.5002 0.4231 0.2342 0.5398 0.4224 0.1652
cosine_ndcg@100 0.7602 0.5139 0.4367 0.256 0.5564 0.4438 0.1825
cosine_ndcg@150 0.7798 0.5346 0.4622 0.2734 0.563 0.4533 0.1915
cosine_ndcg@200 0.7899 0.5502 0.4802 0.2843 0.5674 0.4607 0.1993
cosine_mrr@1 0.6571 0.1135 0.2956 0.3301 0.4098 0.2912 0.095
cosine_mrr@20 0.8095 0.5445 0.4893 0.4306 0.4963 0.3754 0.1508
cosine_mrr@50 0.8095 0.5445 0.4907 0.4337 0.4993 0.3787 0.1552
cosine_mrr@100 0.8095 0.5445 0.4908 0.4349 0.5002 0.38 0.1568
cosine_mrr@150 0.8095 0.5445 0.4909 0.4351 0.5004 0.3803 0.1573
cosine_mrr@200 0.8095 0.5445 0.4909 0.4352 0.5005 0.3805 0.1575
cosine_map@1 0.6571 0.1135 0.2956 0.3301 0.4098 0.2912 0.095
cosine_map@20 0.5451 0.4035 0.3229 0.143 0.4237 0.3076 0.087
cosine_map@50 0.5348 0.3419 0.2644 0.1243 0.4311 0.315 0.0926
cosine_map@100 0.5677 0.3394 0.2576 0.1296 0.4333 0.3177 0.0944
cosine_map@150 0.5757 0.3479 0.2667 0.1324 0.4338 0.3184 0.0951
cosine_map@200 0.579 0.3533 0.2722 0.1337 0.4341 0.3189 0.0955
cosine_map@500 0.5833 0.3632 0.2831 0.1378 0.4346 0.3196 0.0965

Training Details

Training Datasets

full_en

full_en

  • Dataset: full_en
  • Size: 28,880 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string |
    | details | min: 3 tokens; mean: 5.0 tokens; max: 10 tokens | min: 3 tokens; mean: 5.01 tokens; max: 13 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | air commodore | flight lieutenant |
    | command and control officer | flight officer |
    | air commodore | command and control officer |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
full_de

full_de

  • Dataset: full_de
  • Size: 23,023 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string |
    | details | min: 3 tokens; mean: 11.05 tokens; max: 45 tokens | min: 3 tokens; mean: 11.43 tokens; max: 45 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | Staffelkommandantin | Kommodore |
    | Luftwaffenoffizierin | Luftwaffenoffizier/Luftwaffenoffizierin |
    | Staffelkommandantin | Luftwaffenoffizierin |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
full_es

full_es

  • Dataset: full_es
  • Size: 20,724 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string |
    | details | min: 3 tokens; mean: 12.95 tokens; max: 50 tokens | min: 3 tokens; mean: 12.57 tokens; max: 50 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | jefe de escuadrón | instructor |
    | comandante de aeronave | instructor de simulador |
    | instructor | oficial del Ejército del Aire |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
full_zh

full_zh

  • Dataset: full_zh
  • Size: 30,401 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string |
    | details | min: 4 tokens; mean: 8.36 tokens; max: 20 tokens | min: 4 tokens; mean: 8.95 tokens; max: 27 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | 技术总监 | 技术和运营总监 |
    | 技术总监 | 技术主管 |
    | 技术总监 | 技术艺术总监 |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    
mix

mix

  • Dataset: mix
  • Size: 21,760 training samples
  • Columns: anchor and positive
  • Approximate statistics based on the first 1000 samples:
    |         | anchor | positive |
    |:--------|:-------|:---------|
    | type    | string | string |
    | details | min: 2 tokens; mean: 5.65 tokens; max: 14 tokens | min: 2 tokens; mean: 10.08 tokens; max: 30 tokens |
  • Samples:
    | anchor | positive |
    |:-------|:---------|
    | technical manager | Technischer Direktor für Bühne, Film und Fernsehen |
    | head of technical | directora técnica |
    | head of technical department | 技术艺术总监 |
  • Loss: GISTEmbedLoss with these parameters:
    {'guide': SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
      (2): Normalize()
    ), 'temperature': 0.01, 'margin_strategy': 'absolute', 'margin': 0.0}
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • gradient_accumulation_steps: 2
  • num_train_epochs: 5
  • warmup_ratio: 0.05
  • log_on_each_node: False
  • fp16: True
  • dataloader_num_workers: 4
  • ddp_find_unused_parameters: True
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 2
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 5
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.05
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: False
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: True
  • dataloader_num_workers: 4
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • tp_size: 0
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: True
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: None
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • include_for_metrics: []
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • use_liger_kernel: False
  • eval_use_gather_object: False
  • average_tokens_across_devices: False
  • prompts: None
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss full_en_cosine_ndcg@200 full_es_cosine_ndcg@200 full_de_cosine_ndcg@200 full_zh_cosine_ndcg@200 mix_es_cosine_ndcg@200 mix_de_cosine_ndcg@200 mix_zh_cosine_ndcg@200
-1 -1 - 0.7322 0.4690 0.3853 0.2723 0.3209 0.2244 0.0919
0.0021 1 23.8878 - - - - - - -
0.2058 100 7.2098 - - - - - - -
0.4115 200 4.2635 0.7800 0.5132 0.4268 0.2798 0.4372 0.2996 0.1447
0.6173 300 4.1931 - - - - - - -
0.8230 400 3.73 0.7863 0.5274 0.4451 0.2805 0.4762 0.3455 0.1648
1.0309 500 3.3569 - - - - - - -
1.2366 600 3.6464 0.7868 0.5372 0.4540 0.2813 0.5063 0.3794 0.1755
1.4424 700 3.0772 - - - - - - -
1.6481 800 3.114 0.7906 0.5391 0.4576 0.2832 0.5221 0.4047 0.1779
1.8539 900 2.9246 - - - - - - -
2.0617 1000 2.7479 0.7873 0.5423 0.4631 0.2871 0.5323 0.4143 0.1843
2.2675 1100 3.049 - - - - - - -
2.4733 1200 2.6137 0.7878 0.5418 0.4685 0.2870 0.5470 0.4339 0.1932
2.6790 1300 2.8607 - - - - - - -
2.8848 1400 2.7071 0.7889 0.5465 0.4714 0.2891 0.5504 0.4362 0.1944
3.0926 1500 2.7012 - - - - - - -
3.2984 1600 2.7423 0.7882 0.5471 0.4748 0.2868 0.5542 0.4454 0.1976
3.5041 1700 2.5316 - - - - - - -
3.7099 1800 2.6344 0.7900 0.5498 0.4763 0.2857 0.5639 0.4552 0.1954
3.9156 1900 2.4983 - - - - - - -
4.1235 2000 2.5423 0.7894 0.5499 0.4786 0.2870 0.5644 0.4576 0.1974
4.3292 2100 2.5674 - - - - - - -
4.5350 2200 2.6237 0.7899 0.5502 0.4802 0.2843 0.5674 0.4607 0.1993

Framework Versions

  • Python: 3.11.11
  • Sentence Transformers: 4.1.0
  • Transformers: 4.51.3
  • PyTorch: 2.6.0+cu124
  • Accelerate: 1.6.0
  • Datasets: 3.5.0
  • Tokenizers: 0.21.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

GISTEmbedLoss

@misc{solatorio2024gistembed,
    title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
    author={Aivin V. Solatorio},
    year={2024},
    eprint={2402.16829},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}