# Source: HITL-KG / config.example.yaml — commit c5880fb ("Upload 22 files", avojarot)
# HITL-KG Configuration File
# Copy to config.yaml and customize
# Server settings
# NOTE(review): presumably the address and port the web app listens on —
# confirm against the config loader. '0.0.0.0' is the IPv4 wildcard address.
host: '0.0.0.0'
port: 7860
# NOTE(review): assumed to toggle a debug mode; keep false outside local development.
debug: false
# Paths (relative to app root)
# cache_dir and sessions_dir both live underneath data_dir.
# This cache_dir is distinct from the cache_dir listed in the embedding
# section ('./data/embeddings').
data_dir: './data'
cache_dir: './data/cache'
sessions_dir: './data/sessions'
# Session management
# NOTE(review): presumably an upper bound on stored sessions — confirm how
# the app enforces it.
max_sessions: 1000
# Time-based settings carry their unit in the key name (hours / minutes).
session_max_age_hours: 24
session_cleanup_interval_minutes: 5
# Default language
# default_language is one of the codes listed in supported_languages.
default_language: 'en'
supported_languages:
  - 'en'
  - 'uk'
  - 'ru'
  - 'es'
  - 'de'
  - 'fr'
# Embedding configuration
# All keys below are children of `embedding`; they must stay indented so that
# `embedding.cache_dir` does not collide with the top-level `cache_dir`.
embedding:
  # Multilingual model supporting 50+ languages
  model_name: 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
  cache_dir: './data/embeddings'
  # Vector size produced by the model above (384 for this MiniLM model);
  # update it together with model_name.
  dimension: 384
  batch_size: 32
  device: 'cpu'  # 'cpu', 'cuda', or 'mps' (Apple Silicon)
# LLM configuration
# All keys below are children of `llm` and must stay indented under it.
llm:
  provider: 'local'  # 'openai' or 'local'
  # NOTE(review): 'gpt-4o-mini' is an OpenAI model name while provider is
  # 'local' — confirm whether the local provider uses or ignores this value.
  model: 'gpt-4o-mini'
  temperature: 0.7
  max_tokens: 2048
  # Prefer the OPENAI_API_KEY environment variable; never commit a real key.
  # api_key: ''  # Or set OPENAI_API_KEY environment variable
# Datasets configuration
# The system supports multiple dataset formats: obo, csv, json
# `datasets` is a list of mappings: each entry starts with `- name:` and its
# remaining keys are indented to line up with `name`.
datasets:
  # Disease Ontology (DOID)
  - name: 'disease_ontology'
    source_type: 'obo'
    source_url: 'https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/src/ontology/doid.obo'
    entity_category: 'disease'
    cache_enabled: true
    cache_max_age_days: 7

  # Symptom Ontology (SYMP)
  - name: 'symptom_ontology'
    source_type: 'obo'
    source_url: 'https://raw.githubusercontent.com/DiseaseOntology/SymptomOntology/main/symp.obo'
    entity_category: 'symptom'
    cache_enabled: true
    cache_max_age_days: 7

  # Example: Custom CSV dataset (uncomment to use)
  # Local files use `source_path` where the remote entries above use `source_url`.
  # - name: 'custom_symptoms'
  #   source_type: 'csv'
  #   source_path: './data/custom_symptoms.csv'
  #   entity_category: 'symptom'
  #   cache_enabled: false
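# Advanced settings
# To switch embedding models, edit `model_name` in the existing `embedding`
# section above rather than uncommenting a second `embedding:` key: duplicate
# keys are invalid YAML and most parsers silently keep only the last one.
# embedding:
#   # For domain-specific embeddings, consider:
#   # - "dmis-lab/biobert-base-cased-v1.2" (biomedical)
#   # - "emilyalsentzer/Bio_ClinicalBERT" (clinical)
#   # Both are BERT-base models (768-dimensional hidden states), so update
#   # `dimension` as well when switching from the 384-dimensional default.
#   model_name: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"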