#!/usr/bin/env python3
"""Generate a README.md by filling ``{{PLACEHOLDER}}`` markers in a template.

Usage:
    python generate_readme.py --config config.yaml
    python generate_readme.py -c config.yaml -t README.md -o README_generated.md
    python generate_readme.py --create-sample
"""

import argparse
import sys
from pathlib import Path

import yaml

# First characters that mark a config string as an already laid-out line
# (indented text or a YAML list item) rather than a plain scalar value.
_PRERENDERED_PREFIXES = (" ", "\t", "-")


def load_config(config_path):
    """Load the placeholder configuration from a YAML file.

    Args:
        config_path: Path of the YAML file to read (UTF-8).

    Returns:
        The top-level mapping of placeholder names to values. An empty file
        yields an empty dict.

    Raises:
        ValueError: If the top-level YAML node is not a mapping.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty document; treat that as "no
    # placeholders" instead of failing later on ``config.items()``.
    if config is None:
        return {}
    if not isinstance(config, dict):
        raise ValueError(
            f"Config harus berupa mapping YAML, bukan "
            f"{type(config).__name__}: {config_path}"
        )
    return config


def load_template(template_path):
    """Return the full text of the README template at *template_path* (UTF-8)."""
    with open(template_path, "r", encoding="utf-8") as f:
        return f.read()


def _is_prerendered_lines(value):
    """Return True if *value* is a non-empty list of already-formatted lines.

    Such lists (every item a string starting with whitespace or ``-``) carry
    their own layout and must be emitted verbatim.
    """
    return (
        isinstance(value, list)
        and bool(value)
        and all(
            isinstance(item, str) and item[:1] in _PRERENDERED_PREFIXES
            for item in value
        )
    )


def replace_placeholders(template, config):
    """Substitute every ``{{KEY}}`` in *template* with the value of ``config[KEY]``.

    Args:
        template: Template text containing ``{{KEY}}`` markers.
        config: Mapping of placeholder names to values.

    Returns:
        The template text with all known placeholders replaced. Placeholders
        without a matching config key are left untouched.

    Value rendering:
        * A list of pre-formatted lines (see ``_is_prerendered_lines``) is
          joined with newlines as-is. Passing such lines through ``yaml.dump``
          would quote each one and prepend a second ``- `` marker.
        * Any other list or dict is rendered as block-style YAML.
        * Everything else is rendered with ``str()``.
    """
    content = template

    # Replacements are applied one key at a time, in config order.
    for key, value in config.items():
        # Quadruple braces produce the literal ``{{`` / ``}}`` delimiters.
        placeholder = f"{{{{{key}}}}}"

        if _is_prerendered_lines(value):
            value = "\n".join(value)
        elif isinstance(value, (list, dict)):
            value = yaml.dump(
                value, default_flow_style=False, allow_unicode=True
            ).strip()

        content = content.replace(placeholder, str(value))

    return content


def generate_readme(config_path, template_path, output_path):
    """Render the template with the config values and write the result.

    Args:
        config_path: Path of the YAML config file.
        template_path: Path of the README template.
        output_path: Destination file; overwritten if it exists.
    """
    config = load_config(config_path)
    template = load_template(template_path)

    readme_content = replace_placeholders(template, config)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(readme_content)

    print(f"README berhasil digenerate: {output_path}")


def create_sample_config(output_path):
    """Write an example config file to *output_path* as block-style YAML."""
    # NOTE(review): this file was recovered from a whitespace-collapsed copy.
    # Line breaks inside the multi-line strings below were reconstructed, and
    # the original leading indentation of the TAGS / METRICS lines could not
    # be recovered -- adjust it to match the nesting used in the template.
    sample_config = {
        # Metadata
        "LICENSE": "mit",
        "LANGUAGE": "id",
        "LIBRARY_NAME": "transformers",
        "PIPELINE_TAG": "text-classification",
        "DATASET_TYPE": "custom",
        "INFERENCE_ENABLED": True,
        # Model info
        "MODEL_NAME": "BERT Indonesian Topic Classification (16 labels)",
        "MODEL_TITLE": "BERT Indonesian Topic Classification (16 labels)",
        "BASE_MODEL": "cahya/bert-base-indonesian-1.5G",
        "TASK_TYPE": "text-classification",
        "TASK_NAME": "Topic Classification",
        "TASK_DESCRIPTION": "Topic classification (single-label)",
        "NUM_LABELS": 16,
        "LABELS_INLINE": "Politik, Ekonomi, Olahraga, Teknologi, dll.",
        "DATASET_NAME": "Custom Dataset (ID)",
        "SPLIT_TYPE": "validation",
        # Visualization
        "VISUALIZATION_TYPE": "Confusion Matrix",
        "VISUALIZATION_FILENAME": "confusion_matrix.png",
        # Tags (as a list of pre-formatted lines)
        "TAGS": [
            " - indonesian",
            " - indonesia",
            " - topic-classification",
            " - bert",
        ],
        # Metrics (as a list of pre-formatted lines)
        "METRICS": [
            " - type: accuracy",
            " value: 0.921",
            " - type: f1",
            " name: f1_macro",
            " value: 0.893",
            " - type: f1",
            " name: f1_micro",
            " value: 0.912",
        ],
        # Content sections
        "INTENDED_USE": "- Klasifikasi topik untuk teks berbahasa Indonesia pada domain umum.",
        "LIMITATIONS": """- Performa bergantung pada distribusi label dataset Anda.
- Teks OOD (di luar domain data latih) bisa turun akurasinya.""",
        "TRAINING_DETAILS": """- Framework: 🤗 Transformers (PyTorch)
- Max length: 512
- Batch size: 16
- Epochs: 3
- Learning rate: 2e-5
- Weight decay: 0.01
- Warmup ratio: 0.1
- Scheduler: linear
- Mixed precision: true""",
        "EVALUATION_DETAILS": """- Split: 80/20 stratified
- Accuracy (val): **92.1%**
- F1 Macro (val): **89.3%**
- F1 Micro (val): **91.2%**

Per-label report tersedia pada artifact `eval_results.json`.""",
        "USAGE_CODE": """from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

repo_id = "your-username/model-name"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id).eval()

text = "Contoh teks untuk diklasifikasi."
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
pred_id = logits.argmax(-1).item()
label = model.config.id2label[pred_id]
print(label)""",
        "ADDITIONAL_INFO": """## Citation

Jika menggunakan model ini, mohon kutip:

```bibtex
@misc{your-model-2025,
  title={Model Title},
  author={Your Name},
  year={2025},
  url={https://huggingface.co/your-username/model-name}
}
```""",
    }

    with open(output_path, "w", encoding="utf-8") as f:
        yaml.dump(
            sample_config, f, default_flow_style=False, allow_unicode=True, indent=2
        )

    print(f"Sample config dibuat: {output_path}")


def main():
    """Parse command-line arguments and run the requested action.

    Returns:
        Process exit status: 0 on success, 2 when ``--config`` is missing,
        1 for any other reported error.
    """
    parser = argparse.ArgumentParser(description="Generate README dari template")
    parser.add_argument("--config", "-c", help="Path ke file config YAML")
    parser.add_argument(
        "--template",
        "-t",
        default="README.md",
        help="Path ke template README (default: README.md)",
    )
    parser.add_argument(
        "--output",
        "-o",
        default="README_generated.md",
        help="Path output README (default: README_generated.md)",
    )
    parser.add_argument(
        "--create-sample", action="store_true", help="Buat sample config file"
    )

    args = parser.parse_args()

    if args.create_sample:
        create_sample_config("sample_config.yaml")
        return 0

    # Errors go to stderr with a non-zero status so that shells and CI
    # pipelines can detect the failure.
    if not args.config:
        print(
            "Error: --config diperlukan kecuali menggunakan --create-sample",
            file=sys.stderr,
        )
        parser.print_help(sys.stderr)
        return 2

    if not Path(args.config).exists():
        print(f"Error: Config file tidak ditemukan: {args.config}", file=sys.stderr)
        return 1

    if not Path(args.template).exists():
        print(
            f"Error: Template file tidak ditemukan: {args.template}", file=sys.stderr
        )
        return 1

    try:
        generate_readme(args.config, args.template, args.output)
    except ValueError as err:
        # Raised by load_config when the YAML root is not a mapping.
        print(f"Error: {err}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())