hasune committed on
Commit
559023d
·
1 Parent(s): 940a08c

Initial commit

Files changed (2)
  1. app.py +226 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,226 @@
+ import gradio as gr
+ import json
+ import os
+ from datetime import datetime
+
+ def extract_model_info(config_file):
+     """Extract model information from an uploaded config.json file."""
+     try:
+         if config_file is None:
+             return None
+
+         # Read the upload; gr.File(type="binary") passes raw bytes rather than a file object
+         config_content = config_file.decode('utf-8') if isinstance(config_file, bytes) else config_file.read().decode('utf-8')
+         config = json.loads(config_content)
+
+         # Extract the key fields
+         model_info = {
+             'architecture': config.get('model_type', 'Unknown'),
+             'hidden_size': config.get('hidden_size', 'Unknown'),
+             'num_layers': config.get('num_hidden_layers', config.get('num_layers', 'Unknown')),
+             'num_attention_heads': config.get('num_attention_heads', 'Unknown'),
+             'vocab_size': config.get('vocab_size', 'Unknown'),
+             'max_position_embeddings': config.get('max_position_embeddings', 'Unknown')
+         }
+
+         return model_info
+     except Exception as e:
+         return f"Error parsing config: {str(e)}"
+
+ def generate_model_card(config_file, model_name, short_description, model_description, dataset_name, task_type, language, license_type):
+     """Automatically generate a Hugging Face model card."""
+
+     # Fall back to defaults for any missing fields
+     if not model_name:
+         model_name = "My Model"
+     if not short_description:
+         short_description = "A fine-tuned model"
+     if not model_description:
+         model_description = "A fine-tuned model for specific tasks"
+     if not dataset_name:
+         dataset_name = "Custom dataset"
+     if not task_type:
+         task_type = "Text classification"
+     if not language:
+         language = "Korean"
+     if not license_type:
+         license_type = "apache-2.0"
+
+     # Extract model info from config.json, if one was uploaded
+     model_info = extract_model_info(config_file) if config_file else None
+
+     # Model card template (YAML front matter + Markdown body)
+     model_card = f"""---
+ license: {license_type}
+ language: {language.lower()}
+ pipeline_tag: text-classification
+ tags:
+ - {task_type.lower().replace(' ', '-')}
+ - {language.lower()}
+ base_model: ""
+ datasets:
+ - {dataset_name.lower().replace(' ', '-')}
+ ---
+
+ # {model_name}
+
+ {short_description}
+
+ ## Model Description
+
+ {model_description}
+
+ ## Model Details
+
+ """
+
+     # Append architecture details when config.json was parsed successfully
+     if model_info and isinstance(model_info, dict):
+         model_card += f"""- **Architecture**: {model_info['architecture']}
+ - **Hidden Size**: {model_info['hidden_size']}
+ - **Number of Layers**: {model_info['num_layers']}
+ - **Attention Heads**: {model_info['num_attention_heads']}
+ - **Vocabulary Size**: {model_info['vocab_size']}
+ - **Max Position Embeddings**: {model_info['max_position_embeddings']}
+
+ """
+
+     model_card += f"""## Intended Use
+
+ This model is intended for {task_type.lower()} tasks in {language}.
+
+ ## Training Data
+
+ The model was trained on: {dataset_name}
+
+ ## Training Procedure
+
+ ### Training Details
+ - **Task**: {task_type}
+ - **Language**: {language}
+ - **Date**: {datetime.now().strftime('%Y-%m-%d')}
+
+ ## Usage
+
+ ```python
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ tokenizer = AutoTokenizer.from_pretrained("your-username/{model_name.lower().replace(' ', '-')}")
+ model = AutoModelForSequenceClassification.from_pretrained("your-username/{model_name.lower().replace(' ', '-')}")
+
+ # Example usage
+ inputs = tokenizer("Your text here", return_tensors="pt")
+ outputs = model(**inputs)
+ ```
+
+ ## Limitations and Bias
+
+ - This model may have limitations based on the training data
+ - Please evaluate the model on your specific use case
+ - Consider potential biases in the training data
+
+ ## Citation
+
+ ```bibtex
+ @misc{{{model_name.lower().replace(' ', '_')}_2024,
+ author = {{Your Name}},
+ title = {{{model_name}}},
+ year = {{2024}},
+ url = {{https://huggingface.co/your-username/{model_name.lower().replace(' ', '-')}}}
+ }}
+ ```
+
+ ## Contact
+
+ For questions and comments, please contact [[email protected]](mailto:[email protected])
+ """
+
+     return model_card
+
+ # Gradio interface
+ with gr.Blocks(title="🤗 Model Card Generator") as demo:
+     gr.Markdown("# 🤗 Model Card Generator")
+     gr.Markdown("Enter your model's basic information and a Hugging Face-style model card will be generated automatically!")
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("### 📁 Model Info")
+             config_file = gr.File(
+                 label="config.json file (optional)",
+                 file_types=[".json"],
+                 type="binary"
+             )
+
+             gr.Markdown("### ✏️ Basic Info")
+             model_name = gr.Textbox(
+                 label="Model name",
+                 placeholder="e.g., Korean-BERT-Sentiment",
+                 value=""
+             )
+             short_description = gr.Textbox(
+                 label="Short description (one line)",
+                 placeholder="e.g., Korean sentiment analysis model based on BERT",
+                 lines=1
+             )
+             model_description = gr.Textbox(
+                 label="Detailed description",
+                 placeholder="e.g., A BERT model fine-tuned for Korean sentiment analysis; it classifies positive/negative/neutral sentiment.",
+                 lines=3
+             )
+             dataset_name = gr.Textbox(
+                 label="Training dataset",
+                 placeholder="e.g., Korean Sentiment Dataset"
+             )
+             task_type = gr.Dropdown(
+                 label="Task type",
+                 choices=["Text Classification", "Question Answering", "Text Generation", "Named Entity Recognition", "Text Summarization"],
+                 value="Text Classification"
+             )
+             language = gr.Dropdown(
+                 label="Language",
+                 choices=["Korean", "English", "Multilingual"],
+                 value="Korean"
+             )
+             license_type = gr.Dropdown(
+                 label="License",
+                 choices=["apache-2.0", "mit", "cc-by-4.0", "cc-by-nc-4.0", "cc-by-sa-4.0", "bsd-3-clause", "gpl-3.0", "other"],
+                 value="apache-2.0"
+             )
+
+             generate_btn = gr.Button("🚀 Generate Model Card", variant="primary")
+
+         with gr.Column():
+             gr.Markdown("### 📄 Generated Model Card")
+             output = gr.Textbox(
+                 label="Model Card (Markdown)",
+                 lines=25,
+                 max_lines=50,
+                 show_copy_button=True
+             )
+
+     generate_btn.click(
+         fn=generate_model_card,
+         inputs=[config_file, model_name, short_description, model_description, dataset_name, task_type, language, license_type],
+         outputs=output
+     )
+
+     gr.Markdown("### 💡 How to Use")
+     gr.Markdown("""
+ 1. **Upload a config.json file** (optional): upload your model's config.json and the architecture details are extracted automatically
+ 2. **Enter the basic info**:
+    - **Model name**: the model name you will use on Hugging Face
+    - **Short description**: a one-line summary shown at the top of the model card
+    - **Detailed description**: a fuller explanation of the model
+    - **License**: the usage license for the model
+ 3. **Click the generate button**: a standardized model card is produced automatically
+ 4. **Copy and use**: copy the generated text and use it as your README.md!
+
+ **Notes on common licenses:**
+ - **apache-2.0**: commercial use allowed; one of the most permissive licenses
+ - **mit**: simple and permissive
+ - **cc-by-4.0**: Creative Commons, attribution required
+ - **cc-by-nc-4.0**: non-commercial use only
+ """)
+
+ if __name__ == "__main__":
+     demo.launch()
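
For reference, a minimal sketch (not part of the commit) of driving `generate_model_card` outside the Gradio UI, for example to write the card straight to a README.md. It assumes app.py is importable from the working directory (importing it builds, but does not launch, the Blocks interface); all field values below are placeholders.

```python
# Illustrative only: call the generator directly and save the result.
from app import generate_model_card

card = generate_model_card(
    config_file=None,                    # skip config.json-derived details
    model_name="Korean-BERT-Sentiment",  # placeholder values throughout
    short_description="Korean sentiment analysis model based on BERT",
    model_description="A BERT model fine-tuned for Korean sentiment analysis.",
    dataset_name="Korean Sentiment Dataset",
    task_type="Text Classification",
    language="Korean",
    license_type="apache-2.0",
)

# Save the generated Markdown as a repository README.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(card)
```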
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ gradio
+ transformers
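
With the dependencies above installed, the config parsing can be sanity-checked without launching the app. A minimal sketch (not part of the commit) that feeds `extract_model_info` an in-memory config, mimicking the bytes payload that `gr.File(type="binary")` hands the callback; the config values are placeholders.

```python
# Illustrative only: exercise extract_model_info with a synthetic config.json payload.
import json
from app import extract_model_info

sample_config = {
    "model_type": "bert",
    "hidden_size": 768,
    "num_hidden_layers": 12,
    "num_attention_heads": 12,
    "vocab_size": 32000,
    "max_position_embeddings": 512,
}

info = extract_model_info(json.dumps(sample_config).encode("utf-8"))
print(info)  # expected: a dict with 'architecture': 'bert', 'hidden_size': 768, ...
```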