hasune committed on
Commit
559023d
·
1 Parent(s): 940a08c

Initial commit

Files changed (2)
  1. app.py +226 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,226 @@
+ import gradio as gr
+ import json
+ import os
+ from datetime import datetime
+
+ def extract_model_info(config_file):
+     """Extract model information from an uploaded config.json file."""
+     try:
+         if config_file is None:
+             return None
+
+         # Read the upload; gr.File(type="binary") passes raw bytes rather than a file object
+         config_content = config_file.decode('utf-8') if isinstance(config_file, bytes) else config_file.read().decode('utf-8')
+         config = json.loads(config_content)
+
+         # Extract the key fields
+         model_info = {
+             'architecture': config.get('model_type', 'Unknown'),
+             'hidden_size': config.get('hidden_size', 'Unknown'),
+             'num_layers': config.get('num_hidden_layers', config.get('num_layers', 'Unknown')),
+             'num_attention_heads': config.get('num_attention_heads', 'Unknown'),
+             'vocab_size': config.get('vocab_size', 'Unknown'),
+             'max_position_embeddings': config.get('max_position_embeddings', 'Unknown')
+         }
+
+         return model_info
+     except Exception as e:
+         return f"Error parsing config: {str(e)}"
+
+ def generate_model_card(config_file, model_name, short_description, model_description, dataset_name, task_type, language, license_type):
+     """Automatically generate a Hugging Face model card."""
+
+     # Fall back to defaults for any missing fields
+     if not model_name:
+         model_name = "My Model"
+     if not short_description:
+         short_description = "A fine-tuned model"
+     if not model_description:
+         model_description = "A fine-tuned model for specific tasks"
+     if not dataset_name:
+         dataset_name = "Custom dataset"
+     if not task_type:
+         task_type = "Text classification"
+     if not language:
+         language = "Korean"
+     if not license_type:
+         license_type = "apache-2.0"
+
+     # Extract model info from config.json, if one was uploaded
+     model_info = extract_model_info(config_file) if config_file else None
+
+     # Model card template (YAML front matter + Markdown body)
+     model_card = f"""---
+ license: {license_type}
+ language: {language.lower()}
+ pipeline_tag: text-classification
+ tags:
+ - {task_type.lower().replace(' ', '-')}
+ - {language.lower()}
+ base_model: ""
+ datasets:
+ - {dataset_name.lower().replace(' ', '-')}
+ ---
+
+ # {model_name}
+
+ {short_description}
+
+ ## Model Description
+
+ {model_description}
+
+ ## Model Details
+
+ """
+
+     # Append architecture details when config.json was parsed successfully
+     if model_info and isinstance(model_info, dict):
+         model_card += f"""- **Architecture**: {model_info['architecture']}
+ - **Hidden Size**: {model_info['hidden_size']}
+ - **Number of Layers**: {model_info['num_layers']}
+ - **Attention Heads**: {model_info['num_attention_heads']}
+ - **Vocabulary Size**: {model_info['vocab_size']}
+ - **Max Position Embeddings**: {model_info['max_position_embeddings']}
+
+ """
+
+     model_card += f"""## Intended Use
+
+ This model is intended for {task_type.lower()} tasks in {language}.
+
+ ## Training Data
+
+ The model was trained on: {dataset_name}
+
+ ## Training Procedure
+
+ ### Training Details
+ - **Task**: {task_type}
+ - **Language**: {language}
+ - **Date**: {datetime.now().strftime('%Y-%m-%d')}
+
+ ## Usage
+
+ ```python
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ tokenizer = AutoTokenizer.from_pretrained("your-username/{model_name.lower().replace(' ', '-')}")
+ model = AutoModelForSequenceClassification.from_pretrained("your-username/{model_name.lower().replace(' ', '-')}")
+
+ # Example usage
+ inputs = tokenizer("Your text here", return_tensors="pt")
+ outputs = model(**inputs)
+ ```
+
+ ## Limitations and Bias
+
+ - This model may have limitations based on the training data
+ - Please evaluate the model on your specific use case
+ - Consider potential biases in the training data
+
+ ## Citation
+
+ ```bibtex
+ @misc{{{model_name.lower().replace(' ', '_')}_2024,
+ author = {{Your Name}},
+ title = {{{model_name}}},
+ year = {{2024}},
+ url = {{https://huggingface.co/your-username/{model_name.lower().replace(' ', '-')}}}
+ }}
+ ```
+
+ ## Contact
+
+ For questions and comments, please contact [[email protected]](mailto:[email protected])
+ """
+
+     return model_card
+
+ # Gradio interface
+ with gr.Blocks(title="🤗 Model Card Generator") as demo:
+     gr.Markdown("# 🤗 Model Card Generator")
+     gr.Markdown("Enter your model's basic information and a Hugging Face-style model card will be generated automatically!")
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("### 📁 Model Info")
+             config_file = gr.File(
+                 label="config.json file (optional)",
+                 file_types=[".json"],
+                 type="binary"
+             )
+
+             gr.Markdown("### ✏️ Basic Info")
+             model_name = gr.Textbox(
+                 label="Model name",
+                 placeholder="e.g., Korean-BERT-Sentiment",
+                 value=""
+             )
+             short_description = gr.Textbox(
+                 label="Short description (one line)",
+                 placeholder="e.g., Korean sentiment analysis model based on BERT",
+                 lines=1
+             )
+             model_description = gr.Textbox(
+                 label="Detailed description",
+                 placeholder="e.g., A BERT model fine-tuned for Korean sentiment analysis; it classifies positive/negative/neutral sentiment.",
+                 lines=3
+             )
+             dataset_name = gr.Textbox(
+                 label="Training dataset",
+                 placeholder="e.g., Korean Sentiment Dataset"
+             )
+             task_type = gr.Dropdown(
+                 label="Task type",
+                 choices=["Text Classification", "Question Answering", "Text Generation", "Named Entity Recognition", "Text Summarization"],
+                 value="Text Classification"
+             )
+             language = gr.Dropdown(
+                 label="Language",
+                 choices=["Korean", "English", "Multilingual"],
+                 value="Korean"
+             )
+             license_type = gr.Dropdown(
+                 label="License",
+                 choices=["apache-2.0", "mit", "cc-by-4.0", "cc-by-nc-4.0", "cc-by-sa-4.0", "bsd-3-clause", "gpl-3.0", "other"],
+                 value="apache-2.0"
+             )
+
+             generate_btn = gr.Button("🚀 Generate Model Card", variant="primary")
+
+         with gr.Column():
+             gr.Markdown("### 📄 Generated Model Card")
+             output = gr.Textbox(
+                 label="Model Card (Markdown)",
+                 lines=25,
+                 max_lines=50,
+                 show_copy_button=True
+             )
+
+     generate_btn.click(
+         fn=generate_model_card,
+         inputs=[config_file, model_name, short_description, model_description, dataset_name, task_type, language, license_type],
+         outputs=output
+     )
+
+     gr.Markdown("### 💡 How to Use")
+     gr.Markdown("""
+ 1. **Upload a config.json file** (optional): upload your model's config.json and the architecture details are extracted automatically
+ 2. **Enter the basic info**:
+    - **Model name**: the model name you will use on Hugging Face
+    - **Short description**: a one-line summary shown at the top of the model card
+    - **Detailed description**: a fuller explanation of the model
+    - **License**: the usage license for the model
+ 3. **Click the generate button**: a standardized model card is produced automatically
+ 4. **Copy and use**: copy the generated text and use it as your README.md!
+
+ **Notes on common licenses:**
+ - **apache-2.0**: commercial use allowed; one of the most permissive licenses
+ - **mit**: simple and permissive
+ - **cc-by-4.0**: Creative Commons, attribution required
+ - **cc-by-nc-4.0**: non-commercial use only
+ """)
+
+ if __name__ == "__main__":
+     demo.launch()
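
For reference, a minimal sketch (not part of the commit) of driving `generate_model_card` outside the Gradio UI, for example to write the card straight to a README.md. It assumes app.py is importable from the working directory (importing it builds, but does not launch, the Blocks interface); all field values below are placeholders.

```python
# Illustrative only: call the generator directly and save the result.
from app import generate_model_card

card = generate_model_card(
    config_file=None,                    # skip config.json-derived details
    model_name="Korean-BERT-Sentiment",  # placeholder values throughout
    short_description="Korean sentiment analysis model based on BERT",
    model_description="A BERT model fine-tuned for Korean sentiment analysis.",
    dataset_name="Korean Sentiment Dataset",
    task_type="Text Classification",
    language="Korean",
    license_type="apache-2.0",
)

# Save the generated Markdown as a repository README.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(card)
```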
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ gradio
+ transformers
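
With the dependencies above installed, the config parsing can be sanity-checked without launching the app. A minimal sketch (not part of the commit) that feeds `extract_model_info` an in-memory config, mimicking the bytes payload that `gr.File(type="binary")` hands the callback; the config values are placeholders.

```python
# Illustrative only: exercise extract_model_info with a synthetic config.json payload.
import json
from app import extract_model_info

sample_config = {
    "model_type": "bert",
    "hidden_size": 768,
    "num_hidden_layers": 12,
    "num_attention_heads": 12,
    "vocab_size": 32000,
    "max_position_embeddings": 512,
}

info = extract_model_info(json.dumps(sample_config).encode("utf-8"))
print(info)  # expected: a dict with 'architecture': 'bert', 'hidden_size': 768, ...
```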