DataPilot
/

ArrowMint-Gemma3-4B-ChocoMint-jp

@@ -25,199 +25,95 @@ Gemma 3 ファミリーと同様に、テキストと画像のマルチモーダ
 まず、必要なライブラリをインストールします。Gemma 3は `transformers` 4.50.0 以降が必要です。
 ```sh
-pip install -U transformers accelerate Pillow vllm
 # CPUのみで使用する場合や特定の環境ではvllmのインストールが異なる場合があります。
 # vLLMの公式ドキュメントを参照してください: https://docs.vllm.ai/en/latest/getting_started/installation.html
 ```
-### vLLMでの推論
-[vLLM](https://github.com/vllm-project/vllm) を使用して高速な推論を行うサンプルコードです。
 ```python
-from vllm import LLM, SamplingParams
-from transformers import AutoTokenizer
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-jp"
-# Gemma 3のチャットテンプレートを使用するためにTokenizerをロード
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-# プロンプトの準備 (チャット形式)
-messages = [
-    {"role": "system", "content": "あなたは親切なAIアシスタントです。"},
-    {"role": "user", "content": "日本の首都とその見どころを教えてください。"}
-]
-# チャットテンプレートを適用
-# vLLMは直接チャットテンプレートを適用できないため、tokenizerで文字列に変換します
-# 注意: vLLMのバージョンや設定によっては、より効率的な方法がある可能性があります
-prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-# LLMの初期化
-# 必要に応じて tensor_parallel_size を調整してください
-llm = LLM(model=model_id, trust_remote_code=True) # Gemma 3 モデルによっては trust_remote_code が必要
-# サンプリングパラメータの設定
-sampling_params = SamplingParams(temperature=0.7, top_p=0.9, max_tokens=512)
-# 推論の実行
-outputs = llm.generate(prompt, sampling_params)
-# 結果の表示
-for output in outputs:
-    generated_text = output.outputs[0].text
-    print(f"Generated text: {generated_text!r}")
-# >>> Generated text: '東京は日本の首都であり、多くの魅力的な観光スポットがあります。\n\n*   **東京タワー:** 市街を一望できる象徴的なランドマークです。\n*   **浅草寺:** 歴史ある寺院で、仲見世通りでの買い物も楽しめます。\n*   **渋谷スクランブル交差点:** 世界的に有名な活気あふれる交差点です。\n*   **新宿御苑:** 都心にある広大な庭園で、四季折々の自然を楽しめます。\n*   **築地場外市場:** 新鮮な海産物やグルメを堪能できます。\n\nこれらの他にも、美術館、博物館、ショッピングエリアなど、見どころは尽きません。'
-```
-### Transformersでのテキスト推論
-`transformers` ライブラリを使用して、テキストのみ（システムプロンプトとユーザープロンプト）で推論を行うサンプルコードです。
-```python
-# pip install accelerate が必要になる場合があります
-from transformers import AutoTokenizer, AutoModelForCausalLM # Gemma 3はConditionalGenerationですが、テキストのみならこちらでもロードできる場合があります
-# もし上記でエラーが出る場合や、公式に合わせる場合は以下を使用
-# from transformers import AutoTokenizer, Gemma3ForConditionalGeneration
-import torch
-model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-jp"
-device = "cuda" # GPUが利用可能な場合
-# トークナイザーとモデルのロード
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-# テキストのみの場合でも Gemma3ForConditionalGeneration を使用するのが確実です
-model = AutoModelForCausalLM.from_pretrained( # または Gemma3ForConditionalGeneration.from_pretrained
-    model_id,
-    torch_dtype=torch.bfloat16, # bfloat16を推奨
-    device_map="auto", # 自動的にGPUに配置
-)
-# model = Gemma3ForConditionalGeneration.from_pretrained(
-#     model_id,
-#     torch_dtype=torch.bfloat16,
-#     device_map="auto",
-# )
-model.eval()
-# チャット形式のプロンプト
 messages = [
-    {"role": "system", "content": "あなたは知識豊富な歴史解説家です。簡潔に説明してください。"},
-    {"role": "user", "content": "戦国時代の三英傑について教えてください。"}
 ]
-# プロンプトをトークナイズ (チャットテンプレートを適用)
-# Gemma 3 instruction-tuned モデルでは add_generation_prompt=True が重要です
-inputs = tokenizer.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_tensors="pt"
-).to(model.device)
-input_len = inputs.shape[-1]
-# 推論の実行
 with torch.inference_mode():
-    generation = model.generate(
-        inputs,
-        max_new_tokens=200,
-        do_sample=True, # サンプリングを行う場合
-        temperature=0.2,
-        top_p=0.9
-    )
-    # 入力部分を除いた生成されたトークンのみを取得
-    generated_ids = generation[0][input_len:]
-# 結果をデコード
-decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
-print(decoded)
-# >>> 戦国時代の三英傑とは、織田信長、豊臣秀吉、徳川家康の3人を指します。
-# >>>
-# >>> *   **織田信長:** 尾張の小大名から身を起こし、天下統一を目前にしながら本能寺の変で倒れました。革新的な政策や戦術で知られます。
-# >>> *   **豊臣秀吉:** 信長の後を継ぎ、天下統一を成し遂げました。農民出身から最高権力者に上り詰めた人物です。
-# >>> *   **徳川家康:** 秀吉の死後、関ヶ原の戦いで勝利し、江戸幕府を開いて約260年続く泰平の世を築きました。
 ```
-### Transformersでの画像とテキスト推論
-`transformers` ライブラリを使用して、画像とテキストを入力として推論を行うサンプルコードです。
 ```python
-# pip install accelerate が必要になる場合があります
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration
-from PIL import Image
-import requests
 import torch
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-jp"
-device = "cuda" # GPUが利用可能な場合
-# プロセッサーとモデルのロード
-processor = AutoProcessor.from_pretrained(model_id)
 model = Gemma3ForConditionalGeneration.from_pretrained(
-    model_id,
-    torch_dtype=torch.bfloat16, # bfloat16を推奨
-    device_map="auto", # 自動的にGPUに配置
 ).eval()
-# チャット形式のプロンプト (画像とテキストを含む)
-# 画像のURLやローカルパスを指定できます
-image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"
-# ローカルファイルの場合: image = Image.open("path/to/your/image.jpg")
-image = Image.open(requests.get(image_url, stream=True).raw)
 messages = [
     {
         "role": "system",
-        "content": [{"type": "text", "text": "あなたは画像について説明するAIアシスタントです。"}]
     },
     {
         "role": "user",
         "content": [
-            {"type": "image", "image": image}, # PILイメージオブジェクトを渡す
-            # URLを直接渡すことも可能な場合があります (ライブラリのバージョンによる)
-            # {"type": "image", "url": image_url},
-            {"type": "text", "text": "この画像に写っている花と昆虫について説明してください。"}
         ]
     }
 ]
-# プロンプトを処理してトークナイズ
-# Gemma 3 instruction-tuned モデルでは add_generation_prompt=True が重要です
 inputs = processor.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=True,
-    return_dict=True, # return_tensors="pt" と合わせて辞書形式で受け取る
-    return_tensors="pt"
-).to(model.device) # processorがtorch_dtypeを適切に扱わない場合があるため、ここで .to(dtype=torch.bfloat16) を追加する必要があるかもしれません
 input_len = inputs["input_ids"].shape[-1]
-# 推論の実行
 with torch.inference_mode():
-    generation = model.generate(
-        **inputs,
-        max_new_tokens=150,
-        do_sample=False # 決定的な出力を得る場合
-    )
-    # 入力部分を除いた生成されたトークンのみを取得
-    generated_ids = generation[0][input_len:]
-# 結果をデコード
-# processor.decode は text/image トークンを適切に扱います
-decoded = processor.decode(generated_ids, skip_special_tokens=True)
-print(decoded)
-# >>> 画像には、ピンク色のコスモスのような花にミツバチ（またはマルハナバチ）が止まっている様子が写っています。
-# >>>
-# >>> *   **花:** ピンク色の花びらを持つキク科の植物で、おそらくコスモスでしょう。中央には黄色い花粉が見えます。
-# >>> *   **昆虫:** 体に黄色と黒の縞模様があり、毛深い外見からマルハナバチ（Bumblebee）である可能性が高いです。花の中心部で蜜や花粉を集めているようです。
-# >>>
-# >>> 背景は緑色で、自然光の下で撮影されたような、柔らかい雰囲気の写真です。
 ```
 ## License

 まず、必要なライブラリをインストールします。Gemma 3は `transformers` 4.50.0 以降が必要です。
 ```sh
+pip install -U transformers accelerate Pillow
 # 上記のコマンドでは vllm はインストールされません。vLLM で推論する場合は別途インストールが必要で、CPUのみで使用する場合や特定の環境ではインストール方法が異なる場合があります。
 # vLLMの公式ドキュメントを参照してください: https://docs.vllm.ai/en/latest/getting_started/installation.html
 ```
+### 画像付き推論
 ```python
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration
+from PIL import Image
+import requests
+import torch
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-jp"
+model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id, device_map="auto"
+).eval()
+processor = AutoProcessor.from_pretrained(model_id)
 messages = [
+    {
+        "role": "system",
+        "content": [{"type": "text", "text": "あなたは素晴らしい日本語アシスタントです。"}]
+    },
+    {
+        "role": "user",
+        "content": [
+            {"type": "image", "image": "https://cs.stanford.edu/people/rak248/VG_100K_2/2399540.jpg"},
+            {"type": "text", "text": "この画像を説明してください。"}
+        ]
+    }
 ]
+inputs = processor.apply_chat_template(
+    messages, add_generation_prompt=True, tokenize=True,
+    return_dict=True, return_tensors="pt"
+).to(model.device, dtype=torch.bfloat16)
+input_len = inputs["input_ids"].shape[-1]
 with torch.inference_mode():
+    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+decoded = processor.decode(generation, skip_special_tokens=True)
+print(decoded)
 ```
+### 画像無し推論
 ```python
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 import torch
 model_id = "DataPilot/ArrowMint-Gemma3-4B-ChocoMint-jp"
 model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id, device_map="auto"
 ).eval()
+processor = AutoProcessor.from_pretrained(model_id)
 messages = [
     {
         "role": "system",
+        "content": [{"type": "text", "text": "あなたは素晴らしい日本語アシスタントです。"}]
     },
     {
         "role": "user",
         "content": [
+            {"type": "text", "text": "福岡に一人で遊びに行くのですがお勧めスポットはありますか？"}
         ]
     }
 ]
 inputs = processor.apply_chat_template(
+    messages, add_generation_prompt=True, tokenize=True,
+    return_dict=True, return_tensors="pt"
+).to(model.device, dtype=torch.bfloat16)
 input_len = inputs["input_ids"].shape[-1]
 with torch.inference_mode():
+    generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
+    generation = generation[0][input_len:]
+decoded = processor.decode(generation, skip_special_tokens=True)
+print(decoded)
 ```
 ## License