silence09 committed on
Commit e62347c · verified · 1 Parent(s): f7d07be

Update: we can use apply_chat_template

Files changed (1)
  1. README.md +24 -66
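
The change swaps the README's hand-rolled `build_inputs`/`chat` helpers for the tokenizer's bundled chat template. As a rough sanity check (a sketch, not part of the commit), rendering a `messages` list with `apply_chat_template` should reproduce the same `<|im_start|>`/`<|im_end|>` prompt that `build_inputs` concatenated by hand, assuming the converted repo ships an InternLM2-style `chat_template`; exact BOS handling may differ:

```python
from transformers import AutoTokenizer

# Sketch: inspect the prompt string the chat template renders.
tokenizer = AutoTokenizer.from_pretrained(
    "silence09/InternLM2.5-7B-Chat-Converted-Qwen2", trust_remote_code=True)

messages = [
    {"role": "system", "content": "You are an AI assistant whose name is InternLM (书生·浦语)."},
    {"role": "user", "content": "介绍下你自己"},  # "Introduce yourself" (prompt1 from the old README)
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# Expected shape (assumption, mirroring the removed build_inputs helper):
# <|im_start|>system
# You are an AI assistant whose name is InternLM (书生·浦语).<|im_end|>
# <|im_start|>user
# 介绍下你自己<|im_end|>
# <|im_start|>assistant
```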
README.md CHANGED
@@ -11,81 +11,39 @@ This is a converted model from [InternLM2.5-7B-Chat](https://huggingface.co/inte
  ## Usage
  You can load the model using the `Qwen2ForCausalLM` class as shown below:
  ```python
- device = "cpu" # cpu is exacatly the same
 
 
15
  attn_impl = 'eager' # the attention implementation to use
16
- meta_instruction = ("You are an AI assistant whose name is InternLM (书生·浦语).\n"
- "- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory "
- "(上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n"
- "- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such "
- "as English and 中文."
- )
- prompt1 = "介绍下你自己"
- prompt2 = "介绍下上海人工智能实验室"
-
- def build_inputs(tokenizer, query: str, history: List[Tuple[str, str]] = None, meta_instruction=meta_instruction):
-     if history is None:
-         history = []
-     if tokenizer.add_bos_token:
-         prompt = ""
-     else:
-         prompt = tokenizer.bos_token
-     if meta_instruction:
-         prompt += f"""<|im_start|>system\n{meta_instruction}<|im_end|>\n"""
-     for record in history:
-         prompt += f"""<|im_start|>user\n{record[0]}<|im_end|>\n<|im_start|>assistant\n{record[1]}<|im_end|>\n"""
-     prompt += f"""<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"""
-     return tokenizer([prompt], return_tensors="pt")
-
- @torch.inference_mode()
- def chat(
-     model: Union[AutoModelForCausalLM, Qwen2ForCausalLM],
-     tokenizer,
-     query: str,
-     history: Optional[List[Tuple[str, str]]] = None,
-     streamer: Optional[BaseStreamer] = None,
-     max_new_tokens: int = 1024,
-     do_sample: bool = True,
-     temperature: float = 0.8,
-     top_p: float = 0.8,
-     meta_instruction: str = meta_instruction,
-     **kwargs,
- ):
-     if history is None:
-         history = []
-     inputs = build_inputs(tokenizer, query, history, meta_instruction)
-     inputs = {k: v.to(model.device) for k, v in inputs.items() if torch.is_tensor(v)}
-     # also add end-of-assistant token in eos token id to avoid unnecessary generation
-     eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0]]
-     outputs = model.generate(
-         **inputs,
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         do_sample=do_sample,
-         temperature=temperature,
-         top_p=top_p,
-         eos_token_id=eos_token_id,
-         **kwargs,
-     )
-     outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]) :]
-     response = tokenizer.decode(outputs, skip_special_tokens=True)
-     response = response.split("<|im_end|>")[0]
-     history = history + [(query, response)]
-     return response, history
-

- # use the official tokenizer
  tokenizer = AutoTokenizer.from_pretrained("silence09/InternLM2.5-7B-Chat-Converted-Qwen2", trust_remote_code=True)
- # use the converted LlaMA model
  qwen2_model = Qwen2ForCausalLM.from_pretrained(
      "silence09/InternLM2.5-7B-Chat-Converted-Qwen2",
      torch_dtype='auto',
      attn_implementation=attn_impl).to(device)
- qwen2_model.eval()
- response_qwen2_and_splitfunc_1, history = chat(qwen2_model, tokenizer, prompt1, history=[], do_sample=False)
- print(f"User Input: {prompt1}\nConverted LlaMA Response: {response_qwen2_and_splitfunc_1}")
-
- response_qwen2_and_splitfunc_2, history = chat(qwen2_model, tokenizer, prompt2, history=history, do_sample=False)
- print(f"User Input: {prompt2}\nConverted LlaMA Response: {response_qwen2_and_splitfunc_2}")

  ```

  ## Usage
  You can load the model using the `Qwen2ForCausalLM` class as shown below:
  ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer, Qwen2ForCausalLM
+
+ device = "cuda" # the device to load the model onto, cpu or cuda
  attn_impl = 'eager' # the attention implementation to use

+ prompt = "大模型和人工智能经历了两年的快速发展,请你以此主题对人工智能的从业者写一段新年寄语"

+ system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
+ - InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
+ - InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
+ messages = [
+     {"role": "system", "content": system_prompt},
+     {"role": "user", "content": prompt},
+ ]

  tokenizer = AutoTokenizer.from_pretrained("silence09/InternLM2.5-7B-Chat-Converted-Qwen2", trust_remote_code=True)
+ text = tokenizer.apply_chat_template(
+     messages,
+     tokenize=False,
+     add_generation_prompt=True
+ )
+ model_inputs = tokenizer([text], return_tensors="pt").to(device)
+ print(prompt)
  qwen2_model = Qwen2ForCausalLM.from_pretrained(
      "silence09/InternLM2.5-7B-Chat-Converted-Qwen2",
      torch_dtype='auto',
      attn_implementation=attn_impl).to(device)
+ qwen2_generated_ids = qwen2_model.generate(model_inputs.input_ids, max_new_tokens=100, do_sample=False)
+ qwen2_generated_ids = [
+     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, qwen2_generated_ids)
+ ]
+ qwen2_response = tokenizer.batch_decode(qwen2_generated_ids, skip_special_tokens=True)[0]
+ print(qwen2_response)

  ```
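
The removed `chat()` helper also threaded a `history` list through successive calls; with the template approach the same multi-turn behavior comes from growing the `messages` list. A minimal sketch of a follow-up turn, continuing from the updated snippet above (it reuses `tokenizer`, `qwen2_model`, `messages`, `qwen2_response`, and `device`, and borrows the second question from the old example):

```python
# Sketch only: append the previous reply and a new user turn, then re-render the template.
messages.append({"role": "assistant", "content": qwen2_response})
messages.append({"role": "user", "content": "介绍下上海人工智能实验室"})  # "Introduce the Shanghai AI Laboratory" (prompt2 from the old README)

text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
generated_ids = qwen2_model.generate(model_inputs.input_ids, max_new_tokens=100, do_sample=False)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])
```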