silence09 committed on
Commit e62347c · verified · 1 Parent(s): f7d07be

Update: we can use apply_chat_template

Files changed (1)
  1. README.md +24 -66
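
The change swaps the README's hand-rolled `build_inputs`/`chat` helpers for the tokenizer's bundled chat template. As a rough sanity check (a sketch, not part of the commit), rendering a `messages` list with `apply_chat_template` should reproduce the same `<|im_start|>`/`<|im_end|>` prompt that `build_inputs` concatenated by hand, assuming the converted repo ships an InternLM2-style `chat_template`; exact BOS handling may differ:

```python
from transformers import AutoTokenizer

# Sketch: inspect the prompt string the chat template renders.
tokenizer = AutoTokenizer.from_pretrained(
    "silence09/InternLM2.5-7B-Chat-Converted-Qwen2", trust_remote_code=True)

messages = [
    {"role": "system", "content": "You are an AI assistant whose name is InternLM (书生·浦语)."},
    {"role": "user", "content": "介绍下你自己"},  # "Introduce yourself" (prompt1 from the old README)
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# Expected shape (assumption, mirroring the removed build_inputs helper):
# <|im_start|>system
# You are an AI assistant whose name is InternLM (书生·浦语).<|im_end|>
# <|im_start|>user
# 介绍下你自己<|im_end|>
# <|im_start|>assistant
```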
README.md CHANGED
@@ -11,81 +11,39 @@ This is a converted model from [InternLM2.5-7B-Chat](https://huggingface.co/inte
  ## Usage
  You can load the model using the `Qwen2ForCausalLM` class as shown below:
  ```python
- device = "cpu" # cpu is exacatly the same
 
 
15
  attn_impl = 'eager' # the attention implementation to use
16
- meta_instruction = ("You are an AI assistant whose name is InternLM (书生·浦语).\n"
- "- InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory "
- "(上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n"
- "- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such "
- "as English and 中文."
- )
- prompt1 = "介绍下你自己"
- prompt2 = "介绍下上海人工智能实验室"
-
- def build_inputs(tokenizer, query: str, history: List[Tuple[str, str]] = None, meta_instruction=meta_instruction):
-     if history is None:
-         history = []
-     if tokenizer.add_bos_token:
-         prompt = ""
-     else:
-         prompt = tokenizer.bos_token
-     if meta_instruction:
-         prompt += f"""<|im_start|>system\n{meta_instruction}<|im_end|>\n"""
-     for record in history:
-         prompt += f"""<|im_start|>user\n{record[0]}<|im_end|>\n<|im_start|>assistant\n{record[1]}<|im_end|>\n"""
-     prompt += f"""<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"""
-     return tokenizer([prompt], return_tensors="pt")
-
- @torch.inference_mode()
- def chat(
-     model: Union[AutoModelForCausalLM, Qwen2ForCausalLM],
-     tokenizer,
-     query: str,
-     history: Optional[List[Tuple[str, str]]] = None,
-     streamer: Optional[BaseStreamer] = None,
-     max_new_tokens: int = 1024,
-     do_sample: bool = True,
-     temperature: float = 0.8,
-     top_p: float = 0.8,
-     meta_instruction: str = meta_instruction,
-     **kwargs,
- ):
-     if history is None:
-         history = []
-     inputs = build_inputs(tokenizer, query, history, meta_instruction)
-     inputs = {k: v.to(model.device) for k, v in inputs.items() if torch.is_tensor(v)}
-     # also add end-of-assistant token in eos token id to avoid unnecessary generation
-     eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0]]
-     outputs = model.generate(
-         **inputs,
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         do_sample=do_sample,
-         temperature=temperature,
-         top_p=top_p,
-         eos_token_id=eos_token_id,
-         **kwargs,
-     )
-     outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]) :]
-     response = tokenizer.decode(outputs, skip_special_tokens=True)
-     response = response.split("<|im_end|>")[0]
-     history = history + [(query, response)]
-     return response, history
-

- # use the official tokenizer
  tokenizer = AutoTokenizer.from_pretrained("silence09/InternLM2.5-7B-Chat-Converted-Qwen2", trust_remote_code=True)
- # use the converted LlaMA model
  qwen2_model = Qwen2ForCausalLM.from_pretrained(
      "silence09/InternLM2.5-7B-Chat-Converted-Qwen2",
      torch_dtype='auto',
      attn_implementation=attn_impl).to(device)
- qwen2_model.eval()
- response_qwen2_and_splitfunc_1, history = chat(qwen2_model, tokenizer, prompt1, history=[], do_sample=False)
- print(f"User Input: {prompt1}\nConverted LlaMA Response: {response_qwen2_and_splitfunc_1}")
-
- response_qwen2_and_splitfunc_2, history = chat(qwen2_model, tokenizer, prompt2, history=history, do_sample=False)
- print(f"User Input: {prompt2}\nConverted LlaMA Response: {response_qwen2_and_splitfunc_2}")

  ```

  ## Usage
  You can load the model using the `Qwen2ForCausalLM` class as shown below:
  ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer, Qwen2ForCausalLM
+
+ device = "cuda" # the device to load the model onto, cpu or cuda
  attn_impl = 'eager' # the attention implementation to use

+ prompt = "大模型和人工智能经历了两年的快速发展,请你以此主题对人工智能的从业者写一段新年寄语"

+ system_prompt = """You are an AI assistant whose name is InternLM (书生·浦语).
+ - InternLM (书生·浦语) is a conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.
+ - InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文."""
+ messages = [
+     {"role": "system", "content": system_prompt},
+     {"role": "user", "content": prompt},
+ ]

  tokenizer = AutoTokenizer.from_pretrained("silence09/InternLM2.5-7B-Chat-Converted-Qwen2", trust_remote_code=True)
+ text = tokenizer.apply_chat_template(
+     messages,
+     tokenize=False,
+     add_generation_prompt=True
+ )
+ model_inputs = tokenizer([text], return_tensors="pt").to(device)
+ print(prompt)
  qwen2_model = Qwen2ForCausalLM.from_pretrained(
      "silence09/InternLM2.5-7B-Chat-Converted-Qwen2",
      torch_dtype='auto',
      attn_implementation=attn_impl).to(device)
+ qwen2_generated_ids = qwen2_model.generate(model_inputs.input_ids, max_new_tokens=100, do_sample=False)
+ qwen2_generated_ids = [
+     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, qwen2_generated_ids)
+ ]
+ qwen2_response = tokenizer.batch_decode(qwen2_generated_ids, skip_special_tokens=True)[0]
+ print(qwen2_response)

  ```
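
The removed `chat()` helper also threaded a `history` list through successive calls; with the template approach the same multi-turn behavior comes from growing the `messages` list. A minimal sketch of a follow-up turn, continuing from the updated snippet above (it reuses `tokenizer`, `qwen2_model`, `messages`, `qwen2_response`, and `device`, and borrows the second question from the old example):

```python
# Sketch only: append the previous reply and a new user turn, then re-render the template.
messages.append({"role": "assistant", "content": qwen2_response})
messages.append({"role": "user", "content": "介绍下上海人工智能实验室"})  # "Introduce the Shanghai AI Laboratory" (prompt2 from the old README)

text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)
generated_ids = qwen2_model.generate(model_inputs.input_ids, max_new_tokens=100, do_sample=False)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])
```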