import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Paths
base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
adapter_model = "alphaoumardev/Llama3-8B-noryu-instruct"

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Apply LoRA adapter
model = PeftModel.from_pretrained(model, adapter_model)

# Load tokenizer from base model
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Create a pipeline for inference
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Example inference
print(pipe("Hello, how are you today?", max_length=100)[0]["generated_text"])
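
# Optional sketch (not part of the original snippet): Llama 3.1 Instruct models
# are trained on a chat format, so wrapping the prompt with the tokenizer's chat
# template usually gives better results than passing a raw string.
messages = [{"role": "user", "content": "Hello, how are you today?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(pipe(prompt, max_new_tokens=100)[0]["generated_text"])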