# Usage
from llama_cpp import Llama
from typing import Optional
import time
from huggingface_hub import hf_hub_download
def generate_prompt(input_text: str, instruction: Optional[str] = None) -> str:
    """Build the prompt string for a single question.

    Args:
        input_text: The question, inserted after the ``### Question:`` marker.
        instruction: Optional text placed in a leading ``### Instruction:``
            section. ``None`` or an empty string omits that section.

    Returns:
        The assembled prompt, which always ends with ``### Answer: ``.
    """
    text = f"### Question: {input_text}\n\n### Answer: "
    if instruction:
        # The instruction section, when present, goes before the question.
        text = f"### Instruction: {instruction}\n\n{text}"
    return text
# Set up the parameters
repo_id = "vdpappu/gemma2_science_qa_gguf"
filename = "gemma2_scienceqa.gguf"
local_dir = "."

# Fetch the GGUF file named above from the Hub repo into local_dir.
downloaded_file_path = hf_hub_download(
    repo_id=repo_id,
    filename=filename,
    local_dir=local_dir,
)
print(f"File downloaded to: {downloaded_file_path}")

# Load the model
llm = Llama(model_path=downloaded_file_path)

question = "Which is the smoothest? Choose from: concrete sidewalk, sandpaper, paper."
prompt = generate_prompt(input_text=question)

# Time only the generation call. perf_counter() is monotonic, so the measured
# interval cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
output = llm(
    prompt,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    repeat_penalty=1.5,
    max_tokens=200,
    # Stop when the output contains the "Question:" marker or "<eos>".
    stop=["Question:", "<eos>"],
)
end = time.perf_counter()
print(f"Inference time: {end-start:.2f} seconds \n")

# Print the text of the first returned choice.
print(output['choices'][0]['text'])
- Downloads last month: 1