|
import pytest |
|
from tests.utils import wrap_test_forked |
|
|
|
|
|
@pytest.mark.need_tokens
@wrap_test_forked
def test_langchain_simple_h2ogpt():
    """Run the simple LangChain QA check with the h2oGPT oasst1 12B model."""
    model_settings = dict(
        base_model='h2oai/h2ogpt-oasst1-512-12b',
        prompt_type='human_bot',
    )
    run_langchain_simple(**model_settings)
|
|
|
|
|
@pytest.mark.need_tokens
@wrap_test_forked
def test_langchain_simple_vicuna():
    """Run the simple LangChain QA check with the wizard-vicuna 13B model."""
    model_settings = dict(
        base_model='junelee/wizard-vicuna-13b',
        prompt_type='instruct_vicuna',
    )
    run_langchain_simple(**model_settings)
|
|
|
|
|
def run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', prompt_type='human_bot'):
    """
    Load a HuggingFace causal LM, wrap it in an H2OTextGenerationPipeline, expose it to
    LangChain through HuggingFacePipeline, run one question-answering chain with no
    input documents, and assert the answer matches one of the expected openings.

    :param base_model: HuggingFace model name passed to the config, tokenizer and model loaders
    :param prompt_type: prompt_type required for stopping support and correct handling of instruction prompting
    :return: None; raises AssertionError if the generated text does not match expectations
    """
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from h2oai_pipeline import H2OTextGenerationPipeline

    from transformers import AutoConfig
    config = AutoConfig.from_pretrained(base_model, use_auth_token=True,
                                        trust_remote_code=True,
                                        offload_folder="./")

    # Treat the model as llama-based if either its config dump or its name mentions "llama".
    llama_type_from_config = 'llama' in str(config).lower()
    llama_type_from_name = "llama" in base_model.lower()
    llama_type = llama_type_from_config or llama_type_from_name

    if llama_type:
        from transformers import LlamaForCausalLM, LlamaTokenizer
        model_loader = LlamaForCausalLM
        tokenizer_loader = LlamaTokenizer
    else:
        model_loader = AutoModelForCausalLM
        tokenizer_loader = AutoTokenizer

    load_in_8bit = True
    # is_available must be *called*; the bare function object is always truthy,
    # which made the original guard a no-op.
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    device = 'cpu' if n_gpus == 0 else 'cuda'
    # On GPU, pin the whole model to device 0; otherwise defer placement to "auto".
    device_map = {"": 0} if device == 'cuda' else "auto"
    tokenizer = tokenizer_loader.from_pretrained(base_model, padding_side="left")

    model = model_loader.from_pretrained(base_model, torch_dtype=torch.bfloat16, device_map=device_map,
                                         load_in_8bit=load_in_8bit)

    gen_kwargs = dict(max_new_tokens=512, return_full_text=True, early_stopping=False)
    pipe = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer, prompt_type=prompt_type, **gen_kwargs)

    # NOTE(review): presumably overridden so LangChain's HuggingFacePipeline does not apply its
    # own prompt stripping for "text-generation" and leaves that to the h2o pipeline -- confirm.
    pipe.task = "text2text-generation"

    # Wrap the pipeline as a LangChain LLM.
    from langchain.llms import HuggingFacePipeline
    llm = HuggingFacePipeline(pipeline=pipe)

    # Set up the QA chain with a minimal context/question template.
    from langchain import PromptTemplate
    from langchain.chains.question_answering import load_qa_chain

    template = """
==
{context}
==
{question}"""
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )
    chain = load_qa_chain(llm, prompt=prompt)
    docs = []  # no supporting documents: the model answers from the question alone
    query = "Give detailed list of reasons for who is smarter, Einstein or Newton."
    chain_kwargs = dict(input_documents=docs, question=query)
    answer = chain(chain_kwargs)
    print(answer)

    output_text = answer['output_text']
    # Accept either of two known openings per model family.
    if 'vicuna' in base_model:
        expected_openings = (
            'Both Albert Einstein and Sir Isaac Newton were brilliant scientists',
            'Both Albert Einstein and Sir Isaac Newton are considered two',
        )
    else:
        expected_openings = (
            'Einstein was a genius who revolutionized physics',
            'Einstein and Newton are two of the most famous scientists in history',
        )
    # Same condition as the original res1/res2 pair: one expected opening AND a mention of Newton.
    assert "Newton" in output_text and any(opening in output_text for opening in expected_openings)
|
|