Spaces:
Sleeping
Sleeping
File size: 2,887 Bytes
db694c4 b2b3b83 db694c4 b2b3b83 00561ea db694c4 b2b3b83 00561ea e236b6c b2b3b83 8cbd452 b2b3b83 23e06a5 b2b3b83 23e06a5 b2b3b83 23e06a5 00561ea b2b3b83 db694c4 b2b3b83 dd6d0f9 b2b3b83 db694c4 b2b3b83 db694c4 b2b3b83 db694c4 b2b3b83 db694c4 b2b3b83 23e06a5 b2b3b83 dd6d0f9 b2b3b83 00561ea b2b3b83 db694c4 b2b3b83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import utils
import os
import openai
from llama_index import SimpleDirectoryReader
from llama_index import Document
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.embeddings import HuggingFaceEmbedding
from trulens_eval import Tru
from utils import get_prebuilt_trulens_recorder
import time
# Set the OpenAI API key at import time, using the value returned by the
# project-local helper utils.get_openai_api_key().
openai.api_key = utils.get_openai_api_key()
def _load_eval_questions(path, separator):
    """Read evaluation questions from *path*, split on *separator*.

    Every chunk is stripped of surrounding whitespace. Chunks that are empty
    after stripping (for example those produced by trailing blank lines or
    by three or more consecutive newlines) are dropped so that no blank
    query is ever sent to the query engine.

    Args:
        path: Text file holding the questions.
        separator: String that delimits consecutive questions.

    Returns:
        The non-empty questions, in file order.
    """
    with open(path, "r", encoding="utf-8") as question_file:
        content = question_file.read()
    stripped_chunks = (chunk.strip() for chunk in content.split(separator))
    return [question for question in stripped_chunks if question]


def main():
    """Index the knowledge-base PDF and record an evaluation run over it.

    Returns immediately when ``./default.sqlite`` already exists. Otherwise:

    1. Lists ``./raw_documents`` and loads ``HI_Knowledge_Base.pdf`` into a
       single merged ``Document``.
    2. Builds a ``VectorStoreIndex`` using the ``gpt-3.5-turbo-1106`` LLM and
       the ``BAAI/bge-small-en-v1.5`` embedding model.
    3. Reads the questions from ``./raw_documents/eval_questions.txt``, runs
       the first one once and prints the answer, then sends every question
       through the query engine inside the recorder returned by
       ``get_prebuilt_trulens_recorder``.
    4. Writes the collected records to ``./results/records.csv`` and the
       elapsed time in whole minutes to ``./results/time_cost.txt``.

    Raises:
        ValueError: If the questions file contains no non-blank question.
    """
    # NOTE(review): presumably this file is the database left behind by an
    # earlier evaluation run, so its presence means nothing needs redoing --
    # confirm against the deployment setup.
    if os.path.exists("./default.sqlite"):
        return

    start_time = time.time()

    file_ls_str = ", ".join(os.listdir("./raw_documents"))
    print(f"File list: {file_ls_str}")
    print("")

    documents = SimpleDirectoryReader(
        input_files=["./raw_documents/HI_Knowledge_Base.pdf"]
    ).load_data()
    # Merge everything the reader returned into one document before indexing.
    document = Document(text="\n\n".join(doc.text for doc in documents))

    # Other model names noted by the original author:
    ### gpt-4-1106-preview
    ### gpt-3.5-turbo-1106 / gpt-3.5-turbo
    print("Initializing GPT 3.5 ..")
    llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)

    print("Initializing bge-small-en-v1.5 embedding model ..")
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    print("Creating vector store ..")
    # This timing covers document loading and model setup only; the index
    # itself is built on the lines that follow.
    print("time spent:", time.time() - start_time)
    service_context = ServiceContext.from_defaults(
        llm=llm, embed_model=embed_model
    )
    index = VectorStoreIndex.from_documents(
        [document], service_context=service_context
    )

    query_engine = index.as_query_engine()

    separator = "\n\n"
    questions_path = "./raw_documents/eval_questions.txt"
    eval_questions = _load_eval_questions(questions_path, separator)
    if not eval_questions:
        raise ValueError(f"No evaluation questions found in {questions_path}")
    for question in eval_questions:
        print(question)
        print(separator)

    # Run the first question once, outside the recorder, and show its answer.
    response = query_engine.query(eval_questions[0])
    print(str(response))

    tru = Tru()
    # tru.reset_database()

    tru_recorder = get_prebuilt_trulens_recorder(
        query_engine, app_id="Direct Query Engine"
    )
    print("Sending each question to llm ..")
    with tru_recorder:
        for question in eval_questions:
            query_engine.query(question)

    # Only the records are exported; the second return value is not used.
    records, _feedback = tru.get_records_and_feedback(app_ids=[])

    os.makedirs("./results", exist_ok=True)
    records.to_csv("./results/records.csv", index=False)

    # NOTE(review): hide_password=False prints the database URL verbatim, so
    # any credentials embedded in it end up in the logs.
    print(tru.db.engine.url.render_as_string(hide_password=False))

    time_spent_mins = (time.time() - start_time) / 60
    with open("./results/time_cost.txt", "w", encoding="utf-8") as fp:
        fp.write(f"Takes {int(time_spent_mins)} mins to create llm evaluation.")

    # tru.run_dashboard()
# Run the evaluation only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()