import os
import streamlit as st
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.node_parser import LangchainNodeParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_index.core.storage.chat_store import SimpleChatStore
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
import nest_asyncio
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core import Settings
from typing import List, Optional
from llama_index.core import PromptTemplate
import torch
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import huggingface_hub
import logging
import sys
from PIL import Image
import gc
def flush() -> None:
    """Free unreferenced Python objects and, when CUDA is usable, GPU caches.

    Runs a garbage-collection pass first. The CUDA calls are only made when
    ``torch.cuda.is_available()`` is true, so the helper is a safe no-op for
    the GPU part on CPU-only hosts instead of raising.
    """
    gc.collect()
    if torch.cuda.is_available():
        # Return cached allocator blocks to the driver and restart the
        # peak-memory counters.
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
# Hugging Face access token, read from the HF_TOKEN environment variable.
HF_TOKEN: Optional[str] = os.getenv("HF_TOKEN")
if HF_TOKEN:
    huggingface_hub.login(HF_TOKEN)
else:
    # Calling login() without a token makes huggingface_hub ask for one
    # interactively, which cannot work inside a headless Streamlit server.
    # Skip the login and let public models load anonymously.
    logging.warning(
        "HF_TOKEN não definido; login no Hugging Face ignorado "
        "(modelos restritos não poderão ser baixados)."
    )
# Browser-tab configuration: the icon is loaded from a local PNG and passed,
# together with the page title and a wide layout, to Streamlit.
im = Image.open("./pngegg.png")
st.set_page_config(
    page_title="Chatbot Carômetro",
    page_icon=im,
    layout="wide",
)
# Working folders the application expects to find in the current directory.
pastas = ["bm25_retriever", "chat_store", "chroma_db", "documentos"]

# Create each folder that is missing and report what happened for every one.
for pasta in pastas:
    if os.path.exists(pasta):
        print(f"Pasta '{pasta}' já existe.")
        continue
    os.makedirs(pasta)
    print(f"Pasta '{pasta}' criada com sucesso.")
# Streamlit sidebar: title plus a radio button that selects the LLM backend.
# The chosen label is kept in `sidebar_option`.
st.sidebar.title("Configuração de LLM")
sidebar_option = st.sidebar.radio(
    "Selecione o LLM",
    ["OpenAI", "HF Local"],
)
# Earlier logo approach, kept for reference:
# logo_url = 'app\logos\logo-sicoob.jpg'
# st.sidebar.image(logo_url)
import base64

# Sidebar logo: the PNG is read from disk, base64-encoded and embedded as a
# data URI so it can be rendered through raw HTML in the sidebar.
with open("sicoob-logo.png", "rb") as f:
    data = base64.b64encode(f.read()).decode("utf-8")

# The markdown body was previously empty, so the encoded image was never
# displayed. Emit an <img> tag that actually uses `data`.
# NOTE(review): the original markup/styling is unknown — adjust width or
# alignment as needed.
st.sidebar.markdown(
    f'<img src="data:image/png;base64,{data}" alt="Sicoob" style="max-width: 100%;">',
    unsafe_allow_html=True,
)
# LLM / embedding backend selection, driven by the sidebar radio button.
#
# Legacy Ollama configuration, kept for reference:
# if sidebar_option == "Ollama":
#     Settings.llm = Ollama(model="llama3.2:latest", request_timeout=500.0, num_gpu=1)
#     Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text:latest")


@st.cache_resource
def _load_hf_local_models():
    """Build the local Hugging Face embedding model and LLM.

    Wrapped in ``st.cache_resource`` so the model weights are loaded once
    instead of on every Streamlit rerun (each user interaction re-executes
    the whole script).

    Returns:
        A ``(embed_model, llm)`` tuple.
    """
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    # To try another local model, change model_name/tokenizer_name below.
    # Alternatives that were listed here previously:
    #   Qwen/Qwen2.5-Coder-32B-Instruct, Qwen/Qwen2.5-14B-Instruct,
    #   meta-llama/Llama-3.2-3B, HuggingFaceH4/zephyr-7b-beta,
    #   meta-llama/Meta-Llama-3-8B
    llm = HuggingFaceLLM(
        context_window=2048,
        max_new_tokens=256,
        generate_kwargs={"do_sample": False},
        model_name="numind/NuExtract-1.5",
        tokenizer_name="numind/NuExtract-1.5",
        device_map="auto",
        tokenizer_kwargs={"max_length": 2048},
        # bfloat16 weights to reduce memory usage.
        model_kwargs={"torch_dtype": torch.bfloat16},
    )
    return embed_model, llm


# "OpenAI" is the label offered by the sidebar radio; the old "gpt-3.5" value
# is still accepted so any existing caller/state keeps working.
if sidebar_option in ("OpenAI", "gpt-3.5"):
    from llama_index.llms.openai import OpenAI
    from llama_index.embeddings.openai import OpenAIEmbedding

    # SECURITY: an API key used to be hard-coded here. It has been removed
    # from the source; that key must be considered leaked and be revoked.
    # The key is now expected in the OPENAI_API_KEY environment variable.
    if not os.environ.get("OPENAI_API_KEY"):
        st.error("Defina a variável de ambiente OPENAI_API_KEY para usar o OpenAI.")
        st.stop()
    Settings.llm = OpenAI(model="gpt-3.5-turbo")
    Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-ada-002")
elif sidebar_option == "HF Local":
    # basicConfig() does nothing when the root logger already has handlers,
    # so repeated reruns do not stack handlers. The extra StreamHandler that
    # was added here duplicated every log line and grew on each rerun.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # A tokenizer used to be loaded here only to apply a sample chat
    # template whose result was discarded; that dead code was removed.
    Settings.embed_model, llm = _load_hf_local_models()
    Settings.chunk_size = 512
    Settings.llm = llm
else:
    raise ValueError("Opção de LLM inválida!")
# Storage locations, all relative to the current working directory.
documents_path = "documentos"  # source documents to index
chroma_storage_path = "chroma_db"  # Chroma persistence directory
bm25_persist_path = "bm25_retriever"  # persisted BM25 retriever
chat_store_path = os.path.join("chat_store", "chat_store.json")  # chat history file
# Load the source documents. SimpleDirectoryReader raises ValueError when the
# folder holds no readable files (the case right after the folder is first
# created), so show a message and halt this run instead of crashing.
try:
    documents = SimpleDirectoryReader(input_dir=documents_path).load_data()
except ValueError as err:
    st.error(f"Não foi possível carregar os documentos de '{documents_path}': {err}")
    st.stop()

# In-memory docstore holding the loaded documents (also consumed by BM25).
docstore = SimpleDocumentStore()
docstore.add_documents(documents)

# Persistent Chroma collection that stores the dense vectors.
# NOTE(review): the same collection is used for every embedding model; vectors
# written with one model are presumably incompatible with another — confirm
# and consider one collection per embedding model.
db = chromadb.PersistentClient(path=chroma_storage_path)
chroma_collection = db.get_or_create_collection("dense_vectors")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Storage context tying the docstore and the vector store together.
storage_context = StorageContext.from_defaults(
    docstore=docstore, vector_store=vector_store
)

# Reuse the stored vectors when the collection already has entries; otherwise
# chunk and embed the documents. The previous test was
# os.path.exists(chroma_storage_path), which is always true (the folder is
# created at startup), so the index was never built from the documents.
if chroma_collection.count() > 0:
    index = VectorStoreIndex.from_vector_store(vector_store)
else:
    splitter = LangchainNodeParser(
        RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
    )
    index = VectorStoreIndex.from_documents(
        documents, storage_context=storage_context, transformations=[splitter]
    )
    # No explicit persist call: chromadb.PersistentClient writes to disk on
    # its own, and the former vector_store.persist() call passed no path.
# BM25 retriever: build and persist it on first use, reload it afterwards.
# The presence of "params.index.json" in the persist directory is used as the
# signal that a retriever was already saved there.
_bm25_marker = os.path.join(bm25_persist_path, "params.index.json")
if not os.path.exists(_bm25_marker):
    bm25_retriever = BM25Retriever.from_defaults(
        docstore=docstore,
        similarity_top_k=2,
        language="portuguese",  # language setting matching the documents
    )
    os.makedirs(bm25_persist_path, exist_ok=True)
    bm25_retriever.persist(bm25_persist_path)
else:
    bm25_retriever = BM25Retriever.from_persist_dir(bm25_persist_path)
# Hybrid retrieval: dense (embedding) results and BM25 results are merged by
# a fusion retriever using reciprocal-rank reranking.
vector_retriever = index.as_retriever(similarity_top_k=2)

# Prompt used by the fusion retriever to generate additional search queries.
_fusion_query_gen_prompt = (
    "Gere {num_queries} perguntas de busca relacionadas à seguinte pergunta. "
    "Priorize o significado da pergunta sobre qualquer histórico de conversa. "
    "Se o histórico não for relevante para a pergunta, ignore-o. "
    "Não adicione explicações, notas ou introduções. Apenas escreva as perguntas. "
    "Pergunta: {query}\n\n"
    "Perguntas:\n"
)

retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    query_gen_prompt=_fusion_query_gen_prompt,
    mode="reciprocal_rerank",
    num_queries=4,
    similarity_top_k=2,
    use_async=True,
    verbose=True,
)
# Chat engine configuration.
# Patch asyncio so nested event loops are allowed; the fusion retriever is
# created with use_async=True.
nest_asyncio.apply()

# Streamlit re-executes this script on every interaction. Creating the memory
# buffer unconditionally gave the engine an empty history on each question,
# so it is kept in the per-session state and reused across reruns.
if "chat_memory" not in st.session_state:
    st.session_state.chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
memory = st.session_state.chat_memory

# Stand-alone query engine over the same retriever (kept available for
# direct, non-chat querying; the chat engine below does not need it).
query_engine = RetrieverQueryEngine.from_args(retriever)

# from_defaults() takes a retriever as its first argument, so the fusion
# retriever is passed directly rather than the query engine wrapper.
chat_engine = CondensePlusContextChatEngine.from_defaults(
    retriever,
    memory=memory,
    context_prompt=(
        "Você é um assistente virtual capaz de interagir normalmente, além de"
        " fornecer informações sobre organogramas e listar funcionários."
        " Aqui estão os documentos relevantes para o contexto:\n"
        "{context_str}"
        "\nInstrução: Use o histórico da conversa anterior, ou o contexto acima, para responder."
    ),
    verbose=True,
)
# Chat store: load the persisted JSON file when it exists; otherwise start
# with an empty store and write it out so the file is present next time.
if os.path.exists(chat_store_path):
    chat_store = SimpleChatStore.from_persist_path(persist_path=chat_store_path)
else:
    chat_store = SimpleChatStore()
    chat_store.persist(persist_path=chat_store_path)
# Chatbot interface: header, input box and the running transcript.
st.title("Chatbot Carômetro")
st.write("Este chatbot pode te ajudar a conseguir informações relevantes sobre os carômetros da Sicoob.")

# The transcript lives in the session state as "role: text" strings.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

user_input = st.chat_input("Digite sua pergunta")
if user_input:
    response = chat_engine.chat(user_input)
    st.session_state.chat_history.extend(
        [f"user: {user_input}", f"assistant: {response}"]
    )

# Replay every stored message; the text before the first colon is the role.
for message in st.session_state.chat_history:
    role, _, text = message.partition(":")
    with st.chat_message(role.strip().lower()):
        st.write(text.strip())