Spaces:
Running
Running
File size: 2,956 Bytes
e43f92e 31d62a0 0f705a4 e43f92e 0f705a4 e43f92e 0f705a4 e43f92e 05b3af6 0f705a4 05b3af6 0f705a4 e43f92e 0f705a4 e43f92e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
from time import sleep
import streamlit as st
import openai
import pinecone
from postgres_db import query_postgresql_realvest
# API credentials are read from Streamlit's secrets store.
PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]

# Name of the Pinecone index this app queries.
INDEX_NAME = "realvest-data-v2"

# OpenAI's best embeddings as of Apr 2023
EMBEDDING_MODEL = "text-embedding-ada-002"
def test_pinecone(sleep_time: int=1):
    """Probe the Pinecone index by fetching its stats, retrying on protocol errors.

    Args:
        sleep_time: Seconds to wait between failed attempts.

    Returns:
        The value returned by ``index.describe_index_stats()`` on the first
        attempt that does not raise.

    Raises:
        RuntimeError: If every one of the ``MAX_TRIALS`` attempts fails with
            ``PineconeProtocolError``. The last such error is chained as the
            cause so the underlying failure stays visible in the traceback.
    """
    MAX_TRIALS = 5
    stats = None
    last_err = None
    for trial in range(MAX_TRIALS):
        try:
            print(f"BEFORE: trial: {trial}; stats: {stats}")
            stats = index.describe_index_stats()
            print(f"AFTER: trial: {trial}; stats: {stats}")
            return stats
        except pinecone.core.exceptions.PineconeProtocolError as err:
            print(f"Error, sleep! {err}")
            # `err` is unbound when the except block exits, so keep a reference
            # for chaining once the attempts are exhausted.
            last_err = err
            # Only wait when another attempt will follow; sleeping after the
            # final failure would just delay the error.
            if trial < MAX_TRIALS - 1:
                sleep(sleep_time)
    raise RuntimeError(f'max trials {MAX_TRIALS} Exceeded!') from last_err
def query_pinecone(xq, top_k: int=3, include_metadata: bool=True, sleep_time: int=1):
    """Query the Pinecone index, retrying on protocol errors.

    Args:
        xq: Query value passed as the first positional argument to
            ``index.query`` (the caller in this file passes an embedding).
        top_k: Forwarded to ``index.query`` as ``top_k``.
        include_metadata: Forwarded to ``index.query`` as ``include_metadata``.
        sleep_time: Seconds to wait between failed attempts.

    Returns:
        The value returned by ``index.query`` on the first attempt that does
        not raise.

    Raises:
        RuntimeError: If every one of the ``MAX_TRIALS`` attempts fails with
            ``PineconeProtocolError``. The last such error is chained as the
            cause so the underlying failure stays visible in the traceback.
    """
    MAX_TRIALS = 5
    last_err = None
    for trial in range(MAX_TRIALS):
        try:
            return index.query(xq, top_k=top_k, include_metadata=include_metadata)
        except pinecone.core.exceptions.PineconeProtocolError as err:
            print(f"Error, sleep! {err}")
            # `err` is unbound when the except block exits, so keep a reference
            # for chaining once the attempts are exhausted.
            last_err = err
            # Only wait when another attempt will follow; sleeping after the
            # final failure would just delay the error.
            if trial < MAX_TRIALS - 1:
                sleep(sleep_time)
    raise RuntimeError(f'max trials {MAX_TRIALS} Exceeded!') from last_err
# Connect to Pinecone (get the API key at app.pinecone.io).
# The environment may be different for your project; check at app.pinecone.io.
pinecone.init(api_key=PINECONE_API_KEY, environment="us-central1-gcp")
index = pinecone.Index(INDEX_NAME)

# Probe the index each time the script runs and log its stats.
stats = test_pinecone()
print(f"Pinecone DB stats: {stats}")
### Main
def _build_products_sql(pids):
    """Return the SQL that selects the main_products rows for the given product ids."""
    # The ids are embedded as string literals, so double any single quote to
    # keep each literal well-formed.
    # NOTE(review): a parameterized query would be preferable, but the
    # signature of query_postgresql_realvest is not visible from this file.
    quoted_ids = ", ".join(
        "'" + str(pid).replace("'", "''") + "'"
        for pid in pids
    )
    return f"""
SELECT productid, name, category, alternatename, url, logo, description
FROM main_products
WHERE productid in ({quoted_ids});
"""


# Free-text search box.
query = st.text_input("What are you looking for?")

# Run the search only when the user presses the button.
if st.button('Submit'):
    if not query.strip():
        # Nothing to embed; skip the OpenAI and Pinecone calls.
        st.warning("Please enter a search query.")
    else:
        # Embed the user's text with OpenAI.
        res = openai.Embedding.create(model=EMBEDDING_MODEL, input=[query], api_key=OPENAI_API_KEY)
        xq = res['data'][0]['embedding']

        # Nearest-neighbour lookup in Pinecone (with retries).
        out = query_pinecone(xq, top_k=3, include_metadata=True)

        # Candidate product ids taken from each match's metadata.
        pids = [
            match['metadata']['product_id']
            for match in out['matches']
        ]

        if not pids:
            # An empty id list would produce `in ()`, which is invalid SQL.
            st.info("No matching products found.")
        else:
            # Fetch the full product rows for the matched ids.
            sql = _build_products_sql(pids)
            results = query_postgresql_realvest(sql)
            print(results)
            for result in results:
                st.write("---")
                st.json(result)
|