File size: 2,956 Bytes
e43f92e
31d62a0
0f705a4
 
e43f92e
0f705a4
 
 
 
 
 
e43f92e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f705a4
 
 
 
 
 
e43f92e
05b3af6
0f705a4
 
 
 
 
 
 
 
05b3af6
0f705a4
 
e43f92e
 
0f705a4
 
e43f92e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from time import sleep
import streamlit as st
import openai
import pinecone
from postgres_db import query_postgresql_realvest

PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
INDEX_NAME = 'realvest-data-v2'
EMBEDDING_MODEL = "text-embedding-ada-002"  # OpenAI's best embeddings as of Apr 2023

def test_pinecone(sleep_time: int=1):
    MAX_TRIALS = 5
    trial = 0
    stats = None
    while (stats is None) and (trial < MAX_TRIALS):
        try:
            print(f"BEFORE: trial: {trial}; stats: {stats}")
            stats = index.describe_index_stats()
            print(f"AFTER: trial: {trial}; stats: {stats}")
            return stats
        except pinecone.core.exceptions.PineconeProtocolError as err:
            print(f"Error, sleep! {err}")
            sleep(sleep_time)
            trial = trial + 1
    
    raise Exception(f'max trials {MAX_TRIALS} Exceeded!')

def query_pinecone(xq, top_k: int=3, include_metadata: bool=True, sleep_time: int=1):
    MAX_TRIALS = 5
    trial = 0
    out = None
    while (out is None) and (trial < MAX_TRIALS):
        try:
            # print(f"BEFORE: trial: {trial}; stats: {out}")
            out = index.query(xq, top_k=top_k, include_metadata=include_metadata)
            # print(f"AFTER: trial: {trial}; stats: {out}")
            return out
        except pinecone.core.exceptions.PineconeProtocolError as err:
            print(f"Error, sleep! {err}")
            sleep(sleep_time)
            trial = trial + 1
    
    raise Exception(f'max trials {MAX_TRIALS} Exceeded!')

# initialize connection to pinecone (get API key at app.pinecone.io)
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment="us-central1-gcp"  # may be different, check at app.pinecone.io
)
index = pinecone.Index(INDEX_NAME)
stats = test_pinecone()
print(f"Pinecone DB stats: {stats}")

### Main
# Create a text input field
query = st.text_input("What are you looking for?")

# Create a button
if st.button('Submit'):

    # ### call OpenAI text-embedding
    res = openai.Embedding.create(model=EMBEDDING_MODEL, input=[query], api_key=OPENAI_API_KEY)
    xq = res['data'][0]['embedding']
    # out = index.query(xq, top_k=3, include_metadata=True)
    out = query_pinecone(xq, top_k=3, include_metadata=True)

    ### display
    # print(f"{'*'*30}results #3: {out}")
    # st.write("Matched results")
    # for match in out['matches']:
    #     st.write( match['id'] )

    ### candidates
    pids = [
        match['metadata']['product_id']
        for match in out['matches']
    ]

    ### query pids
    pids_str = [f"'{pid}'"for pid in pids]
    query = f"""
    SELECT productid, name, category, alternatename, url, logo, description
    FROM main_products
    WHERE productid in ({', '.join(pids_str)});
    """

    results = query_postgresql_realvest(query)
    print(results)

    for result in results:
        st.write("---")
        st.json(result)