File size: 4,669 Bytes
1601503
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0320c4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc811e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7414c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc811e2
06f80ec
7414c03
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import numpy as np 
import pandas as pd  # type: ignore
import os
import keras
import tensorflow as tf
from tensorflow.keras.models import load_model
import pymongo
import streamlit as st
from sentence_transformers import SentenceTransformer
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage
from PIL import Image
import json

# Page title and icon passed to Streamlit's page configuration.
st.set_page_config(page_title="Food Chain", page_icon="🍴")

# Read credentials and connection settings from the environment.
# os.environ only accepts str values, so writing os.getenv()'s result straight
# back raises TypeError when the variable is unset. Guard the assignment and
# report the missing key, mirroring how a missing Mongo URI is reported.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is None:
    print("OPENAI_API_KEY not set in env")
else:
    os.environ["OPENAI_API_KEY"] = _openai_api_key
mongo_uri = os.getenv("MONGO_URI_RAG_RECIPE")

@st.cache_resource
def loadEmbedding():
    """Build and return the "thenlper/gte-large" SentenceTransformer model.

    Wrapped in ``st.cache_resource`` — presumably so Streamlit reuses the
    loaded model instead of rebuilding it on each script rerun.
    """
    return SentenceTransformer("thenlper/gte-large")


# Module-level model instance used by getEmbedding().
embedding = loadEmbedding()


def getEmbedding(text):
    """Encode *text* with the module-level embedding model.

    Args:
        text: The string to embed. ``None`` is treated like empty text.

    Returns:
        The encoded vector converted with ``tolist()``, or an empty list
        when *text* is ``None``, empty, or only whitespace.
    """
    # Guard None explicitly: calling .strip() on it would raise AttributeError.
    if text is None or not text.strip():
        print("Text was empty")
        return []
    encoded = embedding.encode(text)
    return encoded.tolist()


# Connect to MongoDB
def get_mongo_client(mongo_uri):
    """Create a ``pymongo.MongoClient`` for *mongo_uri*.

    Args:
        mongo_uri: Connection string handed to ``pymongo.MongoClient``.

    Returns:
        The client, or ``None`` if ``ConnectionFailure`` is raised while
        constructing it.
    """
    # NOTE(review): MongoClient may connect lazily, in which case this
    # handler would rarely fire and the success message would not prove the
    # server is reachable — confirm against the PyMongo documentation.
    try:
        mongo_conn = pymongo.MongoClient(mongo_uri)
    except pymongo.errors.ConnectionFailure as exc:
        print(f"Connection failed: {exc}")
        return None
    print("Connection to MongoDB successful")
    return mongo_conn

# A missing URI is only reported; the client is still created below.
if not mongo_uri:
    print("MONGO_URI not set in env")

mongo_client = get_mongo_client(mongo_uri)
# get_mongo_client() returns None when it catches a connection failure.
# Fail with a clear message instead of the TypeError that subscripting None
# would raise on the next line.
if mongo_client is None:
    raise RuntimeError(
        "MongoDB client could not be created; check MONGO_URI_RAG_RECIPE"
    )

# Database and collection handles used by the vector search.
mongo_db = mongo_client['recipes']
mongo_collection = mongo_db['recipesCollection']

def vector_search(user_query, collection):
    """Run a ``$vectorSearch`` aggregation for *user_query* on *collection*.

    Args:
        user_query: Free-text query; embedded with ``getEmbedding``.
        collection: Object whose ``aggregate()`` executes the pipeline.

    Returns:
        A list of the matching documents (projected fields plus ``score``),
        or the string "Invalid query or embedding gen failed" when no
        embedding could be produced for the query.
    """
    query_embedding = getEmbedding(user_query)
    # getEmbedding() reports failure with an empty list rather than None, so
    # test truthiness; an `is None` check would let an empty vector through.
    if not query_embedding:
        return "Invalid query or embedding gen failed"

    vector_search_stage = {
        "$vectorSearch": {
            "index": "vector_index",
            "queryVector": query_embedding,
            "path": "embedding",
            "numCandidates": 150,  # Number of candidate matches to consider
            "limit": 4,  # Return top 4 matches
        }
    }

    unset_stage = {
        "$unset": "embedding"  # Exclude the 'embedding' field from the results
    }

    project_stage = {
        "$project": {
            "_id": 0,  # Exclude the _id field
            "name": 1,
            "minutes": 1,
            "tags": 1,
            "n_steps": 1,
            "description": 1,
            "ingredients": 1,
            "n_ingredients": 1,
            "formatted_nutrition": 1,
            "formatted_steps": 1,
            "score": {
                "$meta": "vectorSearchScore"  # Include the search score
            },
        }
    }

    pipeline = [vector_search_stage, unset_stage, project_stage]
    # Query the collection passed by the caller, not the module-level global,
    # so the `collection` argument is actually honoured.
    results = collection.aggregate(pipeline)
    return list(results)

def mongo_retriever(query):
    """Return the vector-search results for *query* from ``mongo_collection``."""
    return vector_search(query, mongo_collection)


# Prompt text for the RAG chain. {context} and {question} are the two
# template variables; they correspond to the "context" and "question" keys
# supplied by rag_chain. The doubled braces around the JSON example are
# presumably escapes so they are emitted literally rather than read as
# variables — confirm against the ChatPromptTemplate documentation.
template = """
You are an assistant for generating results based on user questions.
Use the provided context to generate a result based on the following JSON format:
{{
  "name": "Recipe Name",
  "minutes": 0,
  "tags": [
    "tag1",
    "tag2",
    "tag3"
  ],
  "n_steps": 0,
  "description": "A GENERAL description of the recipe goes here.",
  "ingredients": [
    "ingredient1",
    "ingredient2",
    "ingredient3"
  ],
  "n_ingredients": 0,
  "formatted_nutrition": [
    "Calorie : per serving",
    "Total Fat : % daily value",
    "Sugar : % daily value",
    "Sodium : % daily value",
    "Protein : % daily value",
    "Saturated Fat : % daily value",
    "Total Carbohydrate : % daily value"
  ],
  "formatted_steps": [
    "1. Step 1 of the recipe.",
    "2. Step 2 of the recipe.",
    "3. Step 3 of the recipe."
  ]
}}

Instructions:
1. Focus on the user's specific request and avoid irrelevant ingredients or approaches.
2. Do not return anything other than the JSON.
3. If the answer is unclear or the context does not fully address the prompt, return []
4. Base the response on simple, healthy, and accessible ingredients and techniques.
5. Rewrite the description in third person

Context: {context}

When choosing a recipe from the context, FOLLOW these instructions:
1. The recipe should be makeable from scratch, using only proper ingredients and not other dishes or pre-made recipes

Question: {question}
"""

# Chat prompt built from the template text above.
custom_rag_prompt = ChatPromptTemplate.from_template(template)


# Chat model that generates the final answer for the RAG chain.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2)


# Input mapping for the prompt: "context" comes from mongo_retriever and
# "question" from RunnablePassthrough() (presumably the unchanged query —
# confirm against the LangChain runnable documentation).
_rag_inputs = {"context": mongo_retriever, "question": RunnablePassthrough()}

# Pipeline order: input mapping -> prompt -> chat model -> string parser.
rag_chain = _rag_inputs | custom_rag_prompt | llm | StrOutputParser()

def get_response(query):
    """Invoke ``rag_chain`` with *query* and return what it produces."""
    answer = rag_chain.invoke(query)
    return answer
    
# NOTE(review): looks like a leftover debug trace — confirm before removing.
print("HELLO WORLD")
# Page heading rendered through Streamlit.
# NOTE(review): the heading text looks like a placeholder — confirm the intended wording.
st.title("RESSSSULTS")