File size: 19,490 Bytes
1601503
 
 
9a3ad8c
1601503
 
 
 
 
 
 
 
 
 
 
 
a18c061
f70ff88
1601503
0320c4b
 
dfefc49
 
0320c4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc811e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7414c03
 
73e5d20
7414c03
eb68218
7414c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
015eded
 
 
7414c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4307f8
7414c03
 
2344a54
015eded
7414c03
 
2344a54
7414c03
e440823
 
2344a54
 
 
 
 
7414c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
015eded
 
 
 
15be7c5
 
 
 
 
7175dc6
 
 
54c1dec
7175dc6
 
 
8b020ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722c532
8b020ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfefc49
9a3541b
 
8b020ec
 
 
9a3541b
8b020ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce23d1e
8b020ec
 
ce23d1e
8b020ec
ce23d1e
8b020ec
ce23d1e
 
8b020ec
ec553ba
8b020ec
 
ce23d1e
 
8b020ec
ce23d1e
8b020ec
 
 
 
015eded
ac16cb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6292f2
8b020ec
 
c663fff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
015eded
 
977daae
7e45f68
015eded
 
911f461
722c532
7e45f68
 
 
 
 
 
 
2136522
977daae
03d8b79
911f461
7e45f68
8b020ec
c6292f2
2136522
4b7b969
 
dd23fdc
 
4b7b969
1ff7989
 
90c3a2c
4b7b969
08cdd38
4b7b969
722c532
977daae
 
08cdd38
1ff7989
722c532
dd23fdc
4b7b969
8b020ec
f70ff88
 
 
 
977daae
1ff7989
f70ff88
977daae
2a7e086
1ff7989
751fa06
90c3a2c
 
2a7e086
 
1ff7989
 
f4f3de7
 
2a7e086
 
f4f3de7
4dd2cf8
1ff7989
f70ff88
4dd2cf8
90c3a2c
751fa06
 
 
08cdd38
5f58b73
08cdd38
8632659
1ff7989
8b020ec
 
 
 
 
 
 
 
 
 
 
8f02d73
 
aea6906
 
8f02d73
 
 
 
5bb1e50
8f02d73
 
 
 
5bb1e50
8f02d73
 
 
 
5bb1e50
8b020ec
 
 
015eded
 
 
 
 
 
 
 
 
 
2136522
1ff7989
c6292f2
 
 
 
2136522
c6292f2
 
2136522
c6292f2
 
 
2136522
c6292f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac16cb8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
import numpy as np 
import pandas as pd  # type: ignore
import os
import keras
import tensorflow as tf
from tensorflow.keras.models import load_model
import pymongo
import streamlit as st
from sentence_transformers import SentenceTransformer
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_core.messages import HumanMessage, SystemMessage
from PIL import Image
import json
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# Page-level Streamlit settings: browser tab title, icon and wide layout.
st.set_page_config(
    page_title="Food Chain",
    page_icon="🍴",
    layout="wide"
)

# Fail with a readable message when the OpenAI key is missing: writing the
# None returned by os.getenv() into os.environ raises an opaque TypeError.
openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key is None:
    st.error("OPENAI_API_KEY is not set in the environment.")
    st.stop()
os.environ["OPENAI_API_KEY"] = openai_api_key

# Connection string for the recipe database; may be None when unset.
mongo_uri = os.getenv("MONGO_URI_RAG_RECIPE")

@st.cache_resource
def loadEmbedding():
    """Create the "thenlper/gte-large" SentenceTransformer used for query embeddings.

    Decorated with ``st.cache_resource``, presumably so the model is built
    once instead of on every Streamlit rerun.
    """
    return SentenceTransformer("thenlper/gte-large")


# Module-level encoder shared by getEmbedding().
embedding = loadEmbedding()


def getEmbedding(text):
    """Embed ``text`` with the module-level encoder and return it as a list.

    Returns an empty list (and logs a message) when ``text`` is empty or
    consists only of whitespace.
    """
    if text.strip():
        vector = embedding.encode(text)
        return vector.tolist()

    print("Text was empty")
    return []


# Connect to MongoDB
def get_mongo_client(mongo_uri):
    """Create a ``pymongo.MongoClient`` for ``mongo_uri``.

    Returns the client, or ``None`` when ``pymongo.errors.ConnectionFailure``
    is raised while constructing it.
    """
    # NOTE(review): recent PyMongo versions appear to connect lazily, so this
    # handler may never fire and the success message may be premature --
    # confirm against the installed PyMongo version.
    try:
        connection = pymongo.MongoClient(mongo_uri)
    except pymongo.errors.ConnectionFailure as err:
        print(f"Connection failed: {err}")
        return None
    else:
        print("Connection to MongoDB successful")
        return connection

if not mongo_uri:
    # Name the variable that is actually read via os.getenv().
    print("MONGO_URI_RAG_RECIPE not set in env")

mongo_client = get_mongo_client(mongo_uri)

# get_mongo_client() returns None on failure; subscripting None would raise
# an unhelpful TypeError, so report the problem in the page and halt instead.
if mongo_client is None:
    st.error("Could not connect to the recipe database. Check MONGO_URI_RAG_RECIPE.")
    st.stop()

# Database and collection that hold the recipe documents searched by the app.
mongo_db = mongo_client['recipes']
mongo_collection = mongo_db['recipesCollection']

def vector_search(user_query, collection):
    """Run a ``$vectorSearch`` aggregation for ``user_query`` on ``collection``.

    Args:
        user_query: Free-text query; it is embedded with ``getEmbedding``.
        collection: Collection object whose ``aggregate`` method is called.

    Returns:
        A list of matching documents limited to the projected recipe fields
        plus a ``score`` field. An empty list is returned when no embedding
        could be produced for the query (for example a blank query).
    """
    query_embedding = getEmbedding(user_query)
    # getEmbedding() signals "nothing to embed" with an empty list, so test
    # for emptiness rather than for None, and keep the return type a list.
    if not query_embedding:
        return []

    vector_search_stage = {
        "$vectorSearch": {
            "index": "vector_index",
            "queryVector": query_embedding,
            "path": "embedding",
            "numCandidates": 150,  # Number of candidate matches to consider
            "limit": 4  # Return top 4 matches
        }
    }

    unset_stage = {
        "$unset": "embedding"  # Exclude the 'embedding' field from the results
    }

    project_stage = {
        "$project": {
            "_id": 0,  # Exclude the _id field
            "name": 1,
            "minutes": 1,
            "tags": 1,
            "n_steps": 1,
            "description": 1,
            "ingredients": 1,
            "n_ingredients": 1,
            "formatted_nutrition": 1,
            "formatted_steps": 1,
            "score": {
                "$meta": "vectorSearchScore"  # Include the search score
            }
        }
    }

    pipeline = [vector_search_stage, unset_stage, project_stage]
    # Query the collection that was passed in instead of the module global.
    results = collection.aggregate(pipeline)
    return list(results)

def mongo_retriever(query):
    """Return the vector-search matches for ``query`` from ``mongo_collection``.

    Logs both the incoming query and the retrieved documents.
    """
    print("mongo retriever query: ", query)
    matches = vector_search(query, mongo_collection)
    print("Documents Retrieved: ", matches)
    return matches


# Prompt text for the RAG chain. {context} and {question} are the variables
# supplied by rag_chain; the doubled braces around the JSON skeleton are
# presumably there so the prompt template treats them as literal braces.
template = """
You are an assistant for generating results based on user questions.
Use the provided context to generate a result based on the following JSON format:
{{
  "name": "Recipe Name",
  "minutes": 0,
  "tags": [
    "tag1",
    "tag2",
    "tag3"
  ],
  "n_steps": 0,
  "description": "A GENERAL description of the recipe goes here.",
  "ingredients": [
    "0 tablespoons ingredient1",
    "0 cups ingredient2",
    "0 teaspoons ingredient3"
  ],
  "n_ingredients": 0,
  "formatted_nutrition": [
    "Calorie : per serving",
    "Total Fat : % daily value",
    "Sugar : % daily value",
    "Sodium : % daily value",
    "Protein : % daily value",
    "Saturated Fat : % daily value",
    "Total Carbohydrate : % daily value"
  ],
  "formatted_steps": [
    "1. Step 1 of the recipe.",
    "2. Step 2 of the recipe.",
    "3. Step 3 of the recipe."
  ]
}}

Instructions:
1. Focus on the user's specific request and avoid irrelevant ingredients or approaches.
2. Do not return anything other than the JSON.
3. If the answer is unclear or the context does not fully address the prompt, return [].
4. Base the response on simple, healthy, and accessible ingredients and techniques.
5. Rewrite the description in third person
6. If context does not match {question} at all, return []
7. Include the ingredient amounts and say them in the steps.

When choosing a recipe from the context, FOLLOW these instructions:
0. If context does not match {question} at all, return []
1. The recipe should be makeable from scratch, using only proper ingredients and not other dishes or pre-made recipes
2. If the recipes from the context makes sense but do not match {question}, generate an amazing, specific recipe for {question} 
   with precise steps and measurements. Take some inspiration from context if availab.e
3. Following the above template.
4. If context does not match {question} at all, return []


Context: {context}

Question: {question}
"""

# Chat prompt built from the template string above.
custom_rag_prompt = ChatPromptTemplate.from_template(template)


# Chat model shared by the RAG chain and by the dish-picking call made when a
# recipe is submitted together with an image and a query.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.2)


# Pipeline: the input query is passed to mongo_retriever to produce "context"
# and passed through unchanged as "question"; both fill the prompt, which is
# sent to the model, and the model output is parsed into a plain string.
rag_chain = (
    {"context": mongo_retriever,  "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

def get_response(query):
    """Invoke the RAG chain for ``query`` and return its output.

    A falsy ``query`` (empty string, ``None``) short-circuits to ``""``
    without invoking the chain.
    """
    if not query:
        return ""

    print("get_response query: ", query)
    return rag_chain.invoke(query)


##############################################
# Classifier
# Side length, in pixels, that images are resized to before classification.
img_size = 224


@st.cache_resource
def loadModel():
    """Load the Keras classifier stored in 'efficientnet-fine-d1.keras'.

    Decorated with ``st.cache_resource``, presumably so the model is loaded
    once instead of on every Streamlit rerun.
    """
    return load_model('efficientnet-fine-d1.keras')


# Module-level classifier used by classifyImage().
model = loadModel()


# Dish labels for the classifier. classifyImage() indexes this list with the
# positions of the model's output scores, so the order is presumably the
# model's class order -- do not reorder without confirming against training.
# The same list also populates the "Dish Database" selectbox in the sidebar.
class_names = [
    "apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare", "beet_salad", 
    "beignets", "bibimbap", "bread_pudding", "breakfast_burrito", "bruschetta", "caesar_salad", 
    "cannoli", "caprese_salad", "carrot_cake", "ceviche", "cheese_plate", "cheesecake", "chicken_curry", 
    "chicken_quesadilla", "chicken_wings", "chocolate_cake", "chocolate_mousse", "churros", "clam_chowder", 
    "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes", "deviled_eggs", "donuts", 
    "dumplings", "edamame", "eggs_benedict", "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras", 
    "french_fries", "french_onion_soup", "french_toast", "fried_calamari", "fried_rice", "frozen_yogurt", 
    "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich", "grilled_salmon", "guacamole", "gyoza", 
    "hamburger", "hot_and_sour_soup", "hot_dog", "huevos_rancheros", "hummus", "ice_cream", "lasagna", 
    "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup", "mussels", 
    "nachos", "omelette", "onion_rings", "oysters", "pad_thai", "paella", "pancakes", "panna_cotta", "peking_duck", 
    "pho", "pizza", "pork_chop", "poutine", "prime_rib", "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", 
    "risotto", "samosa", "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits", "spaghetti_bolognese", 
    "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake", "sushi", "tacos", "takoyaki", "tiramisu", 
    "tuna_tartare", "waffles"
]

def classifyImage(input_image):
    """Classify a PIL image and return the five highest-scoring dishes.

    Args:
        input_image: PIL image; it is converted to RGB and resized to
            ``img_size`` x ``img_size`` before prediction.

    Returns:
        A list of ``(class_name, confidence_percent)`` tuples ordered from
        the highest to the lowest score.
    """
    # JPEG uploads may be grayscale or CMYK; force three RGB channels so the
    # array matches the (1, 224, 224, 3) batch shape built below.
    rgb_image = input_image.convert("RGB")
    rgb_image = rgb_image.resize((img_size, img_size))
    input_array = tf.keras.utils.img_to_array(rgb_image)

    # Add a batch dimension
    input_array = tf.expand_dims(input_array, 0)  # (1, 224, 224, 3)

    predictions = model.predict(input_array)[0]
    print(f"Predictions: {predictions}")

    # argsort is ascending: take the last five indices and flip them so the
    # best score comes first.
    top_indices = np.argsort(predictions)[-5:][::-1]

    # Pair each index with its label and express the score as a percentage.
    top_predictions = [(class_names[i], predictions[i] * 100) for i in top_indices]
    for rank, (class_name, confidence) in enumerate(top_predictions, 1):
        print(f"{rank}. Predicted {class_name} with {confidence:.1f}% Confidence")

    return top_predictions

def capitalize_after_number(input_string):
    """Capitalize the text following the first ". " in a numbered step.

    The part before the first ". " is kept as is and the remainder is passed
    through ``str.capitalize`` (first character upper-cased, the rest
    lower-cased). Strings without ". " are returned unchanged.
    """
    prefix, separator, remainder = input_string.partition(". ")
    if not separator:
        return input_string
    return f"{prefix}. {remainder.capitalize()}"
##############################################

#for displaying RAG recipe response
def display_response(response):
    """
    Render a recipe in the Streamlit page using `st.write()`.

    Args:
        response: Either a recipe dict or a JSON string encoding one. The
            keys read are 'name', 'minutes', 'description', 'tags',
            'ingredients', 'n_ingredients', 'formatted_nutrition', 'n_steps'
            and 'formatted_steps'.

    Anything that does not resolve to a non-empty dict -- an empty string,
    "[]", malformed JSON, or a JSON list -- is reported as
    "No recipes found :(" instead of raising.
    """
    if isinstance(response, str):
        # Convert JSON string to dictionary if necessary. The model may
        # return an empty or non-JSON answer, which must not crash the page.
        try:
            response = json.loads(response)
        except json.JSONDecodeError:
            response = None

    if not isinstance(response, dict) or not response:
        st.write("No recipes found :(")
        return

    st.write(f"**Name:** {response['name'].capitalize()}")
    st.write(f"**Preparation Time:** {response['minutes']} minutes")
    st.write(f"**Description:** {response['description'].capitalize()}")
    st.write(f"**Tags:** {', '.join(response['tags'])}")
    st.write("### Ingredients")
    st.write(", ".join([ingredient.capitalize() for ingredient in response['ingredients']]))
    st.write(f"**Total Ingredients:** {response['n_ingredients']}")
    st.write("### Nutrition Information (per serving)")
    st.write(", ".join(response['formatted_nutrition']))
    st.write(f"**Number of Steps:** {response['n_steps']}")
    st.write("### Steps")
    for step in response['formatted_steps']:
        st.write(capitalize_after_number(step))

def display_dishes_in_grid(dishes, cols=3):
    """Lay out dish names in rows of at most ``cols`` Streamlit columns.

    Underscores in each name are replaced by spaces and the result is passed
    through ``str.capitalize`` before being written.
    """
    full_rows, leftover = divmod(len(dishes), cols)
    row_count = full_rows + (1 if leftover > 0 else 0)

    for row_index in range(row_count):
        start = row_index * cols
        row_dishes = dishes[start:start + cols]
        row_columns = st.columns(len(row_dishes))
        for column, dish in zip(row_columns, row_dishes):
            with column:
                # NOTE(review): this writes to the sidebar although it runs
                # inside the column context -- confirm which is intended.
                st.sidebar.write(dish.replace("_", " ").capitalize())
# #Streamlit

# Left sidebar title, rendered as raw HTML to control the font size.
st.sidebar.markdown(
    "<h1 style='font-size:32px;'>Food-Chain</h1>", 
    unsafe_allow_html=True
)
st.sidebar.write("Upload an image and/or enter a query to get started! Explore our trained dish types listed below for guidance.")

# Image input for the classifier; only files of type "jpg" are accepted.
st.sidebar.markdown('### Food Classification')
uploaded_image = st.sidebar.file_uploader("Choose an image:", type="jpg")

# Free-text query and the button that triggers classification / recipe lookup.
st.sidebar.markdown('### RAG Recipe')
query = st.sidebar.text_area("Enter your query:", height=100)
recipe_submit = st.sidebar.button(label='Chain Recipe', icon=':material/link:', use_container_width=True)

# Vertical gap before the dish list.
st.sidebar.markdown("<br><br>", unsafe_allow_html=True)
st.sidebar.markdown("### Dish Database")
# Searchable list of the labels the classifier knows. The selected value is
# not read anywhere else in this file, so the widget is informational only.
selected_dish = st.sidebar.selectbox(
    "Search for a dish that our model can classify:",
    options=class_names,
    index=0  
)

# Main page title followed by a collapsible description of the project.
st.title("Welcome to FOOD CHAIN!")
with st.expander("**What is FOOD CHAIN?**"):
    st.markdown(
        """
        The project aims to use machine learning and computer vision techniques to analyze food images 
        and identify them. By using diverse datasets, the model will learn to recognize dishes based on 
        visual features. Our project aims to inform users about what it is they are eating, including 
        potential nutritional value and an AI generated response on how their dish might have been prepared. 
        We want users to have an easy way to figure out what their favorite foods contain, to know any 
        allergens in the food and to better connect to the food around them. This tool can also tell users 
        the calories of their dish, they can figure out the nutrients with only a few steps!

        Thank you for using our project!

        Made by the Classify Crew: [Contact List](https://linktr.ee/classifycrew)
        """
    )

#################

# Hard-coded example recipe in the same shape that display_response() reads.
# It is shown in the "RAG Recipe" column until the user presses the submit
# button.
sample_RAG = {
    "name": "Cinnamon Sugar Baked Donuts",
    "minutes": 27,
    "tags": [
        "30-minutes-or-less",
        "time-to-make",
        "course",
        "cuisine",
        "preparation",
        "occasion",
        "north-american",
        "healthy",
        "desserts",
        "american",
        "dietary",
        "comfort-food",
        "taste-mood"
    ],
    "n_steps": 10,
    "description": "A delightful treat with a crusty sugar-cinnamon coating, perfect for a weekend breakfast or snack. Leftovers freeze well.",
    "ingredients": [
        "1 cup flour",
        "1 teaspoon baking powder",
        "1 teaspoon cinnamon",
        "1/2 teaspoon nutmeg",
        "1/4 teaspoon mace",
        "1/4 teaspoon salt",
        "1/2 cup sugar",
        "1 egg",
        "1/2 cup milk",
        "2 tablespoons butter, melted",
        "1 teaspoon vanilla",
        "1/4 cup brown sugar"
    ],
    "n_ingredients": 12,
    "formatted_nutrition": [
        "Calorie : 302.9 per serving",
        "Total Fat : 11.0 % daily value",
        "Sugar : 154.0 % daily value",
        "Sodium : 9.0 % daily value",
        "Protein : 7.0 % daily value",
        "Saturated Fat : 22.0 % daily value",
        "Total Carbohydrate : 18.0 % daily value"
    ],
    "formatted_steps": [
        "1. Mix all dry ingredients in a medium-size bowl",
        "2. In a smaller bowl, beat the egg",
        "3. Mix the egg with milk and melted butter",
        "4. Add vanilla to the mixture",
        "5. Stir the milk mixture into the dry ingredients until just combined, being careful not to overmix",
        "6. Pour the batter into a greased donut baking tin, filling approximately 3/4 full",
        "7. Mix cinnamon into brown sugar and sprinkle over the donuts",
        "8. Drizzle or spoon melted butter over the top of each donut",
        "9. Bake in a 350-degree oven for 17 minutes",
        "10. Enjoy!"
    ]
}

# Two side-by-side panes: image classification (left), recipe (right).
col1, col2 = st.columns(2)

with col1:
    st.title("Image Classification")

    if uploaded_image:
        # Open the uploaded file and show it; input_image is reused by the
        # classification step further down.
        input_image = Image.open(uploaded_image)
        st.image(input_image, caption="Uploaded Image.", use_container_width=True)
    elif not recipe_submit:
        # Nothing uploaded or submitted yet: show the canned example.
        placeholder = Image.open("dish-placeholder.jpg")
        st.image(placeholder, caption="Placeholder Image.", use_container_width=True)
        st.header("Top Predictions:")
        st.markdown("*Donuts*: 98.1%")

with col2:
    st.title('RAG Recipe')
    # Until the user submits, show the sample recipe.
    if not recipe_submit:
        display_response(sample_RAG)
        
# Submit handling.
# - Image (with or without a query): classify the image, chart the top
#   predictions, then fetch a recipe for the best-matching dish.
# - Query only: run the recipe chain directly on the query.
# - Neither: ask the user for input instead of rendering an empty page.
if recipe_submit and uploaded_image:
    with col1:
        predictions = classifyImage(input_image)
        print("Predictions: ", predictions)

        # "label: xx.x%," summary of the predictions, handed to the LLM below.
        fpredictions = "".join(
            f"{label}: {confidence:.1f}%," for label, confidence in predictions
        )
        print(fpredictions)

        # Chart data, reversed so the graph displays the highest prediction at
        # the top. Dedicated names are used so the module-level `class_names`
        # label list is not overwritten by display labels.
        chart_labels = [label.replace("_", " ").title() for label, _ in predictions]
        chart_confidences = [confidence for _, confidence in predictions]
        chart_labels.reverse()
        chart_confidences.reverse()

        # display as a graph
        norm = plt.Normalize(min(chart_confidences), max(chart_confidences))
        cmap = LinearSegmentedColormap.from_list("grey_orange", ["#808080", "#FFA500"])  # color map grey to orange

        fig, ax = plt.subplots(figsize=(12, 6))
        bars = ax.barh(chart_labels, chart_confidences, color=cmap(norm(chart_confidences)))

        fig.patch.set_alpha(0)  # Transparent background
        ax.set_facecolor('none')

        min_width = 0.07 * ax.get_xlim()[1]  # 7% of the x-axis range
        # Add labels inside the bars, aligned to the right. Very short bars
        # get their label placed at min_width so the text stays readable.
        for bar in bars:
            original_width = bar.get_width()
            width = max(original_width, min_width)
            ax.text(width - 0.02, bar.get_y() + bar.get_height()/2, f'{original_width:.1f}%',
                    va='center', ha='right', color='white', fontweight='bold', fontsize=16)

        ax.set_xticklabels([])  # remove x label
        ax.tick_params(axis='y', colors='white', labelsize=16)

        # no borders
        for spine in ax.spines.values():
            spine.set_visible(False)

        # After the reversal the last label is the top prediction.
        ax.set_title(chart_labels[-1], color='white', fontsize=16, fontweight='bold', ha='center')

        st.pyplot(fig)  # Display the plot
        # The script reruns on every interaction; close the figure so pyplot
        # does not keep accumulating open figures.
        plt.close(fig)

    # call openai to pick the best classification result based on query
    openAICall = [
        SystemMessage(
            content = "You are a helpful assistant that identifies the best match between classified food items and a user's request based on provided classifications and keywords."
        ),
        HumanMessage( 
            content = f"""
                Based on the following image classification with percentages of each food:
                {fpredictions}
                And the following user request:
                {query}
                1. If the user's query relates to any of the classified predictions (even partially or conceptually), select the most relevant dish from the predictions.
                2. If the query does not align with the predictions, disregard them and suggest a dish that best matches the user's query.
                3. Return in the format: [dish]
                4. ONLY return the name of the dish in brackets. 

                Example 1:
                Predictions: apple pie: 50%, cherry tart: 30%, vanilla ice cream: 20%
                User query: pumpkin
                YOUR Response: [pumpkin pie]
                
                Example 2:
                Predictions: spaghetti: 60%, lasagna: 30%, salad: 10%
                User query: pasta with layers
                YOUR Response: [lasagna]
                
                Example 3:
                Predictions: sushi: 70%, sashimi: 20%, ramen: 10%
                User query: noodles
                YOUR Response: [ramen]
            """
        ),
    ]
    with col2:
        if query:
            # Call the OpenAI API to reconcile the predictions with the query,
            # then search recipes for the chosen dish plus the original query.
            openAIresponse = llm.invoke(openAICall)
            print("AI CALL RESPONSE: ", openAIresponse.content, "END AI CALL RESONSE")

            RAGresponse = get_response(openAIresponse.content + " " + query)
        else:
            # No query: look up a recipe for the top predicted label.
            RAGresponse = get_response(predictions[0][0])
        print("RAGresponse: ", RAGresponse)
        display_response(RAGresponse)
elif recipe_submit:
    # Submitted without an image: the sidebar invites "an image and/or a
    # query", so a query on its own must still produce a recipe.
    with col2:
        if query.strip():
            RAGresponse = get_response(query)
            print("RAGresponse: ", RAGresponse)
            display_response(RAGresponse)
        else:
            st.warning("Upload an image or enter a query to chain a recipe.")

# elif uploaded_image is not None:
#     with col1:
#         # Open the image
#         input_image = Image.open(uploaded_image)
    
#         # Display the image
#         st.image(input_image, caption="Uploaded Image.", use_container_width=True)
    
#         # Classify the image and display the result
#         predictions = classifyImage(input_image)
#         fpredictions = ""
    
#         # Show the top predictions with percentages
#         st.write("Top Predictions:")
#         for class_name, confidence in predictions:
#             if int(confidence) > 0.05:
#                 fpredictions += f"{class_name}: {confidence:.2f}%,"
#             if int(confidence) > 5:
#                 class_name = class_name.replace("_", " ")
#                 class_name = class_name.title()
#                 st.markdown(f"*{class_name}*: {confidence:.2f}%")
#         print(fpredictions)

# elif recipe_submit:
#     with col2:
#         response = get_response(query)
#         print(response)
#         display_response(response)