In [1]:
import os
import json
import gradio as gr
from llama_index import (
 VectorStoreIndex,
 download_loader,
)
import chromadb

from llama_index.llms import MistralAI
from llama_index.embeddings import MistralAIEmbedding
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index import ServiceContext

In [2]:
from llama_index import Document

In [4]:
title = "Gaia Mistral Chat RAG PDF Demo"
description = "Example of an assistant with Gradio, RAG from PDF documents and Mistral AI via its API"
placeholder = (
 "Vous pouvez me posez une question sur ce contexte, appuyer sur Entrée pour valider"
)
placeholder_url = "Extract text from this url"
llm_model = "mistral-medium"

env_api_key = "Yb2kAF0DR4Mva5AEmoYFV3kYRAKdXB7i"
query_engine = None

# Define LLMs
llm = MistralAI(api_key=env_api_key, model=llm_model)
embed_model = MistralAIEmbedding(model_name="mistral-embed", api_key=env_api_key)

# create client and a new collection
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("quickstart")

# set up ChromaVectorStore and load in data
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(
 chunk_size=1024, llm=llm, embed_model=embed_model
)

PDFReader = download_loader("PDFReader")
loader = PDFReader()

index = VectorStoreIndex(
 [], service_context=service_context, storage_context=storage_context
)
query_engine = index.as_query_engine(similarity_top_k=5)

In [171]:
ids = chroma_collection.get()["ids"]
chroma_collection.delete(ids)

In [5]:
pdf_file = "/Users/robertgray/Downloads/rotations_2.pdf"

In [6]:
documents = loader.load_data(file=pdf_file)

In [7]:
for doc in documents:
 index.insert(doc)

In [12]:
response = query_engine.query("What's a typical rotation in Normandy?")

In [13]:
response.response

'In Normandy, a typical rotation in agriculture includes various crops such as maize, forage, and associations of cereals and protein crops. For instance, a common rotation is PT (4-5 years) - maize forage - associations of cerpro (2 years). In systems of livestock farming and polyculture, rotations of type PT 4-5 years (multi-species with predominance RGA, fescue, and white clover) - maize - wheat - fava beans - wheat - associations of cerpro (9-10 years) are also found. The zones of large crops are mainly located in the Orne, in the Perche area near Eure and Loire, where rotations of type PT 2-3 years (violet clover and RG hybrid or alfalfa-dactyl) - wheat – maize grain – associations of cerpro (triticale/forage peas, triticale/fava beans, protein peas/wheat) – fava beans H – wheat – associations of cerpro or pure cereal (buckwheat, rye, spelt) – lupin P – wheat – rye P are common, lasting for 11 years.'

In [14]:
import pandas as pd

In [15]:
production_costs = ""
for i, row in pd.read_excel("/Users/robertgray/Downloads/data_cout_production_grandes_cultures_2021_2025.xlsx").iterrows():
 production_costs += f"En {row['ANNEE']}, le coût de production en moyenne pour {row['CULTURES']} était {row['MOYENNE']} euros par tonne. "

In [191]:
index.insert(Document(text=production_costs))

In [142]:
response = query_engine.query("")
response.response

In [18]:
prices = pd.read_excel("/Users/robertgray/Downloads/data_marches_grandes_cultures_2006_2024.xlsx")

In [19]:
prices.columns=["date", "price", "crop"]

In [20]:
prices_text = ""
for year, price in zip(prices[:15]["date"], prices[:15]["price"]):
 prices_text += f"En {year} le prix moyen de pommes de terre était {price} euros per tonne. "
for year, price in zip(prices[:15]["date"], prices[16:30]["price"]):
 prices_text += f"En {year} le prix moyen de betterave était {price} euros per tonne. "

In [22]:
for crop in prices[30:]["crop"].unique():
 prices_s = prices[prices["crop"] == crop].copy()
 for y, p in prices_s.groupby(pd.to_datetime(prices_s["date"]).dt.year)["price"].mean().round(2).to_dict().items():
 prices_text += f"En {y} le prix moyen de {crop} était {p} euros per tonne. "

In [23]:
index.insert(Document(text=prices_text))

In [30]:
response = query_engine.query("What was the price of peas in 2019?")
response.response

'The price of peas in 2019 was 206.48 euros per tonne.'

In [29]:
response = query_engine.query("""
If I had a plot of peas with a yield of 10t/ha in 2019, and the associated cost was
80 euros, what was my profit?
""")
response.response

"To calculate the profit, we need to know the selling price of the peas. Without that information, it's impossible to determine the profit from growing peas with a yield of 10t/ha and a cost of 80 euros."

In [None]:
response = query_engine.query("""
You just told me the price of peas.
""")
response.response

In [None]:
msg = gr.Textbox(placeholder=placeholder)
clear = gr.ClearButton([msg, chatbot])

def respond(message, chat_history):
 response = query_engine.query(message)
 chat_history.append((message, str(response)))

In [168]:
from datasets import load_dataset

#dataset = load_dataset("team-lfd-sia-pro/data_marches_2006_2024")

In [24]:
import json

In [178]:
with open("/Users/robertgray/Downloads/data_Hauts_de_France.json", "r") as f:
 yield_data = json.load(f)

In [211]:
yield_data

[{'LIB_CODE': "01 - Blé tendre d'hiver et épeautre",
 'SURF_2010': 803341.0,
 'SURF_2011': 842006.0,
 'SURF_2012': 842926.0,
 'SURF_2013': 823520.0,
 'SURF_2014': 845165.0,
 'SURF_2015': 854213.0,
 'SURF_2016': 834800.0,
 'SURF_2017': 813800.0,
 'SURF_2018': 806980.0,
 'SURF_2019': 822220.0,
 'SURF_2020': 763350.0,
 'SURF_2021': 826200.0,
 'SURF_2022': 793972.0,
 'REND_2010': 85.44,
 'REND_2011': 83.53,
 'REND_2012': 81.57,
 'REND_2013': 90.7,
 'REND_2014': 89.57,
 'REND_2015': 97.11,
 'REND_2016': 58.06,
 'REND_2017': 87.26,
 'REND_2018': 84.6,
 'REND_2019': 94.49,
 'REND_2020': 88.08,
 'REND_2021': 82.01,
 'REND_2022': 90.78,
 'PROD_2010': 68634094.0,
 'PROD_2011': 70333994.0,
 'PROD_2012': 68757112.0,
 'PROD_2013': 74690930.0,
 'PROD_2014': 75703724.0,
 'PROD_2015': 82949187.0,
 'PROD_2016': 48468100.0,
 'PROD_2017': 71011100.0,
 'PROD_2018': 68270570.0,
 'PROD_2019': 77688780.0,
 'PROD_2020': 67237130.0,
 'PROD_2021': 67755200.0,
 'PROD_2022': 72080064.0},
 {'LIB_CODE': '02 - Blé t

In [207]:
yield_text = ""
for crop in yield_data:
 if crop.lower() in prices["crop"].str.lower().unique():
 for k in crop.keys():
 if "REND" in k:
 year = k.split("_")[1]
 yield_text += f'Le rendement moyen par hectare pour {crop["LIB_CODE"]} en {year} était {crop[k]}. '

AttributeError: 'dict' object has no attribute 'lower'

In [188]:
index.insert(Document(text=yield_text))

In [192]:
response = query_engine.query("Given the provided context, what would be the expected profit from a field of 10ha growing wheat in 2022?")
response.response

'To answer this question, we first need to identify the relevant data for wheat in the provided context. The rendement moyen par hectare, or average yield per hectare, for 03 - Total blé tendre (01 + 02) in 2022 varies, but we can use the last given value, 90.66, as it is the most recent. \n\nHowever, the context does not provide information about the cost of production or the selling price of the wheat, which are crucial for determining profit. Therefore, it is not possible to provide an exact profit amount based on the given context.'

In [16]:
request = """
You are an agronomical advisor. Your task is to provide an advice to the farmer what to seed in the next year and in which proportion. You will be given the historical information about the farmer. Consider agronomical limitation and provide advice to the farmer to maximize his profit (maximum yield and price)
#facts
The farm area is 111.66ha
It has 28 fields
In 2023 the area of Common Wheat (Winter) was 24.04ha, the area of Maize / Corn was 20.97ha, the area of Soybean was 28.15ha, the area of Gram&Leg mix (> 2 years) was 4.94ha, the area of Sunflower was 7.22ha
Field 1 in 2023 was seeded with Common Wheat (Winter)
Field 2 in 2023 was seeded with Common Wheat (Winter)
Field 5 in 2023 was seeded with Maize / Corn
Field 6 in 2023 was seeded with Common Wheat (Winter)
Field 7 in 2023 was seeded with Common Wheat (Winter)
Field 8 in 2023 was seeded with Soybean
Field 11 in 2023 was seeded with Gram&Leg mix (> 2 years)
Field 12 in 2023 was seeded with Soybean
Field 13 in 2023 was seeded with Sunflower
Field 14 in 2023 was seeded with Common Wheat (Winter)
Field 16 in 2023 was seeded with Soybean
Field 17 in 2023 was seeded with Maize / Corn
Field 19 in 2023 was seeded with Maize / Corn
Field 21 in 2023 was seeded with Common Wheat (Winter)
Field 22 in 2023 was seeded with Soybean
Field 23 in 2023 was seeded with Sunflower
Field 24 in 2023 was seeded with Sunflower
Field 25 in 2023 was seeded with Soybean
Field 27 in 2023 was seeded with Maize / Corn
Field 28 in 2023 was seeded with Soybean
"""

In [204]:
yield_text

''

In [198]:
request_complete = production_costs + yield_text + prices_text + request

In [209]:
request_complete

"En 2021, le coût de production en moyenne pour POMMES DE TERRE CONSO était 150 euros par tonne. En 2022, le coût de production en moyenne pour POMMES DE TERRE CONSO était 185 euros par tonne. En 2023, le coût de production en moyenne pour POMMES DE TERRE CONSO était 210 euros par tonne. En 2024, le coût de production en moyenne pour POMMES DE TERRE CONSO était 200 euros par tonne. En 2025, le coût de production en moyenne pour POMMES DE TERRE CONSO était 185 euros par tonne. En 2021, le coût de production en moyenne pour BETTERAVE était 25 euros par tonne. En 2022, le coût de production en moyenne pour BETTERAVE était 28 euros par tonne. En 2023, le coût de production en moyenne pour BETTERAVE était 35 euros par tonne. En 2024, le coût de production en moyenne pour BETTERAVE était 33 euros par tonne. En 2025, le coût de production en moyenne pour BETTERAVE était 31 euros par tonne. En 2021, le coût de production en moyenne pour POIS DE PRINTEMPS était 260 euros par tonne. En 2022, le 

In [17]:
response = query_engine.query(request)
response.response

'Based on the information provided, I would recommend the following crop rotation plan for the next year to maximize profit:\n\n1. Continue growing Common Wheat (Winter) on Fields 1, 2, 6, 7, 14, and 21. This crop has a high yield and price, and it is already established on these fields.\n2. Consider planting Soybean on Fields 5, 17, 19, and 27. Soybean is a profitable crop and it can help improve soil fertility by fixing nitrogen.\n3. Plant Maize / Corn on Field 12. This field was previously seeded with Soybean, and planting Maize / Corn can help break the disease cycle and improve yield.\n4. Consider planting a Gram & Leg mix on Field 8. This crop can help improve soil fertility and provide a good rotation option for the following year.\n5. Plant Sunflower on Fields 13, 23, and 24. Sunflower is a profitable crop and it can help break the disease cycle and improve yield.\n6. Consider planting a cover crop on Field 11 during the off-season to improve soil health and prevent erosion.\n\

In [195]:
response = query_engine.query("What would be the expected profit for your suggestion?")
response.response

"To provide an answer, I would need to know the selling price for each crop, the cost of each crop per hectare, and the yield per hectare. The context information provided does not include this data. Therefore, I'm unable to calculate the expected profit."