File size: 9,723 Bytes
3b6db3d
 
4b75db9
3b6db3d
 
 
 
 
 
b22953d
4b75db9
3b6db3d
 
 
 
 
bbd44b8
3b6db3d
701d698
 
3b6db3d
 
 
 
0793dc8
3b6db3d
305ae95
3b6db3d
 
 
1771168
 
3b6db3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944e40e
3b6db3d
701d698
 
bbd44b8
701d698
 
 
 
57e7832
b610783
8e8a4e2
79a38a9
701d698
 
 
013e46d
b22953d
013e46d
 
 
 
 
 
 
 
3ee20f8
bbd44b8
 
 
b31c350
 
701d698
bbd44b8
701d698
 
 
b31c350
53a4ff5
701d698
013e46d
3b6db3d
 
 
 
 
4b75db9
 
 
 
3b6db3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63c3d58
 
4b75db9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63c3d58
3b6db3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc9a5fe
 
bd1326b
 
 
 
3b6db3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bad9f43
 
28e5642
707e941
ef096d4
 
bad9f43
 
ef096d4
707e941
3b6db3d
 
701d698
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import os
import json
import pandas as pd
import gradio as gr
from llama_index import (
    VectorStoreIndex,
    download_loader,
)
import chromadb
import typing_extensions
from llama_index import Document
from llama_index.llms import MistralAI
from llama_index.embeddings import MistralAIEmbedding
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index import ServiceContext
from utils import departments_list, region_list

# --- UI strings -----------------------------------------------------------
# `title` is assigned to the Gradio app at the bottom of the file and
# `placeholder` is used by the chat textbox. `description` and
# `placeholder_url` are not referenced in the visible part of this file.
title = "Team LFD rotation finder app"
description = "Propose a rotation for a farmer"
placeholder = (
    "Vous pouvez me posez une question sur ce contexte, appuyer sur Entrée pour valider"
)
placeholder_url = "Extract text from this url"
# Name of the Mistral chat model passed to the LLM client below.
llm_model = "mistral-small"

# API key is read from the environment; this is None when MISTRAL_API_KEY is unset.
env_api_key = os.environ.get("MISTRAL_API_KEY")
# Placeholder only: rebound to the real query engine once the index exists (see below).
query_engine = None

# Define LLMs
# Chat model used to generate answers, and embedding model used to vectorize documents.
llm = MistralAI(api_key=env_api_key, model=llm_model, temperature = 0.2)
embed_model = MistralAIEmbedding(model_name="mistral-embed", api_key=env_api_key, max_length=5000)

# create client and a new collection
# The Chroma client persists its data under ./chroma_db; the collection is
# reused if it already exists (get_or_create).
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("quickstart")

# set up ChromaVectorStore and load in data
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(
    chunk_size=1024, llm=llm, embed_model=embed_model
)

# Fetch the PDF reader class by name and create the single loader instance
# shared by load_file() and load_local_data().
PDFReader = download_loader("PDFReader")
loader = PDFReader()

# The index is created with an empty node list; documents are added later
# through index.insert().
# NOTE(review): presumably vectors already stored in the persistent Chroma
# collection stay queryable through this index — confirm.
index = VectorStoreIndex(
    [], service_context=service_context, storage_context=storage_context
)
# Module-level query engine shared by the chat handler and find_my_rotation();
# each query retrieves the 10 most similar chunks.
query_engine = index.as_query_engine(similarity_top_k=10)

def create_prompt(farmSize, cultures):
    """Build the rotation-advice prompt sent to the query engine.

    Args:
        farmSize: Farm area, interpolated into the prompt as hectares ("ha").
        cultures: Iterable of crop names; the i-th entry is reported as the
            crop most recently grown on parcel i (1-based).

    Returns:
        The full prompt as a single string: the advisor instructions, one
        sentence per parcel, then the bilingual answer-format request.
    """
    intro = f"""
    You are a French agronomical advisor, answering in French. Your task is to provide an concise advice as a table of rotation crops (with a prioritary suggestion and an alternative one) to the farmer what to seed in the next year and in which proportion. You will be given the historical information about the farmer, and context data given previously gives you average performances in yield per acre by region and by culture, as well as production costs and selling prices. Consider agronomical limitation and provide advice to the farmer to maximize his profit (maximum yield and revenue -- (the difference between the selling price and the cost of production) times the yield). There are three possible scenarii, pessimistic (lowest revenue), optimistic (highest revenue) and mean.
    #facts
    The farm area is {farmSize} ha. 
    """
    # Separate the parcel sentences with a space: they used to be glued
    # together ("...wheat.Parcel 2...").
    parcel_facts = " ".join(
        f"Parcel {number} most recently grew {culture}."
        for number, culture in enumerate(cultures, start=1)
    )
    # The closing instructions previously ended with a stray `"` that leaked
    # into the prompt; it is removed here.
    closing = """I need you to answer in French formulating a concise table with the crops you want to grow and by parcel, and predicting gross margin per hectare according to the scenario asked for (mean, pessimistic or optimistic. Default: mean). 
    Réponds en français en formulant un tableau concis avec les cultures que tu veux cultiver et par parcelle, et en prévoyant la marge et le coût par hectare selon le scénario demandé (moyen, pessimiste ou optimiste. Par défaut : moyen).
    """
    # A newline keeps the last parcel sentence apart from the instructions.
    return intro + parcel_facts + "\n" + closing


# Shape of the payload submitted by the web form; the field names and types
# mirror the parameters of find_my_rotation().
class InputForm(typing_extensions.TypedDict):
    department: str
    farmSize: float
    benefitsFromCommonAgriculturalPolicy: bool
    cultures: list[str]
    yields: dict[str, float]

# This function is the API endpoint the web app will use
def find_my_rotation(department: str, farmSize: float, benefitsFromCommonAgriculturalPolicy: bool, cultures: list[str], yields: dict[str, float]):
    """Ask the model for a crop rotation proposal for one farm.

    The yield table of the requested department is read from
    ``data/departments/<name>.csv``, turned into French sentences and inserted
    into the vector index; the rotation prompt is then queried, and the answer
    is sent through a second query asking for a French translation.

    Args:
        department: Key looked up in ``departments_list`` to get the
            department name used in the CSV file name.
        farmSize: Farm area forwarded to ``create_prompt``.
        benefitsFromCommonAgriculturalPolicy: Accepted for the form contract;
            not used by this function.
        cultures: Crops most recently grown, forwarded to ``create_prompt``.
        yields: Accepted for the form contract; not used by this function.

    Returns:
        The response object returned by the second (translation) query.

    Raises:
        ValueError: If ``department`` is not a known key of ``departments_list``.
    """
    department_name = departments_list.get(department)
    if department_name is None:
        # Without this guard the code would try to open ".../None.csv".
        raise ValueError(f"Unknown department: {department!r}")
    dpt_yield = pd.read_csv(f'data/departments/{department_name}.csv')

    sentences = []
    for _, row in dpt_yield.iterrows():
        # The 'Culture' value is split on '-' and the second piece is used as
        # the crop label; fall back to the whole value when there is no
        # hyphen instead of raising IndexError.
        pieces = row['Culture'].split('-')
        crop = pieces[1] if len(pieces) > 1 else pieces[0]
        sentences.append(
            f"Dans le département de {department_name}, la production de {crop} est de {row['mean']} en moyenne, de {row['pessimistic']} avec un scenario pessimiste et de {row['optimistic']} avec un scenario optimiste. "
        )
    yield_text = "".join(sentences)

    # Add the department figures to the index so the query can retrieve them.
    # An empty table yields no text, and inserting an empty document is pointless.
    if yield_text:
        index.insert(Document(text=yield_text))

    # Create the prompt
    prompt = create_prompt(farmSize, cultures)
    # Question the model
    response = query_engine.query(prompt)
    # Second pass: ask for the answer to be rendered in French.
    prompt = 'Traduis cette réponse en français: ' + response.response
    response = query_engine.query(prompt)
    return response


def get_documents_in_db():
    """Return a Markdown bullet list of the distinct file names stored in Chroma.

    Each metadata entry of the collection is expected to carry a JSON string
    under ``_node_content`` whose ``metadata.file_name`` names the source
    file. Entries that do not match this shape are skipped.

    Returns:
        A Markdown string: a bold header line followed by one `` - name`` line
        per distinct file name, in sorted order.
    """
    print("Fetching documents in DB")
    file_names = set()
    for item in chroma_collection.get(include=["metadatas"])["metadatas"]:
        try:
            file_names.add(json.loads(item["_node_content"])["metadata"]["file_name"])
        except (KeyError, TypeError, ValueError):
            # Best effort: entries with no `_node_content`, unparsable JSON or
            # no file name are ignored. Only these expected errors are
            # swallowed, so unrelated failures are no longer hidden.
            continue
    # Sorted so the listing is stable between calls (set order is arbitrary).
    docs = sorted(file_names)
    print(f"Found {len(docs)} documents")
    return "**List of files in db:**\n" + "".join(f" - {d}\n" for d in docs)


def empty_db():
    """Remove every entry from the Chroma collection.

    Returns:
        The refreshed Markdown listing produced by ``get_documents_in_db``.
    """
    ids = chroma_collection.get()["ids"]
    # Nothing to delete when the collection is already empty.
    # NOTE(review): how chromadb treats delete() with an empty id list appears
    # to vary between versions (error vs. no-op) — skipping the call avoids
    # depending on it.
    if ids:
        chroma_collection.delete(ids)
    return get_documents_in_db()


def load_file(file):
    """Encode an uploaded file into the index and refresh the UI widgets.

    Args:
        file: Value of the file input, handed as-is to the PDF loader.

    Returns:
        A 3-tuple matching the click handler's outputs: a hidden textbox, a
        visible confirmation textbox, and the Markdown listing of the files
        in the database.
    """
    # Every document produced by the loader is indexed individually.
    for document in loader.load_data(file=file):
        index.insert(document)

    hidden_file_msg = gr.Textbox(visible=False)
    confirmation = gr.Textbox(
        value="Document encoded ! You can ask questions", visible=True
    )
    return hidden_file_msg, confirmation, get_documents_in_db()

def load_local_data(data_folder):
    """Index the supported files found directly inside ``data_folder``.

    Handled entries:
      * ``*.pdf``: parsed with the PDF loader, each resulting document indexed.
      * ``*.txt``: whole file indexed as one document.
      * ``price_by_crop.csv``: the table's string rendering, prefixed with a
        short description, indexed as one document.
      * ``data_cout_production_grandes_cultures_2021_2025.xlsx``: one French
        sentence per row whose ``ANNEE`` equals 2024, indexed as one document.

    Any other entry is ignored.

    Args:
        data_folder: Directory to scan (not recursive). A trailing path
            separator is optional.
    """
    for file in os.listdir(data_folder):
        # os.path.join works with or without a trailing separator, whereas
        # plain concatenation produced e.g. "datafile.pdf" for "data".
        path = os.path.join(data_folder, file)

        if file.endswith('.pdf'):
            print('Adding file ' + file + ' to DB')
            for doc in loader.load_data(file=path):
                index.insert(doc)
        elif file.endswith('.txt'):
            print('Adding file ' + file + ' to DB')
            # Explicit encoding so the result does not depend on the platform
            # default. NOTE(review): assumes the text files are UTF-8 — confirm.
            with open(path, 'r', encoding='utf-8') as f:
                text = f.read()
            # Indexing happens after the file handle has been released.
            index.insert(Document(text=text))
        elif file == 'price_by_crop.csv':
            print('Adding file ' + file + ' to DB')
            prices_text = 'The price of some agricultural data is given by this csv: It displays three scenario, a mean, an optimistic, and a pessimistic' + str(pd.read_csv(path))
            index.insert(Document(text=prices_text))
        elif file == 'data_cout_production_grandes_cultures_2021_2025.xlsx':
            # Only the 2024 rows are turned into sentences.
            production_costs = "".join(
                f"Le coût de production par tonne en moyenne pour {row['CULTURES']} était {row['MOYENNE']} euros par tonne avec un scénario moyen, {row['QUART INFERIEUR']} pour un scénario optimiste, et {row['QUART SUPERIEUR']} pour un scénario pessimiste. \n"
                for _, row in pd.read_excel(path).iterrows()
                if row['ANNEE'] == 2024
            )
            print('Adding file ' + file + ' to DB')
            index.insert(Document(text=production_costs))


def load_document(input_file):
    """Show which file has just been uploaded.

    Args:
        input_file: The uploaded file as delivered by the file input: either
            an object exposing the path through ``.name`` or a plain path
            string.

    Returns:
        A visible textbox whose value is ``"Document loaded: <file name>"``.
    """
    # Accept both a file-like object with `.name` and a bare path string.
    path = getattr(input_file, "name", input_file)
    # basename handles the platform's separators, not only "/".
    file_name = os.path.basename(path)
    return gr.Textbox(value=f"Document loaded: {file_name}", visible=True)


# Build the Gradio interface. Components are created in layout order inside
# the Blocks context, so the statement order below defines the page.
with gr.Blocks() as demo:
    # Introductory text displayed at the top of the page.
    gr.Markdown(
        """ # Welcome to Gaia Level 3 Demo 
    
        Add a file before interacting with the Chat.
        This demo allows you to interact with a pdf file and then ask questions to Mistral APIs.
        Mistral will answer with the context extracted from your uploaded file.
        *The files will stay in the database unless there is 48h of inactivty or you re-build the space.*
        """
    )

    gr.Markdown(""" ### 1 / Extract data from PDF """)

    with gr.Row():
        with gr.Column():
            # Single-PDF upload widget; its value is passed to the handlers below.
            input_file = gr.File(
                label="Load a pdf",
                file_types=[".pdf"],
                file_count="single",
                type="filepath",
                interactive=True,
            )
            # Hidden until load_document() returns a visible textbox for it.
            file_msg = gr.Textbox(
                label="Loaded documents:", container=False, visible=False
            )
            # On upload, only display the file name; encoding happens on button click.
            input_file.upload(
                fn=load_document,
                inputs=[
                    input_file,
                ],
                outputs=[file_msg],
                concurrency_limit=20,
            )

            # Index the bundled data. These calls run while the UI is being
            # built, i.e. every time the module is executed, before launch.
            load_local_data('data/')
            load_local_data('data/pdf/')
            help_msg = gr.Markdown(
                value="Once the document is loaded, press the Encode button below to add it to the db."
            )

            file_btn = gr.Button(value="Encode file ✅", interactive=True)
            btn_msg = gr.Textbox(container=False, visible=False)

            with gr.Row():
                # The function itself (not its result) is passed as the value.
                # NOTE(review): Gradio is expected to call it to compute the
                # initial content — confirm against the Gradio version in use.
                db_list = gr.Markdown(value=get_documents_in_db)
                delete_btn = gr.Button(value="Empty db 🗑️", interactive=True, scale=0)

            # Encode the uploaded file; load_file() returns one value per output.
            file_btn.click(
                load_file,
                inputs=[input_file],
                outputs=[file_msg, btn_msg, db_list],
                show_progress="full",
            )
            # Wipe the collection and refresh the listing.
            delete_btn.click(empty_db, outputs=[db_list], show_progress="minimal")

    gr.Markdown(""" ### 2 / Ask a question about this context """)

    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder=placeholder)
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Query the index with the user's message and append the exchange to the chat history."""
        response = query_engine.query(message)
        chat_history.append((message, str(response)))
        return chat_history

    # Only the chatbot is listed as output, so the message box is not cleared here.
    msg.submit(respond, [msg, chatbot], [chatbot])

    # Terrible terrible terrible way of handling this
    # but we don't have much time left
    # A hidden button is wired to find_my_rotation; see the comment above
    # find_my_rotation, which describes it as the API endpoint for the web app.
    # NOTE(review): `invisible_output` is created with visible=True despite
    # its name — confirm this is intended.
    invisible_output = gr.Textbox(visible=True)
    invisible_btn = gr.Button(visible=False)
    # The input components are created inline, inside the Blocks context.
    # NOTE(review): presumably they are rendered on the page as well — confirm.
    invisible_btn.click(
        find_my_rotation, 
        inputs=[gr.Textbox(), gr.Number(), gr.Checkbox(), gr.List(), gr.List()],
        outputs=[invisible_output]
    )

# Set the app title after construction.
demo.title = title

# Start the Gradio server; executed whenever this module is run.
demo.launch()