Infosynth / app.py
Umang-Bansal's picture
Upload 2 files
bb1b68b verified
import streamlit as st
import pandas as pd
from functions import *
from dotenv import load_dotenv
load_dotenv()
def initialize_session_state():
if 'processing_complete' not in st.session_state:
st.session_state['processing_complete'] = False
if 'results_df' not in st.session_state:
st.session_state['results_df'] = None
if 'output_choice' not in st.session_state:
st.session_state['output_choice'] = "Download CSV"
initialize_session_state()
def main():
st.title("InfoSynth")
df = None
# File upload section
st.header("1. Upload Your Data")
data_source = st.radio("Choose a data source:", ["CSV File", "Google Sheet"])
if data_source == "CSV File":
uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'])
if uploaded_file is not None:
df = pd.read_csv(uploaded_file)
else:
st.info(
"Before proceeding, ensure your Google Sheet is shared with the service account. "
"You can find the service account email in your credentials.json file."
)
spreadsheet_id = st.text_input(
"Enter Google Spreadsheet ID",
help="You can find this in the spreadsheet URL between /d/ and /edit"
)
sheet_names = None
if spreadsheet_id:
try:
sheet_names = get_all_sheet_names(spreadsheet_id)
if not sheet_names:
st.error("No sheets found in this spreadsheet. Please check the ID and permissions.")
except ValueError as e:
st.error(f"Error accessing spreadsheet: {str(e)}")
st.info("Please check the ID and permissions.")
except Exception as e:
st.error(f"Error accessing spreadsheet: {str(e)}")
sheet_names = []
sheet_name = None
if sheet_names:
sheet_name = st.selectbox(
"Select Sheet Name",
options=sheet_names,
help="The name of the specific sheet to read from"
)
if spreadsheet_id and sheet_name:
try:
df = load_google_sheet(spreadsheet_id, sheet_name)
if df is None or df.empty:
st.error("No data found in the selected sheet.")
except Exception as e:
st.error(f"Error loading sheet data: {str(e)}")
df = None
if df is not None:
try:
# Display available columns for selection
st.header("2. Select Primary Column")
primary_column = st.selectbox(
"Choose the main column for analysis:",
options=df.columns.tolist()
)
# Show data preview
st.header("3. Data Preview")
st.write("First 5 rows of your data:")
st.dataframe(df.head())
# Add Query Template Section
st.header("4. Query Template")
st.write(f"""
Create your query template using {primary_column} as a placeholder.
Example: "What products does {primary_column} offer?"
""")
query_template = st.text_area(
"Enter your query template:",
value=f"Tell me about {{{primary_column}}}",
help=f"Use {{{primary_column}}} as a placeholder"
)
# Preview generated queries
#if st.button("Preview Generated Queries"):
# st.subheader("Generated Queries Preview")
# # Get first 5 values from the selected column
# sample_values = df[primary_column].head()
#
# # Display example queries
# for value in sample_values:
# generated_query = query_template.replace(
# f"{{{primary_column}}}", str(value)
# )
# st.write(f"- {generated_query}")
#
# # Show total number of queries that will be generated
# st.info(f"Total queries to be generated: {len(df)}")
# Add confirmation and processing section
st.header("5. Process Queries")
total_queries = len(df[primary_column])
estimated_time = total_queries * 2 # 2 second per query due to rate limiting
st.warning(f"""
⚠️ Please confirm:
- Number of queries to process: {total_queries}
- Estimated processing time: {estimated_time} seconds ({estimated_time/60:.1f} minutes)
- This will use {total_queries} API calls
""")
# Show sample of what will be processed
#st.subheader("Sample of data to be processed:")
#sample_df = df[[primary_column]].head()
#st.dataframe(sample_df)
# Process button with confirmation
if st.button("Start Processing"):
with st.spinner("Processing queries..."):
# Add a progress bar
progress_bar = st.progress(0)
results = []
llm = setup_llm()
for index, row in df.iterrows():
try:
value = row[primary_column]
# Handle empty/null values
if pd.isna(value) or str(value).strip() == '':
results.append({
'input_value': value,
'result': 'NA'
})
continue
query = query_template.replace(f"{{{primary_column}}}", str(value))
# Display current processing item
st.text(f"Processing: {value}")
# Process query
result = process_queries(pd.DataFrame([row]), primary_column, query)
output = process_with_ai(result, query, llm)
results.append({
'input_value': value,
'result': output.content
})
# Update progress
progress_bar.progress((index + 1) / total_queries)
except Exception as e:
st.error(f"Error processing {value}: {str(e)}")
continue
# Show completion and results
st.session_state['processing_complete'] = True
st.session_state['results_df'] = pd.DataFrame(results, columns=['input_value', 'result'])
# Show results and save options if processing is complete
if st.session_state['processing_complete']:
st.success(f"✅ Completed processing {len(st.session_state['results_df'])} queries!")
st.subheader("Results Preview:")
st.dataframe(st.session_state['results_df'].head())
st.header("6. Save Results")
output_choice = st.radio("Choose an output format:", ["Download CSV", "Update Google Sheet"])
if output_choice == "Download CSV":
csv = st.session_state['results_df'].to_csv(index=False)
if st.download_button(
"Download Complete Results (CSV)",
csv,
"search_results.csv",
"text/csv",
key='download-csv'
):
st.success("✅ File downloaded successfully!")
elif output_choice == "Update Google Sheet":
update_button = st.button("Confirm Update to Google Sheet")
if update_button:
try:
write_to_google_sheet(spreadsheet_id, sheet_name, st.session_state['results_df'])
st.success("✅ Results successfully added as new column!")
except Exception as e:
st.error(f"Error updating sheet: {str(e)}")
except Exception as e:
st.error(f"Error processing the file: {str(e)}")
if __name__ == "__main__":
main()