import streamlit as st import pandas as pd from functions import * from dotenv import load_dotenv load_dotenv() def initialize_session_state(): if 'processing_complete' not in st.session_state: st.session_state['processing_complete'] = False if 'results_df' not in st.session_state: st.session_state['results_df'] = None if 'output_choice' not in st.session_state: st.session_state['output_choice'] = "Download CSV" initialize_session_state() def main(): st.title("InfoSynth") df = None # File upload section st.header("1. Upload Your Data") data_source = st.radio("Choose a data source:", ["CSV File", "Google Sheet"]) if data_source == "CSV File": uploaded_file = st.file_uploader("Choose a CSV file", type=['csv']) if uploaded_file is not None: df = pd.read_csv(uploaded_file) else: st.info( "Before proceeding, ensure your Google Sheet is shared with the service account. " "You can find the service account email in your credentials.json file." ) spreadsheet_id = st.text_input( "Enter Google Spreadsheet ID", help="You can find this in the spreadsheet URL between /d/ and /edit" ) sheet_names = None if spreadsheet_id: try: sheet_names = get_all_sheet_names(spreadsheet_id) if not sheet_names: st.error("No sheets found in this spreadsheet. Please check the ID and permissions.") except ValueError as e: st.error(f"Error accessing spreadsheet: {str(e)}") st.info("Please check the ID and permissions.") except Exception as e: st.error(f"Error accessing spreadsheet: {str(e)}") sheet_names = [] sheet_name = None if sheet_names: sheet_name = st.selectbox( "Select Sheet Name", options=sheet_names, help="The name of the specific sheet to read from" ) if spreadsheet_id and sheet_name: try: df = load_google_sheet(spreadsheet_id, sheet_name) if df is None or df.empty: st.error("No data found in the selected sheet.") except Exception as e: st.error(f"Error loading sheet data: {str(e)}") df = None if df is not None: try: # Display available columns for selection st.header("2. Select Primary Column") primary_column = st.selectbox( "Choose the main column for analysis:", options=df.columns.tolist() ) # Show data preview st.header("3. Data Preview") st.write("First 5 rows of your data:") st.dataframe(df.head()) # Add Query Template Section st.header("4. Query Template") st.write(f""" Create your query template using {primary_column} as a placeholder. Example: "What products does {primary_column} offer?" """) query_template = st.text_area( "Enter your query template:", value=f"Tell me about {{{primary_column}}}", help=f"Use {{{primary_column}}} as a placeholder" ) # Preview generated queries #if st.button("Preview Generated Queries"): # st.subheader("Generated Queries Preview") # # Get first 5 values from the selected column # sample_values = df[primary_column].head() # # # Display example queries # for value in sample_values: # generated_query = query_template.replace( # f"{{{primary_column}}}", str(value) # ) # st.write(f"- {generated_query}") # # # Show total number of queries that will be generated # st.info(f"Total queries to be generated: {len(df)}") # Add confirmation and processing section st.header("5. Process Queries") total_queries = len(df[primary_column]) estimated_time = total_queries * 2 # 2 second per query due to rate limiting st.warning(f""" ⚠️ Please confirm: - Number of queries to process: {total_queries} - Estimated processing time: {estimated_time} seconds ({estimated_time/60:.1f} minutes) - This will use {total_queries} API calls """) # Show sample of what will be processed #st.subheader("Sample of data to be processed:") #sample_df = df[[primary_column]].head() #st.dataframe(sample_df) # Process button with confirmation if st.button("Start Processing"): with st.spinner("Processing queries..."): # Add a progress bar progress_bar = st.progress(0) results = [] llm = setup_llm() for index, row in df.iterrows(): try: value = row[primary_column] # Handle empty/null values if pd.isna(value) or str(value).strip() == '': results.append({ 'input_value': value, 'result': 'NA' }) continue query = query_template.replace(f"{{{primary_column}}}", str(value)) # Display current processing item st.text(f"Processing: {value}") # Process query result = process_queries(pd.DataFrame([row]), primary_column, query) output = process_with_ai(result, query, llm) results.append({ 'input_value': value, 'result': output.content }) # Update progress progress_bar.progress((index + 1) / total_queries) except Exception as e: st.error(f"Error processing {value}: {str(e)}") continue # Show completion and results st.session_state['processing_complete'] = True st.session_state['results_df'] = pd.DataFrame(results, columns=['input_value', 'result']) # Show results and save options if processing is complete if st.session_state['processing_complete']: st.success(f"✅ Completed processing {len(st.session_state['results_df'])} queries!") st.subheader("Results Preview:") st.dataframe(st.session_state['results_df'].head()) st.header("6. Save Results") output_choice = st.radio("Choose an output format:", ["Download CSV", "Update Google Sheet"]) if output_choice == "Download CSV": csv = st.session_state['results_df'].to_csv(index=False) if st.download_button( "Download Complete Results (CSV)", csv, "search_results.csv", "text/csv", key='download-csv' ): st.success("✅ File downloaded successfully!") elif output_choice == "Update Google Sheet": update_button = st.button("Confirm Update to Google Sheet") if update_button: try: write_to_google_sheet(spreadsheet_id, sheet_name, st.session_state['results_df']) st.success("✅ Results successfully added as new column!") except Exception as e: st.error(f"Error updating sheet: {str(e)}") except Exception as e: st.error(f"Error processing the file: {str(e)}") if __name__ == "__main__": main()