Infosynth / functions.py
Umang-Bansal's picture
Update functions.py
4f30460 verified
import streamlit as st
import pandas as pd
import time
from typing import List, Dict
from serpapi import GoogleSearch
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
import gspread
from google.oauth2.service_account import Credentials
import pandas as pd
import os
credentials = {
"type": os.getenv("type"),
"project_id": os.getenv("project_id"),
"private_key_id": os.getenv("private_key_id"),
"private_key": os.getenv("private_key"),
"client_email": os.getenv("client_email"),
"client_id": os.getenv("client_id"),
"client_x509_cert_url": os.getenv("client_x509_cert_url"),
"universe_domain": os.getenv("universe_domain"),
"auth_uri": os.getenv("auth_uri"),
"token_uri": os.getenv("token_uri"),
"auth_provider_x509_cert_url": os.getenv("auth_provider_x509_cert_url")
}
def get_sheet_client():
"""Helper function to create authenticated Google Sheets client"""
try:
scope = ["https://www.googleapis.com/auth/spreadsheets"]
creds = Credentials.from_service_account_info(credentials)
client = gspread.authorize(creds)
# Get service account email for error messages
service_account_email = creds.service_account_email
st.session_state['service_account_email'] = service_account_email
return client
except FileNotFoundError:
raise ValueError(
"credentials.json file not found. Please ensure it exists in the project directory."
)
except Exception as e:
raise ValueError(f"Error setting up Google Sheets client: {str(e)}")
def get_worksheet(sheet_id: str, range_name: str = None):
"""Helper function to get worksheet with improved error handling"""
try:
client = get_sheet_client()
sheet = client.open_by_key(sheet_id)
return sheet.worksheet(range_name) if range_name else sheet
except gspread.exceptions.SpreadsheetNotFound:
service_email = st.session_state.get('service_account_email', 'the service account')
raise ValueError(
f"Spreadsheet not found. Please verify:\n"
f"1. The spreadsheet ID is correct\n"
f"2. The sheet is shared with {service_email}\n"
f"3. Sharing permissions allow edit access"
)
except gspread.exceptions.WorksheetNotFound:
raise ValueError(f"Worksheet '{range_name}' not found in the spreadsheet")
except gspread.exceptions.APIError as e:
if 'PERMISSION_DENIED' in str(e):
service_email = st.session_state.get('service_account_email', 'the service account')
raise ValueError(
f"Permission denied. Please share the spreadsheet with {service_email} "
f"and ensure it has edit access."
)
raise ValueError(f"Google Sheets API error: {str(e)}")
def process_queries(df: pd.DataFrame, primary_column: str, query_template: str) -> List[Dict]:
results = []
serpapi_key = os.getenv("SERPAPI_API_KEY")
for index, row in df.iterrows():
try:
value = row[primary_column]
query = query_template.replace(f"{{{primary_column}}}", str(value))
# Perform search
search = GoogleSearch({
"q": query,
"gl": "in",
"api_key": serpapi_key,
"num": 5
})
search_results = search.get_dict()
# Store results
results.append({
primary_column: value,
"query": query,
"search_results": search_results.get("organic_results", [])
})
# Rate limiting
time.sleep(1)
if index % 10 == 0:
st.write(f"Processed {index + 1} queries...")
except Exception as e:
st.warning(f"Error processing query for {value}: {str(e)}")
continue
return results
def setup_llm():
"""Setup LangChain with Groq"""
api_key=os.getenv("GROQ_API_KEY")
llm = ChatGroq(
api_key=api_key,
model="llama-3.1-8b-instant",
temperature=0,
max_tokens=None,
timeout=None,
max_retries=2,
)
return llm
def process_with_ai(search_results: dict, query: str, llm) -> str:
template = """
Extract ONLY the specific information requested from the search results for: {query}
Search Results:
{search_results}
Provide ONLY the extracted information as a simple text response.
If multiple items exist, separate them with semicolons.
If no relevant information is found, respond with "Not found".
For example:
- If asked for locations: "Bengaluru; Mumbai; Delhi"
- If asked for email: "[email protected]"
- If asked for address: "123 Main Street, City, Country"
"""
prompt = PromptTemplate(
input_variables=["query", "search_results"],
template=template
)
chain = prompt | llm
response = chain.invoke({"query": query, "search_results": search_results})
return response
def load_google_sheet(sheet_id: str, range_name: str) -> pd.DataFrame:
worksheet = get_worksheet(sheet_id,range_name)
data = worksheet.get_all_records()
return pd.DataFrame(data)
def write_to_google_sheet(sheet_id: str, range_name: str, results_df: pd.DataFrame):
worksheet = get_worksheet(sheet_id, range_name)
all_values = worksheet.get_all_values()
num_rows = len(all_values)
next_col_num = len(all_values[0]) + 1
next_col_letter = chr(64 + next_col_num)
range = f'{next_col_letter}1:{next_col_letter}{num_rows}'
values = [['AI Results']] + [[str(result)] for result in results_df['result']]
worksheet.update(values, f'{range}')
def get_all_sheet_names(sheet_id: str) -> List[str]:
worksheet = get_worksheet(sheet_id)
sheets = map(lambda x: x.title, worksheet.worksheets())
return list(sheets)