|
import csv
import html
import os
import random
import shutil
import sys
from itertools import islice

import IPython
import numpy as np
import pandas as pd
import streamlit as st
import tensorflow.compat.v1 as tf
from transformers import TapasTokenizer, TapasForQuestionAnswering
|
|
|
tf.get_logger().setLevel('ERROR') |
|
|
|
model_name = 'google/tapas-base-finetuned-wtq' |
|
|
|
|
|
|
|
model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False) |
|
tokenizer = TapasTokenizer.from_pretrained(model_name) |
|
|
|
st.set_option('deprecation.showfileUploaderEncoding', False) |
|
|
|
st.title('Query your Table') |
|
st.header('Upload CSV file') |
|
|
|
uploaded_file = st.file_uploader("Choose your CSV file",type = 'csv') |
|
placeholder = st.empty() |
|
|
|
if uploaded_file is not None: |
|
data = pd.read_csv(uploaded_file) |
|
data.replace(',','', regex=True, inplace=True) |
|
if st.checkbox('Want to see the data?'): |
|
placeholder.dataframe(data) |
|
|
|
st.header('Enter your queries') |
|
input_queries = st.text_input('Type your queries separated by comma(,)',value='') |
|
input_queries = input_queries.split(',') |
|
|
|
colors1 = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)]) for i in range(len(input_queries))] |
|
colors2 = ['background-color:'+str(color)+'; color: black' for color in colors1] |
|
|
|
def styling_specific_cell(x,tags,colors): |
|
df_styler = pd.DataFrame('', index=x.index, columns=x.columns) |
|
for idx,tag in enumerate(tags): |
|
for r,c in tag: |
|
df_styler.iloc[r, c] = colors[idx] |
|
return df_styler |
|
|
|
if st.button('Predict Answers'): |
|
with st.spinner('It will take approx a minute'): |
|
table = data.astype(str) |
|
inputs = tokenizer(table=table , queries=input_queries, padding='max_length',truncation=True, return_tensors="pt") |
|
outputs = model(**inputs) |
|
|
|
predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions( inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()) |
|
|
|
id2aggregation = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3:"COUNT"} |
|
aggregation_predictions_string = [id2aggregation[x] for x in predicted_aggregation_indices] |
|
|
|
answers = [] |
|
|
|
for coordinates in predicted_answer_coordinates: |
|
if len(coordinates) == 1: |
|
|
|
answers.append(table.iat[coordinates[0]]) |
|
else: |
|
|
|
cell_values = [] |
|
for coordinate in coordinates: |
|
cell_values.append(table.iat[coordinate]) |
|
answers.append(", ".join(cell_values)) |
|
|
|
st.success('Done! Please check below the answers and its cells highlighted in table above') |
|
|
|
placeholder.dataframe(data.style.apply(styling_specific_cell,tags=predicted_answer_coordinates,colors=colors2,axis=None)) |
|
|
|
for query, answer, predicted_agg, c in zip(input_queries, answers, aggregation_predictions_string, colors1): |
|
st.write('\n') |
|
st.markdown('<font color={} size=4>**{}**</font>'.format(c,query), unsafe_allow_html=True) |
|
st.write('\n') |
|
|
|
if predicted_agg == "NONE" or predicted_agg == 'COUNT': |
|
st.markdown('**>** '+str(answer)) |
|
else: |
|
|
|
|
|
if predicted_agg == 'SUM': |
|
st.markdown('**>** '+str(sum(list(map(float,answer.split(',')))))) |
|
else: |
|
st.markdown('**>** '+str(np.round(np.mean(list(map(float,answer.split(',')))),2))) |