|
import base64 |
|
import json |
|
import uuid |
|
|
|
import pandas as pd |
|
import openpyxl |
|
from openpyxl.chart import BarChart, Reference, PieChart |
|
from openpyxl.chart.label import DataLabelList |
|
from openpyxl.utils.dataframe import dataframe_to_rows |
|
from datetime import datetime |
|
import matplotlib.pyplot as plt |
|
import gradio as gr |
|
import tempfile |
|
from huggingface_hub import InferenceClient, hf_hub_url |
|
import os |
|
|
|
import matplotlib |
|
matplotlib.use('Agg') |
|
|
|
|
|
def read_excel_data(file): |
|
df = pd.read_excel(file, usecols="A, B, C, D, E", skiprows=1, |
|
names=["ID", "Review Date", "Option", "Review", "ReviewScore"], engine='openpyxl') |
|
df['Review Date'] = pd.to_datetime(df['Review Date']).dt.tz_localize(None).dt.date |
|
df['Year-Month'] = df['Review Date'].astype(str).str.slice(0, 7) |
|
df['Year'] = df['Review Date'].astype(str).str.slice(0, 4) |
|
df['Month'] = df['Review Date'].astype(str).str.slice(5, 7) |
|
df['Day'] = df['Review Date'].astype(str).str.slice(8, 10) |
|
df['Option'] = df['Option'].astype(str) |
|
df['Option1'] = df['Option'].str.split(" / ").str[0] |
|
df['Review Length'] = df['Review'].str.len() |
|
return df |
|
|
|
|
|
|
|
def analyze_data(df): |
|
monthly_data = df.groupby('Year-Month').size().reset_index(name='Counts') |
|
yearly_data = df.groupby('Year').size().reset_index(name='Counts') |
|
return monthly_data, yearly_data |
|
|
|
|
|
def analyze_monthly_data_for_year(df, selected_year): |
|
monthly_data_for_year = df[df['Year'] == selected_year].groupby('Month').size().reset_index(name='Counts') |
|
all_months = pd.DataFrame([f"{m:02d}" for m in range(1, 13)], columns=['Month']) |
|
monthly_trend_for_year = pd.merge(all_months, monthly_data_for_year, on='Month', how='left') |
|
monthly_trend_for_year['Counts'] = monthly_trend_for_year['Counts'].fillna(0).astype(int) |
|
return monthly_trend_for_year |
|
|
|
|
|
def analyze_daily_data(df, selected_year): |
|
start_date = datetime.strptime(f"{selected_year}-01-01", "%Y-%m-%d").date() |
|
end_date = datetime.strptime(f"{selected_year}-12-31", "%Y-%m-%d").date() |
|
date_range = pd.date_range(start=start_date, end=end_date).date |
|
daily_data = df[df['Year'] == selected_year].groupby('Review Date').size().reset_index(name='Counts') |
|
daily_data['Review Date'] = pd.to_datetime(daily_data['Review Date']).dt.date |
|
all_dates_df = pd.DataFrame(date_range, columns=['Review Date']) |
|
all_dates_df['Review Date'] = pd.to_datetime(all_dates_df['Review Date']).dt.date |
|
merged_data = pd.merge(all_dates_df, daily_data, on='Review Date', how='left') |
|
merged_data['Counts'] = merged_data['Counts'].fillna(0).astype(int) |
|
return merged_data |
|
|
|
|
|
def analyze_option_data(df): |
|
data_counts = df['Option1'].value_counts().reset_index() |
|
data_counts.columns = ['Option', 'Counts'] |
|
total_counts = data_counts['Counts'].sum() |
|
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2) |
|
data_counts.sort_values(by='Counts', ascending=False, inplace=True) |
|
return data_counts |
|
|
|
|
|
def analyze_option_review_data(df): |
|
df["Option1"] = df["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x) |
|
df["Option2"] = df["Option"].apply( |
|
lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "") |
|
review_counts = df.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0) |
|
review_counts["Total"] = review_counts.sum(axis=1) |
|
option1_counts = df.groupby("Option1")["Option"].count() |
|
option2_counts = df.groupby(["Option1", "Option2"])["Option"].count() |
|
review_counts["์ต์
๋ช
(1์ฐจ)๊ฑด์"] = review_counts.index.get_level_values("Option1").map(option1_counts) |
|
review_counts["์ต์
๋ช
(2์ฐจ)๊ฑด์"] = option2_counts |
|
review_counts.sort_values(by=["์ต์
๋ช
(1์ฐจ)๊ฑด์", "์ต์
๋ช
(2์ฐจ)๊ฑด์"], ascending=[False, False], inplace=True) |
|
return review_counts |
|
|
|
|
|
def analyze_option_data_for_year(df, selected_year): |
|
df_year = df[df['Year'] == selected_year] |
|
data_counts = df_year['Option1'].value_counts().reset_index() |
|
data_counts.columns = ['Option', 'Counts'] |
|
total_counts = data_counts['Counts'].sum() |
|
data_counts['Percentage'] = (data_counts['Counts'] / total_counts * 100).round(2) |
|
data_counts.sort_values(by='Counts', ascending=False, inplace=True) |
|
return data_counts |
|
|
|
|
|
def analyze_option_review_data_for_year(df, selected_year): |
|
df_year = df[df['Year'] == selected_year].copy() |
|
df_year.loc[:, "Option1"] = df_year["Option"].apply(lambda x: x.split(" / ")[0] if isinstance(x, str) else x) |
|
df_year.loc[:, "Option2"] = df_year["Option"].apply( |
|
lambda x: x.split(" / ")[1] if isinstance(x, str) and len(x.split(" / ")) > 1 else "") |
|
review_counts = df_year.groupby(["Option1", "Option2"])["ReviewScore"].value_counts().unstack(fill_value=0) |
|
review_counts["Total"] = review_counts.sum(axis=1) |
|
option1_counts = df_year.groupby("Option1")["Option"].count() |
|
option2_counts = df_year.groupby(["Option1", "Option2"])["Option"].count() |
|
review_counts["์ต์
๋ช
(1์ฐจ)๊ฑด์"] = review_counts.index.get_level_values("Option1").map(option1_counts) |
|
review_counts["์ต์
๋ช
(2์ฐจ)๊ฑด์"] = option2_counts |
|
review_counts.sort_values(by=["์ต์
๋ช
(1์ฐจ)๊ฑด์", "์ต์
๋ช
(2์ฐจ)๊ฑด์"], ascending=[False, False], inplace=True) |
|
return review_counts |
|
|
|
|
|
def extract_longest_reviews(df): |
|
longest_reviews = df.groupby('ReviewScore').apply( |
|
lambda x: x.nlargest(100, 'Review Length', keep='all')).reset_index(drop=True) |
|
return longest_reviews.drop( |
|
columns=['Review Length', 'Year-Month', 'Year', 'Month', 'Day', 'Option1', 'Option2']) |
|
|
|
|
|
def save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, option_counts, |
|
review_counts, selected_option_counts, selected_review_counts, longest_reviews): |
|
wb = openpyxl.Workbook() |
|
|
|
|
|
ws_original = wb.active |
|
ws_original.title = "์๋ณธ๋ฆฌ๋ทฐ๋ฐ์ดํฐ" |
|
for r in dataframe_to_rows(original_data, index=False, header=True): |
|
ws_original.append(r) |
|
ws_original.sheet_properties.tabColor = "000000" |
|
|
|
|
|
ws1 = wb.create_sheet(title="์ ์ฒด์๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") |
|
for r in dataframe_to_rows(monthly_counts, index=False, header=True): |
|
ws1.append(r) |
|
chart1 = BarChart() |
|
chart1.type = "col" |
|
chart1.style = 10 |
|
chart1.title = "Monthly Review Trends" |
|
chart1.y_axis.title = 'Review Counts' |
|
chart1.x_axis.title = 'Year-Month' |
|
data1 = Reference(ws1, min_col=2, min_row=1, max_row=ws1.max_row, max_col=2) |
|
cats1 = Reference(ws1, min_col=1, min_row=2, max_row=ws1.max_row) |
|
chart1.add_data(data1, titles_from_data=True) |
|
chart1.set_categories(cats1) |
|
chart1.width = 30 |
|
chart1.height = 15 |
|
ws1.add_chart(chart1, "C2") |
|
ws1.sheet_properties.tabColor = "FFA500" |
|
|
|
|
|
ws2 = wb.create_sheet(title="๋
๋๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") |
|
for r in dataframe_to_rows(yearly_counts, index=False, header=True): |
|
ws2.append(r) |
|
chart2 = BarChart() |
|
chart2.type = "col" |
|
chart2.style = 10 |
|
chart2.title = "Yearly Review Trends" |
|
chart2.y_axis.title = 'Review Counts' |
|
chart2.x_axis.title = 'Year' |
|
data2 = Reference(ws2, min_col=2, min_row=1, max_row=ws2.max_row, max_col=2) |
|
cats2 = Reference(ws2, min_col=1, min_row=2, max_row=ws2.max_row) |
|
chart2.add_data(data2, titles_from_data=True) |
|
chart2.set_categories(cats2) |
|
chart2.width = 30 |
|
chart2.height = 15 |
|
ws2.add_chart(chart2, "C2") |
|
ws2.sheet_properties.tabColor = "FFA500" |
|
|
|
|
|
ws3 = wb.create_sheet(title="์ ํํ ๋
๋ ์๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") |
|
for r in dataframe_to_rows(monthly_trend, index=False, header=True): |
|
ws3.append(r) |
|
chart3 = BarChart() |
|
chart3.type = "col" |
|
chart3.style = 10 |
|
chart3.title = "Monthly Trends for Selected Year" |
|
chart3.y_axis.title = 'Review Counts' |
|
chart3.x_axis.title = 'Month' |
|
data3 = Reference(ws3, min_col=2, min_row=1, max_row=ws3.max_row, max_col=2) |
|
cats3 = Reference(ws3, min_col=1, min_row=2, max_row=ws3.max_row) |
|
chart3.add_data(data3, titles_from_data=True) |
|
chart3.set_categories(cats3) |
|
chart3.width = 30 |
|
chart3.height = 15 |
|
ws3.add_chart(chart3, "C2") |
|
ws3.sheet_properties.tabColor = "FFA500" |
|
|
|
|
|
ws4 = wb.create_sheet(title="์ ํํ ๋
๋ ์ผ๋ณ์ถ์ด(๋ฆฌ๋ทฐ๋ถ์)") |
|
for r in dataframe_to_rows(daily_counts, index=False, header=True): |
|
ws4.append(r) |
|
chart4 = BarChart() |
|
chart4.type = "col" |
|
chart4.style = 10 |
|
chart4.title = "Daily Trends for Selected Year" |
|
chart4.y_axis.title = 'Review Counts' |
|
chart4.x_axis.title = 'Date' |
|
data4 = Reference(ws4, min_col=2, min_row=2, max_row=ws4.max_row + 1, max_col=2) |
|
cats4 = Reference(ws4, min_col=1, min_row=2, max_row=ws4.max_row + 1) |
|
chart4.add_data(data4, titles_from_data=True) |
|
chart4.set_categories(cats4) |
|
chart4.width = 50 |
|
chart4.height = 15 |
|
ws4.add_chart(chart4, "C2") |
|
ws4.sheet_properties.tabColor = "FFA500" |
|
|
|
|
|
ws5 = wb.create_sheet(title="์ต์
๋ถ์ ๊ฒฐ๊ณผ(์ต์
๋ถ์)") |
|
for r in dataframe_to_rows(option_counts, index=False, header=True): |
|
ws5.append(r) |
|
bar_chart = BarChart() |
|
data = Reference(ws5, min_col=2, min_row=2, max_row=ws5.max_row, max_col=2) |
|
cats = Reference(ws5, min_col=1, min_row=2, max_row=ws5.max_row, max_col=1) |
|
bar_chart.add_data(data, titles_from_data=False) |
|
bar_chart.set_categories(cats) |
|
bar_chart.title = "Option Analysis (Counts)" |
|
bar_chart.width = 40 |
|
bar_chart.height = 20 |
|
ws5.add_chart(bar_chart, "G2") |
|
ws5.sheet_properties.tabColor = "0000FF" |
|
|
|
|
|
top_10 = option_counts.head(10) |
|
for idx, row in enumerate(top_10.itertuples(), 1): |
|
ws5.cell(row=idx + 1, column=5, value=row.Option) |
|
ws5.cell(row=idx + 1, column=6, value=row.Counts) |
|
others_sum = option_counts['Counts'][10:].sum() |
|
ws5.cell(row=12, column=5, value='Others') |
|
ws5.cell(row=12, column=6, value=others_sum) |
|
ws5.cell(row=1, column=5, value='Option') |
|
ws5.cell(row=1, column=6, value='Counts') |
|
pie_chart = PieChart() |
|
data = Reference(ws5, min_col=6, min_row=2, max_row=12) |
|
categories = Reference(ws5, min_col=5, min_row=2, max_row=12) |
|
pie_chart.add_data(data, titles_from_data=False) |
|
pie_chart.set_categories(categories) |
|
pie_chart.title = "Top 10 Options (Share)" |
|
pie_chart.dataLabels = DataLabelList() |
|
pie_chart.dataLabels.showPercent = True |
|
pie_chart.width = 30 |
|
pie_chart.height = 20 |
|
|
|
ws5.add_chart(pie_chart, "G40") |
|
|
|
|
|
ws6 = wb.create_sheet(title="์ต์
๋ณํ์ ๋ถ์(์ต์
๋ถ์)") |
|
ws6.append( |
|
["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", |
|
"Option2 Counts"]) |
|
for r in dataframe_to_rows(review_counts, index=True, header=False): |
|
ws6.append(r) |
|
ws6.sheet_properties.tabColor = "0000FF" |
|
|
|
|
|
ws7 = wb.create_sheet(title="์ ํํ ๋
๋ ์ต์
๋ถ์ ๊ฒฐ๊ณผ(์ต์
๋ถ์)") |
|
for r in dataframe_to_rows(selected_option_counts, index=False, header=True): |
|
ws7.append(r) |
|
bar_chart_selected = BarChart() |
|
data_selected = Reference(ws7, min_col=2, min_row=2, max_row=ws7.max_row, max_col=2) |
|
cats_selected = Reference(ws7, min_col=1, min_row=2, max_row=ws7.max_row, max_col=1) |
|
bar_chart_selected.add_data(data_selected, titles_from_data=False) |
|
bar_chart_selected.set_categories(cats_selected) |
|
bar_chart_selected.title = "Option Analysis for Selected Year (Counts)" |
|
bar_chart_selected.width = 40 |
|
bar_chart_selected.height = 20 |
|
ws7.add_chart(bar_chart_selected, "G2") |
|
ws7.sheet_properties.tabColor = "0000FF" |
|
|
|
|
|
top_10_selected = selected_option_counts.head(10) |
|
for idx, row in enumerate(top_10_selected.itertuples(), 1): |
|
ws7.cell(row=idx + 1, column=5, value=row.Option) |
|
ws7.cell(row=idx + 1, column=6, value=row.Counts) |
|
others_sum_selected = selected_option_counts['Counts'][10:].sum() |
|
ws7.cell(row=12, column=5, value='Others') |
|
ws7.cell(row=12, column=6, value=others_sum_selected) |
|
ws7.cell(row=1, column=5, value='Option') |
|
ws7.cell(row=1, column=6, value='Counts') |
|
pie_chart_selected = PieChart() |
|
data_selected_pie = Reference(ws7, min_col=6, min_row=2, max_row=12) |
|
categories_selected_pie = Reference(ws7, min_col=5, min_row=2, max_row=12) |
|
pie_chart_selected.add_data(data_selected_pie, titles_from_data=False) |
|
pie_chart_selected.set_categories(categories_selected_pie) |
|
pie_chart_selected.title = "Top 10 Options for Selected Year (Share)" |
|
pie_chart_selected.dataLabels = DataLabelList() |
|
pie_chart_selected.dataLabels.showPercent = True |
|
pie_chart_selected.width = 30 |
|
pie_chart_selected.height = 20 |
|
|
|
ws7.add_chart(pie_chart_selected, "G40") |
|
|
|
|
|
ws8 = wb.create_sheet(title="์ ํํ ๋
๋ ์ต์
๋ณํ์ ๋ถ์(์ต์
๋ถ์)") |
|
ws8.append( |
|
["Option1", "Option2", "Total Counts", "Score 5", "Score 4", "Score 3", "Score 2", "Score 1", "Option1 Counts", |
|
"Option2 Counts"]) |
|
for r in dataframe_to_rows(selected_review_counts, index=True, header=False): |
|
ws8.append(r) |
|
ws8.sheet_properties.tabColor = "0000FF" |
|
|
|
|
|
ws9 = wb.create_sheet(title="๊ธด ๋ฆฌ๋ทฐ ๋ด์ฉ") |
|
for r in dataframe_to_rows(longest_reviews, index=False, header=True): |
|
ws9.append(r) |
|
ws9.sheet_properties.tabColor = "00FF00" |
|
|
|
file_path = "๋ฆฌ๋ทฐ๋ถ์ ๋ค์ด๋ก๋.xlsx" |
|
wb.save(file_path) |
|
return file_path |
|
|
|
|
|
def generate_plots(df, year): |
|
|
|
start_year = datetime.now().year - 2 |
|
recent_data = df[df['Year'].astype(int) >= start_year] |
|
|
|
|
|
|
|
monthly_counts, yearly_counts = analyze_data(df) |
|
recent_monthly_counts, _ = analyze_data(recent_data) |
|
monthly_trend = analyze_monthly_data_for_year(recent_data, year) |
|
daily_counts = analyze_daily_data(recent_data, year) |
|
option_counts = analyze_option_data(recent_data) |
|
|
|
plot_files = [] |
|
|
|
|
|
fig1, ax1 = plt.subplots() |
|
ax1.plot(recent_monthly_counts['Year-Month'], recent_monthly_counts['Counts'], marker='o') |
|
ax1.set_title('Monthly Review Trends (Recent 3 Years)', fontsize=16) |
|
ax1.set_ylabel('Review Counts', fontsize=14) |
|
|
|
|
|
ax1.tick_params(axis='x', rotation=90, labelsize=6) |
|
|
|
tmp_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") |
|
fig1.savefig(tmp_file1.name) |
|
plot_files.append(tmp_file1.name) |
|
|
|
fig2, ax2 = plt.subplots() |
|
ax2.bar(yearly_counts['Year'], yearly_counts['Counts']) |
|
ax2.set_title('Yearly Review Trends') |
|
ax2.set_xlabel('Year') |
|
ax2.set_ylabel('Review Counts') |
|
tmp_file2 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") |
|
fig2.savefig(tmp_file2.name) |
|
plot_files.append(tmp_file2.name) |
|
|
|
fig3, ax3 = plt.subplots() |
|
ax3.bar(monthly_trend['Month'], monthly_trend['Counts']) |
|
ax3.set_title('Monthly Trends for Selected Year') |
|
ax3.set_xlabel('Month') |
|
ax3.set_ylabel('Review Counts') |
|
tmp_file3 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") |
|
fig3.savefig(tmp_file3.name) |
|
plot_files.append(tmp_file3.name) |
|
|
|
fig4, ax4 = plt.subplots() |
|
ax4.bar(daily_counts['Review Date'], daily_counts['Counts']) |
|
ax4.set_title('Daily Trends for Selected Year') |
|
ax4.set_xlabel('Date') |
|
ax4.set_ylabel('Review Counts') |
|
tmp_file4 = tempfile.NamedTemporaryFile(delete=False, suffix=".png") |
|
fig4.savefig(tmp_file4.name) |
|
plot_files.append(tmp_file4.name) |
|
|
|
return plot_files |
|
|
|
|
|
def process_file(file, year): |
|
df = read_excel_data(file) |
|
monthly_counts, yearly_counts = analyze_data(df) |
|
monthly_trend = analyze_monthly_data_for_year(df, year) |
|
daily_counts = analyze_daily_data(df, year) |
|
option_counts = analyze_option_data(df) |
|
review_counts = analyze_option_review_data(df) |
|
|
|
selected_option_counts = analyze_option_data_for_year(df, year) |
|
selected_review_counts = analyze_option_review_data_for_year(df, year) |
|
|
|
longest_reviews = extract_longest_reviews(df) |
|
|
|
original_data = pd.read_excel(file, sheet_name=0, engine='openpyxl') |
|
|
|
result_file = save_to_excel(original_data, monthly_counts, yearly_counts, monthly_trend, daily_counts, |
|
option_counts, review_counts, selected_option_counts, selected_review_counts, |
|
longest_reviews) |
|
|
|
return result_file |
|
|
|
|
|
|
|
def generate_download_links(plots): |
|
download_links = [] |
|
for i, plot in enumerate(plots): |
|
if os.path.exists(plot): |
|
with open(plot, "rb") as image_file: |
|
encoded_string = base64.b64encode(image_file.read()).decode() |
|
data_url = f"image/png;base64,{encoded_string}" |
|
download_links.append(data_url) |
|
|
|
return download_links |
|
|
|
|
|
def get_model_info(filenames): |
|
download_links = [] |
|
for f in filenames: |
|
if os.path.exists(f): |
|
url = hf_hub_url(repo_id="", filename=f) |
|
download_links.append(url) |
|
print(download_links) |
|
return download_links |
|
|
|
|
|
def process_file_with_plots(file, year): |
|
df = read_excel_data(file) |
|
result_file = process_file(file, year) |
|
plots = generate_plots(df, year) |
|
print(result_file) |
|
print(plots) |
|
return [result_file] + plots |
|
|
|
def process_file_with_plots2(file, year): |
|
df = read_excel_data(file) |
|
result_file = process_file(file, year) |
|
plots = generate_plots(df, year) |
|
download_links = get_model_info(plots) |
|
return_values = [result_file] + download_links |
|
return return_values |
|
|
|
|
|
years = [str(year) for year in range(datetime.now().year, datetime.now().year - 10, -1)] |
|
|
|
|
|
def predict(file, year): |
|
return process_file_with_plots(file, year) |
|
|
|
def predict_api(file, year): |
|
return process_file_with_plots2(file, year) |
|
|
|
|
|
def get_positive_reviews(df, years, option_analysis): |
|
df = df[df['Year'].isin(years)] |
|
if option_analysis != "์ ์ฒด์ต์
๋ถ์": |
|
top_n = int(option_analysis.split("(")[1].split("๊ฐ")[0]) |
|
top_options = df['Option1'].value_counts().head(top_n).index.tolist() |
|
df = df[df['Option1'].isin(top_options)] |
|
positive_reviews = df[(df['ReviewScore'] == 5) & (df['Review Length'] <= 500)].sort_values(by='Review Length', |
|
ascending=False) |
|
if len(positive_reviews) < 20: |
|
additional_reviews = df[(df['ReviewScore'] == 4) & (df['Review Length'] <= 500)].sort_values(by='Review Length', |
|
ascending=False) |
|
positive_reviews = pd.concat([positive_reviews, additional_reviews]) |
|
positive_reviews = positive_reviews.head(20) |
|
|
|
positive_reviews.reset_index(drop=True, inplace=True) |
|
positive_reviews.index += 1 |
|
positive_reviews['์๋ฒ'] = positive_reviews.index |
|
|
|
return "\n\n".join(positive_reviews.apply( |
|
lambda x: f"{x['์๋ฒ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1)) |
|
|
|
|
|
|
|
def get_negative_reviews(df, years, option_analysis): |
|
df = df[df['Year'].isin(years)] |
|
if option_analysis != "์ ์ฒด์ต์
๋ถ์": |
|
top_n = int(option_analysis.split("(")[1].split("๊ฐ")[0]) |
|
top_options = df['Option1'].value_counts().head(top_n).index.tolist() |
|
df = df[df['Option1'].isin(top_options)] |
|
negative_reviews = df[(df['ReviewScore'] == 1) & (df['Review Length'] <= 500)].sort_values(by='Review Length', |
|
ascending=False) |
|
if len(negative_reviews) < 30: |
|
additional_reviews = df[(df['ReviewScore'] == 2) & (df['Review Length'] <= 500)].sort_values(by='Review Length', |
|
ascending=False) |
|
negative_reviews = pd.concat([negative_reviews, additional_reviews]) |
|
negative_reviews = negative_reviews.head(30) |
|
|
|
negative_reviews.reset_index(drop=True, inplace=True) |
|
negative_reviews.index += 1 |
|
negative_reviews['์๋ฒ'] = negative_reviews.index |
|
|
|
return "\n\n".join(negative_reviews.apply( |
|
lambda x: f"{x['์๋ฒ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1)) |
|
|
|
|
|
|
|
def update_reviews(file, years, option_analysis): |
|
df = read_excel_data(file) |
|
positive_reviews = get_positive_reviews(df, years, option_analysis) |
|
negative_reviews = get_negative_reviews(df, years, option_analysis) |
|
positive_prompt = f"{positive_reviews}\n\n{prompts['๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์']}" |
|
negative_prompt = f"{negative_reviews}\n\n{prompts['๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์']}" |
|
return positive_reviews, negative_reviews, positive_prompt, negative_prompt |
|
|
|
|
|
|
|
def analyze_all(positive_prompt, negative_prompt): |
|
positive_analysis, _ = generate_section( |
|
review_output=positive_prompt, |
|
system_message=prompts["๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์"], |
|
max_tokens=15000, |
|
temperature=0.3, |
|
top_p=0.95, |
|
) |
|
negative_analysis, _ = generate_section( |
|
review_output=negative_prompt, |
|
system_message=prompts["๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์"], |
|
max_tokens=15000, |
|
temperature=0.4, |
|
top_p=0.95, |
|
) |
|
return positive_analysis, negative_analysis |
|
|
|
|
|
|
|
def create_client(model_name): |
|
return InferenceClient(model_name, token=os.getenv("HF_TOKEN")) |
|
|
|
|
|
client = create_client("CohereForAI/c4ai-command-r-plus") |
|
|
|
|
|
|
|
def generate_section(review_output, system_message, max_tokens, temperature, top_p): |
|
prompt = f"{review_output}\n\n{system_message}" |
|
response = call_api(prompt, max_tokens, temperature, top_p) |
|
return response, prompt |
|
|
|
|
|
|
|
def call_api(content, max_tokens, temperature, top_p): |
|
messages = [{"role": "system", "content": ""}, {"role": "user", "content": content}] |
|
response = client.chat_completion(messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p) |
|
return response.choices[0].message['content'] |
|
|
|
|
|
prompts = { |
|
"๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์": """[์ค์ ๊ท์น] |
|
1. ๋ฐ๋์ ํ๊ธ(ํ๊ตญ์ด)๋ก ์ถ๋ ฅํ๋ผ. |
|
2. ๋๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ถ์ํ๋ ๋น
๋ฐ์ดํฐ ๋ถ์๊ฐ์ด๋ค. |
|
3. ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก ๊ธ์ ์ ์ธ ์๊ฒฌ์ ๋ฐ์ดํฐ๋ง ๋ถ์ํ๋ผ. |
|
4. ๋ฐ๋์ ์ ๊ณต๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์๋ง ๋ถ์ํ๋ผ. |
|
5. ๋์ ์๊ฐ์ ํฌํจํ์ง ๋ง ๊ฒ. |
|
[๋ถ์ ์กฐ๊ฑด] |
|
1. ์ด 20๊ฐ์ ๋ฆฌ๋ทฐ๋ฐ์ดํฐ๋ฅผ ์ ๊ณตํ๋ค. |
|
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์ ๋์งธ์ค ๋ถํฐ์ ์ค์ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ์ํ๋ผ. |
|
3. ๋ฐ๋์ ๊ธ์ ์ ์ธ ์๊ฒฌ๋ง์ ๋ถ์ํ๋ผ. ๋ถ์ ์ ์ธ ์๊ฒฌ์ ์ ์ธํ๋ผ. |
|
4. ๊ธฐ๋ฅ๊ณผ ์ฑ๋ฅ์ ๋ถ๋ถ, ๊ฐ์ฑ์ ์ธ ๋ถ๋ถ, ์ค์ ์ฌ์ฉ ์ธก๋ฉด์ ๋ถ๋ถ, ๋ฐฐ์ก์ ๋ถ๋ถ, ํ๊ฒ๋ณ ๋ถ๋ถ์ ๊ด์ ์ผ๋ก ๋ถ์ํ๋ผ. |
|
5. 4๋ฒ์ ์กฐ๊ฑด์ ํฌํจ๋์ง ์๋ ๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ๋๋ก ์ถ๋ ฅํ๋ผ. |
|
6. ๋ง์ผํ
์ ์ธ ์์๋ก ์ฌ์ฉํ ์ ์๋ ๊ณ ๊ฐ์ ์ค์ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ์ํ๋ผ. |
|
[์ถ๋ ฅ ํํ ์กฐ๊ฑด] |
|
1. ๊ฐ๊ฐ์ ์ ๋ชฉ ์์ '๐'์ด๋ชจ์ง๋ฅผ ์ถ๋ ฅํ๋ผ,'#', '##'์ ์ถ๋ ฅํ์ง ๋ง๊ฒ. |
|
2. ๊ฐ์ฅ ๋ง์ง๋ง์ ์ข
ํฉ ์๊ฒฌ์ ์์ฑํ๋ผ, "๐์ข
ํฉ์๊ฒฌ"์ ์ ๋ชฉํํ๋ฅผ ์ฌ์ฉํ๋ผ. |
|
[์ข
ํฉ์๊ฒฌ์ ์ถ๋ ฅ ์กฐ๊ฑด ์์] |
|
('์ข
ํฉ์๊ฒฌ'์ด ์๋ ๋ค๋ฅธ ๋ถ๋ถ์ ์ด ์ถ๋ ฅ ์กฐ๊ฑด์ ๋ฐ์ํ์ง ๋ง ๊ฒ. |
|
- ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๋ผ. |
|
- ์ข
ํฉ์๊ฒฌ์๋ ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๊ณ ์์ ์ ๋ฌธ์ฅ์ผ๋ก ์์ฑํ๋ผ. |
|
- ๋งค์ถ์ ๊ทน๋ํ ํ ์ ์๋ ๊ณ ๊ฐ์ ์ค์ ๋ฆฌ๋ทฐ ํฌ์ธํธ๋ฅผ ์ ์ํ๋ผ. |
|
[SWOT๋ถ์ ์กฐ๊ฑด] |
|
1. '์ข
ํฉ์๊ฒฌ' ๋ค์ ๋ด์ฉ์ผ๋ก SWOT๋ถ์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. |
|
2. SWOT๋ถ์ ์ค '๊ฐ์ '์๊ฒฌ๊ณผ '๊ธฐํ'์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. |
|
3. ๋ฐ๋์ '์ข
ํฉ์๊ฒฌ'์ ๋ด์ฉ์ ๊ธฐ๋ฐ์ผ๋ก ์์ฑํ๋ผ. |
|
4. ์ ๋ชฉ์ '๐น ๊ฐ์ ', '๐น ๊ธฐํ'์ผ๋ก ์ถ๋ ฅํ๋ผ. |
|
[์ข
ํฉ์๊ฒฌ์ ์ถ๋ ฅ ์กฐ๊ฑด ๋] |
|
|
|
3. ์ค์ ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์ ์ฌ์ฉ๋ ๋จ์ด๋ฅผ ํฌํจํ๋ผ. |
|
4. ๋์ ์๊ฐ์ ์์๋ก ๋ฃ์ง ๋ง ๊ฒ. |
|
""", |
|
"๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์": """[์ค์ ๊ท์น] |
|
1. ๋ฐ๋์ ํ๊ธ(ํ๊ตญ์ด)๋ก ์ถ๋ ฅํ๋ผ. |
|
2. ๋๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ถ์ํ๋ ๋น
๋ฐ์ดํฐ ๋ถ์๊ฐ์ด๋ค. |
|
3. ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ๋ฅผ ๋ฐํ์ผ๋ก ๋ถ์ ์ ์ธ ์๊ฒฌ์ ๋ฐ์ดํฐ๋ง ๋ถ์ํ๋ผ. |
|
4. ๋ฐ๋์ ์ ๊ณต๋ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์๋ง ๋ถ์ํ๋ผ. |
|
5. ๋์ ์๊ฐ์ ํฌํจํ์ง ๋ง ๊ฒ. |
|
[๋ถ์ ์กฐ๊ฑด] |
|
1. ์ด 30๊ฐ์ ๋ฆฌ๋ทฐ๋ฐ์ดํฐ๋ฅผ ์ ๊ณตํ๋ค. |
|
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์ ๋์งธ์ค ๋ถํฐ์ ์ค์ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ์ํ๋ผ. |
|
3. ๋ถ์ ์ ์ธ ์๊ฒฌ๋ง์ ๋ถ์ํ๋ผ. |
|
4. ๊ธฐ๋ฅ๊ณผ ์ฑ๋ฅ์ ๋ถ๋ถ, ๊ฐ์ฑ์ ์ธ ๋ถ๋ถ, ์ค์ ์ฌ์ฉ ์ธก๋ฉด์ ๋ถ๋ถ, ๋ฐฐ์ก์ ๋ถ๋ถ, ๊ณ ๊ฐ์ ๋ถ๋
ธ ๋ถ๋ถ์ ๊ด์ ์ผ๋ก ๋ถ์ํ๋ผ. |
|
5. 4๋ฒ์ ์กฐ๊ฑด์ ํฌํจ๋์ง ์๋ ๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ๋๋ก ์ถ๋ ฅํ๋ผ. |
|
6. ๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ์ผ๋ก '๊ฐ์ ํ ์ '์ ์ถ๋ ฅํ๋ผ. |
|
[์ถ๋ ฅ ํํ ์กฐ๊ฑด] |
|
1. ๊ฐ๊ฐ์ ์ ๋ชฉ ์์ '๐'์ด๋ชจ์ง๋ฅผ ์ถ๋ ฅํ๋ผ,'#', '##'์ ์ถ๋ ฅํ์ง ๋ง๊ฒ. |
|
2. ๊ฐ์ฅ ๋ง์ง๋ง์ '๊ฐ์ ํ ์ '์ ์ถ๋ ฅํ๋ผ("๐ข๊ฐ์ ํ ์ "์ ์ ๋ชฉํํ๋ฅผ ์ฌ์ฉํ๋ผ.) |
|
[๊ฐ์ ํ ์ ์ ์ถ๋ ฅ ์กฐ๊ฑด ์์] |
|
('๊ฐ์ ํ ์ '์ด ์๋ ๋ค๋ฅธ ๋ถ๋ถ์ ์ด ์ถ๋ ฅ ์กฐ๊ฑด์ ๋ฐ์ํ์ง ๋ง ๊ฒ. |
|
- ํญ๋ชฉ๋ณ ์ ๋ชฉ์ ์ ์ธํ๋ผ. |
|
- ์ฃผ์ ํญ๋ชฉ๋ณ๋ก ๊ฐ์ ํ ์ ์ ์ถ๋ ฅํ๋ผ. |
|
- ์ ๋ฌธ์ ์ด๊ณ , ๋ถ์์ ์ด๋ฉฐ, ์ ์ํ๋ ํํ์ ๊ณต์ํ ์ดํฌ๋ฅผ ์ฌ์ฉํ๋ผ.(๋จ๋ตํ ํํ ๊ธ์ง) |
|
[SWOT๋ถ์ ์กฐ๊ฑด] |
|
1. '์ข
ํฉ์๊ฒฌ' ๋ค์ ๋ด์ฉ์ผ๋ก SWOT๋ถ์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. |
|
2. SWOT๋ถ์ ์ค '์ฝ์ '์๊ฒฌ๊ณผ '์ํ'์ ์๊ฒฌ์ ์ถ๋ ฅํ๋ผ. |
|
3. ๋ฐ๋์ '๊ฐ์ ํ ์ '์ ๋ด์ฉ์ ๊ธฐ๋ฐ์ผ๋ก ์์ฑํ๋ผ. |
|
4. ์ ๋ชฉ์ '๐ ์ฝ์ ', '๐ ์ํ'์ผ๋ก ์ถ๋ ฅํ๋ผ. |
|
[๊ฐ์ ํ ์ ์ ์ถ๋ ฅ ์กฐ๊ฑด ๋] |
|
|
|
3. ์ค์ ๊ณ ๊ฐ์ ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ์์ ์ฌ์ฉ๋ ๋จ์ด๋ฅผ ํฌํจํ๋ผ. |
|
4. ๋์ ์๊ฐ์ ์์๋ก ๋ฃ์ง ๋ง ๊ฒ. |
|
""" |
|
} |
|
|
|
|
|
def select_all_years(): |
|
current_year = datetime.now().year |
|
return [str(year) for year in range(current_year, current_year - 5, -1)] |
|
|
|
|
|
def deselect_all_years(): |
|
return [] |
|
|
|
|
|
with gr.Blocks() as ๋ฆฌ๋ทฐ์ถ์ด_๋ถ์: |
|
gr.Markdown("### ์์
ํ์ผ ์
๋ก๋") |
|
file_input = gr.File(label="", file_types=["xlsx"]) |
|
year_selection = gr.Radio(years, label="๋ถ์๋
๋ ์ ํ", value=str(datetime.now().year)) |
|
analyze_button = gr.Button("๋ถ์ ์คํ") |
|
|
|
outputs = [ |
|
gr.File(label="์ธ๋ถ๋ถ์ ์๋ฃ๋ฅผ ๋ค์ด๋ฐ์ผ์ธ์(Excelํ์ผ)"), |
|
gr.File(label="์ต๊ทผ3๋
๊ฐ ์๋ณ ๋ฆฌ๋ทฐ์ถ์ด"), |
|
gr.File(label="์ต๊ทผ ๋
๋๋ณ ๋ฆฌ๋ทฐ์ถ์ด"), |
|
gr.File(label="์ ํ๋
๋ ์ ๋ฆฌ๋ทฐ์ถ์ด"), |
|
gr.File(label="์ ํ๋
๋ ์ผ์ผ ๋ฆฌ๋ทฐ์ถ์ด"), |
|
] |
|
|
|
analyze_button.click(predict, inputs=[file_input, year_selection], outputs=outputs) |
|
|
|
with gr.Blocks() as ๋ฆฌ๋ทฐ๋ถ์: |
|
year_selection_review = gr.CheckboxGroup( |
|
choices=[str(year) for year in select_all_years()], |
|
label="์ฐ๋ ์ ํ", |
|
value=[str(year) for year in select_all_years()] |
|
) |
|
option_selection = gr.Radio( |
|
choices=["์ ์ฒด์ต์
๋ถ์", "์ฃผ์์ต์
๋ถ์(1๊ฐ)", "์ฃผ์์ต์
๋ถ์(3๊ฐ)", "์ฃผ์์ต์
๋ถ์(5๊ฐ)"], |
|
label="์ต์
๋ณ ๋ฆฌ๋ทฐ๋ถ์ ์ ํ", |
|
value="์ ์ฒด์ต์
๋ถ์" |
|
) |
|
analyze_button_review = gr.Button("๋ฆฌ๋ทฐ ๊ฐ์ ธ์ค๊ธฐ") |
|
analyze_all_button = gr.Button("๋ฆฌ๋ทฐ ๋ถ์ํ๊ธฐ") |
|
|
|
with gr.Column(): |
|
gr.Markdown("### ๋ฆฌ๋ทฐ ๊ฒฐ๊ณผ") |
|
positive_reviews_output_review = gr.Textbox(label="๊ธ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ(20๊ฐ)", interactive=False, lines=12) |
|
negative_reviews_output_review = gr.Textbox(label="๋ถ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ(30๊ฐ)", interactive=False, lines=12) |
|
|
|
gr.Markdown("### ์ถ๋ ฅ") |
|
|
|
positive_analysis_output_review = gr.Textbox(label="๊ธ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์", interactive=False, lines=12) |
|
negative_analysis_output_review = gr.Textbox(label="๋ถ์ ์ ์ธ ๋ฆฌ๋ทฐ๋ถ์", interactive=False, lines=12) |
|
|
|
analyze_button_review.click(update_reviews, inputs=[file_input, year_selection_review, option_selection], |
|
outputs=[positive_reviews_output_review, negative_reviews_output_review]) |
|
|
|
analyze_all_button.click( |
|
fn=analyze_all, |
|
inputs=[positive_reviews_output_review, negative_reviews_output_review], |
|
outputs=[positive_analysis_output_review, negative_analysis_output_review] |
|
) |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
positive_analysis_output_review |
|
with gr.Column(): |
|
negative_analysis_output_review |
|
|
|
with gr.Blocks() as tabs: |
|
with gr.Tab("๋ฆฌ๋ทฐ์ถ์ด ๋ถ์"): |
|
๋ฆฌ๋ทฐ์ถ์ด_๋ถ์.render() |
|
with gr.Tab("๋ฆฌ๋ทฐ๋ถ์"): |
|
๋ฆฌ๋ทฐ๋ถ์.render() |
|
|
|
if __name__ == "__main__": |
|
tabs.launch() |