{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "!pip intall numpy pandas FlagEmbedding scikit-learn" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.metrics import precision_score, recall_score, f1_score\n", "from FlagEmbedding import FlagReranker\n", "import json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model_path = '...'\n", "qd_df = pd.read_parquet('AutoRAG-example-korean-embedding-benchmark/data/qa_v4.parquet')\n", "qd_df['generation_gt'].apply(lambda x : len(x)).describe()\n", "qd_df['retrieval_gt'].apply(lambda x : len(x[0])).describe()\n", "qd_df['retrieval_gt'] = qd_df['retrieval_gt'].apply(lambda x : x[0][0])\n", "\n", "corpus_df = pd.read_parquet('AutoRAG-example-korean-embedding-benchmark/data/ocr_corpus_v3.parquet')\n", "corpus_id = {}\n", "for idx, row in corpus_df.iterrows():\n", " corpus_id[row[0]] = row[1]\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_3861538/48936308.py:10: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n", " corpus_id[row[0]] = row[1]\n", "/tmp/ipykernel_3861538/48936308.py:18: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). 
To access a value by position, use `ser.iloc[pos]`\n", "  query_id[row[0]] = row[1]\n" ] } ], "source": [ "# Keep only the columns needed for evaluation.\n", "qd_df = qd_df[['qid','query','generation_gt','retrieval_gt']]\n", "\n", "# Map qid -> query text. Use .iloc for positional access: plain row[0]\n", "# triggers the pandas FutureWarning recorded in this cell's outputs.\n", "query_id = {}\n", "for idx, row in qd_df.iterrows():\n", "    query_id[row.iloc[0]] = row.iloc[1]\n", "\n", "# Build the qrel mapping: qid -> set of relevant corpus doc_ids.\n", "qrel = qd_df[['qid','retrieval_gt']]\n", "qrel_id = {}\n", "for idx, row in qrel.iterrows():\n", "    q_id = row.iloc[0]\n", "    relevant_corpus_id = row.iloc[1]\n", "    if q_id not in qrel_id:\n", "        qrel_id[q_id] = set()\n", "    qrel_id[q_id].add(relevant_corpus_id)\n", "\n", "corpus_df = corpus_df[['doc_id','contents']]\n", "\n", "valid_dict = {}\n", "valid_dict['qrel'] = qrel_id" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "doc_id                          commerce - B2BDigComm.pdf - 1\n", "contents    Adobe\\n디지털 커머스 시대,\\nB2B 비즈니스 생존 전략\\nB2B 비즈니스를 ...\n", "Name: 0, dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "corpus_df.iloc[0]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['qid', 'query', 'generation_gt', 'retrieval_gt'], dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "qd_df.columns" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "corpus_df = corpus_df.reset_index(drop=True)\n", "qd_df = qd_df.reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def calculate_accuracy(ranks_list, valid_dict, qd_df, k_values=(1, 3, 5)):\n", "    \"\"\"Fraction of queries whose relevant document appears in the top-k ranks.\"\"\"\n", "    accuracies = {k: 0 for k in k_values}\n", "    total_queries = len(qd_df)\n", "    \n", "    for i in range(total_queries):\n", "        search_idx = ranks_list[i]\n", "        # Positional index of the single relevant doc; NOTE: relies on the\n", "        # module-level corpus_df having been reset_index'd in the cell above.\n", "        true_doc_idx = corpus_df[corpus_df['doc_id'] == list(valid_dict['qrel'][qd_df.loc[i, 'qid']])[0]].index[0]\n", "        \n", "        for k in k_values:\n", "            top_k_preds = search_idx[:k]\n", "            if 
true_doc_idx in top_k_preds:\n", "                accuracies[k] += 1\n", "    \n", "    return {k: accuracies[k] / total_queries for k in k_values}\n", "\n", "def calculate_f1_recall_precision(ranks_list, valid_dict, qd_df, k_values=(1, 3, 5)):\n", "    \"\"\"Per-k mean precision/recall/F1 over all queries.\n", "\n", "    Each query has exactly one relevant document and every top-k entry is\n", "    treated as a positive prediction, so precision@k is at most 1/k.\n", "    Returns three dicts keyed by k: (f1, recall, precision).\n", "    \"\"\"\n", "    f1_scores = {k: 0 for k in k_values}\n", "    recall_scores = {k: 0 for k in k_values}\n", "    precision_scores = {k: 0 for k in k_values}\n", "    \n", "    total_queries = len(qd_df)\n", "    \n", "    for i in range(total_queries):\n", "        search_idx = ranks_list[i]\n", "        # NOTE: relies on the module-level corpus_df, same as calculate_accuracy.\n", "        true_doc_idx = corpus_df[corpus_df['doc_id'] == list(valid_dict['qrel'][qd_df.loc[i, 'qid']])[0]].index[0]\n", "        \n", "        for k in k_values:\n", "            top_k_preds = search_idx[:k]\n", "            y_true = [1 if idx == true_doc_idx else 0 for idx in top_k_preds]\n", "            y_pred = [1] * len(top_k_preds)\n", "            \n", "            # Precision, Recall, F1. zero_division=0 keeps the value at 0 and\n", "            # silences the UndefinedMetricWarning raised when the relevant doc\n", "            # is not in the top-k (y_true is all zeros).\n", "            precision_scores[k] += precision_score(y_true, y_pred, zero_division=0)\n", "            recall_scores[k] += recall_score(y_true, y_pred, zero_division=0)\n", "            f1_scores[k] += f1_score(y_true, y_pred, zero_division=0)\n", "    \n", "    return {k: f1_scores[k] / total_queries for k in k_values}, \\\n", "           {k: recall_scores[k] / total_queries for k in k_values}, \\\n", "           {k: precision_scores[k] / total_queries for k in k_values}\n", "\n", "\n", "def evaluate_model(corpus_df, qd_df, valid_dict, reranker):\n", "    \"\"\"Rerank the whole corpus for every query and compute retrieval metrics.\n", "\n", "    Mutates the passed corpus_df by adding a 'query' column, so callers\n", "    should pass a copy. Returns (accuracies, f1_scores, recalls, precisions).\n", "    \"\"\"\n", "    scores_list = []\n", "    ranks_list = []\n", "    \n", "    for c, query in enumerate(qd_df['query'], start=1):\n", "        # Score every (query, contents) pair in the corpus for this query.\n", "        corpus_df['query'] = query\n", "        pair_df = corpus_df[['query', 'contents']]\n", "        scores = reranker.compute_score(pair_df.values.tolist(), normalize=True)\n", "        scores = np.array(scores)\n", "        \n", "        # Rank corpus rows by descending reranker score.\n", "        sorted_idxs = np.argsort(-scores)\n", "        scores_list.append(scores[sorted_idxs])\n", "        ranks_list.append(sorted_idxs)\n", "        print(f'{c}/{len(qd_df)}')\n", "\n", "    k_values = [1, 3, 5, 10]\n", "    accuracies = calculate_accuracy(ranks_list, valid_dict, qd_df, k_values=k_values)\n", "    f1_scores, recalls, precisions = calculate_f1_recall_precision(ranks_list, valid_dict, qd_df, k_values=k_values)\n", "    \n", "    return accuracies, 
f1_scores, recalls, precisions\n", "\n", "\n", "# Evaluate the model and store the results\n", "# (fp16 halves reranker memory; model_path is set in the loading cell above)\n", "reranker = FlagReranker(model_path, use_fp16=True)\n", "\n", "accuracies, f1_scores, recalls, precisions = evaluate_model(\n", "    corpus_df.copy(), qd_df, valid_dict, reranker)\n", "\n", "# Report Accuracy/F1/Recall/Precision at each cutoff k.\n", "print(f'Model: {model_path}')\n", "for k in [1, 3, 5, 10]:\n", "    print(f'Accuracy@{k}: {accuracies[k]:.4f}')\n", "    print(f'F1@{k}: {f1_scores[k]:.4f}')\n", "    print(f'Recall@{k}: {recalls[k]:.4f}')\n", "    print(f'Precision@{k}: {precisions[k]:.4f}')\n" ] } ], "metadata": { "kernelspec": { "display_name": "sbert3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }