"""Streamlit news-search app.

Flow:
1. The "fetch" button scrapes ``NUM_NEWS`` articles via ``YahooNewsScraper``
   and stores their texts/URLs in ``st.session_state`` so they survive
   Streamlit's script reruns.
2. Once articles are available, the user enters a noun. For every article the
   keywords produced by ``JapaneseTextVectorizer`` are compared with that noun
   by ``CosineSimilarityCalculator``; the article holding the single most
   similar keyword is summarized with ``TextSummarizer`` and displayed.
"""
import streamlit as st

from news_scraper import YahooNewsScraper
from tfidf_calculator import JapaneseTextVectorizer
from cosine_similarity_calculator import CosineSimilarityCalculator
from summerizer import TextSummarizer

# Number of articles fetched per click on the fetch button.
NUM_NEWS = 5
# Kept for backward compatibility with the original lowercase module name.
num_news = NUM_NEWS
# st.text_area documents a minimum height of 68 px; smaller values are
# rejected by recent Streamlit releases.
SUMMARY_AREA_HEIGHT = 68

st.title("ニュース検索アプリ")

# Best match found so far across all articles.
best_article_text = None
best_article_url = None
best_max_word = None
# The original code assumes similarities are >= 0, so -1 acts as
# "nothing found yet".
best_max_value = -1

# Session state survives reruns; initialise it exactly once per session.
if 'news_fetched' not in st.session_state:
    st.session_state['news_fetched'] = False
    st.session_state['article_text_list'] = []
    st.session_state['article_url_list'] = []

if st.button('最新ニュース取得'):
    with st.spinner('ニュースを取得中...'):
        # Scrape the articles one by one by index.
        scraper = YahooNewsScraper()
        article_text_list = []
        article_url_list = []
        for i in range(NUM_NEWS):
            article_text, detail_url = scraper.scrape_article(i)
            article_text_list.append(article_text)
            article_url_list.append(detail_url)

    # Persist the results so the search step still sees them after a rerun.
    st.session_state['news_fetched'] = True
    st.session_state['article_text_list'] = article_text_list
    st.session_state['article_url_list'] = article_url_list
    st.write("取得完了しました")

if st.session_state['news_fetched']:
    search_word = st.text_input(
        '名詞',
        placeholder='名詞を入力してください',
        max_chars=10,
        help='10文字以内の名詞',
    )

    if st.button('要約作成'):
        if not search_word or not search_word.strip():
            # Nothing to compare against; ask for input instead of running
            # the similarity calculation on an empty string.
            st.warning('名詞を入力してください')
        else:
            article_text_list = st.session_state['article_text_list']
            article_url_list = st.session_state['article_url_list']

            # Neither helper depends on the loop variable, so build them once.
            vectorizer = JapaneseTextVectorizer()
            word_similarity = CosineSimilarityCalculator()

            for temp_article_text, temp_article_url in zip(
                article_text_list, article_url_list
            ):
                # Compute TF-IDF values; only the keywords (dict keys) are used.
                tfidf_dict = vectorizer.fit_transform(temp_article_text)
                article_keyword_list = list(tfidf_dict.keys())

                # Similarity between the search word and each keyword.
                result_word_similarity = word_similarity.calculate_similarity(
                    search_word, article_keyword_list
                )

                # Keep only the keywords that received a similarity value.
                filtered_data = {
                    k: v
                    for k, v in result_word_similarity.items()
                    if v is not None
                }
                if not filtered_data:
                    continue

                # Best keyword of this article and its score.
                max_word = max(filtered_data, key=filtered_data.get)
                max_value = filtered_data[max_word]

                # Promote this article if it beats the best seen so far.
                if max_value > best_max_value:
                    best_max_value = max_value
                    best_max_word = max_word
                    best_article_text = temp_article_text
                    best_article_url = temp_article_url

            if best_article_text is None:
                # No article produced a usable similarity; summarizing None
                # would fail, so report the situation instead.
                st.warning('類似するワードが見つかりませんでした')
            else:
                # Summarize the best-matching article and show the result.
                summarizer = TextSummarizer()
                summary_text = summarizer.summarize(
                    best_article_text, max_length=40, min_length=20
                )

                st.write(f'最も類似度が高いワードは「{best_max_word}」でした')
                st.write(f'url:{best_article_url}')
                st.text_area("要約:", summary_text, height=SUMMARY_AREA_HEIGHT)