samjulien committed on
Commit
ef64ce4
·
1 Parent(s): f4616b8

Add Hacker News app

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .DS_Store
2
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Build stage: resolve the dependency set with Poetry and pre-build wheels.
# The stage name is lowercase so it matches the `--from=build` reference in the
# runtime stage. Debian "buster" images are end-of-life, so the maintained
# "bookworm" variant of the same Python version is used for both stages.
FROM python:3.11-slim-bookworm AS build

# Set environment variables for Python, pip and Poetry
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=false \
    POETRY_VERSION=1.7.1

# Set the working directory in the container
WORKDIR /app

# Copy the dependencies file to the working directory
COPY ./pyproject.toml /app/

# Update, install build dependencies, export the requirements and build wheels
RUN apt-get update && \
    apt-get install -y gcc g++ unixodbc-dev && \
    pip install "poetry==$POETRY_VERSION" && \
    poetry export --without-hashes --format requirements.txt --output requirements.txt && \
    python3 -m pip wheel --no-cache-dir --no-deps -w /app/wheels -r requirements.txt

# Runtime stage (same base image as the build stage so the wheels are compatible)
FROM python:3.11-slim-bookworm AS run

# Home directory of the non-root user; its local bin directory is put on PATH
# so console scripts installed by `pip` as that user (e.g. `writer`) are found.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Create a non-root user
RUN useradd -m -u 1000 user

# Switch to the non-root user
USER user

# Copy wheel files from the build stage
COPY --from=build /app/wheels $HOME/app/wheels

# Set the working directory to where the wheels are
WORKDIR $HOME/app/wheels

# Install the wheel files
RUN pip3 --no-cache-dir install *.whl

# Change app name here to copy the application files to the working directory
COPY --chown=user ./hacker-news-social-listener $HOME/app

# Set the working directory to the application files
WORKDIR $HOME/app

# Specify the command to run the application
ENTRYPOINT [ "writer", "run" ]

# Expose the port the app runs on
EXPOSE 8080

# Default arguments appended to the entrypoint: serve the current directory
CMD [ ".", "--port", "8080", "--host", "0.0.0.0" ]
README.md CHANGED
@@ -1,11 +1,12 @@
1
  ---
2
  title: Hacker News Listener
3
- emoji: 🐠
4
  colorFrom: purple
5
  colorTo: red
6
  sdk: docker
7
  pinned: false
8
  short_description: Navigate and analyze Hacker News posts and comments.
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Hacker News Listener
3
+ emoji: 🎧
4
  colorFrom: purple
5
  colorTo: red
6
  sdk: docker
7
  pinned: false
8
  short_description: Navigate and analyze Hacker News posts and comments.
9
+ app_port: 8080
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
hacker-news-social-listener/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ static/hackernews_comments.csv
2
+ static/hackernews_posts.csv
hacker-news-social-listener/.wf/components-page-0-c0f99a9e-5004-4e75-a6c6-36f17490b134.jsonl ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"id": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "type": "page", "content": {"pageMode": "compact"}, "handlers": {}, "isCodeManaged": false, "parentId": "root", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
2
+ {"id": "bebc5fe9-63a7-46a7-b0fa-62303555cfaf", "type": "header", "content": {"text": "Hacker News Listener"}, "handlers": {}, "isCodeManaged": false, "parentId": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
3
+ {"id": "m7luxumzscv65i09", "type": "tabs", "content": {}, "handlers": {}, "isCodeManaged": false, "parentId": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "position": 1, "visible": {"binding": "", "expression": true, "reversed": false}}
4
+ {"id": "lon4vs20gd3myh7e", "type": "tab", "content": {"name": "Setup"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
5
+ {"id": "xlc1wtcwu1g2i07p", "type": "heading", "content": {"text": "Navigate and analyze Hacker News posts and comments using Writer graph-based RAG and Palmyra X 004."}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 0}
6
+ {"id": "62r9auy895datfnp", "type": "message", "content": {"message": "@{message_setup}"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 1, "visible": {"binding": "message_setup_vis", "expression": "custom", "reversed": false}}
7
+ {"id": "mmh30t1tiyv5mi9s", "type": "section", "content": {"containerBackgroundColor": "#BFCBFF", "isCollapsible": "yes", "title": "\ud83d\udd0e Pull latest N posts"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 2}
8
+ {"id": "hpeff7ha6v27zsk8", "type": "text", "content": {"text": "**Pull up to 500 Hacker News stories, with or without comments.** Data from the Hacker News API is sent to Writer\u2019s graph-based RAG system, known as a Knowledge Graph. This is accessed dynamically by an LLM through the Writer RAG tool.", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "mmh30t1tiyv5mi9s", "position": 0}
9
+ {"id": "up5ofq0drv233umy", "type": "section", "content": {"containerBackgroundColor": "#FFD8CD", "isCollapsible": "yes", "title": "\ud83d\ude4b\u200d\u2640\ufe0f Ask questions"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 3}
10
+ {"id": "9tlgkbsuw9wgpocp", "type": "text", "content": {"text": "**Explore trending topics, recent product releases, open-source projects, or job listings.** The LLM, Palmyra X 004, interprets user queries, calls the Writer RAG tool to retrieve relevant Hacker News posts, and generates responses.", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "up5ofq0drv233umy", "position": 0}
11
+ {"id": "tmi69i8jt7u50b99", "type": "section", "content": {"containerBackgroundColor": "#D4B2F7", "isCollapsible": "yes", "title": "\ud83d\udcc4 Generate a report"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 4}
12
+ {"id": "taarom6dd93ryw1k", "type": "text", "content": {"text": "**Automatically summarize the most popular topics in one click.** All posts are loaded into Palmyra X 004's 128k context window, where they are analyzed to produce a structured report highlighting key trends and insights.", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "tmi69i8jt7u50b99", "position": 0}
13
+ {"id": "5myjrizpz5uxilif", "type": "text", "content": {"text": "#### \ud83c\udfd7\ufe0f <a href=\"https://writer.com/engineering/rag-tool/\" target=\"_blank\">Build your own RAG app</a>. | \ud83d\udcbb <a href=\"https://github.com/writer/framework-tutorials/tree/main/hacker-news-social-listener\">Get the code.</a>", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 5, "visible": {"binding": "", "expression": true, "reversed": false}}
14
+ {"id": "wcljpgx0f50y5kac", "type": "sliderinput", "binding": {"eventType": "wf-number-change", "stateRef": "fetch_limit"}, "content": {"label": "Pull the last N posts", "maxValue": "500", "minValue": "10", "stepSize": "10"}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 6, "visible": {"binding": "", "expression": true, "reversed": false}}
15
+ {"id": "zw8nru7f036fdubh", "type": "horizontalstack", "content": {}, "handlers": {}, "isCodeManaged": false, "parentId": "lon4vs20gd3myh7e", "position": 7}
16
+ {"id": "tk9h8c6f6kf44x7z", "type": "switchinput", "binding": {"eventType": "wf-toggle", "stateRef": "allow_comments"}, "content": {"label": "Include comments"}, "handlers": {}, "isCodeManaged": false, "parentId": "zw8nru7f036fdubh", "position": 0}
17
+ {"id": "cwgi42r2nxzja4gg", "type": "button", "content": {"text": "Fetch posts"}, "handlers": {"wf-click": "fetch_posts"}, "isCodeManaged": false, "parentId": "zw8nru7f036fdubh", "position": 1}
18
+ {"id": "yo212stq5so5c5au", "type": "tab", "content": {"name": "Raw data"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 1, "visible": {"binding": "", "expression": true, "reversed": false}}
19
+ {"id": "7gzo3w8vnbqvld8r", "type": "heading", "content": {"text": "Scraped posts"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
20
+ {"id": "46q42e5z5jninm4o", "type": "dataframe", "content": {"dataframe": "@{posts}", "enableDownload": "yes", "enableSearch": "yes", "showIndex": "no", "useMarkdown": "yes", "wrapText": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 1, "visible": {"binding": "", "expression": true, "reversed": false}}
21
+ {"id": "2d3gfh1tavmi1iab", "type": "heading", "content": {"text": "Scraped comments"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 2, "visible": {"binding": "allow_comments", "expression": "custom", "reversed": false}}
22
+ {"id": "q0275uf0oldds8v8", "type": "dataframe", "content": {"dataframe": "@{comments}", "enableDownload": "yes", "enableSearch": "yes", "showIndex": "no", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "yo212stq5so5c5au", "position": 3, "visible": {"binding": "allow_comments", "expression": "custom", "reversed": false}}
23
+ {"id": "8eghbz6jlckh3zmd", "type": "tab", "content": {"name": "Chat with Hacker News"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 2, "visible": {"binding": "", "expression": true, "reversed": false}}
24
+ {"id": "rpi88dvxmlxr0qd9", "type": "columns", "content": {}, "handlers": {}, "isCodeManaged": false, "parentId": "8eghbz6jlckh3zmd", "position": 0}
25
+ {"id": "rosw32keaejygiir", "type": "column", "content": {"width": "13"}, "handlers": {}, "isCodeManaged": false, "parentId": "rpi88dvxmlxr0qd9", "position": 0}
26
+ {"id": "crm2bdbrjclid4k4", "type": "chatbot", "content": {"conversation": "@{conversation}", "useMarkdown": "yes"}, "handlers": {"wf-chatbot-message": "message_handler"}, "isCodeManaged": false, "parentId": "rosw32keaejygiir", "position": 0}
27
+ {"id": "814lb9dktd2e1ye3", "type": "horizontalstack", "content": {"contentHAlign": "start"}, "handlers": {}, "isCodeManaged": false, "parentId": "rosw32keaejygiir", "position": 1}
28
+ {"id": "t171dwz5muor2n9x", "type": "text", "content": {"alignment": "center", "text": "@{contributing_sources_button_text}", "useMarkdown": "yes"}, "handlers": {"wf-click": "contributing_sources_change_vis"}, "isCodeManaged": false, "parentId": "814lb9dktd2e1ye3", "position": 0}
29
+ {"id": "contributed_sources", "type": "column", "content": {"cssClasses": "files-list", "title": "Contributing sources", "width": "7"}, "handlers": {}, "isCodeManaged": false, "parentId": "rpi88dvxmlxr0qd9", "position": 1, "visible": {"binding": "contributing_sources_vis", "expression": "custom", "reversed": false}}
30
+ {"id": "erkqharzgysk5s3n", "type": "tab", "content": {"name": "Generate trend report"}, "handlers": {}, "isCodeManaged": false, "parentId": "m7luxumzscv65i09", "position": 3, "visible": {"binding": "", "expression": true, "reversed": false}}
31
+ {"id": "vzbrrx0dlp3skcn2", "type": "message", "content": {"message": "@{message_report}"}, "handlers": {}, "isCodeManaged": false, "parentId": "erkqharzgysk5s3n", "position": 0, "visible": {"binding": "message_report_vis", "expression": "custom", "reversed": false}}
32
+ {"id": "osvuzxnivfs3qrut", "type": "text", "content": {"text": "@{prepared_report}", "useMarkdown": "yes"}, "handlers": {}, "isCodeManaged": false, "parentId": "erkqharzgysk5s3n", "position": 1}
33
+ {"id": "w2hg99rv80b12tpt", "type": "button", "content": {"text": "Generate report"}, "handlers": {"wf-click": "run_report"}, "isCodeManaged": false, "parentId": "erkqharzgysk5s3n", "position": 2, "visible": {"binding": "", "expression": true, "reversed": false}}
34
+ {"id": "6ugh4p7vlu1j7fvp", "type": "text", "content": {"alignment": "center", "text": "Made with \u2764\ufe0f by <a href=\"http://www.writer.com\">Writer</a>.", "useMarkdown": "yes"}, "handlers": {}, "parentId": "c0f99a9e-5004-4e75-a6c6-36f17490b134", "position": 2}
hacker-news-social-listener/.wf/components-root.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"id": "root", "type": "root", "content": {"appName": "Hacker News Listener"}, "handlers": {}, "isCodeManaged": false, "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
hacker-news-social-listener/.wf/components-workflows_root.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"id": "workflows_root", "type": "workflows_root", "content": {}, "handlers": {}, "isCodeManaged": false, "position": 0, "visible": {"binding": "", "expression": true, "reversed": false}}
hacker-news-social-listener/.wf/metadata.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "writer_version": "0.8.2"
3
+ }
hacker-news-social-listener/README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hacker News Listener
2
+ This application is built using the Writer Framework and is designed to scrape the top posts and comments from Hacker News. It processes the data, uploads it to a Writer Knowledge Graph for further analysis, and generates AI-powered insights based on the content of the posts.
3
+
4
+ ## Usage
5
+
6
+ 1. Select the number of items you wish to process.
7
+ 2. The application will generate raw data with analysis of posts and comments.
8
+ 3. Ask specific questions using the Knowledge Graph chat.
9
+ 4. Generate a detailed report from the processed data using the Prepared Report feature.
10
+
11
+ ## Running the application
12
+ First, ensure you have Poetry installed. Then, in the project directory, install the dependencies by running:
13
+
14
+ ```sh
15
+ poetry install
16
+ ```
17
+
18
+ To build this application, you'll need to sign up for [Writer AI Studio](https://app.writer.com/aistudio/signup?utm_campaign=devrel) and create a new API key and a Knowledge Graph. To pass your API key and Knowledge Graph ID to the Writer Framework, set the environment variables `WRITER_API_KEY` and `GRAPH_ID`:
19
+ ```sh
20
+ export WRITER_API_KEY=your-api-key
21
+ ```
22
+ ```sh
23
+ export GRAPH_ID=your-graph-id
24
+ ```
25
+
26
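+ You can also set `WRITER_API_KEY` and `GRAPH_ID` in a `.env` file. The app additionally reads `HACKERNEWS_API_URL`, the base URL of the Hacker News API (for example `https://hacker-news.firebaseio.com/v0`); set it the same way.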
27
+
28
+ To make changes or edit the application, navigate to the project's root folder and use the following command:
29
+
30
+
31
+ ```sh
32
+ writer edit .
33
+ ```
34
+
35
+ Once you're ready to run the application, execute:
36
+
37
+ ```sh
38
+ writer run .
39
+ ```
40
+
41
+ To learn more, check out the [full documentation for Writer Framework](https://dev.writer.com/framework/introduction).
42
+
43
+ ## About Writer
44
+
45
+ Writer is the full-stack generative AI platform for enterprises. Quickly and easily build and deploy generative AI apps with a suite of developer tools fully integrated with our platform of LLMs, graph-based RAG tools, AI guardrails, and more. Learn more at [writer.com](https://www.writer.com?utm_source=github&utm_medium=readme&utm_campaign=framework).
hacker-news-social-listener/main.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import Any, List, Optional
6
+
7
+ import aiohttp
8
+ import pandas as pd
9
+ import requests
10
+ import writer as wf
11
+ from aiohttp import ClientSession
12
+ from dotenv import load_dotenv
13
+ from prompts import report_prompt
14
+ from writer import WriterState
15
+ from writer.ai import (
16
+ Conversation,
17
+ File,
18
+ list_files,
19
+ retrieve_file,
20
+ retrieve_graph,
21
+ upload_file,
22
+ )
23
+
24
# Load variables from a local .env file (if present) before reading them.
load_dotenv()

# Public base URL of the Hacker News API, used when HACKERNEWS_API_URL is not
# configured. Without a fallback every request URL would start with "/" and
# fail, so the app could not fetch anything out of the box.
_DEFAULT_HACKERNEWS_API_URL = "https://hacker-news.firebaseio.com/v0"

# Identifier of the Writer Knowledge Graph the scraped files are attached to.
GRAPH_ID = os.getenv("GRAPH_ID", "")
# Base URL for Hacker News requests; an unset or empty variable falls back to
# the default, and a trailing slash is dropped because callers append "/...".
HACKERNEWS_API_URL = (
    os.getenv("HACKERNEWS_API_URL", "") or _DEFAULT_HACKERNEWS_API_URL
).rstrip("/")
WRITER_API_KEY = os.getenv("WRITER_API_KEY", "")

wf.Config.feature_flags = ["dataframeEditor"]
31
+
32
+
33
def main(state: WriterState) -> None:
    """Scrape Hacker News, store the results in state/CSV and upload them to the graph.

    Steps: remove the files currently attached to ``GRAPH_ID``, scrape posts
    (and comments), expose the display columns through ``state["posts"]`` /
    ``state["comments"]``, write the CSV files under ``static/`` and upload
    them to the Knowledge Graph. Progress is reported through
    ``state["message_setup"]``.

    The progress banner is hidden again even when a step raises, so a failed
    run does not leave a stale "in progress" message on screen; the exception
    itself still propagates to the caller.
    """
    _delete_files_from_graph(GRAPH_ID)
    state["message_setup"] = "%Scraping data"
    state["message_setup_vis"] = True

    try:
        posts, comments = _scrape_hackernews(state)
        state["message_setup"] = "%Data was scraped"
        state["posts"] = _select_columns(
            posts, ["title", "created_utc", "score", "num_comments", "url"]
        )
        if state["allow_comments"]:
            state["comments"] = _select_columns(
                comments, ["body", "author", "created_utc"]
            )
        state["message_setup"] = "%Scraped data, now saving to csv"

        _save_results_to_csv(state)
        state["message_setup"] = "%Saved data, now uploading to KG"

        files_path = "static/hackernews_posts.csv"
        _upload_file_and_add_to_graph(files_path, GRAPH_ID)
        state["message_setup"] = "%Uploaded file and added to graph"

        if state["allow_comments"]:
            file_path = "static/hackernews_comments.csv"
            _upload_file_and_add_to_graph(file_path, GRAPH_ID)

        state["message_setup"] = "+Scraping is completed!"
    finally:
        state["message_setup_vis"] = False


def _select_columns(frame: Any, columns: List[str]) -> Any:
    """Return ``frame[columns]``, or an empty DataFrame when there is no data.

    ``frame`` may be ``None`` (nothing scraped yet) or an empty DataFrame left
    in state by an earlier failed run; selecting named columns from an empty,
    column-less frame would raise ``KeyError``.
    """
    if frame is None or frame.empty:
        return pd.DataFrame()
    return frame[columns]
58
+
59
+
60
+ def _delete_files_from_graph(graph_id: str) -> None:
61
+ try:
62
+ graph = retrieve_graph(graph_id=graph_id)
63
+ graph_files = list_files(config={"extra_query": {"graph_id": graph_id}})
64
+
65
+ if not graph_files:
66
+ print("No files found in the specified graph.")
67
+
68
+ for file_id in graph_files:
69
+ graph.remove_file(file_id.id)
70
+
71
+ except Exception as e:
72
+ print(f"An error while file deletion occurred: {str(e)}")
73
+
74
+
75
def _get_file_from_graph(file_id: str) -> Optional[File]:
    """Look up a file by id; return ``None`` (after printing the error) on failure."""
    found = None
    try:
        found = retrieve_file(file_id=file_id)
    except Exception as error:
        print(f"An error while file obtainment occurred: {str(error)}")
    return found
81
+
82
+
83
def _scrape_hackernews(state: WriterState) -> tuple[Any, Any]:
    """Fetch stories and their comments and store them in ``state``.

    ``state["posts"]`` is replaced only when at least one post was fetched
    (sorted by score, then comment count, descending); ``state["comments"]``
    only when at least one comment was fetched. Returns whatever ``state``
    holds for both keys afterwards.
    """
    story_ids = _get_stories_ids(state)
    post_rows, comment_ids = _get_posts(story_ids)
    comment_rows = _get_comments(comment_ids)

    if post_rows:
        post_frame = pd.DataFrame(post_rows)
        state["posts"] = post_frame.sort_values(
            by=["score", "num_comments"], ascending=False
        )

    if comment_rows:
        state["comments"] = pd.DataFrame(comment_rows)

    return state["posts"], state["comments"]
97
+
98
+
99
def _get_stories_ids(state: WriterState) -> List[str]:
    """Return the first ``state["fetch_limit"]`` ids from the top-stories endpoint.

    Any failure (non-200 status, network error, bad payload) is printed and
    results in an empty list.
    """
    endpoint = f"{HACKERNEWS_API_URL}/topstories.json"
    try:
        reply = requests.get(url=endpoint, timeout=5)

        if reply.status_code == 200:
            all_ids = reply.json()
            return all_ids[: int(state["fetch_limit"])]

        print("Failed to fetch data from Hacker News")
        return []
    except Exception as error:
        print(f"Failed to fetch story ids from Hacker News: {str(error)}")
        return []
112
+
113
+
114
+ def _get_posts(stories_ids: List[str]) -> (List[dict], List[int]):
115
+ try:
116
+ stories_urls = [
117
+ f"{HACKERNEWS_API_URL}/item/{story_id}.json" for story_id in stories_ids
118
+ ]
119
+ stories = asyncio.run(_perform_calls(stories_urls))
120
+
121
+ comments_ids = []
122
+ posts_data = []
123
+
124
+ for story in stories:
125
+ posts_data.append(
126
+ {
127
+ "post_id": str(story.get("id", "")),
128
+ "title": story.get("title", ""),
129
+ "author": story.get("by", ""),
130
+ "score": int(story.get("score", 0)),
131
+ "created_utc": datetime.fromtimestamp(story.get("time")).strftime(
132
+ "%Y-%m-%d %H:%M:%S"
133
+ ),
134
+ "num_comments": int(story.get("descendants", 0)),
135
+ "url": story.get("url", ""),
136
+ }
137
+ )
138
+
139
+ kids = story.get("kids", [])
140
+ if kids:
141
+ comments_ids += kids
142
+
143
+ return posts_data, comments_ids
144
+ except Exception as e:
145
+ print(f"Failed to fetch stories from Hacker News: {str(e)}")
146
+ return ([], [])
147
+
148
+
149
+ def _get_comments(comments_ids: List[str]) -> List[dict]:
150
+ try:
151
+ comments_urls = [
152
+ f"{HACKERNEWS_API_URL}/item/{comment_id}.json"
153
+ for comment_id in comments_ids
154
+ ]
155
+ comments_data = []
156
+
157
+ comments = asyncio.run(_perform_calls(comments_urls))
158
+
159
+ for comment in comments:
160
+ comments_data.append(
161
+ {
162
+ "comment_id": str(comment.get("id", "")),
163
+ "post_id": str(comment.get("parent", "")),
164
+ "author": comment.get("by", "anonymous"),
165
+ "created_utc": datetime.fromtimestamp(comment.get("time")).strftime(
166
+ "%Y-%m-%d %H:%M:%S"
167
+ ),
168
+ "body": comment.get("text", ""),
169
+ }
170
+ )
171
+
172
+ return comments_data
173
+ except Exception as e:
174
+ print(f"Failed to fetch comments from Hacker News: {str(e)}")
175
+ return []
176
+
177
+
178
async def _fetch_data(session: ClientSession, url: str) -> Any:
    """GET ``url`` with ``session`` and return the decoded JSON body.

    The result is whatever the JSON document decodes to (not a ``str``, as the
    previous annotation claimed); callers in this module read it as a dict.
    """
    async with session.get(url) as response:
        return await response.json()
181
+
182
+
183
+ async def _perform_calls(urls: List[str]) -> List[dict]:
184
+ async with aiohttp.ClientSession() as session:
185
+ tasks = [_fetch_data(session, url) for url in urls]
186
+ results = await asyncio.gather(*tasks)
187
+
188
+ return results
189
+
190
+
191
def _save_results_to_csv(state: WriterState) -> None:
    """Write the posts (and, if enabled, comments) held in ``state`` to CSV files in ``static/``."""
    posts_frame = state["posts"]
    posts_frame.to_csv("static/hackernews_posts.csv", index=False)

    if not state["allow_comments"]:
        return

    comments_frame = state["comments"]
    comments_frame.to_csv("static/hackernews_comments.csv", index=False)
195
+
196
+
197
+ def _upload_file_and_add_to_graph(file_path: str, graph_id: str) -> dict:
198
+ try:
199
+ file_id = _upload_file(file_path)
200
+ _add_file_to_graph(graph_id, file_id)
201
+
202
+ return {"file_id": file_id, "graph_id": graph_id}
203
+ except Exception as e:
204
+ print(f"An error while file uploading occurred: {str(e)}")
205
+ return {}
206
+
207
+
208
+ def _upload_file(file_path: str) -> str:
209
+ with open(file_path, "rb") as file:
210
+ uploaded_file = upload_file(
211
+ data=file.read(), name=Path(file.name).stem, type="text/csv"
212
+ )
213
+ return uploaded_file.id
214
+
215
+
216
+ def _add_file_to_graph(graph_id: str, file_id: str) -> None:
217
+ graph = retrieve_graph(graph_id)
218
+ graph.add_file(file_id)
219
+
220
+
221
def _handle_contributing_sources(state: WriterState, graph_data: dict) -> None:
    """Render the sources behind the last answer into the "contributed_sources" column.

    For every entry in ``graph_data["sources"]`` a section titled with the
    source file's name is created, containing the source snippet. When there
    are sources, the column is made visible and the toggle text updated.
    Nothing happens when ``graph_data`` has no sources.
    """
    sources = graph_data.get("sources")
    if not sources:
        return

    with wf.init_ui() as ui:
        with ui.refresh_with("contributed_sources"):
            for index, source in enumerate(sources):
                source_file = _get_file_from_graph(source["file_id"])
                source_snippet = source["snippet"]
                # The lookup helper returns None when retrieval fails; fall
                # back to a generic title instead of raising AttributeError
                # and losing every source of this answer.
                source_name = (
                    source_file.name if source_file is not None else "Unknown file"
                )
                ui.Section(
                    content={
                        "title": "📄 " + source_name,
                        "cssClasses": "file",
                    },
                    id=f"source {index}",
                )
                with ui.find(f"source {index}"):
                    ui.Text({"text": source_snippet, "cssClasses": "file-text"})

    state["contributing_sources_vis"] = True
    state["contributing_sources_button_text"] = "View contributing sources ▸"
241
+
242
+
243
def run_report(state: WriterState) -> None:
    """Generate the trend report from the scraped data and stream it into state.

    The prompt is built from ``state["posts"]`` and ``state["comments"]``; the
    streamed completion is appended chunk by chunk to
    ``state["prepared_report"]``.

    Raises:
        Exception: any error is re-raised after a fallback text has been put
            into ``state["prepared_report"]``. The progress banner is hidden in
            every case, so a failure does not leave "Creating report" on
            screen indefinitely.
    """
    try:
        state["message_report"] = "%Creating report"
        state["message_report_vis"] = True

        prompt = report_prompt(state["posts"], state["comments"])
        report_convo = Conversation()
        report_convo += {"role": "user", "content": prompt}
        response = report_convo.stream_complete()

        state["prepared_report"] = ""

        for chunk in response:
            state["prepared_report"] += chunk["content"]

        state["message_report"] = "+Creation is finished!"
    except Exception:
        state["prepared_report"] = "Something went wrong. Please try again!"
        raise
    finally:
        state["message_report_vis"] = False
263
+
264
+
265
def fetch_posts(state: WriterState) -> None:
    """Click handler of the "Fetch posts" button; runs the scrape-and-upload pipeline."""
    return main(state)
267
+
268
+
269
def message_handler(payload: dict, state: WriterState) -> None:
    """Handle a chatbot message: answer it using the Knowledge Graph as a tool.

    The frontend function ``enableDisableTextarea`` is called with ``"true"``
    before and ``"false"`` after processing. The streamed answer is appended
    to ``state["conversation"]``; if the final message carries ``graph_data``,
    its sources are rendered. On error a fallback assistant message is added
    and the exception is re-raised.
    """
    toggle_textarea = ("scripts", "enableDisableTextarea")
    try:
        state.call_frontend_function(*toggle_textarea, ["true"])
        state["conversation"] += payload

        knowledge_graph = retrieve_graph(GRAPH_ID)

        answer_stream = state["conversation"].stream_complete(tools=knowledge_graph)

        for piece in answer_stream:
            state["conversation"] += piece

        last_message = state["conversation"].messages[-1]
        sources_payload = last_message.get("graph_data")
        if sources_payload:
            _handle_contributing_sources(state, sources_payload)

        state.call_frontend_function(*toggle_textarea, ["false"])
    except Exception:
        state["conversation"] += {
            "role": "assistant",
            "content": "Something went wrong. Please try again!",
        }
        state.call_frontend_function(*toggle_textarea, ["false"])
        raise
293
+
294
+
295
def contributing_sources_change_vis(state: WriterState) -> None:
    """Toggle the contributing-sources column and flip the arrow in the toggle text."""
    state["contributing_sources_vis"] = not state["contributing_sources_vis"]
    arrow = "▸" if state["contributing_sources_vis"] else "◂"
    state["contributing_sources_button_text"] = "View contributing sources " + arrow
301
+
302
+
303
# Opening assistant message shown in the chatbot.
_greeting = Conversation(
    [
        {
            "role": "assistant",
            "content": "Ask me anything about the scraped Hacker News data.",
        },
    ],
)

# Initial values of the application state.
_initial_values = {
    "conversation": _greeting,
    "response": None,
    "file_path": "",
    "graph_name": "",
    "uploaded_file": None,
    "graph_id": None,
    # Report tab
    "prepared_report": "# **Trend report**",
    # Chat tab: toggle text and visibility of the contributing-sources column
    "contributing_sources_button_text": "View contributing sources ◂",
    # Progress banners and their visibility flags
    "message_setup": "",
    "message_report": "",
    "message_setup_vis": False,
    "message_report_vis": False,
    "contributing_sources_vis": False,
    # Setup tab inputs
    "fetch_limit": 100,
    "allow_comments": True,
}

initial_state = wf.init_state(_initial_values)

# Register the custom frontend script and stylesheet served from /static.
initial_state.import_frontend_module("scripts", "/static/custom.js")
initial_state.import_stylesheet("style", "/static/custom.css")
# NOTE(review): this frontend function looks up elements with the class
# "subreddits"; confirm the UI still contains such an element.
initial_state.call_frontend_function("scripts", "initSelectedDropdownOption", [])
hacker-news-social-listener/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
hacker-news-social-listener/prompts.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def report_prompt(posts, comments):
    """Build the prompt asking the model for a summary report of posts and comments.

    ``posts`` and ``comments`` are interpolated with their ``str()`` form.
    NOTE(review): when DataFrames are passed, pandas may abbreviate long
    frames in that form -- confirm the model sees all rows it is meant to.
    """
    prompt = f"""
# CONTEXT #
You are an expert at analyzing large amounts of posts and comments
at social network for software developers called HackerNews. You are creating
summary reports of provided data. Furthermore, you are acting
really carefully outlining main trends, top posts and comments,
most famous topics and development approaches.

# INSTRUCTIONS #
Create an expertly written summary report.

# DATA #
Here are the posts and comments you will use to create the report.

Posts:
{posts}

Comments:
{comments}

# ADDITIONAL GUIDELINES #
- Reflect only top posts and comments. DO NOT reflect all data in your
report.
- FIT your report in 10-15 paragraphs. This is also very IMPORTANT.
- Say a few words about posts reflected at report.
- Provide some analysis of trends you are surveying: if users consider
theme useful or not, if they are pleased with it and so on.
- Outline most interesting, discussed and high rated comments and posts.

# RESPONSE FORMAT #

Highlight headers, topics, main ideas. Use .md markup to style your text.
"""
    return prompt
hacker-news-social-listener/static/README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Serving static files
2
+
3
+ You can use this folder to store files which will be served statically in the "/static" route.
4
+
5
+ This is useful to store images and other files which will be served directly to the user of your application.
6
+
7
+ For example, if you store an image named "myimage.jpg" in this folder, it'll be accessible as "static/myimage.jpg".
8
+ You can use this relative route as the source in an Image component.
hacker-news-social-listener/static/custom.css ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Section created for a single contributing source */
.file {
    height: 60vh !important;
}

/* Snippet text inside a source section; scrolls when it overflows */
.file-text {
    height: 51vh !important;
    overflow-y: auto !important;
}

/* Column holding all contributing sources; scrolls when it overflows */
.files-list {
    height: 84vh !important;
    overflow-y: auto !important;
}

/* Section titles */
.CoreSection__title h3 {
    font-size: 1.05rem !important;
}
hacker-news-social-listener/static/custom.js ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
/**
 * Select the first <option> of the first element with the class "subreddits".
 *
 * Does nothing when no such element (or no option inside it) exists, instead
 * of throwing a TypeError on pages that have no such dropdown.
 */
export function initSelectedDropdownOption() {
    const dropdown = document.getElementsByClassName("subreddits")[0];
    if (!dropdown) {
        return;
    }
    const firstOption = dropdown.getElementsByTagName("option")[0];
    if (firstOption) {
        firstOption.selected = true;
    }
}

/**
 * Disable or enable the chat input while a message is being processed.
 *
 * The backend calls this with the strings "true" (disable) and "false"
 * (enable); a real boolean is accepted as well.
 *
 * NOTE(review): the ".CoreChatbot textarea" selector is an assumption about
 * the rendered chatbot markup -- confirm against the Writer Framework version
 * in use.
 */
export function enableDisableTextarea(disabled) {
    const shouldDisable = disabled === true || disabled === "true";
    document.querySelectorAll(".CoreChatbot textarea").forEach((area) => {
        area.disabled = shouldDisable;
    });
}
hacker-news-social-listener/static/favicon.png ADDED
pyproject.toml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[tool.poetry]
name = "writer-framework-default"
version = "0.1.0"
description = ""
authors = ["Your Name <[email protected]>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10.0"
writer = "^0.8.2"
# NOTE(review): confirm praw is still needed by this app.
praw = "^7.7.1"
black = "^24.8.0"
flake8 = "^7.1.1"
isort = "^5.13.2"
pre-commit = "^3.8.0"
python-dotenv = "^1.0.1"
aiohttp = "^3.10.10"
# asyncio ships with the Python standard library on every version allowed
# above; the PyPI "asyncio" distribution is an obsolete backport and is
# therefore not listed as a dependency.


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"