{ "cells": [ { "cell_type": "code", "execution_count": 6, "id": "7a478c86", "metadata": { "ExecuteTime": { "end_time": "2023-07-31T01:55:12.877222Z", "start_time": "2023-07-31T01:55:12.874203Z" } }, "outputs": [], "source": [ "import requests\n", "import os" ] }, { "cell_type": "code", "execution_count": 12, "id": "61571547", "metadata": { "ExecuteTime": { "end_time": "2023-07-31T01:57:23.295679Z", "start_time": "2023-07-31T01:57:23.292514Z" } }, "outputs": [], "source": [ "t = 2314" ] }, { "cell_type": "code", "execution_count": 9, "id": "5d7eda84", "metadata": { "ExecuteTime": { "end_time": "2023-07-31T01:56:12.033568Z", "start_time": "2023-07-31T01:56:12.025127Z" } }, "outputs": [], "source": [ "def download_java_files(url, path):\n", " global t\n", " # Send GET request to retrieve the folder contents\n", " response = requests.get(url, params={\"ref\": branch})\n", " if response.status_code == 200:\n", " # Parse the response JSON\n", " contents = response.json()\n", "\n", " for item in contents:\n", " if item[\"type\"] == \"file\" and item[\"name\"].endswith(\".java\"):\n", " # Download Java file\n", " download_url = item[\"download_url\"]\n", " file_name = \"train_\"+str(t)\n", " output_file_path = f\"Desktop/MITACS/Dataset/inp-txt/{file_name}\"\n", "\n", " # Send GET request to download the file content\n", " file_content = requests.get(download_url).text\n", "\n", " # Save the file to the output directory\n", " with open(output_file_path, \"w\") as output_file:\n", " output_file.write(file_content)\n", "\n", " print(f\"Downloaded: {file_name}\")\n", " t+=1\n", " elif item[\"type\"] == \"dir\":\n", " # Recursively navigate into subfolders\n", " subfolder_url = item[\"url\"]\n", " subfolder_path = f\"{path}/{item['name']}\"\n", " download_java_files(subfolder_url, subfolder_path)\n", " else:\n", " print(\"Failed to retrieve folder contents.\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "dafd2b43", "metadata": { "ExecuteTime": { "end_time": "2023-07-31T01:59:48.822445Z", "start_time": "2023-07-31T01:59:48.667420Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Failed to retrieve folder contents.\n", "2314\n" ] } ], "source": [ "# GitHub repository details\n", "repo = \"leakcanary\"\n", "owner = \"square\"\n", "branch = \"main\" # Specify the branch you want to download from\n", "output_directory = \"DesktopDataset/inp-txt\" # Specify the directory to save the downloaded files\n", "# API endpoint to get the list of files in the repository\n", "url = f\"https://api.github.com/repos/{owner}/{repo}/contents\"\n", "download_java_files(url, output_directory)\n", "print(t)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "f51e193d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10 (tensorflown)", "language": "python", "name": "tensorflown" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }