Corey Morris committed on
Commit
b58e1f0
·
1 Parent(s): cc32c4f

Updated download_file method

Browse files
details_data_processor.py CHANGED
@@ -6,6 +6,9 @@ import re
6
  import numpy as np
7
  import requests
8
  from urllib.parse import quote
 
 
 
9
 
10
  class DetailsDataProcessor:
11
  # Download
@@ -27,10 +30,50 @@ class DetailsDataProcessor:
27
  return matching_files # Return the list of matching filenames
28
 
29
  # download a file from a single url and save it to a local directory
 
 
 
 
 
 
 
 
 
 
30
  @staticmethod
31
- def download_file(url, filename):
32
- r = requests.get(url, allow_redirects=True)
33
- open(filename, 'wb').write(r.content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  @staticmethod
36
  def single_file_pipeline(url, filename):
 
6
  import numpy as np
7
  import requests
8
  from urllib.parse import quote
9
+ from datetime import datetime
10
+
11
+
12
 
13
  class DetailsDataProcessor:
14
  # Download
 
30
  return matching_files # Return the list of matching filenames
31
 
32
  # download a file from a single url and save it to a local directory
33
+ # @staticmethod
34
+ # def download_file(url, file_path):
35
+ # #TODO: I may not need to save the file. I can just read it in and convert to a dataframe
36
+ # r = requests.get(url, allow_redirects=True)
37
+ # open(file_path, 'wb').write(r.content)
38
+ # # return dataframe
39
+ # df = pd.DataFrame(r.content)
40
+ # return df
41
+
42
+
43
  @staticmethod
44
+ def download_file(url, save_file_path):
45
+ # Get the current date and time
46
+ timestamp = datetime.now()
47
+
48
+ # Format the timestamp as a string, suitable for use in a filename
49
+ filename_timestamp = timestamp.strftime("%Y-%m-%dT%H-%M-%S")
50
+
51
+ # Append the timestamp and a ".json" extension to the save path
52
+ save_file_path = save_file_path + filename_timestamp + ".json"
53
+
54
+ print(save_file_path) # Output will be something like "results_2023-08-20T12-34-56.json"
55
+
56
+ try:
57
+ # Sending a GET request
58
+ r = requests.get(url, allow_redirects=True)
59
+ r.raise_for_status() # Raises an HTTPError if the HTTP request returned an unsuccessful status code
60
+
61
+ # Writing the content to the specified file
62
+ with open(save_file_path, 'wb') as file:
63
+ file.write(r.content)
64
+
65
+ print(f"Successfully downloaded file: {save_file_path}")
66
+ except requests.ConnectionError:
67
+ print(f"Failed to connect to the URL: {url}")
68
+ except requests.HTTPError as e:
69
+ print(f"HTTP error occurred: {e}")
70
+ except FileNotFoundError:
71
+ print(f"File not found at path: {save_file_path}")
72
+ except Exception as e:
73
+ print(f"An unexpected error occurred: {e}")
74
+
75
+ return None
76
+
77
 
78
  @staticmethod
79
  def single_file_pipeline(url, filename):
test_details_data_processing.py CHANGED
@@ -16,7 +16,7 @@ class TestDetailsDataProcessor(unittest.TestCase):
16
  # self.assertIsInstance(data, pd.DataFrame)
17
 
18
  def test_download_file(self):
19
- DetailsDataProcessor.download_file('https://www.google.com', 'test.html')
20
  self.assertTrue(os.path.exists('test.html'))
21
  os.remove('test.html')
22
 
 
16
  # self.assertIsInstance(data, pd.DataFrame)
17
 
18
  def test_download_file(self):
19
+ DetailsDataProcessor.download_file('https://www.google.com', 'test_file_please_remove')
20
  self.assertTrue(os.path.exists('test.html'))
21
  os.remove('test.html')
22