SoulofSukuna committed: Update helper/html_scraper.py

helper/html_scraper.py  (+9 -13)
@@ -7,7 +7,6 @@ from constants.headers import HEADER_AIO
 
 HTTP_PROXY = os.environ.get("HTTP_PROXY", None)
 
-
 class Scraper:
     @decorator_asyncio_fix
     async def _get_html(self, session, url):
@@ -21,23 +20,20 @@ class Scraper:
         return await asyncio.gather(asyncio.create_task(self._get_html(session, url)))
 
 
-class
+class CloudScraper:
     def __init__(self):
         self.scraper = cloudscraper.create_scraper()
 
-
-    async def _get_html(self, url):
-        loop = asyncio.get_running_loop()
-        return await loop.run_in_executor(None, self._sync_scrape, url)
-
-    def _sync_scrape(self, url):
+    def _get_html(self, url):
         try:
-            response = self.scraper.get(url, headers=HEADER_AIO)
+            response = self.scraper.get(url, headers=HEADER_AIO, proxies={'http': HTTP_PROXY, 'https': HTTP_PROXY})
+            response.raise_for_status()  # Raise an error for bad responses
             return response.text
         except Exception as e:
-            print(f"Error
+            print(f"Error fetching {url}: {e}")
             return None
 
-    async def get_all_results(self,
-
-
+    async def get_all_results(self, url):
+        loop = asyncio.get_event_loop()
+        with ThreadPoolExecutor() as pool:
+            return await loop.run_in_executor(pool, self._get_html, url)
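For reference, a minimal usage sketch of the reworked class, assuming the module also has `from concurrent.futures import ThreadPoolExecutor` at the top (the new get_all_results depends on it) and that CloudScraper is importable from helper/html_scraper; the URL and the main() wrapper below are illustrative, not part of the commit.

import asyncio

from helper.html_scraper import CloudScraper  # the file changed in this commit


async def main():
    scraper = CloudScraper()
    # get_all_results offloads the blocking cloudscraper.get call to a worker
    # thread via run_in_executor, so async callers can await it without
    # blocking the event loop.
    html = await scraper.get_all_results("https://example.com")  # example URL
    if html is None:
        print("Request failed; _get_html already printed the error.")
    else:
        print(f"Fetched {len(html)} characters of HTML.")


if __name__ == "__main__":
    asyncio.run(main())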