Spaces:
Runtime error
Runtime error
File size: 2,632 Bytes
a2ee974 5782e66 a2ee974 5782e66 566bba1 5782e66 a2ee974 77b7045 566bba1 f5f34c2 566bba1 a2ee974 14ee6ff a2ee974 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# Created by Leandro Carneiro at 19/01/2024
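# Description: helpers that search news links through the Google Custom Search
#              API, download the text of the pages found, and store the texts
#              (plus a filename -> URL index) in a local folder.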
# ------------------------------------------------
import os.path
import time
from googleapiclient.discovery import build
import requests
from bs4 import BeautifulSoup
import constants
def google_search_api(search_term, api_key, cse_id, **kwargs):
    """Run one query against the Google Custom Search API.

    Args:
        search_term: Query string, sent as the ``q`` parameter.
        api_key: Developer key handed to ``build``.
        cse_id: Search engine id, sent as the ``cx`` parameter.
        **kwargs: Extra parameters forwarded unchanged to ``cse().list``.

    Returns:
        The value stored under the ``'items'`` key of the API response, or
        ``-1`` when anything goes wrong (client construction, the request
        itself, or a response that has no ``'items'`` key).
    """
    try:
        client = build("customsearch", "v1", developerKey=api_key)
        request = client.cse().list(q=search_term, cx=cse_id, **kwargs)
        payload = request.execute()
        # The lookup stays inside the try so that a response without
        # 'items' is reported through the same -1 sentinel.
        items = payload['items']
    except Exception:
        return -1
    return items
def search_google(subject, sites):
    """Collect result links for ``subject`` restricted to each domain in ``sites``.

    For every domain a ``"<subject> site:<domain>"`` query is sent through
    ``google_search_api`` using the ``GOOGLE_KEY`` and ``GOOGLE_SEARCH``
    environment variables and ``constants.num_sites`` as the ``num`` argument.

    Args:
        subject: Text to search for.
        sites: Iterable of domain strings.

    Returns:
        A list of URLs. When the search for a domain fails (``-1``), the
        domain itself is added instead. Links containing ``'pdf'``
        (case-insensitive) are skipped. If any exception is raised, the
        exception message is printed and returned as a string instead of
        a list.
    """
    try:
        links = []
        for domain in sites:
            print(' Buscando notícias no domínio: ' + domain)
            search_expr = f"{subject} site:{domain}"
            hits = google_search_api(
                search_expr,
                os.environ['GOOGLE_KEY'],
                os.environ['GOOGLE_SEARCH'],
                num=constants.num_sites,
            )
            if hits == -1:
                # Search failed: fall back to the bare domain.
                links.append(domain)
                continue
            for hit in hits:
                url = hit['link']
                if 'pdf' in url.lower():
                    print(' Arquivo PDF encontrado: ' + url)
                else:
                    links.append(url)
        print(' Total de sites encontrados: ' + str(len(links)))
        return links
    except Exception as err:
        message = str(err)
        print(message)
        return message
def retrieve_text_from_site(sites, timeout=30):
    """Download each URL in ``sites`` and return the visible text of every page.

    Args:
        sites: Iterable of URL strings.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Without it a single unresponsive host could block the
            whole loop indefinitely; with it, that site simply yields an
            error entry like any other failure.

    Returns:
        A list with one string per input URL, in the same order: the text
        extracted by BeautifulSoup on success, or
        ``'Erro na recuperação do texto: <error>'`` when the download or the
        parsing raised an exception.
    """
    texts = []
    for site in sites:
        print(' Baixando texto do site: ' + site)
        try:
            response = requests.get(site, timeout=timeout)
            # Turn HTTP 4xx/5xx responses into exceptions so they are
            # recorded as errors instead of being parsed as page content.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            texts.append(soup.get_text())
        except Exception as e:
            # Keep one entry per site so results stay aligned with ``sites``.
            texts.append('Erro na recuperação do texto: ' + str(e))
    return texts
def delete_base(local_base):
    """Remove every entry found directly inside ``local_base``.

    Args:
        local_base: Path of the directory to empty.

    Returns:
        ``0`` when all entries were removed, otherwise the message of the
        first exception raised (entries removed before the failure stay
        removed).
    """
    try:
        for entry in os.listdir(local_base):
            os.remove(os.path.join(local_base, entry))
    except Exception as err:
        return str(err)
    return 0
def save_on_base(sites, texts, local_base):
    """Write each text to ``news<i>.txt`` and log its URL in ``filename_url.csv``.

    For the i-th site, ``texts[i]`` is written (UTF-8) to
    ``<local_base>/news<i>.txt`` and a ``"news<i>.txt;<site>"`` line is
    appended to ``<local_base>/filename_url.csv``.

    Args:
        sites: Sequence of URL strings.
        texts: Sequence of strings, indexed in parallel with ``sites``.
        local_base: Directory that receives the files.

    Returns:
        ``0`` on success, otherwise the message of the first exception
        raised (files written before the failure are left in place).
    """
    try:
        for idx, url in enumerate(sites):
            news_name = f'news{idx}.txt'
            news_path = os.path.join(local_base, news_name)
            with open(news_path, 'w', encoding='utf-8') as news_file:
                news_file.write(texts[idx])
            # The index is opened in append mode, so earlier entries are kept.
            index_path = os.path.join(local_base, 'filename_url.csv')
            with open(index_path, 'a', encoding='utf-8') as index_file:
                index_file.write(news_name + ';' + url + '\n')
    except Exception as err:
        return str(err)
    return 0
|