# Captured from a hosted Space page (status at capture: Sleeping).
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Column order of the DataFrame returned by scrape_104_jobs.
_JOB_COLUMNS = ['Job Title', 'Company', 'Location', 'Link']


def _build_page_url(url, page):
    """Return *url* with its ``page`` query parameter set to *page*."""
    parts = urlsplit(url)
    # Drop any existing page parameter (whatever its value) and add ours, so
    # paging does not depend on the template URL containing a specific number.
    query = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != 'page'
    ]
    query.append(('page', str(page)))
    return urlunsplit(parts._replace(query=urlencode(query)))


def _parse_job_item(item):
    """Extract one job record from a listing element, or None if incomplete."""
    job_link = item.find('a', class_='js-job-link')
    company_list = item.find('ul', class_='b-list-inline b-clearfix')
    location_list = item.find(
        'ul', class_='b-list-inline b-clearfix job-list-intro b-content'
    )
    if job_link is None or company_list is None or location_list is None:
        return None

    href = job_link.get('href')
    company_anchor = company_list.find('a')
    location_entry = location_list.find('li')
    # Skip malformed entries instead of letting one bad item abort the scrape.
    if not href or company_anchor is None or location_entry is None:
        return None

    return {
        'Job Title': job_link.text.strip(),
        'Company': company_anchor.text.strip(),
        'Location': location_entry.text.strip(),
        # Listing links may be protocol-relative ("//host/path").
        'Link': 'https:' + href if href.startswith('//') else href,
    }


def scrape_104_jobs(url, num_pages=1):
    """Scrape job listings from consecutive result pages of a search URL.

    Args:
        url: Search URL. Its ``page`` query parameter is overwritten (or
            added) for each page requested.
        num_pages: Number of pages to fetch, starting at page 1. Integral
            floats are accepted and truncated with ``int``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Job Title``, ``Company``,
        ``Location`` and ``Link``; it has zero rows when nothing was scraped.

    Pages whose request fails are reported on stdout and skipped.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    all_jobs = []
    for page in range(1, int(num_pages) + 1):
        page_url = _build_page_url(url, page)
        try:
            # A timeout keeps an unresponsive server from hanging the caller.
            response = requests.get(page_url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching page {page}: {e}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.find_all('article', class_='js-job-item'):
            job = _parse_job_item(item)
            if job is not None:
                all_jobs.append(job)
    return pd.DataFrame(all_jobs, columns=_JOB_COLUMNS)
def get_jobs_from_104(pages):
    """Scrape *pages* result pages of the fixed 104.com.tw ``keyword=AI`` search.

    The search URL is hard-coded; the result is whatever ``scrape_104_jobs``
    returns for it.
    """
    search_url = (
        'https://www.104.com.tw/jobs/search/'
        '?ro=0&kwop=7&keyword=AI'
        '&expansionType=area%2Cspec%2Ccom%2Cjob%2Cwf%2Cwktm'
        '&order=14&asc=0&page=2&mode=s'
        '&jobsource=index_s&langFlag=0&langStatus=0'
        '&recommendJob=1&hotJob=1'
    )
    return scrape_104_jobs(search_url, num_pages=pages)
# Callback used by the Gradio web interface.
def display_jobs(num_pages):
    """Return the scraped jobs for the page count entered in the UI.

    Args:
        num_pages: Value from the numeric input. It may be a float, or
            ``None`` when the field is left empty.

    Returns:
        The DataFrame produced by ``get_jobs_from_104``.

    The UI advertises a 1-5 range, so the value is coerced to an integer and
    clamped to that range; unusable input falls back to a single page.
    """
    try:
        pages = int(num_pages)
    except (TypeError, ValueError, OverflowError):
        # None, NaN or infinity: fall back to the smallest valid request.
        pages = 1
    pages = max(1, min(pages, 5))
    return get_jobs_from_104(pages)
# Build the Gradio interface: a numeric page-count input feeding display_jobs,
# with the result rendered as a dataframe.
pages_input = gr.Number(label="Enter number of pages to scrape (1-5):")
interface = gr.Interface(
    title="104 Job Scraper",
    description="爬取 104 人力銀行上的職缺數據,輸入要抓取的頁數 (1-5)。",
    fn=display_jobs,
    inputs=pages_input,
    outputs="dataframe",
)

# Launch the interface.
interface.launch()