"""Scrape job listings from 104.com.tw search pages and show them in Gradio."""

from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Column order of the returned DataFrame; fixed so an empty result still has
# headers instead of being a column-less frame.
_JOB_COLUMNS = ['Job Title', 'Company', 'Location', 'Link']

# Seconds to wait for each HTTP request; without a timeout `requests` can
# block forever on a stalled connection.
_REQUEST_TIMEOUT = 10

# Upper bound on pages per run, matching the "(1-5)" promised in the UI text.
_MAX_PAGES = 5


def _build_page_url(url, page):
    """Return *url* with its ``page`` query parameter set to *page*.

    The parameter is replaced in place when present (extra duplicates are
    dropped) and appended when absent.  Parsing the query avoids the pitfalls
    of a raw substring replace, which would also rewrite e.g. ``page=20``.
    """
    parts = urlsplit(url)
    pairs = []
    replaced = False
    for key, value in parse_qsl(parts.query, keep_blank_values=True):
        if key == 'page':
            if not replaced:
                pairs.append((key, str(page)))
                replaced = True
            continue
        pairs.append((key, value))
    if not replaced:
        pairs.append(('page', str(page)))
    return urlunsplit(parts._replace(query=urlencode(pairs)))


def _parse_job_item(item):
    """Extract one job record from a job ``<article>`` tag.

    Returns a dict keyed by the entries of ``_JOB_COLUMNS``, or ``None`` when
    any of the expected sub-elements (job link, its href, company anchor,
    first location list item) is missing, so a single malformed card is
    skipped rather than aborting the scrape.
    """
    job_link = item.find('a', class_='js-job-link')
    company_name = item.find('ul', class_='b-list-inline b-clearfix')
    job_loc = item.find('ul', class_='b-list-inline b-clearfix job-list-intro b-content')
    if job_link is None or company_name is None or job_loc is None:
        return None

    company_anchor = company_name.find('a')
    location_item = job_loc.find('li')
    href = job_link.get('href')
    if company_anchor is None or location_item is None or not href:
        return None

    # Protocol-relative links ("//host/path") need an explicit scheme.
    link = 'https:' + href if href.startswith('//') else href
    return {
        'Job Title': job_link.text.strip(),
        'Company': company_anchor.text.strip(),
        'Location': location_item.text.strip(),
        'Link': link
    }


def scrape_104_jobs(url, num_pages=1):
    """Scrape job cards from pages 1..num_pages of a search URL.

    Args:
        url: Search URL; its ``page`` query parameter is overwritten for each
            page that is fetched.
        num_pages: Number of pages to fetch, starting at page 1.  Coerced
            with ``int()`` so integral floats are accepted.

    Returns:
        A ``pandas.DataFrame`` with columns 'Job Title', 'Company',
        'Location' and 'Link'.  Pages that fail to download are reported on
        stdout and skipped.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    all_jobs = []

    for page in range(1, int(num_pages) + 1):
        page_url = _build_page_url(url, page)
        # Only the network call lives in the try block so that parsing bugs
        # are not mistaken for (or hidden behind) request failures.
        try:
            response = requests.get(page_url, headers=headers, timeout=_REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching page {page}: {e}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.find_all('article', class_='js-job-item'):
            job = _parse_job_item(item)
            if job is not None:
                all_jobs.append(job)

    return pd.DataFrame(all_jobs, columns=_JOB_COLUMNS)


def get_jobs_from_104(pages):
    """Scrape the first *pages* result pages of the fixed "AI" keyword search."""
    url = 'https://www.104.com.tw/jobs/search/?ro=0&kwop=7&keyword=AI&expansionType=area%2Cspec%2Ccom%2Cjob%2Cwf%2Cwktm&order=14&asc=0&page=2&mode=s&jobsource=index_s&langFlag=0&langStatus=0&recommendJob=1&hotJob=1'
    df = scrape_104_jobs(url, num_pages=pages)
    return df


# Build the web interface with Gradio.
def display_jobs(num_pages):
    """Gradio callback: scrape the requested number of pages.

    The value comes from a ``gr.Number`` input, which may hand over a float
    or ``None`` (empty field) depending on the Gradio version -- TODO confirm
    for the deployed version.  It is therefore coerced to ``int`` (falling
    back to 1 when that is impossible) and clamped to ``1.._MAX_PAGES``.
    """
    try:
        pages = int(num_pages)
    except (TypeError, ValueError):
        pages = 1
    pages = max(1, min(pages, _MAX_PAGES))
    df = get_jobs_from_104(pages)
    return df


# Create the Gradio interface.
interface = gr.Interface(
    fn=display_jobs,  # function invoked on submit
    inputs=gr.Number(label="Enter number of pages to scrape (1-5):"),  # page count entered by the user
    outputs="dataframe",  # result is rendered as a DataFrame
    title="104 Job Scraper",
    description="爬取 104 人力銀行上的職缺數據,輸入要抓取的頁數 (1-5)。"
)

# Launch the interface.
interface.launch()