Spaces:
Sleeping
Sleeping
File size: 2,524 Bytes
a7c5a8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
def scrape_104_jobs(url, num_pages=1):
    """Scrape job listings from 104.com.tw search-result pages.

    Args:
        url: A 104 search URL whose query string contains the substring
            ``page=2``; that substring is used as a template slot and is
            replaced with each page number being fetched.
        num_pages: Number of result pages to fetch, starting at page 1.

    Returns:
        pandas.DataFrame with columns 'Job Title', 'Company', 'Location'
        and 'Link'; empty if nothing could be scraped.
    """
    headers = {
        # Desktop browser UA so 104 serves the regular HTML markup.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    all_jobs = []
    for page in range(1, num_pages + 1):
        # The template URL hard-codes 'page=2'; swap in the page we want.
        page_url = url.replace('page=2', f'page={page}')
        try:
            # timeout= prevents a dead connection from hanging the app
            # forever (the original call had no timeout at all).
            response = requests.get(page_url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            job_items = soup.find_all('article', class_='js-job-item')
            for item in job_items:
                job_link = item.find('a', class_='js-job-link')
                company_name = item.find('ul', class_='b-list-inline b-clearfix')
                job_loc = item.find('ul', class_='b-list-inline b-clearfix job-list-intro b-content')
                # Skip cards missing any of the three pieces of data.
                if job_link and company_name and job_loc:
                    # 104 often emits protocol-relative hrefs ('//...').
                    href = job_link['href']
                    link = 'https:' + href if href.startswith('//') else href
                    all_jobs.append({
                        'Job Title': job_link.text.strip(),
                        'Company': company_name.find('a').text.strip(),
                        'Location': job_loc.find('li').text.strip(),
                        'Link': link,
                    })
        except requests.RequestException as e:
            # Best-effort scrape: report the failed page and keep going.
            print(f"Error fetching page {page}: {e}")
            continue
    return pd.DataFrame(all_jobs)
def get_jobs_from_104(pages):
    """Fetch AI-keyword job listings from 104 as a DataFrame.

    Args:
        pages: Number of result pages to scrape. Gradio's Number widget
            delivers a float (or None), which would make ``range()`` in
            the scraper raise TypeError, so the value is coerced to int
            and clamped to the 1-5 range advertised in the UI.

    Returns:
        pandas.DataFrame of scraped jobs.
    """
    # Coerce the widget value to a usable page count; default to 1.
    num_pages = int(pages) if pages else 1
    num_pages = max(1, min(5, num_pages))
    # Template search URL: keyword=AI, with 'page=2' as the slot the
    # scraper substitutes per page.
    url = 'https://www.104.com.tw/jobs/search/?ro=0&kwop=7&keyword=AI&expansionType=area%2Cspec%2Ccom%2Cjob%2Cwf%2Cwktm&order=14&asc=0&page=2&mode=s&jobsource=index_s&langFlag=0&langStatus=0&recommendJob=1&hotJob=1'
    df = scrape_104_jobs(url, num_pages=num_pages)
    return df
# Gradio callback wiring the web UI to the scraper.
def display_jobs(num_pages):
    """Scrape *num_pages* pages from 104 and return them as a DataFrame."""
    return get_jobs_from_104(num_pages)
# Build the Gradio interface.
interface = gr.Interface(
fn=display_jobs,  # function invoked when the user submits
inputs=gr.Number(label="Enter number of pages to scrape (1-5):"),  # user-supplied page count
outputs="dataframe",  # render the result as a DataFrame table
title="104 Job Scraper",
description="爬取 104 人力銀行上的職缺數據,輸入要抓取的頁數 (1-5)。"
)
# Launch the web UI.
interface.launch()
|