Spaces:
Runtime error
Runtime error
add job posting scrape capability to find my match page
Browse files- main.py +15 -5
- scrape_onet.py +21 -1
- templates/find_match.html +13 -2
- templates/find_my_match.html +10 -4
- templates/job_list.html +2 -2
main.py
CHANGED
@@ -17,7 +17,7 @@ from uuid import uuid1
|
|
17 |
from localStoragePy import localStoragePy
|
18 |
localStorage = localStoragePy('pathfinder', 'text')
|
19 |
|
20 |
-
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
|
21 |
from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop, get_links, coSkillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
|
22 |
from user_utils import Hash
|
23 |
|
@@ -143,6 +143,12 @@ def get_matches(request: Request):
|
|
143 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
144 |
async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile = File(...)):
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
username = localStorage.getItem('username')
|
147 |
|
148 |
def add_data_to_db(resume):
|
@@ -164,13 +170,17 @@ async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile
|
|
164 |
bt.add_task(add_data_to_db, resume)
|
165 |
bt.add_task(get_jobs_from_db, resume)
|
166 |
|
167 |
-
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links})
|
168 |
|
169 |
@app.get("/find-match/", response_class=HTMLResponse)
|
170 |
def find_match(request: Request):
|
171 |
-
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
|
|
174 |
|
175 |
@app.get("/find-my-hire/", response_class=HTMLResponse)
|
176 |
def get_hires(request: Request):
|
|
|
17 |
from localStoragePy import localStoragePy
|
18 |
localStorage = localStoragePy('pathfinder', 'text')
|
19 |
|
20 |
+
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks, get_job_postings
|
21 |
from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop, get_links, coSkillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
|
22 |
from user_utils import Hash
|
23 |
|
|
|
143 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
144 |
async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile = File(...)):
|
145 |
|
146 |
+
statelist = [ 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
|
147 |
+
'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
|
148 |
+
'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
|
149 |
+
'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
|
150 |
+
'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
|
151 |
+
|
152 |
username = localStorage.getItem('username')
|
153 |
|
154 |
def add_data_to_db(resume):
|
|
|
170 |
bt.add_task(add_data_to_db, resume)
|
171 |
bt.add_task(get_jobs_from_db, resume)
|
172 |
|
173 |
+
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
|
174 |
|
175 |
@app.get("/find-match/", response_class=HTMLResponse)
def find_match(request: Request):
    """Render the job postings for the job title and state chosen in the form.

    The handler reads two query parameters from the GET request: ``state``
    and the job-title selection. The selection's field name is not visible
    from here, so the first parameter that is not ``state`` is used, which
    mirrors the original "first value in the URL" behaviour.

    Args:
        request: Incoming request; its query string carries the selection.

    Returns:
        The rendered ``find_match.html`` template. When either value is
        missing, the template is rendered with empty ``jobpostings`` and
        ``linklist`` instead of raising.
    """
    # query_params gives decoded values, so "%2C" arrives as "," and "+" as
    # a space; splitting the raw URL on "=" broke on missing parameters and
    # on "=" inside a value.
    params = request.query_params
    state = params.get("state", "")
    selected_title = next(
        (value for key, value in params.multi_items() if key != "state"),
        "",
    )
    # Same normalisation as before: hyphens become spaces, commas are dropped.
    jobtitle = selected_title.replace("-", " ").replace(",", "")

    if not jobtitle or not state:
        # Nothing to look up: render with empty results rather than a 500.
        return templates.TemplateResponse(
            'find_match.html',
            context={
                'request': request,
                'jobpostings': [],
                'linklist': [],
                'jobtitle': jobtitle,
                'state': state,
            },
        )

    onetCode = get_onet_code(jobtitle)
    jobpostings, linklist = get_job_postings(onetCode, state)
    return templates.TemplateResponse(
        'find_match.html',
        context={
            'request': request,
            'jobpostings': jobpostings,
            'linklist': linklist,
            'jobtitle': jobtitle,
            'state': state,
        },
    )
|
184 |
|
185 |
@app.get("/find-my-hire/", response_class=HTMLResponse)
|
186 |
def get_hires(request: Request):
|
scrape_onet.py
CHANGED
@@ -2,6 +2,7 @@ import requests
|
|
2 |
from bs4 import BeautifulSoup
|
3 |
from cleantext import clean
|
4 |
import pandas as pd
|
|
|
5 |
|
6 |
onet = pd.read_csv('static/ONET_JobTitles.csv')
|
7 |
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'}
|
@@ -33,4 +34,23 @@ def get_onet_tasks(onetCode):
|
|
33 |
tasks = remove_new_line(tasks).replace("related occupations", " ").replace("core", " - ").replace(" )importance category task", "").replace(" find ", "")
|
34 |
tasks = tasks.split(". ")
|
35 |
tasks = [''.join(map(lambda c: '' if c in '0123456789-' else c, task)) for task in tasks]
|
36 |
-
return tasks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from bs4 import BeautifulSoup
|
3 |
from cleantext import clean
|
4 |
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
|
7 |
onet = pd.read_csv('static/ONET_JobTitles.csv')
|
8 |
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'}
|
|
|
34 |
tasks = remove_new_line(tasks).replace("related occupations", " ").replace("core", " - ").replace(" )importance category task", "").replace(" find ", "")
|
35 |
tasks = tasks.split(". ")
|
36 |
tasks = [''.join(map(lambda c: '' if c in '0123456789-' else c, task)) for task in tasks]
|
37 |
+
return tasks
|
38 |
+
|
39 |
+
def get_job_postings(onetCode, state):
    """Scrape the O*NET OnLine "local jobs" page for an occupation and state.

    Args:
        onetCode: O*NET occupation code, appended to the page URL path.
        state: Value sent as the ``st`` query parameter.

    Returns:
        A ``(jobpostings, linklist)`` tuple. ``jobpostings`` holds up to ten
        posting summaries as plain strings; ``linklist`` holds absolute
        ``https://www.onetonline.org`` URLs extracted from the page's
        anchors. Both are empty when the page lacks the expected table
        header text, and ``linklist`` is empty when the anchor markers are
        missing.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'}
    url = "https://www.onetonline.org/link/localjobs/" + onetCode
    # Let requests encode the query string, and bound the wait so a stalled
    # upstream server cannot hang the web handler indefinitely.
    response = requests.get(url, headers=headers, params={"st": state}, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # get_text("tbody") joins every text node with the literal separator
    # "tbody", so the table header row shows up as the marker below.
    page_text = soup.get_text("tbody")
    header_marker = 'PostedtbodyTitle and CompanytbodyLocation'
    if header_marker not in page_text:
        # No results table (error page, layout change, no postings).
        return [], []
    fragments = page_text.split(header_marker)[1].split('Sources:')[0].split("tbody")
    fragments = fragments[5:45]

    # On a full page (40 fragments) the original index arithmetic selected
    # fragments [0:3], [4:7], ... [36:39]: groups of four with the last one
    # dropped. Fixed-stride slicing yields the same result there and keeps
    # groups aligned when fewer fragments are present. The text is joined
    # directly, since the old str([...]) round-trip left '["..."]' around
    # any posting that contained an apostrophe.
    jobpostings = [
        ' '.join(fragments[start:start + 3])
        for start in range(0, len(fragments) - 2, 4)
    ]

    # The anchors are processed as one string. The links of interest sit
    # between the first "</small>" and the careeronestop.org anchor.
    anchors_html = str(soup.find_all('a', href=True))
    sections = anchors_html.split("</small>")
    if len(sections) < 2:
        return jobpostings, []
    segments = sections[1].split(', <a href="https://www.careeronestop.org/"')[0].split(' data-bs-toggle="modal" ')

    linklist = []
    # segments[0] is whatever precedes the first matching anchor; skip it.
    for segment in segments[1:]:
        relative_href = segment.split(' role="button">')[0].replace("href=", "").replace('"', '')
        linklist.append("https://www.onetonline.org" + relative_href)
    return jobpostings, linklist
|
templates/find_match.html
CHANGED
@@ -23,14 +23,25 @@
|
|
23 |
</header>
|
24 |
<main class="main">
|
25 |
<h1 class="pagetitle">Matching Jobs</h1>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
<h2 class="pagesubtitle">We're sorry! This page is currently under construction.</h2>
|
27 |
<h2 class="pagesubtitle">Please check back soon to get {{ jobselection }} jobs that are a great match for your skillset and interests!</h2>
|
|
|
28 |
<br>
|
29 |
-
<br>
|
|
|
30 |
<footer class="footer">
|
31 |
<ul class="footer__text">
|
32 |
<li class="footer__text-item">© 2023 Pathfinder</li>
|
33 |
-
|
|
|
34 |
</footer>
|
35 |
</body>
|
36 |
</html>
|
|
|
23 |
</header>
|
24 |
<main class="main">
|
25 |
<h1 class="pagetitle">Matching Jobs</h1>
|
26 |
+
{% if jobpostings %}
|
27 |
+
<h2 class="pagesubtitle">Here are the top 10 {{ jobtitle }} jobs in {{ state }} that are a great match for your skillset and interests!</h2>
|
28 |
+
<ul class="sectionlist">
|
29 |
+
{% for n in range(10) %}
|
30 |
+
<li class="sectionlist__item"><a href="{{ linklist[n] }}">{{ jobpostings[n] }}</a></li>
|
31 |
+
{% endfor %}
|
32 |
+
</ul>
|
33 |
+
{% else %}
|
34 |
<h2 class="pagesubtitle">We're sorry! This page is currently under construction.</h2>
|
35 |
<h2 class="pagesubtitle">Please check back soon to get {{ jobselection }} jobs that are a great match for your skillset and interests!</h2>
|
36 |
+
{% endif %}
|
37 |
<br>
|
38 |
+
<br>
|
39 |
+
</main>
|
40 |
<footer class="footer">
|
41 |
<ul class="footer__text">
|
42 |
<li class="footer__text-item">© 2023 Pathfinder</li>
|
43 |
+
<li class="footer__text-item">Job postings courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
|
44 |
+
</ul>
|
45 |
</footer>
|
46 |
</body>
|
47 |
</html>
|
templates/find_my_match.html
CHANGED
@@ -46,7 +46,7 @@
|
|
46 |
<article class="output__section">
|
47 |
<h2 class="output__subtitle">Job Matches</h3>
|
48 |
<p class="alert">(Note: You can click on the links to find out more.)</p>
|
49 |
-
<p class="alert">Instructions:
|
50 |
<form action="/find-match/" class="selection__form" method="GET">
|
51 |
<table>
|
52 |
<thead class="output__list">
|
@@ -71,9 +71,15 @@
|
|
71 |
</table>
|
72 |
<br>
|
73 |
<br>
|
74 |
-
<
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
</form>
|
78 |
</article>
|
79 |
{% endif %}
|
|
|
46 |
<article class="output__section">
|
47 |
<h2 class="output__subtitle">Job Matches</h3>
|
48 |
<p class="alert">(Note: You can click on the links to find out more.)</p>
|
49 |
+
<p class="alert">Instructions: Choose the radio button corresponding to the job title that is most closely aligned with your skills and interests.</p>
|
50 |
<form action="/find-match/" class="selection__form" method="GET">
|
51 |
<table>
|
52 |
<thead class="output__list">
|
|
|
71 |
</table>
|
72 |
<br>
|
73 |
<br>
|
74 |
+
<label for="state" class="form__label">Select the state in which you would like to work:</label>
|
75 |
+
<select name="state" class="form__dropdown" id="state">
|
76 |
+
{% for state in statelist %}
|
77 |
+
<option value="{{ state }}">{{ state }}</option>
|
78 |
+
{% endfor %}
|
79 |
+
</select>
|
80 |
+
<br>
|
81 |
+
<br>
|
82 |
+
<button type="submit" class="radio__submit">Submit</button>
|
83 |
</form>
|
84 |
</article>
|
85 |
{% endif %}
|
templates/job_list.html
CHANGED
@@ -47,11 +47,11 @@
|
|
47 |
{% if tasks %}
|
48 |
<section>
|
49 |
<h1 class="sectiontitle">Work Tasks:</h1>
|
50 |
-
<
|
51 |
{% for task in tasks %}
|
52 |
<li class="sectionlist__item">{{ task }}</li>
|
53 |
{% endfor %}
|
54 |
-
</
|
55 |
</section>
|
56 |
{% endif %}
|
57 |
{% if activities %}
|
|
|
47 |
{% if tasks %}
|
48 |
<section>
|
49 |
<h1 class="sectiontitle">Work Tasks:</h1>
|
50 |
+
<ul class="sectionlist">
|
51 |
{% for task in tasks %}
|
52 |
<li class="sectionlist__item">{{ task }}</li>
|
53 |
{% endfor %}
|
54 |
+
</ul>
|
55 |
</section>
|
56 |
{% endif %}
|
57 |
{% if activities %}
|