celise88 committed on
Commit
24de2aa
·
1 Parent(s): 140f531

add job posting scrape capability to find my match page

Browse files
main.py CHANGED
@@ -17,7 +17,7 @@ from uuid import uuid1
17
  from localStoragePy import localStoragePy
18
  localStorage = localStoragePy('pathfinder', 'text')
19
 
20
- from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
21
  from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop, get_links, coSkillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
22
  from user_utils import Hash
23
 
@@ -143,6 +143,12 @@ def get_matches(request: Request):
143
  @app.post('/find-my-match/', response_class=HTMLResponse)
144
  async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile = File(...)):
145
 
 
 
 
 
 
 
146
  username = localStorage.getItem('username')
147
 
148
  def add_data_to_db(resume):
@@ -164,13 +170,17 @@ async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile
164
  bt.add_task(add_data_to_db, resume)
165
  bt.add_task(get_jobs_from_db, resume)
166
 
167
- return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links})
168
 
169
  @app.get("/find-match/", response_class=HTMLResponse)
170
  def find_match(request: Request):
171
- jobselection = str(request.url).split("=")[1].replace('HTTP/1.1', '').replace("-", " ").replace("%2C", "")
172
- print(jobselection)
173
- return templates.TemplateResponse('find_match.html', context={'request': request, 'jobselection': jobselection})
 
 
 
 
174
 
175
  @app.get("/find-my-hire/", response_class=HTMLResponse)
176
  def get_hires(request: Request):
 
17
  from localStoragePy import localStoragePy
18
  localStorage = localStoragePy('pathfinder', 'text')
19
 
20
+ from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks, get_job_postings
21
  from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop, get_links, coSkillEmbed, sim_result_loop_jobFinder, sim_result_loop_candFinder
22
  from user_utils import Hash
23
 
 
143
  @app.post('/find-my-match/', response_class=HTMLResponse)
144
  async def post_matches(request: Request, bt: BackgroundTasks, resume: UploadFile = File(...)):
145
 
146
+ statelist = [ 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
147
+ 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
148
+ 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
149
+ 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
150
+ 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
151
+
152
  username = localStorage.getItem('username')
153
 
154
  def add_data_to_db(resume):
 
170
  bt.add_task(add_data_to_db, resume)
171
  bt.add_task(get_jobs_from_db, resume)
172
 
173
+ return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults[0], 'links': links, 'statelist': statelist})
174
 
175
  @app.get("/find-match/", response_class=HTMLResponse)
176
  def find_match(request: Request):
177
+ jobtitle = str(request.url).split("=")[1].replace('HTTP/1.1', '').replace("-", " ").replace("%2C", "").replace('&state', '')
178
+ state = str(request.url).split("=")[2]
179
+ onetCode = get_onet_code(jobtitle)
180
+ postings = get_job_postings(onetCode, state)
181
+ jobpostings = postings[0]
182
+ linklist = postings[1]
183
+ return templates.TemplateResponse('find_match.html', context={'request': request, 'jobpostings': jobpostings, 'linklist': linklist, 'jobtitle': jobtitle, 'state': state})
184
 
185
  @app.get("/find-my-hire/", response_class=HTMLResponse)
186
  def get_hires(request: Request):
scrape_onet.py CHANGED
@@ -2,6 +2,7 @@ import requests
2
  from bs4 import BeautifulSoup
3
  from cleantext import clean
4
  import pandas as pd
 
5
 
6
  onet = pd.read_csv('static/ONET_JobTitles.csv')
7
  headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'}
@@ -33,4 +34,23 @@ def get_onet_tasks(onetCode):
33
  tasks = remove_new_line(tasks).replace("related occupations", " ").replace("core", " - ").replace(" )importance category task", "").replace(" find ", "")
34
  tasks = tasks.split(". ")
35
  tasks = [''.join(map(lambda c: '' if c in '0123456789-' else c, task)) for task in tasks]
36
- return tasks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from bs4 import BeautifulSoup
3
  from cleantext import clean
4
  import pandas as pd
5
+ import numpy as np
6
 
7
  onet = pd.read_csv('static/ONET_JobTitles.csv')
8
  headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'}
 
34
  tasks = remove_new_line(tasks).replace("related occupations", " ").replace("core", " - ").replace(" )importance category task", "").replace(" find ", "")
35
  tasks = tasks.split(". ")
36
  tasks = [''.join(map(lambda c: '' if c in '0123456789-' else c, task)) for task in tasks]
37
+ return tasks
38
+
39
def get_job_postings(onetCode, state):
    """Scrape local job postings for an occupation from onetonline.org.

    Args:
        onetCode: O*NET occupation code, appended to the local-jobs URL.
        state: State abbreviation, sent as the ``st`` query parameter.

    Returns:
        A ``(jobpostings, linklist)`` tuple. ``jobpostings`` is a list of
        strings, one per posting, each joined from three consecutive table
        cells. ``linklist`` is a list of absolute onetonline.org URLs.
        Both lists are empty when the request fails or the expected page
        markers are not present.
    """
    from html import unescape  # local import keeps this block self-contained

    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'}
    url = "https://www.onetonline.org/link/localjobs/" + onetCode + "?st=" + state

    try:
        # Without a timeout a stalled connection would block the request forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return [], []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Text nodes are joined with the literal separator "tbody" so that they
    # can be split back into individual cells below.
    page_text = str(soup.get_text("tbody"))
    sections = page_text.split('PostedtbodyTitle and CompanytbodyLocation')
    if len(sections) < 2:
        # Table header not found (error page, no postings, layout change).
        return [], []
    jobs = sections[1].split('Sources:')[0].split("tbody")
    jobs = jobs[5:45]

    # Each posting spans four cells of which the first three are kept; this
    # matches the slices taken when exactly 40 cells are present, and stays
    # well-defined when the page yields fewer.
    jobpostings = []
    for start in range(0, len(jobs), 4):
        cells = jobs[start:start + 3]
        if len(cells) == 3:
            jobpostings.append(' '.join(cells))

    linklist = []
    anchor_sections = str(soup.find_all('a', href=True)).split("</small>")
    if len(anchor_sections) < 2:
        return jobpostings, linklist
    links = anchor_sections[1].split(', <a href="https://www.careeronestop.org/"')[0].split(' data-bs-toggle="modal" ')
    # The first piece precedes the first posting anchor and carries no link.
    for piece in links[1:]:
        href = piece.split(' role="button">')[0].replace("href=", "").replace('"', '')
        # The href comes from serialized HTML, so entities such as "&amp;"
        # must be decoded to obtain the real URL.
        linklist.append("https://www.onetonline.org" + unescape(href))

    return jobpostings, linklist
templates/find_match.html CHANGED
@@ -23,14 +23,25 @@
23
  </header>
24
  <main class="main">
25
  <h1 class="pagetitle">Matching Jobs</h1>
 
 
 
 
 
 
 
 
26
  <h2 class="pagesubtitle">We're sorry! This page is currently under construction.</h2>
27
  <h2 class="pagesubtitle">Please check back soon to get {{ jobselection }} jobs that are a great match for your skillset and interests!</h2>
 
28
  <br>
29
- <br> </main>
 
30
  <footer class="footer">
31
  <ul class="footer__text">
32
  <li class="footer__text-item">© 2023 Pathfinder</li>
33
- </ul>
 
34
  </footer>
35
  </body>
36
  </html>
 
23
  </header>
24
  <main class="main">
25
  <h1 class="pagetitle">Matching Jobs</h1>
26
+ {% if jobpostings %}
27
+ <h2 class="pagesubtitle">Here are the top 10 {{ jobtitle }} jobs in {{ state }} that are a great match for your skillset and interests!</h2>
28
+ <ul class="sectionlist">
29
+ {% for n in range(10) %}
30
+ <li class="sectionlist__item"><a href="{{ linklist[n] }}">{{ jobpostings[n] }}</a></li>
31
+ {% endfor %}
32
+ </ul>
33
+ {% else %}
34
  <h2 class="pagesubtitle">We're sorry! This page is currently under construction.</h2>
35
  <h2 class="pagesubtitle">Please check back soon to get {{ jobtitle }} jobs that are a great match for your skillset and interests!</h2>
36
+ {% endif %}
37
  <br>
38
+ <br>
39
+ </main>
40
  <footer class="footer">
41
  <ul class="footer__text">
42
  <li class="footer__text-item">© 2023 Pathfinder</li>
43
+ <li class="footer__text-item">Job postings courtesy of <a class="footer__text-link" href="https://www.onetonline.org">onetonline.org</a></li>
44
+ </ul>
45
  </footer>
46
  </body>
47
  </html>
templates/find_my_match.html CHANGED
@@ -46,7 +46,7 @@
46
  <article class="output__section">
47
  <h2 class="output__subtitle">Job Matches</h2>
48
  <p class="alert">(Note: You can click on the links to find out more.)</p>
49
- <p class="alert">Instructions: When you have decided which job title is most closely aligned with your skills and interests, choose the corresponding radio button and click "Submit."</p>
50
  <form action="/find-match/" class="selection__form" method="GET">
51
  <table>
52
  <thead class="output__list">
@@ -71,9 +71,15 @@
71
  </table>
72
  <br>
73
  <br>
74
- <div class="radio__submit">
75
- <button type="submit" class="radio__submit">Submit</button>
76
- </div>
 
 
 
 
 
 
77
  </form>
78
  </article>
79
  {% endif %}
 
46
  <article class="output__section">
47
  <h2 class="output__subtitle">Job Matches</h2>
48
  <p class="alert">(Note: You can click on the links to find out more.)</p>
49
+ <p class="alert">Instructions: Choose the radio button corresponding to the job title that is most closely aligned with your skills and interests.</p>
50
  <form action="/find-match/" class="selection__form" method="GET">
51
  <table>
52
  <thead class="output__list">
 
71
  </table>
72
  <br>
73
  <br>
74
+ <label for="state" class="form__label">Select the state in which you would like to work:</label>
75
+ <select name="state" class="form__dropdown" id="state">
76
+ {% for state in statelist %}
77
+ <option value="{{ state }}">{{ state }}</option>
78
+ {% endfor %}
79
+ </select>
80
+ <br>
81
+ <br>
82
+ <button type="submit" class="radio__submit">Submit</button>
83
  </form>
84
  </article>
85
  {% endif %}
templates/job_list.html CHANGED
@@ -47,11 +47,11 @@
47
  {% if tasks %}
48
  <section>
49
  <h1 class="sectiontitle">Work Tasks:</h1>
50
- <ui class="sectionlist"></ui>
51
  {% for task in tasks %}
52
  <li class="sectionlist__item">{{ task }}</li>
53
  {% endfor %}
54
- </ui>
55
  </section>
56
  {% endif %}
57
  {% if activities %}
 
47
  {% if tasks %}
48
  <section>
49
  <h1 class="sectiontitle">Work Tasks:</h1>
50
+ <ul class="sectionlist">
51
  {% for task in tasks %}
52
  <li class="sectionlist__item">{{ task }}</li>
53
  {% endfor %}
54
+ </ul>
55
  </section>
56
  {% endif %}
57
  {% if activities %}