Spaces:
Runtime error
Runtime error
performance optimization enhancements
Browse files
- main.py +10 -8
- match_utils.py +18 -6
- static/cohere_tSNE_dat.csv +3 -0
- templates/job_neighborhoods.html +0 -0
main.py
CHANGED
@@ -7,13 +7,13 @@
|
|
7 |
# License: MIT License
|
8 |
|
9 |
# IMPORTS
|
10 |
-
from fastapi import FastAPI, Request, Form, File, UploadFile
|
11 |
from fastapi.templating import Jinja2Templates
|
12 |
from fastapi.staticfiles import StaticFiles
|
13 |
from fastapi.responses import HTMLResponse
|
14 |
import pandas as pd
|
15 |
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
|
16 |
-
from match_utils import get_resume,
|
17 |
|
18 |
# APP SETUP
|
19 |
app = FastAPI()
|
@@ -26,18 +26,19 @@ onet = pd.read_csv('static/ONET_JobTitles.csv')
|
|
26 |
### JOB INFORMATION CENTER ###
|
27 |
# GET
|
28 |
@app.get("/")
|
29 |
-
def
|
30 |
joblist = onet['JobTitle']
|
31 |
return templates.TemplateResponse('job_list.html', context={'request': request, 'joblist': joblist})
|
32 |
|
33 |
# POST
|
34 |
@app.post("/")
|
35 |
-
def render_job_info(request: Request, jobtitle: str = Form(enum=[x for x in onet['JobTitle']])):
|
36 |
joblist = onet['JobTitle']
|
37 |
if jobtitle:
|
38 |
onetCode = get_onet_code(jobtitle)
|
39 |
jobdescription = get_onet_description(onetCode)
|
40 |
tasks = get_onet_tasks(onetCode)
|
|
|
41 |
return templates.TemplateResponse('job_list.html', context={
|
42 |
'request': request,
|
43 |
'joblist': joblist,
|
@@ -47,19 +48,20 @@ def render_job_info(request: Request, jobtitle: str = Form(enum=[x for x in onet
|
|
47 |
|
48 |
### JOB NEIGHBORHOODS ###
|
49 |
@app.get("/explore-job-neighborhoods/", response_class=HTMLResponse)
|
50 |
-
def
|
51 |
return templates.TemplateResponse('job_neighborhoods.html', context={'request': request})
|
52 |
|
53 |
### FIND-MY-MATCH ###
|
54 |
# GET
|
55 |
@app.get("/find-my-match/", response_class=HTMLResponse)
|
56 |
-
def
|
57 |
return templates.TemplateResponse('find_my_match.html', context={'request': request})
|
58 |
|
59 |
# POST
|
60 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
61 |
-
async def
|
62 |
resume = get_resume(resume)
|
63 |
-
|
|
|
64 |
skills = await skillNER(resume)
|
65 |
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
|
|
|
7 |
# License: MIT License
|
8 |
|
9 |
# IMPORTS
|
10 |
+
from fastapi import FastAPI, Request, Form, File, UploadFile, BackgroundTasks
|
11 |
from fastapi.templating import Jinja2Templates
|
12 |
from fastapi.staticfiles import StaticFiles
|
13 |
from fastapi.responses import HTMLResponse
|
14 |
import pandas as pd
|
15 |
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
|
16 |
+
from match_utils import neighborhoods, get_resume, coSkillEmbed, sim_result_loop, skillNER
|
17 |
|
18 |
# APP SETUP
|
19 |
app = FastAPI()
|
|
|
26 |
### JOB INFORMATION CENTER ###
|
27 |
# GET
|
28 |
@app.get("/")
|
29 |
+
def get_job(request: Request):
    """Render 'job_list.html' with the 'JobTitle' column of the O*NET table."""
    # The template receives the request and the job titles under 'joblist'.
    page_context = {
        'request': request,
        'joblist': onet['JobTitle'],
    }
    return templates.TemplateResponse('job_list.html', context=page_context)
|
32 |
|
33 |
# POST
|
34 |
@app.post("/")
|
35 |
+
def post_job(request: Request, bt: BackgroundTasks, jobtitle: str = Form(enum=[x for x in onet['JobTitle']])):
|
36 |
joblist = onet['JobTitle']
|
37 |
if jobtitle:
|
38 |
onetCode = get_onet_code(jobtitle)
|
39 |
jobdescription = get_onet_description(onetCode)
|
40 |
tasks = get_onet_tasks(onetCode)
|
41 |
+
bt.add_task(neighborhoods, jobtitle)
|
42 |
return templates.TemplateResponse('job_list.html', context={
|
43 |
'request': request,
|
44 |
'joblist': joblist,
|
|
|
48 |
|
49 |
### JOB NEIGHBORHOODS ###
|
50 |
@app.get("/explore-job-neighborhoods/", response_class=HTMLResponse)
|
51 |
+
async def get_job_neighborhoods(request: Request):
    """Render the 'job_neighborhoods.html' template for the current request."""
    page_context = {'request': request}
    return templates.TemplateResponse('job_neighborhoods.html', context=page_context)
|
53 |
|
54 |
### FIND-MY-MATCH ###
|
55 |
# GET
|
56 |
@app.get("/find-my-match/", response_class=HTMLResponse)
|
57 |
+
def get_matches(request: Request):
    """Render the empty 'find_my_match.html' page (no resume submitted yet)."""
    page_context = {'request': request}
    return templates.TemplateResponse('find_my_match.html', context=page_context)
|
59 |
|
60 |
# POST
|
61 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
62 |
+
async def post_matches(request: Request, resume: UploadFile = File(...)):
    """Process an uploaded resume and render the match results page.

    The upload is converted to text, embedded, compared against the stored
    embeddings, and labelled for skills; everything is then handed to the
    'find_my_match.html' template.

    Args:
        request: Incoming request, forwarded to the template context.
        resume: The resume file uploaded through the form.

    Returns:
        The rendered 'find_my_match.html' template with the keys 'request',
        'resume' (extracted text), 'skills' and 'simResults'.

    Raises:
        HTTPException: 502 when the embedding step hands back an exception
            object instead of embeddings.
    """
    # Function-scope import keeps this handler independent of the module's
    # top-level import line.
    from fastapi import HTTPException

    # Keep the extracted text under its own name instead of rebinding the
    # UploadFile parameter.
    resume_text = get_resume(resume)

    embeds = await coSkillEmbed(resume_text)
    # coSkillEmbed returns the caught CohereError rather than raising it.
    # Passing that object on to sim_result_loop would feed an exception into
    # np.array(), so stop here with an explicit upstream-failure response.
    if isinstance(embeds, Exception):
        raise HTTPException(
            status_code=502,
            detail=f"Embedding service error: {embeds}",
        )

    simResults = await sim_result_loop(embeds)
    skills = await skillNER(resume_text)
    return templates.TemplateResponse(
        'find_my_match.html',
        context={
            'request': request,
            'resume': resume_text,
            'skills': skills,
            'simResults': simResults,
        },
    )
|
match_utils.py
CHANGED
@@ -12,7 +12,7 @@ import numpy as np
|
|
12 |
from numpy.linalg import norm
|
13 |
import ssl
|
14 |
from dotenv import load_dotenv
|
15 |
-
import
|
16 |
|
17 |
# SSL CERTIFICATE FIX
|
18 |
try:
|
@@ -31,6 +31,7 @@ load_dotenv()
|
|
31 |
|
32 |
# LOAD COHERE EMBEDDINGS:
|
33 |
simdat = pd.read_csv('static/cohere_embeddings.csv')
|
|
|
34 |
|
35 |
# LOAD FINE-TUNED MODEL
|
36 |
# (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
|
@@ -39,6 +40,18 @@ tokenizer = AutoTokenizer.from_pretrained('static/tokenizer_shards', low_cpu_mem
|
|
39 |
classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
|
40 |
|
41 |
# UTILITY FUNCTIONS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
def get_resume(resume):
|
43 |
path = f"static/{resume.filename}"
|
44 |
with open(path, 'wb') as buffer:
|
@@ -50,7 +63,7 @@ def get_resume(resume):
|
|
50 |
resume = "\n".join(text)
|
51 |
return resume
|
52 |
|
53 |
-
def coSkillEmbed(text):
|
54 |
try:
|
55 |
co = cohere.Client(os.getenv("COHERE_TOKEN"))
|
56 |
response = co.embed(
|
@@ -60,10 +73,9 @@ def coSkillEmbed(text):
|
|
60 |
except CohereError as e:
|
61 |
return e
|
62 |
|
63 |
-
async def
|
64 |
def cosine(A, B):
|
65 |
return np.dot(A,B)/(norm(A)*norm(B))
|
66 |
-
embeds = coSkillEmbed(resume)
|
67 |
simResults = []
|
68 |
for i in range(len(simdat)):
|
69 |
simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
|
@@ -103,5 +115,5 @@ async def skillNER(resume):
|
|
103 |
labels.append("Skill")
|
104 |
else:
|
105 |
labels.append("Not Skill")
|
106 |
-
|
107 |
-
return
|
|
|
12 |
from numpy.linalg import norm
|
13 |
import ssl
|
14 |
from dotenv import load_dotenv
|
15 |
+
import plotly_express as px
|
16 |
|
17 |
# SSL CERTIFICATE FIX
|
18 |
try:
|
|
|
31 |
|
32 |
# LOAD COHERE EMBEDDINGS:
|
33 |
simdat = pd.read_csv('static/cohere_embeddings.csv')
|
34 |
+
coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
|
35 |
|
36 |
# LOAD FINE-TUNED MODEL
|
37 |
# (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
|
|
|
40 |
classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
|
41 |
|
42 |
# UTILITY FUNCTIONS
|
43 |
+
async def neighborhoods(jobtitle=None):
    """Rebuild the job-neighborhoods scatter plot and save it as an HTML page.

    Plots the module-level `coheredat` frame ('longitude' against 'latitude',
    colored by 'Category', with 'Category' and 'Title' in the hover data) and
    writes the figure to 'templates/job_neighborhoods.html'.

    Args:
        jobtitle: Accepted but not used by this function.
    """
    def build_heading(brand, headline, tagline, headline_px=28, tagline_px=14):
        # HTML heading: a link back to "/", the headline, then the tagline
        # on its own line in a smaller font.
        home_link = f'<a href="/" target="_self">{brand}</a>'
        tagline_html = f'<span style="font-size: {tagline_px}px;">{tagline}</span>'
        headline_html = f'<span style="font-size: {headline_px}px;">{headline}</span>'
        return f'{home_link}{headline_html}<br>{tagline_html}'

    heading = build_heading(
        "Pathfinder",
        " Job Neighborhoods: Explore the Map!",
        "(Generated using Co-here AI's LLM & ONET's Task Statements)",
    )

    fig = px.scatter(
        coheredat,
        x='longitude',
        y='latitude',
        color='Category',
        hover_data=['Category', 'Title'],
        title=heading,
    )

    # Fixed canvas size and a monospace font for the whole figure.
    layout = fig['layout']
    layout.update(
        height=1000,
        width=1500,
        font={'family': 'Courier New, monospace', 'color': 'black'},
    )

    fig.write_html('templates/job_neighborhoods.html')
|
54 |
+
|
55 |
def get_resume(resume):
|
56 |
path = f"static/{resume.filename}"
|
57 |
with open(path, 'wb') as buffer:
|
|
|
63 |
resume = "\n".join(text)
|
64 |
return resume
|
65 |
|
66 |
+
async def coSkillEmbed(text):
|
67 |
try:
|
68 |
co = cohere.Client(os.getenv("COHERE_TOKEN"))
|
69 |
response = co.embed(
|
|
|
73 |
except CohereError as e:
|
74 |
return e
|
75 |
|
76 |
+
async def sim_result_loop(embeds):
|
77 |
def cosine(A, B):
|
78 |
return np.dot(A,B)/(norm(A)*norm(B))
|
|
|
79 |
simResults = []
|
80 |
for i in range(len(simdat)):
|
81 |
simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
|
|
|
115 |
labels.append("Skill")
|
116 |
else:
|
117 |
labels.append("Not Skill")
|
118 |
+
skills = dict(zip(resume, labels))
|
119 |
+
return skills
|
static/cohere_tSNE_dat.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac3dbbea21867638654b3c399b988ca95c5573cc602383d8835cffe36952a7cb
|
3 |
+
size 1858107
|
templates/job_neighborhoods.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|