celise88 committed on
Commit
8de44db
·
1 Parent(s): 9b3b1bc

performance optimization enhancements

Browse files
main.py CHANGED
@@ -7,13 +7,13 @@
7
  # License: MIT License
8
 
9
  # IMPORTS
10
- from fastapi import FastAPI, Request, Form, File, UploadFile
11
  from fastapi.templating import Jinja2Templates
12
  from fastapi.staticfiles import StaticFiles
13
  from fastapi.responses import HTMLResponse
14
  import pandas as pd
15
  from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
16
- from match_utils import get_resume, get_simresults, skillNER
17
 
18
  # APP SETUP
19
  app = FastAPI()
@@ -26,18 +26,19 @@ onet = pd.read_csv('static/ONET_JobTitles.csv')
26
  ### JOB INFORMATION CENTER ###
27
  # GET
28
  @app.get("/")
29
- def render_job_list(request: Request):
30
  joblist = onet['JobTitle']
31
  return templates.TemplateResponse('job_list.html', context={'request': request, 'joblist': joblist})
32
 
33
  # POST
34
  @app.post("/")
35
- def render_job_info(request: Request, jobtitle: str = Form(enum=[x for x in onet['JobTitle']])):
36
  joblist = onet['JobTitle']
37
  if jobtitle:
38
  onetCode = get_onet_code(jobtitle)
39
  jobdescription = get_onet_description(onetCode)
40
  tasks = get_onet_tasks(onetCode)
 
41
  return templates.TemplateResponse('job_list.html', context={
42
  'request': request,
43
  'joblist': joblist,
@@ -47,19 +48,20 @@ def render_job_info(request: Request, jobtitle: str = Form(enum=[x for x in onet
47
 
48
  ### JOB NEIGHBORHOODS ###
49
  @app.get("/explore-job-neighborhoods/", response_class=HTMLResponse)
50
- def render_job_neighborhoods(request: Request):
51
  return templates.TemplateResponse('job_neighborhoods.html', context={'request': request})
52
 
53
  ### FIND-MY-MATCH ###
54
  # GET
55
  @app.get("/find-my-match/", response_class=HTMLResponse)
56
- def match_page(request: Request):
57
  return templates.TemplateResponse('find_my_match.html', context={'request': request})
58
 
59
  # POST
60
  @app.post('/find-my-match/', response_class=HTMLResponse)
61
- async def match_page(request: Request, resume: UploadFile = File(...)):
62
  resume = get_resume(resume)
63
- simResults = await get_simresults(resume)
 
64
  skills = await skillNER(resume)
65
  return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
 
7
  # License: MIT License
8
 
9
  # IMPORTS
10
+ from fastapi import FastAPI, Request, Form, File, UploadFile, BackgroundTasks
11
  from fastapi.templating import Jinja2Templates
12
  from fastapi.staticfiles import StaticFiles
13
  from fastapi.responses import HTMLResponse
14
  import pandas as pd
15
  from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
16
+ from match_utils import neighborhoods, get_resume, coSkillEmbed, sim_result_loop, skillNER
17
 
18
  # APP SETUP
19
  app = FastAPI()
 
26
  ### JOB INFORMATION CENTER ###
27
  # GET
28
  @app.get("/")
29
+ def get_job(request: Request):
30
  joblist = onet['JobTitle']
31
  return templates.TemplateResponse('job_list.html', context={'request': request, 'joblist': joblist})
32
 
33
  # POST
34
  @app.post("/")
35
+ def post_job(request: Request, bt: BackgroundTasks, jobtitle: str = Form(enum=[x for x in onet['JobTitle']])):
36
  joblist = onet['JobTitle']
37
  if jobtitle:
38
  onetCode = get_onet_code(jobtitle)
39
  jobdescription = get_onet_description(onetCode)
40
  tasks = get_onet_tasks(onetCode)
41
+ bt.add_task(neighborhoods, jobtitle)
42
  return templates.TemplateResponse('job_list.html', context={
43
  'request': request,
44
  'joblist': joblist,
 
48
 
49
  ### JOB NEIGHBORHOODS ###
50
  @app.get("/explore-job-neighborhoods/", response_class=HTMLResponse)
51
+ async def get_job_neighborhoods(request: Request):
52
  return templates.TemplateResponse('job_neighborhoods.html', context={'request': request})
53
 
54
  ### FIND-MY-MATCH ###
55
  # GET
56
  @app.get("/find-my-match/", response_class=HTMLResponse)
57
+ def get_matches(request: Request):
58
  return templates.TemplateResponse('find_my_match.html', context={'request': request})
59
 
60
  # POST
61
  @app.post('/find-my-match/', response_class=HTMLResponse)
62
+ async def post_matches(request: Request, resume: UploadFile = File(...)):
63
  resume = get_resume(resume)
64
+ embeds = await coSkillEmbed(resume)
65
+ simResults = await sim_result_loop(embeds)
66
  skills = await skillNER(resume)
67
  return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
match_utils.py CHANGED
@@ -12,7 +12,7 @@ import numpy as np
12
  from numpy.linalg import norm
13
  import ssl
14
  from dotenv import load_dotenv
15
- import concurrent.futures
16
 
17
  # SSL CERTIFICATE FIX
18
  try:
@@ -31,6 +31,7 @@ load_dotenv()
31
 
32
  # LOAD COHERE EMBEDDINGS:
33
  simdat = pd.read_csv('static/cohere_embeddings.csv')
 
34
 
35
  # LOAD FINE-TUNED MODEL
36
  # (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
@@ -39,6 +40,18 @@ tokenizer = AutoTokenizer.from_pretrained('static/tokenizer_shards', low_cpu_mem
39
  classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
40
 
41
  # UTILITY FUNCTIONS
 
 
 
 
 
 
 
 
 
 
 
 
42
  def get_resume(resume):
43
  path = f"static/{resume.filename}"
44
  with open(path, 'wb') as buffer:
@@ -50,7 +63,7 @@ def get_resume(resume):
50
  resume = "\n".join(text)
51
  return resume
52
 
53
- def coSkillEmbed(text):
54
  try:
55
  co = cohere.Client(os.getenv("COHERE_TOKEN"))
56
  response = co.embed(
@@ -60,10 +73,9 @@ def coSkillEmbed(text):
60
  except CohereError as e:
61
  return e
62
 
63
- async def get_simresults(resume):
64
  def cosine(A, B):
65
  return np.dot(A,B)/(norm(A)*norm(B))
66
- embeds = coSkillEmbed(resume)
67
  simResults = []
68
  for i in range(len(simdat)):
69
  simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
@@ -103,5 +115,5 @@ async def skillNER(resume):
103
  labels.append("Skill")
104
  else:
105
  labels.append("Not Skill")
106
- labels_dict = dict(zip(resume, labels))
107
- return labels_dict
 
12
  from numpy.linalg import norm
13
  import ssl
14
  from dotenv import load_dotenv
15
+ import plotly_express as px
16
 
17
  # SSL CERTIFICATE FIX
18
  try:
 
31
 
32
  # LOAD COHERE EMBEDDINGS:
33
  simdat = pd.read_csv('static/cohere_embeddings.csv')
34
+ coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
35
 
36
  # LOAD FINE-TUNED MODEL
37
  # (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
 
40
  classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
41
 
42
  # UTILITY FUNCTIONS
43
+ async def neighborhoods(jobtitle=None):
44
+ def format_title(logo, title, subtitle, title_font_size = 28, subtitle_font_size=14):
45
+ logo = f'<a href="/" target="_self">{logo}</a>'
46
+ subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
47
+ title = f'<span style="font-size: {title_font_size}px;">{title}</span>'
48
+ return f'{logo}{title}<br>{subtitle}'
49
+
50
+ fig = px.scatter(coheredat, x = 'longitude', y = 'latitude', color = 'Category', hover_data = ['Category', 'Title'],
51
+ title=format_title("Pathfinder", " Job Neighborhoods: Explore the Map!", "(Generated using Co-here AI's LLM & ONET's Task Statements)"))
52
+ fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
53
+ fig.write_html('templates/job_neighborhoods.html')
54
+
55
  def get_resume(resume):
56
  path = f"static/{resume.filename}"
57
  with open(path, 'wb') as buffer:
 
63
  resume = "\n".join(text)
64
  return resume
65
 
66
+ async def coSkillEmbed(text):
67
  try:
68
  co = cohere.Client(os.getenv("COHERE_TOKEN"))
69
  response = co.embed(
 
73
  except CohereError as e:
74
  return e
75
 
76
+ async def sim_result_loop(embeds):
77
  def cosine(A, B):
78
  return np.dot(A,B)/(norm(A)*norm(B))
 
79
  simResults = []
80
  for i in range(len(simdat)):
81
  simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
 
115
  labels.append("Skill")
116
  else:
117
  labels.append("Not Skill")
118
+ skills = dict(zip(resume, labels))
119
+ return skills
static/cohere_tSNE_dat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3dbbea21867638654b3c399b988ca95c5573cc602383d8835cffe36952a7cb
3
+ size 1858107
templates/job_neighborhoods.html CHANGED
The diff for this file is too large to render. See raw diff