import gradio as gr
import pandas as pd
import spacy
from spacy import displacy
import plotly.express as px
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download the NLTK resources needed for stopword removal and lemmatization
nltk.download(["stopwords", "wordnet", "omw-1.4"])
# Load the CSV file into a DataFrame
dataset_path = "Resume.csv"
df = pd.read_csv(dataset_path)
# Shuffle the rows and keep the first 200 resumes to keep processing manageable
df = df.reindex(np.random.permutation(df.index))
data = df.iloc[:200].copy()
# Load the spaCy English language model with large vocabulary and pre-trained word vectors
nlp = spacy.load("en_core_web_lg")
# Path to the file containing skill patterns in JSONL format
skill_pattern_path = "jz_skill_patterns.jsonl"
# Add an entity ruler to the spaCy pipeline
ruler = nlp.add_pipe("entity_ruler")
# Load skill patterns from disk into the entity ruler
ruler.from_disk(skill_pattern_path)
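# For reference, each JSONL line holds one entity-ruler pattern; illustrative
# examples (not necessarily the actual file contents):
#   {"label": "SKILL", "pattern": [{"LOWER": "python"}]}
#   {"label": "SKILL", "pattern": [{"LOWER": "machine"}, {"LOWER": "learning"}]}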
# Return the unique SKILL entities found in a piece of text
def get_unique_skills(text):
    doc = nlp(text)
    skills = set()
    for ent in doc.ents:
        if ent.label_ == "SKILL":
            skills.add(ent.text)
    return list(skills)
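# Illustrative example (actual output depends on the patterns loaded above):
#   get_unique_skills("proficient in python and sql") -> ["python", "sql"]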
def preprocess_resume(resume_str):
    # Remove Twitter-style mentions, URLs, a leading "rt", and any remaining
    # non-alphanumeric characters
    review = re.sub(
        r"(@[A-Za-z0-9]+)|(\w+:\/\/\S+)|(http\S+)|^rt|([^0-9A-Za-z \t])",
        " ",
        resume_str,
    )
    # Convert to lowercase and tokenize
    review = review.lower().split()
    # Lemmatize and remove stopwords (build the stopword set once, not per word)
    lm = WordNetLemmatizer()
    stop_words = set(stopwords.words("english"))
    review = [lm.lemmatize(word) for word in review if word not in stop_words]
    # Join the words back into a string
    return " ".join(review)
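# Illustrative example:
#   preprocess_resume("Managed teams of engineers @acme http://acme.io")
#   -> "managed team engineer"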
# Apply the preprocess_resume function to each resume string and store the result in a new column
data["Clean_Resume"] = data["Resume_str"].apply(preprocess_resume)
# Extract skills from each preprocessed resume and store them in a new column
data["skills"] = data["Clean_Resume"].str.lower().apply(get_unique_skills)
print(data)
Job_cat = data["Category"].unique()
Job_cat = np.append(Job_cat, "ALL")
Job_Category = "INFORMATION-TECHNOLOGY"
def get_skills_distribution(Job_Category):
    # Select skills for a single category, or for all resumes when "ALL"
    if Job_Category != "ALL":
        filtered_data = data[data["Category"] == Job_Category]["skills"]
    else:
        filtered_data = data["skills"]
    # Flatten the per-resume skill lists into one list
    total_skills = [skill for sublist in filtered_data for skill in sublist]
    fig = px.histogram(
        x=total_skills,
        labels={"x": "Skills"},
        title=f"{Job_Category} Distribution of Skills",
    ).update_xaxes(categoryorder="total descending")
    # Return the figure itself; fig.show() returns None
    return fig

get_skills_distribution(Job_Category).show()
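# In a headless environment the figure can be saved instead of shown, e.g.:
#   get_skills_distribution("ALL").write_html("skills_distribution.html")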
# Register each job category as a "Job-Category" pattern in the entity ruler
patterns = data.Category.unique()
for a in patterns:
    ruler.add_patterns([{"label": "Job-Category", "pattern": a}])
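# e.g. this adds patterns such as
#   {"label": "Job-Category", "pattern": "INFORMATION-TECHNOLOGY"}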
# Keep the pipeline built above: loading a fresh model here would discard the
# entity ruler, so SKILL and Job-Category entities would never be highlighted
# Define the styles and options for highlighting entities
colors = {
"Job-Category": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
"SKILL": "linear-gradient(90deg, #9BE15D, #00E3AE)",
"ORG": "#ffd966",
"PERSON": "#e06666",
"GPE": "#9fc5e8",
"DATE": "#c27ba0",
"ORDINAL": "#674ea7",
"PRODUCT": "#f9cb9c",
}
options = {
"ents": [
"Job-Category",
"SKILL",
"ORG",
"PERSON",
"GPE",
"DATE",
"ORDINAL",
"PRODUCT",
],
"colors": colors,
}
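# displacy renders only the entity labels listed in "ents", using the colors
# defined above; any other entity types in the doc are left unhighlighted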
# Define a function to process the resume text and highlight entities
def highlight_entities(resume_text):
    # Process the resume text with spaCy
    doc = nlp(resume_text)
    # Render the entities with displacy and return the HTML
    html = displacy.render(doc, style="ent", options=options, jupyter=False)
    return html
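# Example (hypothetical input):
#   highlight_entities("John Doe is a Python developer at Google.")
#   returns an HTML string with the recognized spans wrapped in styled <mark> tags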
# Create the Gradio interface
iface = gr.Interface(
fn=highlight_entities,
inputs=gr.Textbox(lines=10, label="Input Resume Text"),
outputs=gr.HTML(label="Highlighted Entities"),
title="Resume Entity Highlighter",
description="Enter your resume text and see entities highlighted.",
theme="compact"
)
# Launch the interface
iface.launch()