Souha Ben Hassine committed on
Commit
497f6e8
·
1 Parent(s): 54a099a
Files changed (2) hide show
  1. README.md +1 -0
  2. app.py +6 -1
README.md CHANGED
@@ -15,5 +15,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
15
 
16
  ```
17
  pip install -r requirements.txt
 
18
 
19
  ```
 
15
 
16
  ```
17
  pip install -r requirements.txt
18
+ python3 -m spacy download en_core_web_sm
19
 
20
  ```
app.py CHANGED
@@ -11,11 +11,14 @@ import re
11
  import nltk
12
  from nltk.corpus import stopwords
13
  from nltk.stem import WordNetLemmatizer
 
14
  nltk.download(['stopwords','wordnet'])
15
  nltk.download('omw-1.4')
16
  # Load the CSV file into a DataFrame
17
  dataset_path = "Resume.csv"
18
- data = pd.read_csv(dataset_path)
 
 
19
 
20
  # Load the spaCy English language model with large vocabulary and pre-trained word vectors
21
  nlp = spacy.load("en_core_web_lg")
@@ -58,6 +61,8 @@ data["Clean_Resume"] = data["Resume_str"].apply(preprocess_resume)
58
  # Extract skills from each preprocessed resume and store them in a new column
59
  data["skills"] = data["Clean_Resume"].str.lower().apply(get_unique_skills)
60
 
 
 
61
  Job_cat = data["Category"].unique()
62
  Job_cat = np.append(Job_cat, "ALL")
63
  Job_Category = "INFORMATION-TECHNOLOGY"
 
11
  import nltk
12
  from nltk.corpus import stopwords
13
  from nltk.stem import WordNetLemmatizer
14
+
15
  nltk.download(['stopwords','wordnet'])
16
  nltk.download('omw-1.4')
17
  # Load the CSV file into a DataFrame
18
  dataset_path = "Resume.csv"
19
+ df = pd.read_csv(dataset_path)
20
+ df= df.reindex(np.random.permutation(df.index))
21
+ data = df.copy().iloc[0:200,]
22
 
23
  # Load the spaCy English language model with large vocabulary and pre-trained word vectors
24
  nlp = spacy.load("en_core_web_lg")
 
61
  # Extract skills from each preprocessed resume and store them in a new column
62
  data["skills"] = data["Clean_Resume"].str.lower().apply(get_unique_skills)
63
 
64
+ print(data)
65
+
66
  Job_cat = data["Category"].unique()
67
  Job_cat = np.append(Job_cat, "ALL")
68
  Job_Category = "INFORMATION-TECHNOLOGY"