File size: 2,252 Bytes
e062e72
 
 
 
 
8c09499
e062e72
c916824
 
8c09499
 
 
 
 
e062e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
from modules.parse_pdf import process_pdf
from modules.classify import classify_text_multi  # Importing BERT model classification
from modules.RandomForest import classify_text_rf,classify_text_rf_multi #Importing single and multi-label classification
from modules.SVM import classify_text_svm,classify_text_svm_multi #Importing single and multi-label classification 
import nltk

# Fetch the WordNet corpus at start-up. This call is unconditional, so it runs
# every time the module is executed.
# NOTE(review): presumably required by the text preprocessing inside the
# imported `modules.*` code — confirm against those modules.
nltk.download('wordnet')

# Check if the stopwords resource is available; if not, download it.
# `nltk.data.find` raises LookupError when the resource cannot be located,
# which is what triggers the download below.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
# Function to process a PDF and classify it using the BERT, Random Forest, and SVM models
def process_and_classify_pdf(file):
    """Parse an uploaded PDF and summarise every classifier's prediction.

    The text produced by ``process_pdf`` is fed, unchanged, to each of the
    five classifiers (BERT multi-label, Random Forest single/multi-label,
    SVM single/multi-label), and their outputs are rendered as one
    human-readable report with one line per model.

    Args:
        file: The uploaded PDF as delivered by the Gradio file input; it is
            passed straight through to ``process_pdf``.

    Returns:
        A ``(parsed_text, report)`` tuple: the text returned by
        ``process_pdf`` and the newline-separated prediction report.
    """
    text = process_pdf(file)

    # Run every model on the same parsed text before formatting anything.
    bert_labels = classify_text_multi(text)
    rf_label = classify_text_rf(text)
    rf_labels = classify_text_rf_multi(text)
    svm_label = classify_text_svm(text)
    svm_labels = classify_text_svm_multi(text)

    # Multi-label results are joined with ", ", so they must be iterables of
    # strings; single-label results are interpolated as-is.
    report_lines = [
        f"BERT Classification: {', '.join(bert_labels)}",
        f"Random Forest (Single-label): {rf_label}",
        f"Random Forest (Multi-label): {', '.join(rf_labels)}",
        f"SVM (Single-label):{svm_label}",
        f"SVM (multi-label):{', '.join(svm_labels)}",
    ]

    return text, "\n".join(report_lines)

# Define Gradio interface
# Gradio components: one PDF upload as input, two text boxes as output
# (the parsed resume text and the per-model prediction report).
input_file = gr.File(label="Upload PDF")
output_text = gr.Textbox(label="Parsed Text")
output_class = gr.Textbox(label="Job Title Predictions")

# Wire the components to the processing function, then start the app.
# `share=True` is passed to `launch` exactly as before.
demo = gr.Interface(
    fn=process_and_classify_pdf,
    inputs=input_file,
    outputs=[output_text, output_class],
    title="Resume Classification and Parsing for Intelligent Applicant Screening",
    theme=gr.themes.Soft(),
)
demo.launch(share=True)