-
Notifications
You must be signed in to change notification settings - Fork 145
/
Copy pathdl_based_parser_predict.py
36 lines (24 loc) · 1.14 KB
/
dl_based_parser_predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import sys
import os
def main():
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from keras_en_parser_and_analyzer.library.dl_based_parser import ResumeParser
from keras_en_parser_and_analyzer.library.utility.io_utils import read_pdf_and_docx
current_dir = os.path.dirname(__file__)
current_dir = current_dir if current_dir is not '' else '.'
data_dir_path = current_dir + '/data/resume_samples' # directory to scan for any pdf and docx files
def parse_resume(file_path, file_content):
print('parsing file: ', file_path)
parser = ResumeParser()
parser.load_model(current_dir + '/models')
parser.parse(file_content)
print(parser.raw) # print out the raw contents extracted from pdf or docx files
if parser.unknown is False:
print(parser.summary())
print('++++++++++++++++++++++++++++++++++++++++++')
collected = read_pdf_and_docx(data_dir_path, command_logging=True, callback=lambda index, file_path, file_content: {
parse_resume(file_path, file_content)
})
print('count: ', len(collected))
if __name__ == '__main__':
main()