-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinterviewbot.py
112 lines (80 loc) · 2.48 KB
/
interviewbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import numpy as np
import pandas as pd
import urllib.request
from bs4 import BeautifulSoup
from googlesearch import search
import os
from urllib.request import Request, urlopen
# package = sputnik.install('spacy', spacy.about.__version__, spacy.about.__download_url__)
# nlp = spacy.load('en_core_web_sm')
import en_core_web_sm
nlp = en_core_web_sm.load()
# Read the Qs.Bank Dump
my_path = os.path.abspath(os.path.dirname(__file__))
path = os.path.join(my_path, "java_sample-qs.xlsx")
df = pd.read_excel(path)
def random_qs_index():
qs_idx = np.random.randint(0, (len(df) - 1))
return qs_idx
def random_question(qs_idx):
qs = df['Question'][qs_idx]
return qs
def answer_tokens(ans):
ans_tokens = nlp(ans)
return ans_tokens
def original_ans_tokens(qs_idx):
orig_ans = df['Answer'][qs_idx]
orig_ans_tokens = nlp(orig_ans)
return orig_ans_tokens
# Check Similarity
def similarity_check(token1, token2):
sim = token1.similarity(token2)
return sim
# Keywords matching - Answer tokens vs keywords to produce a score
def key_matching(qs_idx):
keywords = df['Keywords'][qs_idx]
keywords = nlp(keywords)
return keywords
def key_similarity_check(keywords, ans_tokens):
score = keywords.similarity(ans_tokens)
return score
# Another approach to calculate score
def match_score(keywords, ans_tokens):
matches = 0
for k in keywords.text.split():
if k in list(ans_tokens.text.split()):
print(k, 'is found in answer')
matches += 1
print('matches = ', matches)
return matches
# Alternate score
def alternate_score(keywords, matches):
total = len(keywords)
if matches != 0:
alt_score = total // matches
return alt_score
else:
return 0
def google_links(qs):
# to search
query = qs
links = []
for j in search(query, tld="co.in", num=10, stop=1, pause=2):
links.append(j)
return links
def get_link_text(links):
texts = []
for link in links:
req = Request(link, headers={'User-Agent': 'Mozilla/5.10'})
req.add_header('Accept',
'application/json,text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
try:
soup = BeautifulSoup(urllib.request.urlopen(req), "html5lib")
# print(soup.text)
texts.append(soup.text)
except:
continue
return texts
def is_copied(texts, ans):
for text in texts:
return ans in text