-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsummary.py
64 lines (38 loc) · 1.62 KB
/
summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest
import streamlit as st
_NLP_CACHE = {}


def _load_model(name):
    """Return the spaCy pipeline called *name*, loading it on first use only."""
    if name not in _NLP_CACHE:
        _NLP_CACHE[name] = spacy.load(name)
    return _NLP_CACHE[name]


def para_summary(text, text_len):
    """Render an extractive summary of *text* as a numbered Streamlit list.

    Every non-stop-word, non-punctuation token is counted case-insensitively
    and its count is normalised by the highest count. Each sentence is scored
    by summing the normalised counts of its tokens, and the best-scoring
    sentences are written out with ``st.markdown``, highest score first.

    Args:
        text: The text to summarise.
        text_len: Share of the sentences to keep. The value is divided by
            100, i.e. it is treated as a percentage.
            NOTE(review): presumably a 0-100 slider value; confirm at caller.

    Returns:
        None. The summary is emitted through ``st.markdown`` only.
    """
    # Loading the model is expensive; reuse one instance across calls.
    doc = _load_model('en_core_web_sm')(text)

    # Key the table by lower-cased text so that the lower-cased lookups in
    # the scoring loop below actually hit (the keys used to keep their
    # original case, which silently dropped every capitalised word).
    word_frequencies = {}
    for token in doc:
        if token.is_punct or token.is_space:
            # Whitespace tokens such as "\n" are not words and would
            # otherwise be able to dominate the frequency table.
            continue
        word = token.text.lower()
        if word in STOP_WORDS or word in punctuation:
            continue
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

    # default=1 keeps the normalisation safe when no token was counted.
    max_frequency = max(word_frequencies.values(), default=1)
    word_frequencies = {
        word: count / max_frequency
        for word, count in word_frequencies.items()
    }

    sentences = list(doc.sents)
    sentence_score = {}
    for sent in sentences:
        for token in sent:
            weight = word_frequencies.get(token.text.lower())
            if weight is not None:
                sentence_score[sent] = sentence_score.get(sent, 0) + weight

    # Multiply before dividing: int(100 * (29 / 100)) is 28 because of
    # float rounding, whereas int(100 * 29 / 100) is the intended 29.
    select_length = int(len(sentences) * text_len / 100)
    if select_length < 1 and text_len > 0 and sentence_score:
        # A short text with a small percentage would otherwise truncate to
        # zero sentences and show nothing at all.
        select_length = 1

    summary = nlargest(select_length, sentence_score, key=sentence_score.get)
    for position, sent in enumerate(summary, start=1):
        st.markdown(f"{position}. {sent.text}")