-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathkeyword_search.py
57 lines (42 loc) · 1.54 KB
/
keyword_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from __future__ import print_function

# Third-party: Biopython (Entrez client, MEDLINE parser) and pandas.
from Bio import Entrez
from Bio import Medline
# The star import is kept ahead of the explicit imports below so that, on any
# name clash, the explicitly imported names (json, chain, re) stay bound.
from pandas import *
import urllib2  # Python 2 only
import json
from itertools import chain
import re
# --- Configuration -----------------------------------------------------------
# Contact address handed to Biopython's Entrez module.
# NOTE(review): the value looks like a redacted placeholder -- confirm that a
# real address is configured before running against NCBI.
Entrez.email = "[email protected]"
bioconcept = "Gene,Mutation,Disease"  # just replace with whatever associations you are making
# NOTE(review): this shadows the builtin format(); the name is kept unchanged
# because other code may read it.
format = 'PubTator'
max_res = 500  # passed to esearch as retmax
accep_pub_types = ["Journal Article", "Clinical Trial"]  # add more if needed

# --- Search: collect PubMed ids ------------------------------------------------
# reldate=7 restricts the search to the last 7 days.
# rettype/retmode are intentionally not passed here: "medline"/"text" are
# efetch values, and Entrez.read() below needs esearch's default XML reply.
# NOTE(review): term is an empty string -- presumably a real query is meant to
# be supplied here; confirm with the author.
handle = Entrez.esearch(db="pubmed", term="", reldate=7, retmax=max_res)
try:
    record = Entrez.read(handle)
finally:
    # Release the connection even if parsing the reply fails.
    handle.close()
idlist = record["IdList"]

# --- Fetch: download the matching records in MEDLINE text format ---------------
if idlist:
    handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline", retmode="text")
    try:
        # Medline.parse() is consumed into a list while the handle is still
        # open, so `records` remains usable after the handle is closed.
        records = list(Medline.parse(handle))
    finally:
        handle.close()
else:
    # Nothing matched: skip efetch rather than send a request with no ids.
    records = []

# Matches the literal text "Disease", a tab, "D" and six word characters.
# NOTE(review): presumably a disease identifier column in PubTator output --
# confirm against the data this is applied to.
pattern = re.compile(r"Disease\tD\w\w\w\w\w\w")
# Free-text query phrases, one per line, in their original order.
# NOTE(review): the name suggests these are cancer-hallmark search terms;
# nothing in the visible code consumes the list yet -- confirm intended use.
hallmark_queries = [
    'proliferation receptor',
    'growth factor',
    'cell cycle',
    'contact inhibition',
    'apoptosis',
    'necrosis',
    'autophagy',
    'senescence',
    'immortalization',
    'angiogenesis',
    'angiogenic factor',
    'metastasis',
    'mutation',
    'DNA repair',
    'adducts',
    'DNA damage',
    'inflammation',
    'oxidative stress',
    'warburg effect',
    'growth',
    'activation',
    'immune system',
]