-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnih-report.py
254 lines (207 loc) · 8.53 KB
/
nih-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/bin/env python
"""
Prepare an NIH report
"""
################################################################################
# Metadata for report
# TODO: Move this to command-line arguments that specify the grant id, and autodetect the reporting period
################################################################################
import datetime
# Start and end date for reporting period
# NOTE: Three presets follow. Each one re-assigns the same three module-level
# names (reporting_period_start, reporting_period_end, grant_id), so only the
# last preset is in effect; the earlier two are overwritten before any
# function in this file reads them.
reporting_period_start = datetime.date.fromisoformat('2021-09-01')
reporting_period_end = datetime.date.fromisoformat('2022-12-12')
# Grant ID to report
grant_id = 'NIH R01 GM121505' # kinase grant
# OpenMM grant report (overwrites the preset above)
reporting_period_start = datetime.date.fromisoformat('2022-04-01')
reporting_period_end = datetime.date.fromisoformat('2023-03-31')
grant_id = 'NIH R01 GM140090' # openmm grant
# NIH R35 GM152017 (last preset: these are the values the rest of the script uses)
reporting_period_start = datetime.date.fromisoformat('2024-03-01')
reporting_period_end = datetime.date.fromisoformat('2025-02-28')
grant_id = 'NIH R35 GM152017' # openmm grant
################################################################################
# Helper functions to act as filters
################################################################################
def funded_by_grant(paper, grant_id):
    """
    Determine whether the paper was funded by the specified grant,
    and if specified, the rationale behind how the paper aligns with the grant.

    Parameters
    ----------
    paper : dict
        The 'paper' entry from the papers.yaml database.
        Its optional 'funding' entry is a list whose items are either
        a grant id (str) or a dict with an 'id' key and an optional 'rationale' key.
    grant_id : str
        The grant id from grants.yaml to query for

    Returns
    -------
    rationale : bool or str
        If the matching funding entry carries a non-empty str rationale, that str is returned.
        Otherwise True is returned if the paper lists grant_id as a funding source, False if not.
        The result is always truthy when the paper is funded by the grant, so it can be
        used directly in a boolean test.
    """
    # A missing or unusable 'funding' entry means the paper is not funded by this grant
    funding = paper.get('funding') if isinstance(paper, dict) else None
    if not isinstance(funding, (list, tuple)):
        return False

    for grant in funding:
        if isinstance(grant, dict):
            # key: value entries may have a rationale.
            # Skip entries without an 'id' instead of aborting the whole scan,
            # so one malformed entry cannot hide a later matching one.
            if 'id' not in grant or grant['id'] != grant_id:
                continue
            rationale = grant.get('rationale')
            # A blank or null rationale must not make a funded paper look unfunded
            # (an empty str or None would be falsy for the caller).
            if isinstance(rationale, str) and rationale:
                return rationale
            return True
        # If we haven't specified a dict, there can be no rationale
        if isinstance(grant, str) and grant == grant_id:
            return True

    return False
def published_during_reporting_period(paper, start=None, end=None):
    """Return True if paper was published during the grant reporting period.

    Parameters
    ----------
    paper : dict
        The 'paper' entry from the papers.yaml database.
        The date is read from paper['published']['dates']['published'].
    start : datetime.date, optional, default=None
        First day of the reporting period (inclusive).
        If None, the module-level reporting_period_start is used.
    end : datetime.date, optional, default=None
        Last day of the reporting period (inclusive).
        If None, the module-level reporting_period_end is used.

    Returns
    -------
    bool
        True if a publication date is present and start <= date <= end.
        False if the date is missing or is not a date.
    """
    # Resolve the defaults at call time so the current module-level period is used
    if start is None:
        start = reporting_period_start
    if end is None:
        end = reporting_period_end

    # Only the lookup is guarded: a missing key or a null/non-dict level means "no date"
    try:
        when = paper['published']['dates']['published']
    except (KeyError, TypeError):
        return False

    # A timestamp with a time component is a datetime, which cannot be
    # compared with date bounds; reduce it to its calendar date first.
    if isinstance(when, datetime.datetime):
        when = when.date()
    if not isinstance(when, datetime.date):
        return False

    return start <= when <= end
def accepted_during_reporting_period(paper, start=None, end=None):
    """Return True if paper was accepted during the grant reporting period.

    Parameters
    ----------
    paper : dict
        The 'paper' entry from the papers.yaml database.
        The date is read from paper['published']['dates']['accepted'].
    start : datetime.date, optional, default=None
        First day of the reporting period (inclusive).
        If None, the module-level reporting_period_start is used.
    end : datetime.date, optional, default=None
        Last day of the reporting period (inclusive).
        If None, the module-level reporting_period_end is used.

    Returns
    -------
    bool
        True if an acceptance date is present and start <= date <= end.
        False if the date is missing or is not a date.
    """
    # Resolve the defaults at call time so the current module-level period is used
    if start is None:
        start = reporting_period_start
    if end is None:
        end = reporting_period_end

    # Only the lookup is guarded: a missing key or a null/non-dict level means "no date"
    try:
        when = paper['published']['dates']['accepted']
    except (KeyError, TypeError):
        return False

    # A timestamp with a time component is a datetime, which cannot be
    # compared with date bounds; reduce it to its calendar date first.
    if isinstance(when, datetime.datetime):
        when = when.date()
    if not isinstance(when, datetime.date):
        return False

    return start <= when <= end
def preprinted_during_reporting_period(paper, start=None, end=None):
    """Return True if a preprint of the paper was posted during the grant reporting period.

    Parameters
    ----------
    paper : dict
        The 'paper' entry from the papers.yaml database.
        The date is read from paper['preprint']['date'].
    start : datetime.date, optional, default=None
        First day of the reporting period (inclusive).
        If None, the module-level reporting_period_start is used.
    end : datetime.date, optional, default=None
        Last day of the reporting period (inclusive).
        If None, the module-level reporting_period_end is used.

    Returns
    -------
    bool
        True if a preprint date is present and start <= date <= end.
        False if the date is missing or is not a date.
    """
    # Resolve the defaults at call time so the current module-level period is used
    if start is None:
        start = reporting_period_start
    if end is None:
        end = reporting_period_end

    # Only the lookup is guarded: a missing key or a null/non-dict level means "no date"
    try:
        when = paper['preprint']['date']
    except (KeyError, TypeError):
        return False

    # A timestamp with a time component is a datetime, which cannot be
    # compared with date bounds; reduce it to its calendar date first.
    if isinstance(when, datetime.datetime):
        when = when.date()
    if not isinstance(when, datetime.date):
        return False

    return start <= when <= end
################################################################################
# Rendering function
################################################################################
def show_paper(paper, show_links=False, grant_id=None):
    """Render the paper as Markdown

    Prints, in order: the title, the journal citation and DOI (or the preprint
    URL if the paper is not yet published), the author list, optionally the
    links, the description, and optionally the grant rationale.

    The author list is rendered as "A" for one author, "A and B" for two, and
    "A, B, and C" for three or more. Nothing is printed for an empty list.

    Rendering is best-effort: if any step raises (for example a required key is
    missing), the exception is printed and rendering stops, so earlier lines
    may already have been printed.

    Parameters
    ----------
    paper : dict
        The 'paper' entry from the papers.yaml database.
    show_links : bool, optional, default=False
        If True, will display links associated with the paper.
    grant_id : str, optional, default=None
        If specified, print the rationale for how the paper aligns with this grant.
    """
    try:
        # Title
        print(f"**{paper['title']}**")

        # Journal
        if 'published' in paper:
            published = paper['published']
            print("*{journal}* {volume}:{page}, {year}".format(**published))
            print(f"DOI: {published['doi']}")
        elif 'preprint' in paper:
            preprint = paper['preprint']
            print(f"Preprint: {preprint['url']}")

        # Authors
        # Build the whole line first so every author appears exactly once
        authors = [f"{author}" for author in paper['authors']]
        if len(authors) == 1:
            print(authors[0])
        elif len(authors) == 2:
            print(f"{authors[0]} and {authors[1]}")
        elif authors:
            print(f"{', '.join(authors[:-1])}, and {authors[-1]}")

        # Links
        if ('links' in paper) and show_links:
            for link in paper['links']:
                print(f"**{link['action']}:** {link['url']}")

        # Description
        if 'description' in paper:
            print(f"*{paper['description'].rstrip()}*")

        # Rationale: funded_by_grant returns a str only when one was recorded
        if grant_id is not None:
            rationale = funded_by_grant(paper, grant_id)
            if isinstance(rationale, str):
                print(f"{rationale.rstrip()}")

    except Exception as e:
        # Give up on rendering if we get stuck
        print(e)
def show_resources(paper):
    """Show all resources (links) associated with a given paper.

    For every entry of paper['links'] that has a 'description', prints the
    description as a bold Markdown heading, then the link's 'short' label and
    'url', then a blank line. Links without a 'description' are skipped.

    Parameters
    ----------
    paper : dict
        The 'paper' entry from the papers.yaml database.
    """
    # NOTE(review): the nesting of the three prints under the 'description' test
    # was reconstructed from a de-indented listing -- confirm against the original.
    # A missing 'links' key and an empty or null 'links' value are all "no links".
    for link in paper.get('links') or []:
        if 'description' not in link:
            continue
        print(f"**{link['description']}:**")
        print(f"*{link['short']}:* {link['url']}")
        print('')
################################################################################
# Load the databases
################################################################################
def load_databases():
    """
    Load all databases

    Reads 'papers.yaml' and 'grants.yaml', resolved relative to the current
    working directory.

    Returns
    -------
    db : dict
        db[dbname] is the database contents
        dbname is one of ['grants', 'papers']

    Raises
    ------
    OSError
        If one of the database files cannot be opened.
    """
    import yaml

    db = dict()
    for dbname in ['papers', 'grants']:
        # Decode explicitly as UTF-8 so non-ASCII text loads the same way
        # regardless of the platform's default encoding.
        with open(f'{dbname}.yaml', encoding='utf-8') as infile:
            # NOTE(review): FullLoader is only appropriate for trusted files;
            # consider yaml.safe_load if these databases can come from elsewhere.
            db[dbname] = yaml.load(infile, Loader=yaml.FullLoader)
    return db
if __name__ == '__main__':
    from collections import defaultdict

    # Report categories, listed in the order they are counted and reported.
    # Each label is spliced into the sentence "... the following {category}, funded by ...".
    categories = ('papers were published', 'manuscripts were accepted', 'preprints were posted')

    # Load the databases
    db = load_databases()

    # Extract all publications in the reporting period
    papers_to_report = defaultdict(list)
    papers = db['papers']
    for paper in papers:
        # Identify those papers that were funded by the grant
        if not funded_by_grant(paper, grant_id):
            continue

        # Identify papers published in the reporting range
        # Use a precedence scheme where we prefer published papers to accepted papers to preprints,
        # so each paper is filed under at most one category.
        if published_during_reporting_period(paper):
            papers_to_report['papers were published'].append(paper)
        elif accepted_during_reporting_period(paper):
            papers_to_report['manuscripts were accepted'].append(paper)
        elif preprinted_during_reporting_period(paper):
            papers_to_report['preprints were posted'].append(paper)

    # count papers
    for category in categories:
        # .get() avoids creating empty entries in the defaultdict
        reported = papers_to_report.get(category)
        if reported:
            print(f'{category:30} : {len(reported):4} papers')

    # report papers
    # NOTE(review): placement of the trailing blank line (once per category, after
    # its papers) was reconstructed from a de-indented listing -- confirm.
    for category in categories:
        reported = papers_to_report.get(category)
        if reported:
            print(f'Since the last reporting period, the following {category}, funded by this grant in part or whole:')
            for paper in reported:
                print('')
                show_paper(paper, grant_id=grant_id)
            print('')

    # Report all resources generated in the last reporting period
    print('------')
    print('')
    print('The following resources corresponding to papers that were published, accepted, or preprinted were generated in the reporting period:')
    print('')
    for category in categories:
        for paper in papers_to_report.get(category) or []:
            show_resources(paper)