-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreport_export.py
68 lines (58 loc) · 1.75 KB
/
report_export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""
Fetch acquisition files linked to project/subject/session labels from Flywheel
and upload the records to the warehouse DB.
"""
import os
import re
import json
import flywheel
import pandas
from sqlalchemy import create_engine
fw_api_token = os.getenv("FLYWHEEL_API_TOKEN")
db_url = os.getenv("D3B_WAREHOUSE_DB_URL")
assert fw_api_token and db_url
print("Starting scour")
fw = flywheel.Client(fw_api_token)
db = create_engine(db_url)
table = "flywheel_report_export"
# Get file metadata quickly with Views. This is relatively fast.
view = fw.View(
container="acquisition",
filename="*",
match="all",
columns=[
"file.name",
"file.size",
"file.created",
"file.modified",
],
include_ids=True,
include_labels=True,
process_files=False,
sort=False,
)
all_data = []
# for project in fw.projects.iter():
proj_name = 'CBTN_reports'
project = fw.projects.find_first(f'label={proj_name}')
pid = project.id
name = project.label
print(f"Fetching view for project {name} ({pid})...")
d = json.load(fw.read_view_data(view, pid, decode=False, format="json-flat"))
all_data.extend(d)
# Send file records to database.
if all_data:
rex = re.compile(r"(?<!_)(?=[A-Z])")
df = (
pandas.DataFrame(all_data)
.rename(columns=lambda x: x.replace(".", "_")) # avoid "." in colnames
.rename(columns=lambda x: rex.sub("_", x).lower()) # camelcase to snake
)
print(f"Submitting {len(df)} records to the '{table}' table in {repr(db.url)}...")
df.to_sql(
table, db, schema = "fw_cloud", index=False, if_exists="replace", chunksize=10000, method="multi"
)
with db.connect() as conn:
conn.execute(f"GRANT SELECT ON {table} TO public")
else:
print("No files found.")