-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmapi.py
134 lines (123 loc) · 3.5 KB
/
mapi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from flask import Flask, jsonify
#import the FlaskElasticsearch package
from flask.ext.elasticsearch import Elasticsearch
#import json
# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
# Elasticsearch client constructed with no arguments, so it relies on the
# extension's defaults. NOTE(review): presumably targets a local node -- confirm.
es = Elasticsearch()
#Assumption: objectId is download_id
#Parses the Elasticsearch response and reshapes it to resemble the ICGC Data Portal
#response as closely as possible. The string "DUMMY" is used when no data is available for a field.
def _build_file_hit(source):
    """Map one Elasticsearch ``_source`` document onto an ICGC-style file entry.

    Fields that have no counterpart in the index, and fields missing from
    ``source``, are filled with the placeholder string 'DUMMY'.
    """
    def field(name):
        return source.get(name, 'DUMMY')

    file_name = field('title')
    file_format = field('file_type')
    analysis_type = field('analysis_type')

    # NOTE(review): the key spellings 'objectID' and 'repoMetadatapath' are
    # kept exactly as emitted before, since clients may already rely on them.
    return {
        'id': 'DUMMY',
        'objectID': 'DUMMY',
        'access': 'DUMMY',
        'study': ['DUMMY'],
        'dataCategorization': {
            'dataType': analysis_type,
            'experimentalStrategy': 'DUMMY',
        },
        'fileCopies': [{
            'repoDataBundleId': 'DUMMY',
            'repoDataSetIds': [],
            'repoCode': 'DUMMY',
            'repoOrg': 'DUMMY',
            'repoName': 'DUMMY',
            'repoType': 'DUMMY',
            'repoCountry': 'DUMMY',
            'repoBaseUrl': 'DUMMY',
            'repoDataPath': 'DUMMY',
            'repoMetadatapath': 'DUMMY',
            'indexFile': {
                'id': 'DUMMY',
                # Assumption carried over from the file header:
                # objectId is the document's download_id.
                'objectId': field('download_id'),
                'fileName': file_name,
                'fileFormat': file_format,
                'fileMd5sum': 'DUMMY',
                'fileSize': 'DUMMY',
            },
            'fileName': file_name,
            'fileFormat': file_format,
            'fileMd5sum': 'DUMMY',
            'lastModified': 'DUMMY',
        }],
        'donors': [{
            'donorId': field('donor'),
            'primarySite': 'DUMMY',
            'projectCode': field('project'),
            'study': 'DUMMY',
            'sampleId': ['DUMMY'],
            'specimenType': [field('specimen_type')],
            'submittedDonorId': 'DUMMY',
            'submittedSampleId': ['DUMMY'],
            'submittedSpecimenId': ['DUMMY'],
            'otherIdentifiers': {
                'tcgaSampleBarcode': ['DUMMY'],
                'tcgaAliquotBarcode': ['DUMMY'],
            },
        }],
        'analysisMethod': {
            'analysisType': analysis_type,
            'software': 'DUMMY',
        },
        'referenceGenome': {
            'genomeBuild': 'DUMMY',
            'referenceName': 'DUMMY',
            'downloadUrl': 'DUMMY',
        },
    }


def parse_ES_response(es_dict, page_size=25):
    """Reshape an Elasticsearch search response into an ICGC-portal-like dict.

    Args:
        es_dict: Search response containing ``hits.hits`` (each entry with a
            ``_source`` dict), ``hits.total`` and, optionally,
            ``aggregations`` made of bucketed facets.
        page_size: Number of hits per page; used for ``pagination.size`` and
            to compute ``pagination.pages``. Defaults to 25.

    Returns:
        A dict with three keys: ``hits`` (one entry per returned document),
        ``pagination`` and ``termFacets`` (each aggregation copied and
        extended with a ``total`` equal to the sum of its buckets'
        ``doc_count``). ``es_dict`` itself is not modified.

    Raises:
        ValueError: If ``page_size`` is not positive.
        KeyError: If ``hits``, ``hits.hits``, ``hits.total``, a hit's
            ``_source`` or a facet's ``buckets`` is missing.
    """
    if page_size <= 0:
        raise ValueError('page_size must be a positive integer')

    hits_section = es_dict['hits']
    hits = [_build_file_hit(hit['_source']) for hit in hits_section['hits']]

    total = hits_section['total']
    # Elasticsearch 7+ reports the total as {'value': N, 'relation': ...}
    # rather than a bare integer; accept both shapes.
    if isinstance(total, dict):
        total = total['value']

    # Build fresh facet dicts so the caller's response is left untouched.
    term_facets = {}
    for facet_name, facet in es_dict.get('aggregations', {}).items():
        summarized = dict(facet)
        summarized['total'] = sum(
            bucket['doc_count'] for bucket in facet['buckets']
        )
        term_facets[facet_name] = summarized

    return {
        'hits': hits,
        'pagination': {
            # Number of hits actually present in this page.
            'count': len(hits),
            'total': total,
            'size': page_size,
            'from': 1,
            'page': 1,
            # Ceiling division without floats: -(-a // b) == ceil(a / b).
            'pages': -(-total // page_size),
            'sort': 'DUMMY',
            'order': 'desc',
        },
        'termFacets': term_facets,
    }
#This returns the aggregate terms and the list of hits from Elasticsearch
@app.route('/')
def get_data():
    """Serve the root URL with file hits and term facets from Elasticsearch.

    Runs a match_all search against the 'mfiles' index with size=25,
    requests one terms aggregation per facet, and returns the response
    reshaped by parse_ES_response as JSON.
    """
    # (facet name, indexed field, maximum number of buckets requested)
    facet_specs = (
        ("dataType", "analysis_type", 9999),
        ("projectCode", "project", 99999),
        ("specimenType", "specimen_type", 99999),
        ("fileFormat", "file_type", 99999),
    )

    aggregations = {}
    for facet_name, field_name, bucket_limit in facet_specs:
        aggregations[facet_name] = {
            "terms": {"field": field_name, "size": bucket_limit},
        }

    search_body = {"query": {"match_all": {}}, "aggs": aggregations}
    es_response = es.search(index='mfiles', body=search_body, size=25)
    return jsonify(parse_ES_response(es_response))