runStat.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. import statUtils
  2. import os
  3. import radiomics
  4. import SimpleITK
  5. import sys
  6. import json
  7. import numpy
  8. def main(parFile='../templates/statistics.json'):
  9. featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
  10. setup=statUtils.loadSetup(parFile)
  11. rFile='radiomics.json'
  12. forceLiver=setup.get('forceLiver',False)
  13. forceSUVMax=setup.get('forceSUVMax',False)
  14. forceLiver15=setup.get('forceLiver15',False)
  15. forceGlobal=setup.get('forceGlobal',False)
  16. forceSUV4=setup.get('forceSUV4',False)
  17. forceSUV4=setup.get('forceSUV4',False)
  18. forceSUVMaxLesions=setup.get('forceSUVMaxLesions',False)
  19. forceSUVMaxMetastases=setup.get('forceSUVMaxMetastases',False)
  20. petField=setup.get('petField','petResampled')
  21. doUpload=setup.get('doUpload',True)
  22. #update threshold values if needed
  23. with open(rFile,'w') as f:
  24. f.write(json.dumps(setup['radiomics']))
  25. setup['db'],setup['fb']=statUtils.connectDB('onko-nix')
  26. users=statUtils.getUsers(setup['db'],setup['project'])
  27. p=setup['project']
  28. qFilter=[]
  29. try:
  30. vList=';'.join(setup['participants'])
  31. qFilter.append({'variable':'ParticipantId','value':vList,'oper':'in'})
  32. except KeyError:
  33. pass
  34. try:
  35. vList=';'.join(setup['visits'])
  36. qFilter.append({'variable':'visitCode','value':vList,'oper':'in'})
  37. except KeyError:
  38. pass
  39. ds=setup['db'].selectRows(p,'study',setup['imagingDataset'],qFilter)
  40. if not os.path.isdir(setup['localDir']):
  41. os.mkdir(setup['localDir'])
  42. #select just the first row; debugging
  43. rows=ds['rows']
  44. setup['values']=['COM','MTV','TLG','SUVmean','SUVmax','voxelCount','SUVSD']
  45. setup['featureExtractor']=featureExtractor(rFile)
  46. #make sure we calculate Variance
  47. firstOrder=setup['radiomics']['featureClass']['firstorder']
  48. if 'Variance' not in firstOrder:
  49. firstOrder.append('Variance')
  50. n=setup.get('n',-1)
  51. if n>0:
  52. rows=rows[:n]
  53. for r in rows:
  54. setup['SUVdataset']='SUVanalysis'
  55. globalDone=not forceGlobal and checkData(setup,r)
  56. #check if we have to do calculation
  57. setup['SUVdataset']='SUVanalysis_liver'
  58. liverDone=not forceLiver and checkData(setup,r)
  59. setup['SUVdataset']='SUVanalysis_SUVmax'
  60. suvMaxDone=not forceSUVMax and checkData(setup,r)
  61. setup['SUVdataset']='SUVanalysis_SUVmaxMetastases'
  62. suvMaxMetastasesDone=not forceSUVMaxMetastases and checkData(setup,r)
  63. setup['SUVdataset']='SUVanalysis_SUVmaxLesions'
  64. suvMaxLesionsDone=not forceSUVMaxLesions and checkData(setup,r)
  65. setup['SUVdataset']='SUVanalysis_liver1p5'
  66. liver1p5Done=not forceLiver15 and checkData(setup,r)
  67. setup['SUVdataset']='SUVanalysis_SUV4'
  68. suv4Done=not forceSUV4 and checkData(setup,r)
  69. doneCode=f'({globalDone}/{liverDone}/{liver1p5Done}/{suvMaxDone}/{suv4Done}/{suvMaxLesionsDone}/{suvMaxMetastasesDone})'
  70. codeDesc='(global/liver/liver1p5/suvMax/suv4/suvMaxLesions/suvMaxMetastases)'
  71. idVar='ParticipantId'
  72. pid=r[idVar]
  73. visitCode=r['visitCode']
  74. print(f'Done[{pid}/{visitCode}: {codeDesc}: {doneCode}')
  75. if globalDone and liverDone and suvMaxDone and \
  76. liver1p5Done and suv4Done and \
  77. suvMaxLesionsDone and suvMaxMetastasesDone:
  78. print('Skipping {} {}'.format(r['ParticipantId'],r['visitCode']))
  79. continue
  80. #PET
  81. for q in [petField]:
  82. localPath=statUtils.getImage(setup,r,q)
  83. if localPath=="NONE":
  84. continue
  85. pet=SimpleITK.ReadImage(localPath)
  86. #Seg
  87. segPaths=statUtils.getSegmentations(setup,r)
  88. if "NONE" in segPaths.values():
  89. os.remove(localPath)
  90. continue
  91. segKeys=list(segPaths.keys())
  92. for x in segPaths:
  93. print('Loaded {}/{}'.format(users[x],segPaths[x]))
  94. seg={x:SimpleITK.ReadImage(segPaths[x]) for x in segPaths}
  95. #check dimensions of seg and pet
  96. if not matchingDimensions(pet,seg):
  97. continue
  98. globalThreshold={x:0 for x in seg}
  99. if not globalDone:
  100. #get value for maximum in organs or liver mean and std
  101. setup['SUVdataset']='SUVanalysis'
  102. globalOutputs=thresholdAnalysis(setup,r,pet,seg,globalThreshold)
  103. if doUpload:
  104. uploadData(setup,r,globalOutputs)
  105. default={'SUVmax':0,'SUVmean':0,'SUVSD':0}
  106. setup['SUVdataset']='SUVanalysis'
  107. outputs=loadGlobals(setup,r,seg)
  108. #this are IDs from labkey
  109. liverId=1
  110. lesionId=4
  111. bmId=3
  112. metastasesId=6
  113. #liver threshold
  114. if not liverDone:
  115. liverThreshold={x:outputs[x].get(liverId,default)['SUVmean']
  116. +2*outputs[x].get(liverId,default)['SUVSD']
  117. for x in outputs}
  118. print(f'thr[liver]={liverThreshold}')
  119. setup['SUVdataset']='SUVanalysis_liver'
  120. liverOutputs=thresholdAnalysis(setup,r,pet,seg,liverThreshold)
  121. if doUpload:
  122. uploadData(setup,r,liverOutputs)
  123. #also for threshold=1.5
  124. if not liver1p5Done:
  125. liver1p5Threshold={x:1.5*outputs[x].get(liverId,default)['SUVmean']
  126. +2*outputs[x].get(liverId,default)['SUVSD']
  127. for x in outputs}
  128. print(f'thr[liver/1.5]={liver1p5Threshold}')
  129. setup['SUVdataset']='SUVanalysis_liver1p5'
  130. liver1p5Outputs=thresholdAnalysis(setup,r,pet,seg,liver1p5Threshold)
  131. if doUpload:
  132. uploadData(setup,r,liver1p5Outputs)
  133. #threshold=0.41 SUVmax
  134. if not suvMaxDone:
  135. idArr=[lesionId,bmId,metastasesId]
  136. fM=numpy.max
  137. suvMax={x:fM([outputs[x].get(y,default)['SUVmax'] for y in idArr])}
  138. suvMaxThreshold={x:0.41*suvMax[x] for x in suvMax}
  139. print(f'thr[SUVmax]={suvMaxThreshold}')
  140. setup['SUVdataset']='SUVanalysis_SUVmax'
  141. suvMaxOutputs=thresholdAnalysis(setup,r,pet,seg,suvMaxThreshold)
  142. if doUpload:
  143. uploadData(setup,r,suvMaxOutputs)
  144. #threshold=0.41 SUVMax (Lesion)
  145. if not suvMaxLesionsDone:
  146. suvMaxLesionsThreshold=\
  147. {x:0.41*outputs[x].get(lesionId,default)['SUVmax'] \
  148. for x in outputs}
  149. print(f'thr[SUVmaxLesions]={suvMaxLesionsThreshold}')
  150. setup['SUVdataset']='SUVanalysis_SUVmaxLesions'
  151. suvMaxLesionsOutputs=thresholdAnalysis(setup,r,pet,seg,
  152. suvMaxLesionsThreshold)
  153. if doUpload:
  154. uploadData(setup,r,suvMaxLesionsOutputs)
  155. #threshold=0.41 SUVMax (Metastases)
  156. if not suvMaxMetastasesDone:
  157. suvMaxMetastasesThreshold=\
  158. {x:0.41*outputs[x].get(metastasesId,default)['SUVmax'] \
  159. for x in outputs}
  160. print(f'thr[SUVmaxMetastases]={suvMaxMetastasesThreshold}')
  161. setup['SUVdataset']='SUVanalysis_SUVmaxMetastases'
  162. suvMaxMetastasesOutputs=thresholdAnalysis(setup,r,pet,seg,\
  163. suvMaxMetastasesThreshold)
  164. if doUpload:
  165. uploadData(setup,r,suvMaxMetastasesOutputs)
  166. if not suv4Done:
  167. suv4Threshold={x:4 for x in outputs}
  168. print(f'thr[SUV4]={suv4Threshold}')
  169. setup['SUVdataset']='SUVanalysis_SUV4'
  170. suv4Outputs=thresholdAnalysis(setup,r,pet,seg,suv4Threshold)
  171. if doUpload:
  172. uploadData(setup,r,suv4Outputs)
  173. #cleanup
  174. os.remove(localPath)
  175. for x in segPaths:
  176. os.remove(segPaths[x])
  177. def loadGlobals(setup,r,seg):
  178. matchingVisits={
  179. 'VISIT_0':['VISIT_0'],
  180. 'VISIT_1':['VISIT_1','VISIT_11'],
  181. 'VISIT_11':['VISIT_1','VISIT_11'],
  182. 'VISIT_2':['VISIT_2','VISIT_12'],
  183. 'VISIT_12':['VISIT_2','VISIT_12'],
  184. 'VISIT_3':['VISIT_3','VISIT_13'],
  185. 'VISIT_13':['VISIT_3','VISIT_13'],
  186. 'VISIT_4':['VISIT_4','VISIT_14'],
  187. 'VISIT_14':['VISIT_4','VISIT_14']}
  188. ids=[1,2,3,4,5,6]
  189. p=setup['project']
  190. d=setup['SUVdataset']
  191. idVar='ParticipantId'
  192. vCodes=';'.join(matchingVisits[r['visitCode']])
  193. vars=['MTV','TLG','SUVmean','SUVmax','SUVSD']
  194. outputs={}
  195. for x in seg:
  196. outputs[x]={}
  197. for i in ids:
  198. qFilter=[]
  199. qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
  200. qFilter.append({'variable':'visitCode','value':vCodes,'oper':'in'})
  201. qFilter.append({'variable':'User','value':f'{x}','oper':'eq'})
  202. qFilter.append({'variable':'segment','value':f'{i}','oper':'eq'})
  203. ds=setup['db'].selectRows(p,'study',d,qFilter)
  204. outputs[x][i]={v:combineData(ds['rows'],v) for v in vars}
  205. return outputs
  206. def combineData(rows,x):
  207. dt=numpy.array([float(r[x] or 0) for r in rows])
  208. #print('Combine data[{}] {}'.format(x,dt))
  209. r=0
  210. if x=='MTV' or x=='TLG':
  211. r=numpy.sum(dt)
  212. if x=='SUVmean':
  213. m=combineData(rows,'MTV')
  214. if m==0:
  215. r=0
  216. else:
  217. r=combineData(rows,'TLG')/m
  218. if x=='SUVmax':
  219. try:
  220. r=numpy.max(dt)
  221. except ValueError:
  222. #empty array
  223. r=0
  224. if x=='SUVSD':
  225. #this should be probably be done right,
  226. #here I only want it to work also if one of the components is 0
  227. r=numpy.sqrt(numpy.sum(dt*dt))
  228. #print(f'Result: {r}')
  229. return r
  230. def thresholdAnalysis(setup,r,pet,seg,thrs):
  231. #thresholds thrs are by segmentation author
  232. outputs={}
  233. for s in thrs:
  234. t=thrs[s]
  235. print(f'Thr[{s}]: {t}')
  236. #update radiomics setting
  237. setup['radiomics']['setting']['resegmentRange']=[thrs[s]]
  238. setup['radiomics']['setting']['resegmentShape']=True
  239. outputs[s]=getValuesForSegmentation(setup,r,pet,seg[s])
  240. _=[outputs[s][y].update({'threshold':thrs[s]}) for y in outputs[s]]
  241. print(outputs)
  242. return outputs
  243. def getValues(setup,row,pet,seg):
  244. #seg is a list of segments
  245. rFile='radiomics.json'
  246. #short function names
  247. featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
  248. getStats=statUtils.getRadiomicsComponentStats
  249. with open(rFile,'w') as f:
  250. f.write(json.dumps(setup['radiomics']))
  251. setup['featureExtractor']=featureExtractor(rFile)
  252. #find labels associated with each (non-overlaping) segmentation
  253. ids=statUtils.getSegments(list(seg.values())[0])
  254. outputs={x:{} for x in seg}
  255. for x in seg:
  256. for id in ids:
  257. print('{} {}'.format(id,ids[id]))
  258. try:
  259. output=getStats(setup,pet,seg[x],ids[id])
  260. except ValueError:
  261. continue
  262. outputs[x][ids[id]]=output
  263. os.remove(rFile)
  264. return outputs
  265. def getValuesForSegmentation(setup,row,pet,seg):
  266. #short function names
  267. featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
  268. getStats=statUtils.getRadiomicsComponentStats
  269. rFile='radiomics.json'
  270. with open(rFile,'w') as f:
  271. f.write(json.dumps(setup['radiomics']))
  272. setup['featureExtractor']=featureExtractor(rFile)
  273. #find labels associated with each (non-overlaping) segmentation
  274. ids=statUtils.getSegments(seg)
  275. outputs={}
  276. for id in ids:
  277. print('{} {}'.format(id,ids[id]))
  278. try:
  279. output=getStats(setup,pet,seg,ids[id])
  280. except ValueError:
  281. continue
  282. outputs[ids[id]]=output
  283. os.remove(rFile)
  284. return outputs
  285. def uploadData(setup,r,outputs):
  286. baseVar=['ParticipantId','SequenceNum','patientCode','visitCode']
  287. p=setup['project']
  288. d=setup['SUVdataset']
  289. for x in outputs:
  290. for s in outputs[x]:
  291. output=outputs[x][s]
  292. output.update({x:r[x] for x in baseVar})
  293. output['User']=x
  294. output['segment']=s
  295. statUtils.updateDatasetRows(setup['db'],p,d,[output])
  296. def checkData(setup,r):
  297. p=setup['project']
  298. d=setup['SUVdataset']
  299. qFilter=[]
  300. idVar='ParticipantId'
  301. qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
  302. qFilter.append({'variable':'visitCode','value':r['visitCode'],'oper':'eq'})
  303. ds=setup['db'].selectRows(p,'study',d,qFilter)
  304. try:
  305. n=len(ds['rows'])
  306. except KeyError:
  307. #contraintuitevly return as if data were there
  308. #if ds has no rows, which typically happens
  309. #on failure to retrieve dataset, which is
  310. #related to missing datasets
  311. print(f'Dataset [{d}] not found. Returning OK')
  312. return True
  313. print('[{}:{}/{}] got {} rows.'.format(d,r[idVar],r['visitCode'],n))
  314. return n>0
  315. def vEqual(a,b,eps=1e-8):
  316. delta=numpy.array(a)-numpy.array(b)
  317. aDelta=numpy.abs(delta)
  318. #print(f'Delta={delta} abs(Delta)={aDelta}')
  319. return all(aDelta<eps)
  320. def matchingDimensions(pet,seg):
  321. #QA routine
  322. #check if dimensnions of pet and seg match
  323. #seg is a dict {'ID':sitkImage}
  324. #print('[PET] size={} origin={}'.format(pet.GetSize(),pet.GetOrigin()))
  325. sPET=pet.GetSize()
  326. ok=[vEqual(seg[x].GetSize(),sPET) for x in seg]
  327. if not all(ok):
  328. sz={x:seg[x].GetSize() for x in seg}
  329. print('Size mismatch PET[{}]: {}'.format(sPET,sz))
  330. return False
  331. return True
  332. if __name__=='__main__':
  333. main(sys.argv[1])