Pārlūkot izejas kodu

Correcting for missing liver segmentation in threshold calculation when liver and disease are in different PET files

Andrej Studen 1 mēnesi atpakaļ
vecāks
revīzija
f9f17be78d
1 mainītis faili ar 127 papildinājumiem un 42 dzēšanām
  1. 127 42
      pythonScripts/runStat.py

+ 127 - 42
pythonScripts/runStat.py

@@ -7,14 +7,26 @@ import json
 import numpy
 
 def main(parFile='../templates/statistics.json'):
+
+
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+ 
     setup=statUtils.loadSetup(parFile)
     rFile='radiomics.json'
 
+    forceLiver=setup.get('forceLiver',False)
+    forceSUVMax=setup.get('forceSUVMax',False)
+    forceLiver15=setup.get('forceLiver15',False)
+    forceGlobal=setup.get('forceGlobal',False)
+
+    doUpload=setup.get('doUpload',True)
+
     #update threshold values if needed
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
     setup['db'],setup['fb']=statUtils.connectDB('onko-nix')
     users=statUtils.getUsers(setup['db'],setup['project'])
+    p=setup['project']
     qFilter=[]
     try:
         vList=';'.join(setup['participants'])    
@@ -28,14 +40,18 @@ def main(parFile='../templates/statistics.json'):
     except KeyError:
         pass
 
-    ds=setup['db'].selectRows(setup['project'],'study',setup['imagingDataset'],qFilter)
+    ds=setup['db'].selectRows(p,'study',setup['imagingDataset'],qFilter)
     if not os.path.isdir(setup['localDir']):
         os.mkdir(setup['localDir'])
     #select just the first row; debugging
     rows=ds['rows']
     setup['values']=['COM','MTV','TLG','SUVmean','SUVmax','voxelCount','SUVSD']
-    #params=os.path.join('..','templates','radiomics.yaml')
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
+
+    #make sure we calculate Variance
+    firstOrder=setup['radiomics']['featureClass']['firstorder']
+    if 'Variance' not in firstOrder:
+        firstOrder.append('Variance')
 
     n=setup.get('n',-1)
     if n>0:
@@ -43,22 +59,26 @@ def main(parFile='../templates/statistics.json'):
 
     for r in rows:
 
+        setup['SUVdataset']='SUVanalysis'
+        globalDone=not forceGlobal and checkData(setup,r)
+
         #check if we have to do calculation
         setup['SUVdataset']='SUVanalysis_liver'
-        liverDone=checkData(setup,r)
+        liverDone=not forceLiver and checkData(setup,r)
 
         setup['SUVdataset']='SUVanalysis_SUVmax'
-        suvMaxDone=checkData(setup,r) 
+        suvMaxDone=not forceSUVMax and checkData(setup,r) 
 
         setup['SUVdataset']='SUVanalysis_liver1p5'
-        liver1p5Done=checkData(setup,r)
+        liver1p5Done=not forceLiver15 and checkData(setup,r)
 
-        
-        if liverDone and suvMaxDone and liver1p5Done:
+        doneCode=f'({globalDone}/{liverDone}/{liver1p5Done}/{suvMaxDone})'
+        print(f'Done: (global/liver/liver1p5/suvMax): {doneCode}')
+
+        if globalDone and liverDone and suvMaxDone and liver1p5Done:
             print('Skipping {} {}'.format(r['ParticipantId'],r['visitCode']))
             continue
-        doneCode=f'({liverDone}/{liver1p5Done}/{suvMaxDone})'
-        print(f'Done: (liver/liver1p5/suvMax): {doneCode}')
+
         #PET
         for q in ['petResampled']:
             localPath=statUtils.getImage(setup,r,q)
@@ -78,23 +98,18 @@ def main(parFile='../templates/statistics.json'):
         seg={x:SimpleITK.ReadImage(segPaths[x]) for x in segPaths}
 
 
-        try:
-            thr=setup['radiomics']['setting']['resegmentRange'][0]
-        except KeyError:
-            thr=None
-
-        setup['radiomics']['setting']['resegmentRange']=None        
-        firstOrder=setup['radiomics']['featureClass']['firstorder']
-        if 'Variance' not in firstOrder:
-            firstOrder.append('Variance')
-        #get value for maximum in organs or liver mean and std
-        outputs=getValues(setup,r,pet,seg)
-        setup['SUVdataset']='SUVanalysis'
-        #uploadData(setup,r,outputs)
+        globalThreshold={x:0 for x in seg}
+        if not globalDone:
+            #get value for maximum in organs or liver mean and std
+            setup['SUVdataset']='SUVanalysis'
+            globalOutputs=thresholdAnalysis(setup,r,pet,seg,globalThreshold)
+            if doUpload:
+                uploadData(setup,r,globalOutputs)
 
-        print(outputs)
-        
         default={'SUVmax':0,'SUVmean':0,'SUVSD':0}
+        setup['SUVdataset']='SUVanalysis'
+        outputs=loadGlobals(setup,r,seg)
+
         #liver threshold
         liverId=1
         liverThreshold={x:outputs[x].get(liverId,default)['SUVmean']
@@ -110,25 +125,28 @@ def main(parFile='../templates/statistics.json'):
                 for x in outputs}
         suvMaxThreshold={x:0.41*suvMax[x] for x in suvMax}
     
-
-        print('thr[liver]={} thr[liver/1.5]={} thr(suvmax)={}'.format(liverThreshold,liver1p5Threshold,suvMaxThreshold))
+        ftStr='thr[liver]={} thr[liver/1.5]={} thr(suvmax)={}'
+        print(ftStr.format(liverThreshold,liver1p5Threshold,suvMaxThreshold))
        
 
         if not liverDone:
             setup['SUVdataset']='SUVanalysis_liver'
             liverOutputs=thresholdAnalysis(setup,r,pet,seg,liverThreshold)
-            uploadData(setup,r,liverOutputs)
+            if doUpload:
+                uploadData(setup,r,liverOutputs)
 
         #also for threshold=1.5
         if not liver1p5Done:
             setup['SUVdataset']='SUVanalysis_liver1p5'
             liver1p5Outputs=thresholdAnalysis(setup,r,pet,seg,liver1p5Threshold)
-            uploadData(setup,r,liver1p5Outputs)
+            if doUpload:
+                uploadData(setup,r,liver1p5Outputs)
 
         if not suvMaxDone:
             setup['SUVdataset']='SUVanalysis_SUVmax'
             suvMaxOutputs=thresholdAnalysis(setup,r,pet,seg,suvMaxThreshold)
-            uploadData(setup,r,suvMaxOutputs)
+            if doUpload:
+                uploadData(setup,r,suvMaxOutputs)
 
         #skip threshold of 4
         doThreshold4=False
@@ -141,9 +159,6 @@ def main(parFile='../templates/statistics.json'):
             uploadData(setup,r,outputs4)
 
         
-        #outputs=getValues(setup,users,r,pet)
-        #uploadData(setup,r,outputs)
-
         #cleanup
         os.remove(localPath)
 
@@ -151,27 +166,88 @@ def main(parFile='../templates/statistics.json'):
             os.remove(segPaths[x])
 
 
+def loadGlobals(setup,r,seg):
+    matchingVisits={
+            'VISIT_1':['VISIT_1','VISIT_11'],
+            'VISIT_11':['VISIT_1','VISIT_11'],
+            'VISIT_2':['VISIT_2','VISIT_12'],
+            'VISIT_12':['VISIT_2','VISIT_12'],
+            'VISIT_3':['VISIT_3','VISIT_13'],
+            'VISIT_13':['VISIT_3','VISIT_13'],
+            'VISIT_4':['VISIT_4','VISIT_14'],
+            'VISIT_14':['VISIT_4','VISIT_14']}
+    ids=[1,2,3,4,5,6]
+    p=setup['project']
+    d=setup['SUVdataset']
+    idVar='ParticipantId'
+    vCodes=';'.join(matchingVisits[r['visitCode']])
+    vars=['MTV','TLG','SUVmean','SUVmax','SUVSD']
+    outputs={}
+    for x in seg:
+        outputs[x]={}
+        for i in ids:
+            qFilter=[]
+            qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
+            qFilter.append({'variable':'visitCode','value':vCodes,'oper':'in'})
+            qFilter.append({'variable':'User','value':f'{x}','oper':'eq'})
+            qFilter.append({'variable':'segment','value':f'{i}','oper':'eq'})
+            ds=setup['db'].selectRows(p,'study',d,qFilter)
+            outputs[x][i]={v:combineData(ds['rows'],v) for v in vars}
+    return outputs
+
+def combineData(rows,x):
+    dt=numpy.array([float(r[x] or 0) for r in rows])
+    #print('Combine data[{}] {}'.format(x,dt))
+    r=0
+    if x=='MTV' or x=='TLG':
+        r=numpy.sum(dt)
+    if x=='SUVmean':
+        m=combineData(rows,'MTV')
+        if m==0:
+            r=0
+        else:
+            r=combineData(rows,'TLG')/m
+    if x=='SUVmax':
+        try:
+            r=numpy.max(dt)
+        except ValueError:
+            #empty array
+            r=0
+    if x=='SUVSD':
+        #this should be probably be done right, 
+        #here I only want it to work also if one of the components is 0
+        r=numpy.sqrt(numpy.sum(dt*dt))
+    #print(f'Result: {r}')
+    return r
+    
 def thresholdAnalysis(setup,r,pet,seg,thrs):
-    #thresholds thrs are by participant and region
+    #thresholds thrs are by segmentation author
     outputs={}
     for s in thrs:
+        t=thrs[s]
+        print(f'Thr[{s}]: {t}')
         #update radiomics setting
         setup['radiomics']['setting']['resegmentRange']=[thrs[s]]
         setup['radiomics']['setting']['resegmentShape']=True
         outputs[s]=getValuesForSegmentation(setup,r,pet,seg[s])
             
         _=[outputs[s][y].update({'threshold':thrs[s]}) for y in outputs[s]]
+    print(outputs)
     return outputs
 
 
 def getValues(setup,row,pet,seg):
+    #seg is a list of segments
 
     rFile='radiomics.json'
+    #short function names
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+    getStats=statUtils.getRadiomicsComponentStats
 
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
 
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
 
 
     #find labels associated with each (non-overlaping) segmentation
@@ -182,7 +258,7 @@ def getValues(setup,row,pet,seg):
         for id in ids:
             print('{} {}'.format(id,ids[id]))
             try:
-                output=statUtils.getRadiomicsComponentStats(setup,pet,seg[x],ids[id])
+                output=getStats(setup,pet,seg[x],ids[id])
             except ValueError:
                 continue
             outputs[x][ids[id]]=output
@@ -192,12 +268,16 @@ def getValues(setup,row,pet,seg):
 
 def getValuesForSegmentation(setup,row,pet,seg):
 
+    #short function names
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+    getStats=statUtils.getRadiomicsComponentStats
+
     rFile='radiomics.json'
 
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
 
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
 
 
     #find labels associated with each (non-overlaping) segmentation
@@ -207,7 +287,7 @@ def getValuesForSegmentation(setup,row,pet,seg):
     for id in ids:
         print('{} {}'.format(id,ids[id]))
         try:
-            output=statUtils.getRadiomicsComponentStats(setup,pet,seg,ids[id])
+            output=getStats(setup,pet,seg,ids[id])
         except ValueError:
             continue
         outputs[ids[id]]=output
@@ -218,21 +298,26 @@ def getValuesForSegmentation(setup,row,pet,seg):
 
 def uploadData(setup,r,outputs):
     baseVar=['ParticipantId','SequenceNum','patientCode','visitCode']
+    p=setup['project']
+    d=setup['SUVdataset']
     for x in outputs:
         for s in outputs[x]:
             output=outputs[x][s]
             output.update({x:r[x] for x in baseVar})
             output['User']=x
             output['segment']=s
-            statUtils.updateDatasetRows(setup['db'],setup['project'],setup['SUVdataset'],[output])
+            statUtils.updateDatasetRows(setup['db'],p,d,[output])
     
 def checkData(setup,r):
+    p=setup['project']
+    d=setup['SUVdataset']
     qFilter=[]
-    qFilter.append({'variable':'ParticipantId','value':r['ParticipantId'],'oper':'eq'})
+    idVar='ParticipantId'
+    qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
     qFilter.append({'variable':'visitCode','value':r['visitCode'],'oper':'eq'})
-    ds=setup['db'].selectRows(setup['project'],'study',setup['SUVdataset'],qFilter)
+    ds=setup['db'].selectRows(p,'study',d,qFilter)
     n=len(ds['rows'])
-    print('[{}:{}/{}] got {} rows.'.format(setup['SUVdataset'],r['ParticipantId'],r['visitCode'],n))
+    print('[{}:{}/{}] got {} rows.'.format(d,r[idVar],r['visitCode'],n))
     return n>0