2 Commits 7a794a39d6 ... f9f17be78d

Author SHA1 Message Date
  Andrej Studen f9f17be78d Correcting for missing liver segmentation in threshold calculation when liver and disease are in different PET files 3 months ago
  Andrej Studen 495c4bbe80 Updates to anonymization routine 3 months ago
2 files changed with 168 additions and 63 deletions
  1. pythonScripts/anonymize.py (+41 -21)
  2. pythonScripts/runStat.py (+127 -42)

+ 41 - 21
pythonScripts/anonymize.py

@@ -2,6 +2,8 @@ import os
 import json
 import re
 import sys
+import numpy
+import shutil
 
 #nothing gets done if you only import this module
 
@@ -43,22 +45,31 @@ def main(parameterFile):
     dataset=pars['Database']['queryName']
     schema=pars['Database']['schemaName']
 
-    tempBase=os.path.join('/','data','nixUser','RIS')
+    #tempBase=os.path.join('/','data','nixUser','RIS')
+    tempBase=os.path.join(fhome,'temp','RIS')
     if not os.path.isdir(tempBase):
         os.mkdir(tempBase)
 
 
     participantField=pars['Database']['participantField']
-    segmentation=pars['Database']['segementationQuery']
+    segmentation=pars['Database']['segmentationQuery']
 
     #all images from database
-    visitFilter={'variable':'visitCode','value':'VISIT_1','oper':'eq'}
-    iodineFilter={'variable':'iodineContrast','value':'0','oper':'eq'}
+    visitFilter={'variable':'visitCode','value':'VISIT_2','oper':'eq'}
+    iodineFilter={'variable':'iodineContrast','value':'1','oper':'neq'}
+    #for VISIT_1, also apply iodineFilter
+    #qFilter=[visitFilter,iodineFilter]
+    #for VISIT_2, iodineFilter has no meaning (should be false or blank, but an OR filter is hard to express)
+    qFilter=[visitFilter]
 
-    ds=db.selectRows(project,schema,dataset,[visitFilter,iodineFilter])
+    #shift generated patient names
+    offset=100
+
+    ds=db.selectRows(project,schema,dataset,qFilter)
     #imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
     #output
-    imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
+    imageResampledField={"CT":"ctResampled","PET":"petResampled"}
+    #,"patientmask":"ROImask"}
 
     #use webdav to transfer file (even though it is localhost)
 
@@ -67,6 +78,7 @@ def main(parameterFile):
     n=len(ds['rows'])
     keys=[r[participantField] for r in ds['rows']]
     perm=numpy.random.permutation(n)
+    perm+=offset
     pseudo={keys[i]:perm[i] for i in range(n)}
     
     for row in ds["rows"]:
@@ -76,7 +88,9 @@ def main(parameterFile):
 
         idFilter={'variable':participantField,'value':row[participantField],'oper':'eq'}
         segFilter={'variable':'SequenceNum','value':'{}'.format(row['SequenceNum']),'oper':'eq'}
-        ds=db.selectRows(project,schema,segmentation,[idFilter,segFilter])
+        #adoma
+        userFilter={'variable':'User','value':'1037','oper':'eq'}
+        ds=db.selectRows(project,schema,segmentation,[idFilter,segFilter,userFilter])
         nS=len(ds['rows'])
         if nS==0:
             print('No segmentation found')
@@ -84,34 +98,40 @@ def main(parameterFile):
         if nS>1:
             print('Multiple segmentations found')
             continue
-        maskField={'mask':'/'.join('Segmentations',ds['rows'][0]['latestFile'])}
-        imageResampleField.update(maskField)
-        
+        maskFile={'mask':'/'.join(['Segmentations',ds['rows'][0]['latestFile']])}
     
         #build/check remote directory structure
         remoteDir=fb.buildPathURL(project,['preprocessedImages',\
             getPatientLabel(row,participantField),getVisitLabel(row)])
-
-        gzRemoteFiles={x:'/'.join(remoteDir,row[imageResampleField[x]]) for x in imageResampleField}
         
-        for f in gzRemoteFiles.values():
+        remoteFiles={x:row[imageResampledField[x]] for x in imageResampledField}
+        remoteFiles.update(maskFile)
+        remoteFiles={x:'/'.join([remoteDir,remoteFiles[x]]) for x in remoteFiles}
+        for f in remoteFiles.values():
             print("[{}]: [{}]".format(f,fb.entryExists(f)))
-        localDir='patient{:03d}'.format(pseudo[row[participantField]])
-        localDir=os.path.join(tempBase,localDir)
+        patientALabel='patient{:03d}'.format(pseudo[row[participantField]])
+        localDir=os.path.join(tempBase,patientALabel)
         if not os.path.isdir(localDir):
             os.mkdir(localDir)
-        localFiles={x:os.path.join(localDir,'{}.nii.gz'.format(x)) for x in gzRemoteFiles}
-
+        fileNames={x:'{}.nii.gz'.format(x) for x in remoteFiles}
+        fileNames['mask']=fileNames['mask'].replace('nii.gz','nrrd')
+        localFiles={x:os.path.join(localDir,fileNames[x]) for x in fileNames}
         
-        if not all(remoteFilePresent):
+        remoteFilesPresent={x:fb.entryExists(remoteFiles[x]) for x in remoteFiles}
+        if not all(remoteFilesPresent.values()):
             print('Missing remote files')
             continue
     
-        continue
 
-        _=[fb.readFileToFile(gzRemoteFiles[x],localFiles[x]) for x in localFiles]
+        _=[fb.readFileToFile(remoteFiles[x],localFiles[x]) for x in localFiles]
+
+        remoteADir=fb.buildPathURL(project,['anonymized',patientALabel])
+        remoteAFiles={x:'/'.join([remoteADir,fileNames[x]]) for x in fileNames}
+        _=[fb.writeFileToFile(localFiles[x],remoteAFiles[x]) for x in remoteAFiles]
+
+        shutil.rmtree(localDir)
 
-        if i==0:
+        if i==-1:
             break
         i=i+1
 
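
For reference, a minimal standalone sketch of the pseudonymization step introduced above: participant keys are mapped to a shuffled index shifted by an offset and rendered as patientNNN labels. The buildPseudoIds helper and the key values are illustrative, not part of the repository.

import numpy

# Illustrative sketch of the pseudonymization mapping used above:
# every participant key gets a random index, shifted by an offset,
# presumably so labels from this batch do not collide with an earlier run.
def buildPseudoIds(keys, offset=100):
    perm = numpy.random.permutation(len(keys)) + offset
    return {k: int(p) for k, p in zip(keys, perm)}

pseudo = buildPseudoIds(['SUBJ-001', 'SUBJ-002', 'SUBJ-003'])
for k, p in pseudo.items():
    # mirrors the 'patient{:03d}' label used for the local and remote directories
    print(k, '->', 'patient{:03d}'.format(p))
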

+ 127 - 42
pythonScripts/runStat.py

@@ -7,14 +7,26 @@ import json
 import numpy
 
 def main(parFile='../templates/statistics.json'):
+
+
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+ 
     setup=statUtils.loadSetup(parFile)
     rFile='radiomics.json'
 
+    forceLiver=setup.get('forceLiver',False)
+    forceSUVMax=setup.get('forceSUVMax',False)
+    forceLiver15=setup.get('forceLiver15',False)
+    forceGlobal=setup.get('forceGlobal',False)
+
+    doUpload=setup.get('doUpload',True)
+
     #update threshold values if needed
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
     setup['db'],setup['fb']=statUtils.connectDB('onko-nix')
     users=statUtils.getUsers(setup['db'],setup['project'])
+    p=setup['project']
     qFilter=[]
     try:
         vList=';'.join(setup['participants'])    
@@ -28,14 +40,18 @@ def main(parFile='../templates/statistics.json'):
     except KeyError:
         pass
 
-    ds=setup['db'].selectRows(setup['project'],'study',setup['imagingDataset'],qFilter)
+    ds=setup['db'].selectRows(p,'study',setup['imagingDataset'],qFilter)
     if not os.path.isdir(setup['localDir']):
         os.mkdir(setup['localDir'])
     #select just the first row; debugging
     rows=ds['rows']
     setup['values']=['COM','MTV','TLG','SUVmean','SUVmax','voxelCount','SUVSD']
-    #params=os.path.join('..','templates','radiomics.yaml')
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
+
+    #make sure we calculate Variance
+    firstOrder=setup['radiomics']['featureClass']['firstorder']
+    if 'Variance' not in firstOrder:
+        firstOrder.append('Variance')
 
     n=setup.get('n',-1)
     if n>0:
@@ -43,22 +59,26 @@ def main(parFile='../templates/statistics.json'):
 
     for r in rows:
 
+        setup['SUVdataset']='SUVanalysis'
+        globalDone=not forceGlobal and checkData(setup,r)
+
         #check if we have to do calculation
         setup['SUVdataset']='SUVanalysis_liver'
-        liverDone=checkData(setup,r)
+        liverDone=not forceLiver and checkData(setup,r)
 
         setup['SUVdataset']='SUVanalysis_SUVmax'
-        suvMaxDone=checkData(setup,r) 
+        suvMaxDone=not forceSUVMax and checkData(setup,r) 
 
         setup['SUVdataset']='SUVanalysis_liver1p5'
-        liver1p5Done=checkData(setup,r)
+        liver1p5Done=not forceLiver15 and checkData(setup,r)
 
-        
-        if liverDone and suvMaxDone and liver1p5Done:
+        doneCode=f'({globalDone}/{liverDone}/{liver1p5Done}/{suvMaxDone})'
+        print(f'Done: (global/liver/liver1p5/suvMax): {doneCode}')
+
+        if globalDone and liverDone and suvMaxDone and liver1p5Done:
             print('Skipping {} {}'.format(r['ParticipantId'],r['visitCode']))
             continue
-        doneCode=f'({liverDone}/{liver1p5Done}/{suvMaxDone})'
-        print(f'Done: (liver/liver1p5/suvMax): {doneCode}')
+
         #PET
         for q in ['petResampled']:
             localPath=statUtils.getImage(setup,r,q)
@@ -78,23 +98,18 @@ def main(parFile='../templates/statistics.json'):
         seg={x:SimpleITK.ReadImage(segPaths[x]) for x in segPaths}
 
 
-        try:
-            thr=setup['radiomics']['setting']['resegmentRange'][0]
-        except KeyError:
-            thr=None
-
-        setup['radiomics']['setting']['resegmentRange']=None        
-        firstOrder=setup['radiomics']['featureClass']['firstorder']
-        if 'Variance' not in firstOrder:
-            firstOrder.append('Variance')
-        #get value for maximum in organs or liver mean and std
-        outputs=getValues(setup,r,pet,seg)
-        setup['SUVdataset']='SUVanalysis'
-        #uploadData(setup,r,outputs)
+        globalThreshold={x:0 for x in seg}
+        if not globalDone:
+            #get value for maximum in organs or liver mean and std
+            setup['SUVdataset']='SUVanalysis'
+            globalOutputs=thresholdAnalysis(setup,r,pet,seg,globalThreshold)
+            if doUpload:
+                uploadData(setup,r,globalOutputs)
 
-        print(outputs)
-        
         default={'SUVmax':0,'SUVmean':0,'SUVSD':0}
+        setup['SUVdataset']='SUVanalysis'
+        outputs=loadGlobals(setup,r,seg)
+
         #liver threshold
         liverId=1
         liverThreshold={x:outputs[x].get(liverId,default)['SUVmean']
@@ -110,25 +125,28 @@ def main(parFile='../templates/statistics.json'):
                 for x in outputs}
         suvMaxThreshold={x:0.41*suvMax[x] for x in suvMax}
     
-
-        print('thr[liver]={} thr[liver/1.5]={} thr(suvmax)={}'.format(liverThreshold,liver1p5Threshold,suvMaxThreshold))
+        ftStr='thr[liver]={} thr[liver/1.5]={} thr(suvmax)={}'
+        print(ftStr.format(liverThreshold,liver1p5Threshold,suvMaxThreshold))
        
 
         if not liverDone:
             setup['SUVdataset']='SUVanalysis_liver'
             liverOutputs=thresholdAnalysis(setup,r,pet,seg,liverThreshold)
-            uploadData(setup,r,liverOutputs)
+            if doUpload:
+                uploadData(setup,r,liverOutputs)
 
         #also for threshold=1.5
         if not liver1p5Done:
             setup['SUVdataset']='SUVanalysis_liver1p5'
             liver1p5Outputs=thresholdAnalysis(setup,r,pet,seg,liver1p5Threshold)
-            uploadData(setup,r,liver1p5Outputs)
+            if doUpload:
+                uploadData(setup,r,liver1p5Outputs)
 
         if not suvMaxDone:
             setup['SUVdataset']='SUVanalysis_SUVmax'
             suvMaxOutputs=thresholdAnalysis(setup,r,pet,seg,suvMaxThreshold)
-            uploadData(setup,r,suvMaxOutputs)
+            if doUpload:
+                uploadData(setup,r,suvMaxOutputs)
 
         #skip threshold of 4
         doThreshold4=False
@@ -141,9 +159,6 @@ def main(parFile='../templates/statistics.json'):
             uploadData(setup,r,outputs4)
 
         
-        #outputs=getValues(setup,users,r,pet)
-        #uploadData(setup,r,outputs)
-
         #cleanup
         os.remove(localPath)
 
@@ -151,27 +166,88 @@ def main(parFile='../templates/statistics.json'):
             os.remove(segPaths[x])
 
 
+def loadGlobals(setup,r,seg):
+    matchingVisits={
+            'VISIT_1':['VISIT_1','VISIT_11'],
+            'VISIT_11':['VISIT_1','VISIT_11'],
+            'VISIT_2':['VISIT_2','VISIT_12'],
+            'VISIT_12':['VISIT_2','VISIT_12'],
+            'VISIT_3':['VISIT_3','VISIT_13'],
+            'VISIT_13':['VISIT_3','VISIT_13'],
+            'VISIT_4':['VISIT_4','VISIT_14'],
+            'VISIT_14':['VISIT_4','VISIT_14']}
+    ids=[1,2,3,4,5,6]
+    p=setup['project']
+    d=setup['SUVdataset']
+    idVar='ParticipantId'
+    vCodes=';'.join(matchingVisits[r['visitCode']])
+    vars=['MTV','TLG','SUVmean','SUVmax','SUVSD']
+    outputs={}
+    for x in seg:
+        outputs[x]={}
+        for i in ids:
+            qFilter=[]
+            qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
+            qFilter.append({'variable':'visitCode','value':vCodes,'oper':'in'})
+            qFilter.append({'variable':'User','value':f'{x}','oper':'eq'})
+            qFilter.append({'variable':'segment','value':f'{i}','oper':'eq'})
+            ds=setup['db'].selectRows(p,'study',d,qFilter)
+            outputs[x][i]={v:combineData(ds['rows'],v) for v in vars}
+    return outputs
+
+def combineData(rows,x):
+    dt=numpy.array([float(r[x] or 0) for r in rows])
+    #print('Combine data[{}] {}'.format(x,dt))
+    r=0
+    if x=='MTV' or x=='TLG':
+        r=numpy.sum(dt)
+    if x=='SUVmean':
+        m=combineData(rows,'MTV')
+        if m==0:
+            r=0
+        else:
+            r=combineData(rows,'TLG')/m
+    if x=='SUVmax':
+        try:
+            r=numpy.max(dt)
+        except ValueError:
+            #empty array
+            r=0
+    if x=='SUVSD':
+        #this should probably be done properly;
+        #here I only want it to also work if one of the components is 0
+        r=numpy.sqrt(numpy.sum(dt*dt))
+    #print(f'Result: {r}')
+    return r
+    
 def thresholdAnalysis(setup,r,pet,seg,thrs):
-    #thresholds thrs are by participant and region
+    #thresholds thrs are by segmentation author
     outputs={}
     for s in thrs:
+        t=thrs[s]
+        print(f'Thr[{s}]: {t}')
         #update radiomics setting
         setup['radiomics']['setting']['resegmentRange']=[thrs[s]]
         setup['radiomics']['setting']['resegmentShape']=True
         outputs[s]=getValuesForSegmentation(setup,r,pet,seg[s])
             
         _=[outputs[s][y].update({'threshold':thrs[s]}) for y in outputs[s]]
+    print(outputs)
     return outputs
 
 
 def getValues(setup,row,pet,seg):
+    #seg is a list of segments
 
     rFile='radiomics.json'
+    #short function names
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+    getStats=statUtils.getRadiomicsComponentStats
 
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
 
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
 
 
     #find labels associated with each (non-overlapping) segmentation
@@ -182,7 +258,7 @@ def getValues(setup,row,pet,seg):
         for id in ids:
             print('{} {}'.format(id,ids[id]))
             try:
-                output=statUtils.getRadiomicsComponentStats(setup,pet,seg[x],ids[id])
+                output=getStats(setup,pet,seg[x],ids[id])
             except ValueError:
                 continue
             outputs[x][ids[id]]=output
@@ -192,12 +268,16 @@ def getValues(setup,row,pet,seg):
 
 def getValuesForSegmentation(setup,row,pet,seg):
 
+    #short function names
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+    getStats=statUtils.getRadiomicsComponentStats
+
     rFile='radiomics.json'
 
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
 
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
 
 
     #find labels associated with each (non-overlapping) segmentation
@@ -207,7 +287,7 @@ def getValuesForSegmentation(setup,row,pet,seg):
     for id in ids:
         print('{} {}'.format(id,ids[id]))
         try:
-            output=statUtils.getRadiomicsComponentStats(setup,pet,seg,ids[id])
+            output=getStats(setup,pet,seg,ids[id])
         except ValueError:
             continue
         outputs[ids[id]]=output
@@ -218,21 +298,26 @@ def getValuesForSegmentation(setup,row,pet,seg):
 
 def uploadData(setup,r,outputs):
     baseVar=['ParticipantId','SequenceNum','patientCode','visitCode']
+    p=setup['project']
+    d=setup['SUVdataset']
     for x in outputs:
         for s in outputs[x]:
             output=outputs[x][s]
             output.update({x:r[x] for x in baseVar})
             output['User']=x
             output['segment']=s
-            statUtils.updateDatasetRows(setup['db'],setup['project'],setup['SUVdataset'],[output])
+            statUtils.updateDatasetRows(setup['db'],p,d,[output])
     
 def checkData(setup,r):
+    p=setup['project']
+    d=setup['SUVdataset']
     qFilter=[]
-    qFilter.append({'variable':'ParticipantId','value':r['ParticipantId'],'oper':'eq'})
+    idVar='ParticipantId'
+    qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
     qFilter.append({'variable':'visitCode','value':r['visitCode'],'oper':'eq'})
-    ds=setup['db'].selectRows(setup['project'],'study',setup['SUVdataset'],qFilter)
+    ds=setup['db'].selectRows(p,'study',d,qFilter)
     n=len(ds['rows'])
-    print('[{}:{}/{}] got {} rows.'.format(setup['SUVdataset'],r['ParticipantId'],r['visitCode'],n))
+    print('[{}:{}/{}] got {} rows.'.format(d,r[idVar],r['visitCode'],n))
     return n>0
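
For reference, a standalone sketch of the combination rules implemented in combineData above, which allow liver and disease statistics stored in separate rows (e.g. separate PET files) to be merged into one set of per-segment values: MTV and TLG are summed, SUVmean is recovered as total TLG over total MTV, SUVmax is the overall maximum, and SUVSD is combined in quadrature. The example rows and their numbers are purely illustrative; only the field names mirror the dataset columns used in runStat.py.

import numpy

# Illustrative rows, e.g. one from the liver PET file and one from the disease PET file.
rows = [
    {'MTV': 10.0, 'TLG': 25.0, 'SUVmax': 4.1, 'SUVSD': 0.8},
    {'MTV': 5.0,  'TLG': 20.0, 'SUVmax': 6.3, 'SUVSD': 1.1},
]

def combine(rows, x):
    dt = numpy.array([float(r.get(x) or 0) for r in rows])
    if x in ('MTV', 'TLG'):
        return numpy.sum(dt)                              # volumes and TLG add up
    if x == 'SUVmean':
        mtv = combine(rows, 'MTV')
        return combine(rows, 'TLG') / mtv if mtv else 0   # weighted mean via TLG/MTV
    if x == 'SUVmax':
        return numpy.max(dt) if dt.size else 0            # overall maximum
    if x == 'SUVSD':
        return numpy.sqrt(numpy.sum(dt * dt))             # quadrature, as in the commit
    return 0

for v in ['MTV', 'TLG', 'SUVmean', 'SUVmax', 'SUVSD']:
    print(v, combine(rows, v))
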