2 Commits 7a794a39d6 ... f9f17be78d

Author SHA1 Message Date
  Andrej Studen f9f17be78d Correcting for missing liver segmentation in threshold calculation when liver and disease are in different PET files 11 months ago
  Andrej Studen 495c4bbe80 Updates to anonymization routine 11 months ago
2 files changed with 168 additions and 63 deletions
  1. pythonScripts/anonymize.py (+41 -21)
  2. pythonScripts/runStat.py (+127 -42)

+ 41 - 21
pythonScripts/anonymize.py

@@ -2,6 +2,8 @@ import os
 import json
 import re
 import sys
+import numpy
+import shutil
 
 #nothing gets done if you do import
 
@@ -43,22 +45,31 @@ def main(parameterFile):
     dataset=pars['Database']['queryName']
     schema=pars['Database']['schemaName']
 
-    tempBase=os.path.join('/','data','nixUser','RIS')
+    #tempBase=os.path.join('/','data','nixUser','RIS')
+    tempBase=os.path.join(fhome,'temp','RIS')
     if not os.path.isdir(tempBase):
         os.mkdir(tempBase)
 
 
     participantField=pars['Database']['participantField']
-    segmentation=pars['Database']['segementationQuery']
+    segmentation=pars['Database']['segmentationQuery']
 
     #all images from database
-    visitFilter={'variable':'visitCode','value':'VISIT_1','oper':'eq'}
-    iodineFilter={'variable':'iodineContrast','value':'0','oper':'eq'}
+    visitFilter={'variable':'visitCode','value':'VISIT_2','oper':'eq'}
+    iodineFilter={'variable':'iodineContrast','value':'1','oper':'neq'}
+    #for VISIT_1, also apply iodineFilter
+    #qFilter=[visitFilter,iodineFilter]
+    #for VISIT_2, iodineFilter has no meaning (should be false or blank, but OR is hard to do)
+    qFilter=[visitFilter]
 
-    ds=db.selectRows(project,schema,dataset,[visitFilter,iodineFilter])
+    #shift generated patient names
+    offset=100
+
+    ds=db.selectRows(project,schema,dataset,qFilter)
     #imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
     #output
-    imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
+    imageResampledField={"CT":"ctResampled","PET":"petResampled"}
+    #,"patientmask":"ROImask"}
 
     #use webdav to transfer file (even though it is localhost)
 
@@ -67,6 +78,7 @@ def main(parameterFile):
     n=len(ds['rows'])
     keys=[r[participantField] for r in ds['rows']]
     perm=numpy.random.permutation(n)
+    perm+=offset
     pseudo={keys[i]:perm[i] for i in range(n)}
     
     for row in ds["rows"]:
@@ -76,7 +88,9 @@ def main(parameterFile):
 
         idFilter={'variable':participantField,'value':row[participantField],'oper':'eq'}
         segFilter={'variable':'SequenceNum','value':'{}'.format(row['SequenceNum']),'oper':'eq'}
-        ds=db.selectRows(project,schema,segmentation,[idFilter,segFilter])
+        #adoma
+        userFilter={'variable':'User','value':'1037','oper':'eq'}
+        ds=db.selectRows(project,schema,segmentation,[idFilter,segFilter,userFilter])
         nS=len(ds['rows'])
         if nS==0:
             print('No segmentation found')
@@ -84,34 +98,40 @@ def main(parameterFile):
         if nS>1:
             print('Multiple segmentations found')
             continue
-        maskField={'mask':'/'.join('Segmentations',ds['rows'][0]['latestFile'])}
-        imageResampleField.update(maskField)
-        
+        maskFile={'mask':'/'.join(['Segmentations',ds['rows'][0]['latestFile']])}
     
         #build/check remote directory structure
         remoteDir=fb.buildPathURL(project,['preprocessedImages',\
             getPatientLabel(row,participantField),getVisitLabel(row)])
-
-        gzRemoteFiles={x:'/'.join(remoteDir,row[imageResampleField[x]]) for x in imageResampleField}
         
-        for f in gzRemoteFiles.values():
+        remoteFiles={x:row[imageResampledField[x]] for x in imageResampledField}
+        remoteFiles.update(maskFile)
+        remoteFiles={x:'/'.join([remoteDir,remoteFiles[x]]) for x in remoteFiles}
+        for f in remoteFiles.values():
             print("[{}]: [{}]".format(f,fb.entryExists(f)))
-        localDir='patient{:03d}'.format(pseudo[row[participantField]])
-        localDir=os.path.join(tempBase,localDir)
+        patientALabel='patient{:03d}'.format(pseudo[row[participantField]])
+        localDir=os.path.join(tempBase,patientALabel)
         if not os.path.isdir(localDir):
             os.mkdir(localDir)
-        localFiles={x:os.path.join(localDir,'{}.nii.gz'.format(x)) for x in gzRemoteFiles}
-
+        fileNames={x:'{}.nii.gz'.format(x) for x in remoteFiles}
+        fileNames['mask']=fileNames['mask'].replace('nii.gz','nrrd')
+        localFiles={x:os.path.join(localDir,fileNames[x]) for x in fileNames}
         
-        if not all(remoteFilePresent):
+        remoteFilesPresent={x:fb.entryExists(remoteFiles[x]) for x in remoteFiles}
+        if not all(remoteFilesPresent):
             print('Missing remote files')
             continue
     
-        continue
 
-        _=[fb.readFileToFile(gzRemoteFiles[x],localFiles[x]) for x in localFiles]
+        _=[fb.readFileToFile(remoteFiles[x],localFiles[x]) for x in localFiles]
+
+        remoteADir=fb.buildPathURL(project,['anonymized',patientALabel])
+        remoteAFiles={x:'/'.join([remoteADir,fileNames[x]]) for x in fileNames}
+        _=[fb.writeFileToFile(localFiles[x],remoteAFiles[x]) for x in remoteAFiles]
+
+        shutil.rmtree(localDir)
 
-        if i==0:
+        if i==-1:
             break
         i=i+1
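
The anonymization change above draws pseudonyms from a shuffled index and shifts them by offset=100, presumably so that names generated for the VISIT_2 run do not collide with those already issued for VISIT_1. A minimal standalone sketch of that mapping (the participant IDs below are illustrative, not taken from the database):

import numpy

keys = ['PT001', 'PT002', 'PT003']            # illustrative participant IDs
offset = 100                                  # shift added to every generated pseudonym
perm = numpy.random.permutation(len(keys)) + offset
pseudo = {keys[i]: perm[i] for i in range(len(keys))}
for k in pseudo:
    print('{} -> patient{:03d}'.format(k, pseudo[k]))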
 

+ 127 - 42
pythonScripts/runStat.py

@@ -7,14 +7,26 @@ import json
 import numpy
 
 def main(parFile='../templates/statistics.json'):
+
+
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+ 
     setup=statUtils.loadSetup(parFile)
     rFile='radiomics.json'
 
+    forceLiver=setup.get('forceLiver',False)
+    forceSUVMax=setup.get('forceSUVMax',False)
+    forceLiver15=setup.get('forceLiver15',False)
+    forceGlobal=setup.get('forceGlobal',False)
+
+    doUpload=setup.get('doUpload',True)
+
     #update threshold values if needed
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
     setup['db'],setup['fb']=statUtils.connectDB('onko-nix')
     users=statUtils.getUsers(setup['db'],setup['project'])
+    p=setup['project']
     qFilter=[]
     try:
         vList=';'.join(setup['participants'])    
@@ -28,14 +40,18 @@ def main(parFile='../templates/statistics.json'):
     except KeyError:
         pass
 
-    ds=setup['db'].selectRows(setup['project'],'study',setup['imagingDataset'],qFilter)
+    ds=setup['db'].selectRows(p,'study',setup['imagingDataset'],qFilter)
     if not os.path.isdir(setup['localDir']):
         os.mkdir(setup['localDir'])
     #select just the first row; debugging
     rows=ds['rows']
     setup['values']=['COM','MTV','TLG','SUVmean','SUVmax','voxelCount','SUVSD']
-    #params=os.path.join('..','templates','radiomics.yaml')
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
+
+    #make sure we calculate Variance
+    firstOrder=setup['radiomics']['featureClass']['firstorder']
+    if 'Variance' not in firstOrder:
+        firstOrder.append('Variance')
 
     n=setup.get('n',-1)
     if n>0:
@@ -43,22 +59,26 @@ def main(parFile='../templates/statistics.json'):
 
     for r in rows:
 
+        setup['SUVdataset']='SUVanalysis'
+        globalDone=not forceGlobal and checkData(setup,r)
+
         #check if we have to do calculation
         setup['SUVdataset']='SUVanalysis_liver'
-        liverDone=checkData(setup,r)
+        liverDone=not forceLiver and checkData(setup,r)
 
         setup['SUVdataset']='SUVanalysis_SUVmax'
-        suvMaxDone=checkData(setup,r) 
+        suvMaxDone=not forceSUVMax and checkData(setup,r) 
 
         setup['SUVdataset']='SUVanalysis_liver1p5'
-        liver1p5Done=checkData(setup,r)
+        liver1p5Done=not forceLiver15 and checkData(setup,r)
 
-        
-        if liverDone and suvMaxDone and liver1p5Done:
+        doneCode=f'({globalDone}/{liverDone}/{liver1p5Done}/{suvMaxDone})'
+        print(f'Done: (global/liver/liver1p5/suvMax): {doneCode}')
+
+        if globalDone and liverDone and suvMaxDone and liver1p5Done:
             print('Skipping {} {}'.format(r['ParticipantId'],r['visitCode']))
             continue
-        doneCode=f'({liverDone}/{liver1p5Done}/{suvMaxDone})'
-        print(f'Done: (liver/liver1p5/suvMax): {doneCode}')
+
         #PET
         for q in ['petResampled']:
             localPath=statUtils.getImage(setup,r,q)
@@ -78,23 +98,18 @@ def main(parFile='../templates/statistics.json'):
         seg={x:SimpleITK.ReadImage(segPaths[x]) for x in segPaths}
 
 
-        try:
-            thr=setup['radiomics']['setting']['resegmentRange'][0]
-        except KeyError:
-            thr=None
-
-        setup['radiomics']['setting']['resegmentRange']=None        
-        firstOrder=setup['radiomics']['featureClass']['firstorder']
-        if 'Variance' not in firstOrder:
-            firstOrder.append('Variance')
-        #get value for maximum in organs or liver mean and std
-        outputs=getValues(setup,r,pet,seg)
-        setup['SUVdataset']='SUVanalysis'
-        #uploadData(setup,r,outputs)
+        globalThreshold={x:0 for x in seg}
+        if not globalDone:
+            #get value for maximum in organs or liver mean and std
+            setup['SUVdataset']='SUVanalysis'
+            globalOutputs=thresholdAnalysis(setup,r,pet,seg,globalThreshold)
+            if doUpload:
+                uploadData(setup,r,globalOutputs)
 
-        print(outputs)
-        
         default={'SUVmax':0,'SUVmean':0,'SUVSD':0}
+        setup['SUVdataset']='SUVanalysis'
+        outputs=loadGlobals(setup,r,seg)
+
         #liver threshold
         liverId=1
         liverThreshold={x:outputs[x].get(liverId,default)['SUVmean']
@@ -110,25 +125,28 @@ def main(parFile='../templates/statistics.json'):
                 for x in outputs}
         suvMaxThreshold={x:0.41*suvMax[x] for x in suvMax}
     
-
-        print('thr[liver]={} thr[liver/1.5]={} thr(suvmax)={}'.format(liverThreshold,liver1p5Threshold,suvMaxThreshold))
+        ftStr='thr[liver]={} thr[liver/1.5]={} thr(suvmax)={}'
+        print(ftStr.format(liverThreshold,liver1p5Threshold,suvMaxThreshold))
        
 
         if not liverDone:
             setup['SUVdataset']='SUVanalysis_liver'
             liverOutputs=thresholdAnalysis(setup,r,pet,seg,liverThreshold)
-            uploadData(setup,r,liverOutputs)
+            if doUpload:
+                uploadData(setup,r,liverOutputs)
 
         #also for threshold=1.5
         if not liver1p5Done:
             setup['SUVdataset']='SUVanalysis_liver1p5'
             liver1p5Outputs=thresholdAnalysis(setup,r,pet,seg,liver1p5Threshold)
-            uploadData(setup,r,liver1p5Outputs)
+            if doUpload:
+                uploadData(setup,r,liver1p5Outputs)
 
         if not suvMaxDone:
             setup['SUVdataset']='SUVanalysis_SUVmax'
             suvMaxOutputs=thresholdAnalysis(setup,r,pet,seg,suvMaxThreshold)
-            uploadData(setup,r,suvMaxOutputs)
+            if doUpload:
+                uploadData(setup,r,suvMaxOutputs)
 
         #skip threshold of 4
         doThreshold4=False
@@ -141,9 +159,6 @@ def main(parFile='../templates/statistics.json'):
             uploadData(setup,r,outputs4)
 
         
-        #outputs=getValues(setup,users,r,pet)
-        #uploadData(setup,r,outputs)
-
         #cleanup
         os.remove(localPath)
 
@@ -151,27 +166,88 @@ def main(parFile='../templates/statistics.json'):
             os.remove(segPaths[x])
 
 
+def loadGlobals(setup,r,seg):
+    matchingVisits={
+            'VISIT_1':['VISIT_1','VISIT_11'],
+            'VISIT_11':['VISIT_1','VISIT_11'],
+            'VISIT_2':['VISIT_2','VISIT_12'],
+            'VISIT_12':['VISIT_2','VISIT_12'],
+            'VISIT_3':['VISIT_3','VISIT_13'],
+            'VISIT_13':['VISIT_3','VISIT_13'],
+            'VISIT_4':['VISIT_4','VISIT_14'],
+            'VISIT_14':['VISIT_4','VISIT_14']}
+    ids=[1,2,3,4,5,6]
+    p=setup['project']
+    d=setup['SUVdataset']
+    idVar='ParticipantId'
+    vCodes=';'.join(matchingVisits[r['visitCode']])
+    vars=['MTV','TLG','SUVmean','SUVmax','SUVSD']
+    outputs={}
+    for x in seg:
+        outputs[x]={}
+        for i in ids:
+            qFilter=[]
+            qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
+            qFilter.append({'variable':'visitCode','value':vCodes,'oper':'in'})
+            qFilter.append({'variable':'User','value':f'{x}','oper':'eq'})
+            qFilter.append({'variable':'segment','value':f'{i}','oper':'eq'})
+            ds=setup['db'].selectRows(p,'study',d,qFilter)
+            outputs[x][i]={v:combineData(ds['rows'],v) for v in vars}
+    return outputs
+
+def combineData(rows,x):
+    dt=numpy.array([float(r[x] or 0) for r in rows])
+    #print('Combine data[{}] {}'.format(x,dt))
+    r=0
+    if x=='MTV' or x=='TLG':
+        r=numpy.sum(dt)
+    if x=='SUVmean':
+        m=combineData(rows,'MTV')
+        if m==0:
+            r=0
+        else:
+            r=combineData(rows,'TLG')/m
+    if x=='SUVmax':
+        try:
+            r=numpy.max(dt)
+        except ValueError:
+            #empty array
+            r=0
+    if x=='SUVSD':
+        #this should probably be done properly;
+        #here I only need it to also work when one of the components is 0
+        r=numpy.sqrt(numpy.sum(dt*dt))
+    #print(f'Result: {r}')
+    return r
+    
 def thresholdAnalysis(setup,r,pet,seg,thrs):
-    #thresholds thrs are by participant and region
+    #thresholds thrs are by segmentation author
     outputs={}
     for s in thrs:
+        t=thrs[s]
+        print(f'Thr[{s}]: {t}')
         #update radiomics setting
         setup['radiomics']['setting']['resegmentRange']=[thrs[s]]
         setup['radiomics']['setting']['resegmentShape']=True
         outputs[s]=getValuesForSegmentation(setup,r,pet,seg[s])
             
         _=[outputs[s][y].update({'threshold':thrs[s]}) for y in outputs[s]]
+    print(outputs)
     return outputs
 
 
 def getValues(setup,row,pet,seg):
+    #seg is a list of segments
 
     rFile='radiomics.json'
+    #short function names
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+    getStats=statUtils.getRadiomicsComponentStats
 
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
 
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
 
 
     #find labels associated with each (non-overlapping) segmentation
@@ -182,7 +258,7 @@ def getValues(setup,row,pet,seg):
         for id in ids:
             print('{} {}'.format(id,ids[id]))
             try:
-                output=statUtils.getRadiomicsComponentStats(setup,pet,seg[x],ids[id])
+                output=getStats(setup,pet,seg[x],ids[id])
             except ValueError:
                 continue
             outputs[x][ids[id]]=output
@@ -192,12 +268,16 @@ def getValues(setup,row,pet,seg):
 
 def getValuesForSegmentation(setup,row,pet,seg):
 
+    #short function names
+    featureExtractor=radiomics.featureextractor.RadiomicsFeatureExtractor
+    getStats=statUtils.getRadiomicsComponentStats
+
     rFile='radiomics.json'
 
     with open(rFile,'w') as f:
         f.write(json.dumps(setup['radiomics']))
 
-    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(rFile)
+    setup['featureExtractor']=featureExtractor(rFile)
 
 
     #find labels associated with each (non-overlapping) segmentation
@@ -207,7 +287,7 @@ def getValuesForSegmentation(setup,row,pet,seg):
     for id in ids:
         print('{} {}'.format(id,ids[id]))
         try:
-            output=statUtils.getRadiomicsComponentStats(setup,pet,seg,ids[id])
+            output=getStats(setup,pet,seg,ids[id])
         except ValueError:
             continue
         outputs[ids[id]]=output
@@ -218,21 +298,26 @@ def getValuesForSegmentation(setup,row,pet,seg):
 
 def uploadData(setup,r,outputs):
     baseVar=['ParticipantId','SequenceNum','patientCode','visitCode']
+    p=setup['project']
+    d=setup['SUVdataset']
     for x in outputs:
         for s in outputs[x]:
             output=outputs[x][s]
             output.update({x:r[x] for x in baseVar})
             output['User']=x
             output['segment']=s
-            statUtils.updateDatasetRows(setup['db'],setup['project'],setup['SUVdataset'],[output])
+            statUtils.updateDatasetRows(setup['db'],p,d,[output])
     
 def checkData(setup,r):
+    p=setup['project']
+    d=setup['SUVdataset']
     qFilter=[]
-    qFilter.append({'variable':'ParticipantId','value':r['ParticipantId'],'oper':'eq'})
+    idVar='ParticipantId'
+    qFilter.append({'variable':idVar,'value':r[idVar],'oper':'eq'})
     qFilter.append({'variable':'visitCode','value':r['visitCode'],'oper':'eq'})
-    ds=setup['db'].selectRows(setup['project'],'study',setup['SUVdataset'],qFilter)
+    ds=setup['db'].selectRows(p,'study',d,qFilter)
     n=len(ds['rows'])
-    print('[{}:{}/{}] got {} rows.'.format(setup['SUVdataset'],r['ParticipantId'],r['visitCode'],n))
+    print('[{}:{}/{}] got {} rows.'.format(d,r[idVar],r['visitCode'],n))
     return n>0
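
The first commit's fix appears to be carried by loadGlobals and combineData: when the liver and the disease were segmented on different PET files (matching visit pairs such as VISIT_2 and VISIT_12), the per-file SUVanalysis rows for a segment are merged before the liver threshold is derived, with MTV and TLG summed, SUVmean rebuilt as TLG/MTV, SUVmax taken as the maximum, and SUVSD combined in quadrature. A small worked example of that aggregation on hypothetical rows (a condensed restatement of combineData; the values are made up, whereas in the script the rows come from the SUVanalysis dataset query):

import numpy

def combine(rows, x):
    # condensed restatement of combineData above
    dt = numpy.array([float(r[x] or 0) for r in rows])
    if x in ('MTV', 'TLG'):
        return numpy.sum(dt)
    if x == 'SUVmean':
        mtv = combine(rows, 'MTV')
        return combine(rows, 'TLG') / mtv if mtv else 0
    if x == 'SUVmax':
        return numpy.max(dt) if dt.size else 0
    if x == 'SUVSD':
        # quadrature sum, as in the script; stays finite when a component is 0
        return numpy.sqrt(numpy.sum(dt * dt))
    return 0

# hypothetical rows for one segment: liver present in the VISIT_2 file,
# absent (all zeros) in the matching VISIT_12 file
rows = [
    {'MTV': 12.0, 'TLG': 60.0, 'SUVmax': 7.1, 'SUVSD': 0.8},
    {'MTV': 0.0,  'TLG': 0.0,  'SUVmax': 0.0, 'SUVSD': 0.0},
]
for v in ['MTV', 'TLG', 'SUVmean', 'SUVmax', 'SUVSD']:
    print(v, combine(rows, v))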