Bläddra i källkod

Updates to anonymization routine

Andrej Studen 1 månad sedan
förälder
incheckning
495c4bbe80
1 ändrade filer med 41 tillägg och 21 borttagningar
  1. 41 21
      pythonScripts/anonymize.py

+ 41 - 21
pythonScripts/anonymize.py

@@ -2,6 +2,8 @@ import os
 import json
 import re
 import sys
+import numpy
+import shutil
 
 #nothing gets done if you do import
 
@@ -43,22 +45,31 @@ def main(parameterFile):
     dataset=pars['Database']['queryName']
     schema=pars['Database']['schemaName']
 
-    tempBase=os.path.join('/','data','nixUser','RIS')
+    #tempBase=os.path.join('/','data','nixUser','RIS')
+    tempBase=os.path.join(fhome,'temp','RIS')
     if not os.path.isdir(tempBase):
         os.mkdir(tempBase)
 
 
     participantField=pars['Database']['participantField']
-    segmentation=pars['Database']['segementationQuery']
+    segmentation=pars['Database']['segmentationQuery']
 
     #all images from database
-    visitFilter={'variable':'visitCode','value':'VISIT_1','oper':'eq'}
-    iodineFilter={'variable':'iodineContrast','value':'0','oper':'eq'}
+    visitFilter={'variable':'visitCode','value':'VISIT_2','oper':'eq'}
+    iodineFilter={'variable':'iodineContrast','value':'1','oper':'neq'}
+    #for VISIT_1, also apply iodineFilter
+    #qFilter=[visitFilter,iodineFilter]
+    #for VISIT_2, iodineFilter has no meaning (should be false or blank, but an OR condition is hard to express as a filter)
+    qFilter=[visitFilter]
 
-    ds=db.selectRows(project,schema,dataset,[visitFilter,iodineFilter])
+    #shift generated patient names
+    offset=100
+
+    ds=db.selectRows(project,schema,dataset,qFilter)
     #imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
     #output
-    imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
+    imageResampledField={"CT":"ctResampled","PET":"petResampled"}
+    #,"patientmask":"ROImask"}
 
     #use webdav to transfer file (even though it is localhost)
 
@@ -67,6 +78,7 @@ def main(parameterFile):
     n=len(ds['rows'])
     keys=[r[participantField] for r in ds['rows']]
     perm=numpy.random.permutation(n)
+    perm+=offset
     pseudo={keys[i]:perm[i] for i in range(n)}
     
     for row in ds["rows"]:
@@ -76,7 +88,9 @@ def main(parameterFile):
 
         idFilter={'variable':participantField,'value':row[participantField],'oper':'eq'}
         segFilter={'variable':'SequenceNum','value':'{}'.format(row['SequenceNum']),'oper':'eq'}
-        ds=db.selectRows(project,schema,segmentation,[idFilter,segFilter])
+        #adoma
+        userFilter={'variable':'User','value':'1037','oper':'eq'}
+        ds=db.selectRows(project,schema,segmentation,[idFilter,segFilter,userFilter])
         nS=len(ds['rows'])
         if nS==0:
             print('No segmentation found')
@@ -84,34 +98,40 @@ def main(parameterFile):
         if nS>1:
             print('Multiple segmentations found')
             continue
-        maskField={'mask':'/'.join('Segmentations',ds['rows'][0]['latestFile'])}
-        imageResampleField.update(maskField)
-        
+        maskFile={'mask':'/'.join(['Segmentations',ds['rows'][0]['latestFile']])}
     
         #build/check remote directory structure
         remoteDir=fb.buildPathURL(project,['preprocessedImages',\
             getPatientLabel(row,participantField),getVisitLabel(row)])
-
-        gzRemoteFiles={x:'/'.join(remoteDir,row[imageResampleField[x]]) for x in imageResampleField}
         
-        for f in gzRemoteFiles.values():
+        remoteFiles={x:row[imageResampledField[x]] for x in imageResampledField}
+        remoteFiles.update(maskFile)
+        remoteFiles={x:'/'.join([remoteDir,remoteFiles[x]]) for x in remoteFiles}
+        for f in remoteFiles.values():
             print("[{}]: [{}]".format(f,fb.entryExists(f)))
-        localDir='patient{:03d}'.format(pseudo[row[participantField]])
-        localDir=os.path.join(tempBase,localDir)
+        patientALabel='patient{:03d}'.format(pseudo[row[participantField]])
+        localDir=os.path.join(tempBase,patientALabel)
         if not os.path.isdir(localDir):
             os.mkdir(localDir)
-        localFiles={x:os.path.join(localDir,'{}.nii.gz'.format(x)) for x in gzRemoteFiles}
-
+        fileNames={x:'{}.nii.gz'.format(x) for x in remoteFiles}
+        fileNames['mask']=fileNames['mask'].replace('nii.gz','nrrd')
+        localFiles={x:os.path.join(localDir,fileNames[x]) for x in fileNames}
         
-        if not all(remoteFilePresent):
+        remoteFilesPresent={x:fb.entryExists(remoteFiles[x]) for x in remoteFiles}
+        if not all(remoteFilesPresent):
             print('Missing remote files')
             continue
     
-        continue
 
-        _=[fb.readFileToFile(gzRemoteFiles[x],localFiles[x]) for x in localFiles]
+        _=[fb.readFileToFile(remoteFiles[x],localFiles[x]) for x in localFiles]
+
+        remoteADir=fb.buildPathURL(project,['anonymized',patientALabel])
+        remoteAFiles={x:'/'.join([remoteADir,fileNames[x]]) for x in fileNames}
+        _=[fb.writeFileToFile(localFiles[x],remoteAFiles[x]) for x in remoteAFiles]
+
+        shutil.rmtree(localDir)
 
-        if i==0:
+        if i==-1:
             break
         i=i+1