Browse Source

Adding anonymization routine

NIX User 5 months ago
parent
commit
a055c84b39

+ 123 - 0
pythonScripts/anonymize.py

@@ -0,0 +1,123 @@
+import os
+import json
+import re
+import sys
+
+#importing this module has no side effects; all work is done in main()
+
+def getPatientLabel(row,participantField='PatientId'):
+    """Return the participant id of row with every '/' replaced by '_'.
+
+    row is a mapping; participantField names the key holding the id.
+    """
+    patientId=row[participantField]
+    return '_'.join(patientId.split('/'))
+
+def getVisitLabel(row):
+    """Return 'VISIT_<n>' where n is row['SequenceNum'] converted to int."""
+    sequenceNum=int(row['SequenceNum'])
+    return 'VISIT_{}'.format(sequenceNum)
+
+def getStudyLabel(row,participantField='PatientId'):
+    """Return '<patient label>-<visit label>' for row.
+
+    The two parts come from getPatientLabel and getVisitLabel.
+    """
+    parts=(getPatientLabel(row,participantField),getVisitLabel(row))
+    return '-'.join(parts)
+
+def main(parameterFile,dryRun=True,maxDownloads=1):
+    """Collect resampled images and masks under pseudonymized local names.
+
+    Reads ~/.labkey/setup.json to locate nixWrapper, connects to the server
+    configured in ~/.labkey/onko-nix.json and selects the rows of the
+    configured dataset with visitCode VISIT_1 and iodineContrast 0. Every
+    participant gets a random integer pseudonym; the files of a row are
+    stored as /data/nixUser/RIS/patientNNN/<name>.nii.gz.
+
+    parameterFile -- path to a JSON file; the keys read are
+        Database.project, Database.queryName, Database.schemaName,
+        Database.participantField and Database.segmentationQuery.
+    dryRun -- when True (default) only report whether the remote files
+        exist; nothing is created locally and nothing is downloaded.
+    maxDownloads -- stop after this many rows have been downloaded
+        (default 1); None removes the limit. Ignored when dryRun is True.
+
+    Raises KeyError when a required key is missing from the setup or
+    parameter file.
+    """
+    fhome=os.path.expanduser('~')
+    with open(os.path.join(fhome,".labkey","setup.json")) as f:
+        setup=json.load(f)
+
+    #nixWrapper is located through the setup file, hence the late imports
+    sys.path.insert(0,setup["paths"]["nixWrapper"])
+    import nixWrapper
+
+    nixWrapper.loadLibrary("labkeyInterface")
+
+    import labkeyInterface
+    import labkeyDatabaseBrowser
+    import labkeyFileBrowser
+    #needed for the random permutation that assigns pseudonyms
+    import numpy
+
+    fconfig=os.path.join(fhome,'.labkey','onko-nix.json')
+
+    net=labkeyInterface.labkeyInterface()
+    net.init(fconfig)
+    db=labkeyDatabaseBrowser.labkeyDB(net)
+    fb=labkeyFileBrowser.labkeyFileBrowser(net)
+
+    with open(parameterFile) as f:
+        pars=json.load(f)
+
+    dbPars=pars['Database']
+    project=dbPars['project']
+    dataset=dbPars['queryName']
+    schema=dbPars['schemaName']
+    participantField=dbPars['participantField']
+    #the template spells the key 'segmentationQuery'; the misspelled
+    #'segementationQuery' is still accepted for existing parameter files
+    if 'segmentationQuery' in dbPars:
+        segmentation=dbPars['segmentationQuery']
+    else:
+        segmentation=dbPars['segementationQuery']
+
+    tempBase=os.path.join('/','data','nixUser','RIS')
+    if not os.path.isdir(tempBase):
+        #makedirs also creates missing parent directories
+        os.makedirs(tempBase)
+
+    #all images from database
+    visitFilter={'variable':'visitCode','value':'VISIT_1','oper':'eq'}
+    iodineFilter={'variable':'iodineContrast','value':'0','oper':'eq'}
+
+    ds=db.selectRows(project,schema,dataset,[visitFilter,iodineFilter])
+    rows=ds['rows']
+
+    #row fields that hold the names of the files to export
+    imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
+
+    #assign every participant a random integer pseudonym
+    keys=[r[participantField] for r in rows]
+    perm=numpy.random.permutation(len(keys))
+    pseudo={key:int(code) for key,code in zip(keys,perm)}
+
+    nDownloaded=0
+
+    for row in rows:
+        print("Starting row id:{} seq:{}".format(row[participantField],row['SequenceNum']))
+
+        #exactly one segmentation must match the participant and visit
+        idFilter={'variable':participantField,'value':row[participantField],'oper':'eq'}
+        segFilter={'variable':'SequenceNum','value':'{}'.format(row['SequenceNum']),'oper':'eq'}
+        dsSeg=db.selectRows(project,schema,segmentation,[idFilter,segFilter])
+        segRows=dsSeg['rows']
+        if len(segRows)==0:
+            print('No segmentation found')
+            continue
+        if len(segRows)>1:
+            print('Multiple segmentations found')
+            continue
+
+        #build/check remote directory structure
+        remoteDir=fb.buildPathURL(project,['preprocessedImages',
+            getPatientLabel(row,participantField),getVisitLabel(row)])
+
+        gzRemoteFiles={x:'/'.join([remoteDir,row[field]])
+                for x,field in imageResampledField.items()}
+        #NOTE(review): assumes the segmentation file is stored under
+        #remoteDir/Segmentations - confirm against the server layout
+        gzRemoteFiles['mask']='/'.join([remoteDir,'Segmentations',
+            segRows[0]['latestFile']])
+
+        remoteFilePresent=[]
+        for remoteFile in gzRemoteFiles.values():
+            present=fb.entryExists(remoteFile)
+            print("[{}]: [{}]".format(remoteFile,present))
+            remoteFilePresent.append(present)
+
+        if not all(remoteFilePresent):
+            print('Missing remote files')
+            continue
+
+        if dryRun:
+            continue
+
+        localDir='patient{:03d}'.format(pseudo[row[participantField]])
+        localDir=os.path.join(tempBase,localDir)
+        if not os.path.isdir(localDir):
+            os.mkdir(localDir)
+        localFiles={x:os.path.join(localDir,'{}.nii.gz'.format(x)) for x in gzRemoteFiles}
+
+        #use webdav to transfer file (even though it is localhost)
+        for x in localFiles:
+            fb.readFileToFile(gzRemoteFiles[x],localFiles[x])
+
+        nDownloaded+=1
+        if maxDownloads is not None and nDownloaded>=maxDownloads:
+            break
+
+    print("Done")
+
+
+#script entry point: the first command line argument is the parameter file
+if __name__ == '__main__':
+    parameterFile=sys.argv[1]
+    main(parameterFile)
+

+ 4 - 12
pythonScripts/populateImagingFromTransferList.py

@@ -141,14 +141,6 @@ def main(parameterFile):
         rowsPET=[r for r in rowsMatch if r['seriesDescription']=='PET WB']
         print('entry[{}/{}] rowsPET: {}'.format(im[dbParticipantField],seqNum,rowsPET))
         
-        dataRow={}
-        if len(rowsCT)==1:
-            dataRow=rowsCT[0]
-        if len(dataRow)==0:
-            if len(rowsPET)==1:
-                dataRows=rowsPET[0]
-        
-
         #deal with erroneous outcomes (ie- no CT, more then 1 CT, no PET, more than 1 PET)
         if len(rowsPET)!=1 or len(rowsCT)!=1:
             mode='insert'
@@ -190,8 +182,6 @@ def main(parameterFile):
         
             outRow[dbParticipantField]=im[dbParticipantField]
             outRow['SequenceNum']=seqNum
-            if len(dataRow)==1:
-                outRow['dicomStudy']=dataRow['dicomStudy']
         
         else:
             #never happens if we check for sd1 before matches are found
@@ -199,12 +189,14 @@ def main(parameterFile):
         
         if len(rowsPET)==1:    
             outRow['PETWB_orthancId']=rowsPET[0]['orthancSeries']
+            outRow['studyDate']=rowsPET[0]['studyDate']
         if len(rowsCT)==1:
             outRow['CT_orthancId']=rowsCT[0]['orthancSeries']
-        if len(dataRow)==1:
-            outRow['studyDate']=dataRow['studyDate']
+            outRow['studyDate']=rowsCT[0]['studyDate']
+
         outRow['imagingVisitId']=im['imagingVisitId']
         outRow['visitCode']='VISIT_'+str(im['imagingVisitId'])
+        outRow['patientCode']=re.sub('/','_',im[dbParticipantField])
 
         modifyStatus=db.modifyRows(mode,projectStudy,outputSchema,\
                 outputQuery,[outRow])

+ 2 - 1
templates/preprocessLimfomiPET.json

@@ -12,6 +12,7 @@
  	 "participantField":"ParticipantId",
 	"transferQuery":"imageTransferReport",
 	"missingImagesQuery":"missingImages",
-	"missingImagesSchema":"lists"
+	"missingImagesSchema":"lists",
+	"segmentationQuery":"Segmentations"
 	}
 }