Browse source

Initial import (mostly copy of IRAEMM)

NIX User 2 years ago
commit
d6736712d2
3 changed files with 493 additions and 0 deletions
  1. pythonScripts/populateImagingFromTransferList.py (+216 −0)
  2. pythonScripts/preprocess.py (+260 −0)
  3. templates/preprocessLimfomiPET.json (+17 −0)

+ 216 - 0
pythonScripts/populateImagingFromTransferList.py

@@ -0,0 +1,216 @@
+#sorts studies from the orthanc dataset into the target study dataset, matching by date
+
+#takes transferQuery as the list of images that should be available on orthanc
+
+
+import os
+import json
+import re
+import sys
+import datetime
+
+def main(parameterFile):
+    fhome=os.path.expanduser('~')
+    fsetup=os.path.join(fhome,'.labkey','setup.json')
+    with open(fsetup,'r') as f:
+        setup=json.load(f)
+
+    sys.path.insert(0,setup['paths']['nixWrapper'])
+
+    import nixWrapper
+
+    nixWrapper.loadLibrary("labkeyInterface")
+
+    import labkeyInterface
+    import labkeyDatabaseBrowser
+    import labkeyFileBrowser
+
+    fconfig=os.path.join(fhome,'.labkey','network.json')
+
+    net=labkeyInterface.labkeyInterface()
+    net.init(fconfig)
+    db=labkeyDatabaseBrowser.labkeyDB(net)
+    fb=labkeyFileBrowser.labkeyFileBrowser(net)
+
+    with open(parameterFile,'r') as f:
+        pars=json.load(f)
+
+
+
+    #from orthancDatabase/Imaging dataset
+    projectOrthanc=pars['Orthanc']['project']
+    inputQuery=pars['Orthanc']['queryName']
+    inputSchema=pars['Orthanc']['schemaName']
+    inputParticipantField=pars['Orthanc']['participantField']
+
+    #to target project dataset
+    projectStudy=pars['Database']['project']
+    #'iPNUMMretro/Study'
+    #for prospective, set
+    #projectStudy='IPNUMMprospektiva/Study'
+    outputQuery=pars['Database']['queryName']
+    outputSchema=pars['Database']['schemaName']
+    #select patientId that are contained in the demographics dataset
+    transferQuery=pars['Database']['transferQuery']
+    dbParticipantField=pars['Database']['participantField']
+
+    missingSchema=pars['Database']['missingImagesSchema']
+    missingQuery=pars['Database']['missingImagesQuery']
+
+    #delete all rows from missingQuery (should be rebuilt for every run)
+    dsMissing=db.selectRows(projectStudy,missingSchema,missingQuery,[])
+    db.modifyRows('delete',projectStudy,missingSchema,missingQuery,dsMissing['rows'])
+
+
+    #make a list of images from transferQuery
+    dsImage=db.selectRows(projectStudy,outputSchema,transferQuery,[])
+
+    for im in dsImage['rows']:
+
+
+        #for orthanc
+        inputIdFilter={'variable':inputParticipantField,\
+                'value':im[dbParticipantField],\
+                'oper':'eq'}
+
+        #for database
+        idFilter={'variable':dbParticipantField,\
+                'value':im[dbParticipantField],\
+                'oper':'eq'}
+
+
+        seqNum=im['imagingVisitId']
+        seqFilter={'variable':'SequenceNum','value':str(seqNum),'oper':'eq'}
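+        #SequenceNum in the target dataset mirrors imagingVisitId, so (participant, visit) identifies one row at most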
+
+        
+        #speedup: skip rows whose match was already recorded in Database/queryName
+        #ds1 are the matching outputs in target dataset
+        ds1=db.selectRows(projectStudy,outputSchema,outputQuery,\
+                [idFilter,seqFilter])
+
+        if len(ds1['rows'])>1:
+            #never happens (idFilter and seqFilter)
+            print('ERROR: too many matches in {} for {}/{}'.\
+                    format(outputQuery,im[dbParticipantField],seqNum))
+            continue
+        if len(ds1['rows'])>0:
+            #just the one match, fine
+            print('Entry for {}/{} already resolved'.\
+                    format(im[dbParticipantField],seqNum))
+            
+            entry=ds1['rows'][0]
+            try:
+                #both series ids already set: nothing left to resolve
+                if len(entry['PETWB_orthancId'])>0 and len(entry['CT_orthancId'])>0:
+                    continue
+            except (KeyError,TypeError):
+                #missing or null series id: fall through and retry the match
+                pass
+            print('Debug mode for missing cases')
+
+        #have to convert from datetime to %Y%m%d format
+        #dateFilter={'variable':'imageDate','value':im['imageDate'],'oper':'eq'}
+
+        dsOrthanc=db.selectRows(projectOrthanc,inputSchema,inputQuery,[inputIdFilter])
+
+        #if dsOrthanc['rows'] is empty no date can match; the case then falls
+        #through to QA and is reported via the missingImages schema/query below
+    
+        #convert dates
+        sDates={r['orthancSeries']:datetime.datetime.strptime(r['studyDate'],'%Y/%m/%d %H:%M:%S') 
+                for r in dsOrthanc['rows']}
+
+        sDates={x:sDates[x].strftime('%Y%m%d') for x in sDates}
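+        #e.g. '2015/03/02 10:15:00' becomes '20150302', the format imageDate is stored in (illustrative value)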
+
+        #unique dates (for date mismatch)
+        dates=list(set(sDates.values()))
+        
+        #keep only series whose converted date equals this visit's imageDate
+        sDatesMatch={x:sDates[x] for x in sDates if sDates[x]==im['imageDate']}
+        
+        #rows of the date-matched series
+        rowsMatch=[r for r in dsOrthanc['rows'] if r['orthancSeries'] in list(sDatesMatch.keys())]
+
+        #select CT matches
+        rowsCT=[r for r in rowsMatch if r['seriesDescription'].find('CT WB')==0]
+        #should not include fov
+        rowsCT=[r for r in rowsCT if r['seriesDescription'].find('fov')<0]
+        print('entry[{}/{}] rowsCT : {}'.format(im[dbParticipantField],seqNum,rowsCT))
+        
+        #should be one and one only
+        rowsPET=[r for r in rowsMatch if r['seriesDescription']=='PET WB']
+        print('entry[{}/{}] rowsPET: {}'.format(im[dbParticipantField],seqNum,rowsPET))
+        
+        dataRow={}
+        if len(rowsCT)==1:
+            dataRow=rowsCT[0]
+        if len(dataRow)==0:
+            if len(rowsPET)==1:
+                dataRow=rowsPET[0]
+        
+
+        #deal with erroneous outcomes (i.e. no CT, more than 1 CT, no PET, more than 1 PET)
+        if len(rowsPET)!=1 or len(rowsCT)!=1:
+            mode='insert'
+            #copy values en masse
+            vals=[dbParticipantField,'imagingVisitId','imageDate']
+            orow={v:im[v] for v in vals}
+            orow['imageDateMismatch']=','.join(dates)
+            
+            #generate description of failure
+            failDesc=''
+            if len(rowsPET)==0:
+                failDesc+='[MISSING PET]'
+            if len(rowsPET)>1:
+                failDesc+='[MULTIPLE PET]'
+            if len(rowsCT)==0:
+                failDesc+='[MISSING CT]'
+            if len(rowsCT)>1:
+                failDesc+='[MULTIPLE CT]'
+            orow['failureDescription']=failDesc
+
+            #generate fail entries
+            db.modifyRows(mode,projectStudy,missingSchema,\
+                    missingQuery,[orow])
+            #don't abort; below we still fill whichever of PET/CT resolved uniquely
+         
+
+        #figure out which row in output study to update
+        filters=[]
+        print('Participant {} Sequence number {}'.\
+               format(im[dbParticipantField],str(seqNum)))
+        #refresh the matching study list (after CT we have found PET)
+        ds1=db.selectRows(projectStudy,outputSchema,outputQuery,\
+                [idFilter,seqFilter])
+
+        mode='update'
+        outRow={}
+        if len(ds1['rows'])==0:
+            mode='insert'
+        
+            outRow[dbParticipantField]=im[dbParticipantField]
+            outRow['SequenceNum']=seqNum
+            #dataRow is non-empty only when exactly one series matched
+            if len(dataRow)>0:
+                outRow['dicomStudy']=dataRow['dicomStudy']
+        
+        else:
+            #never happens if we check ds1 before matches are found
+            outRow=ds1['rows'][0]
+        
+        if len(rowsPET)==1:    
+            outRow['PETWB_orthancId']=rowsPET[0]['orthancSeries']
+        if len(rowsCT)==1:
+            outRow['CT_orthancId']=rowsCT[0]['orthancSeries']
+        if len(dataRow)>0:
+            outRow['studyDate']=dataRow['studyDate']
+        outRow['imagingVisitId']=im['imagingVisitId']
+        outRow['visitCode']='VISIT_'+str(im['imagingVisitId'])
+
+        modifyStatus=db.modifyRows(mode,projectStudy,outputSchema,\
+                outputQuery,[outRow])
+        print('{}'.format(modifyStatus))
+        
+    print("Done")
+
+if __name__=='__main__':
+    main(sys.argv[1])
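
Both scripts take a single argument, the parameter file. With the template added in this commit, the invocation would be (assuming python3 and a populated ~/.labkey/setup.json):

    python3 pythonScripts/populateImagingFromTransferList.py templates/preprocessLimfomiPET.json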

+ 260 - 0
pythonScripts/preprocess.py

@@ -0,0 +1,260 @@
+import os
+import json
+import re
+import subprocess
+import nibabel
+import shutil
+import sys
+
+#importing this module has no side effects; all work happens in main()
+
+def getPatientLabel(row,participantField='PatientId'):
+    return row[participantField].replace('/','_') 
+
+def getVisitLabel(row):
+    return 'VISIT_'+str(int(row['SequenceNum']))
+
+def getStudyLabel(row,participantField='PatientId'):
+    return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
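+#e.g. a row with PatientId '12/34' and SequenceNum 2.0 yields '12_34', 'VISIT_2' and '12_34-VISIT_2' (illustrative ids)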
+
+def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
+    print("Running matlab")
+    #run after all directories have been assembled
+    script="addpath('"+generalCodes+"');"
+    script+="addpath('"+niftiTools+"');"
+    script+="preprocess_DM('"+studyDir+"',0,0);"
+    script+="test;"
+    script+="quit();"
+    #outText=subprocess.check_output(["/bin/echo",script])
+    try: 
+        outText=subprocess.check_output([matlab,"-nojvm","-r",script])
+    except subprocess.CalledProcessError as e:
+        print("Failed with:\n{}".format(e.output.decode('utf-8')))
+        return False
+    print(outText.decode('utf-8'))
+    return True
+
+
+def getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
+        participantField='PatientId'):
+
+    """Fetch the dicom zip for one series from orthanc and unzip it.
+
+    If the zip file is already at the expected path, the download is skipped.
+    Returns True for a valid outcome, False on row formatting trouble or an
+    unzip failure.
+    """
+
+    seriesId=row[imageSelector[im]]
+    if seriesId=="0":
+        return False
+
+    print("getDicom: {}: {}".format(im,seriesId))
+    fname=os.path.join(zipDir,\
+            getStudyLabel(row,participantField)+'_'+im+".zip")
+
+    #copy data from orthanc
+    if os.path.isfile(fname):
+        print("Data already loaded. Skipping")
+    else:
+        print("getDicom: Loading data from orthanc")
+        ofb.getZip('series',seriesId,fname)
+
+    #unzip the zipped dicom series
+
+    unzipDir=os.path.join(rawDir,im)
+
+    if os.path.isdir(unzipDir):
+        print("Data already unzipped")
+        return True
+    
+    try:
+        os.mkdir(unzipDir)
+    except FileExistsError:
+        #stale partial directory: remove and recreate before extracting
+        shutil.rmtree(unzipDir)
+        os.mkdir(unzipDir)
+    
+    try:
+        outTxt=subprocess.check_output(["unzip","-d",unzipDir,"-xj",fname])
+    except subprocess.CalledProcessError:
+        print("unzip failed for {}".format(fname))
+        return False
+
+    return True    
+
+def updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
+        participantField='PatientId'):
+    row['patientCode']=getPatientLabel(row,participantField)
+    row['visitCode']=getVisitLabel(row)
+    for im in imageResampledField:
+        row[imageResampledField[im]]=gzFileNames[im]
+    db.modifyRows('update',project,'study',dataset,[row])
+ 
+
+def main(parameterFile):
+    fhome=os.path.expanduser('~')
+    with open(os.path.join(fhome,".labkey","setup.json")) as f:
+        setup=json.load(f)
+
+    sys.path.insert(0,setup["paths"]["nixWrapper"])
+    import nixWrapper
+
+    nixWrapper.loadLibrary("labkeyInterface")
+
+    import labkeyInterface
+    import labkeyDatabaseBrowser
+    import labkeyFileBrowser
+
+    nixWrapper.loadLibrary("orthancInterface")
+    
+    import orthancInterface
+    import orthancFileBrowser
+
+    fconfig=os.path.join(fhome,'.labkey','network.json')
+
+    matlab=setup["paths"]["matlab"]
+    generalCodes=setup["paths"]["generalCodes"]
+    niftiTools=setup["paths"]["niftiTools"]
+    gzip=setup['paths']['gzip']
+
+    net=labkeyInterface.labkeyInterface()
+    net.init(fconfig)
+    db=labkeyDatabaseBrowser.labkeyDB(net)
+    fb=labkeyFileBrowser.labkeyFileBrowser(net)
+
+    onet=orthancInterface.orthancInterface()
+    onet.init(fconfig)
+    ofb=orthancFileBrowser.orthancFileBrowser(onet)
+
+
+    with open(parameterFile) as f:
+        pars=json.load(f)
+
+    project=pars['Database']['project']
+    dataset=pars['Database']['queryName']
+    schema=pars['Database']['schemaName']
+
+    tempBase=os.path.join(fhome,'temp')
+
+
+    participantField=pars['Database']['participantField']
+
+    #all images from database
+    ds=db.selectRows(project,schema,dataset,[])
+    imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
+    #output
+    imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
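+    #three outputs per row: resampled CT, resampled PET and the ROI mask (presumably produced by the matlab step)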
+
+    #use webdav to transfer files (even though it is localhost)
+
+    i=0
+    for row in ds["rows"]:
+        print("Starting row id:{} seq:{}".format(row[participantField],row['SequenceNum']))
+        #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
+        #and processedDir/studyName_PET_notCropped_2mmVoxel.nii
+        volumeFileNames={im:\
+            getStudyLabel(row,participantField)+'_'+im+
+            '_notCropped_2mmVoxel.nii'\
+                for im in imageResampledField}
+        gzFileNames={im:f+".gz" \
+            for (im,f) in volumeFileNames.items()}
+    
+        #build/check remote directory structure
+        remoteDir=fb.buildPathURL(project,['preprocessedImages',\
+            getPatientLabel(row,participantField),getVisitLabel(row)])
+
+        gzRemoteFiles={im:remoteDir+'/'+f\
+            for (im,f) in gzFileNames.items()}
+
+        remoteFilePresent=[fb.entryExists(f)\
+            for f in gzRemoteFiles.values()]
+
+        for f in gzRemoteFiles.values():
+            print("[{}]: [{}]".format(f,fb.entryExists(f)))
+
+
+        if all(remoteFilePresent):
+            print("Entry for row done.")
+            updateRow(db,project,dataset,row,imageResampledField,\
+                gzFileNames,participantField)
+            continue
+
+    
+        #setup the directory structure for preprocess_DM
+        studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
+        print("Making local directories in {}".format(studyDir))
+
+        if not os.path.isdir(studyDir):
+            os.mkdir(studyDir)
+
+        rawDir=os.path.join(studyDir,'Raw')
+        if not os.path.isdir(rawDir):
+            os.mkdir(rawDir)
+
+        zipDir=os.path.join(studyDir,'Zip')
+        if not os.path.isdir(zipDir):
+            os.mkdir(zipDir)
+
+        processedDir=os.path.join(studyDir,'Processed')
+        if not os.path.isdir(processedDir):
+            os.mkdir(processedDir)
+
+        #specify local file names with path 
+        volumeFiles={im:os.path.join(processedDir,f)\
+            for (im,f) in volumeFileNames.items()}
+        gzFiles={im:f+".gz"\
+            for (im,f) in volumeFiles.items()}
+
+        filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
+    
+    
+        if not all(filesPresent):
+
+            #use imageSelector -> inputs
+            for im in imageSelector:
+                #checks if raw files are already loaded
+                getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
+                    participantField)
+
+
+    
+            #preprocess and zip
+            ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
+            if not ok:
+                shutil.rmtree(studyDir)
+                continue
+
+
+            for f in volumeFiles.values():
+                print("Running gzip {}".format(f))
+                outText=subprocess.check_output([gzip,f])
+                print(outText.decode('utf-8'))
+
+        #upload local files to remote
+        for im in gzFiles:
+        #for local,remote in zip(gzFiles,gzRemoteFiles):
+            local=gzFiles[im]
+            remote=gzRemoteFiles[im]
+            print("Uploading {}".format(local))
+            fb.writeFileToFile(local,remote)
+
+
+        #update row and let it know where the processed files are
+        updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
+            participantField)
+   
+
+        #cleanup
+        shutil.rmtree(studyDir)
+    
+
+        #debug leftover: i==-1 never holds, so all rows are processed; change -1 to a row count to stop early
+        if i==-1:
+            break
+        i=i+1
+
+    print("Done")
+
+
+if __name__ == '__main__':
+    main(sys.argv[1])
+
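
For each row the script stages everything under ~/temp before uploading; a sketch of the layout it creates, using the illustrative study label 12_34-VISIT_2:

    ~/temp/12_34-VISIT_2/
        Zip/12_34-VISIT_2_CT.zip                                dicom zip pulled from orthanc
        Raw/CT/, Raw/PET/                                       unzipped dicom series
        Processed/12_34-VISIT_2_CT_notCropped_2mmVoxel.nii.gz   (likewise for PET and patientmask)

The whole study directory is removed once the gzipped volumes are uploaded and the row is updated.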

+ 17 - 0
templates/preprocessLimfomiPET.json

@@ -0,0 +1,17 @@
+{ "Orthanc":
+	{"project":"Orthanc/Database",
+	"schemaName":"study",
+	"queryName":"Imaging",
+	"demographicsQuery":"Demographics",
+	"participantField":"PatientId"
+	},
+  "Database":
+  	{"project":"limfomiPET/Study",
+	 "schemaName":"study",
+	 "queryName":"Imaging1",
+ 	 "participantField":"ParticipantId",
+	"transferQuery":"imageTransferReport",
+	"missingImagesQuery":"missingImages",
+	"missingImagesSchema":"lists"
+	}
+}
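
The same parameter file serves both scripts: populateImagingFromTransferList.py reads the Orthanc block plus transferQuery and the missingImages* entries, while preprocess.py only needs project, schemaName, queryName and participantField from the Database block (binary paths come from ~/.labkey/setup.json). A minimal sanity check, as a sketch:

    import json
    #check that a parameter file carries the keys both scripts read
    with open('templates/preprocessLimfomiPET.json') as f:
        pars=json.load(f)
    for k in ('project','schemaName','queryName','participantField',
              'transferQuery','missingImagesSchema','missingImagesQuery'):
        assert k in pars['Database'], k
    for k in ('project','schemaName','queryName','participantField'):
        assert k in pars['Orthanc'], k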