Browse source

Initial import (mostly copy of IRAEMM)

NIX User 2 years ago
commit
d6736712d2
3 changed files with 493 additions and 0 deletions
  1. pythonScripts/populateImagingFromTransferList.py (+216 −0)
  2. pythonScripts/preprocess.py (+260 −0)
  3. templates/preprocessLimfomiPET.json (+17 −0)

+ 216 - 0
pythonScripts/populateImagingFromTransferList.py

@@ -0,0 +1,216 @@
+#sorts studies from the orthanc dataset into the target study dataset, matching by date
+
+#takes transferQuery as the list of images that should be available on orthanc
+
+
+import os
+import json
+import re
+import sys
+import datetime
+
+def main(parameterFile):
+    fhome=os.path.expanduser('~')
+    fsetup=os.path.join(fhome,'.labkey','setup.json')
+    with open(fsetup,'r') as f:
+        setup=json.load(f)
+
+    sys.path.insert(0,setup['paths']['nixWrapper'])
+
+    import nixWrapper
+
+    nixWrapper.loadLibrary("labkeyInterface")
+
+    import labkeyInterface
+    import labkeyDatabaseBrowser
+    import labkeyFileBrowser
+
+    fconfig=os.path.join(fhome,'.labkey','network.json')
+
+    net=labkeyInterface.labkeyInterface()
+    net.init(fconfig)
+    db=labkeyDatabaseBrowser.labkeyDB(net)
+    fb=labkeyFileBrowser.labkeyFileBrowser(net)
+
+    with open(parameterFile,'r') as f:
+        pars=json.load(f)
+
+
+
+    #from orthancDatabase/Imaging dataset
+    projectOrthanc=pars['Orthanc']['project']
+    inputQuery=pars['Orthanc']['queryName']
+    inputSchema=pars['Orthanc']['schemaName']
+    inputParticipantField=pars['Orthanc']['participantField']
+
+    #to target project dataset
+    projectStudy=pars['Database']['project']
+    #'iPNUMMretro/Study'
+    #for prospective, set
+    #projectStudy='IPNUMMprospektiva/Study'
+    outputQuery=pars['Database']['queryName']
+    outputSchema=pars['Database']['schemaName']
+    #select patientId that are contained in the demographics dataset
+    transferQuery=pars['Database']['transferQuery']
+    dbParticipantField=pars['Database']['participantField']
+
+    missingSchema=pars['Database']['missingImagesSchema']
+    missingQuery=pars['Database']['missingImagesQuery']
+
+    #delete all rows from missingQuery (should be rebuilt for every run)
+    dsMissing=db.selectRows(projectStudy,missingSchema,missingQuery,[])
+    db.modifyRows('delete',projectStudy,missingSchema,missingQuery,dsMissing['rows'])
+
+
+    #make a list of images from transferQuery
+    dsImage=db.selectRows(projectStudy,outputSchema,transferQuery,[])
+
+    for im in dsImage['rows']:
+
+
+        #for orthanc
+        inputIdFilter={'variable':inputParticipantField,\
+                'value':im[dbParticipantField],\
+                'oper':'eq'}
+
+        #for database
+        idFilter={'variable':dbParticipantField,\
+                'value':im[dbParticipantField],\
+                'oper':'eq'}
+
+
+        seqNum=im['imagingVisitId']
+        seqFilter={'variable':'SequenceNum','value':str(seqNum),'oper':'eq'}
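+        #SequenceNum in the target dataset mirrors imagingVisitId, so (participant, visit) identifies one row at most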
+
+        
+        #speedup: skip rows whose match was already recorded in Database/queryName
+        #ds1 are the matching outputs in target dataset
+        ds1=db.selectRows(projectStudy,outputSchema,outputQuery,\
+                [idFilter,seqFilter])
+
+        if len(ds1['rows'])>1:
+            #never happens (idFilter and seqFilter)
+            print('ERROR: too many matches in {} for {}/{}'.\
+                    format(outputQuery,im[dbParticipantField],seqNum))
+            continue
+        if len(ds1['rows'])>0:
+            #just the one match, fine
+            print('Entry for {}/{} already resolved'.\
+                    format(im[dbParticipantField],seqNum))
+            
+            entry=ds1['rows'][0]
+            try:
+                #both series ids already set: nothing left to resolve
+                if len(entry['PETWB_orthancId'])>0 and len(entry['CT_orthancId'])>0:
+                    continue
+            except (KeyError,TypeError):
+                #missing or null series id: fall through and retry the match
+                pass
+            print('Debug mode for missing cases')
+
+        #have to convert from datetime to %Y%m%d format
+        #dateFilter={'variable':'imageDate','value':im['imageDate'],'oper':'eq'}
+
+        dsOrthanc=db.selectRows(projectOrthanc,inputSchema,inputQuery,[inputIdFilter])
+
+        #if dsOrthanc['rows'] is empty no date can match; the case then falls
+        #through to QA and is reported via the missingImages schema/query below
+    
+        #convert dates
+        sDates={r['orthancSeries']:datetime.datetime.strptime(r['studyDate'],'%Y/%m/%d %H:%M:%S') 
+                for r in dsOrthanc['rows']}
+
+        sDates={x:sDates[x].strftime('%Y%m%d') for x in sDates}
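+        #e.g. '2015/03/02 10:15:00' becomes '20150302', the format imageDate is stored in (illustrative value)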
+
+        #unique dates (for date mismatch)
+        dates=list(set(sDates.values()))
+        
+        #keep only series whose converted date equals this visit's imageDate
+        sDatesMatch={x:sDates[x] for x in sDates if sDates[x]==im['imageDate']}
+        
+        #rows of the date-matched series
+        rowsMatch=[r for r in dsOrthanc['rows'] if r['orthancSeries'] in list(sDatesMatch.keys())]
+
+        #select CT matches
+        rowsCT=[r for r in rowsMatch if r['seriesDescription'].find('CT WB')==0]
+        #should not include fov
+        rowsCT=[r for r in rowsCT if r['seriesDescription'].find('fov')<0]
+        print('entry[{}/{}] rowsCT : {}'.format(im[dbParticipantField],seqNum,rowsCT))
+        
+        #should be one and one only
+        rowsPET=[r for r in rowsMatch if r['seriesDescription']=='PET WB']
+        print('entry[{}/{}] rowsPET: {}'.format(im[dbParticipantField],seqNum,rowsPET))
+        
+        dataRow={}
+        if len(rowsCT)==1:
+            dataRow=rowsCT[0]
+        if len(dataRow)==0:
+            if len(rowsPET)==1:
+                dataRow=rowsPET[0]
+        
+
+        #deal with erroneous outcomes (i.e. no CT, more than 1 CT, no PET, more than 1 PET)
+        if len(rowsPET)!=1 or len(rowsCT)!=1:
+            mode='insert'
+            #copy values en masse
+            vals=[dbParticipantField,'imagingVisitId','imageDate']
+            orow={v:im[v] for v in vals}
+            orow['imageDateMismatch']=','.join(dates)
+            
+            #generate description of failure
+            failDesc=''
+            if len(rowsPET)==0:
+                failDesc+='[MISSING PET]'
+            if len(rowsPET)>1:
+                failDesc+='[MULTIPLE PET]'
+            if len(rowsCT)==0:
+                failDesc+='[MISSING CT]'
+            if len(rowsCT)>1:
+                failDesc+='[MULTIPLE CT]'
+            orow['failureDescription']=failDesc
+
+            #generate fail entries
+            db.modifyRows(mode,projectStudy,missingSchema,\
+                    missingQuery,[orow])
+            #don't abort; below we still fill whichever of PET/CT resolved uniquely
+         
+
+        #figure out which row in output study to update
+        filters=[]
+        print('Participant {} Sequence number {}'.\
+               format(im[dbParticipantField],str(seqNum)))
+        #refresh the matching study list (after CT we have found PET)
+        ds1=db.selectRows(projectStudy,outputSchema,outputQuery,\
+                [idFilter,seqFilter])
+
+        mode='update'
+        outRow={}
+        if len(ds1['rows'])==0:
+            mode='insert'
+        
+            outRow[dbParticipantField]=im[dbParticipantField]
+            outRow['SequenceNum']=seqNum
+            #dataRow is non-empty only when exactly one series matched
+            if len(dataRow)>0:
+                outRow['dicomStudy']=dataRow['dicomStudy']
+        
+        else:
+            #never happens if we check ds1 before matches are found
+            outRow=ds1['rows'][0]
+        
+        if len(rowsPET)==1:    
+            outRow['PETWB_orthancId']=rowsPET[0]['orthancSeries']
+        if len(rowsCT)==1:
+            outRow['CT_orthancId']=rowsCT[0]['orthancSeries']
+        if len(dataRow)>0:
+            outRow['studyDate']=dataRow['studyDate']
+        outRow['imagingVisitId']=im['imagingVisitId']
+        outRow['visitCode']='VISIT_'+str(im['imagingVisitId'])
+
+        modifyStatus=db.modifyRows(mode,projectStudy,outputSchema,\
+                outputQuery,[outRow])
+        print('{}'.format(modifyStatus))
+        
+    print("Done")
+
+if __name__=='__main__':
+    main(sys.argv[1])
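
Both scripts take a single argument, the parameter file. With the template added in this commit, the invocation would be (assuming python3 and a populated ~/.labkey/setup.json):

    python3 pythonScripts/populateImagingFromTransferList.py templates/preprocessLimfomiPET.json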

+ 260 - 0
pythonScripts/preprocess.py

@@ -0,0 +1,260 @@
+import os
+import json
+import re
+import subprocess
+import nibabel
+import shutil
+import sys
+
+#importing this module has no side effects; all work happens in main()
+
+def getPatientLabel(row,participantField='PatientId'):
+    return row[participantField].replace('/','_') 
+
+def getVisitLabel(row):
+    return 'VISIT_'+str(int(row['SequenceNum']))
+
+def getStudyLabel(row,participantField='PatientId'):
+    return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
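+#e.g. a row with PatientId '12/34' and SequenceNum 2.0 yields '12_34', 'VISIT_2' and '12_34-VISIT_2' (illustrative ids)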
+
+def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
+    print("Running matlab")
+    #run after all directories have been assembled
+    script="addpath('"+generalCodes+"');"
+    script+="addpath('"+niftiTools+"');"
+    script+="preprocess_DM('"+studyDir+"',0,0);"
+    script+="test;"
+    script+="quit();"
+    #outText=subprocess.check_output(["/bin/echo",script])
+    try: 
+        outText=subprocess.check_output([matlab,"-nojvm","-r",script])
+    except subprocess.CalledProcessError as e:
+        print("Failed with:\n{}".format(e.output.decode('utf-8')))
+        return False
+    print(outText.decode('utf-8'))
+    return True
+
+
+def getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
+        participantField='PatientId'):
+
+    """Fetch the dicom zip for one series from orthanc and unzip it.
+
+    If the zip file is already at the expected path, the download is skipped.
+    Returns True for a valid outcome, False on row formatting trouble or an
+    unzip failure.
+    """
+
+    seriesId=row[imageSelector[im]]
+    if seriesId=="0":
+        return False
+
+    print("getDicom: {}: {}".format(im,seriesId))
+    fname=os.path.join(zipDir,\
+            getStudyLabel(row,participantField)+'_'+im+".zip")
+
+    #copy data from orthanc
+    if os.path.isfile(fname):
+        print("Data already loaded. Skipping")
+    else:
+        print("getDicom: Loading data from orthanc")
+        ofb.getZip('series',seriesId,fname)
+
+    #unzip the zipped dicom series
+
+    unzipDir=os.path.join(rawDir,im)
+
+    if os.path.isdir(unzipDir):
+        print("Data already unzipped")
+        return True
+    
+    try:
+        os.mkdir(unzipDir)
+    except FileExistsError:
+        #stale partial directory: remove and recreate before extracting
+        shutil.rmtree(unzipDir)
+        os.mkdir(unzipDir)
+    
+    try:
+        outTxt=subprocess.check_output(["unzip","-d",unzipDir,"-xj",fname])
+    except subprocess.CalledProcessError:
+        print("unzip failed for {}".format(fname))
+        return False
+
+    return True    
+
+def updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
+        participantField='PatientId'):
+    row['patientCode']=getPatientLabel(row,participantField)
+    row['visitCode']=getVisitLabel(row)
+    for im in imageResampledField:
+        row[imageResampledField[im]]=gzFileNames[im]
+    db.modifyRows('update',project,'study',dataset,[row])
+ 
+
+def main(parameterFile):
+    fhome=os.path.expanduser('~')
+    with open(os.path.join(fhome,".labkey","setup.json")) as f:
+        setup=json.load(f)
+
+    sys.path.insert(0,setup["paths"]["nixWrapper"])
+    import nixWrapper
+
+    nixWrapper.loadLibrary("labkeyInterface")
+
+    import labkeyInterface
+    import labkeyDatabaseBrowser
+    import labkeyFileBrowser
+
+    nixWrapper.loadLibrary("orthancInterface")
+    
+    import orthancInterface
+    import orthancFileBrowser
+
+    fconfig=os.path.join(fhome,'.labkey','network.json')
+
+    matlab=setup["paths"]["matlab"]
+    generalCodes=setup["paths"]["generalCodes"]
+    niftiTools=setup["paths"]["niftiTools"]
+    gzip=setup['paths']['gzip']
+
+    net=labkeyInterface.labkeyInterface()
+    net.init(fconfig)
+    db=labkeyDatabaseBrowser.labkeyDB(net)
+    fb=labkeyFileBrowser.labkeyFileBrowser(net)
+
+    onet=orthancInterface.orthancInterface()
+    onet.init(fconfig)
+    ofb=orthancFileBrowser.orthancFileBrowser(onet)
+
+
+    with open(parameterFile) as f:
+        pars=json.load(f)
+
+    project=pars['Database']['project']
+    dataset=pars['Database']['queryName']
+    schema=pars['Database']['schemaName']
+
+    tempBase=os.path.join(fhome,'temp')
+
+
+    participantField=pars['Database']['participantField']
+
+    #all images from database
+    ds=db.selectRows(project,schema,dataset,[])
+    imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
+    #output
+    imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
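+    #three outputs per row: resampled CT, resampled PET and the ROI mask (presumably produced by the matlab step)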
+
+    #use webdav to transfer files (even though it is localhost)
+
+    i=0
+    for row in ds["rows"]:
+        print("Starting row id:{} seq:{}".format(row[participantField],row['SequenceNum']))
+        #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
+        #and processedDir/studyName_PET_notCropped_2mmVoxel.nii
+        volumeFileNames={im:\
+            getStudyLabel(row,participantField)+'_'+im+
+            '_notCropped_2mmVoxel.nii'\
+                for im in imageResampledField}
+        gzFileNames={im:f+".gz" \
+            for (im,f) in volumeFileNames.items()}
+    
+        #build/check remote directory structure
+        remoteDir=fb.buildPathURL(project,['preprocessedImages',\
+            getPatientLabel(row,participantField),getVisitLabel(row)])
+
+        gzRemoteFiles={im:remoteDir+'/'+f\
+            for (im,f) in gzFileNames.items()}
+
+        remoteFilePresent=[fb.entryExists(f)\
+            for f in gzRemoteFiles.values()]
+
+        for f in gzRemoteFiles.values():
+            print("[{}]: [{}]".format(f,fb.entryExists(f)))
+
+
+        if all(remoteFilePresent):
+            print("Entry for row done.")
+            updateRow(db,project,dataset,row,imageResampledField,\
+                gzFileNames,participantField)
+            continue
+
+    
+        #setup the directory structure for preprocess_DM
+        studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
+        print("Making local directories in {}".format(studyDir))
+
+        if not os.path.isdir(studyDir):
+            os.mkdir(studyDir)
+
+        rawDir=os.path.join(studyDir,'Raw')
+        if not os.path.isdir(rawDir):
+            os.mkdir(rawDir)
+
+        zipDir=os.path.join(studyDir,'Zip')
+        if not os.path.isdir(zipDir):
+            os.mkdir(zipDir)
+
+        processedDir=os.path.join(studyDir,'Processed')
+        if not os.path.isdir(processedDir):
+            os.mkdir(processedDir)
+
+        #specify local file names with path 
+        volumeFiles={im:os.path.join(processedDir,f)\
+            for (im,f) in volumeFileNames.items()}
+        gzFiles={im:f+".gz"\
+            for (im,f) in volumeFiles.items()}
+
+        filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
+    
+    
+        if not all(filesPresent):
+
+            #use imageSelector -> inputs
+            for im in imageSelector:
+                #checks if raw files are already loaded
+                getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
+                    participantField)
+
+
+    
+            #preprocess and zip
+            ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
+            if not ok:
+                shutil.rmtree(studyDir)
+                continue
+
+
+            for f in volumeFiles.values():
+                print("Running gzip {}".format(f))
+                outText=subprocess.check_output([gzip,f])
+                print(outText.decode('utf-8'))
+
+        #upload local files to remote
+        for im in gzFiles:
+        #for local,remote in zip(gzFiles,gzRemoteFiles):
+            local=gzFiles[im]
+            remote=gzRemoteFiles[im]
+            print("Uploading {}".format(local))
+            fb.writeFileToFile(local,remote)
+
+
+        #update row and let it know where the processed files are
+        updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
+            participantField)
+   
+
+        #cleanup
+        shutil.rmtree(studyDir)
+    
+
+        #debug leftover: i==-1 never holds, so all rows are processed; change -1 to a row count to stop early
+        if i==-1:
+            break
+        i=i+1
+
+    print("Done")
+
+
+if __name__ == '__main__':
+    main(sys.argv[1])
+
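
For each row the script stages everything under ~/temp before uploading; a sketch of the layout it creates, using the illustrative study label 12_34-VISIT_2:

    ~/temp/12_34-VISIT_2/
        Zip/12_34-VISIT_2_CT.zip                                dicom zip pulled from orthanc
        Raw/CT/, Raw/PET/                                       unzipped dicom series
        Processed/12_34-VISIT_2_CT_notCropped_2mmVoxel.nii.gz   (likewise for PET and patientmask)

The whole study directory is removed once the gzipped volumes are uploaded and the row is updated.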

+ 17 - 0
templates/preprocessLimfomiPET.json

@@ -0,0 +1,17 @@
+{ "Orthanc":
+	{"project":"Orthanc/Database",
+	"schemaName":"study",
+	"queryName":"Imaging",
+	"demographicsQuery":"Demographics",
+	"participantField":"PatientId"
+	},
+  "Database":
+  	{"project":"limfomiPET/Study",
+	 "schemaName":"study",
+	 "queryName":"Imaging1",
+ 	 "participantField":"ParticipantId",
+	"transferQuery":"imageTransferReport",
+	"missingImagesQuery":"missingImages",
+	"missingImagesSchema":"lists"
+	}
+}
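
The same parameter file serves both scripts: populateImagingFromTransferList.py reads the Orthanc block plus transferQuery and the missingImages* entries, while preprocess.py only needs project, schemaName, queryName and participantField from the Database block (binary paths come from ~/.labkey/setup.json). A minimal sanity check, as a sketch:

    import json
    #check that a parameter file carries the keys both scripts read
    with open('templates/preprocessLimfomiPET.json') as f:
        pars=json.load(f)
    for k in ('project','schemaName','queryName','participantField',
              'transferQuery','missingImagesSchema','missingImagesQuery'):
        assert k in pars['Database'], k
    for k in ('project','schemaName','queryName','participantField'):
        assert k in pars['Orthanc'], k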