Преглед изворни кода

Adding segmentations from remote directory (with attached version); modifying populateImagingFromOrthanc and preprocess to deal with prospective study layout

NIX User пре 4 година
родитељ
комит
fa011e3a21

+ 36 - 13
pythonScripts/addSegmentations.py

@@ -6,6 +6,13 @@ import nibabel
 import shutil
 import sys
 
+if len(sys.argv)<2:
+    print("Usage {} sourceDir version(v1 or similar)".format(sys.argv[0]))
+    sys.exit(0)
+
+sourceDir=sys.argv[1]
+ver=sys.argv[2]
+
 shome=os.path.expanduser('~nixUser')
 fhome=os.path.expanduser('~')
 with open(os.path.join(fhome,".labkey","setup.json")) as f:
@@ -35,18 +42,22 @@ ds=db.selectRows(project,'study',dataset,[])
 #imageSelector={"CT":"CT","PET":"PETWB"};
 imageResampledField={"Segm":"Segmentation"}
 
+participantField='PatientId'
+#for prospective
+#participantField='ParticipantId'
+
 #projectNIfTIBase=os.path.join(labkeyBase,'files',project,'@files/nifti')
 #use webdav to transfer file (even though it is localhost)
 
 
-def getPatientLabel(row):
-    return row['PatientId'].replace('/','_') 
+def getPatientLabel(row,participantField='PatientId'):
+    return row[participantField].replace('/','_') 
 
 def getVisitLabel(row):
     return 'VISIT_'+str(int(row['SequenceNum']))
 
-def getStudyLabel(row):
-    return getPatientLabel(row)+'-'+getVisitLabel(row)
+def getStudyLabel(row,participantField='PatientId'):
+    return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
 
 def updateRow(project,dataset,row,imageResampledField,gzFileNames):
     for im in imageResampledField:
@@ -58,16 +69,28 @@ for row in ds["rows"]:
 
     #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
     #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
-    gzFileNames={im:\
-            getStudyLabel(row)+'_'+im+'.nii.gz'\
+
+    #standard names provided by Zan and Daniel
+    baseFileNames={im:\
+            getStudyLabel(row,participantField)+'_'+im \
                 for im in imageResampledField}
     
+    #append suffix to base name for source files
+    gzSrcFileNames={im:baseFileNames[im]+'.nii.gz'\
+            for im in baseFileNames}
+
+    #add version to out files
+    gzOutFileNames={im:baseFileNames[im]+'_'+ver+'.nii.gz'\
+            for im in baseFileNames}
+
     #build/check remote directory structure
     remoteDir=fb.buildPathURL(project,\
-            ['preprocessedImages',getPatientLabel(row),getVisitLabel(row)])
+            ['preprocessedImages',getPatientLabel(row,participantField),\
+            getVisitLabel(row)])
 
+    #target files
     gzRemoteFiles={im:remoteDir+'/'+f\
-            for (im,f) in gzFileNames.items()}
+            for (im,f) in gzOutFileNames.items()}
 
     remoteFilePresent=[fb.entryExists(f)\
             for f in gzRemoteFiles.values()]
@@ -79,11 +102,11 @@ for row in ds["rows"]:
     if all(remoteFilePresent):
         print("Entry for row done.")
         updateRow(project,dataset,row,imageResampledField,\
-                gzFileNames)
+                gzOutFileNames)
         continue
 
-    inputDir=fb.buildPathURL(project,['segmentations']) 
-    inputFiles={im:inputDir+'/'+f for (im,f) in gzFileNames.items()}
+    inputDir=fb.buildPathURL(project,[sourceDir]) 
+    inputFiles={im:inputDir+'/'+f for (im,f) in gzSrcFileNames.items()}
     
     for im in inputFiles:
         f=inputFiles[im]
@@ -91,7 +114,7 @@ for row in ds["rows"]:
             print("Input file {} not found".format(f))
             continue
         print("Found {}".format(f))
-        localFile=os.path.join(tempBase,gzFileNames[im])
+        localFile=os.path.join(tempBase,gzSrcFileNames[im])
         print("Local {}".format(localFile))
         fb.readFileToFile(f,localFile)
         fb.writeFileToFile(localFile,gzRemoteFiles[im])
@@ -99,7 +122,7 @@ for row in ds["rows"]:
         os.remove(localFile)
 
     #update row and let it know where the processed files are
-    updateRow(project,dataset,row,imageResampledField,gzFileNames)
+    updateRow(project,dataset,row,imageResampledField,gzOutFileNames)
    
 
     if i==-1:

+ 0 - 83
pythonScripts/linkOrthanc.py

@@ -1,83 +0,0 @@
-import os
-import json
-import re
-import sys
-import datetime
-import re
-
-fhome=os.path.expanduser('~')
-sys.path.insert(1,fhome+'/software/src/labkeyInterface')
-import labkeyInterface
-import labkeyDatabaseBrowser
-
-fconfig=os.path.join(fhome,'.labkey','network.json')
-
-net=labkeyInterface.labkeyInterface()
-net.init(fconfig)
-db=labkeyDatabaseBrowser.labkeyDB(net)
-
-
-i=0
-projectOrthanc='Orthanc/Database'
-projectIPNU='iPNUMMretro/Study'
-
-ds=db.selectRows(projectIPNU,'study','Imaging',[])
-
-varList={'CT':['startswith','CT%20WB'],'PETWB':['eq','PET%20WB'],
-        'PETWBUncorrected':['eq','PET%20WB%20Uncorrected'],
-        'Topogram':['startswith','Topogram']}
-
-i=0
-
-for row in ds['rows']:
-
-    for var in varList:
-        print('Filtering for {}/{}'.format(var,varList[var][1]))
-        qfilter={}
-        qfilter['variable']='seriesDescription'
-        qfilter['value']=varList[var][1]
-        qfilter['oper']=varList[var][0]
-        
-        qfilter1={}
-        qfilter1['variable']='PatientId'
-        qfilter1['value']=row['PatientId']
-        qfilter1['oper']='eq'
-
-        #don't have dates, so I have to poll
-        qfilter2={}
-        qfilter2['variable']='studyDate'
-        qfilter2['oper']='dateeq'
-        fdate=row['date']
-        fdate=re.sub(r' (.*)$','',fdate)
-        fdate=re.sub(r'/',r'-',fdate)
-        qfilter2['value']=fdate
-
-
-        tfilter=[qfilter,qfilter1,qfilter2]
-        ds1=db.selectRows(projectOrthanc,'study','Imaging',tfilter)
-        print('[{}][{}][{}]: {}'.format(\
-                row['PatientId'],var,fdate,len(ds1['rows'])))
-        
-        
-        for r1 in ds1['rows']:
-            print("ID: {}, DESC: {}, DATE: {}".format(\
-                r1['PatientId'],r1['seriesDescription'],r1['studyDate']))
-            #print("Study date {}/{}".format(row['date'],r1['studyDate']))
-
-
-            
-        row[var]=len(ds1['rows'])
-        if len(ds1['rows'])==1:
-            row[var]=ds1['rows'][0]['orthancSeries']
-
-        if len(ds1['rows'])>1:
-            if var=='CT':
-                varC=[r1['orthancSeries']  for r1 in ds1['rows']\
-                        if r1['seriesDescription'].find('fov')<0] 
-                if len(varC)==1:
-                    row[var]=varC[0]
-
-       
-    db.modifyRows('update',projectIPNU,'study','Imaging',[row])
-        
-print("Done")

+ 66 - 11
pythonScripts/populateImagingFromOrthanc.py

@@ -1,3 +1,5 @@
+#date-sorts studies from the orthanc dataset into the target study dataset
+
 import os
 import json
 import re
@@ -6,7 +8,11 @@ import datetime
 import re
 
 fhome=os.path.expanduser('~')
-sys.path.insert(1,fhome+'/software/src/labkeyInterface')
+fsetup=os.path.join(fhome,'.labkey','setup.json')
+with open(fsetup,'r') as f:
+    setup=json.load(f)
+
+sys.path.insert(0,setup['paths']['labkeyInterface'])
 import labkeyInterface
 import labkeyDatabaseBrowser
 
@@ -18,51 +24,100 @@ db=labkeyDatabaseBrowser.labkeyDB(net)
 
 
 i=0
+#from orthancDatabase/Imaging dataset
 projectOrthanc='Orthanc/Database'
 inputDataset='Imaging'
+#to target project dataset
 projectStudy='iPNUMMretro/Study'
+#for prospective, set
+projectStudy='IPNUMMprospektiva/Study'
 outputDataset='Imaging1'
+#select patientId that are contained in the demographics dataset
+demographicDataset='ClinicalData'
+#for prospective
+demographicDataset='demographicData'
+
+
+orthancParticipantField='PatientId'
+participantField='PatientId'
+#for prospective
+participantField='ParticipantId'
+
+#make a list of patients
+dsDemo=db.selectRows(projectStudy,'study',demographicDataset,[])
+patients=[row[participantField] for row in dsDemo['rows']]
+patients=list(set(patients))
+
+patientListStr=""
+for p in patients:
+    if len(patientListStr)>0:
+        patientListStr+=";"
+    patientListStr+=p
+
 
-ds=db.selectRows(projectOrthanc,'study',inputDataset,[])
+patientFilter={'variable':orthancParticipantField,
+        'value':patientListStr,'oper':'in'}
+
+#takes orthanc as the baseline, selects from patient list
+ds=db.selectRows(projectOrthanc,'study',inputDataset,[patientFilter])
 
 
 #single entry for the patientId/dicomStudy pair
-selectVars=['PatientId','dicomStudy']
+selectVars={participantField:orthancParticipantField,\
+        'dicomStudy':'dicomStudy'}
+
+dates=[datetime.datetime.strptime(row['studyDate'],'%Y/%m/%d %H:%M:%S') \
+        for row in ds['rows']]
 
-dates=[datetime.datetime.strptime(row['studyDate'],'%Y/%m/%d %H:%M:%S') for row in ds['rows']]
+#date sorted entries
 idx=sorted(range(len(dates)),key=lambda k:dates[k])
 
+
+#historical traverse of all studies from inputDataset
 for j in range(len(dates)):
-    #row in ds['rows']:
+    
     row=ds['rows'][idx[j]]
 
     #skip series which don't match selected filters
     outvar='NONE'
     sd=row['seriesDescription']
     if sd=='PET WB':
-        outvar='PETWB'
+        outvar='PETWB_orthancId'
     if sd.find('CT WB')==0:
         if sd.find('fov')<0:
-            outvar='CT'
+            outvar='CT_orthancId'
 
+    #skip irrelevant series
     if outvar=='NONE':
         continue
 
     filters=[]
     for v in selectVars:
-        filters.append({'variable':v,'value':row[v],'oper':'eq'})
+        filters.append({'variable':v,\
+                'value':row[selectVars[v]],'oper':'eq'})
+
+    #ds2 are all studies by patient from sorted dataset
     ds2=db.selectRows(projectStudy,'study',outputDataset,
-            [{'variable':'PatientId','value':row['PatientId'],'oper':'eq'}])
+            [{'variable':participantField,\
+                    'value':row[orthancParticipantField],'oper':'eq'}])
+    
+    #ds1 is the matching row from output dataset 
     ds1=db.selectRows(projectStudy,'study',outputDataset,filters)
     if len(ds1['rows'])>1:
-        print('ERROR: too many matches for {}/{}'.format(row['PatientId'],row['dicomStudy']))
+        print('ERROR: too many matches for {}/{}'.\
+                format(row[orthancParticipantField],row['dicomStudy']))
         continue
+
     mode='update'
     outRow={}
     if len(ds1['rows'])==0:
         mode='insert'
-        outRow['PatientId']=row['PatientId']
+        outRow[participantField]=row[orthancParticipantField]
+        
+        #setting sequence number to length of already included studies
+        #sorted by date makes it historically incremental
         outRow['SequenceNum']=len(ds2['rows'])
+
         outRow['dicomStudy']=row['dicomStudy']
     else:
         outRow=ds1['rows'][0]

+ 28 - 15
pythonScripts/preprocess.py

@@ -46,9 +46,16 @@ ofb=orthancFileBrowser.orthancFileBrowser(onet)
 
 hi=0
 project='iPNUMMretro/Study'
-dataset='Imaging'
+#for prospective, set
+project='IPNUMMprospektiva/Study'
+dataset='Imaging1'
 tempBase=os.path.join(fhome,'temp')
 
+
+participantField='PatientId'
+#for prospective, set
+participantField='ParticipantId'
+
 #all images from database
 ds=db.selectRows(project,'study',dataset,[])
 #imageSelector={"CT":"CT","PET":"PETWB"};
@@ -59,14 +66,14 @@ imageResampledField={"CT":"ctResampled","PET":"petResampled"}
 #use webdav to transfer file (even though it is localhost)
 
 
-def getPatientLabel(row):
-    return row['PatientId'].replace('/','_') 
+def getPatientLabel(row,participantField='PatientId'):
+    return row[participantField].replace('/','_') 
 
 def getVisitLabel(row):
     return 'VISIT_'+str(int(row['SequenceNum']))
 
-def getStudyLabel(row):
-    return getPatientLabel(row)+'-'+getVisitLabel(row)
+def getStudyLabel(row,participantField='PatientId'):
+    return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
 
 def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
 
@@ -86,7 +93,8 @@ def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
     return True
 
 
-def getDicom(ofb,row,zipDir,rawDir,im,imageSelector):
+def getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
+        participantField='PatientId'):
 
     #Load the dicom zip file and unzips it. If zip file is already at the expected path, it skips the loading step
 
@@ -97,7 +105,8 @@ def getDicom(ofb,row,zipDir,rawDir,im,imageSelector):
         return False
 
     print("{}: {}".format(im,seriesId))
-    fname=os.path.join(zipDir,getStudyLabel(row)+'_'+im+".zip");
+    fname=os.path.join(zipDir,\
+            getStudyLabel(row,participantField)+'_'+im+".zip");
 
     #copy data from orthanc
     if os.path.isfile(fname):
@@ -127,8 +136,9 @@ def getDicom(ofb,row,zipDir,rawDir,im,imageSelector):
 
     return True    
 
-def updateRow(project,dataset,row,imageResampledField,gzFileNames):
-    row['patientCode']=getPatientLabel(row)
+def updateRow(project,dataset,row,imageResampledField,gzFileNames,\
+        participantField='PatientId'):
+    row['patientCode']=getPatientLabel(row,participantField)
     row['visitCode']=getVisitLabel(row)
     for im in imageResampledField:
         row[imageResampledField[im]]=gzFileNames[im]
@@ -142,14 +152,15 @@ for row in ds["rows"]:
     #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
     #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
     volumeFileNames={im:\
-            getStudyLabel(row)+'_'+im+
+            getStudyLabel(row,participantField)+'_'+im+
             '_notCropped_2mmVoxel.nii'\
                 for im in imageSelector}
     gzFileNames={im:f+".gz" \
             for (im,f) in volumeFileNames.items()}
     
     #build/check remote directory structure
-    remoteDir=fb.buildPathURL(project,['preprocessedImages',getPatientLabel(row),getVisitLabel(row)])
+    remoteDir=fb.buildPathURL(project,['preprocessedImages',\
+            getPatientLabel(row,participantField),getVisitLabel(row)])
 
     gzRemoteFiles={im:remoteDir+'/'+f\
             for (im,f) in gzFileNames.items()}
@@ -164,12 +175,12 @@ for row in ds["rows"]:
     if all(remoteFilePresent):
         print("Entry for row done.")
         updateRow(project,dataset,row,imageResampledField,\
-                gzFileNames)
+                gzFileNames,participantField)
         continue
 
     
     #setup the directory structure for preprocess_DM
-    studyDir=os.path.join(tempBase,getStudyLabel(row))
+    studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
     if not os.path.isdir(studyDir):
         os.mkdir(studyDir)
 
@@ -198,7 +209,8 @@ for row in ds["rows"]:
 
         for im in imageSelector:
             #checks if raw files are already loaded
-            getDicom(ofb,row,zipDir,rawDir,im,imageSelector)
+            getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
+                    participantField)
 
 
     
@@ -224,7 +236,8 @@ for row in ds["rows"]:
 
 
     #update row and let it know where the processed files are
-    updateRow(project,dataset,row,imageResampledField,gzFileNames)
+    updateRow(project,dataset,row,imageResampledField,gzFileNames,\
+            participantField)
    
 
     #cleanup