preprocess.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. import os
  2. import json
  3. import re
  4. import subprocess
  5. import nibabel
  6. import shutil
  7. import sys
  8. import nixWrapper
  9. #nothing gets done if you do import
  10. def getPatientLabel(row,participantField='PatientId'):
  11. return row[participantField].replace('/','_')
  12. def getVisitLabel(row):
  13. return 'VISIT_'+str(int(row['SequenceNum']))
  14. def getStudyLabel(row,participantField='PatientId'):
  15. return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
  16. def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
  17. print("Running matlab")
  18. #run after all directories have been assembled
  19. script="addpath('"+generalCodes+"');"
  20. script+="addpath('"+niftiTools+"');"
  21. script+="preprocess_DM('"+studyDir+"',0,0);"
  22. script+="test;"
  23. script+="quit();"
  24. #outText=subprocess.check_output(["/bin/echo",script])
  25. try:
  26. outText=subprocess.check_output([matlab,"-nojvm","-r",script])
  27. except subprocess.CalledProcessError as e:
  28. print("Failed with:\n{}".format(e.output.decode('utf-8')))
  29. return False
  30. print(outText.decode('utf-8'))
  31. return True
  32. def getDicom(pars,row,zipDir,rawDir,im,imageSelector):
  33. #Load the dicom zip file and unzips it. If zip file is already at the expected path, it skips the loading step
  34. #Return True for valid outcome and False for troubles in row formating or unzip failures
  35. participantField=pars.get('participantField','PatientId')
  36. ofb=pars['ofb']
  37. seriesId=row[imageSelector[im]];
  38. if seriesId=="0":
  39. return False
  40. print("getDicom: {}: {}".format(im,seriesId))
  41. fname=os.path.join(zipDir,\
  42. getStudyLabel(row,participantField)+'_'+im+".zip");
  43. #copy data from orthanc
  44. if os.path.isfile(fname):
  45. print("Data already loaded. Skipping")
  46. else:
  47. print("getDicom: Loading data from orthanc")
  48. #use study specific orthanc
  49. if row['movedToProjectOrthanc']=='TRUE':
  50. ofbStore=ofb.net.store()
  51. ofb.net.set(pars['orthanc'])
  52. ofb.getZip('series',seriesId,fname)
  53. #reset to global value
  54. if row['movedToProjectOrthanc']=='TRUE':
  55. ofb.net.set(ofbStore)
  56. #unzip the zipped dicom series
  57. unzipDir=os.path.join(rawDir,im)
  58. if os.path.isdir(unzipDir):
  59. print("Data already unzipped")
  60. return True
  61. try:
  62. os.mkdir(unzipDir)
  63. except FileExistsError:
  64. shutil.rmtree(unzipDir)
  65. try:
  66. outTxt=subprocess.check_output(["unzip","-d",unzipDir,"-xj",fname])
  67. except subprocess.CalledProcessError:
  68. print("unzip failed for {}".format(fname))
  69. return False
  70. return True
  71. def updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
  72. participantField='PatientId'):
  73. row['patientCode']=getPatientLabel(row,participantField)
  74. row['visitCode']=getVisitLabel(row)
  75. for im in imageResampledField:
  76. row[imageResampledField[im]]=gzFileNames[im]
  77. db.modifyRows('update',project,'study',dataset,[row])
  78. def main(parameterFile):
  79. fhome=os.path.expanduser('~')
  80. with open(os.path.join(fhome,".labkey","setup.json")) as f:
  81. setup=json.load(f)
  82. nixWrapper.loadLibrary("labkeyInterface")
  83. import labkeyInterface
  84. import labkeyDatabaseBrowser
  85. import labkeyFileBrowser
  86. nixWrapper.loadLibrary("orthancInterface")
  87. import orthancInterface
  88. import orthancFileBrowser
  89. fconfig=os.path.join(fhome,'.labkey','network.json')
  90. matlab=setup["paths"]["matlab"]
  91. generalCodes=setup["paths"]["generalCodes"]
  92. niftiTools=setup["paths"]["niftiTools"]
  93. gzip=setup['paths']['gzip']
  94. net=labkeyInterface.labkeyInterface()
  95. net.init(fconfig)
  96. db=labkeyDatabaseBrowser.labkeyDB(net)
  97. fb=labkeyFileBrowser.labkeyFileBrowser(net)
  98. onet=orthancInterface.orthancInterface()
  99. onet.init(fconfig)
  100. ofb=orthancFileBrowser.orthancFileBrowser(onet)
  101. with open(parameterFile) as f:
  102. pars=json.load(f)
  103. hi=0
  104. project=pars['Database']['project']
  105. dataset=pars['Database']['queryName']
  106. schema=pars['Database']['schemaName']
  107. tempBase=os.path.join(fhome,'temp')
  108. participantField=pars['Database']['participantField']
  109. dicomPars={'participantField':participantField,'ofb':ofb}
  110. ds=db.selectRows(project,'study','StudyProperties',[])
  111. orthancObj={'server':ds['rows'][0]['orthancURL'],
  112. 'user':ds['rows'][0]['orthancUser'],
  113. 'password':ds['rows'][0]['orthancPassword']}
  114. dicomPars['orhanc']=orthancObj
  115. #all images from database
  116. ds=db.selectRows(project,schema,dataset,[])
  117. imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
  118. #output
  119. imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
  120. #use webdav to transfer file (even though it is localhost)
  121. i=0
  122. for row in ds["rows"]:
  123. #check if imageSelector fields are set
  124. orthancFields=[row[imageSelector[f]] for f in imageSelector]
  125. #what does labkey return for empty fields - undefined/None
  126. orthancFieldsOK=[f!=None for f in orthancFields]
  127. #here I assume it returns a string with length 0
  128. #orthancFieldsOK=[len(f)>0 for f in orthancFields]
  129. if not all(orthancFieldsOK):
  130. print('[{}/{}]: Missing orthanc fields:{}'.
  131. format(row[participantField],row['SequenceNum'],orthancFieldsOK))
  132. continue
  133. print("Starting row id:{} seq:{}".format(row[participantField],row['SequenceNum']))
  134. #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
  135. #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
  136. volumeFileNames={im:\
  137. getStudyLabel(row,participantField)+'_'+im+
  138. '_notCropped_2mmVoxel.nii'\
  139. for im in imageResampledField}
  140. gzFileNames={im:f+".gz" \
  141. for (im,f) in volumeFileNames.items()}
  142. #build/check remote directory structure
  143. remoteDir=fb.buildPathURL(project,['preprocessedImages',\
  144. getPatientLabel(row,participantField),getVisitLabel(row)])
  145. gzRemoteFiles={im:remoteDir+'/'+f\
  146. for (im,f) in gzFileNames.items()}
  147. remoteFilePresent=[fb.entryExists(f)\
  148. for f in gzRemoteFiles.values()]
  149. for f in gzRemoteFiles.values():
  150. print("[{}]: [{}]".format(f,fb.entryExists(f)))
  151. if all(remoteFilePresent):
  152. print("Entry for row done.")
  153. updateRow(db,project,dataset,row,imageResampledField,\
  154. gzFileNames,participantField)
  155. continue
  156. #setup the directory structure for preprocess_DM
  157. studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
  158. print("Making local directories in {}".format(studyDir))
  159. if not os.path.isdir(studyDir):
  160. os.mkdir(studyDir)
  161. rawDir=os.path.join(studyDir,'Raw')
  162. if not os.path.isdir(rawDir):
  163. os.mkdir(rawDir)
  164. zipDir=os.path.join(studyDir,'Zip')
  165. if not os.path.isdir(zipDir):
  166. os.mkdir(zipDir)
  167. processedDir=os.path.join(studyDir,'Processed')
  168. if not os.path.isdir(processedDir):
  169. os.mkdir(processedDir)
  170. #specify local file names with path
  171. volumeFiles={im:os.path.join(processedDir,f)\
  172. for (im,f) in volumeFileNames.items()}
  173. gzFiles={im:f+".gz"\
  174. for (im,f) in volumeFiles.items()}
  175. filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
  176. if not all(filesPresent):
  177. #use imageSelector -> inputs
  178. for im in imageSelector:
  179. #checks if raw files are already loaded
  180. getDicom(dicomPars,row,zipDir,rawDir,im,imageSelector)
  181. #preprocess and zip
  182. ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
  183. if not ok:
  184. shutil.rmtree(studyDir)
  185. continue
  186. for f in volumeFiles.values():
  187. print("Running gzip {}".format(f))
  188. outText=subprocess.check_output([gzip,f])
  189. print(outText.decode('utf-8'))
  190. #upload local files to remote
  191. for im in gzFiles:
  192. #for local,remote in zip(gzFiles,gzRemoteFiles):
  193. local=gzFiles[im]
  194. remote=gzRemoteFiles[im]
  195. print("Uploading {}".format(local))
  196. fb.writeFileToFile(local,remote)
  197. #update row and let it know where the processed files are
  198. updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
  199. participantField)
  200. #cleanup
  201. shutil.rmtree(studyDir)
  202. if i==-1:
  203. break
  204. i=i+1
  205. print("Done")
  206. if __name__ == '__main__':
  207. main(sys.argv[1])