preprocess.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. import os
  2. import json
  3. import re
  4. import subprocess
  5. import nibabel
  6. import shutil
  7. import sys
  8. import nixWrapper
  9. #nothing gets done if you do import
  10. def getPatientLabel(row,participantField='PatientId'):
  11. return row[participantField].replace('/','_')
  12. def getVisitLabel(row):
  13. return 'VISIT_'+str(int(row['SequenceNum']))
  14. def getStudyLabel(row,participantField='PatientId'):
  15. return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
  16. def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
  17. print("Running matlab")
  18. #run after all directories have been assembled
  19. script="addpath('"+generalCodes+"');"
  20. script+="addpath('"+niftiTools+"');"
  21. script+="preprocess_DM('"+studyDir+"',0,0);"
  22. script+="test;"
  23. script+="quit();"
  24. #outText=subprocess.check_output(["/bin/echo",script])
  25. try:
  26. outText=subprocess.check_output([matlab,"-nojvm","-r",script])
  27. except subprocess.CalledProcessError as e:
  28. print("Failed with:\n{}".format(e.output.decode('utf-8')))
  29. return False
  30. print(outText.decode('utf-8'))
  31. return True
  32. def getDicom(pars,row,zipDir,rawDir,im,imageSelector):
  33. #Load the dicom zip file and unzips it. If zip file is already at the expected path, it skips the loading step
  34. #Return True for valid outcome and False for troubles in row formating or unzip failures
  35. participantField=pars.get('participantField','PatientId')
  36. ofb=pars['ofb']
  37. if row['movedToProjectOrthanc']=='TRUE':
  38. ofbStore=ofb.store()
  39. ofb.set(pars['orthanc'])
  40. seriesId=row[imageSelector[im]];
  41. if seriesId=="0":
  42. return False
  43. print("getDicom: {}: {}".format(im,seriesId))
  44. fname=os.path.join(zipDir,\
  45. getStudyLabel(row,participantField)+'_'+im+".zip");
  46. #copy data from orthanc
  47. if os.path.isfile(fname):
  48. print("Data already loaded. Skipping")
  49. else:
  50. print("getDicom: Loading data from orthanc")
  51. #use study specific orthanc
  52. if row['movedToProjectOrthanc']=='TRUE':
  53. ofbStore=ofb.store()
  54. ofb.set(pars['orthanc'])
  55. ofb.getZip('series',seriesId,fname)
  56. #reset to global value
  57. if row['movedToProjectOrthanc']=='TRUE':
  58. ofb.set(ofbStore)
  59. #unzip the zipped dicom series
  60. unzipDir=os.path.join(rawDir,im)
  61. if os.path.isdir(unzipDir):
  62. print("Data already unzipped")
  63. return True
  64. try:
  65. os.mkdir(unzipDir)
  66. except FileExistsError:
  67. shutil.rmtree(unzipDir)
  68. try:
  69. outTxt=subprocess.check_output(["unzip","-d",unzipDir,"-xj",fname])
  70. except subprocess.CalledProcessError:
  71. print("unzip failed for {}".format(fname))
  72. return False
  73. return True
  74. def updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
  75. participantField='PatientId'):
  76. row['patientCode']=getPatientLabel(row,participantField)
  77. row['visitCode']=getVisitLabel(row)
  78. for im in imageResampledField:
  79. row[imageResampledField[im]]=gzFileNames[im]
  80. db.modifyRows('update',project,'study',dataset,[row])
  81. def main(parameterFile):
  82. fhome=os.path.expanduser('~')
  83. with open(os.path.join(fhome,".labkey","setup.json")) as f:
  84. setup=json.load(f)
  85. nixWrapper.loadLibrary("labkeyInterface")
  86. import labkeyInterface
  87. import labkeyDatabaseBrowser
  88. import labkeyFileBrowser
  89. nixWrapper.loadLibrary("orthancInterface")
  90. import orthancInterface
  91. import orthancFileBrowser
  92. fconfig=os.path.join(fhome,'.labkey','network.json')
  93. matlab=setup["paths"]["matlab"]
  94. generalCodes=setup["paths"]["generalCodes"]
  95. niftiTools=setup["paths"]["niftiTools"]
  96. gzip=setup['paths']['gzip']
  97. net=labkeyInterface.labkeyInterface()
  98. net.init(fconfig)
  99. db=labkeyDatabaseBrowser.labkeyDB(net)
  100. fb=labkeyFileBrowser.labkeyFileBrowser(net)
  101. onet=orthancInterface.orthancInterface()
  102. onet.init(fconfig)
  103. ofb=orthancFileBrowser.orthancFileBrowser(onet)
  104. with open(parameterFile) as f:
  105. pars=json.load(f)
  106. hi=0
  107. project=pars['Database']['project']
  108. dataset=pars['Database']['queryName']
  109. schema=pars['Database']['schemaName']
  110. tempBase=os.path.join(fhome,'temp')
  111. participantField=pars['Database']['participantField']
  112. dicomPars={'participantField':participantField,'ofb':ofb}
  113. ds=db.selectRows(project,'study','StudyProperties')
  114. orthancObj={'server':ds['rows'][0]['orthancURL'],
  115. 'user':ds['rows'][0]['orthancUser'],
  116. 'password':ds['rows'][0]['orthancPassword']}
  117. dicomPars['orhanc']=orthancObj
  118. #all images from database
  119. ds=db.selectRows(project,schema,dataset,[])
  120. imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
  121. #output
  122. imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
  123. #use webdav to transfer file (even though it is localhost)
  124. i=0
  125. for row in ds["rows"]:
  126. #check if imageSelector fields are set
  127. orthancFields=[row[imageSelector[f]] for f in imageSelector]
  128. #what does labkey return for empty fields - undefined/None
  129. orthancFieldsOK=[f!=None for f in orthancFields]
  130. #here I assume it returns a string with length 0
  131. #orthancFieldsOK=[len(f)>0 for f in orthancFields]
  132. if not all(orthancFieldsOK):
  133. print('[{}/{}]: Missing orthanc fields:{}'.
  134. format(row[participantField],row['SequenceNum'],orthancFieldsOK))
  135. continue
  136. print("Starting row id:{} seq:{}".format(row[participantField],row['SequenceNum']))
  137. #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
  138. #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
  139. volumeFileNames={im:\
  140. getStudyLabel(row,participantField)+'_'+im+
  141. '_notCropped_2mmVoxel.nii'\
  142. for im in imageResampledField}
  143. gzFileNames={im:f+".gz" \
  144. for (im,f) in volumeFileNames.items()}
  145. #build/check remote directory structure
  146. remoteDir=fb.buildPathURL(project,['preprocessedImages',\
  147. getPatientLabel(row,participantField),getVisitLabel(row)])
  148. gzRemoteFiles={im:remoteDir+'/'+f\
  149. for (im,f) in gzFileNames.items()}
  150. remoteFilePresent=[fb.entryExists(f)\
  151. for f in gzRemoteFiles.values()]
  152. for f in gzRemoteFiles.values():
  153. print("[{}]: [{}]".format(f,fb.entryExists(f)))
  154. if all(remoteFilePresent):
  155. print("Entry for row done.")
  156. updateRow(db,project,dataset,row,imageResampledField,\
  157. gzFileNames,participantField)
  158. continue
  159. #setup the directory structure for preprocess_DM
  160. studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
  161. print("Making local directories in {}".format(studyDir))
  162. if not os.path.isdir(studyDir):
  163. os.mkdir(studyDir)
  164. rawDir=os.path.join(studyDir,'Raw')
  165. if not os.path.isdir(rawDir):
  166. os.mkdir(rawDir)
  167. zipDir=os.path.join(studyDir,'Zip')
  168. if not os.path.isdir(zipDir):
  169. os.mkdir(zipDir)
  170. processedDir=os.path.join(studyDir,'Processed')
  171. if not os.path.isdir(processedDir):
  172. os.mkdir(processedDir)
  173. #specify local file names with path
  174. volumeFiles={im:os.path.join(processedDir,f)\
  175. for (im,f) in volumeFileNames.items()}
  176. gzFiles={im:f+".gz"\
  177. for (im,f) in volumeFiles.items()}
  178. filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
  179. if not all(filesPresent):
  180. #use imageSelector -> inputs
  181. for im in imageSelector:
  182. #checks if raw files are already loaded
  183. getDicom(dicomPars,row,zipDir,rawDir,im,imageSelector,\
  184. participantField)
  185. #preprocess and zip
  186. ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
  187. if not ok:
  188. shutil.rmtree(studyDir)
  189. continue
  190. for f in volumeFiles.values():
  191. print("Running gzip {}".format(f))
  192. outText=subprocess.check_output([gzip,f])
  193. print(outText.decode('utf-8'))
  194. #upload local files to remote
  195. for im in gzFiles:
  196. #for local,remote in zip(gzFiles,gzRemoteFiles):
  197. local=gzFiles[im]
  198. remote=gzRemoteFiles[im]
  199. print("Uploading {}".format(local))
  200. fb.writeFileToFile(local,remote)
  201. #update row and let it know where the processed files are
  202. updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
  203. participantField)
  204. #cleanup
  205. shutil.rmtree(studyDir)
  206. if i==-1:
  207. break
  208. i=i+1
  209. print("Done")
  210. if __name__ == '__main__':
  211. main(sys.argv[1])