preprocess.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. import os
  2. import json
  3. import re
  4. import subprocess
  5. import nibabel
  6. import shutil
  7. import sys
  8. import matplotlib.pyplot
  9. #nothing gets done if you do import
  10. def getPatientLabel(row,participantField='PatientId'):
  11. return row[participantField].replace('/','_')
  12. def getVisitLabel(row):
  13. return 'VISIT_'+str(int(row['SequenceNum']))
  14. def getStudyLabel(row,participantField='PatientId'):
  15. return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
  16. def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
  17. print("Running matlab")
  18. #run after all directories have been assembled
  19. script="addpath('"+generalCodes+"');"
  20. script+="addpath('"+niftiTools+"');"
  21. script+="preprocess_DM('"+studyDir+"',0,0);"
  22. script+="test;"
  23. script+="quit();"
  24. #outText=subprocess.check_output(["/bin/echo",script])
  25. try:
  26. outText=subprocess.check_output([matlab,"-nojvm","-r",script])
  27. except subprocess.CalledProcessError as e:
  28. print("Failed with:\n{}".format(e.output.decode('utf-8')))
  29. return False
  30. print(outText.decode('utf-8'))
  31. return True
  32. def getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
  33. participantField='PatientId'):
  34. #Load the dicom zip file and unzips it. If zip file is already at the expected path, it skips the loading step
  35. #Return True for valid outcome and False for troubles in row formating or unzip failures
  36. seriesId=row[imageSelector[im]];
  37. if seriesId=="0":
  38. return False
  39. if seriesId==None:
  40. return False
  41. print("getDicom: {}: {}".format(im,seriesId))
  42. fname=os.path.join(zipDir,\
  43. getStudyLabel(row,participantField)+'_'+im+".zip");
  44. #copy data from orthanc
  45. if os.path.isfile(fname):
  46. print("Data already loaded. Skipping")
  47. else:
  48. print("getDicom: Loading data from orthanc")
  49. ofb.getZip('series',seriesId,fname)
  50. #unzip the zipped dicom series
  51. unzipDir=os.path.join(rawDir,im)
  52. if os.path.isdir(unzipDir):
  53. print("Data already unzipped")
  54. return True
  55. try:
  56. os.mkdir(unzipDir)
  57. except FileExistsError:
  58. shutil.rmtree(unzipDir)
  59. try:
  60. outTxt=subprocess.check_output(["unzip","-d",unzipDir,"-xj",fname])
  61. except subprocess.CalledProcessError:
  62. print("unzip failed for {}".format(fname))
  63. return False
  64. return True
  65. def createOverlay(gzFileNames,overlayName):
  66. ct=nibabel.load(gzFileNames['CT'])
  67. pet=nibabel.load(gzFileNames['PET'])
  68. aCT=ct.get_fdata()
  69. aPET=pet.get_fdata()
  70. n2=int(aCT.shape[1]/2)
  71. matplotlib.pyplot.imshow(aCT[:,n2,:],cmap='gray')
  72. matplotlib.pyplot.imshow(aPET[:,n2,:],cmap='viridis',alpha=0.5)
  73. matplotlib.pyplot.savefig(overlayName)
  74. def updateRow(db,project,dataset,row,imageResampledField,gzFileNames,overlayName='overlay',\
  75. participantField='PatientId'):
  76. row['patientCode']=getPatientLabel(row,participantField)
  77. row['visitCode']=getVisitLabel(row)
  78. for im in imageResampledField:
  79. row[imageResampledField[im]]=gzFileNames[im]
  80. row['overlay']=overlayName
  81. db.modifyRows('update',project,'study',dataset,[row])
  82. def main(parameterFile):
  83. debug=False
  84. fhome=os.path.expanduser('~')
  85. with open(os.path.join(fhome,".labkey","setup.json")) as f:
  86. setup=json.load(f)
  87. sys.path.insert(0,setup["paths"]["nixWrapper"])
  88. import nixWrapper
  89. nixWrapper.loadLibrary("labkeyInterface")
  90. import labkeyInterface
  91. import labkeyDatabaseBrowser
  92. import labkeyFileBrowser
  93. nixWrapper.loadLibrary("orthancInterface")
  94. import orthancInterface
  95. import orthancFileBrowser
  96. fconfig=os.path.join(fhome,'.labkey','network.json')
  97. matlab=setup["paths"]["matlab"]
  98. generalCodes=setup["paths"]["generalCodes"]
  99. niftiTools=setup["paths"]["niftiTools"]
  100. gzip=setup['paths']['gzip']
  101. net=labkeyInterface.labkeyInterface()
  102. net.init(fconfig)
  103. db=labkeyDatabaseBrowser.labkeyDB(net)
  104. fb=labkeyFileBrowser.labkeyFileBrowser(net)
  105. onet=orthancInterface.orthancInterface()
  106. onet.init(fconfig)
  107. ofb=orthancFileBrowser.orthancFileBrowser(onet)
  108. with open(parameterFile) as f:
  109. pars=json.load(f)
  110. hi=0
  111. project=pars['Database']['project']
  112. dataset=pars['Database']['queryName']
  113. schema=pars['Database']['schemaName']
  114. tempBase=os.path.join(fhome,'temp')
  115. participantField=pars['Database']['participantField']
  116. #all images from database
  117. ds=db.selectRows(project,schema,dataset,[])
  118. imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
  119. #output
  120. imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
  121. #use webdav to transfer file (even though it is localhost)
  122. i=0
  123. for row in ds["rows"]:
  124. pId=row[participantField]
  125. seqNo=row['SequenceNum']
  126. print("Starting row id:{} seq:{}".format(pId,seqNo))
  127. #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
  128. #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
  129. volumeFileNames={im:\
  130. getStudyLabel(row,participantField)+'_'+im+
  131. '_notCropped_2mmVoxel.nii'\
  132. for im in imageResampledField}
  133. gzFileNames={im:f+".gz" \
  134. for (im,f) in volumeFileNames.items()}
  135. #build/check remote directory structure
  136. remoteDir=fb.buildPathURL(project,['preprocessedImages',\
  137. getPatientLabel(row,participantField),getVisitLabel(row)])
  138. overlayName=getStudyLabel(row,participantField)+'_overlay.png'
  139. gzRemoteFiles={im:remoteDir+'/'+f\
  140. for (im,f) in gzFileNames.items()}
  141. overlayRemote=remoteDir+'/'+overlayName
  142. remoteFilePresent=[fb.entryExists(f)\
  143. for f in gzRemoteFiles.values()]
  144. for f in gzRemoteFiles.values():
  145. print("[{}]: [{}]".format(f,fb.entryExists(f)))
  146. #force preprocessing if needsResampling is set to true
  147. overloadFlag='needsResampling'
  148. try:
  149. sampleOK=not row[overloadFlag]
  150. print(f'Using {overloadFlag} -> sampleOK={sampleOK}')
  151. except KeyError:
  152. sampleOK=True
  153. if sampleOK and all(remoteFilePresent):
  154. print("Entry for row done.")
  155. updateRow(db,project,dataset,row,imageResampledField,\
  156. gzFileNames,overlayName,participantField)
  157. continue
  158. #setup the directory structure for preprocess_DM
  159. studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
  160. print("Making local directories in {}".format(studyDir))
  161. if not os.path.isdir(studyDir):
  162. os.mkdir(studyDir)
  163. rawDir=os.path.join(studyDir,'Raw')
  164. if not os.path.isdir(rawDir):
  165. os.mkdir(rawDir)
  166. zipDir=os.path.join(studyDir,'Zip')
  167. if not os.path.isdir(zipDir):
  168. os.mkdir(zipDir)
  169. processedDir=os.path.join(studyDir,'Processed')
  170. if not os.path.isdir(processedDir):
  171. os.mkdir(processedDir)
  172. #specify local file names with path
  173. volumeFiles={im:os.path.join(processedDir,f)\
  174. for (im,f) in volumeFileNames.items()}
  175. gzFiles={im:f+".gz"\
  176. for (im,f) in volumeFiles.items()}
  177. filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
  178. if not all(filesPresent):
  179. #use imageSelector -> inputs
  180. dicomStatus="OK"
  181. for im in imageSelector:
  182. #checks if raw files are already loaded
  183. ok=getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
  184. participantField)
  185. if not ok:
  186. dicomStatus="BAD"
  187. break
  188. if not dicomStatus=="OK":
  189. print('Troubles getting dicom for {}/{}'.format(pId,seqNo))
  190. continue
  191. #preprocess and zip
  192. ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
  193. if not ok:
  194. shutil.rmtree(studyDir)
  195. continue
  196. for f in volumeFiles.values():
  197. print("Running gzip {}".format(f))
  198. outText=subprocess.check_output([gzip,f])
  199. print(outText.decode('utf-8'))
  200. #create overlay
  201. overlayLocal=os.path.join(processedDir,overlayName)
  202. createOverlay(gzFiles,overlayLocal)
  203. #upload local files to remote
  204. for im in gzFiles:
  205. #for local,remote in zip(gzFiles,gzRemoteFiles):
  206. local=gzFiles[im]
  207. remote=gzRemoteFiles[im]
  208. print("Uploading {}".format(local))
  209. fb.writeFileToFile(local,remote)
  210. #upload overlay:
  211. print(f'Uploading {overlayLocal}')
  212. fb.writeFileToFile(overlayLocal,overlayRemote)
  213. #update row and let it know where the processed files are
  214. updateRow(db,project,dataset,row,imageResampledField,gzFileNames,overlayName,\
  215. participantField)
  216. #cleanup
  217. if not debug:
  218. shutil.rmtree(studyDir)
  219. if i==-1:
  220. break
  221. i=i+1
  222. print("Done")
  223. if __name__ == '__main__':
  224. main(sys.argv[1])