preprocess.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. import os
  2. import json
  3. import re
  4. import subprocess
  5. import nibabel
  6. import shutil
  7. import sys
  8. #nothing gets done if you do import
  9. def getPatientLabel(row,participantField='PatientId'):
  10. return row[participantField].replace('/','_')
  11. def getVisitLabel(row):
  12. return 'VISIT_'+str(int(row['SequenceNum']))
  13. def getStudyLabel(row,participantField='PatientId'):
  14. return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
  15. def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
  16. print("Running matlab")
  17. #run after all directories have been assembled
  18. script="addpath('"+generalCodes+"');"
  19. script+="addpath('"+niftiTools+"');"
  20. script+="preprocess_DM('"+studyDir+"',0,0);"
  21. script+="test;"
  22. script+="quit();"
  23. #outText=subprocess.check_output(["/bin/echo",script])
  24. try:
  25. outText=subprocess.check_output([matlab,"-nojvm","-r",script])
  26. except subprocess.CalledProcessError as e:
  27. print("Failed with:\n{}".format(e.output.decode('utf-8')))
  28. return False
  29. print(outText.decode('utf-8'))
  30. return True
  31. def getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
  32. participantField='PatientId'):
  33. #Load the dicom zip file and unzips it. If zip file is already at the expected path, it skips the loading step
  34. #Return True for valid outcome and False for troubles in row formating or unzip failures
  35. seriesId=row[imageSelector[im]];
  36. if seriesId=="0":
  37. return False
  38. if seriesId==None:
  39. return False
  40. print("getDicom: {}: {}".format(im,seriesId))
  41. fname=os.path.join(zipDir,\
  42. getStudyLabel(row,participantField)+'_'+im+".zip");
  43. #copy data from orthanc
  44. if os.path.isfile(fname):
  45. print("Data already loaded. Skipping")
  46. else:
  47. print("getDicom: Loading data from orthanc")
  48. ofb.getZip('series',seriesId,fname)
  49. #unzip the zipped dicom series
  50. unzipDir=os.path.join(rawDir,im)
  51. if os.path.isdir(unzipDir):
  52. print("Data already unzipped")
  53. return True
  54. try:
  55. os.mkdir(unzipDir)
  56. except FileExistsError:
  57. shutil.rmtree(unzipDir)
  58. try:
  59. outTxt=subprocess.check_output(["unzip","-d",unzipDir,"-xj",fname])
  60. except subprocess.CalledProcessError:
  61. print("unzip failed for {}".format(fname))
  62. return False
  63. return True
  64. def updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
  65. participantField='PatientId'):
  66. row['patientCode']=getPatientLabel(row,participantField)
  67. row['visitCode']=getVisitLabel(row)
  68. for im in imageResampledField:
  69. row[imageResampledField[im]]=gzFileNames[im]
  70. db.modifyRows('update',project,'study',dataset,[row])
  71. def main(parameterFile):
  72. fhome=os.path.expanduser('~')
  73. with open(os.path.join(fhome,".labkey","setup.json")) as f:
  74. setup=json.load(f)
  75. sys.path.insert(0,setup["paths"]["nixWrapper"])
  76. import nixWrapper
  77. nixWrapper.loadLibrary("labkeyInterface")
  78. import labkeyInterface
  79. import labkeyDatabaseBrowser
  80. import labkeyFileBrowser
  81. nixWrapper.loadLibrary("orthancInterface")
  82. import orthancInterface
  83. import orthancFileBrowser
  84. fconfig=os.path.join(fhome,'.labkey','network.json')
  85. matlab=setup["paths"]["matlab"]
  86. generalCodes=setup["paths"]["generalCodes"]
  87. niftiTools=setup["paths"]["niftiTools"]
  88. gzip=setup['paths']['gzip']
  89. net=labkeyInterface.labkeyInterface()
  90. net.init(fconfig)
  91. db=labkeyDatabaseBrowser.labkeyDB(net)
  92. fb=labkeyFileBrowser.labkeyFileBrowser(net)
  93. onet=orthancInterface.orthancInterface()
  94. onet.init(fconfig)
  95. ofb=orthancFileBrowser.orthancFileBrowser(onet)
  96. with open(parameterFile) as f:
  97. pars=json.load(f)
  98. hi=0
  99. project=pars['Database']['project']
  100. dataset=pars['Database']['queryName']
  101. schema=pars['Database']['schemaName']
  102. tempBase=os.path.join(fhome,'temp')
  103. participantField=pars['Database']['participantField']
  104. #all images from database
  105. ds=db.selectRows(project,schema,dataset,[])
  106. imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
  107. #output
  108. imageResampledField={"CT":"ctResampled","PET":"petResampled","patientmask":"ROImask"}
  109. #use webdav to transfer file (even though it is localhost)
  110. i=0
  111. for row in ds["rows"]:
  112. pId=row[participantField]
  113. seqNo=row['SequenceNum']
  114. print("Starting row id:{} seq:{}".format(pId,seqNo))
  115. #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
  116. #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
  117. volumeFileNames={im:\
  118. getStudyLabel(row,participantField)+'_'+im+
  119. '_notCropped_2mmVoxel.nii'\
  120. for im in imageResampledField}
  121. gzFileNames={im:f+".gz" \
  122. for (im,f) in volumeFileNames.items()}
  123. #build/check remote directory structure
  124. remoteDir=fb.buildPathURL(project,['preprocessedImages',\
  125. getPatientLabel(row,participantField),getVisitLabel(row)])
  126. gzRemoteFiles={im:remoteDir+'/'+f\
  127. for (im,f) in gzFileNames.items()}
  128. remoteFilePresent=[fb.entryExists(f)\
  129. for f in gzRemoteFiles.values()]
  130. for f in gzRemoteFiles.values():
  131. print("[{}]: [{}]".format(f,fb.entryExists(f)))
  132. #force preprocessing if needsResampling is set to true
  133. overloadFlag='needsResampling'
  134. try:
  135. sampleOK=not row[overloadFlag]
  136. print(f'Using {overloadFlag} -> sampleOK={sampleOK}')
  137. except KeyError:
  138. sampleOK=True
  139. if sampleOK and all(remoteFilePresent):
  140. print("Entry for row done.")
  141. updateRow(db,project,dataset,row,imageResampledField,\
  142. gzFileNames,participantField)
  143. continue
  144. #setup the directory structure for preprocess_DM
  145. studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
  146. print("Making local directories in {}".format(studyDir))
  147. if not os.path.isdir(studyDir):
  148. os.mkdir(studyDir)
  149. rawDir=os.path.join(studyDir,'Raw')
  150. if not os.path.isdir(rawDir):
  151. os.mkdir(rawDir)
  152. zipDir=os.path.join(studyDir,'Zip')
  153. if not os.path.isdir(zipDir):
  154. os.mkdir(zipDir)
  155. processedDir=os.path.join(studyDir,'Processed')
  156. if not os.path.isdir(processedDir):
  157. os.mkdir(processedDir)
  158. #specify local file names with path
  159. volumeFiles={im:os.path.join(processedDir,f)\
  160. for (im,f) in volumeFileNames.items()}
  161. gzFiles={im:f+".gz"\
  162. for (im,f) in volumeFiles.items()}
  163. filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
  164. if not all(filesPresent):
  165. #use imageSelector -> inputs
  166. dicomStatus="OK"
  167. for im in imageSelector:
  168. #checks if raw files are already loaded
  169. ok=getDicom(ofb,row,zipDir,rawDir,im,imageSelector,\
  170. participantField)
  171. if not ok:
  172. dicomStatus="BAD"
  173. break
  174. if not dicomStatus=="OK":
  175. print('Troubles getting dicom for {}/{}'.format(pId,seqNo))
  176. continue
  177. #preprocess and zip
  178. ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
  179. if not ok:
  180. shutil.rmtree(studyDir)
  181. continue
  182. for f in volumeFiles.values():
  183. print("Running gzip {}".format(f))
  184. outText=subprocess.check_output([gzip,f])
  185. print(outText.decode('utf-8'))
  186. #upload local files to remote
  187. for im in gzFiles:
  188. #for local,remote in zip(gzFiles,gzRemoteFiles):
  189. local=gzFiles[im]
  190. remote=gzRemoteFiles[im]
  191. print("Uploading {}".format(local))
  192. fb.writeFileToFile(local,remote)
  193. #update row and let it know where the processed files are
  194. updateRow(db,project,dataset,row,imageResampledField,gzFileNames,\
  195. participantField)
  196. #cleanup
  197. shutil.rmtree(studyDir)
  198. if i==-1:
  199. break
  200. i=i+1
  201. print("Done")
  202. if __name__ == '__main__':
  203. main(sys.argv[1])