preprocess.py

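"""Resample PET/CT series referenced in a LabKey dataset.

For every row of the configured dataset the script downloads the CT and PET
DICOM series from Orthanc, runs the MATLAB routine preprocess_DM on the
unpacked data, gzips the resulting NIfTI volumes, uploads them over webdav to
the LabKey file area (preprocessedImages/<patient>/<visit>) and writes the
uploaded file names back into the dataset row.

Run as: python preprocess.py <path to JSON parameter file>
"""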
import os
import json
import re
import subprocess
import nibabel
import shutil
import sys

# nothing gets executed on import; all the work happens in main()


def getPatientLabel(row, participantField='PatientId'):
    return row[participantField].replace('/', '_')


def getVisitLabel(row):
    return 'VISIT_'+str(int(row['SequenceNum']))


def getStudyLabel(row, participantField='PatientId'):
    return getPatientLabel(row, participantField)+'-'+getVisitLabel(row)


def runPreprocess_DM(matlab, generalCodes, niftiTools, studyDir):
    # run after all directories have been assembled
    script = "addpath('"+generalCodes+"');"
    script += "addpath('"+niftiTools+"');"
    script += "preprocess_DM('"+studyDir+"',0,0);"
    script += "test;"
    script += "quit();"
    # outText=subprocess.check_output(["/bin/echo",script])
    try:
        outText = subprocess.check_output([matlab, "-nojvm", "-r", script])
    except subprocess.CalledProcessError as e:
        print("Failed with:\n{}".format(e.output.decode('utf-8')))
        return False
    print(outText.decode('utf-8'))
    return True
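
# For orientation, the call assembled above expands to roughly the following
# (the paths and the study directory are illustrative placeholders):
#   matlab -nojvm -r "addpath('<generalCodes>');addpath('<niftiTools>');preprocess_DM('<studyDir>',0,0);test;quit();"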


def getDicom(ofb, row, zipDir, rawDir, im, imageSelector,
             participantField='PatientId'):
    # Download the DICOM series as a zip file and unzip it. If the zip file
    # is already at the expected path, the download step is skipped.
    # Returns True on success and False on row formatting problems or
    # unzip failures.
    seriesId = row[imageSelector[im]]
    if seriesId == "0":
        return False
    print("{}: {}".format(im, seriesId))
    fname = os.path.join(zipDir,
                         getStudyLabel(row, participantField)+'_'+im+".zip")
    # copy data from orthanc
    if os.path.isfile(fname):
        print("Data already loaded. Skipping")
    else:
        print("Loading data from orthanc")
        ofb.getZip('series', seriesId, fname)
    # unzip the zipped dicom series
    unzipDir = os.path.join(rawDir, im)
    if os.path.isdir(unzipDir):
        print("Data already unzipped")
        return True
    try:
        os.mkdir(unzipDir)
    except FileExistsError:
        shutil.rmtree(unzipDir)
    try:
        outTxt = subprocess.check_output(["unzip", "-d", unzipDir, "-xj", fname])
    except subprocess.CalledProcessError:
        print("unzip failed for {}".format(fname))
        return False
    return True
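
# After a successful getDicom() call the layout under the study directory is
# (shown for a hypothetical patient "P1", visit 1, CT series):
#   Zip/P1-VISIT_1_CT.zip   archive downloaded from Orthanc
#   Raw/CT/                 unzipped DICOM files, paths flattened by unzip -j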


def updateRow(db, project, dataset, row, imageResampledField, gzFileNames,
              participantField='PatientId'):
    # db is the labkeyDatabaseBrowser.labkeyDB instance created in main()
    row['patientCode'] = getPatientLabel(row, participantField)
    row['visitCode'] = getVisitLabel(row)
    for im in imageResampledField:
        row[imageResampledField[im]] = gzFileNames[im]
    db.modifyRows('update', project, 'study', dataset, [row])
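
# main() below reads two JSON configuration files. The keys shown are the
# ones the code accesses; the values are placeholders, not a real setup.
#
# ~/.labkey/setup.json:
#   {"paths": {"labkeyInterface": "/path/to/labkeyInterface",
#              "orthancInterface": "/path/to/orthancInterface",
#              "matlab": "/usr/local/bin/matlab",
#              "generalCodes": "/path/to/generalCodes",
#              "niftiTools": "/path/to/niftiTools"}}
#
# parameter file passed on the command line:
#   {"Database": {"project": "MyProject",
#                 "schemaName": "study",
#                 "queryName": "Imaging",
#                 "participantField": "PatientId"}}
#
# ~/.labkey/network.json is also required; it is passed unchanged to the
# init() calls of labkeyInterface and orthancInterface.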


def main(parameterFile):
    shome = os.path.expanduser('~nixUser')
    fhome = os.path.expanduser('~')
    with open(os.path.join(fhome, ".labkey", "setup.json")) as f:
        setup = json.load(f)

    # the labkey/orthanc interface modules live outside the standard path
    sys.path.insert(0, setup["paths"]["labkeyInterface"])
    import labkeyInterface
    import labkeyDatabaseBrowser
    import labkeyFileBrowser

    sys.path.insert(0, setup["paths"]["orthancInterface"])
    import orthancInterface
    import orthancFileBrowser

    fconfig = os.path.join(fhome, '.labkey', 'network.json')

    matlab = setup["paths"]["matlab"]
    generalCodes = setup["paths"]["generalCodes"]
    niftiTools = setup["paths"]["niftiTools"]

    net = labkeyInterface.labkeyInterface()
    net.init(fconfig)
    db = labkeyDatabaseBrowser.labkeyDB(net)
    fb = labkeyFileBrowser.labkeyFileBrowser(net)

    onet = orthancInterface.orthancInterface()
    onet.init(fconfig)
    ofb = orthancFileBrowser.orthancFileBrowser(onet)

    with open(parameterFile) as f:
        pars = json.load(f)

    project = pars['Database']['project']
    dataset = pars['Database']['queryName']
    schema = pars['Database']['schemaName']
    participantField = pars['Database']['participantField']

    tempBase = os.path.join(fhome, 'temp')
    if not os.path.isdir(tempBase):
        os.makedirs(tempBase)

    # all images from database
    ds = db.selectRows(project, schema, dataset, [])
    # input: orthanc series ids per modality
    imageSelector = {"CT": "CT_orthancId", "PET": "PETWB_orthancId"}
    # output: dataset fields that will hold the resampled file names
    imageResampledField = {"CT": "ctResampled", "PET": "petResampled",
                           "patientmask": "ROImask"}

    # use webdav to transfer files (even though it is localhost)
    i = 0
    for row in ds["rows"]:
        # interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
        # and processedDir/studyName_PET_notCropped_2mmVoxel.nii
        volumeFileNames = {im:
                           getStudyLabel(row, participantField)+'_'+im +
                           '_notCropped_2mmVoxel.nii'
                           for im in imageResampledField}
        gzFileNames = {im: f+".gz"
                       for (im, f) in volumeFileNames.items()}

        # build/check remote directory structure
        remoteDir = fb.buildPathURL(project, ['preprocessedImages',
                                    getPatientLabel(row, participantField),
                                    getVisitLabel(row)])
        gzRemoteFiles = {im: remoteDir+'/'+f
                         for (im, f) in gzFileNames.items()}
        remoteFilePresent = [fb.entryExists(f)
                             for f in gzRemoteFiles.values()]
        for f in gzRemoteFiles.values():
            print("[{}]: [{}]".format(f, fb.entryExists(f)))
        if all(remoteFilePresent):
            print("Entry for row done.")
            updateRow(db, project, dataset, row, imageResampledField,
                      gzFileNames, participantField)
            continue

        # set up the directory structure for preprocess_DM
        studyDir = os.path.join(tempBase, getStudyLabel(row, participantField))
        if not os.path.isdir(studyDir):
            os.mkdir(studyDir)
        rawDir = os.path.join(studyDir, 'Raw')
        if not os.path.isdir(rawDir):
            os.mkdir(rawDir)
        zipDir = os.path.join(studyDir, 'Zip')
        if not os.path.isdir(zipDir):
            os.mkdir(zipDir)
        processedDir = os.path.join(studyDir, 'Processed')
        if not os.path.isdir(processedDir):
            os.mkdir(processedDir)

        # specify local file names with path
        volumeFiles = {im: os.path.join(processedDir, f)
                       for (im, f) in volumeFileNames.items()}
        gzFiles = {im: f+".gz"
                   for (im, f) in volumeFiles.items()}

        filesPresent = [os.path.isfile(f) for f in gzFiles.values()]
        if not all(filesPresent):
            # use imageSelector -> inputs
            for im in imageSelector:
                # checks if raw files are already loaded
                getDicom(ofb, row, zipDir, rawDir, im, imageSelector,
                         participantField)

            # preprocess and zip
            ok = runPreprocess_DM(matlab, generalCodes, niftiTools, studyDir)
            if not ok:
                shutil.rmtree(studyDir)
                continue

            for f in volumeFiles.values():
                print("Running gzip {}".format(f))
                outText = subprocess.check_output(["/bin/gzip", f])
                print(outText.decode('utf-8'))

        # upload local files to remote
        for im in gzFiles:
            # for local,remote in zip(gzFiles,gzRemoteFiles):
            local = gzFiles[im]
            remote = gzRemoteFiles[im]
            print("Uploading {}".format(local))
            fb.writeFileToFile(local, remote)

        # update the row so it records where the processed files are
        updateRow(db, project, dataset, row, imageResampledField, gzFileNames,
                  participantField)

        # cleanup
        shutil.rmtree(studyDir)

        if i == -1:
            # manual break point left in for debugging; never triggers as written
            break
        i = i+1

    print("Done")


if __name__ == '__main__':
    main(sys.argv[1])