preprocess.py

import os
import json
import re
import subprocess
import nibabel
import shutil
import sys
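
#per-user configuration: paths to the labkeyInterface/orthancInterface modules
#and to the external tools (matlab, generalCodes, niftiTools)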
shome=os.path.expanduser('~nixUser')
fhome=os.path.expanduser('~')
with open(os.path.join(fhome,".labkey","setup.json")) as f:
    setup=json.load(f)

sys.path.insert(0,setup["paths"]["labkeyInterface"])
import labkeyInterface
import labkeyDatabaseBrowser
import labkeyFileBrowser

sys.path.insert(0,setup["paths"]["orthancInterface"])
import orthancInterface
import orthancFileBrowser

sys.path.insert(1,setup['paths']['analysisInterface'])
import analysisInterface

fconfig=os.path.join(fhome,'.labkey','network.json')
matlab=setup["paths"]["matlab"]
generalCodes=setup["paths"]["generalCodes"]
niftiTools=setup["paths"]["niftiTools"]
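
#connect to labkey (database and file browser) and to orthanc (dicom archive)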
net=labkeyInterface.labkeyInterface()
net.init(fconfig)
db=labkeyDatabaseBrowser.labkeyDB(net)
fb=labkeyFileBrowser.labkeyFileBrowser(net)

onet=orthancInterface.orthancInterface()
onet.init(fconfig)
ofb=orthancFileBrowser.orthancFileBrowser(onet)
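
#the script is called with a parameter file and a run id; analysisInterface
#handles run bookkeeping (loading parameters, reporting status)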
parameterFile=sys.argv[1]
runid=sys.argv[2]
ana=analysisInterface.analysisInterface(db,fb,runid)
ana.updateStatus(2)
pars=ana.getParameters(parameterFile)
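
#database coordinates (project, schema, query) and the participant field name
#come from the parameter file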
hi=0
project=pars['Database']['project']
dataset=pars['Database']['queryName']
schema=pars['Database']['schemaName']
tempBase=os.path.join(fhome,'temp')
participantField=pars['Database']['participantField']

#all images from database
ds=db.selectRows(project,schema,dataset,[])
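
#for each modality, the labkey column with the orthanc series id and the column
#that will store the name of the resampled (preprocessed) file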
imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
imageResampledField={"CT":"ctResampled","PET":"petResampled"}

#use webdav to transfer files (even though the server is localhost)
def getPatientLabel(row,participantField='PatientId'):
    return row[participantField].replace('/','_')

def getVisitLabel(row):
    return 'VISIT_'+str(int(row['SequenceNum']))

def getStudyLabel(row,participantField='PatientId'):
    return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
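
#assemble a one-line matlab script that runs preprocess_DM on the study
#directory; returns False if the matlab call fails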
def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
    #run after all directories have been assembled
    script="addpath('"+generalCodes+"');"
    script+="addpath('"+niftiTools+"');"
    script+="preprocess_DM('"+studyDir+"',0,0);"
    script+="test;"
    script+="quit();"
    #outText=subprocess.check_output(["/bin/echo",script])
    try:
        outText=subprocess.check_output([matlab,"-nojvm","-r",script])
    except subprocess.CalledProcessError as e:
        print("Failed with:\n{}".format(e.output.decode('utf-8')))
        return False
    print(outText.decode('utf-8'))
    return True

def getDicom(ofb,row,zipDir,rawDir,im,imageSelector,
             participantField='PatientId'):
    #download the dicom zip file from orthanc and unzip it; if the zip file is
    #already at the expected path, the download step is skipped
    #returns True on success, False for a missing series or an unzip failure
    seriesId=row[imageSelector[im]]
    if seriesId=="0":
        return False
    print("{}: {}".format(im,seriesId))
    fname=os.path.join(zipDir,
                       getStudyLabel(row,participantField)+'_'+im+".zip")
    #copy data from orthanc
    if os.path.isfile(fname):
        print("Data already loaded. Skipping")
    else:
        print("Loading data from orthanc")
        ofb.getZip('series',seriesId,fname)
    #unzip the zipped dicom series
    unzipDir=os.path.join(rawDir,im)
    if os.path.isdir(unzipDir):
        print("Data already unzipped")
        return True
    try:
        os.mkdir(unzipDir)
    except FileExistsError:
        shutil.rmtree(unzipDir)
    #-j junks the archive paths so all dicom files land directly in unzipDir
    try:
        outTxt=subprocess.check_output(["unzip","-j","-d",unzipDir,fname])
    except subprocess.CalledProcessError:
        print("unzip failed for {}".format(fname))
        return False
    return True
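
#write the patient/visit codes and the preprocessed file names back to the
#labkey dataset row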
def updateRow(project,dataset,row,imageResampledField,gzFileNames,
              participantField='PatientId'):
    row['patientCode']=getPatientLabel(row,participantField)
    row['visitCode']=getVisitLabel(row)
    for im in imageResampledField:
        row[imageResampledField[im]]=gzFileNames[im]
    db.modifyRows('update',project,'study',dataset,[row])

ana.updateStatus(3)
i=0
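
#main loop: for every row (patient visit), check whether the preprocessed images
#already exist on the server; if not, fetch the dicom data from orthanc, run the
#matlab preprocessing, gzip the results, upload them and record the file names
#in the database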
for row in ds["rows"]:
    #the files of interest are processedDir/studyName_CT_notCropped_2mmVoxel.nii
    #and processedDir/studyName_PET_notCropped_2mmVoxel.nii
    volumeFileNames={im:getStudyLabel(row,participantField)+'_'+im+
                     '_notCropped_2mmVoxel.nii'
                     for im in imageSelector}
    gzFileNames={im:f+".gz"
                 for (im,f) in volumeFileNames.items()}

    #build/check the remote directory structure
    remoteDir=fb.buildPathURL(project,['preprocessedImages',
                                       getPatientLabel(row,participantField),
                                       getVisitLabel(row)])
    gzRemoteFiles={im:remoteDir+'/'+f
                   for (im,f) in gzFileNames.items()}
    remoteFilePresent=[fb.entryExists(f)
                       for f in gzRemoteFiles.values()]
    for f in gzRemoteFiles.values():
        print("[{}]: [{}]".format(f,fb.entryExists(f)))
    if all(remoteFilePresent):
        print("Entry for row done.")
        updateRow(project,dataset,row,imageResampledField,
                  gzFileNames,participantField)
        continue

    #set up the directory structure expected by preprocess_DM
    studyDir=os.path.join(tempBase,getStudyLabel(row,participantField))
    if not os.path.isdir(studyDir):
        os.mkdir(studyDir)
    rawDir=os.path.join(studyDir,'Raw')
    if not os.path.isdir(rawDir):
        os.mkdir(rawDir)
    zipDir=os.path.join(studyDir,'Zip')
    if not os.path.isdir(zipDir):
        os.mkdir(zipDir)
    processedDir=os.path.join(studyDir,'Processed')
    if not os.path.isdir(processedDir):
        os.mkdir(processedDir)

    #local file names with full path
    volumeFiles={im:os.path.join(processedDir,f)
                 for (im,f) in volumeFileNames.items()}
    gzFiles={im:f+".gz"
             for (im,f) in volumeFiles.items()}

    filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
    if not all(filesPresent):
        for im in imageSelector:
            #download the raw dicom files (getDicom skips data already present)
            getDicom(ofb,row,zipDir,rawDir,im,imageSelector,
                     participantField)

        #preprocess and zip
        ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
        if not ok:
            shutil.rmtree(studyDir)
            continue

        for f in volumeFiles.values():
            print("Running gzip {}".format(f))
            outText=subprocess.check_output(["/bin/gzip",f])
            print(outText.decode('utf-8'))

    #upload the local files to the remote directory
    for im in gzFiles:
        #for local,remote in zip(gzFiles,gzRemoteFiles):
        local=gzFiles[im]
        remote=gzRemoteFiles[im]
        print("Uploading {}".format(local))
        fb.writeFileToFile(local,remote)

    #update the row so it records where the processed files are
    updateRow(project,dataset,row,imageResampledField,gzFileNames,
              participantField)

    #cleanup
    shutil.rmtree(studyDir)

    #row counter (the i==-1 check is a disabled debugging break)
    if i==-1:
        break
    i=i+1

ana.updateStatus(4)
print("Done")