preprocess.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. import os
  2. import json
  3. import re
  4. import subprocess
  5. import nibabel
  6. import shutil
  7. import sys
  8. shome=os.path.expanduser('~nixUser')
  9. sys.path.insert(1,shome+'/software/src/labkeyInterface')
  10. import labkeyInterface
  11. import labkeyDatabaseBrowser
  12. import labkeyFileBrowser
  13. sys.path.insert(1,shome+'/software/src/orthancInterface')
  14. import orthancInterface
  15. import orthancFileBrowser
  16. #sys.path.insert(1,shome+'/software/src/IPNUMM/dicomUtils')
  17. #import loadDicom
  18. fhome=os.path.expanduser('~')
  19. fconfig=os.path.join(fhome,'.labkey','network.json')
  20. matlab=os.path.join("/","data","software","install","matlab","bin","matlab")
  21. generalCodes=os.path.join(fhome,"software","src","generalCodes")
  22. niftiTools=os.path.join(fhome,"software","src","NifTiScripts")
  23. net=labkeyInterface.labkeyInterface()
  24. net.init(fconfig)
  25. db=labkeyDatabaseBrowser.labkeyDB(net)
  26. fb=labkeyFileBrowser.labkeyFileBrowser(net)
  27. onet=orthancInterface.orthancInterface()
  28. onet.init(fconfig)
  29. ofb=orthancFileBrowser.orthancFileBrowser(onet)
  30. hi=0
  31. project='iPNUMMretro/Study'
  32. dataset='Imaging1'
  33. tempBase=os.path.join(fhome,'temp')
  34. #all images from database
  35. ds=db.selectRows(project,'study','Imaging1',[])
  36. imageSelector={"CT":"CT","PETWB":"PET"};
  37. imageResampledField={"CT":"ctResampled","PETWB":"petResampled"}
  38. niftiBase='/data/nifti'
  39. labkeyBase='/data/labkey'
  40. #projectNIfTIBase=os.path.join(labkeyBase,'files',project,'@files/nifti')
  41. #use webdav to transfer file (even though it is localhost)
  42. def getPatientLabel(row):
  43. return row['PatientId'].replace('/','_')
  44. def getVisitLabel(row):
  45. return 'VISIT_'+str(int(row['SequenceNum']))
  46. def getStudyLabel(row):
  47. return getPatientLabel(row)+'-'+getVisitLabel(row)
  48. def runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir):
  49. #run after all directories have been assembled
  50. script="addpath('"+generalCodes+"');"
  51. script+="addpath('"+niftiTools+"');"
  52. script+="preprocess_DM('"+studyDir+"',0,0);"
  53. script+="test;"
  54. script+="quit();"
  55. #outText=subprocess.check_output(["/bin/echo",script])
  56. try:
  57. outText=subprocess.check_output([matlab,"-nojvm","-r",script])
  58. except subprocess.CalledProcessError as e:
  59. print("Failed with:\n{}".format(e.output.decode('utf-8')))
  60. return False
  61. print(outText.decode('utf-8'))
  62. return True
  63. def getDicom(ofb,row,zipDir,rawDir,im,imageSelector):
  64. #Load the dicom zip file and unzips it. If zip file is already at the expected path, it skips the loading step
  65. #Return True for valid outcome and False for troubles in row formating or unzip failures
  66. seriesId=row[im];
  67. if seriesId=="0":
  68. return False
  69. print("{}: {}".format(im,seriesId))
  70. fname=os.path.join(zipDir,getStudyLabel(row)+'_'+imageSelector[im]+".zip");
  71. #copy data from orthanc
  72. if os.path.isfile(fname):
  73. print("Data already loaded. Skipping")
  74. else:
  75. print("Loading data from orthanc")
  76. ofb.getZip('series',seriesId,fname)
  77. #unzip the zipped dicom series
  78. unzipDir=os.path.join(rawDir,imageSelector[im])
  79. if os.path.isdir(unzipDir):
  80. print("Data already unzipped")
  81. return True
  82. try:
  83. os.mkdir(unzipDir)
  84. except FileExistsError:
  85. shutil.rmtree(unzipDir)
  86. try:
  87. outTxt=subprocess.check_output(["unzip","-d",unzipDir,"-xj",fname])
  88. except subprocess.CalledProcessError:
  89. print("unzip failed for {}".format(fname))
  90. return False
  91. return True
  92. def updateRow(project,dataset,row,imageResampledField,gzFileNames):
  93. row['patientCode']=getPatientLabel(row)
  94. row['visitCode']=getVisitLabel(row)
  95. for im in imageResampledField:
  96. row[imageResampledField[im]]=gzFileNames[im]
  97. db.modifyRows('update',project,'study',dataset,[row])
  98. i=0
  99. for row in ds["rows"]:
  100. #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
  101. #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
  102. volumeFileNames={im:\
  103. getStudyLabel(row)+'_'+imageSelector[im]+
  104. '_notCropped_2mmVoxel.nii'\
  105. for im in imageSelector}
  106. gzFileNames={im:f+".gz" \
  107. for (im,f) in volumeFileNames.items()}
  108. #build/check remote directory structure
  109. remoteDir=fb.buildPathURL(project,['preprocessedImages',getPatientLabel(row),getVisitLabel(row)])
  110. gzRemoteFiles={im:remoteDir+'/'+f\
  111. for (im,f) in gzFileNames.items()}
  112. remoteFilePresent=[fb.entryExists(f)\
  113. for f in gzRemoteFiles.values()]
  114. for f in gzRemoteFiles.values():
  115. print("[{}]: [{}]".format(f,fb.entryExists(f)))
  116. if all(remoteFilePresent):
  117. print("Entry for row done.")
  118. updateRow(project,dataset,row,imageResampledField,\
  119. gzFileNames)
  120. continue
  121. #setup the directory structure for preprocess_DM
  122. studyDir=os.path.join(tempBase,getStudyLabel(row))
  123. if not os.path.isdir(studyDir):
  124. os.mkdir(studyDir)
  125. rawDir=os.path.join(studyDir,'Raw')
  126. if not os.path.isdir(rawDir):
  127. os.mkdir(rawDir)
  128. zipDir=os.path.join(studyDir,'Zip')
  129. if not os.path.isdir(zipDir):
  130. os.mkdir(zipDir)
  131. processedDir=os.path.join(studyDir,'Processed')
  132. if not os.path.isdir(processedDir):
  133. os.mkdir(processedDir)
  134. #specify local file names with path
  135. volumeFiles={im:os.path.join(processedDir,f)\
  136. for (im.f) in volumeFileNames.items()}
  137. gzFiles={im:f+".gz"\
  138. for (im,f) in volumeFiles.items()}
  139. filesPresent=[os.path.isfile(f) for f in gzFiles.values()]
  140. if not all(filesPresent):
  141. for im in imageSelector:
  142. #checks if raw files are already loaded
  143. getDicom(ofb,row,zipDir,rawDir,im,imageSelector)
  144. #preprocess and zip
  145. ok=runPreprocess_DM(matlab,generalCodes,niftiTools,studyDir)
  146. if not ok:
  147. shutil.rmtree(studyDir)
  148. continue
  149. for f in volumeFiles.values():
  150. print("Running gzip {}".format(f))
  151. outText=subprocess.check_output(["/bin/gzip",f])
  152. print(outText.decode('utf-8'))
  153. #upload local files to remote
  154. for im in gzFiles:
  155. #for local,remote in zip(gzFiles,gzRemoteFiles):
  156. local=gzFiles[im]
  157. remote=gzRemoteFiles[im]
  158. print("Uploading {}".format(local))
  159. fb.writeFileToFile(local,remote)
  160. #update row and let it know where the processed files are
  161. updateRow(project,dataset,row,imageResampledField,gzFileNames)
  162. #cleanup
  163. shutil.rmtree(studyDir)
  164. if i==-1:
  165. break
  166. i=i+1
  167. print("Done")