anonymizeSegmentations.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. import os
  2. import json
  3. import re
  4. import subprocess
  5. import nibabel
  6. import shutil
  7. import sys
  8. import datetime
  # The version tag is a mandatory argument; further down it is embedded in
  # the name of every uploaded segmentation file.
  if len(sys.argv)<2:
      # A missing argument is an error: report on stderr and exit non-zero so
      # that calling shells/schedulers can tell the run did not happen.
      print("Usage {} version(v1 or similar)".format(sys.argv[0]),file=sys.stderr)
      sys.exit(1)
  #sourceDir=sys.argv[1]
  ver=sys.argv[1]
  # Home directories: shome is the home of the 'nixUser' account, fhome the
  # home of the user running the script (its .labkey directory holds the
  # configuration files read below).
  shome=os.path.expanduser('~nixUser')
  fhome=os.path.expanduser('~')

  # setup.json tells us where the labkeyInterface package is installed.
  setupFile=os.path.join(fhome,".labkey","setup.json")
  with open(setupFile) as f:
      setup=json.load(f)

  # The interface modules can only be imported once their directory is on
  # sys.path, hence the imports in the middle of the script.
  sys.path.insert(0,setup["paths"]["labkeyInterface"])
  import labkeyInterface
  import labkeyDatabaseBrowser
  import labkeyFileBrowser
  #sys.path.insert(1,shome+'/software/src/IPNUMM/dicomUtils')
  #import loadDicom


  def _openLabkey(configFile):
      """Open one connection described by configFile.

      Returns the tuple (network interface, database browser, file browser),
      created in that order.
      """
      net=labkeyInterface.labkeyInterface()
      net.init(configFile)
      return net,labkeyDatabaseBrowser.labkeyDB(net),\
          labkeyFileBrowser.labkeyFileBrowser(net)


  #onko
  configOnko=os.path.join(fhome,'.labkey','onko-nix.json')
  netOnko,dbOnko,fbOnko=_openLabkey(configOnko)

  #merlin
  configMerlin=os.path.join(fhome,'.labkey','merlin.json')
  netMerlin,dbMerlin,fbMerlin=_openLabkey(configMerlin)

  # Both connections address the same project/dataset names.
  projectOnko=projectMerlin='iPNUMMretro/Study'
  datasetOnko=datasetMerlin='Imaging1'

  # Scratch directory for files in transit between the two servers.
  tempBase=os.path.join(fhome,'temp')

  #all images from database
  #imageSelector={"CT":"CT","PET":"PETWB"};
  # Maps a file code to the dataset column that stores the file name.
  imageResampledField={"Segm":"Segmentation"}
  fileCode="Segm"

  # Column names used to identify a participant and a visit.
  participantField='PatientId'
  visitCode='visitCode'
  47. #for prospective
  48. #participantField='ParticipantId'
  49. #generate key from known imaging dates
  def getImagingDates(db,project,dataset,participant,
                      participantField='PatientId'):
      """Return the imaging dates stored for a single participant.

      Selects the rows of `dataset` (schema 'study') in `project` whose
      `participantField` equals `participant`, and parses the 'studyDate'
      string of every returned row with the format '%Y/%m/%d %H:%M:%S'.

      Returns a list of datetime.datetime objects in the order the rows
      were returned by db.selectRows.
      """
      participantFilter=dict(variable=participantField,
                             value=participant,
                             oper='eq')
      reply=db.selectRows(project,'study',dataset,[participantFilter])
      dates=[]
      for record in reply['rows']:
          stamp=record['studyDate']
          dates.append(datetime.datetime.strptime(stamp,'%Y/%m/%d %H:%M:%S'))
      return dates
  def getImagingDatesDictionary(db,project,dataset,
                                participantField='PatientId'):
      """Map every participant in a dataset to its list of imaging dates.

      Reads all rows of `dataset` (schema 'study') in `project`, collects the
      distinct values of `participantField`, and calls getImagingDates once
      per participant.

      Returns a dict {participant: [datetime, ...]} whose keys appear in the
      order the participants are first seen in the dataset rows.
      """
      ds=db.selectRows(project,'study',dataset,[])
      # dict.fromkeys removes duplicates but keeps first-seen order. A plain
      # set would make the key order depend on string hash randomisation, and
      # findMatch returns the first matching key, so matches could differ
      # from one run to the next.
      participants=dict.fromkeys(row[participantField] for row in ds['rows'])
      return {p:getImagingDates(db,project,dataset,p,participantField)
              for p in participants}
  def imagingDatesMatch(dateList1,dateList2):
      """Tell whether two lists of imaging dates describe the same schedule.

      The lists are compared pairwise, position by position, over the length
      of the shorter list; a pair agrees when the two datetimes are less than
      one day apart.

      Returns True when every compared pair agrees. Returns False when any
      pair disagrees, and also when either list is empty: with nothing to
      compare there is no evidence of a match (without this guard an empty
      list would match every other list).
      """
      if min(len(dateList1),len(dateList2))==0:
          return False
      tolerance=datetime.timedelta(days=1)
      # zip stops at the shorter list, so no explicit slicing is needed.
      return all(abs(d2-d1)<tolerance for d1,d2 in zip(dateList1,dateList2))
  def findMatch(dateList,dateDictionary):
      """Return the first key of dateDictionary whose dates match dateList.

      Keys are tried in the dictionary's iteration order and compared with
      imagingDatesMatch; None is returned when no entry matches.
      """
      candidates=(key for key in dateDictionary
                  if imagingDatesMatch(dateList,dateDictionary[key]))
      return next(candidates,None)
  76. #use webdav to transfer file (even though it is localhost)
  def getPatientLabel(row,participantField='PatientId'):
      """Return the participant id of `row` with every '/' replaced by '_'."""
      participant=row[participantField]
      return '_'.join(participant.split('/'))
  def getVisitLabel(row):
      """Return 'VISIT_<n>', where n is the row's 'SequenceNum' cast to int."""
      sequence=int(row['SequenceNum'])
      return 'VISIT_{}'.format(sequence)
  def getStudyLabel(row,participantField='PatientId'):
      """Return '<patient label>-<visit label>' for `row`.

      The two parts come from getPatientLabel and getVisitLabel.
      """
      patient=getPatientLabel(row,participantField)
      visit=getVisitLabel(row)
      return '-'.join((patient,visit))
  def updateRow(db,project,dataset,row,imageResampledField,gzFileNames):
      """Store file names in `row` and send the row back as an update.

      For every file code in `imageResampledField`, the column named by that
      entry is set to the file name `gzFileNames` holds for the same code.
      `row` is modified in place and then passed to db.modifyRows as an
      'update' of `dataset` in schema 'study' of `project`.

      NOTE(review): the update is issued once, after all columns are set;
      the source formatting left the nesting ambiguous - confirm.
      """
      for code,column in imageResampledField.items():
          row[column]=gzFileNames[code]
      db.modifyRows('update',project,'study',dataset,[row])
  # Imaging dates per participant, one dictionary per connection.
  dOnko=getImagingDatesDictionary(dbOnko,projectOnko,datasetOnko,
                                  participantField)
  dMerlin=getImagingDatesDictionary(dbMerlin,projectMerlin,datasetMerlin,
                                    participantField)
  # running counter, incremented by the transfer loop
  i=0
  # Pair every Onko participant with the Merlin participant whose imaging
  # dates match (None when findMatch finds nothing).
  idMatch={onkoId:findMatch(dates,dMerlin) for onkoId,dates in dOnko.items()}
  # Report the pairing; the loop variables avoid shadowing the builtin id().
  for onkoId,merlinId in idMatch.items():
      print("{} : {}".format(onkoId,merlinId))
  # Copy every segmentation file referenced in the Onko dataset to the
  # matching participant/visit on Merlin and record the new file name there.
  dsOnko=dbOnko.selectRows(projectOnko,'study',datasetOnko,[])
  # Use the first (currently only) file code configured in imageResampledField.
  im=list(imageResampledField.keys())[0]
  # Make sure the scratch directory exists before files are staged in it.
  os.makedirs(tempBase,exist_ok=True)

  for rowOnko in dsOnko["rows"]:
      segFileOnko=rowOnko.get(imageResampledField[im])
      if not segFileOnko:
          # Without a file name the path concatenation below would fail.
          print("No segmentation recorded for {}".format(
              rowOnko[participantField]))
          continue

      dirOnko=fbOnko.buildPathURL(projectOnko,
          ['preprocessedImages',getPatientLabel(rowOnko,participantField),
           getVisitLabel(rowOnko)])
      fileOnko=dirOnko+'/'+segFileOnko
      if not fbOnko.entryExists(fileOnko):
          print("Original file {} not available".format(fileOnko))
          continue

      # Participants without a date match have no target to copy to.
      merlinId=idMatch.get(rowOnko[participantField])
      if merlinId is None:
          print("No matching participant for {}".format(
              rowOnko[participantField]))
          continue

      idFilter={'variable':participantField,
                'value':merlinId,
                'oper':'eq'}
      visitFilter={'variable':visitCode,
                   'value':rowOnko[visitCode],
                   'oper':'eq'}
      dsMerlin=dbMerlin.selectRows(projectMerlin,'study',datasetMerlin,
                                   [idFilter,visitFilter])
      if not dsMerlin['rows']:
          continue
      rowMerlin=dsMerlin['rows'][0]

      # Target name: <patient>-VISIT_<n>_<code>_<version>.nii.gz
      segFileMerlin=getStudyLabel(rowMerlin,participantField)+'_'+\
          im+'_'+ver+'.nii.gz'
      dirMerlin=fbMerlin.buildPathURL(projectMerlin,
          ['preprocessedImages',getPatientLabel(rowMerlin,participantField),
           getVisitLabel(rowMerlin)])
      fileMerlin=dirMerlin+'/'+segFileMerlin
      gzTargetFiles={im:segFileMerlin}

      if fbMerlin.entryExists(fileMerlin):
          # Nothing to transfer, but the row is still pointed at the file.
          print("Target file {} already uploaded".format(fileMerlin))
          updateRow(dbMerlin, projectMerlin,datasetMerlin,rowMerlin,
                    imageResampledField,gzTargetFiles)
          continue

      #upload file, staged through a local temporary copy
      localFile=os.path.join(tempBase,segFileMerlin)
      print("Local {}".format(localFile))
      try:
          fbOnko.readFileToFile(fileOnko,localFile)
          fbMerlin.writeFileToFile(localFile,fileMerlin)
          print("Remote {}".format(fileMerlin))
      finally:
          # Never leave the staged copy behind, even if the transfer raised.
          if os.path.isfile(localFile):
              os.remove(localFile)

      #update row and let it know where the processed files are
      updateRow(dbMerlin, projectMerlin,datasetMerlin,rowMerlin,
                imageResampledField,gzTargetFiles)
      i=i+1

  print("Done")