anonymize.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. import os
  2. import json
  3. import re
  4. import sys
  5. import numpy
  6. import shutil
#importing this module has no side effects; processing runs only when the file is executed as a script
  8. def getPatientLabel(row,participantField='PatientId'):
  9. return row[participantField].replace('/','_')
  10. def getVisitLabel(row):
  11. return 'VISIT_'+str(int(row['SequenceNum']))
  12. def getStudyLabel(row,participantField='PatientId'):
  13. return getPatientLabel(row,participantField)+'-'+getVisitLabel(row)
  14. def main(parameterFile):
  15. fhome=os.path.expanduser('~')
  16. with open(os.path.join(fhome,".labkey","setup.json")) as f:
  17. setup=json.load(f)
  18. sys.path.insert(0,setup["paths"]["nixWrapper"])
  19. import nixWrapper
  20. nixWrapper.loadLibrary("labkeyInterface")
  21. import labkeyInterface
  22. import labkeyDatabaseBrowser
  23. import labkeyFileBrowser
  24. fconfig=os.path.join(fhome,'.labkey','onko-nix.json')
  25. net=labkeyInterface.labkeyInterface()
  26. net.init(fconfig)
  27. db=labkeyDatabaseBrowser.labkeyDB(net)
  28. fb=labkeyFileBrowser.labkeyFileBrowser(net)
  29. with open(parameterFile) as f:
  30. pars=json.load(f)
  31. hi=0
  32. project=pars['Database']['project']
  33. dataset=pars['Database']['queryName']
  34. schema=pars['Database']['schemaName']
  35. #tempBase=os.path.join('/','data','nixUser','RIS')
  36. tempBase=os.path.join(fhome,'temp','RIS')
  37. if not os.path.isdir(tempBase):
  38. os.mkdir(tempBase)
  39. participantField=pars['Database']['participantField']
  40. segmentation=pars['Database']['segmentationQuery']
  41. #all images from database
  42. visitFilter={'variable':'visitCode','value':'VISIT_2','oper':'eq'}
  43. iodineFilter={'variable':'iodineContrast','value':'1','oper':'neq'}
  44. #for VISIT_1, also apply iodineFilter
  45. #qFilter=[visitFilter,iodineFilter]
  46. #for VISIT_2, iodineFilter has no meaning (shuld be false or blank, but or is hard to do)
  47. qFilter=[visitFilter]
  48. #shift generated patient names
  49. offset=100
  50. ds=db.selectRows(project,schema,dataset,qFilter)
  51. #imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
  52. #output
  53. imageResampledField={"CT":"ctResampled","PET":"petResampled"}
  54. #,"patientmask":"ROImask"}
  55. #use webdav to transfer file (even though it is localhost)
  56. i=0
  57. n=len(ds['rows'])
  58. keys=[r[participantField] for r in ds['rows']]
  59. perm=numpy.random.permutation(n)
  60. perm+=offset
  61. pseudo={keys[i]:perm[i] for i in range(n)}
  62. for row in ds["rows"]:
  63. print("Starting row id:{} seq:{}".format(row[participantField],row['SequenceNum']))
  64. #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
  65. #asn processedDir/studyName_PET_notCropped_2mmVoxel.nii
  66. idFilter={'variable':participantField,'value':row[participantField],'oper':'eq'}
  67. segFilter={'variable':'SequenceNum','value':'{}'.format(row['SequenceNum']),'oper':'eq'}
  68. #adoma
  69. userFilter={'variable':'User','value':'1037','oper':'eq'}
  70. ds=db.selectRows(project,schema,segmentation,[idFilter,segFilter,userFilter])
  71. nS=len(ds['rows'])
  72. if nS==0:
  73. print('No segmentation found')
  74. continue
  75. if nS>1:
  76. print('Multiple segmentations found')
  77. continue
  78. maskFile={'mask':'/'.join(['Segmentations',ds['rows'][0]['latestFile']])}
  79. #build/check remote directory structure
  80. remoteDir=fb.buildPathURL(project,['preprocessedImages',\
  81. getPatientLabel(row,participantField),getVisitLabel(row)])
  82. remoteFiles={x:row[imageResampledField[x]] for x in imageResampledField}
  83. remoteFiles.update(maskFile)
  84. remoteFiles={x:'/'.join([remoteDir,remoteFiles[x]]) for x in remoteFiles}
  85. for f in remoteFiles.values():
  86. print("[{}]: [{}]".format(f,fb.entryExists(f)))
  87. patientALabel='patient{:03d}'.format(pseudo[row[participantField]])
  88. localDir=os.path.join(tempBase,patientALabel)
  89. if not os.path.isdir(localDir):
  90. os.mkdir(localDir)
  91. fileNames={x:'{}.nii.gz'.format(x) for x in remoteFiles}
  92. fileNames['mask']=fileNames['mask'].replace('nii.gz','nrrd')
  93. localFiles={x:os.path.join(localDir,fileNames[x]) for x in fileNames}
  94. remoteFilesPresent={x:fb.entryExists(remoteFiles[x]) for x in remoteFiles}
  95. if not all(remoteFilesPresent):
  96. print('Missing remote files')
  97. continue
  98. _=[fb.readFileToFile(remoteFiles[x],localFiles[x]) for x in localFiles]
  99. remoteADir=fb.buildPathURL(project,['anonymized',patientALabel])
  100. remoteAFiles={x:'/'.join([remoteADir,fileNames[x]]) for x in fileNames}
  101. _=[fb.writeFileToFile(localFiles[x],remoteAFiles[x]) for x in remoteAFiles]
  102. shutil.rmtree(localDir)
  103. if i==-1:
  104. break
  105. i=i+1
  106. print("Done")
  107. if __name__ == '__main__':
  108. main(sys.argv[1])