anonymize.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. import os
  2. import json
  3. import re
  4. import sys
  5. #nothing gets done if you do import
  def getPatientLabel(row, participantField='PatientId'):
      """Return the participant id of row with every '/' replaced by '_'.

      row is indexed with participantField (default 'PatientId'); the value
      stored there must be a string.
      """
      patientId = row[participantField]
      return '_'.join(patientId.split('/'))
  def getVisitLabel(row):
      """Return the visit label 'VISIT_<n>' for row.

      <n> is row['SequenceNum'] converted with int(), so 1.0 yields 'VISIT_1'.
      """
      sequenceNumber = int(row['SequenceNum'])
      return 'VISIT_{}'.format(sequenceNumber)
  def getStudyLabel(row, participantField='PatientId'):
      """Return '<patient label>-<visit label>' for row.

      The two parts are produced by getPatientLabel(row, participantField)
      and getVisitLabel(row) and are joined with a single '-'.
      """
      patientLabel = getPatientLabel(row, participantField)
      visitLabel = getVisitLabel(row)
      return '{}-{}'.format(patientLabel, visitLabel)
  def main(parameterFile):
      """Copy resampled images and segmentation masks to pseudonymized folders.

      Reads ~/.labkey/setup.json to locate nixWrapper, connects to LabKey using
      ~/.labkey/onko-nix.json and loads the run parameters from parameterFile
      (JSON). The 'Database' section of the parameter file must provide
      'project', 'queryName', 'schemaName', 'participantField' and
      'segementationQuery' (the key is read with exactly this spelling).

      Every dataset row with visitCode 'VISIT_1' and iodineContrast '0' gets a
      number from a random permutation of 0..n-1. For each such row that has
      exactly one entry in the segmentation query and whose remote files all
      exist, the files are stored as
      /data/nixUser/RIS/patient<NNN>/<key>.nii.gz with <key> one of 'CT',
      'PET', 'patientmask' and 'mask'. Rows that do not qualify are reported
      on stdout and skipped.

      The permutation is not seeded, so the pseudonym numbers differ between
      runs.
      """
      # numpy is not imported at file level; import it here like the other
      # run-time dependencies of this function
      import numpy

      fhome = os.path.expanduser('~')
      with open(os.path.join(fhome, ".labkey", "setup.json")) as f:
          setup = json.load(f)

      # nixWrapper lives outside the default module path
      sys.path.insert(0, setup["paths"]["nixWrapper"])
      import nixWrapper
      nixWrapper.loadLibrary("labkeyInterface")
      import labkeyInterface
      import labkeyDatabaseBrowser
      import labkeyFileBrowser

      fconfig = os.path.join(fhome, '.labkey', 'onko-nix.json')
      net = labkeyInterface.labkeyInterface()
      net.init(fconfig)
      db = labkeyDatabaseBrowser.labkeyDB(net)
      fb = labkeyFileBrowser.labkeyFileBrowser(net)

      with open(parameterFile) as f:
          pars = json.load(f)

      database = pars['Database']
      project = database['project']
      dataset = database['queryName']
      schema = database['schemaName']
      participantField = database['participantField']
      # NOTE(review): 'segementationQuery' is misspelled, but it is the key
      # existing parameter files use, so it is kept as is
      segmentation = database['segementationQuery']

      tempBase = os.path.join('/', 'data', 'nixUser', 'RIS')
      # makedirs also creates missing parent directories
      os.makedirs(tempBase, exist_ok=True)

      #all images from database
      visitFilter = {'variable': 'visitCode', 'value': 'VISIT_1', 'oper': 'eq'}
      iodineFilter = {'variable': 'iodineContrast', 'value': '0', 'oper': 'eq'}
      ds = db.selectRows(project, schema, dataset, [visitFilter, iodineFilter])
      rows = ds['rows']

      #imageSelector={"CT":"CT_orthancId","PET":"PETWB_orthancId"}
      #output: local file key -> dataset column holding the remote file name
      imageResampledField = {"CT": "ctResampled", "PET": "petResampled",
                             "patientmask": "ROImask"}

      # random pseudonym number for every participant in the selection
      keys = [r[participantField] for r in rows]
      perm = numpy.random.permutation(len(keys))
      pseudo = {key: int(number) for key, number in zip(keys, perm)}

      #use webdav to transfer file (even though it is localhost)
      for row in rows:
          print("Starting row id:{} seq:{}".format(row[participantField], row['SequenceNum']))
          #interesting files are processedDir/studyName_CT_notCropped_2mmVoxel.nii
          #and processedDir/studyName_PET_notCropped_2mmVoxel.nii

          idFilter = {'variable': participantField,
                      'value': row[participantField], 'oper': 'eq'}
          segFilter = {'variable': 'SequenceNum',
                       'value': '{}'.format(row['SequenceNum']), 'oper': 'eq'}
          # separate name: the outer query result must not be overwritten
          segRows = db.selectRows(project, schema, segmentation,
                                  [idFilter, segFilter])['rows']
          if not segRows:
              print('No segmentation found')
              continue
          if len(segRows) > 1:
              print('Multiple segmentations found')
              continue

          # per-row map: local file key -> path relative to the remote directory
          remoteNames = {x: row[field] for x, field in imageResampledField.items()}
          # NOTE(review): the mask name comes from the segmentation row, not
          # from the image row; it is assumed to sit in the 'Segmentations'
          # sub-directory of the same remote directory - confirm
          remoteNames['mask'] = '/'.join(['Segmentations', segRows[0]['latestFile']])

          #build/check remote directory structure
          remoteDir = fb.buildPathURL(project, ['preprocessedImages',
                                                getPatientLabel(row, participantField),
                                                getVisitLabel(row)])
          gzRemoteFiles = {x: '/'.join([remoteDir, name])
                           for x, name in remoteNames.items()}

          remoteFilePresent = []
          for remoteFile in gzRemoteFiles.values():
              present = fb.entryExists(remoteFile)
              print("[{}]: [{}]".format(remoteFile, present))
              remoteFilePresent.append(present)
          # checked before the local directory is made, so that skipped rows
          # leave no empty directory behind
          if not all(remoteFilePresent):
              print('Missing remote files')
              continue

          localDir = 'patient{:03d}'.format(pseudo[row[participantField]])
          localDir = os.path.join(tempBase, localDir)
          os.makedirs(localDir, exist_ok=True)
          localFiles = {x: os.path.join(localDir, '{}.nii.gz'.format(x))
                        for x in gzRemoteFiles}

          # NOTE(review): the previous version had an unconditional `continue`
          # in front of the download and a `break` after the first row; both
          # looked like debugging leftovers and were removed so that every
          # qualifying row is downloaded - confirm
          for x, localFile in localFiles.items():
              fb.readFileToFile(gzRemoteFiles[x], localFile)

      print("Done")
  if __name__ == '__main__':
      # the parameter file is mandatory; print a usage line instead of
      # failing with an IndexError when it is not given
      if len(sys.argv) < 2:
          sys.exit('Usage: {} parameterFile'.format(sys.argv[0]))
      main(sys.argv[1])