# Copy the clinical and imaging datasets of a LabKey study into their
# "anonymous" counterparts.
#
# Steps performed, in order:
#   1. Read every row of the clinical dataset.
#   2. Give every distinct PatientId a random code of the form 'A###'.
#   3. Insert one row per clinical row into the anonymous clinical dataset,
#      with the columns renamed through fieldMatches and PatientId replaced
#      by its code.
#   4. For every imaging row, copy the CT and PETWB archives to file names
#      built from the patient code, then insert a row into the anonymous
#      imaging dataset.
#
# NOTE(review): the codes come from a fresh random permutation on every run
# and rows are always inserted, never updated, so running the script twice
# presumably leaves two differently coded copies -- confirm before re-running.
import os
import json
import re
import subprocess
import nibabel
import shutil
import sys
import numpy

# labkeyInterface lives in the source tree of user 'nixUser'; it has to be on
# sys.path before the two imports below can succeed.
shome = os.path.expanduser('~nixUser')
sys.path.insert(1, shome + '/software/src/labkeyInterface')
import labkeyInterface
import labkeyDatabaseBrowser

# Connection settings are read from the invoking user's home directory.
fhome = os.path.expanduser('~')
fconfig = os.path.join(fhome, '.labkey', 'network.json')

net = labkeyInterface.labkeyInterface()
net.init(fconfig)
db = labkeyDatabaseBrowser.labkeyDB(net)

# Not referenced anywhere else in the visible script.
hi = 0
project = 'iPNUMMretro/Study'
#project='Orthanc/Database'

labkeyBase = '/data/labkey'
#tempBase=os.path.join(fhome,'temp')

# Source and destination datasets (all in schema 'study').
imageDataset = 'Imaging1'
clinicalDataset = 'ClinicalData'
anonymousClinicalDataset = 'AnonymousClinicalData'
anonymousImagingDataset = 'AnonymousImaging'

ds = db.selectRows(project, 'study', clinicalDataset, [])

# Source column -> destination column. The destination names are kept exactly
# as written, spelling included; they presumably have to match the columns of
# the destination dataset -- verify before "correcting" any of them.
fieldMatches = {
    'SequenceNum': 'SequenceNum',
    'datumRojstva': 'birthDate',
    'DatumDiagnozeMetaM': 'mmDiagnosisDate',
    'StarostObZacetkuIT': 'itStartAge',
    'Spol': 'sex',
    'Origo': 'origo',
    'OrigoCode': 'origoCode',
    'MStadij': 'mStage',
    'MStadijCode': 'mStageCode',
    'MetastatskeLokalizacije': 'metastaticLocalization',
    'SteviloMetaLokalizacija': 'metastaticLocalizationCount',
    'Genetika': 'mutations',
    'DolocitevMut': 'mutationDetermination',
    'PSObUvedbiIT': 'psAtITIntroduction',
    'PridruzeneKronicneBolezni': 'assocatedCronicDiseases',
    'PricetekIT': 'itStart',
    'ZakljucekIT': 'itEnd',
    'VzrokPrenehanje': 'itEndCause',
    'StAplikacij': 'applicationCount',
    'NajboljsiOdgovor': 'bora',
    'DatumPD_PFS': 'DatumPD_PFS',
    'VitalnoStanje': 'vitalState',
    'DatumSmrti': 'deathDate',
    'KozniIzpuscaj': 'skinRash',
    'Vitiligo': 'vitiligo',
    'Puritus': 'puritus',
    'Hipotiroza': 'hypotirosis',
    'Osteoartritis': 'osteoartritis',
    'Diareja': 'diarrhea',
    'Pnevmonitis': 'pneumonitis',
    'Hepatitis': 'hepatitis',
    'Fatigue': 'fatigue',
    'DrugaTox': 'otherTox',
    'LDH': 'ldh',
    'S100': 's100',
    'KSSistemsko': 'sistemicKS',
    'KSvzrokNiPrejemal': 'ksNotAdministeredCause',
    'PETopazovanNU': 'petMonitoredAE',
    'PETObelezilNU': 'petDetectedAE',
    'PETPredSimZnaki': 'earlyPETAEDetection',
    'UkrepanjeZaradiPET': 'petRelatedAction',
    'DatumIzhodiscnegaPETPredIT': 'initialPETDate',
    'noPETCT': 'petCTCount',
    'DatumPrvegaPETZNU': 'firstAEPETCTDate',
    'DatumPrvegaSimOzLab': 'aeIdentificationDate',
    'DodatnePreiskavePolegPET': 'aeAdditionalExams',
    'IzidNU': 'aeOutcome',
}

# Randomize patient IDs: collapse to distinct IDs, then pair each one with a
# distinct number taken from a random permutation of 0..N-1.
patientList = list({row['PatientId'] for row in ds['rows']})
perm = numpy.random.permutation(len(patientList))
patientCodes = {
    patientId: 'A{:03d}'.format(codeIndex)
    for patientId, codeIndex in zip(patientList, perm)
}

# Anonymize clinical data: one insert per source row.
for row in ds['rows']:
    outRow = {target: row[source] for source, target in fieldMatches.items()}
    # Mask the patient ID.
    outRow['PatientId'] = patientCodes[row['PatientId']]
    db.modifyRows('insert', project, 'study', anonymousClinicalDataset,
                  [outRow])

# Anonymize image data.
ds = db.selectRows(project, 'study', imageDataset, [])
fields = ['SequenceNum', 'studyDate']
# Directory that holds the archives; the renamed copies are written next to
# the originals.
projectAnonymousBase = os.path.join(labkeyBase, 'files', project,
                                    '@files/anonymous')

for row in ds['rows']:
    outRow = {f: row[f] for f in fields}
    # A PatientId that never appeared in the clinical dataset has no code and
    # raises KeyError here.
    outRow['PatientId'] = patientCodes[row['PatientId']]

    # Copy links.
    for f in ['CT', 'PETWB']:
        # Ideally we should use the series UUID from DICOM; this is a kludge.
        anonSeriesId = '{}_{}_{:03.0f}'.format(
            f, outRow['PatientId'], row['SequenceNum'])
        origFile = os.path.join(projectAnonymousBase, row[f] + '.zip')
        modFile = os.path.join(projectAnonymousBase, anonSeriesId + '.zip')

        # 'cp -d' copies a symbolic link as a link instead of following it.
        result = subprocess.run(['cp', '-d', origFile, modFile])
        if result.returncode != 0:
            # Report the failure with enough context to repair it by hand.
            # The row is still inserted below, exactly as before, so one
            # missing archive does not abort the whole run.
            print('Warning: cp exited with status {} copying {} to {}'
                  .format(result.returncode, origFile, modFile),
                  file=sys.stderr)

        outRow[f + '_UUID'] = anonSeriesId
        outRow[f] = '[DICOM]'

    db.modifyRows('insert', project, 'study', anonymousImagingDataset,
                  [outRow])

print('Done')