123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
# Standard library
import json
import os
import re
import shutil
import subprocess
import sys

# Third-party
import nibabel
import numpy

# The labkey helper modules are loaded from a source directory under the
# nixUser home, so that directory is added to sys.path before importing them.
shome = os.path.expanduser('~nixUser')
sys.path.insert(1, shome + '/software/src/labkeyInterface')

import labkeyInterface
import labkeyDatabaseBrowser
# --- Static configuration -------------------------------------------------
hi = 0  # not referenced anywhere else in the visible script

project = 'iPNUMMretro/Study'
# project = 'Orthanc/Database'
labkeyBase = '/data/labkey'
# tempBase = os.path.join(fhome, 'temp')

# Source datasets (all images / clinical records from the database) and the
# datasets that receive the anonymized copies.
imageDataset = 'Imaging1'
clinicalDataset = 'ClinicalData'
anonymousClinicalDataset = 'AnonymousClinicalData'
anonymousImagingDataset = 'AnonymousImaging'

# --- LabKey connection ----------------------------------------------------
# The connection is configured from .labkey/network.json in the home
# directory of the user running the script.
fhome = os.path.expanduser('~')
fconfig = os.path.join(fhome, '.labkey', 'network.json')

net = labkeyInterface.labkeyInterface()
net.init(fconfig)
db = labkeyDatabaseBrowser.labkeyDB(net)

# Fetch the clinical dataset; the trailing [] is the filter argument
# (presumably "no filters", i.e. all rows -- confirm against labkeyDB).
ds = db.selectRows(project, 'study', clinicalDataset, [])
# Column mapping used when copying clinical rows: each key is a column name
# read from a source row, each value is the column name the value is written
# under in the anonymized row. PatientId is not listed here; the copy loop
# sets it separately from patientCodes.
fieldMatches = {
    'SequenceNum': 'SequenceNum',
    'datumRojstva': 'birthDate',
    'DatumDiagnozeMetaM': 'mmDiagnosisDate',
    'StarostObZacetkuIT': 'itStartAge',
    'Spol': 'sex',
    'Origo': 'origo',
    'OrigoCode': 'origoCode',
    'MStadij': 'mStage',
    'MStadijCode': 'mStageCode',
    'MetastatskeLokalizacije': 'metastaticLocalization',
    'SteviloMetaLokalizacija': 'metastaticLocalizationCount',
    'Genetika': 'mutations',
    'DolocitevMut': 'mutationDetermination',
    'PSObUvedbiIT': 'psAtITIntroduction',
    'PridruzeneKronicneBolezni': 'assocatedCronicDiseases',
    'PricetekIT': 'itStart',
    'ZakljucekIT': 'itEnd',
    'VzrokPrenehanje': 'itEndCause',
    'StAplikacij': 'applicationCount',
    'NajboljsiOdgovor': 'bora',
    'DatumPD_PFS': 'DatumPD_PFS',
    'VitalnoStanje': 'vitalState',
    'DatumSmrti': 'deathDate',
    'KozniIzpuscaj': 'skinRash',
    'Vitiligo': 'vitiligo',
    'Puritus': 'puritus',
    'Hipotiroza': 'hypotirosis',
    'Osteoartritis': 'osteoartritis',
    'Diareja': 'diarrhea',
    'Pnevmonitis': 'pneumonitis',
    'Hepatitis': 'hepatitis',
    'Fatigue': 'fatigue',
    'DrugaTox': 'otherTox',
    'LDH': 'ldh',
    'S100': 's100',
    'KSSistemsko': 'sistemicKS',
    'KSvzrokNiPrejemal': 'ksNotAdministeredCause',
    'PETopazovanNU': 'petMonitoredAE',
    'PETObelezilNU': 'petDetectedAE',
    'PETPredSimZnaki': 'earlyPETAEDetection',
    'UkrepanjeZaradiPET': 'petRelatedAction',
    'DatumIzhodiscnegaPETPredIT': 'initialPETDate',
    'noPETCT': 'petCTCount',
    'DatumPrvegaPETZNU': 'firstAEPETCTDate',
    'DatumPrvegaSimOzLab': 'aeIdentificationDate',
    'DodatnePreiskavePolegPET': 'aeAdditionalExams',
    'IzidNU': 'aeOutcome',
}
# Randomize patient IDs: collect the distinct PatientId values of the
# clinical rows and pair each with one entry of a random permutation of
# 0..N-1. The resulting code is 'A' followed by that number, zero-padded to
# at least three digits, so every patient gets a distinct code.
patientList = list({record['PatientId'] for record in ds['rows']})
perm = numpy.random.permutation(len(patientList))
patientCodes = {
    patientId: 'A{:03d}'.format(number)
    for patientId, number in zip(patientList, perm)
}
# Anonymize clinical data: for every clinical row, copy only the columns
# listed in fieldMatches (renamed to their target names), replace the
# patient identifier with its random code, and insert the result into the
# anonymized clinical dataset one row at a time.
for record in ds['rows']:
    anonymized = {
        target: record[source] for source, target in fieldMatches.items()
    }
    # mask PatientId
    anonymized['PatientId'] = patientCodes[record['PatientId']]
    db.modifyRows(
        'insert', project, 'study', anonymousClinicalDataset, [anonymized]
    )
# Anonymize image data: for every imaging row, keep only the columns listed
# in `fields`, replace the patient identifier with its random code, duplicate
# the CT and PETWB archives under anonymized file names, and insert the
# resulting row into the anonymized imaging dataset.
ds = db.selectRows(project, 'study', imageDataset, [])
fields = ['SequenceNum', 'studyDate']

# Directory that holds the <id>.zip entries; the anonymized copies are
# created in the same directory.
projectAnonymousBase = os.path.join(
    labkeyBase, 'files', project, '@files/anonymous'
)

for row in ds['rows']:
    outRow = {f: row[f] for f in fields}
    # Raises KeyError for a patient that was absent from the clinical dataset.
    outRow['PatientId'] = patientCodes[row['PatientId']]

    # copy links
    for f in ['CT', 'PETWB']:
        # Ideally the series UID from the DICOM data would be used here;
        # building the id from modality, patient code and sequence number
        # is a kludge.
        anonSeriesId = '{}_{}_{:03.0f}'.format(
            f, outRow['PatientId'], row['SequenceNum']
        )
        origFile = os.path.join(projectAnonymousBase, row[f] + '.zip')
        modFile = os.path.join(projectAnonymousBase, anonSeriesId + '.zip')
        # `cp -d` copies a symbolic link as a link instead of following it.
        # check=True makes a failed copy raise CalledProcessError; without
        # it the failure was ignored and the row below was still inserted,
        # pointing at a file that does not exist.
        subprocess.run(['cp', '-d', origFile, modFile], check=True)
        outRow[f + '_UUID'] = anonSeriesId
        # The original series identifier is replaced by a fixed placeholder.
        outRow[f] = '[DICOM]'

    db.modifyRows('insert', project, 'study', anonymousImagingDataset, [outRow])

print('Done')
|