|
@@ -0,0 +1,142 @@
|
|
|
+import os
|
|
|
+import json
|
|
|
+import re
|
|
|
+import subprocess
|
|
|
+import nibabel
|
|
|
+import shutil
|
|
|
+import sys
|
|
|
+import numpy
|
|
|
+
|
|
|
# labkeyInterface is imported from a source checkout under the home directory
# of the 'nixUser' account, so that directory is placed on sys.path (right
# after the script's own entry) before the two project imports below.
shome = os.path.expanduser('~nixUser')
sys.path.insert(1, shome + '/software/src/labkeyInterface')
import labkeyInterface
import labkeyDatabaseBrowser

# The interface is initialised from ~/.labkey/network.json of the user running
# the script (presumably the server/connection settings -- confirm against
# labkeyInterface.init).
fhome = os.path.expanduser('~')
fconfig = os.path.join(fhome, '.labkey', 'network.json')

# 'net' is the configured interface object; 'db' wraps it and is what the rest
# of the script uses for selectRows / modifyRows calls.
net = labkeyInterface.labkeyInterface()
net.init(fconfig)
db = labkeyDatabaseBrowser.labkeyDB(net)
|
|
|
+
|
|
|
+
|
|
|
# Not referenced anywhere else in this script.
hi = 0

# LabKey project path passed to every selectRows / modifyRows call.
project = 'iPNUMMretro/Study'
# Disabled alternative: project = 'Orthanc/Database'

# Base directory used to build the path of the project's files on disk.
labkeyBase = '/data/labkey'
# Disabled: tempBase = os.path.join(fhome, 'temp')

# Dataset names: the two source datasets that are read, followed by the two
# anonymized datasets that receive the inserted rows.
imageDataset = 'Imaging1'
clinicalDataset = 'ClinicalData'
anonymousClinicalDataset = 'AnonymousClinicalData'
anonymousImagingDataset = 'AnonymousImaging'
|
|
|
+
|
|
|
+
|
|
|
# Read the clinical dataset from the 'study' schema. The trailing empty list
# is presumably the filter list (no filters, i.e. all rows) -- confirm against
# labkeyDatabaseBrowser.selectRows. The result is indexed below as ds['rows'].
ds = db.selectRows(project, 'study', clinicalDataset, [])
|
|
|
+
|
|
|
+
|
|
|
# Column whitelist and rename table for the clinical data.
# key   = column name as it appears in a source clinical row
# value = column name written to the anonymized clinical row
# Only the columns listed here are copied; entry order is the order in which
# the output row is filled.
# NOTE(review): several values are spelled unusually ('assocatedCronicDiseases',
# 'puritus', 'hypotirosis', 'sistemicKS'). They are used verbatim as output
# column names, so they presumably have to match the destination dataset
# schema -- do not "correct" them here without renaming the columns as well.
fieldMatches = {
    'SequenceNum': 'SequenceNum',
    'datumRojstva': 'birthDate',
    'DatumDiagnozeMetaM': 'mmDiagnosisDate',
    'StarostObZacetkuIT': 'itStartAge',
    'Spol': 'sex',
    'Origo': 'origo',
    'OrigoCode': 'origoCode',
    'MStadij': 'mStage',
    'MStadijCode': 'mStageCode',
    'MetastatskeLokalizacije': 'metastaticLocalization',
    'SteviloMetaLokalizacija': 'metastaticLocalizationCount',
    'Genetika': 'mutations',
    'DolocitevMut': 'mutationDetermination',
    'PSObUvedbiIT': 'psAtITIntroduction',
    'PridruzeneKronicneBolezni': 'assocatedCronicDiseases',
    'PricetekIT': 'itStart',
    'ZakljucekIT': 'itEnd',
    'VzrokPrenehanje': 'itEndCause',
    'StAplikacij': 'applicationCount',
    'NajboljsiOdgovor': 'bora',
    'DatumPD_PFS': 'DatumPD_PFS',
    'VitalnoStanje': 'vitalState',
    'DatumSmrti': 'deathDate',
    'KozniIzpuscaj': 'skinRash',
    'Vitiligo': 'vitiligo',
    'Puritus': 'puritus',
    'Hipotiroza': 'hypotirosis',
    'Osteoartritis': 'osteoartritis',
    'Diareja': 'diarrhea',
    'Pnevmonitis': 'pneumonitis',
    'Hepatitis': 'hepatitis',
    'Fatigue': 'fatigue',
    'DrugaTox': 'otherTox',
    'LDH': 'ldh',
    'S100': 's100',
    'KSSistemsko': 'sistemicKS',
    'KSvzrokNiPrejemal': 'ksNotAdministeredCause',
    'PETopazovanNU': 'petMonitoredAE',
    'PETObelezilNU': 'petDetectedAE',
    'PETPredSimZnaki': 'earlyPETAEDetection',
    'UkrepanjeZaradiPET': 'petRelatedAction',
    'DatumIzhodiscnegaPETPredIT': 'initialPETDate',
    'noPETCT': 'petCTCount',
    'DatumPrvegaPETZNU': 'firstAEPETCTDate',
    'DatumPrvegaSimOzLab': 'aeIdentificationDate',
    'DodatnePreiskavePolegPET': 'aeAdditionalExams',
    'IzidNU': 'aeOutcome',
}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Randomize patient IDs.
# Collect the distinct PatientId values of the clinical rows, then pair each
# one with an entry of a random permutation of 0..N-1. The resulting code is
# 'A' followed by that number, zero-padded to at least three digits, so codes
# are unique per patient and differ from run to run.
patientList = list({row['PatientId'] for row in ds['rows']})

perm = numpy.random.permutation(len(patientList))
patientCodes = {
    patientId: 'A{:03d}'.format(index)
    for patientId, index in zip(patientList, perm)
}
|
|
|
+
|
|
|
+
|
|
|
# Anonymize clinical data.
# Each clinical row is reduced to the columns listed in fieldMatches (stored
# under their mapped names), the patient ID is replaced by its random code,
# and the result is inserted, one row per call, into the anonymized clinical
# dataset.
# NOTE(review): values are copied unchanged, including the date columns
# mapped to 'birthDate' and 'deathDate' -- confirm this is acceptable for the
# intended level of anonymization.
for row in ds['rows']:
    outRow = {target: row[source] for source, target in fieldMatches.items()}

    # Mask the patient ID.
    outRow['PatientId'] = patientCodes[row['PatientId']]

    db.modifyRows(
        'insert', project, 'study', anonymousClinicalDataset, [outRow]
    )
|
|
|
+
|
|
|
# Anonymize image data.
# For every row of the imaging dataset:
#   * keep only SequenceNum and studyDate,
#   * replace the patient ID with its random code,
#   * for the CT and PETWB columns, copy the '<value>.zip' entry to a new
#     name derived from the patient code and record that name in the output,
#   * insert the row into the anonymized imaging dataset.
# NOTE: patientCodes is built from the clinical dataset only, so an imaging
# row whose PatientId has no clinical row raises KeyError here.

ds = db.selectRows(project, 'study', imageDataset, [])
fields = ['SequenceNum', 'studyDate']

# Directory that holds the per-series zip entries (both the originals and the
# renamed copies are placed here).
projectAnonymousBase = os.path.join(
    labkeyBase, 'files', project, '@files/anonymous'
)

for row in ds['rows']:
    outRow = {f: row[f] for f in fields}
    outRow['PatientId'] = patientCodes[row['PatientId']]

    # Copy links.
    for f in ['CT', 'PETWB']:
        # Ideally we should use the series UUID from DICOM; this is a kludge:
        # '<modality>_<patient code>_<sequence number, zero-padded to 3>'.
        anonSeriesId = '{}_{}_{:03.0f}'.format(
            f, outRow['PatientId'], row['SequenceNum']
        )
        origFile = os.path.join(projectAnonymousBase, row[f] + '.zip')
        modFile = os.path.join(projectAnonymousBase, anonSeriesId + '.zip')

        # 'cp -d' copies a symbolic link as a link instead of following it.
        result = subprocess.run(['cp', '-d', origFile, modFile])
        if result.returncode != 0:
            # Previously the exit status was ignored, so a failed copy left a
            # row pointing at a non-existent archive with no trace. Report it
            # but keep going; what gets inserted is unchanged.
            print(
                'WARNING: cp failed (exit {}) for {} -> {}'.format(
                    result.returncode, origFile, modFile
                ),
                file=sys.stderr,
            )

        outRow[f + '_UUID'] = anonSeriesId
        # The original series identifier is replaced by a fixed placeholder.
        outRow[f] = '[DICOM]'

    db.modifyRows(
        'insert', project, 'study', anonymousImagingDataset, [outRow]
    )

print('Done')
|