Преглед изворни кода

Adding database management routines

NIX User пре 4 година
родитељ
комит
1f7661b703

+ 53 - 0
pythonScripts/copyAnonymizedDataset.py

@@ -0,0 +1,53 @@
+import os
+import json
+import re
+import subprocess
+import shutil
+import sys
+import numpy
+import pydicom
+import copy
+
+# The labkeyInterface modules are taken from the nixUser checkout, which is
+# not on the default module search path.
+shome = os.path.expanduser('~nixUser')
+sys.path.insert(1, shome + '/software/src/labkeyInterface')
+import labkeyInterface
+import labkeyDatabaseBrowser
+
+
+fhome = os.path.expanduser('~')
+
+
+def openLabkey(configName):
+    """Open the LabKey connection configured in ~/.labkey/<configName>.
+
+    Returns the configuration path, the initialised labkeyInterface object
+    and the labkeyDB browser built on top of it.
+    """
+    configPath = os.path.join(fhome, '.labkey', configName)
+    connection = labkeyInterface.labkeyInterface()
+    connection.init(configPath)
+    return configPath, connection, labkeyDatabaseBrowser.labkeyDB(connection)
+
+
+# connection the anonymized rows are read from
+fconfig, net, db = openLabkey('network.json')
+
+# separate credentials for merlin, where the rows are inserted
+fconfigMerlin, netMerlin, dbMerlin = openLabkey('merlin.json')
+
+
+project = 'iPNUMMretro/Study'
+merlinProject = project
+
+anonymousDataset = 'AnonymousClinicalData'
+merlinDataset = 'ClinicalData'
+
+# nixID is read from the first row of the Study table and used as a prefix
+# for every patient identifier copied to merlin
+studyData = db.selectRows(project, 'study', 'Study', [])
+nixID = studyData['rows'][0]['nixID']
+
+ds = db.selectRows(project, 'study', anonymousDataset, [])
+
+patientIdTemplate = '{}-{}'
+for sourceRow in ds['rows']:
+    # copy the row unchanged except for the prefixed PatientId
+    merlinRow = copy.deepcopy(sourceRow)
+    merlinRow['PatientId'] = patientIdTemplate.format(nixID, sourceRow['PatientId'])
+    dbMerlin.modifyRows('insert', merlinProject, 'study', merlinDataset, [merlinRow])
+
+print('Done')
+

+ 142 - 0
pythonScripts/copyAnonymizedImagesToOrthanc.py

@@ -0,0 +1,142 @@
+import os
+import json
+import re
+import subprocess
+import shutil
+import sys
+import numpy
+import pydicom
+import copy
+
+#Copy the anonymized CT and PETWB series listed in the AnonymousImaging
+#dataset to merlin: every zipped series is unpacked, each DICOM file gets a
+#nixID-prefixed PatientID (and study/series UIDs when they are missing), the
+#file is uploaded through the orthanc interface and the resulting identifiers
+#are written to the merlin Imaging dataset.
+
+#the interface modules are taken from the nixUser checkout
+shome=os.path.expanduser('~nixUser')
+sys.path.insert(1,shome+'/software/src/labkeyInterface')
+import labkeyInterface
+import labkeyDatabaseBrowser
+
+sys.path.insert(1,shome+'/software/src/orthancInterface')
+import orthancInterface
+import orthancDatabaseBrowser
+
+sys.path.insert(1,shome+'/software/src/IPNUMM/dicomUtils')
+import loadDicom
+
+
+fhome=os.path.expanduser('~')
+fconfig=os.path.join(fhome,'.labkey','network.json')
+net=labkeyInterface.labkeyInterface()
+net.init(fconfig)
+db=labkeyDatabaseBrowser.labkeyDB(net)
+
+
+#also need merlin credentials
+
+fconfigMerlin=os.path.join(fhome,'.labkey','merlin.json')
+netMerlin=labkeyInterface.labkeyInterface()
+netMerlin.init(fconfigMerlin)
+dbMerlin=labkeyDatabaseBrowser.labkeyDB(netMerlin)
+
+#the orthanc interface is initialised from the same merlin configuration file
+onetMerlin=orthancInterface.orthancInterface()
+onetMerlin.init(fconfigMerlin)
+odbMerlin=orthancDatabaseBrowser.orthancDB(onetMerlin)
+
+
+project='iPNUMMretro/Study'
+merlinProject=project
+#project='Orthanc/Database'
+
+labkeyBase='/data/labkey'
+tempBase=os.path.join(fhome,'temp')
+
+#anonymousClinicalDataset='AnonymousClinicalData'
+anonymousImagingDataset='AnonymousImaging'
+merlinImagingDataset='Imaging'
+
+#getNixID
+studyData=db.selectRows(project,'study','Study',[])
+nixID=studyData['rows'][0]['nixID']
+
+ds=db.selectRows(project,'study',anonymousImagingDataset,[])
+
+#get UID generator
+uid=loadDicom.uuid()
+
+
+
+#paths
+projectAnonymousBase=os.path.join(labkeyBase,'files',project,'@files/anonymous')
+
+for row in ds['rows']:
+    outRow=copy.deepcopy(row)
+    merlinPatientId='{}-{}'.format(nixID,row['PatientId'])
+    outRow['PatientId']=merlinPatientId
+
+    #fallback study UID, used only for files that carry no StudyInstanceUID
+    studyUID=uid.generateStudyUUID('volume')
+    for iMod in ['CT','PETWB']:
+        anonSeriesId=row[iMod+'_UUID']
+        dicomZipFile=os.path.join(projectAnonymousBase,anonSeriesId+'.zip')
+        tempDir=os.path.join(tempBase,anonSeriesId)
+
+        #always start from an empty scratch directory; makedirs also creates
+        #tempBase when it does not exist yet
+        if os.path.isdir(tempDir):
+            shutil.rmtree(tempDir)
+        os.makedirs(tempDir)
+
+        try:
+            unzip=subprocess.run(['unzip','-d',tempDir,'-xj',dicomZipFile])
+            if unzip.returncode!=0:
+                #report the problem, whatever was extracted is still processed
+                print('WARNING: unzip returned {} for {}'.\
+                        format(unzip.returncode,dicomZipFile))
+
+            #fallback series UID, used only for files without SeriesInstanceUID
+            seriesUID=uid.generateSeriesUUID('volume')
+            for f in os.listdir(tempDir):
+                dicomFile=os.path.join(tempDir,f)
+                dcm=pydicom.dcmread(dicomFile)
+                #attribute assignment also works when the tag is absent
+                dcm.PatientID=merlinPatientId
+
+                #make sure series and study UID are set; store the plain
+                #string value, not the pydicom DataElement wrapper, since
+                #the row is used in a filter and sent to the database
+                try:
+                    outRow['studyUUID']=str(dcm['StudyInstanceUID'].value)
+                except KeyError:
+                    dcm.StudyInstanceUID=studyUID
+                    outRow['studyUUID']=studyUID
+
+                try:
+                    outRow[iMod+'_UUID']=str(dcm['SeriesInstanceUID'].value)
+                except KeyError:
+                    dcm.SeriesInstanceUID=seriesUID
+                    outRow[iMod+'_UUID']=seriesUID
+
+                dcm.save_as(dicomFile)
+                print('Modified: {}'.format(dicomFile))
+
+                #send instance to orthanc
+                resp=odbMerlin.upload(dicomFile)
+                try:
+                    failed=resp['status']=="FAIL"
+                except KeyError:
+                    #a response without a status field is treated as success
+                    failed=False
+                if failed:
+                    #abort with a message and a non-zero exit status
+                    sys.exit('Upload failed for {}: {}'.format(dicomFile,resp))
+
+                #keep track of the orthanc identifiers of the uploaded instance
+                outRow[iMod+'_orthancId']=resp['ParentSeries']
+                outRow['orthancId']=resp['ParentStudy']
+                outRow['patientOrthancId']=resp['ParentPatient']
+
+                print(resp)
+        finally:
+            #remove the scratch directory even when the upload aborts
+            shutil.rmtree(tempDir)
+
+    #check on studyUID for duplicates
+    studyFilter={'variable':'studyUUID','value':outRow['studyUUID'],'oper':'eq'}
+    dsMerlin=dbMerlin.selectRows(merlinProject,'study',\
+            merlinImagingDataset,[studyFilter])
+
+    #update the existing entry if the study is already registered
+    mode='insert'
+    if len(dsMerlin['rows'])>0:
+        mode='update'
+
+    dbMerlin.modifyRows(mode,merlinProject,'study',\
+            merlinImagingDataset,[outRow])
+
+print('Done')

+ 142 - 0
pythonScripts/makeAnonymizedDataset.py

@@ -0,0 +1,142 @@
+import os
+import json
+import re
+import subprocess
+import nibabel
+import shutil
+import sys
+import numpy
+
+# labkeyInterface is taken from the nixUser checkout, which is not on the
+# default module search path.
+shome = os.path.expanduser('~nixUser')
+sys.path.insert(1, shome + '/software/src/labkeyInterface')
+import labkeyInterface
+import labkeyDatabaseBrowser
+
+
+fhome = os.path.expanduser('~')
+fconfig = os.path.join(fhome, '.labkey', 'network.json')
+
+net = labkeyInterface.labkeyInterface()
+net.init(fconfig)
+db = labkeyDatabaseBrowser.labkeyDB(net)
+
+
+hi = 0
+project = 'iPNUMMretro/Study'
+
+labkeyBase = '/data/labkey'
+
+# datasets read from / written to in the study schema of the project
+imageDataset = 'Imaging1'
+clinicalDataset = 'ClinicalData'
+anonymousClinicalDataset = 'AnonymousClinicalData'
+anonymousImagingDataset = 'AnonymousImaging'
+
+
+ds = db.selectRows(project, 'study', clinicalDataset, [])
+
+
+# column read from ClinicalData -> column written to AnonymousClinicalData
+fieldMatches = {
+    'SequenceNum': 'SequenceNum',
+    'datumRojstva': 'birthDate',
+    'DatumDiagnozeMetaM': 'mmDiagnosisDate',
+    'StarostObZacetkuIT': 'itStartAge',
+    'Spol': 'sex',
+    'Origo': 'origo',
+    'OrigoCode': 'origoCode',
+    'MStadij': 'mStage',
+    'MStadijCode': 'mStageCode',
+    'MetastatskeLokalizacije': 'metastaticLocalization',
+    'SteviloMetaLokalizacija': 'metastaticLocalizationCount',
+    'Genetika': 'mutations',
+    'DolocitevMut': 'mutationDetermination',
+    'PSObUvedbiIT': 'psAtITIntroduction',
+    'PridruzeneKronicneBolezni': 'assocatedCronicDiseases',
+    'PricetekIT': 'itStart',
+    'ZakljucekIT': 'itEnd',
+    'VzrokPrenehanje': 'itEndCause',
+    'StAplikacij': 'applicationCount',
+    'NajboljsiOdgovor': 'bora',
+    'DatumPD_PFS': 'DatumPD_PFS',
+    'VitalnoStanje': 'vitalState',
+    'DatumSmrti': 'deathDate',
+    'KozniIzpuscaj': 'skinRash',
+    'Vitiligo': 'vitiligo',
+    'Puritus': 'puritus',
+    'Hipotiroza': 'hypotirosis',
+    'Osteoartritis': 'osteoartritis',
+    'Diareja': 'diarrhea',
+    'Pnevmonitis': 'pneumonitis',
+    'Hepatitis': 'hepatitis',
+    'Fatigue': 'fatigue',
+    'DrugaTox': 'otherTox',
+    'LDH': 'ldh',
+    'S100': 's100',
+    'KSSistemsko': 'sistemicKS',
+    'KSvzrokNiPrejemal': 'ksNotAdministeredCause',
+    'PETopazovanNU': 'petMonitoredAE',
+    'PETObelezilNU': 'petDetectedAE',
+    'PETPredSimZnaki': 'earlyPETAEDetection',
+    'UkrepanjeZaradiPET': 'petRelatedAction',
+    'DatumIzhodiscnegaPETPredIT': 'initialPETDate',
+    'noPETCT': 'petCTCount',
+    'DatumPrvegaPETZNU': 'firstAEPETCTDate',
+    'DatumPrvegaSimOzLab': 'aeIdentificationDate',
+    'DodatnePreiskavePolegPET': 'aeAdditionalExams',
+    'IzidNU': 'aeOutcome',
+}
+
+
+# every distinct PatientId is paired with one slot of a random permutation;
+# the slot number becomes the code that replaces the identifier
+patientList = [clinicalRow['PatientId'] for clinicalRow in ds['rows']]
+patientList = list(set(patientList))
+
+perm = numpy.random.permutation(len(patientList))
+patientCodes = {
+    patient: 'A{:03d}'.format(slot)
+    for patient, slot in zip(patientList, perm)
+}
+
+
+# clinical data: copy the mapped columns and mask the patient identifier
+for clinicalRow in ds['rows']:
+    outRow = {target: clinicalRow[source] for source, target in fieldMatches.items()}
+    outRow['PatientId'] = patientCodes[clinicalRow['PatientId']]
+
+    db.modifyRows('insert', project, 'study', anonymousClinicalDataset, [outRow])
+
+
+# image data
+ds = db.selectRows(project, 'study', imageDataset, [])
+fields = ['SequenceNum', 'studyDate']
+
+# directory with the zipped series, original and renamed copies alike
+projectAnonymousBase = os.path.join(labkeyBase, 'files', project, '@files/anonymous')
+
+for imageRow in ds['rows']:
+    outRow = {name: imageRow[name] for name in fields}
+    outRow['PatientId'] = patientCodes[imageRow['PatientId']]
+
+    for modality in ['CT', 'PETWB']:
+        # the identifier is assembled from modality, patient code and
+        # sequence number rather than taken from the DICOM series UID
+        anonSeriesId = '{}_{}_{:03.0f}'.format(
+            modality, outRow['PatientId'], imageRow['SequenceNum'])
+        origFile = os.path.join(projectAnonymousBase, imageRow[modality] + '.zip')
+        modFile = os.path.join(projectAnonymousBase, anonSeriesId + '.zip')
+        # cp -d copies symbolic links as links
+        subprocess.run(['cp', '-d', origFile, modFile])
+        outRow[modality + '_UUID'] = anonSeriesId
+        outRow[modality] = '[DICOM]'
+
+    db.modifyRows('insert', project, 'study', anonymousImagingDataset, [outRow])
+
+print('Done')

+ 115 - 0
pythonScripts/modifyPatients.py

@@ -0,0 +1,115 @@
+#A script that looks up all data stored for patients enrolled under a study
+#regulatory number. The modifyRows('delete',...) calls below are commented
+#out on purpose, so as written the script only prints how many rows each
+#dataset/list holds for those patients; uncomment them to actually delete.
+
+regulatoryNumber="X000"
+
+
+#basic python
+import os
+import subprocess
+import re
+import datetime
+import sys
+
+
+fhome=os.path.expanduser('~')
+
+sys.path.insert(1,fhome+'/software/src/labkeyInterface')
+import labkeyInterface
+import labkeyDatabaseBrowser
+
+
+#connection settings are read from .labkey/network.json
+net=labkeyInterface.labkeyInterface()
+net.init(fhome+'/.labkey/network.json')
+
+db=labkeyDatabaseBrowser.labkeyDB(net)
+
+project="IPNUMMprospektiva/Study"
+
+
+
+#study section ################
+
+
+
+#select patients enrolled under regulatory number
+filters=[]
+regulatoryFilter={\
+    'variable':'regulatoryNumber',\
+    'value':regulatoryNumber,\
+    'oper':'eq'}
+filters.append(regulatoryFilter)
+
+ds=db.selectRows(project,"study","demographicData",filters)
+
+#the 'in' filter takes the matching ids as a single ';'-separated string
+ids=[row['ParticipantId'] for row in ds['rows']]
+idCode=";".join(str(participantId) for participantId in ids)
+
+idFilter={\
+    'variable':'ParticipantId',\
+    'value':idCode,\
+    'oper':'in'}
+
+#print("idCode: {}".format(idCode))
+
+#walk over every dataset listed in the study and report the matching rows
+dsts=db.selectRows(project,"study","Datasets",[])
+
+
+for row in dsts['rows']:
+
+    dsId=db.selectRows(project,"study",row['Name'],[idFilter])
+    rows=list(dsId['rows'])
+    print("[{}]: {}".format(row['Name'],len(rows)))
+    if len(rows)==0:
+        continue
+    #this is for security only
+    #db.modifyRows('delete',project,"study",row['Name'],rows)
+
+
+
+# list section ########################
+
+
+#entries of the crfEntry list that carry the regulatory number
+ds=db.selectRows(project,"lists","crfEntry",[regulatoryFilter])
+masterRows=list(ds['rows'])
+
+crfs=[row['entryId'] for row in masterRows]
+
+#str() keeps the join working when entryId is not stored as text
+crfCode=";".join(str(crf) for crf in crfs)
+print("crfCode: {}".format(crfCode))
+
+
+crfFilter={\
+    'variable':'crfRef',\
+    'value':crfCode,\
+    'oper':'in'}
+
+#walk over every list named in inputLists and report the matching rows
+dsts=db.selectRows(project,"lists","inputLists",[])
+
+for row in dsts['rows']:
+
+    dsId=db.selectRows(project,"lists",row['queryName'],[crfFilter])
+    rows=list(dsId['rows'])
+    print("[{}]: {}".format(row['queryName'],len(rows)))
+    if len(rows)==0:
+        continue
+    #this is for security only
+    #db.modifyRows('delete',project,"lists",row['queryName'],rows)
+
+
+#this is for security only
+#ds=db.modifyRows('delete',project,"lists","crfEntry",masterRows)
+
+print("Done")
+#sys.exit is always available, quit() is only injected by the site module
+sys.exit(0)
+

+ 78 - 0
pythonScripts/populateImagingFromOrthanc.py

@@ -0,0 +1,78 @@
+import os
+import json
+import re
+import sys
+import datetime
+import re
+
+fhome = os.path.expanduser('~')
+sys.path.insert(1, fhome + '/software/src/labkeyInterface')
+import labkeyInterface
+import labkeyDatabaseBrowser
+
+fconfig = os.path.join(fhome, '.labkey', 'network.json')
+
+net = labkeyInterface.labkeyInterface()
+net.init(fconfig)
+db = labkeyDatabaseBrowser.labkeyDB(net)
+
+
+i = 0
+projectOrthanc = 'Orthanc/Database'
+inputDataset = 'Imaging'
+projectStudy = 'iPNUMMretro/Study'
+outputDataset = 'Imaging1'
+
+ds = db.selectRows(projectOrthanc, 'study', inputDataset, [])
+
+
+# the output dataset holds a single entry per PatientId/dicomStudy pair
+selectVars = ['PatientId', 'dicomStudy']
+
+
+def seriesColumn(description):
+    """Return the output column for a series description, 'NONE' to skip it.
+
+    'PET WB' maps to PETWB; descriptions that start with 'CT WB' and do not
+    contain 'fov' map to CT.
+    """
+    if description == 'PET WB':
+        return 'PETWB'
+    if description.startswith('CT WB') and 'fov' not in description:
+        return 'CT'
+    return 'NONE'
+
+
+# process the series in order of increasing study date
+dates = [datetime.datetime.strptime(entry['studyDate'], '%Y/%m/%d %H:%M:%S')
+         for entry in ds['rows']]
+idx = sorted(range(len(dates)), key=dates.__getitem__)
+
+for j, position in enumerate(idx):
+    row = ds['rows'][position]
+
+    # skip series which don't match selected filters
+    outvar = seriesColumn(row['seriesDescription'])
+    if outvar == 'NONE':
+        continue
+
+    filters = [{'variable': v, 'value': row[v], 'oper': 'eq'} for v in selectVars]
+
+    # all entries of the patient; their count numbers a newly inserted entry
+    ds2 = db.selectRows(projectStudy, 'study', outputDataset,
+                        [{'variable': 'PatientId', 'value': row['PatientId'], 'oper': 'eq'}])
+    # entries already stored for this PatientId/dicomStudy pair
+    ds1 = db.selectRows(projectStudy, 'study', outputDataset, filters)
+    matches = ds1['rows']
+    if len(matches) > 1:
+        print('ERROR: too many matches for {}/{}'.format(row['PatientId'], row['dicomStudy']))
+        continue
+
+    if matches:
+        # extend the entry that is already there
+        mode = 'update'
+        outRow = matches[0]
+    else:
+        mode = 'insert'
+        outRow = {
+            'PatientId': row['PatientId'],
+            'SequenceNum': len(ds2['rows']),
+            'dicomStudy': row['dicomStudy'],
+        }
+
+    outRow[outvar] = row['orthancSeries']
+    outRow['studyDate'] = row['studyDate']
+
+    status = db.modifyRows(mode, projectStudy, 'study', outputDataset, [outRow])
+    print('{}'.format(status))
+    # NOTE(review): stops once the series at position 50 has been written, but
+    # is never reached when that series is skipped above; looks like a
+    # leftover debugging limit - confirm whether it should stay
+    if j == 50:
+        break
+
+print("Done")