Browse Source

Adding statistics infrastructure

Andrej Studen @ VBOX 2 years ago
parent
commit
ab1be334d6

File diff suppressed because it is too large
+ 14 - 0
pythonScripts/checkData.ipynb


+ 60 - 0
pythonScripts/runStat.py

@@ -0,0 +1,60 @@
+import statUtils
+import os
+import radiomics
+import SimpleITK
+import sys
+
+def main(parFile='../templates/statistics.json'):
+    setup=statUtils.loadSetup(parFile)
+    setup['db'],setup['fb']=statUtils.connectDB('onko-nix')
+    users=statUtils.getUsers(setup['db'],setup['project'])
+    qFilter=[]
+    try:
+        vList=';'.join(setup['participants'])    
+        qFilter.append({'variable':'ParticipantId','value':vList,'oper':'in'})
+    except KeyError:
+        pass
+    ds=setup['db'].selectRows(setup['project'],'study','Imaging1',qFilter)
+    if not os.path.isdir(setup['localDir']):
+        os.mkdir(setup['localDir'])
+    #select just the first row; debugging
+    rows=ds['rows']
+    setup['values']=['COM','MTV','TLG','SUVmean','SUVmax']
+    params=os.path.join('..','templates','radiomics.yaml')
+    setup['featureExtractor']=radiomics.featureextractor.RadiomicsFeatureExtractor(params)
+    for r in rows:
+        print(r)
+        for q in ['petResampled']:
+            localPath=statUtils.getImage(setup,r,q)
+        if localPath=="NONE":
+            continue
+        segPaths=statUtils.getSegmentations(setup,r)
+        if "NONE" in segPaths.values():
+            os.remove(localPath)
+            continue
+        segKeys=list(segPaths.keys())
+        for x in segPaths:
+            print('Loaded {}/{}'.format(users[x],segPaths[x]))
+        pet=SimpleITK.ReadImage(localPath)
+        seg={x:SimpleITK.ReadImage(segPaths[x]) for x in segPaths}
+        #find labels associated with each (non-overlaping) segmentation
+        ids=statUtils.getSegments(list(seg.values())[0])
+        for id in ids:
+            print('{} {}'.format(id,ids[id]))
+            for x in seg:
+                try:
+                    output=statUtils.getRadiomicsComponentStats(setup,pet,seg[x],ids[id])
+                except ValueError:
+                    continue
+                output.update({x:r[x] for x in ['ParticipantId','SequenceNum','patientCode','visitCode']})
+                output['User']=x
+                output['segment']=ids[id]
+                statUtils.updateDatasetRows(setup['db'],setup['project'],'SUVanalysis',[output])
+                #print(output)
+        #cleanup
+        for x in segPath:
+            os.remove(segPath[x])
+        os.remove(localPath)
+
+if __name__=='__main__':
+    main(sys.argv[1])

+ 218 - 0
pythonScripts/statUtils.py

@@ -0,0 +1,218 @@
+#load required libraries
+import sys
+import os
+import SimpleITK
+import json
+
+#you should get nixSuite via git clone https://git0.fmf.uni-lj.si/studen/nixSuite.git
+#if you don't put it to $HOME/software/src/, you should update the path
+nixSuite=os.path.join(os.path.expanduser('~'),'software','src','nixSuite')
+sys.path.append(os.path.join(nixSuite,'wrapper'))
+import nixWrapper
+nixWrapper.loadLibrary('labkeyInterface')
+import labkeyInterface
+import labkeyDatabaseBrowser
+import labkeyFileBrowser
+
+def connectDB(server):
+    #check connectivity. This checks the configuration in $HOME/.labkey/network.json, 
+    #where paths to certificates are stored
+    net=labkeyInterface.labkeyInterface()
+    fconfig=os.path.join(os.path.expanduser('~'),'.labkey','{}.json'.format(server))
+    net.init(fconfig)
+    #this reports the certificate used
+    try:
+        print('Using: {}'.format(net.connectionConfig['SSL']['user']))
+    except KeyError:
+        pass
+    #This gets a deafult CSRF code; It should report user name plus a long string of random hex numbers
+    net.getCSRF()
+    db=labkeyDatabaseBrowser.labkeyDB(net)
+    fb=labkeyFileBrowser.labkeyFileBrowser(net)
+    return db,fb
+
+def getUsers(db,project):
+    ds=db.selectRows(project,'core','Users',[])
+    users={x['UserId']:x['DisplayName'] for x in ds['rows']}
+    for u in users:
+        print('{} {}'.format(u,users[u]))
+    return users
+
+def getImage(setup, row, field, extraPath=None):
+    fb=setup['fb']
+    pathList=[setup['imageDir'],row['patientCode'],row['visitCode']]
+    if extraPath!=None:
+        pathList.append(extraPath)
+    pathList.append(row[field])
+    remotePath='/'.join(pathList)
+    urlPath=fb.formatPathURL(setup['project'],remotePath)
+    localPath=os.path.join(setup['localDir'],row[field])
+    if os.path.isfile(localPath):
+        print('{} done'.format(localPath))
+    else:
+        if not fb.entryExists(urlPath):
+            print('No file {}'.format(urlPath))
+            return "NONE"
+        fb.readFileToFile(urlPath,localPath)
+    return localPath
+
+def getSegmentations(setup,row):
+    idField=setup['idField']
+    visitField=setup['visitField']
+    qFilter=[{'variable':x,'value':row[x],'oper':'eq'} for x in [idField,visitField]]
+    dsSeg=setup['db'].selectRows(setup['project'],'study','Segmentations',qFilter)
+    if len(dsSeg['rows'])<1:
+        print('Failed to find segmentation for {}/{}'.format(row[idField],row[visitField]))
+        return {0:"NONE"}
+    return {r['User']:getImage(setup,r,'latestFile',extraPath='Segmentations') for r in dsSeg['rows']}
+
+def loadSetup(path):
+    with open(path,'r') as f:
+        setup=json.load(f)
+    fC=[x for x in setup.keys() if x.find('Dir')>-1]
+    for q in fC:
+        setup[q]=setup[q].replace('_home_',os.path.expanduser('~'))
+    return setup
+
+def getSegments(image):
+    keys=image.GetMetaDataKeys()
+    i=0
+    ids={}
+    while True:
+        id='Segment{}_ID'.format(i)
+        value='Segment{}_LabelValue'.format(i)
+        try:
+            ids[image.GetMetaData(id)]=int(image.GetMetaData(value))
+        except RuntimeError:
+            break
+        i+=1
+    return ids
+    
+def getStats(image):
+    print(image.GetSize())
+    print(image.GetOrigin())
+    print(image.GetSpacing())
+    print(image.GetNumberOfComponentsPerPixel())
+    
+    
+def getSegmentStats(pet,seg,label):
+    o=getComponents(seg,label)
+    cc=o['image']
+    shape_stats = SimpleITK.LabelShapeStatisticsImageFilter()
+    #shape_stats.ComputeOrientedBoundingBoxOn()
+    shape_stats.Execute(cc)
+
+    intensity_stats = SimpleITK.LabelIntensityStatisticsImageFilter()
+    intensity_stats.Execute(cc,pet) 
+    
+    #output
+    out=[(shape_stats.GetPhysicalSize(i),
+    intensity_stats.GetMean(i),
+    intensity_stats.GetStandardDeviation(i),
+    intensity_stats.GetSkewness(i)) for i in shape_stats.GetLabels()]
+    print(out)
+    
+def getComponents(seg,label):
+    cc=SimpleITK.Threshold(seg,lower=label,upper=label)
+    ccFilter=SimpleITK.ConnectedComponentImageFilter()
+    cc1=ccFilter.Execute(cc)
+    return {'image':cc1, 'count':ccFilter.GetObjectCount()}
+
+def drop_array(v):
+    return float(v)
+    #if type(v) is numpy.ndarray:
+    #    return v[0]
+    #return v
+
+def getSUVmax(vals):
+    return drop_array(vals['original_firstorder_Maximum'])
+
+def getSUVmean(vals):
+    return drop_array(vals['original_firstorder_Mean'])
+
+def getMTV(vals):
+    try:
+        return drop_array(vals['original_shape_MeshVolume'])
+    except KeyError:
+        return drop_array(vals['original_shape_VoxelVolume'])
+    
+def getTLG(vals):
+    V=vals['original_shape_VoxelVolume']
+    return V*getSUVmean(vals)
+
+def getCOM(vals):
+    return vals['diagnostics_Mask-original_CenterOfMassIndex']
+
+def getValue(vals,valueName):
+    if valueName=='SUVmean':
+        return getSUVmean(vals)
+    if valueName=='SUVmax':
+        return getSUVmax(vals)
+    if valueName=='MTV':
+        return getMTV(vals)
+    if valueName=='TLG':
+        return getTLG(vals)
+    if valueName=='COM':
+        return getCOM(vals)
+    return 0
+
+
+def getRadiomicsStats(setup,pet,seg,label):
+    o=getComponents(seg,label)
+    cc=o['image']
+    n=o['count']
+    output={}
+    for i in range(n):
+        output[i]=getRadiomicsComponentStats(setup,pet,seg,label)
+    return output
+        
+def getRadiomicsComponentStats(setup,pet,cc,label):
+    vals=setup['featureExtractor'].execute(pet,cc,label=label)
+    output={x:getValue(vals,x) for x in setup['values']}
+    #for v in vals:
+    #    print('{}: {}'.format(v,vals[v]))
+    for c in output:
+        print('{}: {}'.format(c,output[c]))
+    return output
+
+def findMatchingComponent(o,a,b,label):
+    statFilter=SimpleITK.StatisticsImageFilter()
+    overlap_measures_filter = SimpleITK.LabelOverlapMeasuresImageFilter()
+    print('{}: [{}]:{} [{}]:{}'.format(label,a,o[a]['count'],b,o[b]['count']))
+    comps={v:{x+1:o[v]['image']==x+1 for x in range(o[v]['count'])} for v in [a,b]}
+    stat={}
+    for comp in comps:
+        stat[comp]={}
+        for x in comps[comp]:
+            cc=comps[comp][x]
+            statFilter.Execute(cc)
+            stat[comp][x]={'sum':statFilter.GetSum()}
+    
+    for c in comps[a]:
+        cc=comps[a][c]
+        print('{}:{}'.format(c,stat[a][c]['sum']))
+        for d in comps[b]:
+            cc1=comps[b][d]
+            overlap_measures_filter.Execute(cc, cc1)
+            print('\t {}:{} {}'.format(d,stat[b][d]['sum'],overlap_measures_filter.GetDiceCoefficient()))
+
+def evaluateByLesions(pet,seg,ids):
+     for id in ids:
+        print('{}: {}'.format(id,ids[id]))
+        o={x:getComponents(seg[x],ids[id]) for x in seg}
+        a=segKeys[0]
+        for x in segKeys[1:]:
+            findMatchingComponent(o,a,x,ids[id])
+
+def updateDatasetRows(db,project,dataset,rows,filterVars=['ParticipantId','SequenceNum']):
+    for r in rows:
+        r['SequenceNum']+=0.01*r['segment']
+        qFilter=[{'variable':x,'value':''.format(r[x]),'oper':'eq'} for x in filterVars]
+        ds=db.selectRows(project,'study',dataset,qFilter)
+        if len(ds['rows'])>0:
+            row=ds['rows'][0]
+            row.update({x:r[x] for x in r if x not in filterVars})
+            db.modifyRows('update',project,'study',dataset,[row])
+        else:
+            db.modifyRows('insert',project,'study',dataset,[r])
+        

+ 36 - 0
templates/radiomics.yaml

@@ -0,0 +1,36 @@
+setting:
+  binWidth: 25
+  label: 1
+  interpolator: 'sitkBSpline' # This is an enumerated value, here None is not allowed
+  resampledPixelSpacing: # This disables resampling, as it is interpreted as None, to enable it, specify spacing in x, y, z as [x, y , z]
+  weightingNorm: # If no value is specified, it is interpreted as None
+  voxelArrayShift: 0
+
+# Image types to use: "Original" for unfiltered image, for possible filters, see documentation.
+imageType:
+  Original: {} 
+  # for dictionaries / mappings, None values are not allowed, '{}' is interpreted as an empty dictionary
+
+# Featureclasses, from which features must be calculated. If a featureclass is not mentioned, no features are calculated
+# for that class. Otherwise, the specified features are calculated, or, if none are specified, all are calculated (excluding redundant/deprecated features).
+featureClass:
+  # redundant Compactness 1, Compactness 2 an Spherical Disproportion features are disabled by default, they can be
+  # enabled by specifying individual feature names (as is done for glcm) and including them in the list.
+  shape: 
+     - 'VoxelVolume' #total volume
+     - 'MeshVolume' #volume from triangular mesh
+     - 'SurfaceArea' #area of enclosed shape
+  firstorder: 
+  # specifying an empty list has the same effect as specifying nothing.
+     - 'Maximum' #SUVmax
+     - 'Mean' #SUVmean
+  glcm:   []
+  # Disable SumAverage by specifying all other GLCM features available
+  #  - 'Autocorrelation'
+  #  - 'JointAverage'
+  #  - 'ClusterProminence'
+  glrlm: []
+  # for lists none values are allowed, in this case, all features are enabled
+  glszm: []
+  gldm:  []
+  # contains deprecated features, but as no individual features are specified, the deprecated features are not enabled

+ 8 - 0
templates/statistics.json

@@ -0,0 +1,8 @@
+{"imageDir":"preprocessedImages",
+ "project":"limfomiPET/Study",
+ "localDir":"_home_/temp/limfomiPET",
+ "idField":"ParticipantId",
+ "visitField":"visitCode",
+ "params":"basic.yaml",
+ "participants":["1965/16"]
+}

Some files were not shown because too many files changed in this diff