clusterAnalysisLeftovers.ipynb 24 KB

#functions used in final analysis
def getWeights(locDir,p,nclass,realizationId):
    #for w1, classes are in 0 to nclass-1 space
    
    segmFile=os.path.join(locDir,p,p+'_Segm.nrrd')
    imS=SimpleITK.ReadImage(segmFile)
    nS=SimpleITK.GetArrayFromImage(imS)
    
    unit=numpy.ones(nS.shape)
    w={region:numpy.zeros(nclass) for region in numpy.arange(1,10)}
    w1={region:{} for region in w}
    
    sUnit={region:numpy.sum(unit[nS==region]) for region in w}
                            
    for c in numpy.arange(nclass):
        uFile=os.path.join(locDir,p,'{}_{}_{}_center{}_centerWeigth.nrrd'.format(p,nclass,realizationId,c+1))
        imU=SimpleITK.ReadImage(uFile)
        nU=SimpleITK.GetArrayFromImage(imU)
        
        for region in w:
            v=nU[(nS==region) ]
            #print('{}: {}'.format(region,v.shape))
            w[region][c]=numpy.sum(v)/sUnit[region]
            w1[region][c]=v
            #print('[{}/{}]: {}'.format(region,j,w[region,j]))
    return w,w1

def getFuzziness(locDir,p,nclass,realizationId):
    
    segmFile=os.path.join(locDir,p,p+'_Segm.nrrd')
    imS=SimpleITK.ReadImage(segmFile)
    nS=SimpleITK.GetArrayFromImage(imS)
    
    w={region:0 for region in numpy.arange(1,10)}
    nU={}
    for c in numpy.arange(nclass):
        uFile=os.path.join(locDir,p,'{}_{}_{}_center{}_centerWeigth.nrrd'.format(p,nclass,realizationId,c+1))
        imU=SimpleITK.ReadImage(uFile)
        fy=SimpleITK.GetArrayFromImage(imU)
        
        for region in w:
            fy1=fy[(nS==region)]
            try:
                nU[region][:,c]=fy1
            except KeyError:
                nU[region]=numpy.zeros((len(fy1),nclass))
                nU[region][:,c]=fy1
    for u in nU:
        w[u]=numpy.mean(nU[u].max(1))
    return w
    

def getFitPar(locDir,p,nclass,realizationId):
    #fitGoodnes A tau alpha delay [k1 BVF k2 delay]xNcenters
    uFile=os.path.join(locDir,p,'{}_{}_{}_fitParFinal.txt'.format(p,nclass,realizationId))
    return numpy.genfromtxt(uFile,delimiter='\t')

def getK1(fitPar,iclass):
    #fitGoodnes A tau alpha delay [k1 BVF k2 delay]xNcenters
    #0 to nclass-1 space
    return fitPar[4*iclass+5]

def calculateK1(w1,fitPar,range=1.0, nbins=50):
    #returns k1 by pixel in w1
    histM={}
    histW={}
    classWeightM={}
    classWeightW={}
    fuzzy={}
    std={'W':{},'M':{}}
    median={'W':{},'M':{}}
    f90p={'W':{},'M':{}}
    f95p={'W':{},'M':{}}
    mean={'W':{},'M':{}}
    for region in w1:
        vTempl=w1[region][list(w1[region].keys())[0]]#should by a numpy array with shape (npx,)
        v={v:{} for v in numpy.arange(len(vTempl))}
        for c in w1[region]:
            for i in v:
                v[i][c]=w1[region][c][i]
        fuzzy[region]=0
        k1W=numpy.zeros(vTempl.shape)
        k1M=numpy.zeros(vTempl.shape)
        cW=numpy.zeros(len(w1[region]))#number of classes
        cM=numpy.zeros(len(w1[region]))#number of classes
        for i in v:
            maxClass=max(v[i],key=v[i].get)
            maxValue=v[i][maxClass]
            fuzzy[region]+=maxValue
            k1W[i]=0
            for c in v[i]:
                k1W[i]+=v[i][c]*getK1(fitPar,c)
                cW[c]+=v[i][c]
            k1M[i]=getK1(fitPar,maxClass)
            cM[maxClass]+=1
            #print('[{},{}:{}] {},{}'.format(i,max(v[i],key=v[i].get),max(v[i].values()),k1W[i],k1M[i]))
        histM[region]=numpy.histogram(60*k1M,bins=nbins,range=(0,range))
        histW[region]=numpy.histogram(60*k1W,bins=nbins,range=(0,range))
        mean['M'][region]=numpy.mean(k1M)
        mean['W'][region]=numpy.mean(k1W)
        std['M'][region]=numpy.std(k1M)
        std['W'][region]=numpy.std(k1W)
        median['M'][region]=numpy.median(k1M)
        median['W'][region]=numpy.median(k1W)
        f90p['M'][region]=numpy.percentile(k1M,90)
        f90p['W'][region]=numpy.percentile(k1W,90)
        f95p['M'][region]=numpy.percentile(k1M,95)
        f95p['W'][region]=numpy.percentile(k1W,95)
            
        numpy.set_printoptions(precision=3)
        cM/=len(v)
        cW/=len(v)
        classWeightM[region]=cM
        classWeightW[region]=cW
        for c1,c2 in zip(cM,cW):
            pass
            #print('{}/{:.2f}'.format(c1,c2))
        fuzzy[region]/=len(v)
        
    return fuzzy,histM,histW,classWeightM,classWeightW,mean,std,median,f90p,f95p

def makePlot(h):
    width = numpy.diff(h[1])
    center = (h[1][:-1] + h[1][1:]) / 2
    y=h[0]
    return center,y,width

def median(x,y):
    N=numpy.sum(y)
    s=0
    i=0
    while s<0.5*N:
        s+=y[i]
        i+=1
    return x[i]

def mean(x,y):
    return sum(x*y)/sum(y)

#updates k1 values in ImagingData tables by averaging over realizations
#used calculateK1, getWeights and getFitPar

def updatePatient(db,fb,project, locDir,patient, nclass):
    idFilter={'variable':'PatientId','value':patient,'oper':'eq'}
    ds=db.selectRows(project,'study','Imaging',[idFilter])
    rows=ds['rows']
    #rows=[ds['rows'][1]]
    rId=1
    hMy={}
    hMx={}
    hMw={}

    hMean={}
    hStd={}
    hMedian={}
    h90p={}
    h95p={}

    selectFlag='M'

    for r in rows:
        pId=r['PatientId']
        if pId.find('8')==0:
            continue
        state='MIR'
        if r['SequenceNum']>1:
            state='OBR'
        p=r['aliasID']

        for rId in numpy.arange(1,21):
            w,w1=getWeights(locDir,p,nclass,rId)
            fitPar=getFitPar(locDir,p,nclass,rId)
            fz,hM,hW,cM,cW,mean,qstd,qmedian,f90p,f95p=\
                calculateK1(w1,fitPar,range=0.8)
            #x,y,w = makePlot(hW[7])
            #matplotlib.pyplot.bar(x,y,width=w)
            #matplotlib.pyplot.text(0.3,20,'{} {}'.format(state,rId))
            #matplotlib.pyplot.show()
            for r in w1:
                if selectFlag=='M':
                    x,y,w = makePlot(hM[r])
                else:
                    x,y,w = makePlot(hW[r])
                try:
                    fy=hMy[r][state]
                    hMy[r][state]=fy+y
                except KeyError:
                    try:
                        hMy[r][state]=y
                        hMx[r][state]=x
                        hMw[r][state]=w
                    except KeyError:
                        hMy[r]={}
                        hMx[r]={}
                        hMw[r]={}
                        hMy[r][state]=y
                        hMx[r][state]=x
                        hMw[r][state]=w
                try:
                    hMean[r][state].append(mean[selectFlag][r])
                    hMedian[r][state].append(qmedian[selectFlag][r])
                    hStd[r][state].append(qstd[selectFlag][r])
                    h90p[r][state].append(f90p[selectFlag][r])
                    h95p[r][state].append(f95p[selectFlag][r])
                except KeyError:
                    try:
                        hMean[r][state]=[]
                        hMedian[r][state]=[]
                        hStd[r][state]=[]
                        h90p[r][state]=[]
                        h95p[r][state]=[]
                    except KeyError:
                        hMean[r]={}
                        hMedian[r]={}
                        hStd[r]={}
                        h90p[r]={}
                        h95p[r]={}


    #generate table
    nclassFilter={'variable':'nclass','value':str(nclass),'oper':'eq'}
    ds=db.selectRows(project,'study','ImagingData',[idFilter,nclassFilter])

    for qrow in ds['rows']:
        r=qrow['regionId']

        ymax=-1e30
        s={}
        for state in hMy[r]:
            y=hMy[r][state]
            x=hMx[r][state]
            s[state]=sum(x*y)/numpy.sum(y)
            s[state]=median(x,y)
            if max(y)>ymax:
                ymax=max(y)
            matplotlib.pyplot.bar(hMx[r][state],hMy[r][state],width=hMw[r][state])

            #fill variables
            qrow['k1'+state+'Mean']=numpy.mean(hMean[r][state])
            qrow['k1'+state+'MeanStd']=numpy.std(hMean[r][state])
            qrow['k1'+state+'Median']=numpy.mean(hMedian[r][state])
            qrow['k1'+state+'MedianStd']=numpy.std(hMedian[r][state])
            qrow['k1'+state+'Std']=numpy.mean(hStd[r][state])
            qrow['k1'+state+'StdStd']=numpy.std(hStd[r][state])
            qrow['k1'+state+'90p']=numpy.mean(h90p[r][state])
            qrow['k1'+state+'90pStd']=numpy.std(h90p[r][state])
            qrow['k1'+state+'95p']=numpy.mean(h90p[r][state])
            qrow['k1'+state+'95pStd']=numpy.std(h90p[r][state])

        p=qrow['PatientId']
        oDir=os.path.join(locDir,p)
        oDirRemote=fb.buildPathURL(project,[p]);
        qrow['k1RatioMean']=qrow['k1MIRMean']/qrow['k1OBRMean']
        qrow['k1RatioMedian']=qrow['k1MIRMedian']/qrow['k1OBRMedian']
        qrow['k1Ratio90p']=qrow['k1MIR90p']/qrow['k1OBR90p']
        qrow['k1Ratio95p']=qrow['k1MIR95p']/qrow['k1OBR95p']
        db.modifyRows('update',project,'study','ImagingData',[qrow])
        matplotlib.pyplot.text(0.4,0.95*ymax,'{} region:{}'.format(qrow['PatientId'], r))
        matplotlib.pyplot.text(0.4,0.85*ymax,'k1={:.2f}/min k2={:.2f}/min'.format(s['MIR'], s['OBR']))
        matplotlib.pyplot.text(0.4,0.75*ymax,'R={:.2f}'.format(s['OBR']/s['MIR']))
        fname='{}_{}_{}_k1Distribution.png'.format(p,nclass,r)
        matplotlib.pyplot.savefig(os.path.join(oDir,fname))
        fb.writeFileToFile(os.path.join(oDir,fname),oDirRemote+'/'+fname)
        matplotlib.pyplot.show()
        
        
def getReadings(db,fb,project, locDir,patient, nclass, state, region):
    readings=[]
    idFilter={'variable':'PatientId','value':patient,'oper':'eq'}
    if state=='MIR':
        seqNum=1
    else:
        seqNum=2
    seqFilter={'variable':'SequenceNum','value':str(seqNum),'oper':'eq'}
    ds=db.selectRows(project,'study','Imaging',[idFilter,seqFilter])
    r=ds['rows'][0]
    p=r['aliasID']
    weightOption='M'
    for realizationId in numpy.arange(1,21):
        w,w1=getWeights(locDir,p,nclass,realizationId)
        fitPar=getFitPar(locDir,p,nclass,realizationId)
        fz,hM,hW,cM,cW,mean,qstd,qmedian,f90p,f95p=calculateK1(w1,fitPar,range=0.8)
        readings.append(qmedian[weightOption][region])
    return readings

import matplotlib.pyplot
import matplotlib.image as mpimg
import json

#extract patient values for a particular realization,
#return region averages and histograms of k1 in region
#based on pixel selection criteria, which is either 'M' 
#for maximum probability or 'W' for probability weighted contribution
#of cluster in each pixel (see calculate K1)

#fill ImageDataRealization
def getPatient(db,fb,project, locDir,patient, nclass, realizationId,\
               state, region):
    avgMode='M'
    idFilter={'variable':'PatientId','value':patient,'oper':'eq'}
    if state=='MIR':
        seqNum=1
    else:
        seqNum=2
    seqFilter={'variable':'SequenceNum','value':str(seqNum),'oper':'eq'}
    ds=db.selectRows(project,'study','Imaging',[idFilter,seqFilter])
    r=ds['rows'][0]
    
    p=r['aliasID']
    oDirRemote=fb.buildPathURL(project,[p]);
    
    w,w1=getWeights(locDir,p,nclass,realizationId)
    fitPar=getFitPar(locDir,p,nclass,realizationId)
    fz,hM,hW,cM,cW,mean,qstd,qmedian,f90p,f95p=calculateK1(w1,fitPar,range=0.8)
    #plot most common responses
    cmSort=numpy.flip(numpy.argsort(cM[region]))
    
    #last argument
    for i in numpy.arange(3):
        idx=cmSort[i]
        #print('{}: {}'.format(idx,cM[region][idx]))
        fname='{}_{}_{}_centers{}.png'.format(p,nclass,realizationId,idx+1)
        fp=os.path.join(locDir,p,fname)
        img = mpimg.imread(fp)
        #imgplot = matplotlib.pyplot.imshow(img)
        #matplotlib.pyplot.show()
        rf=oDirRemote+'/'+fname
        if not fb.entryExists(rf):
            fb.writeFileToFile(fp,rf)  
    
    realizationFilter={'variable':'realizationId','value':str(realizationId),'oper':'eq'}
    stateFilter={'variable':'state','value':state,'oper':'eq'}
    nclassFilter={'variable':'nclass','value':str(nclass),'oper':'eq'}
    regionFilter={'variable':'regionId','value':str(region),'oper':'eq'}
    
    #fill database
    dsQ=db.selectRows(project,'study','ImagingDataRealization',[idFilter,stateFilter,nclassFilter,regionFilter,realizationFilter])
    if len(dsQ['rows'])>0:
        qrow=dsQ['rows'][0]
        mode='update'
    else:
        qrow={}
        qrow['PatientId']=patient
        qrow['SequenceNum']=nclass+0.01*region+0.0001*realizationId
        if state!='MIR':
            qrow['SequenceNum']+=1
        qrow['realizationId']=int(realizationId)
        qrow['state']=state
        qrow['nclass']=int(nclass)
        qrow['regionId']=int(region)
        mode='insert'
    
    qrow['k1Mean']=mean[avgMode][region]
    qrow['k1Median']=qmedian[avgMode][region]
    qrow['k190p']=f90p[avgMode][region]
    qrow['k195p']=f95p[avgMode][region]
    qrow['k1Std']=qstd[avgMode][region]
    qrow['centers']=';'.join([str(cmSort[i]) for i in numpy.arange(3)])
    qrow['cW']=';'.join(['{:.2f}'.format(cM[region][cmSort[i]]) for i in numpy.arange(3)])
    db.modifyRows(mode,project,'study','ImagingDataRealization',[qrow])
    
    
    fname='{}_{}_{}_histogram{}.png'.format(p,nclass,realizationId,region)
    fp=os.path.join(locDir,p,fname)
    x,y,w=makePlot(hM[region])
    if state=='MIR':
        color='tab:blue'
    else:
        color='tab:orange'
    imgplot = matplotlib.pyplot.bar(x,y,width=w,color=color)
    matplotlib.pyplot.savefig(fp)
    rf=oDirRemote+'/'+fname
    if not fb.entryExists(rf):
        fb.writeFileToFile(fp,rf)  
        
    #print('Saved to {}'.format(oDirRemote+'/'+fname))
    #write file to file
    #matplotlib.pyplot.show()
    
    
    jsonHistogram={'x':x.tolist(),'y':y.tolist(),'w':w.tolist()}
    fname='{}_{}_{}_histogram{}.json'.format(p,nclass,realizationId,region)
    fp=os.path.join(locDir,p,fname)
    with open (fp,'w') as f:
        json.dump(jsonHistogram,f)
    #write to server
    rf=oDirRemote+'/'+fname
    if not fb.entryExists(rf):
        fb.writeFileToFile(fp,rf)
    #fitPar?
    fname='{}_{}_{}_fitParFinal.txt'.format(p,nclass,realizationId)
    fpRemote=oDirRemote+'/'+fname
    if not fb.entryExists(fpRemote):
        fp=os.path.join(locDir,p,fname)
        if os.path.isfile(fp):
            fb.writeFileToFile(fp,fpRemote)
    
    
nclass=30
ds=db.selectRows(project,'study','Imaging',[])
patients=list(set([row['PatientId'] for row in ds['rows']]))
idFilter={'variable':'PatientId','value':patients[4],'oper':'eq'}
ds=db.selectRows(project,'study','Imaging',[idFilter])
rows=ds['rows']
#rows=[ds['rows'][1]]
rId=1
k1Mean={}
k1Std={}
k1Median={}
k190p={}
k195p={}

for r in rows:
    pId=r['PatientId']
    if pId.find('8')==0:
        continue
    state='MIR'
    if r['SequenceNum']>1:
        state='OBR'
    p=r['aliasID']
    
    for rId in numpy.arange(1,21):
        w,w1=getWeights(locDir,p,nclass,rId)
        fitPar=getFitPar(locDir,p,nclass,rId)
        fz,hM,hW,cM,cW=calculateK1(w1,fitPar,range=0.8)
        #x,y,w = makePlot(hW[7])
        #matplotlib.pyplot.bar(x,y,width=w)
        #matplotlib.pyplot.text(0.3,20,'{} {}'.format(state,rId))
        #matplotlib.pyplot.show()
        for r in w1:
            x,y,w = makePlot(hW[r])
            try:
                k1Mean
                fy=hMy[r][state]
                hMy[r][state]=fy+y
            except KeyError:
                try:
                    hMy[r][state]=y
                    hMx[r][state]=x
                    hMw[r][state]=w
                except KeyError:
                    hMy[r]={}
                    hMx[r]={}
                    hMw[r]={}
                    hMy[r][state]=y
                    hMx[r][state]=x
                    hMw[r][state]=w
                    
#generate table

            
for r in hMy:
    ymax=-1e30
    s={}
    for state in hMy[r]:
        y=hMy[r][state]
        x=hMx[r][state]
        s[state]=sum(x*y)/numpy.sum(y)
        s[state]=median(x,y)
        if max(y)>ymax:
            ymax=max(y)
        matplotlib.pyplot.bar(hMx[r][state],hMy[r][state],width=hMw[r][state])
        
    matplotlib.pyplot.text(0.5,0.8*ymax,'{} {:.2f} {:.2f} {:.2f}'.format(r, s['MIR'], s['OBR'], s['OBR']/s['MIR']))
    matplotlib.pyplot.show()
    
   
readings=getReadings(db,fb,project,locDir,'3ZF',30,'OBR',4)
print('{}/{} {}'.format(numpy.mean(readings),numpy.median(readings),readings))

patients=['2SB','3ZF','5MI','7TM','10KF','11ZM']
states=['MIR','OBR']
classes=[10,20,30]
regions=numpy.arange(1,10)
realizations=numpy.arange(1,21)
for p in patients:
    for s in states:
        for c in classes:
            for realizationId in realizations:
                for r in regions:
                    print('{} {} {} {}/20 {}/9'.format(p,s,c,realizationId,r))
                    getPatient(db,fb,project,locDir,p,c,realizationId,s,r)
                    
#update k1 values in ImagingData from getK1 
prows=[row for row in ds['rows'] if row['PatientId']==pId]
    print(len(prows))
    
    for prow in prows:
        idFilter={'variable':'PatientId','value':pId,'oper':'eq'}
        nclassFilter={'variable':'nclass','value':str(nclass),'oper':'eq'}
        dsQ=db.selectRows(project,'study','ImagingData',[idFilter,nclassFilter])
        
        #skip those already done
        #if len(dsQ['rows'])>0:
        #    continue

        p=prow['aliasID']
        visitName='MIR'
        if prow['SequenceNum']>1:
                visitName='OBR'
                
        #here the sampling happens
        fq=[getK1(p,nclass,visitName,i) for i in numpy.arange(0,nsample)]
        qMerge=combine(fq)
    
        #this is fine also if previous step is omitted
        if len(dsQ['rows'])==0:
            regionIds=numpy.arange(1,10)
        else:
            regionIds=[int(r['regionId']) for r in dsQ['rows']]

        
        for region in regionIds:
            qrow=[r for r in dsQ['rows'] if r['regionId']==region]
            
            mode='update'
            if len(qrow):
                qrow=qrow[0]
            else:
                qrow={}
                qrow['PatientId']=prow['PatientId']
                qrow['SequenceNum']=str(nclass+0.01*region)
                qrow['regionId']=str(region)
                mode='insert'
            
            qrow['nclass']=str(nclass)
            vars=['k1','v19']
            avgs=['Mean','Std','Median','90p','95p']
            for var in vars:
                for avg in avgs:
                    qrow[var+visitName+avg]=str(numpy.mean(qMerge[var+visitName+avg+'_'+str(region)]))
                    qrow[var+visitName+avg+'Std']=str(numpy.std(qMerge[var+visitName+avg+'_'+str(region)]))
            db.modifyRows(mode,project,'study','ImagingData',[qrow])
        if i==0:
            break
    if i==0: