# studen / DCIS
import os
import sys
import importlib
import json
import lxml.etree

#needs importXLSX from the labkeyInterface library
def getImporter(setup):
    """Load the labkeyInterface library and return its importXLSX module.

    setup -- dict holding the nixWrapper directory under
             setup['paths']['nixWrapper']; the directory is appended to
             sys.path so that nixWrapper can be imported.

    importXLSX is reloaded on every call before it is returned.
    """
    wrapperDir=setup['paths']['nixWrapper']
    sys.path.append(wrapperDir)
    import nixWrapper as wrapper
    wrapper.loadLibrary('labkeyInterface')
    import importXLSX as importer
    importlib.reload(importer)
    return importer

def getFileBrowser(db):
   """Return a labkeyFileBrowser built on the connection db.net.

   nixWrapper has to be importable already; this function does not touch
   sys.path itself.
   """
   import nixWrapper as wrapper
   wrapper.loadLibrary('labkeyInterface')
   from labkeyFileBrowser import labkeyFileBrowser as makeBrowser
   return makeBrowser(db.net)

def countElements(xmlRoot,xPath):
   """Count the indexed matches xPath[1], xPath[2], ... found from xmlRoot.

   xmlRoot -- element that supports find(), len() and .tag
   xPath   -- element path relative to xmlRoot, without a trailing index

   Returns the largest i for which xmlRoot.find('xPath[i]') succeeds, or 0
   when nothing matches. A missing parent element also yields 0.
   """
   if '/' in xPath:
      parentPath=xPath[:xPath.rfind('/')]
      parent=xmlRoot.find(parentPath)
   else:
      parent=xmlRoot
   if parent is None:
      #no parent means no children; len(None) would raise TypeError here
      print('Counting {}: parent element not found'.format(xPath))
      return 0
   print('Counting {} in {}[{}]'.format(xPath,parent.tag,len(parent)))
   count=0
   #probe successive 1-based indices until one is missing
   while xmlRoot.find(f'{xPath}[{count+1}]') is not None:
      count+=1
   return count

def mergePaths(corePath,subPath):
   """Join corePath and subPath with '/', skipping whichever one is empty.

   Returns the other argument unchanged when one of them has zero length.
   """
   if len(corePath)==0:
      return subPath
   if len(subPath)==0:
      return corePath
   return '/'.join((corePath,subPath))

def getVal(xmlRoot,aliasVal,xPath=''):
   """Read one attribute from the element(s) addressed by an alias.

   xmlRoot  -- element searched with find()
   aliasVal -- 'elementPath:attributeName'; elementPath is relative to xPath
   xPath    -- optional path prefix. If it ends with ']' the merged path is
               looked up once as is; otherwise every indexed match
               elementPath[1] ... elementPath[n] is read.

   Returns a list of attribute values (an entry is None when the element
   lacks the attribute), an empty list when no element matches, or None
   when one of the addressed elements cannot be found.
   """
   q=aliasVal.split(':')
   xName=q[0]
   elementPath=mergePaths(xPath,xName)
   attributeName=q[1]

   eList=[elementPath]
   #endswith() instead of xPath[-1]: the default xPath='' must not raise IndexError
   if not xPath.endswith(']'):
      n=countElements(xmlRoot,elementPath)
      eList=[f'{elementPath}[{i+1}]' for i in range(n)]

   print(eList)
   try:
      return [xmlRoot.find(e).get(attributeName) for e in eList]
   except AttributeError:
      #find() returned None for one of the paths
      return None

def updateTxt(txt,replacePatterns):
   """Apply every pattern -> replacement pair of replacePatterns to txt.

   Replacements run one after another in the mapping's iteration order, so a
   later pattern also sees text produced by an earlier one.
   """
   for pattern,replacement in replacePatterns.items():
      txt=txt.replace(pattern,replacement)
   return txt

def updateAliases(aliasValues,aliasReplace):
   """Return a new dict with updateTxt(value,aliasReplace) applied to each alias.

   Keys are kept; aliasValues itself is not modified.
   """
   updated={}
   for name,value in aliasValues.items():
      updated[name]=updateTxt(value,aliasReplace)
   return updated

def parseJSON(x):
    """Decode a JSON string, returning {} when there is nothing to decode.

    x -- JSON text, or None / a blank string for "not set"

    Returns the decoded object. None, blank strings and values json.loads
    rejects with TypeError give {}. Malformed JSON text still raises
    json.JSONDecodeError so configuration mistakes are not hidden.
    """
    print(f'Decoding [{x}]')
    #a blank cell means "no settings", same as None
    if isinstance(x,str) and not x.strip():
        return {}
    try:
        return json.loads(x)
    except TypeError:
        return {}
                
def readSetup(importXLSX,pars):
    """Fetch the rows of the 'importSetup' list and decode their JSON columns.

    importXLSX -- importer module; only getDB(pars) is used here
    pars       -- parameters; pars['project'] names the project to query

    Returns the list of rows with 'aliasReplace' and 'presetValues' replaced
    by the result of parseJSON.
    """
    db=importXLSX.getDB(pars)
    setupRows=db.selectRows(pars['project'],'lists','importSetup',[])['rows']
    jsonColumns=('aliasReplace','presetValues')
    for entry in setupRows:
        for column in jsonColumns:
            entry[column]=parseJSON(entry[column])
    return setupRows
        
    
def getID(root):
    """Return the 'val' attribute of the Patient/PatientID element under root.

    Raises AttributeError when that element does not exist.
    """
    idElement=root.find('Patient/PatientID')
    return idElement.get('val')

def getXMLRoot(xmlFile):
   """Parse xmlFile with lxml and return the root element of the document."""
   tree=lxml.etree.ElementTree(file=xmlFile)
   return tree.getroot()

def matchingLengths(dictArray):
   """Tell whether all values of dictArray have the same length.

   An empty dictArray counts as matching.
   """
   distinctLengths={len(dictArray[key]) for key in dictArray}
   return len(distinctLengths)<=1
    
def _asBool(value):
   #accept real booleans as well as the strings "True"/"true"; anything else is False
   if isinstance(value,bool):
      return value
   return value=="True" or value=="true"

def importXML(importXLSX,pars,xmlRoot,dryRun=True):
   """Extract values for one query from xmlRoot and hand them to importXLSX.loadSafely.

   importXLSX -- importer module (getFields, getVariables, getLookupMap,
                 getAlias, invertMap and loadSafely are used)
   pars       -- parameters; a missing or None entry falls back to its default (d)
      - seqNumOffset: SequenceNum of the rows is seqNumOffset+1 (d: 0)
      - XPath: path prefix of the element(s) the data is taken from (d: '')
      - additionalKeyColumn: column that separates entries on top of
        ParticipantId and SequenceNum; passed on to loadSafely (d: None)
      - presetValues: dict of values copied into every row (d: {})
      - aliasReplace: dict of text replacements applied to the aliases (d: {})
      - allowMultiple: True/"True"/"true" keeps all matching elements,
        otherwise only the first one is used (d: "False")
      - id: participant id; when absent it is read from the xml record
      - whatever importXLSX itself reads from pars (not visible here)
   xmlRoot    -- root element of the parsed xml record
   dryRun     -- forwarded unchanged to importXLSX.loadSafely

   Returns None. Nothing is loaded when no field yields a value or when the
   fields yield different numbers of values.
   """
   #"or default" also covers keys that are present but set to None
   seqNumOffset=pars.get('seqNumOffset') or 0
   xPath=pars.get('XPath') or ''
   keyColumn=pars.get('additionalKeyColumn')
   presetValues=pars.get('presetValues') or {}
   aliasReplace=pars.get('aliasReplace') or {}
   allowMultiple=_asBool(pars.get('allowMultiple',"False"))

   fields=importXLSX.getFields(pars)
   #NOTE(review): lookupMap and doubleVars are not used below and dateVars is
   #only printed; the calls are kept as they were - confirm before removing
   lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
   dateVars=importXLSX.getVariables(fields,fieldType='DATE')
   doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')

   #convert dates to list
   dateVars=list(dateVars.keys())
   print(f'dateVars: {dateVars}')
   lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
   alias=importXLSX.invertMap(importXLSX.getAlias(fields))
   alias=updateAliases(alias,aliasReplace)
   print(f'aliases: {alias}')

   #patient id set in pars takes precedence; the xml record is only consulted
   #(and only has to contain Patient/PatientID) when pars has no id
   pid=pars['id'] if 'id' in pars else getID(xmlRoot)

   row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
   row.update(presetValues)

   #collect the list of values found for every field that has an alias
   vals={}
   for f in fields:
      try:
         fieldAlias=alias[f]
      except KeyError:
         print(f'Alias for field {f} not found')
         continue
      x=getVal(xmlRoot,fieldAlias,xPath)
      if not x:
         continue
      if not allowMultiple:
         x=x[0:1]
      print('{}[{}]: {}'.format(f,len(x),x))
      vals[f]=x

   if not vals:
      #without this guard the length lookup below fails with IndexError
      print('No values found in XML, nothing to import')
      return
   if not matchingLengths(vals):
      print('Mismatch lenghts of variables {}'.format({v:len(vals[v]) for v in vals}))
      return

   n=len(next(iter(vals.values())))
   rows=[]
   for i in range(n):
      r=dict(row)
      r.update({v:vals[v][i] for v in vals})
      if n>1:
         #make the key column unique per entry by appending a 1-based index
         #NOTE(review): raises KeyError when keyColumn is unset or has no value in r
         r[keyColumn]='{}_{}'.format(r[keyColumn],i+1)
      rows.append(r)

   importXLSX.loadSafely(pars,rows,keyColumn,dryRun)


def main(parameterFile):
   """Run one XML import as described by a JSON parameter file.

   parameterFile -- path to a JSON file; 'project' and 'key' are read here,
                    the whole dict is passed on to the importer

   Steps: read ~/.labkey/setup.json, look up the row of list 'importXML'
   whose Key equals pars['key'], download its uploaded file to
   ~/temp/DCIS/data.xml, mark the row as being processed (status 3), run
   importXML once per row of the 'importSetup' list, then mark the row as
   OK (status 1).

   Raises IndexError when no 'importXML' row matches the key.
   """
   with open(parameterFile) as f:
      pars=json.load(f)

   print(pars)

   fhome=os.path.expanduser('~')
   with open(os.path.join(fhome,".labkey","setup.json")) as f:
      setup=json.load(f)

   importXLSX=getImporter(setup)

   #needs server
   db=importXLSX.getDB(pars)

   db.net.getCSRF()
   keyFilter={'variable':'Key','value':'{key}'.format(**pars),'oper':'eq'}
   #needs project
   ds=db.selectRows(pars['project'],'lists','importXML',[keyFilter])
   matches=ds['rows']
   if not matches:
      raise IndexError('No importXML entry with Key={key}'.format(**pars))
   r=matches[0]

   xmlFile=os.path.join(fhome,'temp','DCIS','data.xml')
   #make sure the download directory exists before writing into it
   os.makedirs(os.path.dirname(xmlFile),exist_ok=True)
   url=db.net.connectionConfig['host']+r['_labkeyurl_fileUpload']
   fb=getFileBrowser(db)
   fb.readFileToFile(url,xmlFile)
   xmlRoot=getXMLRoot(xmlFile)
   #needs project
   setupRows=readSetup(importXLSX,pars)

   #let labkey know that we started a processing run
   #fileUpload is dropped from the row so the update does not send it back
   r.pop('fileUpload',None)
   r['status']=3
   if r['ID'] is None:
      r['ID']=getID(xmlRoot)
   db.modifyRows('update',pars['project'],'lists','importXML',[r])
   for s in setupRows:
      #needs project and schema for queries; setup row values override pars
      importXML(importXLSX,pars|s,xmlRoot,dryRun=False)

   #we are done now, set status to OK(1)
   r['status']=1
   db.modifyRows('update',pars['project'],'lists','importXML',[r])


if __name__ == "__main__" :
   #the parameter file is the only command line argument
   if len(sys.argv)<2:
      sys.exit('Usage: {} parameterFile'.format(sys.argv[0]))
   main(sys.argv[1])