123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
- import os
- import sys
- import importlib
- import json
- import lxml.etree
# Requires the importXLSX module from the labkeyInterface library.
def getImporter(setup):
    """Import, reload and return the ``importXLSX`` module.

    Parameters:
        setup: mapping where ``setup['paths']['nixWrapper']`` is the
            directory added to ``sys.path`` so ``nixWrapper`` can be imported.

    Returns:
        The ``importXLSX`` module object, after ``importlib.reload``.

    Raises:
        KeyError: if the ``paths``/``nixWrapper`` entries are missing.
        ImportError: if ``nixWrapper`` or ``importXLSX`` cannot be imported.
    """
    wrapperPath = setup['paths']['nixWrapper']
    # Avoid growing sys.path with the same entry on every call.
    if wrapperPath not in sys.path:
        sys.path.append(wrapperPath)
    import nixWrapper
    # NOTE(review): loadLibrary presumably makes the labkeyInterface modules
    # importable (importXLSX is imported right after) - confirm in nixWrapper.
    nixWrapper.loadLibrary('labkeyInterface')
    import importXLSX
    # Re-execute the module so the returned object reflects the current source.
    importlib.reload(importXLSX)
    return importXLSX
def getFileBrowser(db):
    """Create a ``labkeyFileBrowser`` bound to the network object of ``db``.

    Parameters:
        db: database handle exposing a ``net`` attribute.

    Returns:
        ``labkeyFileBrowser.labkeyFileBrowser(db.net)``.
    """
    # nixWrapper must already be importable at this point; this function
    # does not touch sys.path itself.
    import nixWrapper
    nixWrapper.loadLibrary('labkeyInterface')
    import labkeyFileBrowser
    browserFactory = labkeyFileBrowser.labkeyFileBrowser
    return browserFactory(db.net)
def countElements(xmlRoot, xPath):
    """Count how many consecutive positional matches ``xPath[i]`` exist.

    Probes ``xPath[1]``, ``xPath[2]``, ... with ``xmlRoot.find`` and stops at
    the first index that yields no element.

    Parameters:
        xmlRoot: element providing ``find`` (ElementTree-style API).
        xPath: element path relative to ``xmlRoot``, without a trailing
            positional predicate.

    Returns:
        Number of indices that matched; 0 when nothing matches or when the
        parent part of ``xPath`` does not exist.
    """
    parent = xmlRoot
    if '/' in xPath:
        parent = xmlRoot.find(xPath[:xPath.rfind('/')])
    if parent is None:
        # Previously len(None) raised TypeError here; a missing parent simply
        # means there is nothing to count.
        print('Counting {}: parent element not found'.format(xPath))
        return 0
    # Diagnostic only: len(parent) is the number of children of the parent.
    print('Counting {} in {}[{}]'.format(xPath, parent.tag, len(parent)))
    count = 0
    while xmlRoot.find(f'{xPath}[{count + 1}]') is not None:
        count += 1
    return count
def mergePaths(corePath, subPath):
    """Join two path fragments with '/', skipping whichever one is empty.

    Returns ``subPath`` when ``corePath`` is empty, ``corePath`` when
    ``subPath`` is empty, otherwise ``corePath/subPath``.
    """
    if len(corePath) == 0:
        return subPath
    if len(subPath) == 0:
        return corePath
    return '/'.join((corePath, subPath))
def getVal(xmlRoot, aliasVal, xPath=''):
    """Read one attribute from every element addressed by an alias.

    Parameters:
        xmlRoot: element providing ``find`` (ElementTree-style API).
        aliasVal: string of the form ``'elementName:attributeName'``.
        xPath: optional path prefix. When it ends with ``']'`` the merged path
            is used as is (a single element); otherwise all positional matches
            ``path[1]..path[n]`` are collected.

    Returns:
        List with the attribute value of each addressed element (an entry is
        None when the attribute is absent), or None if any addressed element
        cannot be found.

    Raises:
        IndexError: if ``aliasVal`` contains no ``':'`` separator.
    """
    aliasParts = aliasVal.split(':')
    elementPath = mergePaths(xPath, aliasParts[0])
    attributeName = aliasParts[1]
    # endswith() is safe for the default empty xPath, where xPath[-1] raised
    # IndexError.
    if xPath.endswith(']'):
        eList = [elementPath]
    else:
        n = countElements(xmlRoot, elementPath)
        eList = [f'{elementPath}[{i + 1}]' for i in range(n)]

    print(eList)
    try:
        return [xmlRoot.find(e).get(attributeName) for e in eList]
    except AttributeError:
        # find() returned None for one of the paths.
        return None
def updateTxt(txt, replacePatterns):
    """Apply every ``pattern -> replacement`` pair of a mapping to ``txt``.

    Replacements are applied one after another in the mapping's iteration
    order, so a later pattern can match text produced by an earlier one.
    """
    for pattern, replacement in replacePatterns.items():
        txt = txt.replace(pattern, replacement)
    return txt
def updateAliases(aliasValues, aliasReplace):
    """Return a new dict with ``updateTxt`` applied to every alias value.

    Keys of ``aliasValues`` are kept; each value is rewritten using the
    replacement patterns in ``aliasReplace``.
    """
    updated = {}
    for name in aliasValues:
        updated[name] = updateTxt(aliasValues[name], aliasReplace)
    return updated
def parseJSON(x):
    """Decode a JSON text, treating a missing value as an empty dict.

    Parameters:
        x: JSON text, or None / a blank string when no value is set.

    Returns:
        The decoded object; ``{}`` when ``x`` is None, blank, or of a type
        ``json.loads`` does not accept.

    Raises:
        json.JSONDecodeError: if ``x`` is non-blank text that is not valid
            JSON (deliberately not swallowed so bad setup entries surface).
    """
    print(f'Decoding [{x}]')
    # An unset value may arrive as None or as an empty/whitespace string; the
    # latter used to raise JSONDecodeError.
    if x is None or (isinstance(x, str) and not x.strip()):
        return {}
    try:
        return json.loads(x)
    except TypeError:
        # Non-text input: keep the original fallback.
        return {}
-
def readSetup(importXLSX, pars):
    """Fetch the rows of the ``importSetup`` list and decode their JSON columns.

    Parameters:
        importXLSX: module/object providing ``getDB(pars)``.
        pars: parameters; ``pars['project']`` names the project to query.

    Returns:
        The list of rows, with ``aliasReplace`` and ``presetValues`` replaced
        in place by their ``parseJSON`` result.
    """
    connection = importXLSX.getDB(pars)
    result = connection.selectRows(pars['project'], 'lists', 'importSetup', [])
    setupRows = result['rows']
    jsonColumns = ('aliasReplace', 'presetValues')
    for setupRow in setupRows:
        for column in jsonColumns:
            setupRow[column] = parseJSON(setupRow[column])
    return setupRows
-
-
def getID(root):
    """Return the ``val`` attribute of the ``Patient/PatientID`` element.

    Raises AttributeError when that element does not exist under ``root``.
    """
    patientIdElement = root.find('Patient/PatientID')
    return patientIdElement.get('val')
def getXMLRoot(xmlFile):
    """Parse ``xmlFile`` with lxml and return the root element of the tree."""
    tree = lxml.etree.ElementTree(file=xmlFile)
    return tree.getroot()
def matchingLengths(dictArray):
    """Tell whether all values of ``dictArray`` have the same length.

    An empty mapping counts as matching (returns True).
    """
    distinctLengths = {len(values) for values in dictArray.values()}
    return len(distinctLengths) <= 1
-
def importXML(importXLSX, pars, xmlRoot, dryRun=True):
    """Build rows from an XML record and hand them to ``importXLSX.loadSafely``.

    For every field reported by ``importXLSX.getFields`` the field's alias
    (``element:attribute``) is looked up in the XML through ``getVal``; the
    collected values are combined with ParticipantId, SequenceNum and the
    preset values into one row per XML entry.

    Keys read from ``pars`` in this function:
      - ``seqNumOffset``: SequenceNum is set to ``seqNumOffset + 1``
        (default 0, i.e. SequenceNum 1).
      - ``XPath``: XML path prefix of the element(s) to read (default '').
      - ``additionalKeyColumn``: column that separates multiple entries; with
        more than one entry its value is suffixed with ``_<index>``.
      - ``presetValues``: dict of row values set up front (default {}).
      - ``aliasReplace``: dict of text replacements applied to aliases.
      - ``allowMultiple``: True/"True"/"true" keeps all matched entries,
        anything else keeps only the first one.
      - ``id``: participant id; when absent it is read from the XML.
    ``pars`` is also forwarded to the importXLSX helpers, which read further
    keys (project/schema/query per the original notes - not visible here).

    Parameters:
        importXLSX: module/object providing getFields, getVariables,
            getLookupMap, getAlias, invertMap and loadSafely.
        pars: parameter dict, see above.
        xmlRoot: root element of the XML record.
        dryRun: forwarded unchanged to ``loadSafely``.

    Returns:
        None. Returns early, without loading, when no values were found or
        when the collected fields have differing numbers of entries.
    """
    # "or" fallbacks also cover keys that are present but set to None.
    seqNumOffset = pars.get('seqNumOffset') or 0
    xPath = pars.get('XPath') or ''
    keyColumn = pars.get('additionalKeyColumn')
    presetValues = pars.get('presetValues') or {}
    aliasReplace = pars.get('aliasReplace') or {}
    # Accept a real boolean as well as the textual forms.
    allowMultiple = str(pars.get('allowMultiple', 'False')).lower() == 'true'

    fields = importXLSX.getFields(pars)
    lookupVars = importXLSX.getVariables(fields, fieldType='LOOKUP')
    dateVars = list(importXLSX.getVariables(fields, fieldType='DATE').keys())
    # NOTE(review): doubleVars and lookupMap are computed but not used below;
    # the calls are kept in case the helpers have effects callers rely on.
    doubleVars = importXLSX.getVariables(fields, fieldType='DOUBLE')
    print(f'dateVars: {dateVars}')
    lookupMap = {f: importXLSX.getLookupMap(pars, fields, f) for f in lookupVars}
    alias = importXLSX.invertMap(importXLSX.getAlias(fields))
    alias = updateAliases(alias, aliasReplace)
    print(f'aliases: {alias}')

    # Patient id from pars takes precedence; only consult the XML when it is
    # not preset, so a record without Patient/PatientID does not fail here.
    pid = pars['id'] if 'id' in pars else getID(xmlRoot)
    row = {'ParticipantId': pid, 'SequenceNum': seqNumOffset + 1}
    row.update(presetValues)

    vals = {}
    for f in fields:
        try:
            aliasVal = alias[f]
        except KeyError:
            print(f'Alias for field {f} not found')
            continue
        x = getVal(xmlRoot, aliasVal, xPath)
        if not x:
            continue
        if not allowMultiple:
            x = x[0:1]
        print('{}[{}]: {}'.format(f, len(x), x))
        vals[f] = x

    if not vals:
        # Nothing matched in the XML; indexing the first length below used to
        # raise IndexError in this case.
        print('No values found for import')
        return
    if not matchingLengths(vals):
        print('Mismatch lenghts of variables {}'.format({v: len(vals[v]) for v in vals}))
        return

    n = len(next(iter(vals.values())))
    rows = []
    for i in range(n):
        r = dict(row)
        r.update({v: vals[v][i] for v in vals})
        if n > 1:
            # Requires keyColumn to be present in the row (KeyError otherwise).
            r[keyColumn] = '{}_{}'.format(r[keyColumn], i + 1)
        rows.append(r)
    importXLSX.loadSafely(pars, rows, keyColumn, dryRun)
def main(parameterFile):
    """Download the XML attached to one ``importXML`` list entry and import it.

    Steps: read the parameter file and ``~/.labkey/setup.json``, load the
    importer, select the ``importXML`` row whose ``Key`` equals
    ``pars['key']``, download its uploaded file to ``~/temp/DCIS/data.xml``,
    mark the row with status 3 (processing started), run ``importXML`` once
    per ``importSetup`` row with ``dryRun=False``, then set status 1 (OK).

    Parameters:
        parameterFile: path to a JSON file; must provide at least ``key`` and
            ``project`` (further keys are read by the importXLSX helpers).

    Raises:
        IndexError: if no ``importXML`` row matches the key.
    """
    with open(parameterFile) as f:
        pars = json.load(f)
    print(pars)
    fhome = os.path.expanduser('~')
    with open(os.path.join(fhome, ".labkey", "setup.json")) as f:
        setup = json.load(f)
    importXLSX = getImporter(setup)
    # needs server
    db = importXLSX.getDB(pars)

    db.net.getCSRF()
    keyFilter = {'variable': 'Key', 'value': '{key}'.format(**pars), 'oper': 'eq'}
    # needs project
    ds = db.selectRows(pars['project'], 'lists', 'importXML', [keyFilter])
    matches = ds['rows']
    if not matches:
        raise IndexError('No importXML entry found for Key={}'.format(pars['key']))
    r = matches[0]

    xmlFile = os.path.join(fhome, 'temp', 'DCIS', 'data.xml')
    # Make sure the download directory exists before writing into it.
    os.makedirs(os.path.dirname(xmlFile), exist_ok=True)
    url = db.net.connectionConfig['host'] + r['_labkeyurl_fileUpload']
    fb = getFileBrowser(db)
    fb.readFileToFile(url, xmlFile)
    xmlRoot = getXMLRoot(xmlFile)
    # needs project
    setupRows = readSetup(importXLSX, pars)

    # Let LabKey know that a processing run has started. The fileUpload field
    # is not sent back with the update.
    r.pop('fileUpload', None)
    r['status'] = 3
    if r['ID'] is None:
        r['ID'] = getID(xmlRoot)
    db.modifyRows('update', pars['project'], 'lists', 'importXML', [r])

    for s in setupRows:
        # Setup-row values override the base parameters; the merged dict needs
        # project and schema for the queries.
        importXML(importXLSX, pars | s, xmlRoot, dryRun=False)

    # All setup rows processed: set status to OK (1).
    r['status'] = 1
    db.modifyRows('update', pars['project'], 'lists', 'importXML', [r])
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the JSON
    # parameter file handed to main().
    if len(sys.argv) < 2:
        sys.exit('Usage: {} <parameterFile>'.format(sys.argv[0]))
    main(sys.argv[1])
|