123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- import os
- import sys
- import importlib
- import json
- import lxml.etree
# Requires the importXLSX module from the labkeyInterface library.
def getImporter(setup):
    """Import the ``importXLSX`` module and return it freshly reloaded.

    Args:
        setup: Mapping that holds the nixWrapper directory under
            ``setup['paths']['nixWrapper']``.

    Returns:
        The ``importXLSX`` module object, re-executed through
        ``importlib.reload`` before it is returned.

    Raises:
        KeyError: If ``setup`` has no ``paths``/``nixWrapper`` entry.
        ImportError: If ``nixWrapper`` or ``importXLSX`` cannot be imported.
    """
    wrapperPath = setup['paths']['nixWrapper']
    # Register the directory only once; appending unconditionally made
    # sys.path grow by one duplicate entry on every call.
    if wrapperPath not in sys.path:
        sys.path.append(wrapperPath)
    import nixWrapper
    # Presumably this makes the labkeyInterface modules importable -- the
    # import of importXLSX right below depends on it; confirm in nixWrapper.
    nixWrapper.loadLibrary('labkeyInterface')
    import importXLSX
    importlib.reload(importXLSX)
    return importXLSX
def getFileBrowser(db):
    """Create a ``labkeyFileBrowser`` object from the ``net`` attribute of ``db``.

    Args:
        db: Database object exposing a ``net`` attribute, which is handed to
            the ``labkeyFileBrowser`` constructor.

    Returns:
        A new ``labkeyFileBrowser.labkeyFileBrowser`` instance.
    """
    # NOTE(review): nixWrapper must already be importable here; within this
    # file only getImporter() adds its directory to sys.path -- confirm the
    # call order at every call site.
    import nixWrapper
    nixWrapper.loadLibrary('labkeyInterface')
    import labkeyFileBrowser
    browserClass = labkeyFileBrowser.labkeyFileBrowser
    return browserClass(db.net)
def getVal(xmlRoot, aliasVal, xPath='', aliasReplace=None):
    """Return the attribute value addressed by an ``element:attribute`` alias.

    The part of ``aliasVal`` before the first ``:`` is an element path, the
    part after it is the attribute name. Substitutions from ``aliasReplace``
    are applied to the element path, which is then appended to ``xPath``
    (when given) and looked up with ``xmlRoot.find``.

    Args:
        xmlRoot: Element-like object providing ``find(path)``; the element it
            returns must provide ``get(attributeName)``.
        aliasVal: Alias in the form ``'path/to/element:attribute'``.
        xPath: Optional path prefix. ``''`` or ``None`` means no prefix.
        aliasReplace: Optional mapping ``{old: new}`` of substring
            replacements applied to the element path. ``None`` means no
            replacements.

    Returns:
        The attribute value, or ``None`` when the element is not found or
        the attribute is not set on it.

    Raises:
        IndexError: If ``aliasVal`` contains no ``:`` separator.
    """
    # A None default avoids sharing one mutable dict between all calls.
    if aliasReplace is None:
        aliasReplace = {}

    parts = aliasVal.split(':')
    xName = parts[0]
    for old, new in aliasReplace.items():
        xName = xName.replace(old, new)

    # Join only the non-empty components: an empty element name selects the
    # xPath element itself, an empty/None xPath leaves the name unprefixed.
    elementPath = '/'.join(piece for piece in (xPath, xName) if piece)
    attributeName = parts[1]
    try:
        return xmlRoot.find(elementPath).get(attributeName)
    except AttributeError:
        # find() returned None: there is no such element in the document.
        return None
-
def parseJSON(x):
    """Decode a JSON document, mapping a missing value to an empty dict.

    Args:
        x: JSON text, or a value that is not JSON text at all (for example
            ``None``).

    Returns:
        The decoded object. ``{}`` is returned when ``x`` is an empty or
        whitespace-only string, or when ``json.loads`` rejects the type of
        ``x`` with ``TypeError`` (as it does for ``None``).

    Raises:
        json.JSONDecodeError: If ``x`` is non-empty text that is not valid
            JSON; malformed configuration is deliberately not hidden.
    """
    print(f'Decoding [{x}]')
    # A blank string carries no value, just like None; json.loads would
    # otherwise raise JSONDecodeError for it.
    if isinstance(x, str) and not x.strip():
        return {}
    try:
        return json.loads(x)
    except TypeError:
        return {}
-
def readSetup(importXLSX, pars):
    """Read the rows of the ``importSetup`` list and decode their JSON columns.

    Args:
        importXLSX: Module object providing ``getDB(pars)``.
        pars: Parameter mapping; ``pars['project']`` names the project that
            is queried.

    Returns:
        The ``rows`` entry of the query result. In every row the
        ``aliasReplace`` and ``presetValues`` values have been replaced, in
        place, by the result of ``parseJSON``.
    """
    connection = importXLSX.getDB(pars)
    reply = connection.selectRows(pars['project'], 'lists', 'importSetup', [])
    rows = reply['rows']
    jsonColumns = ('aliasReplace', 'presetValues')
    for entry in rows:
        for column in jsonColumns:
            entry[column] = parseJSON(entry[column])
    return rows
-
-
-
-
def importXML(importXLSX, pars, xmlFile, dryRun=True):
    """Build one row from an XML file and update or insert it in the database.

    Every field reported by ``importXLSX.getFields`` is looked up through its
    alias with ``getVal``. The resulting row is matched against existing rows
    on ``ParticipantId``, ``SequenceNum`` and, when configured, the
    additional key column. The first match is updated; without a match the
    row is inserted.

    Entries read from ``pars`` (defaults in parentheses):
        seqNumOffset: Offset of the sequence number; the row gets
            ``seqNumOffset + 1`` (0, i.e. SequenceNum 1).
        XPath: Path of the XML element the data is taken from; prepended to
            every alias path (``''``).
        additionalKeyColumn: Name of an extra column used, on top of
            ParticipantId and SequenceNum, to identify an entry (none).
        presetValues: Mapping of row values set before the XML is read
            (``{}``).
        aliasReplace: Substring replacements applied to alias paths (``{}``).
        id: Participant id; when present it takes precedence over the
            ``Patient/PatientID`` element of the XML record.
        project: Project (``'DCIS/Study'``).
        schema: Schema (``'demographics'``).
        query: Query (``'demographics'``).

    Args:
        importXLSX: Module object providing ``getFields``, ``getVariables``,
            ``getLookupMap``, ``getAlias``, ``invertMap``, ``getDB`` and
            ``printErr``.
        pars: Parameter mapping, see above.
        xmlFile: XML file handed to ``lxml.etree.ElementTree(file=...)``.
        dryRun: When true, nothing is written; the function only prints what
            it would do.

    Returns:
        The existing row merged with the new values when an entry was
        updated, ``None`` when the entry was inserted.

    Raises:
        AttributeError: If ``pars`` has no ``id`` and the XML has no
            ``Patient/PatientID`` element.
        KeyError: If the additional key column is missing from the row.
    """
    seqNumOffset = pars.get('seqNumOffset', 0)
    xPath = pars.get('XPath', '')
    keyColumn = pars.get('additionalKeyColumn')
    presetValues = pars.get('presetValues', {})
    aliasReplace = pars.get('aliasReplace', {})

    fields = importXLSX.getFields(pars)
    lookupVars = importXLSX.getVariables(fields, fieldType='LOOKUP')
    dateVars = importXLSX.getVariables(fields, fieldType='DATE')
    # NOTE(review): doubleVars and lookupMap are computed but never used in
    # this function. The calls are kept because their side effects are not
    # visible from here -- confirm and drop them if they are pure.
    doubleVars = importXLSX.getVariables(fields, fieldType='DOUBLE')
    dateVars = list(dateVars.keys())
    print(f'dateVars: {dateVars}')
    lookupMap = {f: importXLSX.getLookupMap(pars, fields, f) for f in lookupVars}
    alias = importXLSX.invertMap(importXLSX.getAlias(fields))
    print(f'aliases: {alias}')

    tree = lxml.etree.ElementTree(file=xmlFile)
    print(tree.docinfo.xml_version)
    root = tree.getroot()

    # The id from pars takes precedence. The XML is consulted only when pars
    # has no id, so a record without Patient/PatientID no longer fails when
    # the id was supplied explicitly.
    if 'id' in pars:
        pid = pars['id']
    else:
        pid = root.find('Patient/PatientID').get('val')

    row = {'ParticipantId': pid, 'SequenceNum': seqNumOffset + 1}
    row.update(presetValues)

    for f in fields:
        # Only the alias lookup is guarded, so a KeyError raised elsewhere is
        # not misreported as a missing alias.
        try:
            fieldAlias = alias[f]
        except KeyError:
            print(f'Alias for field {f} not found')
            continue
        row[f] = getVal(root, fieldAlias, xPath, aliasReplace)
    print(row)

    db = importXLSX.getDB(pars)
    project = pars.get('project', 'DCIS/Study')
    # NOTE(review): the original notes gave 'study' as the schema default,
    # the code uses 'demographics' -- confirm which one is intended.
    schema = pars.get('schema', 'demographics')
    query = pars.get('query', 'demographics')

    selVal = ['ParticipantId', 'SequenceNum']
    if keyColumn:
        selVal.append(keyColumn)
    qFilter = [{'variable': v, 'value': f'{row[v]}', 'oper': 'eq'} for v in selVal]
    ds = db.selectRows(project, schema, query, qFilter)

    matches = ds['rows']
    if len(matches) > 0:
        # Update path: merge the new values into the first matching row.
        r = matches[0]
        r.update(row)
        print('Updating entry')
        if not dryRun:
            importXLSX.printErr(db.modifyRows('update', project, schema, query, [r]))
        return r

    print('Inserting entry')
    if not dryRun:
        importXLSX.printErr(db.modifyRows('insert', project, schema, query, [row]))
-
-
def main(parameterFile):
    """Download the XML file of one ``importXML`` list entry and import it.

    Args:
        parameterFile: Path of a JSON file with the run parameters. This
            function reads the ``key`` and ``project`` entries itself and
            passes the whole mapping on to ``importXLSX.getDB``,
            ``readSetup`` and ``importXML``.

    Raises:
        IndexError: If the ``importXML`` list has no row for the given key.
    """
    with open(parameterFile) as parStream:
        pars = json.load(parStream)
    print(pars)

    home = os.path.expanduser('~')
    setupPath = os.path.join(home, ".labkey", "setup.json")
    with open(setupPath) as setupStream:
        setup = json.load(setupStream)

    importXLSX = getImporter(setup)
    # getDB reads the server settings from pars
    db = importXLSX.getDB(pars)
    db.net.getCSRF()

    # Select the importXML list entry identified by pars['key'].
    keyFilter = {
        'variable': 'Key',
        'value': '{key}'.format(**pars),
        'oper': 'eq',
    }
    listing = db.selectRows(pars['project'], 'lists', 'importXML', [keyFilter])
    entry = listing['rows'][0]

    # Copy the file attached to the entry to a fixed local path.
    filename = os.path.join(home, 'temp', 'DCIS', 'data.xml')
    url = db.net.connectionConfig['host'] + entry['_labkeyurl_fileUpload']
    browser = getFileBrowser(db)
    browser.readFileToFile(url, filename)

    # NOTE(review): only the first setup row is processed and the import is
    # always a dry run -- this looks like a debugging leftover; confirm.
    firstSetupRows = readSetup(importXLSX, pars)[0:1]
    for setupRow in firstSetupRows:
        # Values of the setup row override the entries of pars.
        importXML(importXLSX, pars | setupRow, filename, dryRun=True)
# Script entry point: the first command-line argument is the parameter file.
if __name__ == '__main__':
    main(sys.argv[1])
|