import os
import sys
import importlib
import json

import lxml.etree


# need labkey interface's importXLSX
def getImporter(setup):
    """Load the ``importXLSX`` module through nixWrapper and return it.

    Args:
        setup: configuration dict; ``setup['paths']['nixWrapper']`` is
            appended to ``sys.path`` so that ``nixWrapper`` can be imported.

    Returns:
        The freshly reloaded ``importXLSX`` module.
    """
    sys.path.append(setup['paths']['nixWrapper'])
    import nixWrapper
    nixWrapper.loadLibrary('labkeyInterface')
    import importXLSX
    # Reload so that edits to importXLSX are picked up in a running session.
    importlib.reload(importXLSX)
    return importXLSX


def getVal(xmlRoot, aliasVal, xPath='', aliasReplace=None):
    """Return an attribute of the XML element addressed by an alias.

    Args:
        xmlRoot: element to search from (anything offering ``find``).
        aliasVal: string of the form ``'elementPath:attributeName'``.
        xPath: optional path prefix joined to the element path with ``/``.
        aliasReplace: optional ``{old: new}`` substring replacements applied
            to the element-path part of the alias. ``None`` means none.

    Returns:
        The attribute value, or ``None`` when the element is not found.

    Raises:
        IndexError: if ``aliasVal`` contains no ``':'`` separator.
    """
    aliasReplace = aliasReplace or {}
    q = aliasVal.split(':')
    xName = q[0]
    for old, new in aliasReplace.items():
        xName = xName.replace(old, new)
    # Join only the non-empty parts: prefix alone, name alone, or both.
    elementPath = '/'.join(part for part in (xPath, xName) if part)
    attributeName = q[1]
    try:
        return xmlRoot.find(elementPath).get(attributeName)
    except AttributeError:
        # find() returned None: no such element.
        return None


def parseJSON(x):
    """Decode a JSON string, returning ``{}`` for non-string input.

    Malformed JSON text still raises ``json.JSONDecodeError``.
    """
    print(f'Decoding [{x}]')
    try:
        return json.loads(x)
    except TypeError:
        # json.loads rejects non-str/bytes input such as None.
        return {}


def readSetup(importXSLX, pars):
    """Read the rows of the ``importSetup`` list and decode their JSON columns.

    Args:
        importXSLX: the importXLSX module as returned by ``getImporter``
            (the parameter keeps its historical spelling for compatibility).
        pars: parameter dict; ``pars['project']`` selects the project and the
            whole dict is passed to ``getDB``.

    Returns:
        List of row dicts whose ``aliasReplace`` and ``presetValues`` entries
        have been replaced by their decoded JSON values.
    """
    # The body used to reference the undefined name `importXLSX`; use the
    # parameter that is actually passed in.
    db = importXSLX.getDB(pars)
    ds = db.selectRows(pars['project'], 'lists', 'importSetup', [])
    setupRows = ds['rows']
    for r in setupRows:
        r['aliasReplace'] = parseJSON(r['aliasReplace'])
        r['presetValues'] = parseJSON(r['presetValues'])
    return setupRows


def importXML(importXLSX, pars, xmlFile, dryRun=True):
    """Import one XML record into a database query.

    Builds a single row from ``xmlFile`` using the field aliases reported by
    ``importXLSX`` and either updates the matching row or inserts a new one.

    Some useful fields from ``pars`` (d is the default value):
      - seqNumOffset - visit/sequenceNum offset (number, d: 0, which results
        in SequenceNum 1); used as the sequenceNum for the entry
      - XPath - xml path to the element to take data from (helpful if
        multiple elements are present in the xml and identical data is sought)
      - additionalKeyColumn - name of the variable/column used for separating
        data entries (on top of ParticipantId and SequenceNum, helpful in the
        same cases as XPath)
      - presetValues - set some of the row variables to these values
        (same cases as XPath)
      - aliasReplace - substring replacements applied to alias element paths
      - id - participant id; takes precedence over the id stored in the xml
      - project - project (d: 'DCIS/Study')
      - schema - schema (d: 'demographics')
      - query - query (d: 'demographics')

    Args:
        importXLSX: the importXLSX module as returned by ``getImporter``.
        pars: parameter dict, see above.
        xmlFile: path of the XML file to read.
        dryRun: when True (default) nothing is written to the database.

    Returns:
        The merged row dict when an existing entry is updated, ``None`` when
        a new entry is inserted.
    """
    seqNumOffset = pars.get('seqNumOffset', 0)
    xPath = pars.get('XPath', '')
    keyColumn = pars.get('additionalKeyColumn')
    # Tolerate an explicit None (e.g. from a JSON null) for the dict options.
    presetValues = pars.get('presetValues', {}) or {}
    aliasReplace = pars.get('aliasReplace', {}) or {}

    fields = importXLSX.getFields(pars)
    # NOTE(review): lookupVars, doubleVars and lookupMap are computed but not
    # used below; the calls are kept because their side effects are unknown.
    lookupVars = importXLSX.getVariables(fields, fieldType='LOOKUP')
    dateVars = importXLSX.getVariables(fields, fieldType='DATE')
    doubleVars = importXLSX.getVariables(fields, fieldType='DOUBLE')

    # convert dates to list
    dateVars = list(dateVars.keys())
    print(f'dateVars: {dateVars}')

    lookupMap = {f: importXLSX.getLookupMap(pars, fields, f) for f in lookupVars}
    alias = importXLSX.invertMap(importXLSX.getAlias(fields))
    print(f'aliases: {alias}')

    # The original read an undefined `filename`; the path is `xmlFile`.
    tree = lxml.etree.ElementTree(file=xmlFile)
    print(tree.docinfo.xml_version)
    root = tree.getroot()  # Element

    # patient id can be either set in pars (takes precedence) or taken from
    # the xml record; only consult the xml when pars does not provide it, so
    # a record without Patient/PatientID does not fail needlessly.
    if 'id' in pars:
        pid = pars['id']
    else:
        pid = root.find('Patient/PatientID').get('val')

    row = {'ParticipantId': pid, 'SequenceNum': seqNumOffset + 1}
    row.update(presetValues)
    for f in fields:
        try:
            fieldAlias = alias[f]
        except KeyError:
            print(f'Alias for field {f} not found')
            continue
        row[f] = getVal(root, fieldAlias, xPath, aliasReplace)
    print(row)

    db = importXLSX.getDB(pars)
    project = pars.get('project', 'DCIS/Study')
    schema = pars.get('schema', 'demographics')
    query = pars.get('query', 'demographics')

    # Columns that identify an existing entry.
    selVal = ['ParticipantId', 'SequenceNum']
    if keyColumn:
        selVal.append(keyColumn)
    qFilter = [
        {'variable': v, 'value': '{}'.format(row[v]), 'oper': 'eq'}
        for v in selVal
    ]
    ds = db.selectRows(project, schema, query, qFilter)

    if len(ds['rows']) > 0:
        # Merge the new values into the first matching row.
        r = ds['rows'][0]
        r.update(row)
        print('Updating entry')
        if not dryRun:
            importXLSX.printErr(
                db.modifyRows('update', project, schema, query, [r]))
        return r

    print('Inserting entry')
    if not dryRun:
        importXLSX.printErr(
            db.modifyRows('insert', project, schema, query, [row]))


def main(parameterFile):
    """Fetch the uploaded XML named by the parameter file and import it.

    Args:
        parameterFile: path of a JSON file with the run parameters; ``key``
            and ``project`` are read here, the rest is passed on.
    """
    with open(parameterFile) as f:
        pars = json.load(f)

    fhome = os.path.expanduser('~')
    with open(os.path.join(fhome, ".labkey", "setup.json")) as f:
        setup = json.load(f)

    importXLSX = getImporter(setup)
    # needs server
    db = importXLSX.getDB(pars)
    keyFilter = {'variable': 'Key', 'value': pars['key'], 'oper': 'eq'}
    # needs project
    ds = db.selectRows(pars['project'], 'lists', 'importXML', [keyFilter])
    r = ds['rows'][0]

    filename = os.path.join(fhome, 'temp', 'data.xml')
    url = db.net.connectionConfig['host'] + r['_labkeyurl_fileUpload']
    # NOTE(review): `fb` is not defined anywhere in this file, so this line
    # raises NameError as written. Presumably a file-browser object built on
    # db.net is intended - confirm and construct it before this call.
    fb.readFileToFile(url, filename)

    # needs project
    setupRows = readSetup(importXLSX, pars)
    # NOTE(review): only the first setup row is processed and dryRun is
    # hard-coded to True - looks like a debugging restriction; confirm.
    setupRows = setupRows[0:1]
    for s in setupRows:
        # needs project and schema for queries
        importXML(importXLSX, pars | s, filename, dryRun=True)


if __name__ == "__main__":
    main(sys.argv[1])