import os import sys import importlib import json import lxml.etree #need labkey interface's importXLS def getImporter(setup): sys.path.append(setup['paths']['nixWrapper']) import nixWrapper nixWrapper.loadLibrary('labkeyInterface') import importXLSX importlib.reload(importXLSX) return importXLSX def getFileBrowser(db): import nixWrapper nixWrapper.loadLibrary('labkeyInterface') import labkeyFileBrowser return labkeyFileBrowser.labkeyFileBrowser(db.net) def countElements(xmlRoot,xPath): xElUp=xmlRoot if xPath.find('/')>-1: xPathUp=xPath[:xPath.rfind('/')] xElUp=xmlRoot.find(xPathUp) n=len(xElUp) print('Counting {} in {}[{}]'.format(xPath,xElUp.tag,n)) i=1 while xmlRoot.find(f'{xPath}[{i}]') is not None: i=i+1 return i-1 def mergePaths(corePath,subPath): if len(corePath)>0: if len(subPath)>0: return '/'.join([corePath,subPath]) return corePath return subPath def getVal(xmlRoot,aliasVal,xPath=''): q=aliasVal.split(':') xName=q[0] elementPath=mergePaths(xPath,xName) attributeName=q[1] eList=[elementPath] if xPath[-1]!=']': n=countElements(xmlRoot,elementPath) eList=[f'{elementPath}[{i+1}]' for i in range(n)] print(eList) try: return [xmlRoot.find(e).get(attributeName) for e in eList] except AttributeError: return None def updateTxt(txt,replacePatterns): for x in replacePatterns: txt=txt.replace(x,replacePatterns[x]) return txt def updateAliases(aliasValues,aliasReplace): return {a:updateTxt(aliasValues[a],aliasReplace) for a in aliasValues} def parseJSON(x): print(f'Decoding [{x}]') try: return json.loads(x) except TypeError: pass return {} def readSetup(importXLSX,pars): db=importXLSX.getDB(pars) ds=db.selectRows(pars['project'],'lists','importSetup',[]) setupRows=ds['rows'] for r in setupRows: r['aliasReplace']=parseJSON(r['aliasReplace']) r['presetValues']=parseJSON(r['presetValues']) return setupRows def getID(root): return root.find('Patient/PatientID').get('val') def getXMLRoot(xmlFile): return lxml.etree.ElementTree(file=xmlFile).getroot() def matchingLengths(dictArray): #print(dictArray) n={x:len(dictArray[x]) for x in dictArray} a=list(n.values()) return all(x==a[0] for x in a) def importXML(importXLSX,pars,xmlRoot,dryRun=True): #def importData(pars,filename,getId=getId,modify=modify,convertLookup=convertLookup,dryRun=True,debug=True): #master routine that imports data based on pars, #applies user supplied functions modify, convertLookup and get Id and #updates relevant database #some useful fields from pars (d is for default value) # - seqNumOffset specify visit/sequenceNum offset (number, d:0 will result in 1) # - XPath - xml path to the element to take data from (helpful if multiple elements are present in xml and identical data is sought) # - additionalKeyColumn - name of the variable/column used for separating data entries (on top of ParticipantId and SequenceNum, helpful in the same cases as XPath) # - presetVariables - set some of the row variables to this values (same cases as XPath) # - project - labkey project # - schema - labkey schema (list/study, d: study) # - query - labkey query #set this is as sequenceNum for entries, or initial seqNum if more than a single entry is in the dataset seqNumOffset=pars.get('seqNumOffset',0) xPath=pars.get('XPath','') keyColumn=pars.get('additionalKeyColumn') presetValues=pars.get('presetValues',{}) aliasReplace=pars.get('aliasReplace',{}) allowMultiple=pars.get('allowMultiple',"False") #convert to boolean allowMultiple=allowMultiple=="True" or allowMultiple=="true" fields=importXLSX.getFields(pars) lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP') dateVars=importXLSX.getVariables(fields,fieldType='DATE') doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE') #convert dates to list dateVars=list(dateVars.keys()) print(f'dateVars: {dateVars}') lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars} alias=importXLSX.invertMap(importXLSX.getAlias(fields)) alias=updateAliases(alias,aliasReplace) print(f'aliases: {alias}') row={} #patient id can be either set in pars (takes precedence) or from xml record pid=pars.get('id',getID(xmlRoot)) row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1} row.update(presetValues) rows=[] vals={} for f in fields: try: x=getVal(xmlRoot,alias[f],xPath) if not x: continue if not allowMultiple: x=x[0:1] n=len(x) print('{}[{}]: {}'.format(f,n,x)) vals[f]=x except KeyError: print(f'Alias for field {f} not found') continue if not matchingLengths(vals): print('Mismatch lenghts of variables {}'.format({v:len(vals[v]) for v in vals})) return n=[len(vals[v]) for v in vals][0] for i in range(n): r={x:row[x] for x in row} r.update({v:vals[v][i] for v in vals}) if n>1: r[keyColumn]='{}_{}'.format(r[keyColumn],i+1) rows.append(r) #print(rows) importXLSX.loadSafely(pars,rows,keyColumn,dryRun) def main(parameterFile): with open(parameterFile) as f: pars=json.load(f) print(pars) fhome=os.path.expanduser('~') with open(os.path.join(fhome,".labkey","setup.json")) as f: setup=json.load(f) importXLSX=getImporter(setup) #needs server db=importXLSX.getDB(pars) db.net.getCSRF() keyFilter={'variable':'Key','value':'{key}'.format(**pars),'oper':'eq'} #needs project ds=db.selectRows(pars['project'],'lists','importXML',[keyFilter]) r=ds['rows'][0] xmlFile=os.path.join(fhome,'temp','DCIS','data.xml') url=db.net.connectionConfig['host']+r['_labkeyurl_fileUpload'] fb=getFileBrowser(db) fb.readFileToFile(url,xmlFile) xmlRoot=getXMLRoot(xmlFile) #needs project setupRows=readSetup(importXLSX,pars) #setupRows=setupRows[17:18] for s in setupRows: #needs project and schema for queries importXML(importXLSX,pars|s,xmlRoot,dryRun=False) #make calling importXML updates r['status']=1 r['ID']=getID(xmlRoot) del r['fileUpload'] db.modifyRows('update',pars['project'],'lists','importXML',[r]) if __name__ == "__main__" : main(sys.argv[1])