import os import sys import importlib import json import lxml.etree #need labkey interface's importXLS def getImporter(setup): sys.path.append(setup['paths']['nixWrapper']) import nixWrapper nixWrapper.loadLibrary('labkeyInterface') import importXLSX importlib.reload(importXLSX) return importXLSX def getFileBrowser(db): import nixWrapper nixWrapper.loadLibrary('labkeyInterface') import labkeyFileBrowser return labkeyFileBrowser.labkeyFileBrowser(db.net) def getVal(xmlRoot,aliasVal,xPath=''): q=aliasVal.split(':') xName=q[0] if len(xPath)>0: if len(xName)>0: elementPath='/'.join([xPath,xName]) else: elementPath=xPath else: elementPath=xName attributeName=q[1] try: return xmlRoot.find(elementPath).get(attributeName) except AttributeError: return None def updateTxt(txt,replacePatterns): for x in replacePatterns: txt=txt.replace(x,replacePatterns[x]) return txt def updateAliases(aliasValues,aliasReplace): return {a:updateTxt(aliasValues[a],aliasReplace) for a in aliasValues} def parseJSON(x): print(f'Decoding [{x}]') try: return json.loads(x) except TypeError: pass return {} def readSetup(importXLSX,pars): db=importXLSX.getDB(pars) ds=db.selectRows(pars['project'],'lists','importSetup',[]) setupRows=ds['rows'] for r in setupRows: r['aliasReplace']=parseJSON(r['aliasReplace']) r['presetValues']=parseJSON(r['presetValues']) return setupRows def getID(root): return root.find('Patient/PatientID').get('val') def getXMLRoot(xmlFile): return lxml.etree.ElementTree(file=xmlFile).getroot() def importXML(importXLSX,pars,xmlRoot,dryRun=True): #def importData(pars,filename,getId=getId,modify=modify,convertLookup=convertLookup,dryRun=True,debug=True): #master routine that imports data based on pars, #applies user supplied functions modify, convertLookup and get Id and #updates relevant database #some useful fields from pars (d is for default value) # - seqNumOffset specify visit/sequenceNum offset (number, d:0 will result in 1) # - XPath - xml path to the element to take data from (helpful if multiple elements are present in xml and identical data is sought) # - additionalKeyColumn - name of the variable/column used for separating data entries (on top of ParticipantId and SequenceNum, helpful in the same cases as XPath) # - presetVariables - set some of the row variables to this values (same cases as XPath) # - project - labkey project # - schema - labkey schema (list/study, d: study) # - query - labkey query #set this is as sequenceNum for entries, or initial seqNum if more than a single entry is in the dataset seqNumOffset=pars.get('seqNumOffset',0) xPath=pars.get('XPath','') keyColumn=pars.get('additionalKeyColumn') presetValues=pars.get('presetValues',{}) aliasReplace=pars.get('aliasReplace',{}) fields=importXLSX.getFields(pars) lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP') dateVars=importXLSX.getVariables(fields,fieldType='DATE') doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE') #convert dates to list dateVars=list(dateVars.keys()) print(f'dateVars: {dateVars}') lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars} alias=importXLSX.invertMap(importXLSX.getAlias(fields)) alias=updateAliases(alias,aliasReplace) print(f'aliases: {alias}') row={} #patient id can be either set in pars (takes precedence) or from xml record pid=pars.get('id',getID(xmlRoot)) row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1} row.update(presetValues) for f in fields: try: row[f]=getVal(xmlRoot,alias[f],xPath) except KeyError: print(f'Alias for field {f} not found') continue print(row) db=importXLSX.getDB(pars) project=pars.get('project','DCIS/Study') schema=pars.get('schema','study') query=pars.get('query','demographics') selVal=['ParticipantId','SequenceNum'] if keyColumn: selVal.append(keyColumn) qFilter=[{'variable':v,'value':'{}'.format(row[v]),'oper':'eq'} for v in selVal] ds=db.selectRows(project,schema,query,qFilter) if len(ds['rows'])>0: r=ds['rows'][0] r.update(row) print(f'Updating entry') if not dryRun: importXLSX.printErr(db.modifyRows('update',project,schema,query,[r])) return r print(f'Inserting entry') if not dryRun: importXLSX.printErr(db.modifyRows('insert',project,schema,query,[row])) def main(parameterFile): with open(parameterFile) as f: pars=json.load(f) print(pars) fhome=os.path.expanduser('~') with open(os.path.join(fhome,".labkey","setup.json")) as f: setup=json.load(f) importXLSX=getImporter(setup) #needs server db=importXLSX.getDB(pars) db.net.getCSRF() keyFilter={'variable':'Key','value':'{key}'.format(**pars),'oper':'eq'} #needs project ds=db.selectRows(pars['project'],'lists','importXML',[keyFilter]) r=ds['rows'][0] xmlFile=os.path.join(fhome,'temp','DCIS','data.xml') url=db.net.connectionConfig['host']+r['_labkeyurl_fileUpload'] fb=getFileBrowser(db) fb.readFileToFile(url,xmlFile) xmlRoot=getXMLRoot(xmlFile) #needs project setupRows=readSetup(importXLSX,pars) #setupRows=setupRows[17:18] for s in setupRows: #needs project and schema for queries importXML(importXLSX,pars|s,xmlRoot,dryRun=False) #make calling importXML updates r['status']=1 r['ID']=getID(xmlRoot) del r['fileUpload'] db.modifyRows('update',pars['project'],'lists','importXML',[r]) if __name__ == "__main__" : main(sys.argv[1])