|
@@ -0,0 +1,162 @@
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import importlib
|
|
|
+import json
|
|
|
+import lxml.etree
|
|
|
+
|
|
|
+#need labkey interface's importXLS
|
|
|
def getImporter(setup):
    """Load and return the ``importXLSX`` module.

    Parameters:
        setup: dict; ``setup['paths']['nixWrapper']`` is the directory that is
            put on ``sys.path`` so that ``nixWrapper`` can be imported.

    Returns:
        The ``importXLSX`` module, reloaded on every call.

    Raises:
        KeyError: if ``setup`` lacks the ``paths``/``nixWrapper`` entries.
        ImportError: if ``nixWrapper`` or ``importXLSX`` cannot be imported.
    """
    wrapperPath=setup['paths']['nixWrapper']
    #repeated calls must not pile up duplicate sys.path entries
    if wrapperPath not in sys.path:
        sys.path.append(wrapperPath)
    import nixWrapper
    #presumably this is what makes importXLSX importable - confirm in nixWrapper
    nixWrapper.loadLibrary('labkeyInterface')
    import importXLSX
    #re-execute the module so the returned object reflects its current source
    importlib.reload(importXLSX)
    return importXLSX
|
|
|
+
|
|
|
def getVal(xmlRoot,aliasVal,xPath='',aliasReplace=None):
    """Return an attribute value of an XML element addressed by an alias.

    Parameters:
        xmlRoot: element (or tree) offering ``find(path)``; the found element
            must offer ``get(attributeName)``.
        aliasVal: string of the form ``elementName:attributeName``. The element
            name may be empty (``:attributeName``), in which case xPath alone
            addresses the element.
        xPath: optional path prefix; joined to the element name with ``/``.
        aliasReplace: optional dict; every key found in the element name is
            replaced with its value, in dict iteration order.

    Returns:
        The attribute value, or None when the alias carries no attribute part,
        the element is not found or the attribute is not set.
    """
    if aliasReplace is None:
        aliasReplace={}
    q=aliasVal.split(':')
    if len(q)<2:
        #alias without ':attribute' part: there is nothing to read from the xml
        return None
    xName=q[0]
    attributeName=q[1]

    for x in aliasReplace:
        xName=xName.replace(x,aliasReplace[x])

    #drop empty components so that no leading or trailing '/' is generated
    elementPath='/'.join([p for p in (xPath,xName) if len(p)>0])
    try:
        return xmlRoot.find(elementPath).get(attributeName)
    except AttributeError:
        #find() returned None: no such element
        return None
|
|
|
+
|
|
|
def parseJSON(x):
    """Decode a JSON document, falling back to an empty dict.

    Parameters:
        x: JSON text; values json.loads cannot handle (e.g. None) are tolerated.

    Returns:
        The decoded object, or {} when x is not a str/bytes value or does not
        contain valid JSON.
    """
    print(f'Decoding [{x}]')
    try:
        return json.loads(x)
    except TypeError:
        #not a string at all (e.g. None)
        pass
    except ValueError:
        #json.JSONDecodeError is a ValueError: malformed or empty text
        print(f'Could not decode [{x}]')
    return {}
|
|
|
+
|
|
|
def readSetup(importXSLX,pars):
    """Read all rows of the ``importSetup`` list and decode their JSON columns.

    Parameters:
        importXSLX: module (sic, the parameter name is kept for compatibility)
            providing ``getDB(pars)``.
        pars: dict; ``pars['project']`` names the project holding the list.

    Returns:
        List of row dicts where ``aliasReplace`` and ``presetValues`` have been
        replaced by their decoded values ({} when not decodable).
    """
    #the body used to refer to 'importXLSX', which is not defined in this scope
    db=importXSLX.getDB(pars)
    ds=db.selectRows(pars['project'],'lists','importSetup',[])
    setupRows=ds['rows']
    for r in setupRows:
        for column in ('aliasReplace','presetValues'):
            r[column]=parseJSON(r[column])
    return setupRows
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def importXML(importXLSX,pars,xmlFile,dryRun=True):
    """Import a single row from an XML file into a labkey query.

    Master routine: reads the fields of the target query, fills one row with
    values taken from the XML file through the field aliases and inserts it,
    or updates the first existing row matching ParticipantId, SequenceNum and
    the optional additional key column.

    Parameters:
        importXLSX: module providing getFields, getVariables, getLookupMap,
            getAlias, invertMap, getDB and printErr.
        pars: dict of settings, see below.
        xmlFile: path to (or file object of) the XML file to read.
        dryRun: when True (default) the database is only queried, not modified.

    Entries of pars read here (d is the default value):
     - seqNumOffset - visit/sequenceNum offset (number, d: 0 results in
       SequenceNum 1)
     - XPath - xml path to the element to take data from (helpful if multiple
       elements are present in xml and identical data is sought, d: '')
     - additionalKeyColumn - name of the variable/column used for separating
       data entries on top of ParticipantId and SequenceNum (d: not used)
     - presetValues - dict of row variables set to fixed values (d: {})
     - aliasReplace - dict of replacements applied to alias element names
       (d: {})
     - id - participant id; takes precedence over Patient/PatientID in the xml
     - project - labkey project (d: DCIS/Study)
     - schema - labkey schema (d: demographics)
     - query - labkey query (d: demographics)

    Returns:
        The merged row when an existing entry is updated; None when a new
        entry is inserted.
    """
    #set this as sequenceNum for entries, or initial seqNum if more than a single entry is in the dataset
    seqNumOffset=pars.get('seqNumOffset',0)
    xPath=pars.get('XPath','')
    keyColumn=pars.get('additionalKeyColumn')
    presetValues=pars.get('presetValues',{})
    aliasReplace=pars.get('aliasReplace',{})

    fields=importXLSX.getFields(pars)
    lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
    dateVars=importXLSX.getVariables(fields,fieldType='DATE')
    #NOTE(review): doubleVars and lookupMap are computed but not used below;
    #kept because the calls may have effects not visible from this file
    doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')

    #convert dates to list
    dateVars=list(dateVars.keys())
    print(f'dateVars: {dateVars}')
    lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
    alias=importXLSX.invertMap(importXLSX.getAlias(fields))
    print(f'aliases: {alias}')

    #was 'filename', a name that is not defined in this function
    tree=lxml.etree.ElementTree(file=xmlFile)
    print(tree.docinfo.xml_version)
    root=tree.getroot()#Element
    #patient id can be either set in pars (takes precedence) or from xml record;
    #the xml is only consulted when needed, so a file without Patient/PatientID
    #is fine as long as pars supplies the id
    if 'id' in pars:
        pid=pars['id']
    else:
        pid=root.find('Patient/PatientID').get('val')

    row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
    row.update(presetValues)

    for f in fields:
        try:
            fieldAlias=alias[f]
        except KeyError:
            print(f'Alias for field {f} not found')
            continue
        row[f]=getVal(root,fieldAlias,xPath,aliasReplace)
    print(row)

    db=importXLSX.getDB(pars)
    project=pars.get('project','DCIS/Study')
    schema=pars.get('schema','demographics')
    query=pars.get('query','demographics')

    #columns that identify an existing entry
    selVal=['ParticipantId','SequenceNum']
    if keyColumn:
        selVal.append(keyColumn)
    qFilter=[{'variable':v,'value':'{}'.format(row[v]),'oper':'eq'} for v in selVal]

    ds=db.selectRows(project,schema,query,qFilter)
    if len(ds['rows'])>0:
        #overlay new values on the first matching entry
        r=ds['rows'][0]
        r.update(row)
        print('Updating entry')
        if not dryRun:
            importXLSX.printErr(db.modifyRows('update',project,schema,query,[r]))
        return r

    print('Inserting entry')
    if not dryRun:
        importXLSX.printErr(db.modifyRows('insert',project,schema,query,[row]))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def main(parameterFile):
    """Download the XML file referenced by an importXML list row and import it.

    Parameters:
        parameterFile: path to a JSON file with the settings; ``key`` (Key of
            the row in the ``importXML`` list) and ``project`` are read here,
            the complete dict is handed on to getDB, readSetup and importXML.

    Also reads ``~/.labkey/setup.json`` and stores the downloaded file as
    ``~/temp/data.xml``.

    Raises:
        IndexError: if no row of the importXML list matches ``key``.
    """
    with open(parameterFile) as f:
        pars=json.load(f)

    fhome=os.path.expanduser('~')
    with open(os.path.join(fhome,".labkey","setup.json")) as f:
        setup=json.load(f)

    importXLSX=getImporter(setup)

    #needs server
    db=importXLSX.getDB(pars)
    #NOTE(review): 'fb' was never defined in the original. Presumably it is the
    #labkeyInterface file browser on the same connection as db - confirm module
    #and class name against the labkeyInterface library
    import labkeyFileBrowser
    fb=labkeyFileBrowser.labkeyFileBrowser(db.net)

    keyFilter={'variable':'Key','value':pars['key'],'oper':'eq'}
    #needs project
    ds=db.selectRows(pars['project'],'lists','importXML',[keyFilter])
    if len(ds['rows'])==0:
        raise IndexError(f"No row with Key={pars['key']} in list importXML")
    r=ds['rows'][0]

    filename=os.path.join(fhome,'temp','data.xml')
    #make sure the download target directory exists
    os.makedirs(os.path.dirname(filename),exist_ok=True)
    url=db.net.connectionConfig['host']+r['_labkeyurl_fileUpload']
    fb.readFileToFile(url,filename)

    #needs project
    setupRows=readSetup(importXLSX,pars)
    #NOTE(review): only the first setup row is processed and dryRun is fixed
    #to True, so nothing is written to the database from here
    setupRows=setupRows[0:1]
    for s in setupRows:
        #needs project and schema for queries; entries of s override those of pars
        importXML(importXLSX,pars|s,filename,dryRun=True)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
#script entry point: the only argument is the path to the JSON parameter file
if __name__ == "__main__":
    if len(sys.argv)<2:
        #exit with a readable message instead of an IndexError traceback
        sys.exit(f'Usage: {sys.argv[0]} parameterFile')
    main(sys.argv[1])
|