|
@@ -19,22 +19,39 @@ def getFileBrowser(db):
|
|
|
import labkeyFileBrowser
|
|
|
return labkeyFileBrowser.labkeyFileBrowser(db.net)
|
|
|
|
|
|
def countElements(xmlRoot,xPath):
    """Count the positionally indexed elements reachable through xPath.

    Probes ``xPath[1]``, ``xPath[2]``, ... with ``xmlRoot.find`` and returns
    the number of consecutive indices that resolve to an element.

    Parameters:
        xmlRoot: element the path is evaluated against.
        xPath: element path relative to xmlRoot, segments separated by '/'.

    Returns:
        int: number of consecutive matches; 0 when nothing matches or when
        the parent part of the path does not resolve.
    """
    #the parent is only needed for the diagnostic print below
    parent=xmlRoot
    if '/' in xPath:
        parent=xmlRoot.find(xPath[:xPath.rfind('/')])

    #a missing parent means no child can match; report it instead of
    #failing on len(None)
    if parent is None:
        print('Counting {}: parent element not found'.format(xPath))
        return 0

    print('Counting {} in {}[{}]'.format(xPath,parent.tag,len(parent)))

    #positional predicates are 1-based
    count=0
    while xmlRoot.find(f'{xPath}[{count+1}]') is not None:
        count+=1
    return count
|
|
|
+
|
|
|
+
|
|
|
def getVal(xmlRoot,aliasVal,xPath=''):
    """Collect an attribute from every element matching an alias.

    Parameters:
        xmlRoot: element the lookup is evaluated against.
        aliasVal: string of the form 'elementName:attributeName'; the
            element name may be empty.
        xPath: optional path prefix prepended to the element name.

    Returns:
        list: one attribute value per element counted by countElements for
        the combined path, or [None] if a lookup raises AttributeError.
    """
    parts=aliasVal.split(':')
    xName=parts[0]

    #combine prefix and element name, leaving out whichever one is empty
    elementPath='/'.join(p for p in (xPath,xName) if len(p)>0)

    attributeName=parts[1]

    #find number of elements with this path
    n=countElements(xmlRoot,elementPath)
    values=[]
    try:
        #positional predicates are 1-based
        for idx in range(1,n+1):
            element=xmlRoot.find(f'{elementPath}[{idx}]')
            values.append(element.get(attributeName))
    except AttributeError:
        return [None]
    return values
|
|
|
|
|
|
def updateTxt(txt,replacePatterns):
|
|
|
for x in replacePatterns:
|
|
@@ -68,6 +85,12 @@ def getID(root):
|
|
|
|
|
|
def getXMLRoot(xmlFile):
    """Parse xmlFile with lxml and return the root element of the tree."""
    tree=lxml.etree.ElementTree(file=xmlFile)
    return tree.getroot()
|
|
|
+
|
|
|
def matchingLengths(dictArray):
    """Tell whether all values of dictArray have the same length.

    Parameters:
        dictArray: mapping whose values support len().

    Returns:
        bool: True when every value has the same length (also for an empty
        mapping), False otherwise.
    """
    #at most one distinct length means all values agree
    return len({len(v) for v in dictArray.values()})<=1
|
|
|
|
|
|
def importXML(importXLSX,pars,xmlRoot,dryRun=True):
|
|
|
#def importData(pars,filename,getId=getId,modify=modify,convertLookup=convertLookup,dryRun=True,debug=True):
|
|
@@ -85,62 +108,60 @@ def importXML(importXLSX,pars,xmlRoot,dryRun=True):
|
|
|
# - query - labkey query
|
|
|
|
|
|
#set this is as sequenceNum for entries, or initial seqNum if more than a single entry is in the dataset
|
|
|
- seqNumOffset=pars.get('seqNumOffset',0)
|
|
|
- xPath=pars.get('XPath','')
|
|
|
- keyColumn=pars.get('additionalKeyColumn')
|
|
|
- presetValues=pars.get('presetValues',{})
|
|
|
- aliasReplace=pars.get('aliasReplace',{})
|
|
|
+ seqNumOffset=pars.get('seqNumOffset',0)
|
|
|
+ xPath=pars.get('XPath','')
|
|
|
+ keyColumn=pars.get('additionalKeyColumn')
|
|
|
+ presetValues=pars.get('presetValues',{})
|
|
|
+ aliasReplace=pars.get('aliasReplace',{})
|
|
|
+ allowMultiple=pars.get('allowMultiple',"False")
|
|
|
+#convert to boolean
|
|
|
+ allowMultiple=allowMultiple=="True" or allowMultiple=="true"
|
|
|
+
|
|
|
|
|
|
- fields=importXLSX.getFields(pars)
|
|
|
- lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
|
|
|
- dateVars=importXLSX.getVariables(fields,fieldType='DATE')
|
|
|
- doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')
|
|
|
+ fields=importXLSX.getFields(pars)
|
|
|
+ lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
|
|
|
+ dateVars=importXLSX.getVariables(fields,fieldType='DATE')
|
|
|
+ doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')
|
|
|
|
|
|
#convert dates to list
|
|
|
- dateVars=list(dateVars.keys())
|
|
|
- print(f'dateVars: {dateVars}')
|
|
|
- lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
|
|
|
- alias=importXLSX.invertMap(importXLSX.getAlias(fields))
|
|
|
- alias=updateAliases(alias,aliasReplace)
|
|
|
- print(f'aliases: {alias}')
|
|
|
- row={}
|
|
|
-
|
|
|
- #patient id can be either set in pars (takes precedence) or from xml record
|
|
|
- pid=pars.get('id',getID(xmlRoot))
|
|
|
+ dateVars=list(dateVars.keys())
|
|
|
+ print(f'dateVars: {dateVars}')
|
|
|
+ lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
|
|
|
+ alias=importXLSX.invertMap(importXLSX.getAlias(fields))
|
|
|
+ alias=updateAliases(alias,aliasReplace)
|
|
|
+ print(f'aliases: {alias}')
|
|
|
+ row={}
|
|
|
|
|
|
- row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
|
|
|
- row.update(presetValues)
|
|
|
-
|
|
|
- for f in fields:
|
|
|
- try:
|
|
|
- row[f]=getVal(xmlRoot,alias[f],xPath)
|
|
|
- except KeyError:
|
|
|
- print(f'Alias for field {f} not found')
|
|
|
- continue
|
|
|
- print(row)
|
|
|
- db=importXLSX.getDB(pars)
|
|
|
- project=pars.get('project','DCIS/Study')
|
|
|
- schema=pars.get('schema','study')
|
|
|
- query=pars.get('query','demographics')
|
|
|
-
|
|
|
- selVal=['ParticipantId','SequenceNum']
|
|
|
- if keyColumn:
|
|
|
- selVal.append(keyColumn)
|
|
|
- qFilter=[{'variable':v,'value':'{}'.format(row[v]),'oper':'eq'} for v in selVal]
|
|
|
-
|
|
|
- ds=db.selectRows(project,schema,query,qFilter)
|
|
|
- if len(ds['rows'])>0:
|
|
|
- r=ds['rows'][0]
|
|
|
- r.update(row)
|
|
|
- print(f'Updating entry')
|
|
|
- if not dryRun:
|
|
|
- importXLSX.printErr(db.modifyRows('update',project,schema,query,[r]))
|
|
|
- return r
|
|
|
- print(f'Inserting entry')
|
|
|
- if not dryRun:
|
|
|
- importXLSX.printErr(db.modifyRows('insert',project,schema,query,[row]))
|
|
|
-
|
|
|
-
|
|
|
+#patient id can be either set in pars (takes precedence) or from xml record
|
|
|
+ pid=pars.get('id',getID(xmlRoot))
|
|
|
+
|
|
|
+ row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
|
|
|
+ row.update(presetValues)
|
|
|
+
|
|
|
+ rows=[]
|
|
|
+ vals={}
|
|
|
+ for f in fields:
|
|
|
+ try:
|
|
|
+ vals[f]=getVal(xmlRoot,alias[f],xPath)
|
|
|
+ if not allowMultiple:
|
|
|
+ vals[f]=vals[f][0:1]
|
|
|
+ except KeyError:
|
|
|
+ print(f'Alias for field {f} not found')
|
|
|
+ continue
|
|
|
+ if not matchingLengths(vals):
|
|
|
+ print('Mismatch lenghts of variables {}'.format({v:len(vals[v]) for v in vals}))
|
|
|
+ return
|
|
|
+ n=[len(vals[v]) for v in vals][0]
|
|
|
+ for i in range(n):
|
|
|
+ r={x:row[x] for x in row}
|
|
|
+ r.update({v:vals[v][i] for v in vals})
|
|
|
+ if n>1:
|
|
|
+ r[keyColumn]='{}_{}'.format(r[keyColumn],i+1)
|
|
|
+ rows.append(r)
|
|
|
+
|
|
|
+ #print(rows)
|
|
|
+ importXLSX.loadSafely(pars,rows,keyColumn,dryRun)
|
|
|
+
|
|
|
|
|
|
|
|
|
def main(parameterFile):
|