瀏覽代碼

Added support for multiple entries

Andrej 3 周之前
父節點
當前提交
033f4c9feb
共有 1 個文件被更改,包括 87 次插入和 66 次删除
  1. 87 66
      pythonScripts/importXML.py

+ 87 - 66
pythonScripts/importXML.py

@@ -19,22 +19,39 @@ def getFileBrowser(db):
    import labkeyFileBrowser
    return labkeyFileBrowser.labkeyFileBrowser(db.net)
 
def countElements(xmlRoot,xPath):
   """Count the elements reachable as xPath[1], xPath[2], ... from xmlRoot.

   The count is obtained by probing ``xmlRoot.find`` with an increasing
   positional predicate until the lookup returns None.

   Parameters:
      xmlRoot: element exposing ``find`` (and, for the parent, ``tag`` and
         ``len``), e.g. an ElementTree/lxml element.
      xPath: element path relative to xmlRoot, components separated by '/'.

   Returns:
      Number of consecutive positions (starting at 1) for which
      ``xmlRoot.find(f'{xPath}[{i}]')`` finds an element; 0 when the parent
      path itself does not match.
   """
   #the parent element is only needed for the diagnostic printout
   parent=xmlRoot
   if '/' in xPath:
      parent=xmlRoot.find(xPath.rpartition('/')[0])
   if parent is None:
      #parent path not present: nothing can match, avoid len(None)/None.tag
      print('Counting {}: parent element not found'.format(xPath))
      return 0
   print('Counting {} in {}[{}]'.format(xPath,parent.tag,len(parent)))
   count=0
   while xmlRoot.find(f'{xPath}[{count+1}]') is not None:
      count+=1
   return count
+
+
 def getVal(xmlRoot,aliasVal,xPath=''):
-    q=aliasVal.split(':')
-    xName=q[0]
+   q=aliasVal.split(':')
+   xName=q[0]
     
-    if len(xPath)>0:
-        if len(xName)>0:
-            elementPath='/'.join([xPath,xName])
-        else:
-            elementPath=xPath
-    else:
-        elementPath=xName
-    attributeName=q[1]
-    try:
-        return xmlRoot.find(elementPath).get(attributeName)
-    except AttributeError:
-        return None
+   if len(xPath)>0:
+      if len(xName)>0:
+         elementPath='/'.join([xPath,xName])
+      else:
+         elementPath=xPath
+   else:
+      elementPath=xName
+
+   attributeName=q[1]
+
+#find number of elements with this path
+   n=countElements(xmlRoot,elementPath) 
+   try:
+      return [xmlRoot.find(f'{elementPath}[{i+1}]').get(attributeName) for i in range(n)]
+   except AttributeError:
+      return [None]
 
 def updateTxt(txt,replacePatterns):
    for x in replacePatterns:
@@ -68,6 +85,12 @@ def getID(root):
 
 def getXMLRoot(xmlFile):
    return lxml.etree.ElementTree(file=xmlFile).getroot()
+
def matchingLengths(dictArray):
   """Tell whether all values of dictArray have the same length.

   Parameters:
      dictArray: mapping whose values support ``len`` (e.g. lists).

   Returns:
      True when every value has the same length, including the cases of an
      empty mapping or a single entry; False otherwise.
   """
   #a set collapses equal lengths; more than one member means a mismatch
   lengths={len(v) for v in dictArray.values()}
   return len(lengths)<=1
     
 def importXML(importXLSX,pars,xmlRoot,dryRun=True):
     #def importData(pars,filename,getId=getId,modify=modify,convertLookup=convertLookup,dryRun=True,debug=True):
@@ -85,62 +108,60 @@ def importXML(importXLSX,pars,xmlRoot,dryRun=True):
 # - query - labkey query
 
    #set this as sequenceNum for entries, or as the initial seqNum if more than a single entry is in the dataset
-    seqNumOffset=pars.get('seqNumOffset',0)
-    xPath=pars.get('XPath','')
-    keyColumn=pars.get('additionalKeyColumn')
-    presetValues=pars.get('presetValues',{})
-    aliasReplace=pars.get('aliasReplace',{})
+   seqNumOffset=pars.get('seqNumOffset',0)
+   xPath=pars.get('XPath','')
+   keyColumn=pars.get('additionalKeyColumn')
+   presetValues=pars.get('presetValues',{})
+   aliasReplace=pars.get('aliasReplace',{})
+   allowMultiple=pars.get('allowMultiple',"False")
+#convert to boolean
+   allowMultiple=allowMultiple=="True" or allowMultiple=="true"
+   
     
-    fields=importXLSX.getFields(pars)
-    lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
-    dateVars=importXLSX.getVariables(fields,fieldType='DATE')
-    doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')
+   fields=importXLSX.getFields(pars)
+   lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
+   dateVars=importXLSX.getVariables(fields,fieldType='DATE')
+   doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')
 
 #convert dates to list
-    dateVars=list(dateVars.keys())
-    print(f'dateVars: {dateVars}')
-    lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
-    alias=importXLSX.invertMap(importXLSX.getAlias(fields))
-    alias=updateAliases(alias,aliasReplace)
-    print(f'aliases: {alias}')
-    row={}
-    
-    #patient id can be either set in pars (takes precedence) or from xml record
-    pid=pars.get('id',getID(xmlRoot))
+   dateVars=list(dateVars.keys())
+   print(f'dateVars: {dateVars}')
+   lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
+   alias=importXLSX.invertMap(importXLSX.getAlias(fields))
+   alias=updateAliases(alias,aliasReplace)
+   print(f'aliases: {alias}')
+   row={}
     
-    row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
-    row.update(presetValues)
-    
-    for f in fields:
-        try:
-            row[f]=getVal(xmlRoot,alias[f],xPath)
-        except KeyError:
-            print(f'Alias for field {f} not found')
-            continue
-    print(row)
-    db=importXLSX.getDB(pars)
-    project=pars.get('project','DCIS/Study')
-    schema=pars.get('schema','study')
-    query=pars.get('query','demographics')
-
-    selVal=['ParticipantId','SequenceNum']
-    if keyColumn:
-        selVal.append(keyColumn)
-    qFilter=[{'variable':v,'value':'{}'.format(row[v]),'oper':'eq'} for v in selVal]
-
-    ds=db.selectRows(project,schema,query,qFilter)
-    if len(ds['rows'])>0:
-        r=ds['rows'][0]
-        r.update(row)
-        print(f'Updating entry')
-        if not dryRun:
-            importXLSX.printErr(db.modifyRows('update',project,schema,query,[r]))
-        return r
-    print(f'Inserting entry')
-    if not dryRun:
-        importXLSX.printErr(db.modifyRows('insert',project,schema,query,[row]))
-    
-        
+#patient id can be either set in pars (takes precedence) or from xml record
+   pid=pars.get('id',getID(xmlRoot))
+
+   row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
+   row.update(presetValues)
+
+   rows=[]
+   vals={}
+   for f in fields:
+      try:
+         vals[f]=getVal(xmlRoot,alias[f],xPath)
+         if not allowMultiple:
+            vals[f]=vals[f][0:1]
+      except KeyError:
+         print(f'Alias for field {f} not found')
+         continue
+   if not matchingLengths(vals):
+      print('Mismatch lenghts of variables {}'.format({v:len(vals[v]) for v in vals}))
+      return
+   n=[len(vals[v]) for v in vals][0]
+   for i in range(n):
+      r={x:row[x] for x in row}
+      r.update({v:vals[v][i] for v in vals})
+      if n>1:
+         r[keyColumn]='{}_{}'.format(r[keyColumn],i+1)
+      rows.append(r)
+
+   #print(rows)
+   importXLSX.loadSafely(pars,rows,keyColumn,dryRun)
+
 
 
 def main(parameterFile):