Andrej 3 недель назад
Родитель
Commit
86a33d99bf
1 измененных файлов с 162 добавлено и 0 удалено
  1. 162 0
      pythonScripts/importXML.py

+ 162 - 0
pythonScripts/importXML.py

@@ -0,0 +1,162 @@
+import os
+import sys
+import importlib
+import json
+import lxml.etree
+
+#need labkey interface's importXLS
+def getImporter(setup):
+    sys.path.append(setup['paths']['nixWrapper'])
+    import nixWrapper
+    nixWrapper.loadLibrary('labkeyInterface')
+    import importXLSX
+    importlib.reload(importXLSX)
+    return importXLSX
+
+def getVal(xmlRoot,aliasVal,xPath='',aliasReplace={}):
+    q=aliasVal.split(':')
+    xName=q[0]
+    
+    for x in aliasReplace:
+        xName=xName.replace(x,aliasReplace[x])
+    if len(xPath)>0:
+        if len(xName)>0:
+            elementPath='/'.join([xPath,xName])
+        else:
+            elementPath=xPath
+    else:
+        elementPath=xName
+    attributeName=q[1]
+    try:
+        return xmlRoot.find(elementPath).get(attributeName)
+    except AttributeError:
+        return None
+    
+def parseJSON(x):
+    print(f'Decoding [{x}]')
+    try:
+        return json.loads(x)
+    except TypeError:
+        pass
+    return {}
+                
+def readSetup(importXSLX,pars):
+    db=importXLSX.getDB(pars)
+    ds=db.selectRows(pars['project'],'lists','importSetup',[])
+    setupRows=ds['rows']
+    for r in setupRows:
+        
+        r['aliasReplace']=parseJSON(r['aliasReplace'])
+        r['presetValues']=parseJSON(r['presetValues'])
+    return setupRows
+        
+    
+        
+    
+def importXML(importXLSX,pars,xmlFile,dryRun=True):
+    #def importData(pars,filename,getId=getId,modify=modify,convertLookup=convertLookup,dryRun=True,debug=True):
+#master routine that imports data based on pars, 
+#applies user supplied functions modify, convertLookup and get Id and 
+#updates relevant database
+
+#some useful fields from pars (d is for default value)
+# - seqNumOffset specify visit/sequenceNum offset (number, d:0 will result in 1)
+# - XPath - xml path to the element to take data from (helpful if multiple elements are present in xml and identical data is sought)
+# - additionalKeyColumn - name of the variable/column used for separating data entries (on top of ParticipantId and SequenceNum, helpful in the same cases as XPath)
+# - presetVariables - set some of the row variables to this values (same cases as XPath)
+# - project - labkey project
+# - schema - labkey schema (list/study, d: study)
+# - query - labkey query
+
+   #set this is as sequenceNum for entries, or initial seqNum if more than a single entry is in the dataset
+    seqNumOffset=pars.get('seqNumOffset',0)
+    xPath=pars.get('XPath','')
+    keyColumn=pars.get('additionalKeyColumn')
+    presetValues=pars.get('presetValues',{})
+    aliasReplace=pars.get('aliasReplace',{})
+    
+    fields=importXLSX.getFields(pars)
+    lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
+    dateVars=importXLSX.getVariables(fields,fieldType='DATE')
+    doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')
+
+#convert dates to list
+    dateVars=list(dateVars.keys())
+    print(f'dateVars: {dateVars}')
+    lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
+    alias=importXLSX.invertMap(importXLSX.getAlias(fields))
+    print(f'aliases: {alias}')
+    row={}
+    
+    tree = lxml.etree.ElementTree(file=filename)
+    print(tree.docinfo.xml_version)
+    root=tree.getroot()#Element
+    #patient id can be either set in pars (takes precedence) or from xml record
+    pid=pars.get('id',root.find('Patient/PatientID').get('val'))
+    
+    row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
+    row.update(presetValues)
+    
+    for f in fields:
+        try:
+            row[f]=getVal(root,alias[f],xPath,aliasReplace)
+        except KeyError:
+            print(f'Alias for field {f} not found')
+            continue
+    print(row)
+    db=importXLSX.getDB(pars)
+    project=pars.get('project','DCIS/Study')
+    schema=pars.get('schema','demographics')
+    query=pars.get('query','demographics')
+
+    selVal=['ParticipantId','SequenceNum']
+    if keyColumn:
+        selVal.append(keyColumn)
+    qFilter=[{'variable':v,'value':'{}'.format(row[v]),'oper':'eq'} for v in selVal]
+
+    ds=db.selectRows(project,schema,query,qFilter)
+    if len(ds['rows'])>0:
+        r=ds['rows'][0]
+        r.update(row)
+        print(f'Updating entry')
+        if not dryRun:
+            importXLSX.printErr(db.modifyRows('update',project,schema,query,[r]))
+        return r
+    print(f'Inserting entry')
+    if not dryRun:
+        importXLSX.printErr(db.modifyRows('insert',project,schema,query,[row]))
+    
+        
+
+
+def main(parameterFile):
+   # Entry point: load parameters, fetch the XML referenced by an 'importXML' list row and run importXML on it.
+   with open(parameterFile) as f:
+      pars=json.load(f)
+   # Setup is read from ~/.labkey/setup.json and handed to getImporter.
+   fhome=os.path.expanduser('~')
+   with open(os.path.join(fhome,".labkey","setup.json")) as f:
+     setup=json.load(f)
+   # The importXLSX module is passed explicitly to the helpers below.
+   importXLSX=getImporter(setup)
+   # Select the 'importXML' list row whose Key equals pars['key'].
+   # getDB receives the whole pars (original note: needs server)
+   db=importXLSX.getDB(pars)
+   keyFilter={'variable':'Key','value':pars['key'],'oper':'eq'}
+   # pars must provide 'project'
+   ds=db.selectRows(pars['project'],'lists','importXML',[keyFilter])
+   r=ds['rows'][0]  # raises IndexError when no row matches the key
+   filename=os.path.join(fhome,'temp','data.xml')  # local copy at ~/temp/data.xml; the directory is not created here
+   url=db.net.connectionConfig['host']+r['_labkeyurl_fileUpload']
+   fb.readFileToFile(url,filename)  # NOTE(review): `fb` is not defined in the visible code; this raises NameError unless it is provided elsewhere
+   # readSetup reads pars['project'] as well
+   setupRows=readSetup(importXLSX,pars)
+   setupRows=setupRows[0:1]  # only the first setup row is processed
+   for s in setupRows:
+      # each setup row is merged over pars (row values win); dryRun is fixed to True here
+      importXML(importXLSX,pars|s,filename,dryRun=True)
+
+
+
+if __name__ == "__main__" :
+   main(sys.argv[1])