importXML.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. import os
  2. import sys
  3. import importlib
  4. import json
  5. import lxml.etree
  6. # Needs the labkeyInterface library's importXLSX module.
  def getImporter(setup):
      """Load and return the ``importXLSX`` module via ``nixWrapper``.

      Parameters:
          setup: mapping whose ``setup['paths']['nixWrapper']`` entry is the
              directory that contains the ``nixWrapper`` module.

      Returns:
          The ``importXLSX`` module, reloaded on every call.

      Raises:
          KeyError: if ``setup`` has no ``['paths']['nixWrapper']`` entry.
          ImportError: if ``nixWrapper`` or ``importXLSX`` cannot be imported.
      """
      wrapperPath = setup['paths']['nixWrapper']
      # Register the directory only once so that repeated calls (e.g. from an
      # interactive session) do not keep growing sys.path with duplicates.
      if wrapperPath not in sys.path:
          sys.path.append(wrapperPath)
      import nixWrapper
      # Presumably makes the labkeyInterface modules importable - confirm
      # against nixWrapper.loadLibrary.
      nixWrapper.loadLibrary('labkeyInterface')
      import importXLSX
      # Force a re-read of the module source even if it was imported earlier.
      importlib.reload(importXLSX)
      return importXLSX
  def getFileBrowser(db):
      """Create a ``labkeyFileBrowser`` object for the connection of ``db``.

      ``nixWrapper`` must already be importable when this is called; in this
      file ``getImporter`` is what adds its directory to ``sys.path``.

      Parameters:
          db: database handle exposing its network connection as ``db.net``.

      Returns:
          ``labkeyFileBrowser.labkeyFileBrowser`` constructed from ``db.net``.
      """
      import nixWrapper
      nixWrapper.loadLibrary('labkeyInterface')
      import labkeyFileBrowser as browserModule
      browserClass = browserModule.labkeyFileBrowser
      return browserClass(db.net)
  def getVal(xmlRoot, aliasVal, xPath='', aliasReplace=None):
      """Return one attribute value from an XML tree, or None if absent.

      Parameters:
          xmlRoot: element to search from; must offer ``find(path)`` returning
              an element with ``get(attributeName)``.
          aliasVal: string of the form ``'<element path>:<attribute name>'``.
              The element path part may be empty (``':attr'``).
          xPath: optional path prefix; joined to the element path with ``'/'``.
          aliasReplace: optional mapping ``{old: new}`` of substring
              replacements applied to the element path part of ``aliasVal``.

      Returns:
          The attribute value, or None when the element is not found (or the
          element exists but lacks the attribute).

      Raises:
          IndexError: if ``aliasVal`` contains no ``':'`` separator.
      """
      # A None sentinel replaces the former shared mutable default ({}).
      if aliasReplace is None:
          aliasReplace = {}
      q = aliasVal.split(':')
      xName = q[0]
      for old, new in aliasReplace.items():
          xName = xName.replace(old, new)
      # Join only the non-empty parts: prefix alone, name alone, or both.
      elementPath = '/'.join(part for part in (xPath, xName) if part)
      attributeName = q[1]
      try:
          return xmlRoot.find(elementPath).get(attributeName)
      except AttributeError:
          # find() returned None: no element matches elementPath.
          return None
  def parseJSON(x):
      """Decode a JSON document, returning ``{}`` when there is nothing to decode.

      Parameters:
          x: JSON text (str/bytes), or a non-text value such as None.

      Returns:
          The decoded object; ``{}`` if ``x`` is not text (e.g. None) or is
          empty / whitespace-only text.

      Raises:
          json.JSONDecodeError: if ``x`` is non-blank text that is not valid
              JSON, so that a broken setup entry is not silently ignored.
      """
      print(f'Decoding [{x}]')
      # A blank cell means "no value"; json.loads would raise JSONDecodeError
      # on it, which the TypeError handler below does not cover.
      if isinstance(x, (str, bytes, bytearray)) and not x.strip():
          return {}
      try:
          return json.loads(x)
      except TypeError:
          # x is not text at all (e.g. None for an unset field).
          return {}
  def readSetup(importXLSX, pars):
      """Fetch the rows of the ``importSetup`` list and decode their JSON columns.

      Parameters:
          importXLSX: module providing ``getDB(pars)``.
          pars: parameter mapping; ``pars['project']`` names the project queried.

      Returns:
          The ``rows`` list of the query result, where each row's
          ``aliasReplace`` and ``presetValues`` entries have been replaced by
          the result of ``parseJSON``.
      """
      connection = importXLSX.getDB(pars)
      result = connection.selectRows(pars['project'], 'lists', 'importSetup', [])
      rows = result['rows']
      for entry in rows:
          # Same decode order as the columns are listed here.
          for column in ('aliasReplace', 'presetValues'):
              entry[column] = parseJSON(entry[column])
      return rows
  def importXML(importXLSX, pars, xmlFile, dryRun=True):
      """Build one row from an XML file and insert or update it in the database.

      Parameters:
          importXLSX: module providing ``getFields``, ``getVariables``,
              ``getLookupMap``, ``getAlias``, ``invertMap``, ``getDB`` and
              ``printErr``.
          pars: parameter mapping. Keys read here:
              - ``seqNumOffset``: the row gets SequenceNum ``seqNumOffset + 1``
                (default 0, i.e. SequenceNum 1).
              - ``XPath``: XML path prefix of the element to take data from
                (useful when several elements hold the same kind of data).
              - ``additionalKeyColumn``: extra column, on top of ParticipantId
                and SequenceNum, used to find an existing row.
              - ``presetValues``: values copied into the row before the XML
                fields are read.
              - ``aliasReplace``: substring replacements applied to aliases.
              - ``id``: participant id; takes precedence over the XML record.
              - ``project``, ``schema``, ``query``: target of the import.
          xmlFile: path or file object accepted by ``lxml.etree.ElementTree``.
          dryRun: when True (default) nothing is written to the database.

      Returns:
          The merged row when an existing entry was found (update path);
          None when a new entry is inserted.

      Raises:
          AttributeError: if ``pars`` has no ``id`` and the XML has no
              ``Patient/PatientID`` element.
      """
      seqNumOffset = pars.get('seqNumOffset', 0)
      xPath = pars.get('XPath', '')
      keyColumn = pars.get('additionalKeyColumn')
      presetValues = pars.get('presetValues', {})
      aliasReplace = pars.get('aliasReplace', {})

      fields = importXLSX.getFields(pars)
      lookupVars = importXLSX.getVariables(fields, fieldType='LOOKUP')
      dateVars = importXLSX.getVariables(fields, fieldType='DATE')
      # NOTE(review): doubleVars and lookupMap are not used below; the calls are
      # kept in case the helpers have effects not visible here - confirm and
      # remove if they are pure.
      doubleVars = importXLSX.getVariables(fields, fieldType='DOUBLE')

      # Only the names of the date variables are reported.
      dateVars = list(dateVars.keys())
      print(f'dateVars: {dateVars}')

      lookupMap = {f: importXLSX.getLookupMap(pars, fields, f) for f in lookupVars}
      alias = importXLSX.invertMap(importXLSX.getAlias(fields))
      print(f'aliases: {alias}')

      tree = lxml.etree.ElementTree(file=xmlFile)
      print(tree.docinfo.xml_version)
      root = tree.getroot()

      # The id from pars takes precedence; the XML record is consulted only
      # when pars has none, so a file without Patient/PatientID still works
      # when the id is supplied explicitly.
      if 'id' in pars:
          pid = pars['id']
      else:
          pid = root.find('Patient/PatientID').get('val')

      row = {'ParticipantId': pid, 'SequenceNum': seqNumOffset + 1}
      row.update(presetValues)
      for f in fields:
          try:
              fieldAlias = alias[f]
          except KeyError:
              print(f'Alias for field {f} not found')
              continue
          row[f] = getVal(root, fieldAlias, xPath, aliasReplace)
      print(row)

      db = importXLSX.getDB(pars)
      project = pars.get('project', 'DCIS/Study')
      # NOTE(review): the original notes describe the schema default as
      # "study", but the code falls back to 'demographics' - confirm.
      schema = pars.get('schema', 'demographics')
      query = pars.get('query', 'demographics')

      # Columns that identify an existing entry.
      selVal = ['ParticipantId', 'SequenceNum']
      if keyColumn:
          selVal.append(keyColumn)
      qFilter = [{'variable': v, 'value': '{}'.format(row[v]), 'oper': 'eq'}
                 for v in selVal]
      ds = db.selectRows(project, schema, query, qFilter)

      if len(ds['rows']) > 0:
          # Merge the new values into the first matching entry.
          r = ds['rows'][0]
          r.update(row)
          print('Updating entry')
          if not dryRun:
              importXLSX.printErr(db.modifyRows('update', project, schema, query, [r]))
          return r

      print('Inserting entry')
      if not dryRun:
          importXLSX.printErr(db.modifyRows('insert', project, schema, query, [row]))
  def main(parameterFile):
      """Download the XML file of one ``importXML`` list entry and import it.

      Parameters:
          parameterFile: path of a JSON file with the import parameters; it
              must provide at least ``key`` and ``project`` plus whatever
              ``importXLSX.getDB`` needs to reach the server.

      Raises:
          IndexError: if the ``importXML`` list has no entry with the given key.

      The connection setup is read from ``~/.labkey/setup.json`` and the XML
      file is stored as ``~/temp/DCIS/data.xml``.
      """
      with open(parameterFile) as f:
          pars = json.load(f)
      print(pars)

      fhome = os.path.expanduser('~')
      with open(os.path.join(fhome, ".labkey", "setup.json")) as f:
          setup = json.load(f)
      importXLSX = getImporter(setup)

      # Needs the server settings from pars.
      db = importXLSX.getDB(pars)
      db.net.getCSRF()

      keyFilter = {'variable': 'Key', 'value': '{key}'.format(**pars), 'oper': 'eq'}
      # Needs pars['project'].
      ds = db.selectRows(pars['project'], 'lists', 'importXML', [keyFilter])
      if not ds['rows']:
          # Same exception type as the former bare ds['rows'][0], with context.
          raise IndexError(
              'No importXML entry with Key={key} in project {project}'.format(**pars))
      r = ds['rows'][0]

      # Make sure the download directory exists before writing into it.
      targetDir = os.path.join(fhome, 'temp', 'DCIS')
      os.makedirs(targetDir, exist_ok=True)
      filename = os.path.join(targetDir, 'data.xml')

      url = db.net.connectionConfig['host'] + r['_labkeyurl_fileUpload']
      fb = getFileBrowser(db)
      fb.readFileToFile(url, filename)

      # Needs pars['project'].
      setupRows = readSetup(importXLSX, pars)
      # NOTE(review): only the first setup row is processed and dryRun is
      # hard-coded to True, so nothing is written - confirm this is intended.
      setupRows = setupRows[0:1]
      for s in setupRows:
          # Entries of the setup row override same-named entries of pars.
          importXML(importXLSX, pars | s, filename, dryRun=True)
  if __name__ == "__main__":
      # Script entry point: the first argument is the JSON parameter file.
      if len(sys.argv) < 2:
          # Exit with a usage hint instead of a bare IndexError traceback.
          sys.exit(f'Usage: {sys.argv[0]} <parameterFile>')
      main(sys.argv[1])