importXML.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. import os
  2. import sys
  3. import importlib
  4. import json
  5. import lxml.etree
  6. #need labkey interface's importXLS
  7. def getImporter(setup):
  8. sys.path.append(setup['paths']['nixWrapper'])
  9. import nixWrapper
  10. nixWrapper.loadLibrary('labkeyInterface')
  11. import importXLSX
  12. importlib.reload(importXLSX)
  13. return importXLSX
  14. def getFileBrowser(db):
  15. import nixWrapper
  16. nixWrapper.loadLibrary('labkeyInterface')
  17. import labkeyFileBrowser
  18. return labkeyFileBrowser.labkeyFileBrowser(db.net)
  19. def getVal(xmlRoot,aliasVal,xPath=''):
  20. q=aliasVal.split(':')
  21. xName=q[0]
  22. if len(xPath)>0:
  23. if len(xName)>0:
  24. elementPath='/'.join([xPath,xName])
  25. else:
  26. elementPath=xPath
  27. else:
  28. elementPath=xName
  29. attributeName=q[1]
  30. try:
  31. return xmlRoot.find(elementPath).get(attributeName)
  32. except AttributeError:
  33. return None
  34. def updateTxt(txt,replacePatterns):
  35. for x in replacePatterns:
  36. txt=txt.replace(x,replacePatterns[x])
  37. return txt
  38. def updateAliases(aliasValues,aliasReplace):
  39. return {a:updateTxt(aliasValues[a],aliasReplace) for a in aliasValues}
  40. def parseJSON(x):
  41. print(f'Decoding [{x}]')
  42. try:
  43. return json.loads(x)
  44. except TypeError:
  45. pass
  46. return {}
  47. def readSetup(importXLSX,pars):
  48. db=importXLSX.getDB(pars)
  49. ds=db.selectRows(pars['project'],'lists','importSetup',[])
  50. setupRows=ds['rows']
  51. for r in setupRows:
  52. r['aliasReplace']=parseJSON(r['aliasReplace'])
  53. r['presetValues']=parseJSON(r['presetValues'])
  54. return setupRows
  55. def getID(root):
  56. return root.find('Patient/PatientID').get('val')
  57. def getXMLRoot(xmlFile):
  58. return lxml.etree.ElementTree(file=xmlFile).getroot()
  59. def importXML(importXLSX,pars,xmlRoot,dryRun=True):
  60. #def importData(pars,filename,getId=getId,modify=modify,convertLookup=convertLookup,dryRun=True,debug=True):
  61. #master routine that imports data based on pars,
  62. #applies user supplied functions modify, convertLookup and get Id and
  63. #updates relevant database
  64. #some useful fields from pars (d is for default value)
  65. # - seqNumOffset specify visit/sequenceNum offset (number, d:0 will result in 1)
  66. # - XPath - xml path to the element to take data from (helpful if multiple elements are present in xml and identical data is sought)
  67. # - additionalKeyColumn - name of the variable/column used for separating data entries (on top of ParticipantId and SequenceNum, helpful in the same cases as XPath)
  68. # - presetVariables - set some of the row variables to this values (same cases as XPath)
  69. # - project - labkey project
  70. # - schema - labkey schema (list/study, d: study)
  71. # - query - labkey query
  72. #set this is as sequenceNum for entries, or initial seqNum if more than a single entry is in the dataset
  73. seqNumOffset=pars.get('seqNumOffset',0)
  74. xPath=pars.get('XPath','')
  75. keyColumn=pars.get('additionalKeyColumn')
  76. presetValues=pars.get('presetValues',{})
  77. aliasReplace=pars.get('aliasReplace',{})
  78. fields=importXLSX.getFields(pars)
  79. lookupVars=importXLSX.getVariables(fields,fieldType='LOOKUP')
  80. dateVars=importXLSX.getVariables(fields,fieldType='DATE')
  81. doubleVars=importXLSX.getVariables(fields,fieldType='DOUBLE')
  82. #convert dates to list
  83. dateVars=list(dateVars.keys())
  84. print(f'dateVars: {dateVars}')
  85. lookupMap={f:importXLSX.getLookupMap(pars,fields,f) for f in lookupVars}
  86. alias=importXLSX.invertMap(importXLSX.getAlias(fields))
  87. alias=updateAliases(alias,aliasReplace)
  88. print(f'aliases: {alias}')
  89. row={}
  90. #patient id can be either set in pars (takes precedence) or from xml record
  91. pid=pars.get('id',getID(xmlRoot))
  92. row={'ParticipantId':pid,'SequenceNum':seqNumOffset+1}
  93. row.update(presetValues)
  94. for f in fields:
  95. try:
  96. row[f]=getVal(xmlRoot,alias[f],xPath)
  97. except KeyError:
  98. print(f'Alias for field {f} not found')
  99. continue
  100. print(row)
  101. db=importXLSX.getDB(pars)
  102. project=pars.get('project','DCIS/Study')
  103. schema=pars.get('schema','study')
  104. query=pars.get('query','demographics')
  105. selVal=['ParticipantId','SequenceNum']
  106. if keyColumn:
  107. selVal.append(keyColumn)
  108. qFilter=[{'variable':v,'value':'{}'.format(row[v]),'oper':'eq'} for v in selVal]
  109. ds=db.selectRows(project,schema,query,qFilter)
  110. if len(ds['rows'])>0:
  111. r=ds['rows'][0]
  112. r.update(row)
  113. print(f'Updating entry')
  114. if not dryRun:
  115. importXLSX.printErr(db.modifyRows('update',project,schema,query,[r]))
  116. return r
  117. print(f'Inserting entry')
  118. if not dryRun:
  119. importXLSX.printErr(db.modifyRows('insert',project,schema,query,[row]))
  120. def main(parameterFile):
  121. with open(parameterFile) as f:
  122. pars=json.load(f)
  123. print(pars)
  124. fhome=os.path.expanduser('~')
  125. with open(os.path.join(fhome,".labkey","setup.json")) as f:
  126. setup=json.load(f)
  127. importXLSX=getImporter(setup)
  128. #needs server
  129. db=importXLSX.getDB(pars)
  130. db.net.getCSRF()
  131. keyFilter={'variable':'Key','value':'{key}'.format(**pars),'oper':'eq'}
  132. #needs project
  133. ds=db.selectRows(pars['project'],'lists','importXML',[keyFilter])
  134. r=ds['rows'][0]
  135. xmlFile=os.path.join(fhome,'temp','DCIS','data.xml')
  136. url=db.net.connectionConfig['host']+r['_labkeyurl_fileUpload']
  137. fb=getFileBrowser(db)
  138. fb.readFileToFile(url,xmlFile)
  139. xmlRoot=getXMLRoot(xmlFile)
  140. #needs project
  141. setupRows=readSetup(importXLSX,pars)
  142. #setupRows=setupRows[17:18]
  143. for s in setupRows:
  144. #needs project and schema for queries
  145. importXML(importXLSX,pars|s,xmlRoot,dryRun=False)
  146. #make calling importXML updates
  147. r['status']=1
  148. r['ID']=getID(xmlRoot)
  149. del r['fileUpload']
  150. db.modifyRows('update',pars['project'],'lists','importXML',[r])
  151. if __name__ == "__main__" :
  152. main(sys.argv[1])