Tutorial: XML Processing with Python


Extensible Markup Language (XML) is a markup language similar to HTML. It is useful for keeping track of data without using a database, and XML files are very popular as configuration files.

Now what if we want to process or edit XML Configuration files using Python?

Suppose we want to perform the following operations on XML files with a script rather than doing them manually:

To find a node by any attribute, at any level of the hierarchy, or by its index
To Comment Node(s)
To Delete Node(s)
To Update Node(s)
To Insert Node(s)
To add attribute to Node(s)

Tutorial: XML Processing with Python

We have provided here a simple Python program with multiple functions to perform the above tasks. It uses a properties file that describes the changes to make in the XML file.

================================================
The following is a sample XML file in which we want to make changes.

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE hibernate-configuration PUBLIC
"-//Hibernate/Hibernate Configuration DTD 3.0//EN"
"http://www.hibernate.org/dtd/hibernate-configuration-3.0.dtd">

<hibernate-configuration>
<session-factory>
<property name="hibernate.connection.driver_class">com.mysql.jdbc.Driver</property>
<property name="hibernate.connection.url">jdbc:mysql://localhost:3306/discovertechno</property>
<property name="hibernate.connection.username">root</property>
<property name="hibernate.connection.password">password</property>
<property name="hibernate.dialect">org.hibernate.dialect.MySQLDialect</property>
<property name="show_sql">true</property>
<property name="format_sql">true</property>
<mapping resource="com/discovertechno/stock/Stock.hbm.xml" />
<mapping resource="com/discovertechno/stock/StockDailyRecord.hbm.xml" />
</session-factory>

</hibernate-configuration>

================================================

Property File Structure:

#xmlLocation=standalone.xml
xmlLocation=hibernate.xml
#update=interfaces@index=0&interface@name=public&inet-address@index=0$@value=0.0.0.3#interfaces@index=0&interface@name=management&inet-address@index=0$@value=0.0.0.4
update=session-factory@index=0&property@name=hibernate.connection.username$@value=root#session-factory@index=0&property@name=hibernate.connection.password$@value=root@123
comment=session-factory@index=0&property@name=hibernate.connection.username#session-factory@index=0&property@name=hibernate.connection.password
insert=session-factory@index=0*<property name="hibernate.connection.username" value="root">root</property>#session-factory@index=0*<property name="hibernate.connection.password" value="root@123">root@123</property>

===============================================

#!/usr/local/bin/python2.7
from xml.dom import minidom
from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError

#Save XML File
def saveXML(xmlDoc, fileName):
    """Serialize ``xmlDoc`` to ``fileName``, overwriting any existing file.

    xmlDoc   -- object exposing ``writexml(writer)`` (e.g. a minidom Document)
    fileName -- path of the file to (re)write

    Raises IOError if the file cannot be opened for writing.
    """
    # 'with' guarantees the handle is closed even when writexml() raises.
    with open(fileName, "w") as xmlFile:
        xmlDoc.writexml(xmlFile)

#To find a node based on any attribute in any hierarchy and even with the index
def findNode(parentNode, nodePath):
    """Return the element below ``parentNode`` selected by ``nodePath``.

    ``nodePath`` has the form ``tagName@attribute=value``:

    * ``tagName@index=N`` selects the N-th (0-based) element named
      ``tagName``;
    * ``tagName@attr=value`` selects the first element named ``tagName``
      whose attribute ``attr`` equals ``value``.

    Candidates come from ``getElementsByTagName``, so every descendant of
    ``parentNode`` is considered, not only its direct children.

    Returns the matching element, or None when ``parentNode`` is None, the
    path is malformed, the index is invalid, or nothing matches.
    """
    if parentNode is None:
        return None

    # partition() keeps any further '@' inside the value (e.g. "root@123")
    # and lets us detect a path that has no '@' at all.
    elementName, separator, attributeInfo = nodePath.partition("@")
    if not separator:
        print("Invalid node path (expected tag@attribute=value): %s" % nodePath)
        return None

    name, var = attributeInfo.partition("=")[::2]
    childElements = parentNode.getElementsByTagName(elementName)

    # Positional lookup.
    if name == 'index':
        try:
            return childElements[int(var)]
        except (IndexError, ValueError):
            print("Index Error: Verify Properties file for Valid XML element Hierarchy and its Index")
            return None

    # Attribute lookup: first element whose attribute has the wanted value.
    for childElement in childElements:
        if not childElement.hasAttribute(name):
            print("Attribute not found in XML Element")
            continue
        if childElement.getAttribute(name) == var:
            return childElement
    return None

#Recursive procedure to find a node considering a hierarchy
def getNode(parentNode, nodeList):
    """Resolve a hierarchy of node paths starting at ``parentNode``.

    ``nodeList`` is a sequence of ``tag@attribute=value`` strings; each one
    is looked up (via findNode) inside the element found by the previous one.

    Returns the element selected by the last path, or None when the list is
    empty or any step cannot be resolved.
    """
    if not nodeList:
        return None

    currentNode = parentNode
    for nodePath in nodeList:
        currentNode = findNode(currentNode, nodePath)
        if currentNode is None:
            # A missing intermediate element makes the rest unreachable.
            return None
    return currentNode

def commentNodes(xmlDoc, commentSTR, fileName):
    """Comment out the elements described by ``commentSTR`` and save the file.

    xmlDoc     -- parsed minidom Document to modify
    commentSTR -- one or more element paths separated by "#", e.g.
                  session-factory@index=0&property@name=hibernate.connection.username
                  Steps inside one path are separated by "&".
    fileName   -- file the document is written back to after each change

    Processing stops at the first path that cannot be resolved; changes made
    for earlier paths have already been saved at that point.
    """
    for comment in commentSTR.split("#"):
        ele = getNode(xmlDoc.documentElement, comment.split("&"))
        if ele is None:
            print("Comment Operation Failed: Verify Properties file for Valid XML element Hierarchy, Index or Attribute Name")
            return

        parentNode = ele.parentNode
        # Put a comment holding the element's XML text in front of the
        # element, then drop the element itself.
        # NOTE(review): XML forbids "--" inside comments; an element whose
        # serialized text contains it may fail when saved -- confirm.
        parentNode.insertBefore(xmlDoc.createComment(ele.toxml()), ele)
        parentNode.removeChild(ele)
        # To un-comment later: parse the comment's data back into a node
        # (minidom.parseString(commentNode.data).firstChild) and use
        # commentNode.parentNode.replaceChild(node, commentNode).
        saveXML(xmlDoc, fileName)
        print("Comment Operation Successfully Completed.")

def deleteNodes(xmlDoc, deleteSTR, fileName):
    """Delete the elements described by ``deleteSTR`` and save the file.

    xmlDoc    -- parsed minidom Document to modify
    deleteSTR -- one or more element paths separated by "#", e.g.
                 session-factory@index=0&property@name=hibernate.connection.username
                 Steps inside one path are separated by "&".
    fileName  -- file the document is written back to after each deletion

    Processing stops at the first path that cannot be resolved; deletions
    made for earlier paths have already been saved at that point.
    """
    for delete in deleteSTR.split("#"):
        # Locate the element to delete.
        ele = getNode(xmlDoc.documentElement, delete.split("&"))
        if ele is None:
            print("Delete Operation Failed: Verify Properties file for Valid XML element Hierarchy, Index or Attribute Name")
            return

        # Detach the element from its parent.
        ele.parentNode.removeChild(ele)
        saveXML(xmlDoc, fileName)
        print("Delete Operation Successfully Completed.")

def updateNodes(xmlDoc, updateSTR, fileName):
    """Update attribute values or element text as described by ``updateSTR``.

    xmlDoc    -- parsed minidom Document to modify
    updateSTR -- one or more update instructions separated by "#".  Each
                 instruction is ``<element path>$<value spec>``, e.g.
                 session-factory@index=0&property@name=hibernate.connection.username$@value=root
                 * ``$@attr=new`` replaces the value of existing attribute
                   ``attr``;
                 * ``$key=new`` (no leading "@") replaces the element's text
                   with whatever follows the "=".
    fileName  -- file the document is written back to after each update

    Processing stops at the first instruction that cannot be applied (element
    not found, value part missing, or attribute absent); updates applied
    before that point have already been saved.
    """
    failureMessage = "Update Operation Failed: Verify Properties file for Valid XML element Hierarchy, Index or Attribute Name"

    for elementSTR in updateSTR.split("#"):
        # "$" separates the element path from the replacement value.
        enode, val = elementSTR.partition("$")[::2]
        val = val.rstrip()
        ele = getNode(xmlDoc.documentElement, enode.strip().split("&"))
        if ele is None or not val:
            print(failureMessage)
            return

        if val.startswith("@"):
            # Attribute update: only existing attributes may be changed.
            attributeName, attributeVal = val[1:].partition("=")[::2]
            if not ele.hasAttribute(attributeName):
                print(failureMessage)
                return
            ele.setAttribute(attributeName, attributeVal)
        else:
            # Text update: the new text is whatever follows the "=".
            newText = val.partition("=")[2]
            firstChild = ele.firstChild
            if firstChild is not None and firstChild.nodeType == firstChild.TEXT_NODE:
                firstChild.replaceWholeText(newText)
            else:
                # Empty element, or one that starts with a non-text child:
                # add a text node in front (insertBefore(x, None) appends).
                ele.insertBefore(xmlDoc.createTextNode(newText), firstChild)

        saveXML(xmlDoc, fileName)
        print("Update Operation Successfully Completed.")

#Insert a node having multiple child nodes or any number of attributes; this function takes String as a input, convert it into xml element and append it to parent element
def insertNodes(xmlDoc, insertSTR, fileName):
    """Append new child elements as described by ``insertSTR``.

    xmlDoc    -- parsed minidom Document to modify
    insertSTR -- one or more insert instructions separated by "#".  Each
                 instruction is ``<parent path>*<xml text>``, e.g.
                 session-factory@index=0*<property name="x" value="y">y</property>
                 The XML text may contain nested children and any number of
                 attributes; it is parsed and appended to the parent element.
    fileName  -- file the document is written back to after each insert

    Processing stops at the first instruction whose parent element cannot be
    found; inserts applied before that point have already been saved.
    Raises ExpatError (from parseString) if the XML text is not well-formed.
    """
    for insert in insertSTR.split("#"):
        # "*" separates the parent path from the XML text to insert.
        enode, val = insert.partition("*")[::2]
        ele = getNode(xmlDoc.documentElement, enode.strip().split("&"))
        if ele is None:
            print("Insert Operation Failed: Verify Properties file for Valid XML element Hierarchy, Index or Attribute Name")
            return

        # Parsing the text is simpler than creating every node and attribute
        # by hand.  The parsed element belongs to a temporary document, so
        # copy it into xmlDoc before attaching it.
        tempElement = parseString(val).documentElement
        ele.appendChild(xmlDoc.importNode(tempElement, True))
        saveXML(xmlDoc, fileName)
        print("Insert Operation Successfully Completed.")

def addAttribute(xmlDoc, attributeSTR, fileName):
    """Add (or overwrite) attributes as described by ``attributeSTR``.

    xmlDoc       -- parsed minidom Document to modify
    attributeSTR -- one or more instructions separated by "#".  Each
                    instruction is ``<element path>$<name>=<value>``.
    fileName     -- file the document is written back to after each change

    Processing stops at the first instruction that cannot be applied (element
    not found or attribute name missing); changes applied before that point
    have already been saved.
    """
    for attribute in attributeSTR.split("#"):
        # "$" separates the element path from the name=value pair.
        enode, val = attribute.partition("$")[::2]
        attributeName, attributeVal = val.rstrip().partition("=")[::2]
        ele = getNode(xmlDoc.documentElement, enode.strip().split("&"))
        # An empty name would create an invalid attribute, so reject it.
        if ele is None or not attributeName:
            print("Add Attribute Operation Failed: Verify Properties file for Valid XML element Hierarchy, Index or Attribute Name")
            return

        ele.setAttribute(attributeName, attributeVal)
        saveXML(xmlDoc, fileName)
        print("Add Attribute Operation Successfully Completed.")

#Convert Property file into Name Value Pair
# Driver: read xmlProp.properties into name/value pairs, parse the XML file it
# names, then run every operation the properties file specifies.
try:
    myvars = {}
    with open("xmlProp.properties") as myfile:
        for line in myfile:
            stripped = line.strip()
            # Skip blank lines and "#" comment lines instead of storing them.
            if not stripped or stripped.startswith("#"):
                continue
            name, var = line.partition("=")[::2]
            myvars[name.strip()] = var.rstrip()

    xmlLocation = myvars.get("xmlLocation")
    if not xmlLocation:
        print("xmlLocation is not specified in Properties File.")
    else:
        try:
            # Parse XML Document
            xmlDoc = minidom.parse(xmlLocation)
            mainElement = xmlDoc.documentElement

            # (property key, handler, label used in the "not specified"
            # message), in the order the operations are applied.
            operations = (
                ("update", updateNodes, "Update"),
                ("comment", commentNodes, "Comment"),
                ("insert", insertNodes, "Insert"),
                ("delete", deleteNodes, "Delete"),
                ("addattribute", addAttribute, "Add Attribute"),
            )
            for key, handler, label in operations:
                if key in myvars:
                    handler(xmlDoc, myvars[key], xmlLocation)
                else:
                    print("No %s operation is specified for XML File" % label)
        except ExpatError:
            print("XML file is not Well-Formed. Please check the XML Structure.")
        except IOError:
            print("XMl file does not exist.")
except IOError:
    print("Properties File does not exist.")

Leave a comment

Your email address will not be published. Required fields are marked *