DOM back to XML in Python

OK, time to crank up to speed, it’s been a lot longer than I intended between posts.

In the last episode we learned how to initialize many of the DOMs and DOM-like tools for Python from an XML document. Today we’re going to see how to convert these back to XML from the DOM. So fasten your seatbelts and let’s go.

import domParse
from xml.dom.ext.c14n import Canonicalize
def stringMinidom(filename):
    return Canonicalize(domParse.parseMinidom(filename))

def string4Dom(filename):
    return Canonicalize(domParse.parse4Dom(filename))

def stringDomlette(filename):
    return Canonicalize(domParse.parseDomlette(filename))

def stringLibXml(filename):
    # pretty-printed, which may not be what you want,
    # depending on the XML in question
    return domParse.parseLibXml(filename).serialize(encoding='utf-8', format=True)
    # 4DOM c14n breaks because libXML doesn't give you a DOM
    # return Canonicalize(domParse.parseLibXml(filename))

def stringPxDom(filename):
    import pxdom
    serializer = pxdom.LSSerializer()
    return serializer.writeToString(domParse.parsePxDom(filename))
    #return Canonicalize(domParse.parsePxDom(filename))

def main(filename):
    print '4DOM:', string4Dom(filename)
    print 'Domlette:', stringDomlette(filename)
    print 'MiniDom:', stringMinidom(filename)
    print 'LibXml:', stringLibXml(filename)
    print 'PxDom:', stringPxDom(filename)
if __name__ == '__main__': main(domParse.small_filename)

As you can see, there’s not much to it. This codes does require that you’ve installed the PyXML package, but if you’re serious about XML in Python, that will already be the case. In our next outing we can explore some of the less DOM-like, but more Pythonic ways to play with XML.



libxml: (instructions for the python bindings are linked from this page)


You may now return your trays to their upright positions.

Initializing a DOM in Python

There are many DOM options in Python, and I have trouble remembering how to load a document into the various DOMs. Here are a few common ones, although there are many variations on them (loading from URL or string, different configurations, etc.). This should provide a starting point.

# Examples for reading in various DOMs from an XML file
# MiniDOM
def parseMinidom(filename):
        from xml.dom.minidom import parse
        doc = parse(filename)
        return doc
    except Exception, e:
        return 'parseMinidom() failed with exception %s' % e
# 4DOM
def parse4Dom(filename):
        from xml.dom.ext.reader.Sax2 import Reader
        f = file(filename)
        reader = Reader(validate=0, keepAllWs=0, catName=None)
        doc = reader.fromStream(f) # slow!
        return doc
    except Exception, e:
        return 'parse4Dom() failed with exception %s' % e
# Domlette
def parseDomlette(filename):
        from Ft.Xml.Domlette import NonvalidatingReader as reader
        f = file(filename)
        uri = 'file:///%s' % filename # suppress warning
        doc = reader.parseStream(f, uri)
        return doc
    except Exception, e:
        return 'parseDomlette() failed with exception %s' % e
# libXml
def parseLibXml(filename):
        import libxml2
        f = file(filename)
        data =
        doc = libxml2.parseDoc(data)
        return doc
    except Exception, e:
        return 'parseLibXml() failed with exception %s' % e
# pxDom
def parsePxDom(filename):
        import pxdom
        doc = pxdom.parse(filename)
        return doc
    except Exception, e:
        return 'parsePxDom() failed with exception %s' % e
def main():
    import sys
    filename = sys.argv[1]
    print '4DOM:', parse4Dom(filename)
    print 'Domlette:', parseDomlette(filename)
    print 'MiniDom:', parseMinidom(filename)
    print 'LibXml:', parseLibXml(filename)
    print 'PxDom:', parsePxDom(filename)
if __name__ == '__main__': main()