Initializing a DOM in Python

There are many DOM options in Python, and I have trouble remembering how to load a document into the various DOMs. Here are a few common ones, although there are many variations on them (loading from URL or string, different configurations, etc.). This should provide a starting point.

# Examples for reading in various DOMs from an XML file
# MiniDOM
def parseMinidom(filename):
    try:
        from xml.dom.minidom import parse
        doc = parse(filename)
        return doc
    except Exception, e:
        return 'parseMinidom() failed with exception %s' % e
# 4DOM
def parse4Dom(filename):
    try:
        from xml.dom.ext.reader.Sax2 import Reader
        f = file(filename)
        reader = Reader(validate=0, keepAllWs=0, catName=None)
        doc = reader.fromStream(f) # slow!
        f.close()
        return doc
    except Exception, e:
        return 'parse4Dom() failed with exception %s' % e
# Domlette
def parseDomlette(filename):
    try:
        from Ft.Xml.Domlette import NonvalidatingReader as reader
        f = file(filename)
        uri = 'file:///%s' % filename # suppress warning
        doc = reader.parseStream(f, uri)
        f.close()
        return doc
    except Exception, e:
        return 'parseDomlette() failed with exception %s' % e
# libXml
def parseLibXml(filename):
    try:
        import libxml2
        f = file(filename)
        data = f.read()
        f.close()
        doc = libxml2.parseDoc(data)
        return doc
    except Exception, e:
        return 'parseLibXml() failed with exception %s' % e
# pxDom
def parsePxDom(filename):
    try:
        import pxdom
        doc = pxdom.parse(filename)
        return doc
    except Exception, e:
        return 'parsePxDom() failed with exception %s' % e
def main():
    import sys
    filename = sys.argv[1]
    print '4DOM:', parse4Dom(filename)
    print 'Domlette:', parseDomlette(filename)
    print 'MiniDom:', parseMinidom(filename)
    print 'LibXml:', parseLibXml(filename)
    print 'PxDom:', parsePxDom(filename)
if __name__ == '__main__': main()

Tags:

[] Posted on 2004-09-15 by Dethe Elza

Previous: Conspiracy Theory #3 Next: DOM back to XML in Python