Living Code

Tags: XML DOM Example Code Tutorial

Sep 15, 2004 • 1 min read

Initializing a DOM in Python

There are many DOM options in Python, and I have trouble remembering how to load a document into the various DOMs. Here are a few common ones, although there are many variations on them (loading from URL or string, different configurations, etc.). This should provide a starting point.

# Examples for reading in various DOMs from an XML file

# MiniDOM

def parseMinidom(filename):
    """Load ``filename`` into a DOM using the standard library's minidom.

    Returns the document object produced by ``xml.dom.minidom.parse`` on
    success.  On any ``Exception`` (missing file, malformed XML, ...) no
    exception propagates; a descriptive string is returned instead, so the
    caller must inspect the result rather than rely on a raise.
    """
    try:
        # Imported lazily so a failure here is reported like any parse error.
        from xml.dom.minidom import parse
        return parse(filename)
    except Exception as e:  # "as" form is valid on Python 2.6+ and 3.x
        return 'parseMinidom() failed with exception %s' % e

# 4DOM

def parse4Dom(filename):
    """Load ``filename`` into a 4DOM document via the Sax2 reader.

    Returns the document built by ``Reader.fromStream`` on success.  On any
    ``Exception`` (reader package not importable, unreadable file, parse
    error, ...) a descriptive string is returned instead of raising.
    """
    try:
        # Imported lazily so a missing package is reported, not fatal.
        from xml.dom.ext.reader.Sax2 import Reader
        # The with-block closes the stream even when parsing raises; the
        # original leaked the handle on that path.
        with open(filename) as f:
            reader = Reader(validate=0, keepAllWs=0, catName=None)
            return reader.fromStream(f)  # slow!
    except Exception as e:
        return 'parse4Dom() failed with exception %s' % e

# Domlette

def parseDomlette(filename):
    """Load ``filename`` into a Domlette document (Ft.Xml.Domlette).

    Returns the document built by ``NonvalidatingReader.parseStream`` on
    success.  On any ``Exception`` (package not importable, unreadable
    file, parse error, ...) a descriptive string is returned instead of
    raising.
    """
    try:
        # Imported lazily so a missing package is reported, not fatal.
        from Ft.Xml.Domlette import NonvalidatingReader as reader
        # The with-block closes the stream even when parsing raises; the
        # original leaked the handle on that path.
        with open(filename) as f:
            uri = 'file:///%s' % filename  # suppress warning
            return reader.parseStream(f, uri)
    except Exception as e:
        return 'parseDomlette() failed with exception %s' % e

# libXml

def parseLibXml(filename):
    """Load ``filename`` into a libxml2 document.

    The whole file is read into memory and handed to ``libxml2.parseDoc``.
    Returns the resulting document on success.  On any ``Exception``
    (libxml2 bindings not importable, unreadable file, parse error, ...) a
    descriptive string is returned instead of raising.
    """
    try:
        # Imported lazily so missing bindings are reported, not fatal.
        import libxml2
        # Close the file as soon as its contents are in memory, including
        # when read() itself fails.
        with open(filename) as f:
            data = f.read()
        return libxml2.parseDoc(data)
    except Exception as e:
        return 'parseLibXml() failed with exception %s' % e

# pxDom

def parsePxDom(filename):
    """Load ``filename`` into a pxdom document.

    Returns whatever ``pxdom.parse`` produces on success.  On any
    ``Exception`` (pxdom not importable, unreadable file, parse error, ...)
    a descriptive string is returned instead of raising.
    """
    try:
        # Imported lazily so a missing module is reported, not fatal.
        import pxdom
        return pxdom.parse(filename)
    except Exception as e:  # "as" form is valid on Python 2.6+ and 3.x
        return 'parsePxDom() failed with exception %s' % e

def main():
    """Parse the file named on the command line with every DOM loader.

    Prints one ``<label>: <result>`` line per loader, where the result is
    either the loaded document or that loader's failure message.  Exits
    with a usage message when no filename argument is supplied.
    """
    import sys
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaced as a bare IndexError.
        sys.exit('usage: %s XMLFILE' % sys.argv[0])
    filename = sys.argv[1]
    loaders = (
        ('4DOM', parse4Dom),
        ('Domlette', parseDomlette),
        ('MiniDom', parseMinidom),
        ('LibXml', parseLibXml),
        ('PxDom', parsePxDom),
    )
    for label, loader in loaders:
        # A single pre-formatted argument prints identically whether print
        # is a statement (Python 2) or a function (Python 3).
        print('%s: %s' % (label, loader(filename)))

# Run the DOM loader comparison only when executed as a script.
if __name__ == '__main__':
    main()
Post by: Dethe Elza 💜