Viewing file: test_pulldom.py (12.66 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
import io import unittest import xml.sax
from xml.sax.xmlreader import AttributesImpl from xml.sax.handler import feature_external_ges from xml.dom import pulldom
from test.support import findfile
# Location of the "test.xml" fixture, resolved through test.support.findfile
# with subdir="xmltestdata"; PullDOMTestCase.test_parse passes it to
# pulldom.parse() both as a filename and as an opened binary file.
tstfile = findfile("test.xml", subdir="xmltestdata")
# A handy XML snippet, containing attributes, a namespace prefix, and a
# self-closing tag.  The tests below depend on the exact event sequence this
# document produces (including the whitespace between tags), so keep the
# literal unchanged.
SMALL_SAMPLE = """<?xml version="1.0"?> <html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books"> <!-- A comment --> <title>Introduction to XSL</title> <hr/> <p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p> </html>"""
class PullDOMTestCase(unittest.TestCase):
    """Tests for pulldom.parse(), pulldom.parseString() and DOMEventStream."""

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # The stream reached through handler.stream is not closed by
        # iterating, so register an explicit cleanup for it.
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Whitespace after <html>
        evt, node = next(items)
        # XXX - A comment should be reported here, i.e. the event ought to
        # be pulldom.COMMENT.  Instead the comment is swallowed and the next
        # event is the whitespace that follows it:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must hand back the very same node object as the
        # matching start event.
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        # XXX No END_DOCUMENT item is ever obtained, so a further
        # next(items) cannot be checked against pulldom.END_DOCUMENT here
        # (see test_end_document).

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual(
            "hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() must drop the references to the parser and the stream.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        try:
            # Assert that the next node is END_DOCUMENT:
            evt, node = next(items)
            self.assertEqual(pulldom.END_DOCUMENT, evt)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")

    def test_getitem_deprecation(self):
        """Indexing a DOMEventStream must emit a DeprecationWarning."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        with self.assertWarnsRegex(DeprecationWarning,
                                   r'Use iterator protocol instead'):
            # This should have returned 'END_ELEMENT'.
            self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT)

    def test_external_ges_default(self):
        """External general entities must be disabled on the SAX parser."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        saxparser = parser.parser
        ges = saxparser.getFeature(feature_external_ges)
        self.assertEqual(ges, False)
class ThoroughTestCase(unittest.TestCase):
    """Test the hard-to-reach parts of pulldom."""

    def test_thorough_parse(self):
        """Test some of the hard-to-reach parts of PullDOM."""
        self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(pd)

    def test_thorough_sax2dom(self):
        """Test some of the hard-to-reach parts of SAX2DOM."""
        pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(pd, False)

    def _check_comment_and_pi(self, pd):
        """Consume two events from *pd*: the comment, then the PI."""
        evt, node = next(pd)
        self.assertEqual(pulldom.COMMENT, evt)
        self.assertEqual("a comment", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
        self.assertEqual("target", node.target)
        self.assertEqual("data", node.data)

    def _test_thorough(self, pd, before_root=True):
        """Test some of the hard-to-reach parts of the parser, using a mock
        parser.

        *pd* is the event stream to consume.  *before_root* says whether a
        comment and a processing instruction are expected ahead of the root
        element's start event.
        """

        evt, node = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))

        if before_root:
            self._check_comment_and_pi(pd)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)

        # The same comment/PI pair is expected again inside the root element.
        self._check_comment_and_pi(pd)

        evt, node = next(pd)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)

        evt, node = next(pd)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("text", node.data)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        evt, node = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, evt)
class SAXExerciser(object): """A fake sax parser that calls some of the harder-to-reach sax methods to ensure it emits the correct events"""
def setContentHandler(self, handler): self._handler = handler
def parse(self, _): h = self._handler h.startDocument()
# The next two items ensure that items preceding the first # start_element are properly stored and emitted: h.comment("a comment") h.processingInstruction("target", "data")
h.startElement("html", AttributesImpl({}))
h.comment("a comment") h.processingInstruction("target", "data")
h.startElement("p", AttributesImpl({"class": "paraclass"})) h.characters("text") h.endElement("p") h.endElement("html") h.endDocument()
def stub(self, *args, **kwargs): """Stub method. Does nothing.""" pass setProperty = stub setFeature = stub
class SAX2DOMExerciser(SAXExerciser):
    """A SAXExerciser variant that emits no processing instruction or comment
    ahead of the root element, because SAX2DOM can't handle them there."""

    def parse(self, _):
        """Replay the fixed callback sequence on the stored handler."""
        sink = self._handler

        sink.startDocument()
        sink.startElement("html", AttributesImpl({}))

        # Comment and PI appear only inside the root element.
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        paragraph_attrs = AttributesImpl({"class": "paraclass"})
        sink.startElement("p", paragraph_attrs)
        sink.characters("text")
        sink.endElement("p")

        sink.endElement("html")
        sink.endDocument()
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Install a fresh SAX2DOM instance as the parser's content handler."""
        self.pulldom = pulldom.SAX2DOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for pulldom.SAX2DOM."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, using *testname* as the message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as fin:
            sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
                                   len(SMALL_SAMPLE))
            for evt, node in sd:
                if evt == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        sax2dom = pulldom.SAX2DOM()
        sax2dom.startDocument()
        sax2dom.startElement("doc", {})
        sax2dom.characters("text")
        sax2dom.startElement("subelm", {})
        sax2dom.characters("text")
        sax2dom.endElement("subelm")
        sax2dom.characters("text")
        sax2dom.endElement("doc")
        sax2dom.endDocument()

        doc = sax2dom.document
        root = doc.documentElement
        (text1, elm1, text2) = root.childNodes
        text3 = elm1.childNodes[0]

        # Sibling links: text1 <-> elm1 <-> text2 under the root, while text3
        # is the only child of elm1.
        self.assertIsNone(text1.previousSibling)
        self.assertIs(text1.nextSibling, elm1)
        self.assertIs(elm1.previousSibling, text1)
        self.assertIs(elm1.nextSibling, text2)
        self.assertIs(text2.previousSibling, elm1)
        self.assertIsNone(text2.nextSibling)
        self.assertIsNone(text3.previousSibling)
        self.assertIsNone(text3.nextSibling)

        # Parent links.
        self.assertIs(root.parentNode, doc)
        self.assertIs(text1.parentNode, root)
        self.assertIs(elm1.parentNode, root)
        self.assertIs(text2.parentNode, root)
        self.assertIs(text3.parentNode, elm1)
        doc.unlink()
# Run the whole test module when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|