# Source code for zope.testing.formparser

"""HTML parser that extracts form information.

This is intended to support functional tests that need to extract
information from HTML forms returned by the publisher.

See :doc:`../formparser` for documentation.

This isn't intended to simulate a browser session; that's provided by
the `zope.testbrowser` package.

.. versionchanged:: 4.10.0
   Add support for Python 3.

"""
__docformat__ = "reStructuredText"


import html.parser as HTMLParser
import urllib.parse as urlparse


def parse(data, base=None):
    """Parse *data* and return the collection of forms it contains.

    *base* should be the URL from which *data* was retrieved.
    """
    return FormParser(data, base).parse()
class FormParser:
    """Parser that collects the forms found in an HTML document.

    *data* is the HTML text to parse and *base* is the URL it was
    retrieved from (or None).  When a base URL is known, form ``action``
    and input ``src`` attributes are resolved against it.  A ``<base>``
    element carrying an ``href`` replaces the base URL for the elements
    that follow it.

    Tokenizing is delegated to a standard-library ``HTMLParser`` instance
    whose handler methods are replaced by bound methods of this object.
    """

    def __init__(self, data, base=None):
        self.data = data
        self.base = base
        self._parser = HTMLParser.HTMLParser()
        self._parser.handle_data = self._handle_data
        self._parser.handle_endtag = self._handle_endtag
        self._parser.handle_starttag = self._handle_starttag
        self._parser.handle_startendtag = self._handle_starttag
        self._buffer = []
        self.current = None  # current form
        # Elements currently open; initialised here so that stray end
        # tags can be detected instead of raising AttributeError.
        self.textarea = None
        self.select = None
        self.forms = FormCollection()

    def parse(self):
        """Parse the document, returning the collection of forms."""
        self._parser.feed(self.data)
        self._parser.close()
        return self.forms

    # HTMLParser handlers

    def _handle_data(self, data):
        """Accumulate character data seen since the last start tag."""
        self._buffer.append(data)

    def _handle_endtag(self, tag):
        """Finish the textarea, select or option that *tag* closes."""
        if tag == "textarea":
            # Ignore a stray </textarea> that has no matching start tag.
            if self.textarea is not None:
                self.textarea.value = "".join(self._buffer)
                self.textarea = None
        elif tag == "select":
            self.select = None
        elif tag == "option":
            self._finish_option()

    def _handle_starttag(self, tag, attrs):
        """Record the form, field or base URL introduced by *tag*.

        *attrs* is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; when a name is repeated the last value wins.
        """
        # Text collected so far belongs to an earlier element.
        del self._buffer[:]
        d = dict(attrs)
        name = d.get("name")
        ident = d.get("id") or d.get("xml:id")
        if tag == "form":
            method = kwattr(d, "method", "get")
            action = self._url_attr(d, "action")
            enctype = kwattr(d, "enctype",
                             "application/x-www-form-urlencoded")
            self.current = Form(name, ident, method, action, enctype)
            self.forms.append(self.current)
        elif tag == "input":
            kind = kwattr(d, "type", "text")
            checked = "checked" in d
            disabled = "disabled" in d
            readonly = "readonly" in d
            src = self._url_attr(d, "src")
            value = d.get("value")
            size = intattr(d, "size")
            maxlength = intattr(d, "maxlength")
            self._add_field(
                Input(name, ident, kind, value, checked, disabled,
                      readonly, src, size, maxlength))
        elif tag == "button":
            pass
        elif tag == "textarea":
            disabled = "disabled" in d
            readonly = "readonly" in d
            self.textarea = Input(name, ident, "textarea", None, None,
                                  disabled, readonly, None, None, None)
            self.textarea.rows = intattr(d, "rows")
            self.textarea.cols = intattr(d, "cols")
            self._add_field(self.textarea)
            # The value will be set when the </textarea> is seen.
        elif tag == "base":
            # Valueless attributes are reported as None by HTMLParser.
            href = (d.get("href") or "").strip()
            # A <base> without a usable href (for example one carrying
            # only ``target``) leaves the current base URL untouched.
            if href:
                if self.base:
                    href = urlparse.urljoin(self.base, href)
                self.base = href
        elif tag == "select":
            disabled = "disabled" in d
            multiple = "multiple" in d
            size = intattr(d, "size")
            self.select = Select(name, ident, disabled, multiple, size)
            self._add_field(self.select)
        elif tag == "option":
            # An <option> outside any <select> has nowhere to go.
            if self.select is not None:
                disabled = "disabled" in d
                selected = "selected" in d
                value = d.get("value")
                label = d.get("label")
                option = Option(ident, value, selected, label, disabled)
                self.select.options.append(option)

    # Helpers:

    def _finish_option(self):
        """Fill in the most recent option of the open select from its text."""
        select = self.select
        # Ignore a stray </option> with no open select or no option in it.
        if select is None or not select.options:
            return
        option = select.options[-1]
        text = "".join(self._buffer)
        if not option.label:
            option.label = text
        # Only a missing value attribute falls back to the text; an
        # explicit ``value=""`` is kept as the empty string.
        if option.value is None:
            option.value = text
        if option.selected:
            if select.multiple:
                select.value.append(option.value)
            else:
                select.value = option.value

    def _url_attr(self, d, name):
        """Return URL attribute *name* of *d*, resolved against the base.

        Returns None when the attribute is missing, valueless or blank.
        """
        url = (d.get(name) or "").strip()
        if not url:
            return None
        if self.base:
            url = urlparse.urljoin(self.base, url)
        return url

    def _add_field(self, field):
        """Store *field* in the current form under its name.

        Fields sharing a name are gathered into a list in document
        order.  A field seen before any ``<form>`` start tag is not
        recorded, because there is no form to attach it to.
        """
        form = self.current
        if form is None:
            return
        if field.name in form:
            ob = form[field.name]
            if isinstance(ob, list):
                ob.append(field)
            else:
                form[field.name] = [ob, field]
        else:
            form[field.name] = field
def kwattr(d, name, default=None):
    """Return attribute *name* from *d*, stripped and converted to lowercase.

    *d* maps attribute names to their string values; a valueless
    attribute is expected to map to None.  *default* is returned when the
    attribute is missing, valueless, or blank after stripping.
    """
    value = d.get(name)
    # A value identical to the default is handed back untouched.
    if value is None or value == default:
        return default
    return value.strip().lower() or default
def intattr(d, name):
    """Return attribute *name* from *d* as an integer, or None.

    None is returned when the attribute is missing, valueless (mapped to
    None) or blank.  Any other text that is not a valid integer raises
    ``ValueError``.
    """
    text = (d.get(name) or "").strip()
    if not text:
        return None
    return int(text)
class FormCollection(list):
    """List of every form found on a page, with lookup by form name."""

    def __getattr__(self, name):
        """Return the first form whose ``name`` equals *name*.

        Raises ``AttributeError`` when no form in the list matches.
        """
        found = next((entry for entry in self if entry.name == name), None)
        if found is None:
            raise AttributeError(name)
        return found
class Form(dict):
    """One form of a page: a dict that also carries the form's attributes."""

    # This object should provide some method to prepare a dictionary
    # that can be passed directly as the value of the `form` argument
    # to the `http()` function of the Zope functional test.
    #
    # This is probably a low priority given the availability of the
    # `zope.testbrowser` package.

    def __init__(self, name, id, method, action, enctype):
        """Create an empty form and record the given attributes on it."""
        super().__init__()
        self.name, self.id = name, id
        self.method, self.action, self.enctype = method, action, enctype
class Input:
    """Input element."""

    # Class-level defaults; instances may override them.
    rows = cols = None

    def __init__(self, name, id, type, value, checked, disabled, readonly,
                 src, size, maxlength):
        """Record the given attribute values on the new instance."""
        super().__init__()
        self.name, self.id, self.type, self.value = name, id, type, value
        self.checked, self.disabled, self.readonly = (
            checked, disabled, readonly)
        self.src, self.size, self.maxlength = src, size, maxlength
class Select(Input):
    """Select element."""

    def __init__(self, name, id, disabled, multiple, size):
        """Create a select with no options yet.

        The value starts as an empty list when *multiple* is true and as
        None otherwise.
        """
        super().__init__(
            name=name,
            id=id,
            type="select",
            value=None,
            checked=None,
            disabled=disabled,
            readonly=None,
            src=None,
            size=size,
            maxlength=None,
        )
        self.options = []
        self.multiple = multiple
        if multiple:
            self.value = []
class Option:
    """One selectable choice belonging to a select element."""

    def __init__(self, id, value, selected, label, disabled):
        """Record the given attribute values on the new instance."""
        super().__init__()
        self.id, self.value = id, value
        self.selected, self.label, self.disabled = selected, label, disabled