Source code for headerparser.scanner

import re
from   warnings import warn
from   .errors  import MalformedHeaderError, UnexpectedFoldingError
from   .util    import ascii_splitlines

[docs]def scan_string(s, **kwargs): """ Scan a string for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input, plus a ``(None, body)`` pair representing the body (if any) after the header section. See `scan()` for more information on the exact behavior of the scanner. :param s: a string which will be broken into lines on CR, LF, and CR LF boundaries and passed to `scan()` :param kwargs: :ref:`scanner options <scan_opts>` :rtype: generator of pairs of strings :raises ScannerError: if the header section is malformed """ return scan(ascii_splitlines(s), **kwargs)
[docs]def scan_file(fp, **kwargs): """ Scan a file for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input, plus a ``(None, body)`` pair representing the body (if any) after the header section. See `scan()` for more information on the exact behavior of the scanner. .. deprecated:: 0.4.0 Use `scan()` instead. :param fp: A file-like object than can be iterated over to produce lines to pass to `scan()`. Opening the file in universal newlines mode is recommended. :param kwargs: :ref:`scanner options <scan_opts>` :rtype: generator of pairs of strings :raises ScannerError: if the header section is malformed """ warn('scan_file() is deprecated. Use scan() instead.', DeprecationWarning) return scan(fp, **kwargs)
[docs]def scan_lines(fp, **kwargs): """ Scan an iterable of lines for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input, plus a ``(None, body)`` pair representing the body (if any) after the header section. See `scan()` for more information on the exact behavior of the scanner. .. deprecated:: 0.4.0 Use `scan()` instead. :param iterable: an iterable of strings representing lines of input :param kwargs: :ref:`scanner options <scan_opts>` :rtype: generator of pairs of strings :raises ScannerError: if the header section is malformed """ warn('scan_lines() is deprecated. Use scan() instead.', DeprecationWarning) return scan(fp, **kwargs)
[docs]def scan(iterable, **kwargs): """ .. versionadded:: 0.4.0 Scan a text-file-like object or iterable of lines for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input, plus a ``(None, body)`` pair representing the body (if any) after the header section. All lines after the first blank line are concatenated & yielded as-is in a ``(None, body)`` pair. (Note that body lines which do not end with a line terminator will not have one appended.) If there is no empty line in ``iterable``, then no body pair is yielded. If the empty line is the last line in ``iterable``, the body will be the empty string. If the empty line is the *first* line in ``iterable`` and the ``skip_leading_newlines`` option is false (the default), then all other lines will be treated as part of the body and will not be scanned for header fields. :param iterable: a text-file-like object or iterable of strings representing lines of input :param kwargs: :ref:`scanner options <scan_opts>` :rtype: generator of pairs of strings :raises ScannerError: if the header section is malformed """ lineiter = iter(iterable) for name, value in _scan_next_stanza(lineiter, **kwargs): if name is not None: yield (name, value) elif value: yield (None, ''.join(lineiter))
[docs]def scan_next_stanza(iterator, **kwargs): """ .. versionadded:: 0.4.0 Scan a text-file-like object or iterator of lines for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input. Input processing stops as soon as a blank line is encountered, leaving the rest of the iterator unconsumed (If ``skip_leading_newlines`` is true, the function only stops on a blank line after a non-blank line). :param iterator: a text-file-like object or iterator of strings representing lines of input :param kwargs: :ref:`scanner options <scan_opts>` :rtype: generator of pairs of strings :raises ScannerError: if the header section is malformed """ for name, value in _scan_next_stanza(iterator, **kwargs): if name is not None: yield (name, value)
def _scan_next_stanza( iterator, separator_regex = re.compile(r'[ \t]*:[ \t]*'), # noqa: B008 skip_leading_newlines = False, ): """ .. versionadded:: 0.4.0 Like `scan_next_stanza()`, except it additionally yields as its last item a ``(None, flag)`` pair where ``flag`` is `True` iff the stanza was terminated by a blank line (thereby suggesting there is more input left to process), `False` iff the stanza was terminated by EOF. This is the core function that all other scanners ultimately call. """ name = None value = '' begun = False more_left = False if not hasattr(separator_regex, 'match'): separator_regex = re.compile(separator_regex) for line in iterator: line = line.rstrip('\r\n') if line.startswith((' ', '\t')): begun = True if name is not None: value += '\n' + line else: raise UnexpectedFoldingError(line) else: m = separator_regex.search(line) if m: begun = True if name is not None: yield (name, value) name = line[:m.start()] value = line[m.end():] elif line == '': if skip_leading_newlines and not begun: continue else: more_left = True break else: raise MalformedHeaderError(line) if name is not None: yield (name, value) yield (None, more_left)
[docs]def scan_next_stanza_string(s, **kwargs): """ .. versionadded:: 0.4.0 Scan a string for RFC 822-style header fields and return a pair ``(fields, extra)`` where ``fields`` is a list of ``(name, value)`` pairs for each header field in the input up to the first blank line and ``extra`` is everything after the first blank line (If ``skip_leading_newlines`` is true, the dividing point is instead the first blank line after a non-blank line); if there is no appropriate blank line in the input, ``extra`` is the empty string. :param s: a string to scan :param kwargs: :ref:`scanner options <scan_opts>` :rtype: pair of a list of pairs of strings and a string :raises ScannerError: if the header section is malformed """ lineiter = iter(ascii_splitlines(s)) fields = list(scan_next_stanza(lineiter, **kwargs)) body = ''.join(lineiter) return (fields, body)
[docs]def scan_stanzas(iterable, **kwargs): """ .. versionadded:: 0.4.0 Scan a text-file-like object or iterable of lines for zero or more stanzas of RFC 822-style header fields and return a generator of lists of ``(name, value)`` pairs, where each list represents a stanza of header fields in the input. The stanzas are terminated by blank lines. Consecutive blank lines between stanzas are treated as a single blank line. Blank lines at the end of the input are discarded without creating a new stanza. :param iterable: a text-file-like object or iterable of strings representing lines of input :param kwargs: :ref:`scanner options <scan_opts>` :rtype: generator of lists of pairs of strings :raises ScannerError: if the header section is malformed """ lineiter = iter(iterable) while True: fields = list(_scan_next_stanza(lineiter, **kwargs)) more_left = fields.pop()[1] if fields or more_left: yield fields else: break kwargs["skip_leading_newlines"] = True
[docs]def scan_stanzas_string(s, **kwargs): """ .. versionadded:: 0.4.0 Scan a string for zero or more stanzas of RFC 822-style header fields and return a generator of lists of ``(name, value)`` pairs, where each list represents a stanza of header fields in the input. The stanzas are terminated by blank lines. Consecutive blank lines between stanzas are treated as a single blank line. Blank lines at the end of the input are discarded without creating a new stanza. :param s: a string which will be broken into lines on CR, LF, and CR LF boundaries and passed to `scan_stanzas()` :param kwargs: :ref:`scanner options <scan_opts>` :rtype: generator of lists of pairs of strings :raises ScannerError: if the header section is malformed """ return scan_stanzas(ascii_splitlines(s), **kwargs)