Source code for headerparser.scanner

from .errors import MalformedHeaderError, UnexpectedFoldingError
from .util   import ascii_splitlines

[docs]def scan_string(s): """ Scan a string for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input, plus a ``(None, body)`` pair representing the body (if any) after the header section. See `scan_lines()` for more information on the exact behavior of the scanner. :param s: a string which will be broken into lines on CR, LF, and CR LF boundaries and passed to `scan_lines()` :rtype: generator of pairs of strings :raises MalformedHeaderError: if an invalid header line, i.e., a line without either a colon or leading whitespace, is encountered :raises UnexpectedFoldingError: if a folded (indented) line that is not preceded by a valid header line is encountered """ return scan_lines(ascii_splitlines(s))
[docs]def scan_file(fp): """ Scan a file for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input, plus a ``(None, body)`` pair representing the body (if any) after the header section. See `scan_lines()` for more information on the exact behavior of the scanner. :param fp: A file-like object than can be iterated over to produce lines to pass to `scan_lines()`. Opening the file in universal newlines mode is recommended. :rtype: generator of pairs of strings :raises MalformedHeaderError: if an invalid header line, i.e., a line without either a colon or leading whitespace, is encountered :raises UnexpectedFoldingError: if a folded (indented) line that is not preceded by a valid header line is encountered """ ### TODO: Handle files not opened in universal newlines mode? return scan_lines(fp)
[docs]def scan_lines(iterable): """ Scan an iterable of lines for RFC 822-style header fields and return a generator of ``(name, value)`` pairs for each header field in the input, plus a ``(None, body)`` pair representing the body (if any) after the header section. Each field value is a single string, the concatenation of one or more lines, with leading whitespace on lines after the first preserved. The ending of each line is converted to ``'\\n'`` (added if there is no ending), and the last line of the field value has its trailing line ending (if any) removed. .. note:: "Line ending" here means a CR, LF, or CR LF sequence at the end of one of the lines in ``iterable``. Unicode line separators, along with line endings occurring in the middle of a line, are not treated as line endings and are not trimmed or converted to ``\\n``. All lines after the first blank line are concatenated & yielded as-is in a ``(None, body)`` pair. (Note that body lines which do not end with a line terminator will not have one appended.) If there is no empty line in ``iterable``, then no body pair is yielded. If the empty line is the last line in ``iterable``, the body will be the empty string. If the empty line is the *first* line in ``iterable``, then all other lines will be treated as part of the body and will not be scanned for header fields. :param iterable: an iterable of strings representing lines of input :rtype: generator of pairs of strings :raises MalformedHeaderError: if an invalid header line, i.e., a line without either a colon or leading whitespace, is encountered :raises UnexpectedFoldingError: if a folded (indented) line that is not preceded by a valid header line is encountered """ lineiter = iter(iterable) name = None value = '' eof = False for line in lineiter: line = line.rstrip('\r\n') if line.startswith((' ', '\t')): if name is not None: value += '\n' + line else: raise UnexpectedFoldingError(line) elif ':' in line: if name is not None: yield (name, value) name, _, value = line.partition(':') name = name.rstrip(' \t') value = value.lstrip(' \t') elif line == '': break else: raise MalformedHeaderError(line) else: eof = True if name is not None: yield (name, value) if not eof: yield (None, ''.join(lineiter))