# Source code for formatparse.api

"""Core parse API wrappers."""

from __future__ import annotations

from typing import Any, Iterator, List, Optional, Sequence, Union

from ._native import (
    FormatParser,
    ParseResult,
    Results,
    _compile,
    _findall,
    _findall_iter,
    _parse,
    _parse_batch,
    _search,
)
from .exceptions import RepeatedNameError
from .types import ExtraTypes, ValidationMode, ValidatorMap
from .validation import ValidationPipeline, post_parse_validate


def compile(pattern: str, extra_types: Optional[ExtraTypes] = None) -> FormatParser:
    """Compile a pattern into a :class:`FormatParser` for repeated use.

    A compiled parser can be applied to many strings without recompiling the
    underlying regex, which improves performance for repeated parsing.

    Calling ``compile`` again with the same *pattern* and equivalent
    ``extra_types`` (same converter ``pattern`` and ``regex_group_count`` per
    name) reuses the internal compiled-regex cache that is shared with
    :func:`parse`, :func:`search`, and :func:`findall`. Hot loops that call
    ``compile`` therefore do not pay full pattern-to-regex compilation on
    every iteration (see
    `issue #29 <https://github.com/eddiethedean/formatparse/issues/29>`_).

    **Custom types:** keys are the *type names* written after ``:`` in a field
    (for example ``Number`` in ``{:Number}`` or ``{x:Number}``). Values are
    callables, usually produced by :func:`with_pattern`, which attach a
    ``pattern`` regex fragment and an optional ``regex_group_count`` when the
    regex contains capturing parentheses. See the
    `Custom types guide <https://formatparse.readthedocs.io/en/latest/user_guides/custom_types.html>`_
    for examples with :func:`search` / :func:`findall` and for
    ``regex_group_count``.

    The cache fingerprints each name's ``pattern`` and ``regex_group_count``.
    If you mutate those attributes on a live converter object, reuse the same
    ``extra_types`` dict, and the fingerprint stays unchanged, you can see a
    stale compiled parser until the process restarts. Prefer a fresh dict or
    new function objects when changing patterns at runtime.

    **Pickling:** a :class:`FormatParser` only round-trips the pattern string.
    If you compiled with ``extra_types``, unpickling yields a parser
    **without** those converters; call :func:`compile` again with the same
    ``extra_types`` if you need them after ``pickle.loads``.

    :param pattern: Format specification pattern (e.g., ``"{name}: {age:d}"``)
    :type pattern: str
    :param extra_types: Optional mapping of custom type names to converters
        (see above)
    :type extra_types: dict, optional
    :returns: FormatParser object that can be used to parse strings
    :rtype: FormatParser
    :raises RepeatedNameError: If a repeated field name has mismatched types
    :raises PatternParseMismatch: For some malformed patterns (missing ``}``
        after a field); subclass of :exc:`ValueError`. :func:`parse` returns
        ``None`` for the same pattern.
    :raises ValueError: For other invalid patterns or internal errors

    Example::

        >>> parser = compile("{name}: {age:d}")
        >>> result = parser.parse("Alice: 30")
        >>> result.named['name']
        'Alice'
        >>> result.named['age']
        30
        >>> result2 = parser.parse("Bob: 25")
        >>> result2.named['name']
        'Bob'
        >>> result2.named['age']
        25
    """
    try:
        compiled = _compile(pattern, extra_types)
    except ValueError as exc:
        message = str(exc)
        # The native layer reports repeated-name conflicts as a plain
        # ValueError; recognise it by its message and re-raise it as the
        # dedicated exception type. Anything else propagates untouched.
        is_repeated_name = (
            "Repeated name" in message and "mismatched types" in message
        )
        if not is_repeated_name:
            raise
        raise RepeatedNameError(message) from exc
    return compiled
def parse(
    pattern: str,
    string: str,
    extra_types: Optional[ExtraTypes] = None,
    case_sensitive: bool = False,
    evaluate_result: bool = True,
    *,
    validators: Optional[ValidatorMap] = None,
    pipeline: Optional[ValidationPipeline] = None,
    validation_mode: ValidationMode = "strict",
) -> Optional[ParseResult]:
    """Parse a string using a format specification.

    The string is matched against a format pattern and the named or
    positional fields are extracted from it. The pattern syntax is based on
    Python's ``format()`` function syntax. The raw parse result is then handed
    to :func:`post_parse_validate` together with the validation options.

    :param pattern: Format specification pattern (e.g., ``"{name}: {age:d}"``)
    :type pattern: str
    :param string: String to parse
    :type string: str
    :param extra_types: Optional mapping of custom type names (after ``:`` in
        the field) to callables, typically from :func:`with_pattern`. Uses the
        same compiled-parser cache as :func:`compile` (pattern plus per-name
        ``pattern`` / ``regex_group_count``). See the
        `Custom types guide <https://formatparse.readthedocs.io/en/latest/user_guides/custom_types.html>`_.
    :type extra_types: dict, optional
    :param case_sensitive: Whether matching should be case sensitive
        (default: False)
    :type case_sensitive: bool
    :param evaluate_result: Whether to evaluate and convert result types
        (default: True)
    :type evaluate_result: bool
    :param validators: Optional map of field key to validator; see
        :func:`apply_validators`.
    :param pipeline: Optional :class:`ValidationPipeline` (mutually exclusive
        with ``validators``).
    :param validation_mode: ``"strict"``, ``"collect"``, or ``"lenient"`` for
        validation.
    :returns: ParseResult object if match found, None otherwise
    :rtype: ParseResult or None
    :raises ValueError: If the pattern is invalid in a way that still raises
        from the native compiler (for example some unclosed nested format
        specs), or if both ``validators`` and ``pipeline`` are set. For a
        narrow class of malformed patterns (missing ``}`` after a field), this
        function returns ``None`` while :func:`compile` raises
        :exc:`PatternParseMismatch`, which is a :exc:`ValueError` subclass
        (same split as the original ``parse`` library).
    :raises NotImplementedError: For unsupported pattern features (for example
        quoted dict keys).
    :raises ValidationError: If validation fails in strict mode
    :raises MultipleValidationErrors: If ``validation_mode='collect'`` and any
        validator fails

    Example::

        >>> result = parse("{name}: {age:d}", "Alice: 30")
        >>> result.named['name']
        'Alice'
        >>> result.named['age']
        30
        >>> result = parse("{}, {}", "Hello, World")
        >>> result.fixed
        ('Hello', 'World')
    """
    # Step 1: native parse; step 2: optional validation of whatever came back.
    raw_result = _parse(
        pattern, string, extra_types, case_sensitive, evaluate_result
    )
    validated = post_parse_validate(
        raw_result,
        validators=validators,
        pipeline=pipeline,
        validation_mode=validation_mode,
    )
    return validated
def parse_with_validation(
    parser: FormatParser,
    string: str,
    pipeline: ValidationPipeline,
    *,
    extra_types: Optional[ExtraTypes] = None,
    case_sensitive: bool = False,
    evaluate_result: bool = True,
    validation_mode: ValidationMode = "strict",
) -> Optional[ParseResult]:
    """Parse ``string`` with a compiled ``parser``, then run ``pipeline``.

    Equivalent to applying ``pipeline`` to the result of ``parser.parse(...)``
    with the same ``case_sensitive``, ``extra_types``, and ``evaluate_result``
    defaults as :func:`parse`.

    Use :func:`parse` or :meth:`ValidatedParser.parse` when you pass a
    ``validators`` map instead of a :class:`ValidationPipeline`.

    :param parser: Output of :func:`compile`.
    :param string: Text to parse.
    :param pipeline: Validation pipeline (required).
    :param extra_types: Forwarded to ``parser.parse``.
    :param case_sensitive: Forwarded to ``parser.parse``.
    :param evaluate_result: Forwarded to ``parser.parse``.
    :param validation_mode: Passed to :meth:`ValidationPipeline.apply`.
    :returns: Same as :meth:`FormatParser.parse` after validation, or ``None``
        if parse failed. In ``lenient`` mode, validation failures emit
        :exc:`ValidationWarning` and do not raise.
    :raises ValidationError: In ``strict`` mode when validation fails.
    :raises MultipleValidationErrors: In ``collect`` mode when validation
        fails.
    """
    # Note the positional order expected by the parser's ``parse`` method:
    # ``case_sensitive`` comes before ``extra_types``.
    parsed = parser.parse(
        string,
        case_sensitive,
        extra_types,
        evaluate_result,
    )
    outcome = pipeline.apply(parsed, mode=validation_mode)
    return outcome
class ValidatedParser:
    """Thin wrapper around :class:`FormatParser` with optional ``validators`` / ``pipeline`` on :meth:`parse`.

    Also provides :meth:`parse_with_validation` for the compile-once +
    pipeline case. Other attributes and methods are forwarded to the inner
    parser (e.g. ``search``, ``pattern``).

    Use when you compile once and want the same validation ergonomics as
    :func:`parse` keyword arguments.
    """

    __slots__ = ("_parser",)

    def __init__(self, parser: FormatParser) -> None:
        """Wrap ``parser``; it is stored in the single ``_parser`` slot."""
        object.__setattr__(self, "_parser", parser)

    def __getattr__(self, name: str) -> Any:
        """Forward attribute lookups that miss on the wrapper to the inner parser.

        :raises AttributeError: If the inner parser has no such attribute, or
            if the wrapper has no inner parser yet.
        """
        # ``__getattr__`` only runs after normal lookup has failed. If the
        # missing name is ``_parser`` itself, the slot is unset (an instance
        # made via ``__new__``, as ``copy`` and ``pickle`` do before restoring
        # state). Forwarding would evaluate ``self._parser`` again and recurse
        # until RecursionError, so fail with a plain AttributeError instead.
        if name == "_parser":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._parser, name)

    def parse(
        self,
        string: str,
        case_sensitive: bool = False,
        extra_types: Optional[ExtraTypes] = None,
        evaluate_result: bool = True,
        *,
        validators: Optional[ValidatorMap] = None,
        pipeline: Optional[ValidationPipeline] = None,
        validation_mode: ValidationMode = "strict",
    ) -> Optional[ParseResult]:
        """Parse ``string`` with optional ``validators`` or ``pipeline`` (same rules as :func:`parse`).

        :param string: Text to parse with the inner parser.
        :param case_sensitive: Forwarded to the inner parser's ``parse``.
        :param extra_types: Forwarded to the inner parser's ``parse``.
        :param evaluate_result: Forwarded to the inner parser's ``parse``.
        :param validators: Passed to :func:`post_parse_validate`.
        :param pipeline: Passed to :func:`post_parse_validate`.
        :param validation_mode: ``"strict"``, ``"collect"``, or ``"lenient"``
            (see :func:`parse`).
        :returns: Whatever :func:`post_parse_validate` returns for the inner
            parser's result.
        """
        r = self._parser.parse(string, case_sensitive, extra_types, evaluate_result)
        return post_parse_validate(
            r,
            validators=validators,
            pipeline=pipeline,
            validation_mode=validation_mode,
        )

    def parse_with_validation(
        self,
        string: str,
        pipeline: ValidationPipeline,
        *,
        extra_types: Optional[ExtraTypes] = None,
        case_sensitive: bool = False,
        evaluate_result: bool = True,
        validation_mode: ValidationMode = "strict",
    ) -> Optional[ParseResult]:
        """Parse ``string`` with the inner parser, then ``pipeline`` (see :func:`parse_with_validation`)."""
        # Inside a method body this name resolves to the module-level
        # function, not to this method.
        return parse_with_validation(
            self._parser,
            string,
            pipeline,
            extra_types=extra_types,
            case_sensitive=case_sensitive,
            evaluate_result=evaluate_result,
            validation_mode=validation_mode,
        )
def parse_batch(
    pattern: str,
    strings: Sequence[str],
    extra_types: Optional[ExtraTypes] = None,
    case_sensitive: bool = False,
    evaluate_result: bool = True,
) -> List[Optional[ParseResult]]:
    """Parse many strings with the same pattern (compile once, sequential apply).

    Intended for workloads that apply one pattern to many strings: the
    compiled regex is resolved once (same LRU cache as :func:`parse` /
    :func:`compile`) and each string is parsed in order. Non-matches appear as
    ``None`` at the corresponding index.

    ``strings`` is copied to a list of ``str`` on the Rust side (pass a
    ``list`` or ``tuple`` of strings; a bare ``str`` is treated as an iterable
    of characters, which is usually not what you want).

    :param pattern: Format specification pattern
    :param strings: Sequence of strings to parse (e.g. list or tuple)
    :param extra_types: Same as :func:`parse`
    :param case_sensitive: Same as :func:`parse`
    :param evaluate_result: Same as :func:`parse`
    :returns: List of :class:`ParseResult` or ``None`` per input string
    :raises ValueError: Same pattern-compile rules as :func:`parse`; if the
        pattern is in the narrow class where :func:`parse` returns ``None``,
        this function returns a list of ``None`` with one entry per input
        string.

    Example::

        >>> out = parse_batch("{:d}", ["1", "2", "x"])
        >>> out[0].fixed[0]
        1
        >>> out[2] is None
        True
    """
    # Materialise the input as a real list before handing it to the native
    # batch function.
    inputs = list(strings)
    return _parse_batch(
        pattern,
        inputs,
        extra_types,
        case_sensitive,
        evaluate_result,
    )
def findall(
    pattern: str,
    string: str,
    extra_types: Optional[ExtraTypes] = None,
    case_sensitive: bool = False,
    evaluate_result: bool = True,
    max_matches: Optional[int] = None,
) -> Union[Results, List[Any]]:
    """Find all matches of a pattern in a string.

    Searches for all non-overlapping occurrences of the pattern in the string.

    Returns a list-like :class:`Results` when the fast Rust path applies (no
    ``extra_types``, ``evaluate_result`` is True, and no nested dict field
    names). Otherwise returns a plain Python ``list`` of :class:`ParseResult`
    or :class:`Match` objects (same values as the original ``parse`` library).

    :param pattern: Format specification pattern
    :type pattern: str
    :param string: String to search
    :type string: str
    :param extra_types: Same semantics as :func:`parse`. When provided, the
        Rust fast path that returns :class:`Results` is disabled and a Python
        ``list`` is built instead (see returns below). See the
        `Custom types guide <https://formatparse.readthedocs.io/en/latest/user_guides/custom_types.html>`_.
    :type extra_types: dict, optional
    :param case_sensitive: Whether matching should be case sensitive
        (default: False)
    :type case_sensitive: bool
    :param evaluate_result: Whether to evaluate and convert result types
        (default: True)
    :type evaluate_result: bool
    :param max_matches: Stop after this many matches (default: no limit).
        Useful for untrusted input; see the Security guide in the project docs
        (``docs/security.rst``).
    :type max_matches: int, optional
    :returns: ``Results`` (preferred) or ``list`` of matches, depending on
        options
    :rtype: Results | list

    Example::

        >>> results = findall("ID:{id:d}", "ID:1 ID:2 ID:3")
        >>> len(results)
        3
        >>> results[0].named['id']
        1
        >>> results[1].named['id']
        2
        >>> results[2].named['id']
        3
        >>> for result in results:
        ...     print(result.named['id'])
        1
        2
        3
    """
    # All arguments are passed positionally, in the order of this signature.
    native_args = (
        pattern,
        string,
        extra_types,
        case_sensitive,
        evaluate_result,
        max_matches,
    )
    return _findall(*native_args)
def findall_iter(
    pattern: str,
    string: str,
    extra_types: Optional[ExtraTypes] = None,
    case_sensitive: bool = False,
    evaluate_result: bool = True,
    max_matches: Optional[int] = None,
) -> Iterator[Any]:
    """Yield non-overlapping matches for ``pattern`` in ``string``, one at a time.

    Semantics match :func:`findall` (same ``extra_types``, ``case_sensitive``,
    and ``evaluate_result``), but each step converts at most one match. This
    lowers peak memory when you stream results instead of building a full
    :class:`Results` or list.

    This is a **partial** answer to
    `issue #13 <https://github.com/eddiethedean/formatparse/issues/13>`_: it
    does **not** implement arbitrary chunked file reads with backtracking
    across chunk boundaries. For logs, a common pattern is line-sized strings
    (matches must not span lines)::

        parser = compile("ID:{id:d}")
        with open("log.txt") as f:
            for line in f:
                for m in parser.findall_iter(line.strip()):
                    process(m.named["id"])

    :param pattern: Format specification pattern
    :param string: String to search
    :param extra_types: Same as :func:`findall`
    :param case_sensitive: Same as :func:`findall`
    :param evaluate_result: Same as :func:`findall`
    :param max_matches: Same as :func:`findall` (default: no limit).
    :type max_matches: int, optional
    :returns: Iterator of :class:`ParseResult` or :class:`Match` (same as
        ``findall``)
    """
    # Return the native iterator directly rather than wrapping it in a
    # generator, so the native call still happens at call time.
    match_iterator = _findall_iter(
        pattern,
        string,
        extra_types,
        case_sensitive,
        evaluate_result,
        max_matches,
    )
    return match_iterator