Source code for file_re.core

import re
import warnings
from pathlib import Path
from typing import Iterator, List, Optional, Tuple, Union

from ._file_re import _findall, _finditer, _match, _search
from .match import Match
from .pattern import (
    Pattern,
    _MatchIterator,
    _PathLike,
    _post_process_findall,
    _to_str_path,
    _validate_span_lines,
    _wrap_match,
)

# Process-wide latch: flips to True once the re.ASCII divergence warning
# has been emitted, so that warning is shown at most once.
_ASCII_WARNED = False


def _check_flags(flags: int) -> None:
    """
    Warn about :mod:`re` flags whose behavior differs in ``file_re``.

    Parameters
    ----------
    flags : int
        Bitwise OR of :mod:`re` flags supplied by the caller.

    Notes
    -----
    * ``re.DEBUG`` triggers a :class:`UserWarning` on every call.
    * ``re.ASCII`` triggers a :class:`UserWarning` only the first time it
      is seen; the module-level ``_ASCII_WARNED`` latch suppresses repeats.

    Both warnings use ``stacklevel=3`` so that, when this helper is called
    directly from a public entry point, the warning is attributed to the
    code that called that entry point.
    """
    global _ASCII_WARNED

    debug_requested = bool(flags & re.DEBUG)
    if debug_requested:
        warnings.warn(
            "re.DEBUG is not supported by file_re and is ignored.",
            category=UserWarning,
            stacklevel=3,
        )

    ascii_requested = bool(flags & re.ASCII)
    if not ascii_requested or _ASCII_WARNED:
        return

    ascii_message = (
        "re.ASCII in file_re disables Unicode character class matching entirely "
        "(Rust regex semantics); this is broader than Python's re.ASCII."
    )
    warnings.warn(ascii_message, category=UserWarning, stacklevel=3)
    # Set the latch only after the warning was issued successfully.
    _ASCII_WARNED = True


class file_re_cls:
    """
    Namespace of static methods that make up the public ``file_re`` API.

    The surface mirrors the module-level functions of :mod:`re`. Each
    method is a thin adapter over the Rust extension: it coerces ``Path``
    inputs, validates ``max_span_lines``, emits warnings for flags that
    diverge from :mod:`re`, and wraps results in Python :class:`Match`
    objects.
    """

    @staticmethod
    def search(
        pattern: str,
        file_path: _PathLike,
        flags: int = 0,
        *,
        max_span_lines: Optional[int] = None,
    ) -> Optional[Match]:
        """
        Return the first match found anywhere in the file.

        Parameters
        ----------
        pattern : str
            Regex pattern to look for.
        file_path : str or pathlib.Path
            Location of the file. ``.gz`` and ``.xz`` archives are decoded
            transparently.
        flags : int, optional
            Bitwise OR of :mod:`re` flags. Defaults to ``0``.
        max_span_lines : int or None, optional
            Bounds how much of the file is kept in memory. ``None``
            (default) loads the whole file; ``1`` scans line by line;
            ``N > 1`` uses a sliding ``N``-line window.

        Returns
        -------
        Match or None
            The first match, or ``None`` when nothing matches.

        Raises
        ------
        ValueError
            If ``max_span_lines`` is provided and not ``>= 1``, if
            ``pattern`` is invalid, or if ``flags`` contains
            :data:`re.LOCALE`.
        OSError
            If the file cannot be opened or read.

        Examples
        --------
        >>> from file_re import file_re
        >>> m = file_re.search(r"ERROR: (\\w+)", "server.log")
        >>> m.group(1) if m else None
        """
        _validate_span_lines(max_span_lines)
        # Called directly (not via a shared helper) so the stack depth
        # seen by _check_flags' warnings stays the same.
        _check_flags(flags)
        path_str = _to_str_path(file_path)
        raw_match = _search(pattern, path_str, flags, max_span_lines=max_span_lines)
        return _wrap_match(raw_match)

    @staticmethod
    def match(
        pattern: str,
        file_path: _PathLike,
        flags: int = 0,
        *,
        max_span_lines: Optional[int] = None,
    ) -> Optional[Match]:
        """
        Match the pattern anchored at the very start of the file.

        Parameters
        ----------
        pattern : str
            Regex pattern to match.
        file_path : str or pathlib.Path
            Location of the file.
        flags : int, optional
            Bitwise OR of :mod:`re` flags. Defaults to ``0``.
        max_span_lines : int or None, optional
            See :meth:`search`.

        Returns
        -------
        Match or None
            The match, or ``None`` if the pattern does not match at
            position 0.

        Raises
        ------
        ValueError
            If ``max_span_lines`` is not ``>= 1``, if ``pattern`` is
            invalid, or if ``flags`` contains :data:`re.LOCALE`.
        OSError
            If the file cannot be opened or read.
        """
        _validate_span_lines(max_span_lines)
        _check_flags(flags)
        path_str = _to_str_path(file_path)
        raw_match = _match(pattern, path_str, flags, max_span_lines=max_span_lines)
        return _wrap_match(raw_match)

    @staticmethod
    def findall(
        pattern: str,
        file_path: _PathLike,
        flags: int = 0,
        *,
        max_span_lines: Optional[int] = None,
    ) -> Union[List[Optional[str]], List[Tuple[Optional[str], ...]]]:
        """
        Collect every non-overlapping match in the file.

        Parameters
        ----------
        pattern : str
            Regex pattern to look for.
        file_path : str or pathlib.Path
            Location of the file.
        flags : int, optional
            Bitwise OR of :mod:`re` flags. Defaults to ``0``.
        max_span_lines : int or None, optional
            See :meth:`search`.

        Returns
        -------
        list
            A list of match strings when ``pattern`` has no capturing
            groups; otherwise a list of tuples holding the captured groups
            (non-participating groups are ``None``).

        Raises
        ------
        ValueError
            If ``max_span_lines`` is not ``>= 1``, if ``pattern`` is
            invalid, or if ``flags`` contains :data:`re.LOCALE`.
        OSError
            If the file cannot be opened or read.
        """
        _validate_span_lines(max_span_lines)
        _check_flags(flags)
        path_str = _to_str_path(file_path)
        raw_results = _findall(pattern, path_str, flags, max_span_lines=max_span_lines)
        return _post_process_findall(raw_results)

    @staticmethod
    def finditer(
        pattern: str,
        file_path: _PathLike,
        flags: int = 0,
        *,
        max_span_lines: Optional[int] = None,
    ) -> Iterator[Match]:
        """
        Lazily iterate over every non-overlapping match in the file.

        Parameters
        ----------
        pattern : str
            Regex pattern to look for.
        file_path : str or pathlib.Path
            Location of the file.
        flags : int, optional
            Bitwise OR of :mod:`re` flags. Defaults to ``0``.
        max_span_lines : int or None, optional
            See :meth:`search`.

        Returns
        -------
        Iterator of Match
            Yields :class:`Match` objects lazily. The underlying file is
            closed when the iterator is exhausted or garbage collected.

        Raises
        ------
        ValueError
            If ``max_span_lines`` is not ``>= 1``, if ``pattern`` is
            invalid, or if ``flags`` contains :data:`re.LOCALE`.
        OSError
            If the file cannot be opened.

        Examples
        --------
        >>> from file_re import file_re
        >>> for m in file_re.finditer(r"\\bERROR\\b", "server.log", max_span_lines=1):
        ...     print(m.span(), m.group())
        """
        _validate_span_lines(max_span_lines)
        _check_flags(flags)
        path_str = _to_str_path(file_path)
        raw_iterator = _finditer(pattern, path_str, flags, max_span_lines=max_span_lines)
        return _MatchIterator(raw_iterator)

    @staticmethod
    def compile(pattern: str, flags: int = 0) -> Pattern:
        """
        Build a reusable :class:`Pattern` object from a regex string.

        Parameters
        ----------
        pattern : str
            The regex source string.
        flags : int, optional
            Bitwise OR of :mod:`re` flags. Defaults to ``0``.

        Returns
        -------
        Pattern
            A compiled pattern that exposes ``search``, ``match``,
            ``findall``, and ``finditer`` methods.

        Raises
        ------
        ValueError
            If ``pattern`` is invalid, or if ``flags`` contains
            :data:`re.LOCALE`.
        """
        _check_flags(flags)
        compiled = Pattern(pattern, flags)
        return compiled