__doc__ = """
This module a single class, namely :class:`Pregex`, which
constitutes the base class for every other class within `pregex`.
Classes & methods
-------------------------------------------
Below are listed all classes within :py:mod:`pregex.core.pre`
along with any possible methods they may possess.
"""
import re as _re
import enum as _enum
import pregex.core.exceptions as _ex
from typing import Union as _Union
from typing import Optional as _Optional
from typing import Iterator as _Iterator
class _Type(_enum.Enum):
'''
This enum represents all possible types of a RegEx pattern.
'''
Alternation = 0
Assertion = 1
Class = 2
Empty = 3
Group = 4
Other = 5
Quantifier = 6
Token = 7
[docs]class Pregex():
'''
Wraps the provided pattern within an instance of this class.
:param str pattern: The pattern that is to be wrapped within an instance \
of this class. Defaults to the empty string ``''``.
:param bool escape: Determines whether to escape the provided pattern or not. \
Defaults to ``True``.
:raises InvalidArgumentTypeException: Parameter ``pattern`` is not a string.
:note: This class constitutes the base class for every other class within the `pregex` package.
'''
'''
Determines the groupping rules of each Pregex instance type:
:schema: __groupping_rules[type] => (on_concat, on_quantify, on_assertion)
'''
__groupping_rules: dict[_Type, str] = {
_Type.Alternation: (True, True, True),
_Type.Assertion : (False, True, False),
_Type.Class: (False, False, False),
_Type.Empty: (False, False, False),
_Type.Group: (False, False, False),
_Type.Other: (False, True, False),
_Type.Quantifier: (False, True, False),
_Type.Token: (False, False, False),
}
'''
The totality of active RegEx flags.
'''
__flags: _re.RegexFlag = _re.MULTILINE | _re.DOTALL
def __init__(self, pattern: str = '', escape: bool = True) -> 'Pregex':
'''
Wraps the provided pattern within an instance of this class.
:param str pattern: The pattern that is to be wrapped within an instance \
of this class. Defaults to the empty string ``''``.
:param bool escape: Determines whether to escape the provided pattern or not. \
Defaults to ``True``.
:raises InvalidArgumentTypeException: Parameter ``pattern`` is not a string.
:note: This class constitutes the base class for every other class within the `pregex` package.
'''
if not isinstance(pattern, str):
message = "Provided argument \"pattern\" is not a string."
raise _ex.InvalidArgumentTypeException(message)
if escape:
self.__pattern = __class__.__escape(pattern)
else:
self.__pattern = pattern
self.__type, self.__repeatable = __class__.__infer_type(self.__pattern)
self.__compiled: _re.Pattern = None
'''
Public Methods
'''
[docs] def print_pattern(self, include_flags: bool = False) -> None:
'''
Prints this instance's underlying RegEx pattern.
:param bool include_flags: Determines whether to display the \
used RegEx flags along with the pattern. Defaults to ``False``.
'''
print(self.get_pattern(include_flags))
[docs] def get_pattern(self, include_flags: bool = False) -> str:
'''
Returns this instance's underlying RegEx pattern as a string.
:param bool include_flags: Determines whether to display the \
used RegEx flags along with the pattern. Defaults to ``False``.
:note: This method is to be preferred over str() when one needs \
to display this instance's underlying Regex pattern.
'''
pattern = repr(self)
return f"/{pattern}/gmsu" if include_flags else pattern
[docs] def get_compiled_pattern(self, discard_after: bool = True) -> _re.Pattern:
'''
Returns this instance's underlying RegEx pattern as a ``re.Pattern`` instance.
:param bool discard_after: Determines whether the compiled pattern is to be \
discarded after the program has exited from this method, or to be retained \
so that any further attempt at matching a string will use the compiled pattern \
instead of the regular one. Defaults to ``True``.
'''
if self.__compiled is None:
self.compile()
compiled = self.__compiled
if discard_after:
self.__compiled = None
return compiled
[docs] def compile(self) -> None:
'''
Compiles the underlying RegEx pattern. After invoking this method, \
any further attempt at matching a string will be making use of the \
compiled RegEx pattern.
'''
self.__compiled = _re.compile(self.get_pattern(), flags=self.__flags)
[docs] @staticmethod
def purge() -> None:
'''
Clears the regular expression caches.
'''
_re.purge()
[docs] def has_match(self, source: str, is_path: bool = False) -> bool:
'''
Returns ``True`` if at least one match is found within the provided text.
:param str source: The text that is to be examined.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
if is_path:
source = self.__extract_text(source)
return bool(_re.search(self.__pattern, source, flags=self.__flags) \
if self.__compiled is None else self.__compiled.search(source))
[docs] def is_exact_match(self, source: str, is_path: bool = False) -> bool:
'''
Returns ``True`` only if the provided text matches this pattern exactly.
:param str source: The text that is to be examined.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
if is_path:
source = self.__extract_text(source)
return bool(_re.fullmatch(self.__pattern, source, flags=self.__flags) \
if self.__compiled is None else self.__compiled.fullmatch(source))
[docs] def iterate_matches(self, source: str, is_path: bool = False) -> _Iterator[str]:
'''
Generates any possible matches found within the provided text.
:param str source: The text that is to be examined.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
for match in self.__iterate_match_objects(source, is_path):
yield match.group(0)
[docs] def iterate_matches_and_pos(self, source: str, is_path: bool = False) -> _Iterator[tuple[str, int, int]]:
'''
Generates any possible matches found within the provided text \
along with their exact position.
:param str source: The text that is to be examined.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
for match in self.__iterate_match_objects(source, is_path):
yield (match.group(0), *match.span())
[docs] def iterate_matches_with_context(self, source: str, n_left: int = 5,
n_right: int = 5, is_path: bool = False) -> _Iterator[str]:
'''
Generates any possible matches found within the provided text, \
along with any of its surrounding context, the exact length of \
which can be configured through this method's parameters.
:param str source: The text that is to be examined.
:param int n_left: The number of characters representing the context \
on the left side of the match. Defaults to ``5``.
:param int n_right: The number of characters representing the context \
on the right side of the match. Defaults to ``5``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:raises InvalidArgumentTypeException: Either parameter ``n_left`` or \
``n_right`` is not an integer.
:raises InvalidArgumentValueException: Either parameter ``n_left`` or \
``n_right`` has a value of less than zero.
'''
if not isinstance(n_left, int) or isinstance(n_left, bool):
message = "Provided argument \"n_left\" is not an integer."
raise _ex.InvalidArgumentTypeException(message)
if not isinstance(n_right, int) or isinstance(n_right, bool):
message = "Provided argument \"n_right\" is not an integer."
raise _ex.InvalidArgumentTypeException(message)
if n_left < 0:
message = "Parameter \"n_left\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
if n_right < 0:
message = "Parameter \"n_right\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
for _, start, end in self.iterate_matches_and_pos(source, is_path):
yield source[max(start - n_left, 0):min(end + n_right, len(source))]
[docs] def iterate_captures(self, source: str, include_empty: bool = True,
is_path: bool = False) -> _Iterator[tuple[str]]:
'''
Generates tuples, one tuple per match, where each tuple contains \
all of its corresponding match's captured groups.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures \
into the results. Defaults to ``True``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
value will be ``None``.
'''
for match in self.__iterate_match_objects(source, is_path):
yield match.groups() if include_empty else \
tuple(group for group in match.groups() if group != '')
[docs] def iterate_captures_and_pos(self, source: str, include_empty: bool = True,
relative_to_match : bool = False, is_path: bool = False) -> _Iterator[list[tuple[str, int, int]]]:
'''
Generates lists of tuples, one list per match, where each tuple contains one \
of its corresponding match's captured groups along with its exact position \
within the text.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures into the \
results. Defaults to ``True``.
:param bool relative_to_match: If ``True``, then each group's position-indices \
are calculated relative to the group's corresponding match, not to the whole \
string. Defaults to ``False``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
tuple will be ``(None, -1, -1)``.
'''
for match in self.__iterate_match_objects(source, is_path):
groups, counter = list(), 0
for group in match.groups():
counter += 1
if include_empty or (group != ''):
start, end = match.span(counter)
if relative_to_match and start > -1:
start, end = start - match.start(0), end - match.start(0)
groups.append((group, start, end))
yield groups
[docs] def iterate_named_captures(self, source: str, include_empty: bool = True,
is_path: bool = False) -> _Iterator[dict[str, str]]:
'''
Generates dictionaries, one dictionary per match, where each dictionary \
contains key-value pairs of any named captured groups that belong to its \
corresponding match, with each key being the name of the captured group, \
whereas its corresponding value will be the actual captured text.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures \
into the results. Defaults to ``True``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
key-value pair will be ``name --> None``.
'''
for match in self.__iterate_match_objects(source, is_path):
yield match.groupdict() if include_empty else \
{k : v for k, v in match.groupdict().items() if v != ''}
[docs] def iterate_named_captures_and_pos(self, source: str, include_empty: bool = True,
relative_to_match : bool = False, is_path: bool = False) -> _Iterator[dict[str, tuple[str, int, int]]]:
'''
Generates dictionaries, one dictionary per match, where each dictionary \
contains key-value pairs of any named captured groups that belong to its\
corresponding match, with each key being the name of the captured group, \
whereas its corresponding value will be a tuple containing the actual \
captured group along with its exact position within the text.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures into the \
results. Defaults to ``True``.
:param bool relative_to_match: If ``True``, then each group's position-indices \
are calculated relative to the group's corresponding match, not to the whole \
string. Defaults to ``False``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
key-value pair will be ``name --> (None, -1, -1)``.
'''
for match in self.__iterate_match_objects(source, is_path):
groups, counter = dict(), 0
for k, v in match.groupdict().items():
counter += 1
if include_empty or (v != ''):
start, end = match.span(counter)
if relative_to_match and start > -1:
start, end = start - match.start(0), end - match.start(0)
groups.update({k: (v, start, end)})
yield groups
[docs] def get_matches(self, source: str, is_path: bool = False) -> list[str]:
'''
Returns a list containing any possible matches found within \
the provided text.
:param str source: The text that is to be examined.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
return list(match for match in self.iterate_matches(source, is_path))
[docs] def get_matches_and_pos(self, source: str, is_path: bool = False) -> list[tuple[str, int, int]]:
'''
Returns a list containing any possible matches found within the \
provided text along with their exact position.
:param str source: The text that is to be examined.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
return list(match for match in self.iterate_matches_and_pos(source, is_path))
[docs] def get_matches_with_context(self, source: str, n_left: int = 5, n_right: int = 5,
is_path: bool = False) -> list[str]:
'''
Returns a list containing any possible matches found within the \
provided text, along with any of its surrounding context, the exact \
length of which can be configured through this method's parameters.
:param str source: The text that is to be examined.
:param int n_left: The number of characters representing the context \
on the left side of the match. Defaults to ``5``.
:param int n_right: The number of characters representing the context \
on the right side of the match. Defaults to ``5``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:raises InvalidArgumentTypeException: Either parameter ``n_left`` or \
``n_right`` is not an integer.
:raises InvalidArgumentValueException: Either parameter ``n_left`` or \
``n_right`` has a value of less than zero.
'''
return list(match for match in self.iterate_matches_with_context(
source, n_left, n_right, is_path))
[docs] def get_captures(self, source: str, include_empty: bool = True, is_path: bool = False) -> list[tuple[str]]:
'''
Returns a list of tuples, one tuple per match, where each tuple contains \
all of its corresponding match's captured groups.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures \
into the results. Defaults to ``True``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
value will be ``None``.
'''
return list(group for group in self.iterate_captures(source, include_empty, is_path))
[docs] def get_captures_and_pos(self, source: str, include_empty: bool = True,
relative_to_match: bool = False, is_path: bool = False) -> list[list[tuple[str, int, int]]]:
'''
Returns a list containing lists of tuples, one list per match, where each \
tuple contains one of its corresponding match's captured groups along with \
its exact position within the text.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures into the \
results. Defaults to ``True``.
:param bool relative_to_match: If ``True``, then each group's position-indices \
are calculated relative to the group's corresponding match, not to the whole \
string. Defaults to ``False``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
tuple will be ``(None, -1, -1)``.
'''
return list(tup for tup in self.iterate_captures_and_pos(
source, include_empty, relative_to_match, is_path))
[docs] def get_named_captures(self, source: str,
include_empty: bool = True, is_path: bool = False) -> list[dict[str, str]]:
'''
Returns a dictionary of tuples, one dictionary per match, where each \
dictionary contains key-value pairs of any named captured groups that \
belong to its corresponding match, with each key being the name of the \
captured group, whereas its corresponding value will be the actual \
captured text.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures \
into the results. Defaults to ``True``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
key-value pair will be ``name --> None``.
'''
return list(group for group in self.iterate_named_captures(source, include_empty, is_path))
[docs] def get_named_captures_and_pos(self, source: str, include_empty: bool = True,
relative_to_match: bool = False, is_path: bool = False) -> list[dict[str, tuple[str, int, int]]]:
'''
Returns a dictionary of tuples, one dictionary per match, where each \
dictionary contains key-value pairs of any named captured groups that \
belong to its corresponding match, with each key being the name of the \
captured group, whereas its corresponding value will be a tuple containing \
the actual captured group along with its exact position within the text.
:param str source: The text that is to be examined.
:param bool include_empty: Determines whether to include empty captures into the \
results. Defaults to ``True``.
:param bool relative_to_match: If ``True``, then each group's position-indices \
are calculated relative to the group's corresponding match, not to the whole \
string. Defaults to ``False``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:note: In case there exists an optional capturing group within the pattern, \
that has not been captured by a match, then that capture's corresponding \
key-value pair will be ``name --> (None, -1, -1)``.
'''
return list(group for group in self.iterate_named_captures_and_pos(
source, include_empty, relative_to_match, is_path))
[docs] def replace(self, source: str, repl: str, count: int = 0, is_path: bool = False) -> str:
'''
Replaces all or some of the occuring matches with ``repl`` and \
returns the resulting string. If there are no matches, then this \
method will return the provided text without modifying it.
:param str source: The text that is to be matched and modified.
:param str repl: The string that is to replace any matches.
:param int count: The number of matches that are to be replaced, \
starting from left to right. A value of ``0`` indicates that \
all matches must be replaced. Defaults to ``0``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
:raises InvalidArgumentValueException: Parameter ``count`` has a value of \
less than zero.
'''
if count < 0:
message = "Parameter \"count\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
if is_path:
source = self.__extract_text(source)
return _re.sub(str(self), repl, source, count, flags=self.__flags)
[docs] def split_by_match(self, source: str, is_path: bool = False) -> list[str]:
'''
Splits the provided text based on any occuring matches and returns \
the result as a list containing each individual part of the text \
after the split.
:param str source: The text that is to be matched and split.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
if is_path:
source = self.__extract_text(source)
split_list, index = list(), 0
for _, start, end in self.iterate_matches_and_pos(source):
split_list.append(source[index:start])
index = end
split_list.append(source[index:])
return split_list
[docs] def split_by_capture(self, source: str, include_empty: bool = True, is_path: bool = False) -> list[str]:
'''
Splits the provided text based on any occuring captures and returns \
the result as alist containing each individual part of the text \
after the split.
:param str source: The piece of text that is to be matched and split.
:param bool include_empty: Determines whether to include empty groups into the results. \
Defaults to ``True``.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read. Defaults to ``False``.
'''
if is_path:
source = self.__extract_text(source)
split_list, index = list(), 0
for groups in self.iterate_captures_and_pos(source, include_empty):
for group, start, end in groups:
if group is None:
continue
split_list.append(source[index:start])
index = end
split_list.append(source[index:])
return split_list
'''
Quantifiers
'''
[docs] def optional(self, is_greedy: bool = True)-> 'Pregex':
'''
Applies quantifier ``?`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to ``True``.
'''
if self._get_type() == _Type.Empty:
return self
return __class__(
f"{self._quantify_conditional_group()}?{'' if is_greedy else '?'}",
escape=False)
[docs] def indefinite(self, is_greedy: bool = True) -> 'Pregex':
'''
Applies quantifier ``*`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to ``True``.
:raises CannotBeRepeatedException: This instance represents a non-repeatable pattern.
'''
if self._get_type() == _Type.Empty:
return self
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
return __class__(
f"{self._quantify_conditional_group()}*{'' if is_greedy else '?'}",
escape=False)
[docs] def one_or_more(self, is_greedy: bool = True) -> 'Pregex':
'''
Applies quantifier ``+`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to ``True``.
:raises CannotBeRepeatedException: This instance represents a non-repeatable pattern.
'''
if self._get_type() == _Type.Empty:
return self
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
return __class__(
f"{self._quantify_conditional_group()}+{'' if is_greedy else '?'}",
escape=False)
[docs] def exactly(self, n: int) -> 'Pregex':
'''
Applies quantifier ``{n}`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param int n: The exact number of times that the patterns is to be matched.
:raises InvalidArgumentTypeException: Parameter ``n`` is not an integer.
:raises InvalidArgumentValueException: Parameter ``n`` has a value of less \
than zero.
:raises CannotBeRepeatedException: Parameter ``n`` has a value of greater \
than one, while this instance represents a non-repeatable pattern.
'''
if not isinstance(n, int) or isinstance(n, bool):
message = "Provided argument \"n\" is not an integer."
raise _ex.InvalidArgumentTypeException(message)
if n == 0:
return Pregex()
if n == 1:
return self
else:
if n < 0:
message = "Parameter \"n\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
if self._get_type() == _Type.Empty:
return self
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
return __class__(
f"{self._quantify_conditional_group()}{{{n}}}",
escape=False)
[docs] def at_least(self, n: int, is_greedy: bool = True)-> 'Pregex':
'''
Applies quantifier ``{n,}`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param int n: The minimum number of times that the pattern is to be matched.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to ``True``.`
:raises InvalidArgumentTypeException: Parameter ``n`` is not an integer.
:raises InvalidArgumentValueException: Parameter ``n`` has a value of \
less than zero.
:raises CannotBeRepeatedException: This instance represents a \
non-repeatable pattern.
'''
if not isinstance(n, int) or isinstance(n, bool):
message = "Provided argument \"n\" is not an integer."
raise _ex.InvalidArgumentTypeException(message)
if n == 0:
return self.indefinite(is_greedy)
elif n == 1:
return self.one_or_more(is_greedy)
else:
if n < 0:
message = "Parameter \"n\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
if self._get_type() == _Type.Empty:
return self
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
return __class__(
f"{self._quantify_conditional_group()}{{{n},}}{'' if is_greedy else '?'}",
escape=False)
[docs] def at_most(self, n: _Optional[int], is_greedy: bool = True) -> 'Pregex':
'''
Applies quantifier ``{,n}`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param int n: The maximum number of times that the pattern is to be matched.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to ``True``.
:raises InvalidArgumentTypeException: Parameter ``n`` is neither an \
integer nor ``None``.
:raises InvalidArgumentValueException: Parameter ``n`` has a value of \
less than zero.
:raises CannotBeRepeatedException: Parameter ``n`` has a value of \
greater than one, while this instance represents a non-repeatable \
pattern.
:note: Setting ``n`` equal to ``None`` indicates that there is no upper limit to \
the number of times the pattern is to be repeated.
'''
if not isinstance(n, int) or isinstance(n, bool):
if n == None:
return self.indefinite(is_greedy)
message = "Provided argument \"n\" is neither an integer nor None."
raise _ex.InvalidArgumentTypeException(message)
elif n == 0:
return self.exactly(n)
elif n == 1:
return self.optional(is_greedy)
else:
if n < 0:
message = "Parameter \"n\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
if self._get_type() == _Type.Empty:
return self
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
return __class__(
f"{self._quantify_conditional_group()}{{,{n}}}{'' if is_greedy else '?'}",
escape=False)
[docs] def at_least_at_most(self, n: int, m: _Optional[int], is_greedy: bool = True) -> 'Pregex':
'''
Applies quantifier ``{n,m}`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param int n: The minimum number of times that the pattern is to be matched.
:param int m: The minimum number of times that the pattern is to be matched.
:param bool is_greedy: Determines whether to declare this quantifier as greedy. \
When declared as such, the regex engine will try to match \
the expression as many times as possible. Defaults to ``True``.`
:raises InvalidArgumentTypeException:
- Parameter ``pre`` is neither a ``Pregex`` instance nor a string.
- Parameter ``n`` is not an integer.
- Parameter ``m`` is neither an integer nor ``None``.
:raises InvalidArgumentValueException:
- Either parameter ``n`` or ``m`` has a value of less than zero.
- Parameter ``n`` has a greater value than that of parameter ``m``.
:raises CannotBeRepeatedException: Parameter ``m`` has a value of greater \
than one, while this instance represents a non-repeatable pattern.
:note:
- Parameter ``is_greedy`` has no effect in the case that ``n`` equals ``m``.
- Setting ``m`` equal to ``None`` indicates that there is no upper limit to the \
number of times the pattern is to be repeated.
'''
if not isinstance(n, int) or isinstance(n, bool):
message = "Provided argument \"n\" is not an integer."
raise _ex.InvalidArgumentTypeException(message)
elif not isinstance(m, int) or isinstance(m, bool):
if m is not None:
message = "Provided argument \"m\" is neither an integer nor \"None\"."
raise _ex.InvalidArgumentTypeException(message)
elif n < 0:
message = "Parameter \"n\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
elif m < 0:
message = "Parameter \"m\" can't be negative."
raise _ex.InvalidArgumentValueException(message)
elif m < n:
message = "The value of parameter \"m\" can't be"
message += " less than the value of parameter \"n\"."
raise _ex.InvalidArgumentValueException(message)
if n == m:
return self.exactly(n)
elif n == 0:
return self.at_most(m, is_greedy)
elif m is None:
return self.at_least(n, is_greedy)
else:
if self._get_type() == _Type.Empty:
return self
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
return __class__(
f"{self._quantify_conditional_group()}{{{n},{m}}}{'' if is_greedy else '?'}",
escape=False)
'''
Operators
'''
[docs] def concat(self, pre: _Union['Pregex', str], on_right: bool = True) -> 'Pregex':
'''
Concatenates the provided pattern to this instance's underlying pattern \
and returns the resulting pattern as a ``Pregex`` instance.
:param Pregex | str pre: Either a string or a ``Pregex`` instance \
representing the pattern that is to take part in the concatenation.
:param bool on_right: If ``True``, then places the provided pattern on the \
right side of the concatenation, else on the left. Defaults to ``True``.
:raises InvalidArgumentTypeException: Parameter ``pre`` is neither \
a ``Pregex`` instance nor a string.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
return self
pattern = self._concat_conditional_group()
pre = pre._concat_conditional_group()
pattern = pattern + pre if on_right else pre + pattern
return __class__(pattern, escape=False)
[docs] def either(self, pre: _Union['Pregex', str], on_right: bool = True) -> 'Pregex':
'''
Applies the alternation operator ``|`` between the provided pattern \
and this instance's underlying pattern, and returns the resulting pattern \
as a ``Pregex`` instance.
:param Pregex | str pre: Either a string or a ``Pregex`` instance \
representing the pattern that is to take part in the alternation.
:param bool on_right: If ``True``, then places the provided pattern on the \
right side of the alternation, else on the left. Defaults to ``True``.
:raises InvalidArgumentTypeException: Parameter ``pre`` is neither \
a ``Pregex`` instance nor a string.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
pattern = str(self)
else:
pattern = f"{self}|{pre}" if on_right else f"{pre}|{self}"
return __class__(pattern, escape=False)
[docs] def enclose(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Concatenates the provided pattern to both sides of this instance's \
underlying pattern, and returns the resulting pattern as a ``Pregex`` \
instance.
:param Pregex | str pre: Either a string or a ``Pregex`` instance \
representing the "enclosing" pattern.
:raises InvalidArgumentTypeException: Parameter `pre` is neither a \
``Pregex`` instance nor a string.
'''
pre = __class__._to_pregex(pre)._concat_conditional_group()
pattern = f"{pre}{self._concat_conditional_group()}{pre}"
return __class__(pattern, escape=False)
'''
Groups
'''
[docs] def capture(self, name: _Optional[str] = None) -> 'Pregex':
'''
Creates a capturing group out of this instance's underlying \
pattern and returns the result as a ``Pregex`` instance.
:param Pregex | str pre: The pattern out of which the capturing group is created.
:param str name: The name that is assigned to the captured group for backreference \
purposes. A value of ``None`` indicates that no name is to be assigned to the \
group. Defaults to ``None``.
:raises InvalidArgumentTypeException: Parameter ``name`` is neither a string \
nor ``None``.
:raises InvalidCapturingGroupNameException: Parameter ``name`` is not a valid \
capturing group name. Such name must contain word characters only and start \
with a non-digit character.
:note:
- Creating a capturing group out of a capturing group does nothing.
- Creating a capturing group out of a non-capturing group converts it \
into a capturing group, except if any flags have been applied to it, \
in which case, the non-capturing group is wrapped within a capturing \
group as a whole.
- Creating a named capturing group out of an unnamed capturing group, \
assigns a name to it.
- Creating a named capturing group out of a named capturing group, \
changes the group's name.
'''
if name is not None:
if not isinstance(name, str):
message = "Provided argument \"name\" is not a string."
raise _ex.InvalidArgumentTypeException(message)
if _re.fullmatch("[A-Za-z_]\w*", name) is None:
raise _ex.InvalidCapturingGroupNameException(name)
if self.__type == _Type.Empty:
return self
elif self.__type == _Type.Group:
if self.__pattern.startswith('(?:'):
# non-capturing group.
pattern = self.__pattern.replace('?:', '', 1)
elif _re.match('\(\?[i].+', self.__pattern):
# non-capturing group with flag.
pattern = f'({str(self)})'
else:
# capturing group.
pattern = self.__pattern
if name is not None:
if pattern.startswith('(?P'):
pattern = _re.sub('\(\?P<[^>]*>', f'(?P<{name}>', pattern)
else:
pattern = f"(?P<{name}>{pattern[1:-1]})"
else:
pattern = f"({f'?P<{name}>' if name != None else ''}{self})"
return __class__(pattern, escape=False)
[docs] def group(self, is_case_insensitive: bool = False) -> 'Pregex':
'''
Creates a non-capturing group out of this instance's underlying \
pattern and returns the result as a ``Pregex`` instance.
:param bool is_case_insensitive: If ``True``, then the "case insensitive" \
flag is applied to the group so that the pattern within it ignores case \
when it comes to matching. Defaults to ``False``.
:raises InvalidArgumentTypeException: Parameter ``pre`` is neither \
a ``Pregex`` instance nor a string.
:note:
- Creating a non-capturing group out of a non-capturing group does nothing, \
except for reset its flags, e.g. ``is_case_insensitive``, if it has any.
- Creating a non-capturing group out of a capturing group converts it into \
a non-capturing group.
'''
if self.__type == _Type.Empty:
return self
elif self.__type == _Type.Group:
if self.__pattern.startswith('(?P'):
# Remove name from named capturing group.
pattern = _re.sub('\(\?P<[^>]*>', f'(?:', str(self))
elif self.__pattern.startswith('(?'):
# Remove any possible flags from non-capturing group.
pattern = _re.sub(
r'\(\?[i]*:', f"(?{'i' if is_case_insensitive else ''}:",
self.__pattern,
count=1)
else:
# Else convert capturing group to non-capturing group.
pattern = self.__pattern.replace('(', '(?:', 1)
else:
pattern = f"(?{'i' if is_case_insensitive else ''}:{self})"
return __class__(pattern, escape=False)
'''
Assertions
'''
[docs] def match_at_start(self) -> 'Pregex':
'''
Applies assertion ``\\A`` to this instance's underlying pattern \
so that it only matches if it is found at the start of a string, \
and returns the resulting pattern as a ``Pregex`` instance.
:note: The resulting pattern cannot have a repeating quantifier \
applied to it.
'''
return __class__(f"\\A{self._assert_conditional_group()}", escape=False)
[docs] def match_at_end(self) -> 'Pregex':
'''
Applies assertion ``\\Z`` to this instance's underlying pattern \
so that it only matches if it is found at the end of a string, \
and returns the resulting pattern as a ``Pregex`` instance.
:note: The resulting pattern cannot have a repeating quantifier \
applied to it.
'''
return __class__(f"{self._assert_conditional_group()}\\Z", escape=False)
[docs] def match_at_line_start(self) -> 'Pregex':
'''
Applies assertion ``^`` to this instance's underlying pattern \
so that it only matches if it is found at the start of a line, \
and returns the resulting pattern as a ``Pregex`` instance.
:note:
- The resulting pattern cannot have a repeating quantifier \
applied to it.
- Uses meta character ``^`` since the `MULTILINE` flag is \
considered on.
'''
return __class__(f"^{self._assert_conditional_group()}", escape=False)
[docs] def match_at_line_end(self) -> 'Pregex':
'''
Applies assertion ``$`` to this instance's underlying pattern \
so that it only matches if it is found at the end of a line, \
and returns the resulting pattern as a ``Pregex`` instance.
:note:
- The resulting pattern cannot have a repeating quantifier\
applied to it.
- Uses meta character ``$`` since the `MULTILINE` flag is \
considered on.
'''
return __class__(f"{self._assert_conditional_group()}$", escape=False)
[docs] def followed_by(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Applies positive lookahead assertion ``(?=<PRE>)``, where \
``<PRE>`` corresponds to the provided pattern, to this \
instance's underlying pattern and returns the resulting pattern \
as a ``Pregex`` instance.
:param str | Pregex pre: A Pregex instance or string \
representing the "assertion" pattern.
:raises InvalidArgumentTypeException: The provided argument \
is neither a ``Pregex`` instance nor a string.
:note: The resulting pattern cannot have a repeating quantifier \
applied to it.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
return self
return __class__(
f"{self._assert_conditional_group()}(?={pre})",
escape=False)
[docs] def preceded_by(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Applies positive lookbehind assertion ``(?<=<PRE>)``, where \
``<PRE>`` corresponds to the provided pattern, to this \
instance's underlying pattern and returns the resulting pattern \
as a ``Pregex`` instance.
:param str | Pregex pre: A Pregex instance or string \
representing the "assertion" pattern.
:raises InvalidArgumentTypeException: The provided argument \
is neither a ``Pregex`` instance nor a string.
:raises NonFixedWidthPatternException: A non-fixed-width pattern \
is provided in place of parameter ``assertion``.
:note: The resulting pattern cannot have a repeating quantifier \
applied to it.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
return self
if _re.search(_re.sub(r"\s", "", r"""
(?<!\\)(?:\\\\)*(?<!\()(?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})|
(?<!\\)(?:\\\\)*\\\((?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})
"""), str(pre)) is not None:
raise _ex.NonFixedWidthPatternException(pre)
return __class__(
f"(?<={pre}){self._assert_conditional_group()}",
escape=False)
[docs] def enclosed_by(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Applies both positive lookahead assertion ``(?=<PRE>)`` and positive \
lookbehind assertion ``(?<=<PRE>)``, where ``<PRE>`` corresponds to \
the provided pattern, to this instance's underlying pattern and \
returns the resulting pattern as a ``Pregex`` instance.
:param str | Pregex pre: A Pregex instance or string \
representing the "assertion" pattern.
:raises InvalidArgumentTypeException: The provided argument \
is neither a ``Pregex`` instance nor a string.
:raises NonFixedWidthPatternException: A non-fixed-width pattern \
is provided in place of parameter ``assertion``.
:note: The resulting pattern cannot have a repeating quantifier \
applied to it.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
return self
if _re.search(_re.sub(r"\s", "", r"""
(?<!\\)(?:\\\\)*(?<!\()(?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})|
(?<!\\)(?:\\\\)*\\\((?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})
"""), str(pre)) is not None:
raise _ex.NonFixedWidthPatternException(pre)
return __class__(
f"(?<={pre}){self._assert_conditional_group()}(?={pre})",
escape=False)
[docs] def not_followed_by(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Applies negative lookahead assertion ``(?!<PRE>)``, where ``<PRE>`` \
corresponds to the provided pattern, to this instance's underlying \
pattern and returns the resulting pattern as a ``Pregex`` instance.
:param Pregex | str pre: Either a string or a ``Pregex`` instance \
representing the "assertion" pattern.
:raises InvalidArgumentTypeException: The provided argument is neither \
a ``Pregex`` instance nor a string.
:raises EmptyNegativeAssertionException: The provided assertion pattern \
is the empty-string pattern.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
raise _ex.EmptyNegativeAssertionException()
pattern = f"{self._assert_conditional_group()}(?!{pre})"
return __class__(pattern, escape=False)
[docs] def not_preceded_by(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Applies negative lookbehind assertion ``(?<!<PRE>)``, where ``<PRE>`` \
corresponds to the provided pattern, to this instance's underlying \
pattern and returns the resulting pattern as a ``Pregex`` instance.
:param Pregex | str pre: Either a string or a ``Pregex`` instance \
representing the "assertion" pattern.
:raises InvalidArgumentTypeException: The provided argument is neither \
a ``Pregex`` instance nor a string.
:raises EmptyNegativeAssertionException: The provided assertion pattern \
is the empty-string pattern.
:raises NonFixedWidthPatternException: The provided assertion pattern \
does not have a fixed width.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
raise _ex.EmptyNegativeAssertionException()
if _re.search(_re.sub(r"\s", "", r"""
(?<!\\)(?:\\\\)*(?<!\()(?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})|
(?<!\\)(?:\\\\)*\\\((?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})
"""), str(pre)) is not None:
raise _ex.NonFixedWidthPatternException(pre)
pattern = f"(?<!{pre}){self._assert_conditional_group()}"
return __class__(pattern, escape=False)
[docs] def not_enclosed_by(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Applies both negative lookahead assertion ``(?=<PRE>)``` and \
negative lookbehind assertion ``(?<!<PRE>)``, where ``<PRE>`` \
corresponds to the provided pattern, to this instance's underlying \
pattern and returns the resulting pattern as a ``Pregex`` instance.
:param Pregex | str pre: Either a string or a ``Pregex`` instance \
representing the "assertion" pattern.
:raises InvalidArgumentTypeException: The provided argument is neither \
a ``Pregex`` instance nor a string.
:raises EmptyNegativeAssertionException: The provided assertion pattern \
is the empty-string pattern.
:raises NonFixedWidthPatternException: The provided assertion pattern \
does not have a fixed width.
'''
pre = __class__._to_pregex(pre)
if pre._get_type() == _Type.Empty:
raise _ex.EmptyNegativeAssertionException()
if _re.search(_re.sub(r"\s", "", r"""
(?<!\\)(?:\\\\)*(?<!\()(?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})|
(?<!\\)(?:\\\\)*\\\((?:\?|\*|\+|\{,\d+\}|\{\d+,\}|\{\d+,\d+\})
"""), str(pre)) is not None:
raise _ex.NonFixedWidthPatternException(pre)
pattern = f"(?<!{pre}){self._assert_conditional_group()}(?!{pre})"
return __class__(pattern, escape=False)
'''
Protected Methods
'''
def _get_type(self) -> _Type:
'''
Returns the type of this instance's underlying pattern.
'''
return self.__type
def _is_repeatable(self) -> bool:
'''
Returns ``True`` if this pattern can be quantified, \
else returns ``False``.
'''
return self.__repeatable
def _concat_conditional_group(self) -> str:
'''
Returns this instance's underlying pattern wrapped within a \
non-capturing group only if the instance's "group-on-concat" \
rule is set to ``True``, else returns it as it is.
'''
return str(self.group()) if self.__get_group_on_concat_rule() else str(self)
def _quantify_conditional_group(self) -> str:
'''
Returns this instance's underlying pattern wrapped within a \
non-capturing group only if the instance's "group-on-quantify" \
rule is set to ``True``, else returns it as it is.
'''
return str(self.group()) if self.__get_group_on_quantify_rule() else str(self)
def _assert_conditional_group(self) -> str:
'''
Returns this instance's underlying pattern wrapped within a \
non-capturing group only if the instance's "group-on-assertion" \
rule is set to ``True``, else returns it as it is.
'''
return str(self.group()) if self.__get_group_on_assert_rule() else str(self)
@staticmethod
def _to_pregex(pre: 'Pregex' or str) -> 'Pregex':
'''
Returns ``pre`` exactly as provided if it is a ``Pregex`` instance, \
else if it is a string, this method returns it wrapped within a ``Pregex`` \
instance for which parameter ``escape`` has been set to ``True``.
:param Pregex | str: Either a string or a ``Pregex`` instance.
:raises InvalidArgumentTypeException: Argument ``pre`` is neither a string nor a \
``Pregex`` class instance.
'''
if isinstance(pre, str):
return Pregex(pre, escape=True)
elif issubclass(pre.__class__, __class__):
return pre
else:
message = "Parameter \"pre\" must either be a string or an instance of \"Pregex\"."
raise _ex.InvalidArgumentTypeException(message)
'''
Private Methods
'''
def __str__(self) -> str:
'''
Returns the string representation of this instance's \
underlying pattern.
:note: Not to be used for pattern-display purposes.
'''
return self.__pattern
def __repr__(self) -> str:
'''
Returns the string representation of this instance's \
underlying pattern in a printable format.
'''
# Replace any quadraple backslashes.
return _re.sub(r"\\\\", r"\\", repr(self.__pattern)[1:-1])
def __add__(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Concatenates this instance's underlying pattern with the provided \
pattern and returns the resulting ``Pregex`` instance.
:param pre: Either a string or ``Pregex`` class instance that is to \
be concatenated to this instance's underlying pattern.
'''
return __class__(str(self.concat(__class__._to_pregex(pre))), escape=False)
def __radd__(self, pre: _Union['Pregex', str]) -> 'Pregex':
'''
Concatenates this instance's underlying pattern with the provided \
pattern and returns the resulting ``Pregex`` instance.
:param pre: Either a string or ``Pregex`` class instance that is to \
be concatenated to this instance's underlying pattern.
'''
return __class__(str(__class__._to_pregex(pre).concat(self)), escape=False)
def __mul__(self, n: int) -> 'Pregex':
'''
Applies quantifier ``{n}`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param int n: The exact number of times that the patterns is to be matched.
:raises InvalidArgumentTypeException: Parameter ``n`` is not an integer.
:raises InvalidArgumentValueException: Parameter ``n`` has a value of less \
than zero.
:raises CannotBeRepeatedException: Parameter ``n`` has a value of greater \
than one, while this instance represents a non-repeatable pattern.
'''
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
if not isinstance(n, int) or isinstance(n, bool):
message = "Provided argument \"n\" is not an integer."
raise _ex.InvalidArgumentTypeException(message)
if n < 0:
message = "Using multiplication operator with a negative integer is not allowed."
raise _ex.InvalidArgumentValueException(message)
if self._get_type() == _Type.Empty:
return self
return __class__(str(self.exactly(n)), escape=False)
def __rmul__(self, n: int) -> 'Pregex':
'''
Applies quantifier ``{n}`` to this instance's underlying pattern \
and returns the result as a ``Pregex`` instance.
:param int n: The exact number of times that the patterns is to be matched.
:raises InvalidArgumentTypeException: Parameter ``n`` is not an integer.
:raises InvalidArgumentValueException: Parameter ``n`` has a value of less \
than zero.
:raises CannotBeRepeatedException: Parameter ``n`` has a value of greater \
than one, while this instance represents a non-repeatable pattern.
'''
if not self._is_repeatable():
raise _ex.CannotBeRepeatedException(self)
if not isinstance(n, int) or isinstance(n, bool):
message = "Provided argument \"n\" is not an integer."
raise _ex.InvalidArgumentTypeException(message)
if n < 0:
message = "Using multiplication operator with a negative integer is not allowed."
raise _ex.InvalidArgumentValueException(message)
if self._get_type() == _Type.Empty:
return self
return __class__(str(self.exactly(n)), escape=False)
def __get_group_on_concat_rule(self) -> bool:
'''
Returns the value of this instance's "group-on-concat" rule.
'''
return __class__.__groupping_rules[self.__type][0]
def __get_group_on_quantify_rule(self) -> bool:
'''
Returns the value of this instance's "group-on-quantify" rule.
'''
return __class__.__groupping_rules[self.__type][1]
def __get_group_on_assert_rule(self) -> bool:
'''
Returns the value of this instance's "group-on-assertion" rule.
'''
return __class__.__groupping_rules[self.__type][2]
def __iterate_match_objects(self, source: str, is_path: bool) -> _Iterator[_re.Match]:
'''
Invokes ``re.finditer`` in order to iterate over all matches of this \
instance's underlying pattern with the provided text as instances of \
type ``re.Match``.
:param str source: The text that is to be examined.
:param bool is_path: If set to ``True``, then parameter ``source`` \
is considered to be a local path pointing to the file from which \
the text is to be read.
'''
if is_path:
source = self.__extract_text(source)
return _re.finditer(self.__pattern, source, flags=self.__flags) \
if self.__compiled is None else self.__compiled.finditer(source)
@staticmethod
def __escape(pattern: str) -> str:
'''
Scans this instance's underlying pattern for any characters that need to \
be escaped, escapes them if there are any, and returns the resulting \
pattern as a string.
'''
pattern = pattern.replace("\\", "\\\\")
for c in {'^', '$', '(', ')', '[', ']', '{', '}', '?', '+', '*', '.', '|', '/'}:
pattern = pattern.replace(c, f"\\{c}")
return pattern
@staticmethod
def __infer_type(pattern: str) -> tuple[_Type, bool]:
'''
Examines the provided RegEx pattern and returns its type, \
as well as a boolean indicating whether said pattern can be \
quantified or not.
:param str pattern: The RegEx pattern that is to be examined.
'''
def remove_groups(pattern: str, repl: str = ''):
'''
Removes all groups from the provided pattern, and replaces them with ``repl``.
:param str pattern: The pattern whose groups are to be removed.
:param str repl: The string that replaces all groups within the pattern. \
Defaults to ``''``.
'''
left_par, right_par = r"(?:(?<!\\)\()", r"(?:(?<!\\)\))"
if len(_re.findall(left_par, pattern)) == 0:
return pattern
temp = _re.sub(pattern=left_par + r"(?:[^\(\)]|\\(?:\(|\)))+" + right_par,
repl=repl, string=pattern)
return temp if temp == repl else remove_groups(temp, repl)
def __is_group(pattern: str) -> bool:
'''
Looks at the underlying pattern of this instance, and returns either \
``True`` or ``False``, depending on whether the provided RegEx pattern \
represents a group or not.
:param str pattern: The pattern that is to be examined.
'''
if pattern.startswith('(') and pattern.endswith(')'):
n_open = 0
for i in range(1, len(pattern) - 1):
prev_char, curr_char = pattern[i-1], pattern[i]
if prev_char != "\\":
if curr_char == ')':
if n_open == 0:
return False
else:
n_open -= 1
if curr_char == '(':
n_open += 1
return n_open == 0
return False
# Replace escaped backslashes with some other character.
pattern = _re.sub(r"\\{2}", "a", pattern)
if pattern == "":
return _Type.Empty, True
elif _re.fullmatch(r"\\?.", pattern, flags=__class__.__flags) is not None:
if _re.fullmatch(r"\.|\\(?:w|d|s)", pattern,
flags=__class__.__flags | _re.IGNORECASE) is not None:
return _Type.Class, True
elif _re.fullmatch(r"\\b", pattern,
flags=__class__.__flags | _re.IGNORECASE) is not None:
return _Type.Assertion, True
else:
return _Type.Token, True
# Simplify classes by removing extra characters.
pattern = _re.sub(r"\[.+?(?<!\\)\]", "[a]", pattern)
if pattern == "[a]":
return _Type.Class, True
elif __is_group(pattern):
return _Type.Group, True
# Replace every group with a simple character.
temp = remove_groups(pattern, repl="G")
if len(_re.split(pattern=r"(?<!\\)\|", string=temp)) > 1:
return _Type.Alternation, True
elif _re.fullmatch(r"(?:\^|\\A|\(\?<=.+\)).+|.+(?:\$|\\Z|\(\?=.+\))",
pattern, flags=__class__.__flags) is not None:
return _Type.Assertion, False
elif _re.fullmatch(r"(?:\\b|\\B|\(\?<!.+\)).+|.+(?:\\b|\\B|\(\?!.+\))",
pattern, flags=__class__.__flags) is not None:
return _Type.Assertion, True
elif _re.fullmatch(r"(?:\\.|[^\\])?(?:\?|\*|\+|\{(?:\d+|\d+,|,\d+|\d+,\d+)\})",
temp, flags=__class__.__flags) is not None:
return _Type.Quantifier, True
return _Type.Other, True
@staticmethod
def __extract_text(source: str) -> str:
'''
Reads and returns the text that is contained within the file \
to which the provided path points.
:param str source: The path pointing to the file from which the text \
is to be extracted.
'''
with open(file=source, mode='r', encoding='utf-8') as f:
text = f.read()
return text