Source code for inflex.verb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
from typing import Optional, Pattern, Tuple

from inflex.syllable import Syllable
from inflex.term import Term
from inflex.verb_core import (
    is_plural,
    is_singular,
    is_past,
    is_pres_part,
    is_past_part,
    convert_to_singular,
    convert_to_plural,
    convert_to_past,
    convert_to_pres_part,
    convert_to_past_part,
    plural_of,
    singular_of,
    past_of,
    pres_part_of,
    past_part_of,
)


class Verb(Term):
    """Class for detecting and converting to verb forms."""

    # Verb prefixes that `split_prefix` may strip from the front of a term.
    # `split_prefix` returns the first entry that matches, so a longer prefix
    # is listed ahead of any shorter prefix it starts with
    # ('under' before 'un', 'fore' before 'for', 'inter' before 'in').
    # Kept as a tuple so it can be handed directly to `str.startswith`.
    # The commented-out entries are currently not treated as prefixes.
    # NOTE(review): presumably disabled because they split unrelated verbs
    # incorrectly -- confirm before re-enabling any of them.
    _prefixes = (
        'counter',
        'trans',
        'cross',
        'inter',
        'under',
        'fore',
        'back',
        'over',
        # 'post',
        'out',
        'mis',
        'for',
        'dis',
        'way',
        # 'pre',
        # 'sub',
        'un',
        'in',
        # 'be',
        'up',
        're',
        # 'co',
        # 'de',
    )

    """
    Regexes to be tried before applying -ed or -ing.
    E.g. "argue" is converted to "argu" according to these regexes,
    and then "ing" or "ed" are appended for present participle,
    and past/past participle respectively.
    This produces "arguing" and "argued".
    """
    # Maps a compiled end-of-word pattern to a callable that receives the match
    # and returns the replacement text for the matched span.
    # `_stem` walks this dict in insertion order and applies only the first
    # pattern that matches, so the order of the entries is significant
    # (e.g. "fer" has to be tried before the more general "er").
    _stem_regexes = {
        # Words ending in "fer" always duplicate their consonant,
        # e.g. "transfer" -> "transferr" (+ "ed" or "ing")
        re.compile(r"fer\Z"): lambda match: "ferr",
        # Words ending in "c" will have an extra "k" appended before
        # -ed and -ing. One exception is "arc" -> "arced".
        # NOTE(review): this pattern does not special-case "arc"; presumably
        # the exception is covered by the known-form tables -- confirm.
        re.compile(r"c\Z"): lambda match: "ck",
        # Words ending in "ie" will end in "y" before appending
        # -ed or -ing.
        re.compile(r"ie\Z"): lambda match: "y",
        # Words ending with "ski" don't change,
        # and then immediately have -ed or -ing appended.
        re.compile(r"ski\Z"): lambda match: "ski",
        # Words ending with "e" have that final "e" stripped,
        # e.g. "argue" -> "argu". The character before it is kept as-is and
        # may itself be an "e" ("agree" -> "agre").
        re.compile(r"(.)e\Z"): lambda match: match.group(1),
        # Words ending with "er" don't duplicate: the match is returned
        # unchanged, which stops `_stem` before its consonant-doubling step.
        re.compile(r"er\Z"): lambda match: match.group(),
        # Words ending with "en" don't duplicate,
        # unless the word is small, e.g. ken -> kenned, pen -> penned, yen -> yenned
        # (the pattern needs two characters before "en", so three-letter
        # words do not match here and fall through to the doubling step).
        re.compile(r"..en\Z"): lambda match: match.group(),
        # Words ending with "on" (preceded by one of the listed consonants
        # and at least one more character) don't duplicate.
        re.compile(r"(.[bdghklmnprstzy]on)\Z"): lambda match: match.group(1),
        # Always duplicate CVl (British English), e.g. "travel" -> "travell".
        # The leading character may be anything except "a", "e" or "o".
        re.compile(r"[^aeo][aeiuo]l\Z"): lambda match: match.group() + "l",
    }

    # Matches a term that ends in (roughly) consonant-vowel-consonant:
    # a character other than a/e/i/o/u (or the start of the string), then a
    # single vowel or "y", then one of the listed final consonants.
    # Case-insensitive. `_stem` only doubles the last letter when this matches.
    _stem_double_regex = re.compile(
        r"((?:[^aeiou]|^)[aeiouy]([bcdlgkmnprstvz]))\Z", re.I)

    # ---------------------------------- #
    # Override default methods from Term #
    # ---------------------------------- #

[docs]    def __init__(self, term: str): # pylint: disable=W0235
        """Creates a Verb instance with detection and conversion methods.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.singular()
            'flies'
            >>> verb.past()
            'flew'
            >>> verb.past_part()
            'flying'
            >>> verb.pres_part()
            'flown'

            >>> verb.is_plural()
            True

        Note:
            Capitalisation and whitespace will be preserved between input `term` and
            generated output.

        Args:
            term (str): Input word or collocation.
        """
        super().__init__(term)

[docs]    def is_verb(self) -> bool:
        """Returns `True` only if this verb is instantiated via `Verb(term)`.

        Returns:
            bool: Returns `True` only if this verb is instantiated via `Verb(term)`.
        """
        return True

[docs]    def is_singular(self) -> bool:
        """Detect whether this verb is in singular form.

        Returns:
            bool: True if this verb is deemed singular.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)

        return is_singular(term)

[docs]    def is_plural(self) -> bool:
        """Detect whether this verb is in plural form.

        Returns:
            bool: True if this verb is deemed plural.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)

        return is_plural(term)

[docs]    def singular(self, person: int = 0) -> str: # pylint: disable=R0911
        """Returns this verb's singular form.

        Args:
            person (Optional[int], optional): Represents the grammatical "person" (1st, 2nd, 3rd).
                Defaults to 0.

        Returns:
            str: This verb's singular form.
        """
        self._check_valid_person(person)

        # "To be" is special
        if self.term.lower() in ["is", "am", "are"]:
            if person == 0:
                # "are" is already singular, e.g. "they are my friend",
                # but the expected result is "is", so we opt for that.
                if self.term.lower() == "are":
                    return self._encase("is")
                return self._reapply_whitespace(self.term)
            if person == 2 or not self.is_singular():
                return self._encase("are")
            if person == 1:
                return self._encase("am")
            return self._encase("is")

        # Third person uses the "notational" singular inflection
        if person in (3, 0):
            # Get first word, last section of that word (if "-" in the word)
            term, form = Verb.get_subterm(self.term)

            # If this term is in the list of known cases
            # TODO: # pylint: disable=W0511
            # - This partially overlaps with `known = convert_to_singular(term)` from below
            if term.lower() in singular_of:
                return self._encase(form.format(singular_of[term.lower()]))

            # Try splitting off a prefix
            prefix, subterm = Verb.split_prefix(term)
            if prefix:
                known = convert_to_singular(subterm)
                if known:
                    return self._encase(form.format(prefix + known))

            # Otherwise convert the first word, last section
            known = convert_to_singular(term)
            if known:
                return self._encase(form.format(known))

            # If all else fails, return the term
            return self._reapply_whitespace(self.term)

        # First and second person always use the uninflected (i.e. "notational plural" form)
        return self.plural()

[docs]    def plural(self, person: int = 0) -> str:
        """Returns this verb's plural form.

        Args:
            person (Optional[int], optional): Represents the grammatical "person" (1st, 2nd, 3rd).
                Defaults to 0.

        Returns:
            str: This verb's plural form.
        """
        self._check_valid_person(person)

        known = None
        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.term)

        # If this term is in the list of known cases
        # TODO: # pylint: disable=W0511
        # - This partially overlaps with `known = convert_to_singular(term)` from below
        if term.lower() in plural_of:
            return self._encase(form.format(plural_of[term.lower()]))

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_plural(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Otherwise convert the first word, last section
        known = convert_to_plural(term)
        if known:
            return self._encase(form.format(known))

        # If all else fails, return the term
        return self._reapply_whitespace(self.term)

[docs]    def lemma(self) -> str:
        """Return this object's lemma form.

        Examples:
            >>> Verb('eating').lemma()
            'eat'

        Returns:
            str: This object's lemma form.
        """
        return self.plural()

[docs]    def as_regex(self) -> Pattern[str]:
        """Returns a `re.Pattern` which case-insensitively matches any inflected form of the verb.

        Returns:
            re.Pattern: Compiled regex object which case-insensitively matches any inflected form
                of the verb.

        Examples:
            >>> Verb('eat').as_regex()
            re.compile('eats|eating|eaten|eat|ate', re.IGNORECASE)
        """
        return re.compile("|".join(sorted(map(re.escape, {self.singular(), # type: ignore
                                                          self.plural(),
                                                          self.past(),
                                                          self.past_part(),
                                                          self.pres_part()
                                                          }), reverse=True)), flags=re.I)

    # ---------------------------- #
    # Methods exclusively for Verb #
    # ---------------------------- #

    @staticmethod
    def _stem(term: str) -> str:
        """Stem `term` so that "-ed"/"-ing" can be appended for past and present participle forms.

        Args:
            term (str): The input word to stem.

        Returns:
            str: The stemmed version of `term`, ready for appending "-ed" or "-ing".
        """
        if not term:
            return term

        # Utility method that adjusts final consonants when they need to be doubled in inflexions
        # Apply the first relevant transform
        for regex in Verb._stem_regexes:
            match = regex.search(term)
            if match:
                # Adding `term[match.end():]` is unnecessary for now,
                # but allows for more complex regexes.
                return term[:match.start()] + Verb._stem_regexes[regex](match) + term[match.end():]

        # Get the last word from the term, and remove a potential prefix
        last_word = term.replace("-", " ").split()[-1]
        _, last_word = Verb.split_prefix(last_word)

        # Get a set of known syllable counts for last_word
        syllable_count = Syllable.count_syllables(last_word)

        # Duplicate last letter if:
        if (
            # The word is certainly just one syllable, or
            1 in syllable_count
            # The word is just one syllable, or
            or (not syllable_count and Syllable.guess_if_one_syllable(last_word))
            # The last syllable is stressed
            or (Syllable.ends_with_stress(last_word))
        ) and Verb._stem_double_regex.search(term):  # AND the word ends in (roughly) CVC
            return term + term[-1]

        return term

[docs]    @staticmethod
    def split_prefix(term: str) -> Tuple[str, str]:
        """Split the prefix from the term.

        Examples:
            >>> Verb.split_prefix("unbind")
            ("un", "bind")
            >>> Verb.split_prefix("mistake")
            ("mis", "take")
            >>> Verb.split_prefix("reappear")
            ("re", "appear")
            >>> Verb.split_prefix("use")
            ("", "use")

        Args:
            term (str): The input word to potentially split a prefix from.

        Returns:
            Tuple[str, str]: The first string is the prefix, the second string is the remainder.
                If the input does not have a prefix to split, then the first string is empty,
                while the second string is the full input `term`.
        """
        if term.startswith(Verb._prefixes):
            for prefix in Verb._prefixes:
                if term.startswith(prefix) and len(term[len(prefix):]) > 1:
                    return prefix, term[len(prefix):]
        return "", term

[docs]    @staticmethod
    def get_subterm(term: str) -> Tuple[str, str]:
        """Extract last sub-section (split by '-') of the first word.

        Examples:
            >>> Verb.get_subterm("aaa-bbb ccc")
            ('bbb', 'aaa-{} ccc')

        Args:
            term (str): The input word to potentially split the subterm from.

        Returns:
            Tuple[str, str]: The first string is the format string, e.g. "aaa-{} ccc", while
                the second string is the last sub-section, e.g. "bbb".
        """
        form = "{}"
        # Split off first word
        try:
            index = term.index(" ")
            form += term[index:]
            term = term[:index]
        except ValueError:
            pass

        # Don't split if the word ends with a hyphen
        if term.endswith("-"):
            return term, form

        # Split off last sub-word of first word
        try:
            index = term.rindex("-") + 1
            form = term[:index] + form
            term = term[index:]
        except ValueError:
            pass
        return term, form

[docs]    def past(self) -> str:
        """Returns this Verb's past form.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.past()
            "flew"

        Returns:
            str: This Verb's past form.
        """
        known = None
        # "To be" is special
        if self.term.lower() in ["is", "am"]:
            return self._encase("was")
        if self.term.lower() == "are":
            return self._encase("were")

        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.term)

        # If this term is in the list of known cases
        if term.lower() in past_of:
            return self._encase(form.format(past_of[term.lower()]))

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_past(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Convert the root of the term
        root, form = Verb.get_subterm(self.plural())
        known = convert_to_past(root)
        if known:
            return self._encase(form.format(known))

        # Otherwise use the standard pattern on the root
        known = Verb._stem(root) + "ed"

        return self._encase(form.format(known))

[docs]    def pres_part(self) -> str:
        """Returns this Verb's present participle form.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.pres_part()
            "flying"

        Returns:
            str: This Verb's present participle form.
        """
        known = None
        # If this term is in the list of known cases
        if self.term.lower() in pres_part_of:
            return self._encase(pres_part_of[self.term.lower()])

        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.plural())

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_pres_part(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Convert the full (sub)term
        known = convert_to_pres_part(term)

        # Otherwise use the standard pattern on the root
        if known is None:
            known = Verb._stem(term) + "ing"

        return self._encase(form.format(known))

[docs]    def past_part(self) -> str:
        """Returns this Verb's past participle form.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.pres_part()
            "flown"

        Returns:
            str: This Verb's past participle form.
        """
        known = None
        # If this term is in the list of known cases
        if self.term.lower() in past_part_of:
            return self._encase(past_part_of[self.term.lower()])

        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.plural())

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_past_part(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Convert the full (sub)term
        known = convert_to_past_part(term)

        # Otherwise use the standard pattern on the root
        if known is None:
            known = Verb._stem(term) + "ed"

        return self._encase(form.format(known))

[docs]    def is_past(self) -> bool:
        """Detect whether this Verb is in past form.

        Returns:
            bool: True if this Verb is deemed past.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)

        return is_past(term)

[docs]    def is_pres_part(self) -> bool:
        """Detect whether this Verb is in present participle form.

        Returns:
            bool: True if this Verb is deemed present participle.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)

        return is_pres_part(term)

[docs]    def is_past_part(self) -> bool:
        """Detect whether this Verb is in past participle form.

        Returns:
            bool: True if this Verb is deemed past participle.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)

        return is_past_part(term)

[docs]    def indefinite(self, count: Optional[int] = 1) -> str:
        """Return the singular if `count` == 1, and the plural otherwise.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.indefinite(count = 1)
            'flies'
            >>> verb.indefinite(count = 3)
            'fly'

        Args:
            count (Optional[int], optional): The number of objects on which this verb applies.
                Defaults to 1.

        Returns:
            str: The singular if `count` == 1, and the plural otherwise.
        """
        if count == 1:
            return self.singular()
        return self.plural()
Inflex

Documentation

Source code for inflex.verb