Source code for inflex.verb

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
from typing import Optional, Pattern, Tuple

from inflex.syllable import Syllable
from inflex.term import Term
from inflex.verb_core import (
    is_plural,
    is_singular,
    is_past,
    is_pres_part,
    is_past_part,
    convert_to_singular,
    convert_to_plural,
    convert_to_past,
    convert_to_pres_part,
    convert_to_past_part,
    plural_of,
    singular_of,
    past_of,
    pres_part_of,
    past_part_of,
)


class Verb(Term):
    """Class for detecting and converting to verb forms."""

    # Prefixes that `split_prefix` may strip from the front of a term.
    # They are tried in this order and the first usable one wins, so the
    # longer prefixes are listed first.
    _prefixes = (
        'counter',
        'trans',
        'cross',
        'inter',
        'under',
        'fore',
        'back',
        'over',
        # 'post',
        'out',
        'mis',
        'for',
        'dis',
        'way',
        # 'pre',
        # 'sub',
        'un',
        'in',
        # 'be',
        'up',
        're',
        # 'co',
        # 'de',
    )

    # Regexes to be tried before applying -ed or -ing.
    #
    # E.g. "argue" is converted to "argu" according to these regexes, and
    # then "ing" or "ed" are appended for present participle, and
    # past/past participle respectively. This produces "arguing" and
    # "argued".
    #
    # `_stem` walks this mapping in insertion order and applies only the
    # FIRST pattern that matches, so the order of the entries is significant.
    _stem_regexes = {
        # Words ending in "fer" always duplicate their consonant,
        # e.g. "transfer" -> "transferr" (+ "ed" or "ing")
        re.compile(r"fer\Z"): lambda match: "ferr",

        # Words ending in "c" will have an extra "k" appended before
        # -ed and -ing. One exception is "arc" -> "arced".
        # NOTE(review): the "arc" exception is not handled here; presumably
        # it is covered by the known-form tables in `verb_core` - confirm.
        re.compile(r"c\Z"): lambda match: "ck",

        # Words ending in "ie" will end in "y" before appending
        # -ed or -ing.
        re.compile(r"ie\Z"): lambda match: "y",

        # Words ending with "ski" don't change,
        # and then immediately have -ed or -ing appended.
        re.compile(r"ski\Z"): lambda match: "ski",

        # Words ending with "e" have that "e" stripped,
        # e.g. "argue" -> "argu".
        # NOTE(review): the original comment said the preceding letter must
        # be "anything other than an 'e'", but `(.)` also matches "e"
        # (so "agree" -> "agre"). Confirm which behaviour is intended
        # before tightening the pattern to `([^e])e\Z`.
        re.compile(r"(.)e\Z"): lambda match: match.group(1),

        # Words ending with "er" don't duplicate.
        re.compile(r"er\Z"): lambda match: match.group(),

        # Words ending with "en" don't duplicate,
        # unless the word is small, e.g. ken -> kenned, pen -> penned, yen -> yenned
        re.compile(r"..en\Z"): lambda match: match.group(),

        # Words ending with "on" don't duplicate.
        re.compile(r"(.[bdghklmnprstzy]on)\Z"): lambda match: match.group(1),

        # Always duplicate CVl (British English)
        re.compile(r"[^aeo][aeiuo]l\Z"): lambda match: match.group() + "l",
    }

    # Matches a term ending in (roughly) consonant-vowel-consonant; `_stem`
    # uses it to decide whether the final letter may be doubled.
    _stem_double_regex = re.compile(
        r"((?:[^aeiou]|^)[aeiouy]([bcdlgkmnprstvz]))\Z", re.I)

    # ---------------------------------- #
    # Override default methods from Term #
    # ---------------------------------- #

    def __init__(self, term: str):  # pylint: disable=W0235
        """Creates a Verb instance with detection and conversion methods.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.singular()
            'flies'
            >>> verb.past()
            'flew'
            >>> verb.past_part()
            'flown'
            >>> verb.pres_part()
            'flying'
            >>> verb.is_plural()
            True

        Note:
            Capitalisation and whitespace will be preserved between input `term` and generated output.

        Args:
            term (str): Input word or collocation.
        """
        super().__init__(term)

    def is_verb(self) -> bool:
        """Returns `True` only if this verb is instantiated via `Verb(term)`.

        Returns:
            bool: Always `True` for `Verb` instances.
        """
        return True

    def is_singular(self) -> bool:
        """Detect whether this verb is in singular form.

        Returns:
            bool: True if this verb is deemed singular.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)
        return is_singular(term)

    def is_plural(self) -> bool:
        """Detect whether this verb is in plural form.

        Returns:
            bool: True if this verb is deemed plural.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)
        return is_plural(term)

    def singular(self, person: int = 0) -> str:  # pylint: disable=R0911
        """Returns this verb's singular form.

        Args:
            person (Optional[int], optional): Represents the grammatical "person" (1st, 2nd, 3rd).
                Defaults to 0.

        Returns:
            str: This verb's singular form.
        """
        self._check_valid_person(person)
        lowered = self.term.lower()

        # "To be" is special
        if lowered in ["is", "am", "are"]:
            if person == 0:
                # "are" is already singular, e.g. "they are my friend",
                # but the expected result is "is", so we opt for that.
                if lowered == "are":
                    return self._encase("is")
                return self._reapply_whitespace(self.term)
            if person == 2 or not self.is_singular():
                return self._encase("are")
            if person == 1:
                return self._encase("am")
            return self._encase("is")

        # Third person uses the "notational" singular inflection
        if person in (3, 0):
            # Get first word, last section of that word (if "-" in the word)
            term, form = Verb.get_subterm(self.term)

            # If this term is in the list of known cases
            # TODO:  # pylint: disable=W0511
            #  - This partially overlaps with `known = convert_to_singular(term)` from below
            if term.lower() in singular_of:
                return self._encase(form.format(singular_of[term.lower()]))

            # Try splitting off a prefix
            prefix, subterm = Verb.split_prefix(term)
            if prefix:
                known = convert_to_singular(subterm)
                if known:
                    return self._encase(form.format(prefix + known))

            # Otherwise convert the first word, last section
            known = convert_to_singular(term)
            if known:
                return self._encase(form.format(known))

            # If all else fails, return the term
            return self._reapply_whitespace(self.term)

        # First and second person always use the uninflected (i.e. "notational plural" form)
        return self.plural()

    def plural(self, person: int = 0) -> str:
        """Returns this verb's plural form.

        Args:
            person (Optional[int], optional): Represents the grammatical "person" (1st, 2nd, 3rd).
                Defaults to 0. It is validated but does not otherwise
                influence the result.

        Returns:
            str: This verb's plural form.
        """
        self._check_valid_person(person)

        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.term)

        # If this term is in the list of known cases
        # TODO:  # pylint: disable=W0511
        #  - This partially overlaps with `known = convert_to_plural(term)` from below
        if term.lower() in plural_of:
            return self._encase(form.format(plural_of[term.lower()]))

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_plural(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Otherwise convert the first word, last section
        known = convert_to_plural(term)
        if known:
            return self._encase(form.format(known))

        # If all else fails, return the term
        return self._reapply_whitespace(self.term)

    def lemma(self) -> str:
        """Return this object's lemma form, which is its plural form.

        Examples:
            >>> Verb('eating').lemma()
            'eat'

        Returns:
            str: This object's lemma form.
        """
        return self.plural()

    def as_regex(self) -> Pattern[str]:
        """Returns a `re.Pattern` which case-insensitively matches any inflected form of the verb.

        Returns:
            re.Pattern: Compiled regex object which case-insensitively matches any inflected form of the verb.

        Examples:
            >>> Verb('eat').as_regex()
            re.compile('eats|eating|eaten|eat|ate', re.IGNORECASE)
        """
        # A set removes forms that happen to coincide.
        forms = {
            self.singular(),
            self.plural(),
            self.past(),
            self.past_part(),
            self.pres_part(),
        }
        # Reverse lexicographic order places every form ahead of its own
        # prefixes (e.g. "eats" before "eat"), so the alternation tries the
        # longer alternative first.
        alternatives = sorted(map(re.escape, forms), reverse=True)  # type: ignore
        return re.compile("|".join(alternatives), flags=re.I)

    # ---------------------------- #
    # Methods exclusively for Verb #
    # ---------------------------- #

    @staticmethod
    def _stem(term: str) -> str:
        """Stem `term` so that "-ed"/"-ing" can be appended for past and present participle forms.

        The first matching entry of `_stem_regexes` decides the result. When
        none matches, the final letter is doubled if the word ends in
        (roughly) consonant-vowel-consonant and is either one syllable long
        or stressed on its last syllable.

        Args:
            term (str): The input word to stem.

        Returns:
            str: The stemmed version of `term`, ready for appending "-ed" or "-ing".
        """
        if not term:
            return term

        # Apply the first relevant transform
        for regex, transform in Verb._stem_regexes.items():
            match = regex.search(term)
            if match:
                # Adding `term[match.end():]` is unnecessary for now,
                # but allows for more complex regexes.
                return term[:match.start()] + transform(match) + term[match.end():]

        # Get the last word from the term. A term made up solely of hyphens
        # and/or whitespace contains no word at all, so there is nothing to
        # stem (indexing the empty list used to raise IndexError here).
        words = term.replace("-", " ").split()
        if not words:
            return term

        # Remove a potential prefix from that last word
        _, last_word = Verb.split_prefix(words[-1])

        # Get a set of known syllable counts for last_word
        syllable_count = Syllable.count_syllables(last_word)

        # Duplicate last letter if:
        if (
                # The word is certainly just one syllable, or
                1 in syllable_count
                # No syllable count is known, but the word is guessed to be one syllable, or
                or (not syllable_count and Syllable.guess_if_one_syllable(last_word))
                # The last syllable is stressed
                or (Syllable.ends_with_stress(last_word))
            ) and Verb._stem_double_regex.search(term):  # AND the word ends in (roughly) CVC
            return term + term[-1]

        return term

    @staticmethod
    def split_prefix(term: str) -> Tuple[str, str]:
        """Split the prefix from the term.

        The prefixes in `_prefixes` are tried in order. A prefix is only
        split off when more than one character remains after it. The match
        is case-sensitive.

        Examples:
            >>> Verb.split_prefix("unbind")
            ('un', 'bind')
            >>> Verb.split_prefix("mistake")
            ('mis', 'take')
            >>> Verb.split_prefix("reappear")
            ('re', 'appear')
            >>> Verb.split_prefix("use")
            ('', 'use')

        Args:
            term (str): The input word to potentially split a prefix from.

        Returns:
            Tuple[str, str]: The first string is the prefix, the second string is the remainder.
                If the input does not have a prefix to split, then the first string is empty,
                while the second string is the full input `term`.
        """
        for prefix in Verb._prefixes:
            remainder = term[len(prefix):]
            if term.startswith(prefix) and len(remainder) > 1:
                return prefix, remainder
        return "", term

    @staticmethod
    def get_subterm(term: str) -> Tuple[str, str]:
        """Extract last sub-section (split by '-') of the first word.

        Only a literal space character separates the first word from the
        rest. If the first word ends with a hyphen it is not split further.

        Examples:
            >>> Verb.get_subterm("aaa-bbb ccc")
            ('bbb', 'aaa-{} ccc')

        Args:
            term (str): The input word to potentially split the subterm from.

        Returns:
            Tuple[str, str]: The first string is the last sub-section, e.g. "bbb",
                while the second string is the format string, e.g. "aaa-{} ccc",
                into which a converted sub-section can be substituted with `str.format`.
        """
        def _literal(text: str) -> str:
            # The surrounding text ends up inside a `str.format` template, so
            # braces in it must be doubled to survive `form.format(...)`.
            return text.replace("{", "{{").replace("}", "}}")

        # Split off first word
        first_word, space, remainder = term.partition(" ")
        form = "{}" + _literal(space + remainder)

        # Don't split if the word ends with a hyphen
        if first_word.endswith("-"):
            return first_word, form

        # Split off last sub-word of first word
        head, hyphen, last_section = first_word.rpartition("-")
        if hyphen:
            form = _literal(head + hyphen) + form

        return last_section, form

    def past(self) -> str:
        """Returns this Verb's past form.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.past()
            'flew'

        Returns:
            str: This Verb's past form.
        """
        lowered = self.term.lower()

        # "To be" is special
        if lowered in ["is", "am"]:
            return self._encase("was")
        if lowered == "are":
            return self._encase("were")

        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.term)

        # If this term is in the list of known cases
        if term.lower() in past_of:
            return self._encase(form.format(past_of[term.lower()]))

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_past(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Convert the root of the term
        root, form = Verb.get_subterm(self.plural())
        known = convert_to_past(root)
        if known:
            return self._encase(form.format(known))

        # Otherwise use the standard pattern on the root
        return self._encase(form.format(Verb._stem(root) + "ed"))

    def pres_part(self) -> str:
        """Returns this Verb's present participle form.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.pres_part()
            'flying'

        Returns:
            str: This Verb's present participle form.
        """
        # If this term is in the list of known cases
        lowered = self.term.lower()
        if lowered in pres_part_of:
            return self._encase(pres_part_of[lowered])

        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.plural())

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_pres_part(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Convert the full (sub)term
        known = convert_to_pres_part(term)

        # Otherwise use the standard pattern on the root
        if known is None:
            known = Verb._stem(term) + "ing"
        return self._encase(form.format(known))

    def past_part(self) -> str:
        """Returns this Verb's past participle form.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.past_part()
            'flown'

        Returns:
            str: This Verb's past participle form.
        """
        # If this term is in the list of known cases
        lowered = self.term.lower()
        if lowered in past_part_of:
            return self._encase(past_part_of[lowered])

        # Get first word, last section of that word (if "-" in the word)
        term, form = Verb.get_subterm(self.plural())

        # Try splitting off a prefix
        prefix, subterm = Verb.split_prefix(term)
        if prefix:
            known = convert_to_past_part(subterm)
            if known:
                return self._encase(form.format(prefix + known))

        # Convert the full (sub)term
        known = convert_to_past_part(term)

        # Otherwise use the standard pattern on the root
        if known is None:
            known = Verb._stem(term) + "ed"
        return self._encase(form.format(known))

    def is_past(self) -> bool:
        """Detect whether this Verb is in past form.

        Returns:
            bool: True if this Verb is deemed past.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)
        return is_past(term)

    def is_pres_part(self) -> bool:
        """Detect whether this Verb is in present participle form.

        Returns:
            bool: True if this Verb is deemed present participle.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)
        return is_pres_part(term)

    def is_past_part(self) -> bool:
        """Detect whether this Verb is in past participle form.

        Returns:
            bool: True if this Verb is deemed past participle.
        """
        # Get first word, last section of that word (if "-" in the word)
        term, _ = Verb.get_subterm(self.term)
        return is_past_part(term)

    def indefinite(self, count: Optional[int] = 1) -> str:
        """Return the singular if `count` == 1, and the plural otherwise.

        Examples:
            >>> verb = Verb("fly")
            >>> verb.indefinite(count = 1)
            'flies'
            >>> verb.indefinite(count = 3)
            'fly'

        Args:
            count (Optional[int], optional): The number of objects on which this verb applies.
                Defaults to 1.

        Returns:
            str: The singular if `count` == 1, and the plural otherwise.
        """
        if count == 1:
            return self.singular()
        return self.plural()