#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
from typing import Dict, List, Optional, Pattern, Union
from inflex.term import Term
from inflex.noun_core import (
is_singular,
is_plural,
convert_to_classical_plural,
convert_to_modern_plural,
convert_to_singular,
)
from inflex.indefinite_core import (
select_indefinite_article,
prepend_indefinite_article,
)
[docs]class Noun(Term):
"""Class for detecting and converting to noun forms."""
_noun_inflection: Dict[str, Dict[str, Dict[str, Union[str, int, List[str]]]]] = {
# CASE
# TERM 0TH 1ST 2ND 3RD
"nominative": {
"i": {
"number": "singular",
"person": 1,
"singular": ["I", "I", "you", "it"],
"plural": ["we", "we", "you", "they"],
},
"you": {
"number": "singular",
"person": 2,
"singular": ["you", "I", "you", "it"],
"plural": ["you", "we", "you", "they"],
},
"she": {
"number": "singular",
"person": 3,
"singular": ["she", "I", "you", "she"],
"plural": ["they", "we", "you", "they"],
},
"he": {
"number": "singular",
"person": 3,
"singular": ["he", "I", "you", "he"],
"plural": ["they", "we", "you", "they"],
},
"it": {
"number": "singular",
"person": 3,
"singular": ["it", "I", "you", "it"],
"plural": ["they", "we", "you", "they"],
},
"we": {
"number": "plural",
"person": 1,
"singular": ["I", "I", "you", "it"],
"plural": ["we", "we", "you", "they"],
},
"they": {
"number": "plural",
"person": 3,
"singular": ["it", "I", "you", "it"],
"plural": ["they", "we", "you", "they"],
},
"one": {
"number": "singular",
"person": 3,
"singular": ["one", "I", "you", "one"],
"plural": ["some", "we", "you", "some"],
},
"this": {
"number": "singular",
"person": 3,
"singular": ["this", "this", "this", "this"],
"plural": ["these", "these", "these", "these"],
},
"that": {
"number": "singular",
"person": 3,
"singular": ["that", "that", "that", "that"],
"plural": ["those", "those", "those", "those"],
},
"these": {
"number": "plural",
"person": 3,
"singular": ["this", "this", "this", "this"],
"plural": ["these", "these", "these", "these"],
},
"those": {
"number": "plural",
"person": 3,
"singular": ["that", "that", "that", "that"],
"plural": ["those", "those", "those", "those"],
},
"who": {
"number": "singular",
"person": 3,
"singular": ["who", "who", "who", "who"],
"plural": ["who", "who", "who", "who"],
},
"whoever": {
"number": "singular",
"person": 3,
"singular": ["whoever", "whoever", "whoever", "whoever"],
"plural": ["whoever", "whoever", "whoever", "whoever"],
},
"whosoever": {
"number": "singular",
"person": 3,
"singular": ["whosoever", "whosoever", "whosoever", "whosoever"],
"plural": ["whosoever", "whosoever", "whosoever", "whosoever"],
},
},
# CASE
# TERM 0TH 1ST 2ND 3RD
"objective": {
"me": {
"number": "singular",
"person": 1,
"singular": ["me", "me", "you", "it"],
"plural": ["us", "us", "you", "them"],
},
"you": {
"number": "singular",
"person": 2,
"singular": ["you", "me", "you", "it"],
"plural": ["you", "us", "you", "them"],
},
"her": {
"number": "singular",
"person": 3,
"singular": ["her", "me", "you", "her"],
"plural": ["them", "us", "you", "them"],
},
"him": {
"number": "singular",
"person": 3,
"singular": ["him", "me", "you", "him"],
"plural": ["them", "us", "you", "them"],
},
"it": {
"number": "singular",
"person": 3,
"singular": ["it", "me", "you", "it"],
"plural": ["them", "us", "you", "them"],
},
"one": {
"number": "singular",
"person": 3,
"singular": ["one", "me", "you", "one"],
"plural": ["some", "us", "you", "some"],
},
"us": {
"number": "plural",
"person": 1,
"singular": ["me", "me", "you", "it"],
"plural": ["us", "us", "you", "them"],
},
"them": {
"number": "plural",
"person": 3,
"singular": ["it", "me", "you", "it"],
"plural": ["them", "us", "you", "them"],
},
"this": {
"number": "singular",
"person": 3,
"singular": ["this", "this", "this", "this"],
"plural": ["these", "these", "these", "these"],
},
"that": {
"number": "singular",
"person": 3,
"singular": ["that", "that", "that", "that"],
"plural": ["those", "those", "those", "those"],
},
"these": {
"number": "plural",
"person": 3,
"singular": ["this", "this", "this", "this"],
"plural": ["these", "these", "these", "these"],
},
"those": {
"number": "plural",
"person": 3,
"singular": ["that", "that", "that", "that"],
"plural": ["those", "those", "those", "those"],
},
"whom": {
"number": "singular",
"person": 3,
"singular": ["whom", "whom", "whom", "whom"],
"plural": ["whom", "whom", "whom", "whom"],
},
"whomever": {
"number": "singular",
"person": 3,
"singular": ["whomever", "whomever", "whomever", "whomever"],
"plural": ["whomever", "whomever", "whomever", "whomever"],
},
"whomsoever": {
"number": "singular",
"person": 3,
"singular": ["whomsoever", "whomsoever", "whomsoever", "whomsoever"],
"plural": ["whomsoever", "whomsoever", "whomsoever", "whomsoever"],
},
},
# CASE
# TERM 0TH 1ST 2ND 3RD
"possessive": {
"mine": {
"number": "singular",
"person": 1,
"singular": ["mine", "mine", "yours", "its"],
"plural": ["ours", "ours", "yours", "theirs"],
},
"yours": {
"number": "singular",
"person": 2,
"singular": ["yours", "mine", "yours", "its"],
"plural": ["yours", "ours", "yours", "theirs"],
},
"hers": {
"number": "singular",
"person": 3,
"singular": ["hers", "mine", "yours", "hers"],
"plural": ["theirs", "ours", "yours", "theirs"],
},
"his": {
"number": "singular",
"person": 3,
"singular": ["his", "mine", "yours", "his"],
"plural": ["theirs", "ours", "yours", "theirs"],
},
"its": {
"number": "singular",
"person": 3,
"singular": ["its", "mine", "yours", "its"],
"plural": ["theirs", "ours", "yours", "theirs"],
},
"one's": {
"number": "singular",
"person": 3,
"singular": ["one's", "mine", "yours", "one's"],
"plural": ["theirs", "ours", "yours", "theirs"],
},
"ours": {
"number": "plural",
"person": 1,
"singular": ["mine", "mine", "yours", "its"],
"plural": ["ours", "ours", "yours", "theirs"],
},
"theirs": {
"number": "plural",
"person": 3,
"singular": ["its", "mine", "yours", "its"],
"plural": ["theirs", "ours", "yours", "theirs"],
},
"whose": {
"number": "singular",
"person": 3,
"singular": ["whose", "whose", "whose", "whose"],
"plural": ["whose", "whose", "whose", "whose"],
},
"whosever": {
"number": "singular",
"person": 3,
"singular": ["whosever", "whosever", "whosever", "whosever"],
"plural": ["whosever", "whosever", "whosever", "whosever"],
},
"whosesoever": {
"number": "singular",
"person": 3,
"singular": ["whosesoever", "whosesoever", "whosesoever", "whosesoever"],
"plural": ["whosesoever", "whosesoever", "whosesoever", "whosesoever"]
},
},
# CASE
# TERM 0TH 1ST 2ND 3RD
"reflexive": {
"myself": {
"number": "singular",
"person": 1,
"singular": ["myself", "myself", "yourself", "itself"],
"plural": ["ourselves", "ourselves", "yourselves", "themselves"],
},
"yourself": {
"number": "singular",
"person": 2,
"singular": ["yourself", "myself", "yourself", "itself"],
"plural": ["yourselves", "ourselves", "yourselves", "themselves"],
},
"herself": {
"number": "singular",
"person": 3,
"singular": ["herself", "myself", "yourself", "herself"],
"plural": ["themselves", "ourselves", "yourselves", "themselves"],
},
"himself": {
"number": "singular",
"person": 3,
"singular": ["himself", "myself", "yourself", "himself"],
"plural": ["themselves", "ourselves", "yourselves", "themselves"],
},
"themself": {
"number": "singular",
"person": 3,
"singular": ["themselves", "myself", "yourself", "themselves"],
"plural": ["themselves", "ourselves", "yourselves", "themselves"],
},
"itself": {
"number": "singular",
"person": 3,
"singular": ["itself", "myself", "yourself", "itself"],
"plural": ["themselves", "ourselves", "yourselves", "themselves"],
},
"oneself": {
"number": "singular",
"person": 3,
"singular": ["oneself", "myself", "yourself", "oneself"],
"plural": ["oneselves", "ourselves", "yourselves", "oneselves"],
},
"ourselves": {
"number": "plural",
"person": 1,
"singular": ["myself", "myself", "yourself", "itself"],
"plural": ["ourselves", "ourselves", "yourselves", "themselves"],
},
"yourselves": {
"number": "plural",
"person": 2,
"singular": ["yourself", "myself", "yourself", "itself"],
"plural": ["yourselves", "ourselves", "yourselves", "themselves"],
},
"themselves": {
"number": "plural",
"person": 3,
"singular": ["itself", "myself", "yourself", "themselves"],
"plural": ["themselves", "ourselves", "yourselves", "themselves"],
},
"oneselves": {
"number": "plural",
"person": 3,
"singular": ["oneself", "myself", "yourself", "oneself"],
"plural": ["oneselves", "ourselves", "yourselves", "oneselves"],
},
},
}
# Regex to detect a preposition
_prep_regex = re.compile(
r"""\A ( \s*(?:
about | above | across | after | among | around | athwart
| at | before | behind | below | beneath | besides?
| between | betwixt | beyond | but | by | during
| except | for | from | into | in | near | off
| of | onto | on | out | over | since | till
| to | under | until | unto | upon | within | without | with
)\s+)
""", flags=re.IGNORECASE | re.VERBOSE
)
[docs] def __init__(self, term: str):
"""Creates a Noun instance with detection and conversion methods.
Examples:
>>> noun = Noun("brother")
>>> noun.plural()
'brothers'
>>> noun.classical().plural()
'brethren'
>>> noun.is_singular()
True
Note:
Capitalisation and whitespace will be preserved between input `term` and
generated output.
Args:
term (str): Input word or collocation.
"""
super().__init__(term)
# Cached classical form of this Noun, to be lazily loaded just once.
self._classical: Optional[ClassicalNoun] = None
# ---------------------------------- #
# Override default methods from Term #
# ---------------------------------- #
[docs] def is_noun(self) -> bool:
"""Returns `True` only if this noun is instantiated via `Noun(term)`.
Returns:
bool: Returns `True` only if this noun is instantiated via `Noun(term)`.
"""
return True
[docs] def is_singular(self) -> bool:
"""Detect whether this noun is in singular form.
Returns:
bool: True if this noun is deemed singular.
"""
return is_singular(self.term)
[docs] def is_plural(self) -> bool:
"""Detect whether this noun is in plural form.
Returns:
bool: True if this noun is deemed plural.
"""
return is_plural(self.term)
[docs] def singular(self, person: int = 0) -> str:
"""Returns this noun's singular form.
Args:
person (Optional[int], optional): Represents the grammatical "person" (1st, 2nd, 3rd).
This option only affects personal and possessive pronouns. Defaults to 0.
Returns:
str: This noun's singular form.
"""
self._check_valid_person(person)
match = Noun._prep_regex.match(self.term)
if match:
prep = match.group()
term = self.term[match.end():]
for case in ["objective", "possessive", "reflexive", "nominative"]:
if term.lower() in Noun._noun_inflection[case]:
converted = Noun._noun_inflection[case][term.lower()]["singular"][person] # type: ignore
return self._encase(prep + converted)
return self._encase(prep + convert_to_singular(term))
for case in ["nominative", "objective", "possessive", "reflexive"]:
if self.term.lower() in Noun._noun_inflection[case]:
converted = Noun._noun_inflection[case][self.term.lower()]["singular"][person] # type: ignore
return self._encase(converted)
return self._encase(convert_to_singular(self.term))
[docs] def plural(self, person: int = 0) -> str:
"""Returns this noun's plural form.
Args:
person (Optional[int], optional): Represents the grammatical "person" (1st, 2nd, 3rd).
This option only affects personal and possessive pronouns. Defaults to 0.
Returns:
str: This noun's plural form.
"""
self._check_valid_person(person)
match = Noun._prep_regex.match(self.term)
if match:
prep = match.group()
term = self.term[match.end():]
for case in ["objective", "possessive", "reflexive", "nominative"]:
if term.lower() in Noun._noun_inflection[case]:
converted = Noun._noun_inflection[case][term.lower()]["plural"][person] # type: ignore
return self._encase(prep + converted)
return self._encase(prep + self._convert_to_plural(term))
for case in ["nominative", "objective", "possessive", "reflexive"]:
if self.term.lower() in Noun._noun_inflection[case]:
converted = Noun._noun_inflection[case][self.term.lower()]["plural"][person] # type: ignore
return self._encase(converted)
return self._encase(self._convert_to_plural(self.term))
def _convert_to_plural(self, term) -> str: # pylint: disable=R0201
"""The convert to plural call used by this class. Is overridden for classical nouns.
Args:
term (term): The input word or collocation.
Returns:
str: The plural form of `term`.
"""
return convert_to_modern_plural(term)
[docs] def classical(self) -> "ClassicalNoun":
"""Returns an object always inflecting in the classical/unassimilated manner.
Examples:
>>> Noun('cow').plural()
'cows'
>>> Noun('cow').classical().plural()
'kine'
Note:
Identical to `unassimilated()`.
Returns:
ClassicalNoun: A Noun object that pluralises according to classical rules.
"""
if self._classical:
return self._classical
# "them" is an exception, as "it -> they" and "it -> them" is ambiguous
if self.term and self.term.split()[-1].lower() in ["them", "they"]:
self._classical = ClassicalNoun(self._encase(self.term), self)
else:
# TODO: # pylint: disable=W0511
# - self.singular() versus self.term
# - Prevent needing to encase and then re-encase
self._classical = ClassicalNoun(self._encase(self.term), self)
return self._classical
[docs] def lemma(self) -> str:
"""Return this object's lemma form.
Examples:
>>> Noun('books').lemma()
'book'
Returns:
str: This object's lemma form.
"""
return self.singular()
[docs] def as_regex(self) -> Pattern[str]:
"""Returns a `re.Pattern` which case-insensitively matches any inflected form of the word.
Returns:
re.Pattern: Compiled regex object which case-insensitively matches any inflected form
of the word.
Examples:
>>> Noun('cherub').as_regex()
re.compile('cherubs|cherubim|cherub', re.IGNORECASE)
"""
return re.compile("|".join(sorted(map(re.escape, {self.singular(), # type: ignore
self.plural(),
self.classical().plural()
}), reverse=True)), flags=re.I)
# ---------------------------- #
# Methods exclusively for Noun #
# ---------------------------- #
[docs] def indef_article(self) -> str:
"""Return the correct indefinite article ("a" or "an") for `word`.
Args:
word (str): Input word or collocation.
Returns:
str: Either "a" or "an".
"""
return select_indefinite_article(self.term)
[docs] def indefinite(self, count: Optional[int] = 1) -> str:
"""Prepend "a" or "an" or the number to the correct form of this Noun.
Examples:
>>> noun = Noun("book")
>>> noun.indefinite(count = 1)
'a book'
>>> noun.indefinite(count = 3)
'3 books'
TODO: self.term versus self.singular()
Args:
count (Optional[int], optional): The number of objects on which this verb applies.
Defaults to 1.
Returns:
str: The singular if `count` == 1, and the plural otherwise.
"""
if count == 1:
return prepend_indefinite_article(self.singular())
return f"{count} {self.plural()}"
[docs]class ClassicalNoun(Noun):
"""Subclass of noun for detecting and converting to noun forms, with a classical plural."""
[docs] def __init__(self, term: str, modern: Noun) -> None:
"""Creates ClassicalNoun instance with detection and conversion methods.
Note:
Capitalisation and whitespace will be preserved between input `term` and
output generated via e.g. `singular`.
The main difference to Noun is that the `plural` method is overridden.
Another difference is that this class is generally initialized using the
singular form of the Noun. `as_regex()` and `__repr__()` are also overridden.
Args:
term (str): Input word or collocation.
modern (Noun): The Noun object from which `classical()` or `unassimilated` was
called to create this object.
"""
super().__init__(term)
# Modern form of this Classical noun
self._modern = modern
def _convert_to_plural(self, term) -> str:
"""The convert to classical plural call used by this class.
Args:
term (term): The input word or collocation.
Returns:
str: The plural form of `term`.
"""
return convert_to_classical_plural(term)
[docs] def classical(self) -> "ClassicalNoun":
"""Returns an object always inflecting in the classical/unassimilated manner.
Examples:
>>> Noun('cow').plural()
'cows'
>>> Noun('cow').unassimilated().plural()
'kine'
Note:
Identical to `unassimilated()`.
Returns:
Term: A Term object, or a subclass thereof.
"""
return self
[docs] def modern(self) -> "Noun":
"""Returns the non-classical modern version of this Noun.
Examples:
>>> noun = Noun('cow')
>>> noun == noun.classical().modern()
True
Returns:
Noun: The Noun object that will pluralize according to modern rules.
"""
return self._modern
[docs] def as_regex(self) -> Pattern[str]:
"""Returns a `re.Pattern` which case-insensitively matches any inflected form of the word.
Returns:
re.Pattern: Compiled regex object which case-insensitively matches any inflected form
of the word.
Examples:
>>> Noun('brother').classical().as_regex()
re.compile('brother|brethren', re.IGNORECASE)
"""
return re.compile("|".join(sorted(map(re.escape, {self.singular(), # type: ignore
self.plural(),
}), reverse=True)), flags=re.I)
def __repr__(self) -> str:
"""Return `repr(self)`.
Examples:
>>> noun = Noun("book").classical()
>>> f"My noun: {noun!r}"
"My noun: Noun('book').classical()"
"""
return f"{self._modern!r}.classical()"