# Source code for inflex.indefinite_core

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

# This module implements A/AN inflexion for nouns

# Special cases of A/AN

# Flag sets shared by the patterns in this module. Both include re.VERBOSE,
# so whitespace inside a pattern is ignored and only serves as layout.
# `ix` additionally ignores case; `xms` does not, so a pattern compiled with
# `xms` only matches the letter case it is written in.
ix = re.IGNORECASE | re.VERBOSE
xms = re.VERBOSE | re.MULTILINE | re.DOTALL

# Ordinals built from one letter plus "th" or "-th" (the whole string must
# match). The letter decides the article: the first class takes "an", the
# second takes "a".
ORDINAL_AN = re.compile(r"\A [aefhilmnorsx]   -?th \Z", flags=ix)
ORDINAL_A = re.compile(r"\A [bcdgjkpqtuvwyz] -?th \Z", flags=ix)

# Word beginnings that take "an" even though they start with a consonant
# letter (or, for "euler", with "eu"). "hour" is excluded when followed by
# "i" (e.g. "houri").
EXPLICIT_AN = re.compile(
    r"\A (?: euler | hour(?!i) | heir | honest | hono )", flags=ix)

# A string that consists of exactly one letter, split into the same two
# letter classes as the ordinal patterns above.
SINGLE_AN = re.compile(r"\A [aefhilmnorsx]   \Z", flags=ix)
SINGLE_A = re.compile(r"\A [bcdgjkpqtuvwyz] \Z", flags=ix)

# This pattern matches strings of capitals (i.e. abbreviations) that
# start with a "vowel-sound" consonant followed by another consonant,
# and which are not likely to be real words.
#
# It is compiled without re.IGNORECASE, so only uppercase input matches.
# The negative lookahead rejects prefixes that look like the start of an
# ordinary word written in capitals (a consonant cluster followed by a
# vowel, plus a few explicit exceptions).

ABBREV_AN = re.compile(r"""
    \A
    (?! FJO | [HLMNS]Y.  | RY[EO] | SQU
    |   ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU]
    )
    [FHLMNRSX][A-Z]
""", flags=xms)

# This pattern codes the beginnings of all English words beginning with a
# 'Y' followed by a consonant. Any other Y-consonant prefix therefore
# implies an abbreviation...

INITIAL_Y_AN = re.compile(
    r"\A y (?: b[lor] | cl[ea] | fere | gg | p[ios] | rou | tt)", flags=ix)


def prepend_indefinite_article(word: str) -> str:
    """Prepend the indefinite article ("a" or "an") to `word`.

    The article is chosen by `select_indefinite_article` and joined to
    `word` with a single space; `word` itself is returned unmodified.

    Args:
        word (str): Input word or collocation.

    Returns:
        str: `word` prepended by "a" or "an".
    """
    return f"{select_indefinite_article(word)} {word}"


def select_indefinite_article(word: str) -> str:
    """Return the correct indefinite article ("a" or "an") for `word`.

    The rules below are tried in order and the first one that matches
    decides the result, so the more specific checks must stay ahead of the
    more general ones.

    Args:
        word (str): Input word or collocation.

    Returns:
        str: Either "a" or "an".
    """
    # Handle ordinal forms: Single character followed by "-th" or "th",
    # eg "A-th" -> "an A-th".
    if ORDINAL_A.match(word):
        return "a"
    if ORDINAL_AN.match(word):
        return "an"

    # Handle special cases: Special words (honest) or a single character,
    # eg "a" -> "an a".
    if EXPLICIT_AN.match(word):
        return "an"
    if SINGLE_AN.match(word):
        return "an"
    if SINGLE_A.match(word):
        return "a"

    # Handle abbreviations: all-capital strings first, then a single letter
    # followed by "." or "-". The "an" letter class is tested before the
    # catch-all [a-z] class, which would otherwise shadow it.
    if ABBREV_AN.match(word):
        return "an"
    if re.match(r"\A [aefhilmnorsx][.-]", word, flags=ix):
        return "an"
    if re.match(r"\A [a-z][.-]", word, flags=ix):
        return "a"

    # Handle consonants: anything that does not start with a vowel or "y"
    # (the class is negated, so this includes non-letters as well).
    if re.match(r"\A [^aeiouy]", word, flags=ix):
        return "a"

    # Handle special vowel forms. These must run before the generic vowel
    # rule further down, which would otherwise answer "an" for all of them.
    if re.match(r"\A e [uw]", word, flags=ix):
        return "a"
    if re.match(r"\A onc?e \b ", word, flags=ix):
        return "a"
    if re.match(r"\A uni (?: [^nmd] | mo)", word, flags=ix):
        return "a"
    if re.match(r"\A ut[th]", word, flags=ix):
        return "an"
    if re.match(r"\A u [bcfhjkqrst] [aeiou]", word, flags=ix):
        return "a"

    # Handle special capitals. Deliberately case-sensitive (re.X only):
    # uppercase "UN..." / "UK..." take "a".
    if re.match(r"\A U [NK] [AIEO]?", word, flags=re.X):
        return "a"

    # Handle vowels
    if re.match(r"\A [aeiou]", word, flags=ix):
        return "an"

    # Handle Y... (before certain consonants implies (unnaturalized)
    # "I.." sound)
    if INITIAL_Y_AN.match(word):
        return "an"

    # Otherwise, guess "a"
    return "a"