dotfiles

My personal shell configs and stuff
git clone git://git.alex.balgavy.eu/dotfiles.git
Log | Files | Refs | Submodules | README | LICENSE

numbers.py (7621B)


      1 from talon import Context, Module, actions
      2 from typing import List, Optional, Union, Iterator
      3 
# Talon Module instance; its `.capture` decorator is applied to
# `number_string` further down in this file.
mod = Module()
# Talon Context instance; its `.capture` decorator is applied to the
# remaining capture functions (digit_string, digits, number, ...) below.
ctx = Context()
      6 
      7 digits = "zero one two three four five six seven eight nine".split()
      8 teens = "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen".split()
      9 tens = "ten twenty thirty forty fifty sixty seventy eighty ninety".split()
     10 scales = "hundred thousand million billion trillion quadrillion quintillion sextillion septillion octillion nonillion decillion".split()
     11 
     12 digits_map = {n: i for i, n in enumerate(digits)}
     13 digits_map["oh"] = 0
     14 teens_map = {n: i + 11 for i, n in enumerate(teens)}
     15 tens_map = {n: 10 * (i + 1) for i, n in enumerate(tens)}
     16 scales_map = {n: 10 ** (3 * (i+1)) for i, n in enumerate(scales[1:])}
     17 scales_map["hundred"] = 100
     18 
     19 numbers_map = digits_map.copy()
     20 numbers_map.update(teens_map)
     21 numbers_map.update(tens_map)
     22 numbers_map.update(scales_map)
     23 
     24 def parse_number(l: List[str]) -> str:
     25     """Parses a list of words into a number/digit string."""
     26     l = list(scan_small_numbers(l))
     27     for scale in scales:
     28         l = parse_scale(scale, l)
     29     return "".join(str(n) for n in l)
     30 
     31 def scan_small_numbers(l: List[str]) -> Iterator[Union[str,int]]:
     32     """
     33     Takes a list of number words, yields a generator of mixed numbers & strings.
     34     Translates small number terms (<100) into corresponding numbers.
     35     Drops all occurrences of "and".
     36     Smashes digits onto tens words, eg. ["twenty", "one"] -> [21].
     37     But note that "ten" and "zero" are excluded, ie:
     38       ["ten", "three"] -> [10, 3]
     39       ["fifty", "zero"] -> [50, 0]
     40     Does nothing to scale words ("hundred", "thousand", "million", etc).
     41     """
     42     # reversed so that repeated pop() visits in left-to-right order
     43     l = [x for x in reversed(l) if x != "and"]
     44     while l:
     45         n = l.pop()
     46         # fuse tens onto digits, eg. "twenty", "one" -> 21
     47         if n in tens_map and n != "ten" and l and digits_map.get(l[-1], 0) != 0:
     48             d = l.pop()
     49             yield numbers_map[n] + numbers_map[d]
     50         # turn small number terms into corresponding numbers
     51         elif n not in scales_map:
     52             yield numbers_map[n]
     53         else:
     54             yield n
     55 
     56 def parse_scale(scale: str, l: List[Union[str,int]]) -> List[Union[str,int]]:
     57     """Parses a list of mixed numbers & strings for occurrences of the following
     58     pattern:
     59 
     60         <multiplier> <scale> <remainder>
     61 
     62     where <scale> is a scale word like "hundred", "thousand", "million", etc and
     63     multiplier and remainder are numbers or strings of numbers of the
     64     appropriate size. For example:
     65 
     66         parse_scale("hundred", [1, "hundred", 2]) -> [102]
     67         parse_scale("thousand", [12, "thousand", 3, 45]) -> [12345]
     68 
     69     We assume that all scales of lower magnitude have already been parsed; don't
     70     call parse_scale("thousand") until you've called parse_scale("hundred").
     71     """
     72     scale_value = scales_map[scale]
     73     scale_digits = len(str(scale_value))
     74 
     75     # Split the list on the desired scale word, then parse from left to right.
     76     left, *splits = split_list(scale, l)
     77     for right in splits:
     78         # (1) Figure out the multiplier by looking to the left of the scale
     79         # word. We ignore non-integers because they are scale words that we
     80         # haven't processed yet; this strategy means that "thousand hundred"
     81         # gets parsed as 1,100 instead of 100,000, but "hundred thousand" is
     82         # parsed correctly as 100,000.
     83         before = 1 # default multiplier
     84         if left and isinstance(left[-1], int) and left[-1] != 0:
     85             before = left.pop()
     86 
     87         # (2) Absorb numbers to the right, eg. in [1, "thousand", 1, 26], "1
     88         # thousand" absorbs ["1", "26"] to make 1,126. We pull numbers off
     89         # `right` until we fill up the desired number of digits.
     90         after = ""
     91         while right and isinstance(right[0], int):
     92             next = after + str(right[0])
     93             if len(next) >= scale_digits: break
     94             after = next
     95             right.pop(0)
     96         after = int(after) if after else 0
     97 
     98         # (3) Push the parsed number into place, append whatever was left
     99         # unparsed, and continue.
    100         left.append(before * scale_value + after)
    101         left.extend(right)
    102 
    103     return left
    104 
    105 def split_list(value, l: list) -> Iterator:
    106     """Splits a list by occurrences of a given value."""
    107     start = 0
    108     while True:
    109         try: i = l.index(value, start)
    110         except ValueError: break
    111         yield l[start:i]
    112         start = i+1
    113     yield l[start:]
    114 
    115 
    116 # # ---------- TESTS (uncomment to run) ----------
    117 # def test_number(expected, string):
    118 #     print('testing:', string)
    119 #     l = list(scan_small_numbers(string.split()))
    120 #     print("  scan --->", l)
    121 #     for scale in scales:
    122 #         old = l
    123 #         l = parse_scale(scale, l)
    124 #         if scale in old: print("  parse -->", l)
    125 #         else: assert old == l, "parse_scale should do nothing if the scale does not occur in the list"
    126 #     result = "".join(str(n) for n in l)
    127 #     assert result == parse_number(string.split())
    128 #     assert str(expected) == result, f"parsing {string!r}, expected {expected}, got {result}"
    129 
    130 # test_number(105000, "one hundred and five thousand")
    131 # test_number(1000000, "one thousand thousand")
    132 # test_number(1501000, "one million five hundred one thousand")
    133 # test_number(1501106, "one million five hundred and one thousand one hundred and six")
    134 # test_number(123, "one two three")
    135 # test_number(123, "one twenty three")
    136 # test_number(104, "ten four") # borderline, but valid in some dialects
    137 # test_number(1066, "ten sixty six") # a common way of saying years
    138 # test_number(1906, "nineteen oh six") # year
    139 # test_number(2001, "twenty oh one") # year
    140 # test_number(2020, "twenty twenty")
    141 # test_number(1001, "one thousand one")
    142 # test_number(1010, "one thousand ten")
    143 # test_number(123456, "one hundred and twenty three thousand and four hundred and fifty six")
    144 # test_number(123456, "one twenty three thousand four fifty six")
    145 
    146 # ## failing (and somewhat debatable) tests from old numbers.py
    147 # #test_number(10000011, "one million one one")
    148 # #test_number(100001010, "one million ten ten")
    149 # #test_number(1050006000, "one hundred thousand and five thousand and six thousand")
    150 
    151 
    152 # ---------- CAPTURES ----------
    153 alt_digits = "(" + ("|".join(digits_map.keys())) + ")"
    154 alt_teens = "(" + ("|".join(teens_map.keys())) + ")"
    155 alt_tens = "(" + ("|".join(tens_map.keys())) + ")"
    156 alt_scales = "(" + ("|".join(scales_map.keys())) + ")"
    157 number_word = "(" + "|".join(numbers_map.keys()) + ")"
    158 
    159 # TODO: allow things like "double eight" for 88
    160 @ctx.capture("digit_string", rule=f"({alt_digits} | {alt_teens} | {alt_tens})+")
    161 def digit_string(m) -> str: return parse_number(list(m))
    162 
    163 @ctx.capture("digits", rule="<digit_string>")
    164 def digits(m) -> int:
    165     """Parses a phrase representing a digit sequence, returning it as an integer."""
    166     return int(m.digit_string)
    167 
    168 @mod.capture(rule=f"{number_word}+ (and {number_word}+)*")
    169 def number_string(m) -> str:
    170     """Parses a number phrase, returning that number as a string."""
    171     return parse_number(list(m))
    172 
    173 @ctx.capture("number", rule="<user.number_string>")
    174 def number(m) -> int:
    175     """Parses a number phrase, returning it as an integer."""
    176     return int(m.number_string)
    177 
    178 @ctx.capture("number_signed", rule=f"[negative|minus] <number>")
    179 def number_signed(m):
    180     number = m[-1]
    181     return -number if (m[0] in ["negative", "minus"]) else number
    182 
    183 @ctx.capture(
    184     "number_small", rule=f"({alt_digits} | {alt_teens} | {alt_tens} [{alt_digits}])"
    185 )
    186 def number_small(m): return int(parse_number(list(m)))