dotfiles

My personal shell configs and stuff
git clone git://git.alex.balgavy.eu/dotfiles.git
Log | Files | Refs | Submodules | README | LICENSE

dictation.py (11817B)


      1 # Descended from https://github.com/dwiel/talon_community/blob/master/misc/dictation.py
      2 from talon import Module, Context, ui, actions, clip, app, grammar
      3 from typing import Optional, Tuple, Literal
      4 import re
      5 
# Talon module object through which this file declares its setting, captures
# and actions.
mod = Module()

# Opt-in toggle (off by default) read by dictation_insert(): when enabled, the
# text around the cursor is inspected before inserting so that spacing and
# capitalization can take it into account.
setting_context_sensitive_dictation = mod.setting(
    "context_sensitive_dictation",
    type=bool,
    default=False,
    desc="Look at surrounding text to improve auto-capitalization/spacing in dictation mode. By default, this works by selecting that text & copying it to the clipboard, so it may be slow or fail in some applications.",
)
     14 
     15 @mod.capture(rule="({user.vocabulary} | <word>)")
     16 def word(m) -> str:
     17     """A single word, including user-defined vocabulary."""
     18     try:
     19         return m.vocabulary
     20     except AttributeError:
     21         return " ".join(actions.dictate.replace_words(actions.dictate.parse_words(m.word)))
     22 
     23 @mod.capture(rule="({user.vocabulary} | <phrase>)+")
     24 def text(m) -> str:
     25     """A sequence of words, including user-defined vocabulary."""
     26     return format_phrase(m)
     27 
     28 @mod.capture(rule="({user.vocabulary} | {user.punctuation} | <phrase>)+")
     29 def prose(m) -> str:
     30     """Mixed words and punctuation, auto-spaced & capitalized."""
     31     text, _state = auto_capitalize(format_phrase(m))
     32     return text
     33 
     34 
     35 # ---------- FORMATTING ---------- #
     36 def format_phrase(m):
     37     words = capture_to_words(m)
     38     result = ""
     39     for i, word in enumerate(words):
     40         if i > 0 and needs_space_between(words[i-1], word):
     41             result += " "
     42         result += word
     43     return result
     44 
     45 def capture_to_words(m):
     46     words = []
     47     for item in m:
     48         words.extend(
     49             actions.dictate.replace_words(actions.dictate.parse_words(item))
     50             if isinstance(item, grammar.vm.Phrase) else
     51             item.split(" "))
     52     return words
     53 
     54 # There must be a simpler way to do this, but I don't see it right now.
     55 no_space_after = re.compile(r"""
     56   (?:
     57     [\s\-_/#@([{‘“]     # characters that never need space after them
     58   | (?<!\w)[$£€¥₩₽₹]    # currency symbols not preceded by a word character
     59   # quotes preceded by beginning of string, space, opening braces, dash, or other quotes
     60   | (?: ^ | [\s([{\-'"] ) ['"]
     61   )$""", re.VERBOSE)
     62 no_space_before = re.compile(r"""
     63   ^(?:
     64     [\s\-_.,!?;:/%)\]}’”]   # characters that never need space before them
     65   | [$£€¥₩₽₹](?!\w)         # currency symbols not followed by a word character
     66   # quotes followed by end of string, space, closing braces, dash, other quotes, or some punctuation.
     67   | ['"] (?: $ | [\s)\]}\-'".,!?;:/] )
     68   )""", re.VERBOSE)
     69 
     70 # no_space_before = set("\n .,!?;:-_/%)]}")
     71 # no_space_after = set("\n -_/#@([{")
     72 def needs_space_between(before: str, after: str) -> bool:
     73     return (before and after
     74             and not no_space_after.search(before)
     75             and not no_space_before.search(after))
     76     # return (before != "" and after != ""
     77     #         and before[-1] not in no_space_after
     78     #         and after[0] not in no_space_before)
     79 
     80 # # TESTS, uncomment to enable
     81 # assert needs_space_between("a", "break")
     82 # assert needs_space_between("break", "a")
     83 # assert needs_space_between(".", "a")
     84 # assert needs_space_between("said", "'hello")
     85 # assert needs_space_between("hello'", "said")
     86 # assert needs_space_between("hello.", "'John")
     87 # assert needs_space_between("John.'", "They")
     88 # assert needs_space_between("paid", "$50")
     89 # assert needs_space_between("50$", "payment")
     90 # assert not needs_space_between("", "")
     91 # assert not needs_space_between("a", "")
     92 # assert not needs_space_between("a", " ")
     93 # assert not needs_space_between("", "a")
     94 # assert not needs_space_between(" ", "a")
     95 # assert not needs_space_between("a", ",")
     96 # assert not needs_space_between("'", "a")
     97 # assert not needs_space_between("a", "'")
     98 # assert not needs_space_between("and-", "or")
     99 # assert not needs_space_between("mary", "-kate")
    100 # assert not needs_space_between("$", "50")
    101 # assert not needs_space_between("US", "$")
    102 # assert not needs_space_between("(", ")")
    103 # assert not needs_space_between("(", "e.g.")
    104 # assert not needs_space_between("example", ")")
    105 # assert not needs_space_between("example", '".')
    106 # assert not needs_space_between("example", '."')
    107 # assert not needs_space_between("hello'", ".")
    108 # assert not needs_space_between("hello.", "'")
    109 
    110 def auto_capitalize(text, state = None):
    111     """
    112     Auto-capitalizes text. `state` argument means:
    113 
    114     - None: Don't capitalize initial word.
    115     - "sentence start": Capitalize initial word.
    116     - "after newline": Don't capitalize initial word, but we're after a newline.
    117       Used for double-newline detection.
    118 
    119     Returns (capitalized text, updated state).
    120     """
    121     output = ""
    122     # Imagine a metaphorical "capitalization charge" travelling through the
    123     # string left-to-right.
    124     charge = state == "sentence start"
    125     newline = state == "after newline"
    126     for c in text:
    127         # Sentence endings & double newlines create a charge.
    128         if c in ".!?" or (newline and c == "\n"):
    129             charge = True
    130         # Alphanumeric characters and commas/colons absorb charge & try to
    131         # capitalize (for numbers & punctuation this does nothing, which is what
    132         # we want).
    133         elif charge and (c.isalnum() or c in ",:"):
    134             charge = False
    135             c = c.capitalize()
    136         # Otherwise the charge just passes through.
    137         output += c
    138         newline = c == "\n"
    139     return output, ("sentence start" if charge else
    140                     "after newline" if newline else None)
    141 
    142 
    143 # ---------- DICTATION AUTO FORMATTING ---------- #
    144 class DictationFormat:
    145     def __init__(self):
    146         self.reset()
    147 
    148     def reset(self):
    149         self.before = ""
    150         self.state = "sentence start"
    151 
    152     def update_context(self, before):
    153         if before is None: return
    154         self.reset()
    155         self.pass_through(before)
    156 
    157     def pass_through(self, text):
    158         _, self.state = auto_capitalize(text, self.state)
    159         self.before = text or self.before
    160 
    161     def format(self, text):
    162         if needs_space_between(self.before, text):
    163             text = " " + text
    164         text, self.state = auto_capitalize(text, self.state)
    165         self.before = text or self.before
    166         return text
    167 
# Single shared formatter instance used by the actions below.
dictation_formatter = DictationFormat()
# The remembered context describes text in one particular place, so it is
# discarded whenever an app is deactivated or a window receives focus.
ui.register("app_deactivate", lambda app: dictation_formatter.reset())
ui.register("win_focus", lambda win: dictation_formatter.reset())
    171 
    172 @mod.action_class
    173 class Actions:
    174     def dictation_format_reset():
    175         """Resets the dictation formatter"""
    176         return dictation_formatter.reset()
    177 
    178     def dictation_insert_raw(text: str):
    179         """Inserts text as-is, without invoking the dictation formatter."""
    180         dictation_formatter.pass_through(text)
    181         actions.insert(text)
    182 
    183     def dictation_insert(text: str) -> str:
    184         """Inserts dictated text, formatted appropriately."""
    185         # do_the_dance = whether we should try to be context-sensitive. Since
    186         # whitespace is not affected by formatter state, if text.isspace() is
    187         # True we don't need context-sensitivity.
    188         do_the_dance = (setting_context_sensitive_dictation.get()
    189                         and not text.isspace())
    190         if do_the_dance:
    191             dictation_formatter.update_context(
    192                 actions.user.dictation_peek_left(clobber=True))
    193         text = dictation_formatter.format(text)
    194         actions.user.add_phrase_to_history(text)
    195         actions.insert(text)
    196         # Add a space after cursor if necessary.
    197         if not do_the_dance or not text or no_space_after.search(text):
    198             return
    199         char = actions.user.dictation_peek_right()
    200         if char is not None and needs_space_between(text, char):
    201             actions.insert(" ")
    202             actions.edit.left()
    203 
    204     def dictation_peek_left(clobber: bool = False) -> Optional[str]:
    205         """
    206         Tries to get some text before the cursor, ideally a word or two, for the
    207         purpose of auto-spacing & -capitalization. Results are not guaranteed;
    208         dictation_peek_left() may return None to indicate no information. (Note
    209         that returning the empty string "" indicates there is nothing before
    210         cursor, ie. we are at the beginning of the document.)
    211 
    212         If there is currently a selection, dictation_peek_left() must leave it
    213         unchanged unless `clobber` is true, in which case it may clobber it.
    214         """
    215         # Get rid of the selection if it exists.
    216         if clobber: actions.user.clobber_selection_if_exists()
    217         # Otherwise, if there's a selection, fail.
    218         elif "" != actions.edit.selected_text(): return None
    219 
    220         # In principle the previous word should suffice, but some applications
    221         # have a funny concept of what the previous word is (for example, they
    222         # may only take the "`" at the end of "`foo`"). To be double sure we
    223         # take two words left. I also tried taking a line up + a word left, but
    224         # edit.extend_up() = key(shift-up) doesn't work consistently in the
    225         # Slack webapp (sometimes escapes the text box).
    226         actions.edit.extend_word_left()
    227         actions.edit.extend_word_left()
    228         text = actions.edit.selected_text()
    229         # if we're at the beginning of the document/text box, we may not have
    230         # selected any text, in which case we shouldn't move the cursor.
    231         if text:
    232             # Unfortunately, in web Slack, if our selection ends at newline,
    233             # this will go right over the newline. Argh.
    234             actions.edit.right()
    235         return text
    236 
    237     def clobber_selection_if_exists():
    238         """Deletes the currently selected text if it exists; otherwise does nothing."""
    239         actions.key("space backspace")
    240         # This space-backspace trick is fast and reliable but has the
    241         # side-effect of cluttering the undo history. Other options:
    242         #
    243         # 1. Call edit.cut() inside a clip.revert() block. This assumes
    244         #    edit.cut() is supported AND will be a no-op if there's no
    245         #    selection. Unfortunately, sometimes one or both of these is false,
    246         #    eg. the notion webapp makes ctrl-x cut the current block by default
    247         #    if nothing is selected.
    248         #
    249         # 2. Test whether a selection exists by asking whether
    250         #    edit.selected_text() is empty; if it does, use edit.delete(). This
    251         #    usually uses the clipboard, which can be quite slow. Also, not sure
    252         #    how this would interact with switching edit.selected_text() to use
    253         #    the selection clipboard on linux, which can be nonempty even if no
    254         #    text is selected in the current application.
    255         #
    256         # Perhaps this ought to be configurable by a setting.
    257 
    258     def dictation_peek_right() -> Optional[str]:
    259         """
    260         Tries to get a few characters after the cursor for auto-spacing.
    261         Results are not guaranteed; dictation_peek_right() may return None to
    262         indicate no information. (Note that returning the empty string ""
    263         indicates there is nothing after cursor, ie. we are at the end of the
    264         document.)
    265         """
    266         # We grab two characters because I think that's what no_space_before
    267         # needs in the worst case. An example where the second character matters
    268         # is inserting before (1) "' hello" vs (2) "'hello". In case (1) we
    269         # don't want to add space, in case (2) we do.
    270         actions.edit.extend_right()
    271         actions.edit.extend_right()
    272         after = actions.edit.selected_text()
    273         if after: actions.edit.left()
    274         return after
    275 
# Use the dictation formatter in dictation mode.
# This context is matched only by "mode: dictation", so the override attached
# to it below applies only there.
dictation_ctx = Context()
dictation_ctx.matches = r"""
mode: dictation
"""
    281 
    282 @dictation_ctx.action_class("main")
    283 class main_action:
    284     def auto_insert(text): actions.user.dictation_insert(text)