dictation.py (11817B)
# Descended from https://github.com/dwiel/talon_community/blob/master/misc/dictation.py
from talon import Module, Context, ui, actions, clip, app, grammar
from typing import Optional, Tuple, Literal
import re

mod = Module()

setting_context_sensitive_dictation = mod.setting(
    "context_sensitive_dictation",
    type=bool,
    default=False,
    desc="Look at surrounding text to improve auto-capitalization/spacing in dictation mode. By default, this works by selecting that text & copying it to the clipboard, so it may be slow or fail in some applications.",
)


@mod.capture(rule="({user.vocabulary} | <word>)")
def word(m) -> str:
    """One word: the user-vocabulary match if present, else the spoken word."""
    try:
        vocabulary_hit = m.vocabulary
    except AttributeError:
        # The capture has no `vocabulary` attribute, so it matched <word>.
        # Run it through the dictate parse/replace pipeline and re-join.
        parsed = actions.dictate.parse_words(m.word)
        replaced = actions.dictate.replace_words(parsed)
        return " ".join(replaced)
    return vocabulary_hit


@mod.capture(rule="({user.vocabulary} | <phrase>)+")
def text(m) -> str:
    """One or more words/phrases (user vocabulary included), auto-spaced."""
    return format_phrase(m)


@mod.capture(rule="({user.vocabulary} | {user.punctuation} | <phrase>)+")
def prose(m) -> str:
    """Words mixed with punctuation, auto-spaced and auto-capitalized."""
    # auto_capitalize also returns its updated state; only the text is needed.
    capitalized, _ = auto_capitalize(format_phrase(m))
    return capitalized


# ---------- FORMATTING ---------- #
def format_phrase(m):
    """
    Join the words of capture `m` into one string, putting a single space
    between neighbours only where needs_space_between() asks for one.
    """
    pieces = []
    previous = None
    for current in capture_to_words(m):
        # Nothing precedes the first word, so it never gets a leading space.
        if previous is not None and needs_space_between(previous, current):
            pieces.append(" ")
        pieces.append(current)
        previous = current
    return "".join(pieces)


def capture_to_words(m):
    """
    Flatten capture `m` into a list of word strings.

    Items that are `grammar.vm.Phrase` instances go through
    actions.dictate.parse_words() and actions.dictate.replace_words(); any
    other item is split on single spaces.
    """
    collected = []
    for item in m:
        if isinstance(item, grammar.vm.Phrase):
            parsed = actions.dictate.parse_words(item)
            collected.extend(actions.dictate.replace_words(parsed))
        else:
            collected.extend(item.split(" "))
    return collected


# There must be a simpler way to do this, but I don't see it right now.
# Matches when a string ENDS with something that must not be followed by an
# automatically inserted space.
no_space_after = re.compile(r"""
  (?:
    [\s\-_/#@([{‘“]     # characters that never need space after them
  | (?<!\w)[$£€¥₩₽₹]    # currency symbols not preceded by a word character
  # quotes preceded by beginning of string, space, opening braces, dash, or other quotes
  | (?: ^ | [\s([{\-'"] ) ['"]
  )$""", re.VERBOSE)
# Matches when a string STARTS with something that must not be preceded by an
# automatically inserted space.
no_space_before = re.compile(r"""
  ^(?:
    [\s\-_.,!?;:/%)\]}’”]   # characters that never need space before them
  | [$£€¥₩₽₹](?!\w)         # currency symbols not followed by a word character
  # quotes followed by end of string, space, closing braces, dash, other quotes, or some punctuation.
  | ['"] (?: $ | [\s)\]}\-'".,!?;:/] )
  )""", re.VERBOSE)

# Simpler set-based alternative, kept for reference:
# no_space_before = set("\n .,!?;:-_/%)]}")
# no_space_after = set("\n -_/#@([{")
def needs_space_between(before: str, after: str) -> bool:
    """
    Return True if a space should be inserted between `before` and `after`.

    The answer is False when either string is empty, when the end of
    `before` matches `no_space_after`, or when the start of `after` matches
    `no_space_before`. The result is always a real bool; a bare `and` chain
    would hand back the empty-string operand itself when an argument is "".
    """
    return bool(
        before
        and after
        and not no_space_after.search(before)
        and not no_space_before.search(after)
    )
    # Set-based equivalent of the check above, for the alternative tables:
    # return (before != "" and after != ""
    #         and before[-1] not in no_space_after
    #         and after[0] not in no_space_before)

# # TESTS, uncomment to enable
# assert needs_space_between("a", "break")
# assert needs_space_between("break", "a")
# assert needs_space_between(".", "a")
# assert needs_space_between("said", "'hello")
# assert needs_space_between("hello'", "said")
# assert needs_space_between("hello.", "'John")
# assert needs_space_between("John.'", "They")
# assert needs_space_between("paid", "$50")
# assert needs_space_between("50$", "payment")
# assert not needs_space_between("", "")
# assert not needs_space_between("a", "")
# assert not needs_space_between("a", " ")
# assert not needs_space_between("", "a")
# assert not needs_space_between(" ", "a")
# assert not needs_space_between("a", ",")
# assert not needs_space_between("'", "a")
# assert not needs_space_between("a", "'")
# assert not needs_space_between("and-", "or")
# assert not needs_space_between("mary", "-kate")
# assert not needs_space_between("$", "50")
# assert not needs_space_between("US", "$")
# assert not needs_space_between("(", ")")
# assert not needs_space_between("(", "e.g.")
# assert not needs_space_between("example", ")")
# assert not needs_space_between("example", '".')
# assert not needs_space_between("example", '."')
# assert not needs_space_between("hello'", ".")
# assert not needs_space_between("hello.", "'")

def auto_capitalize(text, state = None):
    """
    Auto-capitalizes text. `state` argument means:

    - None: Don't capitalize initial word.
    - "sentence start": Capitalize initial word.
    - "after newline": Don't capitalize initial word, but we're after a newline.
      Used for double-newline detection.

    Returns (capitalized text, updated state).
    """
    # Collect characters in a list and join once at the end.
    output = []
    # Imagine a metaphorical "capitalization charge" travelling through the
    # string left-to-right.
    charge = state == "sentence start"
    newline = state == "after newline"
    for c in text:
        # Sentence endings & double newlines create a charge.
        if c in ".!?" or (newline and c == "\n"):
            charge = True
        # Alphanumeric characters and commas/colons absorb charge & try to
        # capitalize (for numbers & punctuation this does nothing, which is what
        # we want).
        elif charge and (c.isalnum() or c in ",:"):
            charge = False
            c = c.capitalize()
        # Otherwise the charge just passes through.
        output.append(c)
        newline = c == "\n"
    return "".join(output), ("sentence start" if charge else
                             "after newline" if newline else None)


# ---------- DICTATION AUTO FORMATTING ---------- #
class DictationFormat:
    """
    Tracks what was last inserted so the next dictated chunk can be spaced
    and capitalized relative to it.

    `before` is the most recent non-empty text seen; `state` is the
    auto_capitalize() state carried over to the next chunk.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget all context; the next text is treated as a sentence start."""
        self.before = ""
        self.state = "sentence start"

    def update_context(self, before):
        """
        Replace the tracked context with `before`, the text to the left of
        the cursor. `None` means "no information" and leaves state untouched.
        """
        if before is None:
            return
        self.reset()
        self.pass_through(before)

    def pass_through(self, text):
        """Record `text` as seen, updating state without reformatting it."""
        _, self.state = auto_capitalize(text, self.state)
        # An empty `text` keeps the previous context.
        self.before = text or self.before

    def format(self, text):
        """
        Return `text` with a leading space added if needs_space_between()
        asks for one, and auto-capitalized; updates the tracked context.
        """
        if needs_space_between(self.before, text):
            text = " " + text
        text, self.state = auto_capitalize(text, self.state)
        self.before = text or self.before
        return text

dictation_formatter = DictationFormat()
# Reset the formatter on these UI events.
ui.register("app_deactivate", lambda app: dictation_formatter.reset())
ui.register("win_focus", lambda win: dictation_formatter.reset())

@mod.action_class
class Actions:
    def dictation_format_reset():
        """Resets the dictation formatter"""
        return dictation_formatter.reset()

    def dictation_insert_raw(text: str):
        """Inserts text as-is, without invoking the dictation formatter."""
        dictation_formatter.pass_through(text)
        actions.insert(text)

    def dictation_insert(text: str) -> str:
        """Inserts dictated text, formatted appropriately."""
        # NOTE(review): annotated `-> str`, but every path returns None.
        # Left as-is; confirm whether callers/overrides expect a value.
        #
        # do_the_dance = whether we should try to be context-sensitive. Since
        # whitespace is not affected by formatter state, if text.isspace() is
        # True we don't need context-sensitivity.
        do_the_dance = (setting_context_sensitive_dictation.get()
                        and not text.isspace())
        if do_the_dance:
            dictation_formatter.update_context(
                actions.user.dictation_peek_left(clobber=True))
        text = dictation_formatter.format(text)
        actions.user.add_phrase_to_history(text)
        actions.insert(text)
        # Add a space after cursor if necessary.
        if not do_the_dance or not text or no_space_after.search(text):
            return
        char = actions.user.dictation_peek_right()
        if char is not None and needs_space_between(text, char):
            # Insert the space, then step back so the cursor stays before it.
            actions.insert(" ")
            actions.edit.left()

    def dictation_peek_left(clobber: bool = False) -> Optional[str]:
        """
        Tries to get some text before the cursor, ideally a word or two, for the
        purpose of auto-spacing & -capitalization. Results are not guaranteed;
        dictation_peek_left() may return None to indicate no information. (Note
        that returning the empty string "" indicates there is nothing before
        cursor, ie. we are at the beginning of the document.)

        If there is currently a selection, dictation_peek_left() must leave it
        unchanged unless `clobber` is true, in which case it may clobber it.
        """
        # Get rid of the selection if it exists.
        if clobber:
            actions.user.clobber_selection_if_exists()
        # Otherwise, if there's a selection, fail.
        elif "" != actions.edit.selected_text():
            return None

        # In principle the previous word should suffice, but some applications
        # have a funny concept of what the previous word is (for example, they
        # may only take the "`" at the end of "`foo`"). To be double sure we
        # take two words left. I also tried taking a line up + a word left, but
        # edit.extend_up() = key(shift-up) doesn't work consistently in the
        # Slack webapp (sometimes escapes the text box).
        actions.edit.extend_word_left()
        actions.edit.extend_word_left()
        text = actions.edit.selected_text()
        # if we're at the beginning of the document/text box, we may not have
        # selected any text, in which case we shouldn't move the cursor.
        if text:
            # Unfortunately, in web Slack, if our selection ends at newline,
            # this will go right over the newline. Argh.
            actions.edit.right()
        return text

    def clobber_selection_if_exists():
        """Deletes the currently selected text if it exists; otherwise does nothing."""
        actions.key("space backspace")
        # This space-backspace trick is fast and reliable but has the
        # side-effect of cluttering the undo history. Other options:
        #
        # 1. Call edit.cut() inside a clip.revert() block. This assumes
        #    edit.cut() is supported AND will be a no-op if there's no
        #    selection. Unfortunately, sometimes one or both of these is false,
        #    eg. the notion webapp makes ctrl-x cut the current block by default
        #    if nothing is selected.
        #
        # 2. Test whether a selection exists by asking whether
        #    edit.selected_text() is empty; if it does, use edit.delete(). This
        #    usually uses the clipboard, which can be quite slow. Also, not sure
        #    how this would interact with switching edit.selected_text() to use
        #    the selection clipboard on linux, which can be nonempty even if no
        #    text is selected in the current application.
        #
        # Perhaps this ought to be configurable by a setting.

    def dictation_peek_right() -> Optional[str]:
        """
        Tries to get a few characters after the cursor for auto-spacing.
        Results are not guaranteed; dictation_peek_right() may return None to
        indicate no information. (Note that returning the empty string ""
        indicates there is nothing after cursor, ie. we are at the end of the
        document.)
        """
        # We grab two characters because I think that's what no_space_before
        # needs in the worst case. An example where the second character matters
        # is inserting before (1) "' hello" vs (2) "'hello". In case (1) we
        # don't want to add space, in case (2) we do.
        actions.edit.extend_right()
        actions.edit.extend_right()
        after = actions.edit.selected_text()
        # Only move the cursor back if something was actually selected.
        if after:
            actions.edit.left()
        return after

# Use the dictation formatter in dictation mode.
dictation_ctx = Context()
dictation_ctx.matches = r"""
mode: dictation
"""

@dictation_ctx.action_class("main")
class main_action:
    def auto_insert(text):
        # Route inserted text through the formatter-aware insert action.
        actions.user.dictation_insert(text)