epr.py (13159B)
#!/usr/bin/env python3
"""
Usage:
    epr.py [EPUBFILE]

Key binding:
    Help            : ?
    Quit            : q
    Scroll down     : ARROW DOWN    j
    Scroll up       : ARROW UP      k
    Page down       : PGDN          J   SPC
    Page up         : PGUP          K
    Next chapter    : ARROW RIGHT   l
    Prev chapter    : ARROW LEFT    h
    Beginning of ch : HOME          g
    End of ch       : END           G
    Shrink          : -
    Enlarge         : =
    TOC             : t
    Metadata        : m

Source:
    https://github.com/wustho/epr.git

"""

import curses
import zipfile
import locale
import sys
import re
import os
import textwrap
import json
import xml.etree.ElementTree as ET
from urllib.parse import unquote
from html.entities import html5

# Adopt the user's locale; if the environment names a locale that is not
# installed, keep the default one instead of aborting at import time.
try:
    locale.setlocale(locale.LC_ALL, "")
except locale.Error:
    pass
# code = locale.getpreferredencoding()

# JSON file holding, per book path, the saved chapter index, width,
# scroll position and the "lastread" flag.  expanduser() is used rather than
# os.getenv("HOME") because the latter is None when HOME is unset.
statefile = os.path.join(os.path.expanduser("~"), ".config/.epr")
if os.path.exists(statefile):
    with open(statefile, "r") as f:
        state = json.load(f)
else:
    state = {}

# key bindings
SCROLL_DOWN = {curses.KEY_DOWN, ord("j")}
SCROLL_UP = {curses.KEY_UP, ord("k")}
PAGE_DOWN = {curses.KEY_NPAGE, ord("J"), ord(" ")}
PAGE_UP = {curses.KEY_PPAGE, ord("K")}
CH_NEXT = {curses.KEY_RIGHT, ord("l")}
CH_PREV = {curses.KEY_LEFT, ord("h")}
CH_HOME = {curses.KEY_HOME, ord("g")}
CH_END = {curses.KEY_END, ord("G")}
SHRINK = ord("-")
WIDEN = ord("=")
META = ord("m")
TOC = ord("t")
FOLLOW = 10
QUIT = {ord("q"), 3}
HELP = {ord("?")}

NS = {"DAISY": "http://www.daisy.org/z3986/2005/ncx/",
      "OPF": "http://www.idpf.org/2007/opf",
      "CONT": "urn:oasis:names:tc:opendocument:xmlns:container",
      "XHTML": "http://www.w3.org/1999/xhtml",
      "EPUB": "http://www.idpf.org/2007/ops"}

RIGHTPADDING = 2
LINEPRSRV = 0  # default = 2

# Entities every XML parser already knows; they must stay untouched.
_XML_PREDEFINED = {"amp;", "lt;", "gt;", "quot;", "apos;"}
_ENTITY_RE = re.compile(r"&([A-Za-z][A-Za-z0-9]*;)")


class Epub:
    """An opened EPUB archive: package location, metadata and reading order."""

    def __init__(self, fileepub):
        """Open *fileepub* and read its container and package documents.

        Sets ``path`` (absolute path), ``file`` (the ZipFile), ``rootfile``
        (package document path inside the archive), ``rootdir`` (its
        directory with a trailing "/", or ""), ``version`` (the package
        ``version`` attribute) and ``toc`` (archive path of the TOC document,
        or None when the manifest has none for this version).
        """
        self.path = os.path.abspath(fileepub)
        self.file = zipfile.ZipFile(fileepub, "r")
        with self.file.open("META-INF/container.xml") as fp:
            container = ET.parse(fp)
        self.rootfile = container.find("CONT:rootfiles/CONT:rootfile", NS).attrib["full-path"]
        dirname = os.path.dirname(self.rootfile)
        self.rootdir = dirname + "/" if dirname != "" else ""
        package = self._package()
        # EPUB3
        self.version = package.get("version")
        toc_item = self._toc_item(package)
        self.toc = None if toc_item is None else self.rootdir + toc_item.get("href")

    def _package(self):
        """Parse the package document and return its root element."""
        with self.file.open(self.rootfile) as fp:
            return ET.parse(fp).getroot()

    def _toc_item(self, package):
        """Return the manifest element of the TOC matching the package version.

        Version "2.0" uses the item with id "ncx", "3.0" the item whose
        ``properties`` is "nav"; any other version yields None.
        """
        if self.version == "2.0":
            return package.find("OPF:manifest/*[@id='ncx']", NS)
        if self.version == "3.0":
            return package.find("OPF:manifest/*[@properties='nav']", NS)
        return None

    def _toc_titles(self, package):
        """Map unquoted TOC targets to their titles (first entry wins)."""
        toc_item = self._toc_item(package)
        if toc_item is None:
            return {}
        with self.file.open(self.rootdir + unquote(toc_item.get("href"))) as fp:
            toc = ET.parse(fp).getroot()

        titles = {}
        if self.version == "2.0":
            for point in toc.findall("DAISY:navMap//DAISY:navPoint", NS):
                target = point.find("DAISY:content", NS)
                if target is None or target.get("src") is None:
                    continue
                label = point.find("DAISY:navLabel/DAISY:text", NS)
                name = "unknown"
                if label is not None and label.text is not None:
                    name = label.text
                titles.setdefault(unquote(target.get("src")), name)
        else:
            # EPUB3
            for link in toc.findall("XHTML:body/XHTML:nav[@EPUB:type='toc']//XHTML:a", NS):
                if link.get("href") is None:
                    continue
                titles.setdefault(unquote(link.get("href")), "".join(link.itertext()))
        return titles

    def get_meta(self):
        """Return ``[tag, text]`` pairs for metadata children that have text.

        The namespace part (``{...}``) is stripped from each tag.
        """
        package = self._package()
        return [
            [re.sub("{.*?}", "", item.tag), item.text]
            for item in package.findall("OPF:metadata/*", NS)
            if item.text is not None
        ]

    def get_contents(self):
        """Return ``[title, archive_path]`` pairs in spine order.

        A title is "unknown" when no TOC entry points exactly at the
        document.  The TOC document is chosen by package version, so an
        EPUB3 that also ships a legacy NCX still gets its titles from the
        nav document.
        """
        package = self._package()

        # id -> href of every manifest item except the NCX / nav documents.
        manifest = {}
        for item in package.findall("OPF:manifest/*", NS):
            # EPUB3
            if item.get("id") != "ncx" and item.get("properties") != "nav":
                manifest.setdefault(item.get("id"), item.get("href"))

        # pop() keeps the original rule that a manifest item is used once.
        contents = []
        for itemref in package.findall("OPF:spine/*", NS):
            href = manifest.pop(itemref.get("idref"), None)
            if href is not None:
                contents.append(unquote(href))

        titles = self._toc_titles(package)
        return [[titles.get(href, "unknown"), self.rootdir + href] for href in contents]


def toc(stdscr, ebook, index, width):
    """Show the table of contents with entry *index* highlighted.

    Up/down move the highlight, FOLLOW (key code 10) opens the highlighted
    chapter via reader(), and the TOC key or a quit key closes the window.
    """
    rows, cols = stdscr.getmaxyx()
    hi, wi = rows - 4, cols - 4
    Y, X = 2, 2
    win = curses.newwin(hi, wi, Y, X)
    win.box()
    win.keypad(True)
    win.addstr(1, 2, "Table of Contents")
    win.addstr(2, 2, "-----------------")
    key_toc = 0

    def draw(entries, selected, top=0):
        """Redraw the list, return the (possibly adjusted) first visible row."""
        listing = curses.newpad(len(entries), wi - 2)
        listing.keypad(True)
        listing.clear()
        for row, entry in enumerate(entries):
            if row == selected:
                listing.addstr(row, 0, "> " + entry[0], curses.A_REVERSE)
            else:
                # two spaces keep titles aligned with the "> " marker
                listing.addstr(row, 0, "  " + entry[0])
        if top == selected and top > 0:
            # scrolling up
            top -= 1
        elif selected - top > rows - Y - 9:
            # scrolling down
            top = selected - rows + Y + 9

        listing.refresh(top, 0, Y + 4, X + 4, rows - 5, cols - 6)
        return top

    src = ebook.get_contents()
    win.refresh()
    top = draw(src, index)

    while key_toc != TOC and key_toc not in QUIT:
        if key_toc in SCROLL_UP and index > 0:
            index -= 1
            top = draw(src, index, top)
        if key_toc in SCROLL_DOWN and index + 1 < len(src):
            index += 1
            top = draw(src, index, top)
        if key_toc == FOLLOW:
            reader(stdscr, ebook, index, width, 0)
        key_toc = win.getch()

    win.clear()
    win.refresh()
    return


def _show_pager(stdscr, title, lines, close_keys, close_on_resize=False):
    """Show *lines* in a boxed, vertically scrollable window.

    The window closes when a key in *close_keys* is pressed, or on
    KEY_RESIZE when *close_on_resize* is true.
    """
    rows, cols = stdscr.getmaxyx()
    hi, wi = rows - 4, cols - 4
    Y, X = 2, 2
    win = curses.newwin(hi, wi, Y, X)
    win.box()
    win.keypad(True)
    win.addstr(1, 2, title)
    win.addstr(2, 2, "-" * len(title))

    # At least one row so an empty listing does not ask for a 0-line pad.
    pad = curses.newpad(max(len(lines), 1), wi - 2)
    pad.keypad(True)
    for row, line in enumerate(lines):
        pad.addstr(row, 0, line)
    y = 0
    win.refresh()
    pad.refresh(y, 0, Y + 4, X + 4, rows - 5, cols - 6)

    key = 0
    while key not in close_keys:
        # The SCROLL_* bindings are sets, so membership (not ==) is required.
        if key in SCROLL_UP and y > 0:
            y -= 1
        if key in SCROLL_DOWN and y < len(lines) - hi + 4:
            y += 1
        if close_on_resize and key == curses.KEY_RESIZE:
            break
        pad.refresh(y, 0, 6, 5, rows - 5, cols - 5)
        key = win.getch()

    win.clear()
    win.refresh()


def meta(stdscr, ebook):
    """Show the book's metadata; the Metadata key or a quit key closes it."""
    wi = stdscr.getmaxyx()[1] - 4
    lines = []
    for tag, value in ebook.get_meta():
        data = re.sub("<[^>]*>", "", value)
        data = re.sub("\t", "", data)
        lines += textwrap.fill(tag + " : " + data, wi - 6).splitlines()
    _show_pager(stdscr, "Metadata", lines, {META} | QUIT)
    return


def help(stdscr):
    """Show the module docstring; the Help key, a quit key or a resize closes it."""
    lines = (__doc__ or "").split("\n")
    _show_pager(stdscr, "Help", lines, HELP | QUIT, close_on_resize=True)
    return


def _replace_html_entities(markup):
    """Rewrite HTML named entities in *markup* as numeric character references.

    The five XML-predefined entities and names missing from ``html5`` are
    left unchanged.  Numeric references are used so that an entity standing
    for ``<`` or ``&`` cannot break the markup.
    """
    def numeric(match):
        name = match.group(1)
        if name in _XML_PREDEFINED or name not in html5:
            return match.group(0)
        return "".join("&#{};".format(ord(ch)) for ch in html5[name])

    return _ENTITY_RE.sub(numeric, markup)


def to_text(src, width):
    """Render an XHTML document as a list of display lines.

    Parameters:
        src: the document, as UTF-8 bytes (a str is accepted too).
        width: wrap width for paragraphs; headings are centred on it.

    Returns the lines of every heading (``h0``-``h9``) and every element
    whose tag starts with ``p``, each followed by a blank line, plus one
    trailing blank line.  A document without an XHTML ``<body>`` yields
    ``[""]``.

    Raises ET.ParseError when the document is still not well formed after
    HTML named entities have been replaced.
    """
    try:
        root = ET.fromstring(src)
    except ET.ParseError as err:
        # Only undefined entities are repairable; anything else is a real
        # syntax error and must surface unchanged.
        if "undefined entity" not in str(err):
            raise
        markup = src.decode("utf-8") if isinstance(src, bytes) else src
        root = ET.fromstring(_replace_html_entities(markup).encode("utf-8"))

    body = root.find("XHTML:body", NS)
    if body is None:
        return [""]

    heading = re.compile("{" + NS["XHTML"] + "}h[0-9]")
    paragraph = re.compile("{" + NS["XHTML"] + "}p")
    text = []
    # for i in body.findall("*", NS):
    # for i in body.findall(".//XHTML:p", NS):
    for elem in body.findall(".//*"):
        if heading.match(elem.tag) is not None:
            for piece in elem.itertext():
                text.append(piece.rjust(width // 2 + len(piece) // 2 - RIGHTPADDING))
            text.append("")
        elif paragraph.match(elem.tag) is not None:
            par = ET.tostring(elem, encoding="utf-8").decode("utf-8")
            par = re.sub("<[^>]*>", "", par)
            par = re.sub("\t", "", par)
            par = textwrap.fill(par, width)
            text += par.splitlines() + [""]

    return text + [""]


def _save_state(ebook, index, width, y):
    """Record *ebook* as the last read book and write the state file."""
    for entry in state.values():
        entry["lastread"] = str(0)
    current = state.setdefault(ebook.path, {})
    current["lastread"] = str(1)
    current["index"] = str(index)
    current["width"] = str(width)
    current["pos"] = str(y)
    # The directory may not exist yet on a fresh account.
    os.makedirs(os.path.dirname(statefile), exist_ok=True)
    with open(statefile, "w") as f:
        json.dump(state, f, indent=4)


def reader(stdscr, ebook, index, width, y=0):
    """Display chapter *index* of *ebook* at *width* columns and handle keys.

    *y* is the first visible line.  A quit key saves the reading position
    and exits the process.

    NOTE(review): chapter changes, width changes, resizes and TOC jumps
    re-enter reader() recursively, and the loop below has no exit other than
    quitting, so the call stack grows with each such action.
    """
    k = 0
    rows, cols = stdscr.getmaxyx()
    x = (cols - width) // 2
    stdscr.clear()
    stdscr.refresh()

    with ebook.file.open(ebook.get_contents()[index][1]) as chapter:
        content = chapter.read()

    src_lines = to_text(content, width)

    pad = curses.newpad(len(src_lines), width + 2)  # + 2 unnecessary
    pad.keypad(True)
    for row, line in enumerate(src_lines):
        pad.addstr(row, 0, line)
    # to_text() always ends with a blank line; the marker goes on it.  The
    # column is clamped because it is negative for widths below 24.
    marker_col = max(0, width // 2 - 10 - RIGHTPADDING)
    pad.addstr(len(src_lines) - 1, marker_col, "-- End of Chapter --", curses.A_REVERSE)
    pad.refresh(y, 0, 0, x, rows - 1, x + width)

    while True:
        # if k == QUIT or k == 3:
        if k in QUIT:
            _save_state(ebook, index, width, y)
            sys.exit()
        if k in SCROLL_UP:
            if y > 0:
                y -= 1
            # if y == 0 and index > 0:
            #     reader(stdscr, ebook, index-1, width)
        if k in PAGE_UP:
            if y >= rows - LINEPRSRV:
                y -= rows - LINEPRSRV
            else:
                y = 0
        if k in SCROLL_DOWN:
            if y < len(src_lines) - rows:
                y += 1
            # if y + rows >= len(src_lines):
            #     reader(stdscr, ebook, index+1, width)
        if k in PAGE_DOWN:
            if y + rows - 2 <= len(src_lines) - rows:
                y += rows - LINEPRSRV
            else:
                y = max(len(src_lines) - rows, 0)
        if k in CH_NEXT and index < len(ebook.get_contents()) - 1:
            reader(stdscr, ebook, index + 1, width)
        if k in CH_PREV and index > 0:
            reader(stdscr, ebook, index - 1, width)
        if k in CH_HOME:
            y = 0
        if k in CH_END:
            y = max(len(src_lines) - rows, 0)
        if k == TOC:
            toc(stdscr, ebook, index, width)
        if k == META:
            meta(stdscr, ebook)
        if k in HELP:
            help(stdscr)
        if k == WIDEN and (width + 2) < cols:
            width += 2
            reader(stdscr, ebook, index, width)
            return
        if k == SHRINK and width >= 22:
            width -= 2
            reader(stdscr, ebook, index, width)
            return
        if k == curses.KEY_RESIZE:
            # NOTE(review): resize_term() is given the size read before the
            # resize; confirm this is intended.
            curses.resize_term(rows, cols)
            rows, cols = stdscr.getmaxyx()
            # TODO
            if cols <= width:
                width = cols - 2
            reader(stdscr, ebook, index, width)

        pad.refresh(y, 0, 0, x, rows - 1, x + width)
        k = pad.getch()


def main(stdscr, file):
    """curses entry point: open *file* and resume at its saved position.

    A book without saved state starts at chapter index 1, width 80.
    """
    stdscr.keypad(True)
    curses.curs_set(0)
    stdscr.clear()
    stdscr.refresh()
    rows, cols = stdscr.getmaxyx()
    epub = Epub(file)

    if epub.path in state:
        idx = int(state[epub.path]["index"])
        width = int(state[epub.path]["width"])
        y = int(state[epub.path]["pos"])
    else:
        state[epub.path] = {}
        idx = 1
        y = 0
        width = 80

    # The default index 1 (or a stale saved index) may not exist, e.g. in a
    # single-document book; clamp it to the available chapters.
    idx = max(0, min(idx, len(epub.get_contents()) - 1))

    if cols <= width:
        width = cols - 2
        y = 0
    reader(stdscr, epub, idx, width, y)


def find_last_read(records):
    """Return the path flagged as last read in *records*, or None.

    Entries whose path no longer exists are deleted from *records*.  The
    keys are snapshotted first because deleting from a dict while iterating
    over it raises RuntimeError.  When several entries are flagged, the
    last one in iteration order is returned.
    """
    found = None
    for path in list(records):
        if not os.path.exists(path):
            del records[path]
        elif records[path].get("lastread") == str(1):
            found = path
    return found


if __name__ == "__main__":
    if len(sys.argv) == 1:
        file = find_last_read(state)
        if not file:
            print("ERROR: Found no last read file.")
            print(__doc__)
        else:
            curses.wrapper(main, file)
    elif len(sys.argv) == 2 and sys.argv[1] not in ("-h", "--help"):
        curses.wrapper(main, sys.argv[1])
    else:
        print(__doc__)