epr epub reader - dotfiles - My personal shell configs and stuff

commit 2f0332063ea2b5d260b5358da6df5fa741d1b85d
parent 146a3bb9d27c9cea73fe6c1169c2dcd1d4ce461e
Author: Alex Balgavy <a.balgavy@gmail.com>
Date:   Mon, 25 Mar 2019 22:06:29 +0100

epr epub reader


Former-commit-id: e14b4a7dd262434942907c8e7bc9140c839d9cec
Diffstat:
A scripts/epr.py  | 433 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 433 insertions(+), 0 deletions(-)
diff --git a/scripts/epr.py b/scripts/epr.py
@@ -0,0 +1,432 @@
+#!/usr/bin/env python3
+"""
+Usage:
+    epr.py [EPUBFILE]
+
+Key binding:
+    Help            : ?
+    Quit            : q
+    Scroll down     : ARROW DOWN    j
+    Scroll up       : ARROW UP      k
+    Page down       : PGDN          J   SPC
+    Page up         : PGUP          K
+    Next chapter    : ARROW RIGHT   l
+    Prev chapter    : ARROW LEFT    h
+    Beginning of ch : HOME          g
+    End of ch       : END           G
+    Shrink          : -
+    Enlarge         : =
+    TOC             : t
+    Metadata        : m
+
+Source:
+    https://github.com/wustho/epr.git
+
+"""
+
+import curses
+import zipfile
+import locale
+import sys
+import re
+import os
+import textwrap
+import json
+import xml.etree.ElementTree as ET
+from urllib.parse import unquote
+from html.entities import html5
+
+locale.setlocale(locale.LC_ALL, "")
+
+# Per-user reading state, keyed by absolute EPUB path.  Each entry stores
+# "lastread", "index", "width" and "pos" as strings (written by reader()).
+# expanduser() is used instead of $HOME so a missing HOME variable does not
+# make os.path.join() fail with a TypeError.
+statefile = os.path.join(os.path.expanduser("~"), ".config/.epr")
+try:
+    with open(statefile, "r") as f:
+        state = json.load(f)
+except FileNotFoundError:
+    # First run: nothing has been saved yet.
+    state = {}
+except (OSError, ValueError) as err:
+    # Unreadable or corrupt state must not prevent opening a book; start
+    # fresh (the file is rewritten on the next clean quit).
+    print("WARNING: ignoring state file {}: {}".format(statefile, err),
+          file=sys.stderr)
+    state = {}
+if not isinstance(state, dict):
+    state = {}
+
+# key bindings
+# NOTE: some bindings are sets of key codes and must be tested with `in`;
+# others (SHRINK, WIDEN, META, TOC, FOLLOW) are single ints and must be
+# tested with `==`.  Mixing the two silently never matches.
+SCROLL_DOWN = {curses.KEY_DOWN, ord("j")}
+SCROLL_UP = {curses.KEY_UP, ord("k")}
+PAGE_DOWN = {curses.KEY_NPAGE, ord("J"), ord(" ")}
+PAGE_UP = {curses.KEY_PPAGE, ord("K")}
+CH_NEXT = {curses.KEY_RIGHT, ord("l")}
+CH_PREV = {curses.KEY_LEFT, ord("h")}
+CH_HOME = {curses.KEY_HOME, ord("g")}
+CH_END = {curses.KEY_END, ord("G")}
+SHRINK = ord("-")
+WIDEN = ord("=")
+META = ord("m")
+TOC = ord("t")
+# 10 is ASCII line feed; used in the TOC to open the highlighted chapter
+# (presumably what the Enter key delivers -- confirm per terminal).
+FOLLOW = 10
+# 3 is ASCII ETX (the Ctrl-C control character).
+QUIT = {ord("q"), 3}
+HELP = {ord("?")}
+
+# Prefix -> namespace URI map passed as the `namespaces` argument to the
+# ElementTree find()/findall() calls in this file.
+NS = {"DAISY" : "http://www.daisy.org/z3986/2005/ncx/",
+      "OPF" : "http://www.idpf.org/2007/opf",
+      "CONT" : "urn:oasis:names:tc:opendocument:xmlns:container",
+      "XHTML" : "http://www.w3.org/1999/xhtml",
+      "EPUB" : "http://www.idpf.org/2007/ops"}
+
+# Columns subtracted when centring headings and the end-of-chapter marker.
+RIGHTPADDING = 2
+# Lines subtracted from the screen height when paging up/down.
+LINEPRSRV = 0 # default = 2
+
+class Epub:
+    """Read-only view of an EPUB archive: metadata, reading order and titles.
+
+    Attributes:
+        path: absolute path of the EPUB file (used as key in the state file).
+        file: the open ``zipfile.ZipFile``.
+        rootfile: archive path of the OPF package document, taken from
+            ``META-INF/container.xml``.
+        rootdir: directory of the OPF inside the archive with a trailing
+            "/", or "" when the OPF sits at the archive root.
+        version: value of the OPF root ``version`` attribute.
+        toc: archive path of the TOC document (manifest item with id "ncx"
+            for version "2.0", item with properties "nav" for "3.0"), or
+            None when the version is unrecognised or no such item exists.
+    """
+
+    def __init__(self, fileepub):
+        self.path = os.path.abspath(fileepub)
+        self.file = zipfile.ZipFile(fileepub, "r")
+        with self.file.open("META-INF/container.xml") as f:
+            cont = ET.parse(f)
+        self.rootfile = cont.find("CONT:rootfiles/CONT:rootfile", NS).attrib["full-path"]
+        dirname = os.path.dirname(self.rootfile)
+        self.rootdir = dirname + "/" if dirname != "" else ""
+        with self.file.open(self.rootfile) as f:
+            cont = ET.parse(f)
+        # EPUB3
+        self.version = cont.getroot().get("version")
+        if self.version == "2.0":
+            item = cont.find("OPF:manifest/*[@id='ncx']", NS)
+        elif self.version == "3.0":
+            item = cont.find("OPF:manifest/*[@properties='nav']", NS)
+        else:
+            item = None
+        # Always define the attribute, even when no TOC item was found.
+        self.toc = None
+        if item is not None and item.get("href") is not None:
+            self.toc = self.rootdir + item.get("href")
+
+    def get_meta(self):
+        """Return ``[tag, text]`` pairs for every OPF metadata element with text.
+
+        The namespace part (``{...}``) is stripped from each tag name.
+        """
+        # NOTE: the original author found self.file.read(self.rootfile)
+        # problematic here, hence open().read().
+        with self.file.open(self.rootfile) as f:
+            cont = ET.fromstring(f.read())
+        return [
+            [re.sub("{.*?}", "", i.tag), i.text]
+            for i in cont.findall("OPF:metadata/*", NS)
+            if i.text is not None
+        ]
+
+    def get_contents(self):
+        """Return ``[title, archive_path]`` pairs in spine (reading) order.
+
+        Titles come from the TOC document that matches ``self.version``
+        (NCX for "2.0", nav document for "3.0"); a chapter without a TOC
+        entry, or any chapter when no usable TOC exists, is titled
+        "unknown".
+        """
+        with self.file.open(self.rootfile) as f:
+            cont = ET.parse(f).getroot()
+
+        manifest = {}       # item id -> href, TOC documents excluded
+        ncx_href = None
+        nav_href = None
+        for item in cont.findall("OPF:manifest/*", NS):
+            # EPUB3
+            if item.get("id") == "ncx":
+                ncx_href = item.get("href")
+            elif item.get("properties") == "nav":
+                nav_href = item.get("href")
+            else:
+                manifest.setdefault(item.get("id"), item.get("href"))
+
+        contents = []
+        for itemref in cont.findall("OPF:spine/*", NS):
+            # pop(): a manifest item is listed at most once even when the
+            # spine references it repeatedly.
+            href = manifest.pop(itemref.get("idref"), None)
+            if href is not None:
+                contents.append(unquote(href))
+
+        # Pick the TOC by declared version, not by manifest order: an EPUB3
+        # may also carry an NCX, which must not be parsed as a nav document.
+        if self.version == "2.0":
+            toc_href = ncx_href
+        elif self.version == "3.0":
+            toc_href = nav_href
+        else:
+            toc_href = None
+
+        names = {}          # chapter href -> title, first TOC entry wins
+        if toc_href is not None:
+            with self.file.open(self.rootdir + unquote(toc_href)) as f:
+                toc = ET.parse(f).getroot()
+            if self.version == "2.0":
+                for nav_point in toc.findall("DAISY:navMap//DAISY:navPoint", NS):
+                    content = nav_point.find("DAISY:content", NS)
+                    label = nav_point.find("DAISY:navLabel/DAISY:text", NS)
+                    if content is None or content.get("src") is None:
+                        continue
+                    if label is None or label.text is None:
+                        continue
+                    names.setdefault(unquote(content.get("src")), label.text)
+            else:
+                for link in toc.findall("XHTML:body/XHTML:nav[@EPUB:type='toc']//XHTML:a", NS):
+                    if link.get("href") is None:
+                        continue
+                    names.setdefault(unquote(link.get("href")), "".join(link.itertext()))
+
+        return [[names.get(href, "unknown"), self.rootdir + href] for href in contents]
+
+def toc(stdscr, ebook, index, width):
+    """Show the table of contents in a boxed overlay and let the user pick.
+
+    Args:
+        stdscr: the main curses screen (used for its size and passed on).
+        ebook: Epub whose get_contents() supplies the chapter titles.
+        index: chapter index highlighted initially.
+        width: text width handed to reader() when a chapter is opened.
+
+    The overlay closes on TOC or any QUIT key; FOLLOW opens the highlighted
+    chapter through reader().
+    """
+    rows, cols = stdscr.getmaxyx()
+    hi, wi = rows - 4, cols - 4
+    Y, X = 2, 2
+    win = curses.newwin(hi, wi, Y, X)
+    win.box()
+    win.keypad(True)
+    win.addstr(1,2, "Table of Contents")
+    win.addstr(2,2, "-----------------")
+
+    src = ebook.get_contents()
+    pad_width = wi - 2
+    # Leave the last cell of each row free: writing into the bottom-right
+    # cell of a pad raises curses.error.
+    max_title = max(0, pad_width - 1)
+
+    def draw(selected, top=0):
+        """Redraw the list with `selected` highlighted; return the new top row."""
+        # newpad() rejects zero lines, so keep one row for an empty TOC.
+        pad = curses.newpad(max(1, len(src)), pad_width)
+        pad.keypad(True)
+        pad.clear()
+        for row, entry in enumerate(src):
+            # Collapse whitespace (titles may contain newlines) and clip, so
+            # one entry can never spill into the next row or past the pad.
+            title = " ".join(entry[0].split())
+            if row == selected:
+                pad.addstr(row, 0, ("> " + title)[:max_title], curses.A_REVERSE)
+            else:
+                pad.addstr(row, 0, (" " + title)[:max_title])
+        # scrolling up
+        if top == selected and top > 0:
+            top = top - 1
+        # scrolling down (otherwise the view stays where it is)
+        elif selected - top > rows - Y - 9:
+            top = selected - rows + Y + 9
+
+        pad.refresh(top,0, Y+4,X+4, rows - 5, cols - 6)
+        return top
+
+    win.refresh()
+    top = draw(index)
+
+    key_toc = 0
+    while key_toc != TOC and key_toc not in QUIT:
+        if key_toc in SCROLL_UP and index > 0:
+            index -= 1
+            top = draw(index, top)
+        elif key_toc in SCROLL_DOWN and index + 1 < len(src):
+            index += 1
+            top = draw(index, top)
+        elif key_toc == FOLLOW and src:
+            reader(stdscr, ebook, index, width, 0)
+        key_toc = win.getch()
+
+    win.clear()
+    win.refresh()
+    return
+
+def meta(stdscr, ebook):
+    """Show the book's metadata in a scrollable boxed overlay.
+
+    Args:
+        stdscr: the main curses screen (used for its size).
+        ebook: Epub whose get_meta() supplies ``[tag, text]`` pairs.
+
+    The overlay closes on META or any QUIT key.
+    """
+    rows, cols = stdscr.getmaxyx()
+    hi, wi = rows - 4, cols - 4
+    Y, X = 2, 2
+    win = curses.newwin(hi, wi, Y, X)
+    win.box()
+    win.keypad(True)
+    win.addstr(1,2, "Metadata")
+    win.addstr(2,2, "--------")
+
+    src_lines = []
+    # textwrap rejects non-positive widths on very narrow terminals.
+    wrap_width = max(1, wi - 6)
+    for tag, value in ebook.get_meta():
+        # Values may carry embedded markup; strip tags and tabs first.
+        data = re.sub("<[^>]*>", "", value)
+        data = re.sub("\t", "", data)
+        src_lines += textwrap.fill(tag + " : " + data, wrap_width).splitlines()
+
+    # newpad() rejects zero lines, so keep one row when there is no metadata.
+    pad = curses.newpad(max(1, len(src_lines)), wi - 2 )
+    pad.keypad(True)
+    for lineno, line in enumerate(src_lines):
+        pad.addstr(lineno, 0, line)
+    y = 0
+    win.refresh()
+
+    # The loop draws the pad before it reads the first key, so no separate
+    # initial refresh is needed.
+    key_meta = 0
+    while key_meta != META and key_meta not in QUIT:
+        if key_meta in SCROLL_UP and y > 0:
+            y -= 1
+        if key_meta in SCROLL_DOWN and y < len(src_lines) - hi + 4:
+            y += 1
+        pad.refresh(y,0, 6,5, rows - 5, cols - 5)
+        key_meta = win.getch()
+
+    win.clear()
+    win.refresh()
+    return
+
+def help(stdscr):
+    """Show the module docstring (usage and key bindings) in a boxed overlay.
+
+    Args:
+        stdscr: the main curses screen (used for its size).
+
+    The overlay closes on any HELP or QUIT key, or on a terminal resize.
+    """
+    rows, cols = stdscr.getmaxyx()
+    hi, wi = rows - 4, cols - 4
+    Y, X = 2, 2
+    win = curses.newwin(hi, wi, Y, X)
+    win.box()
+    win.keypad(True)
+    win.addstr(1,2, "Help")
+    win.addstr(2,2, "----")
+
+    src_lines = __doc__.split("\n")
+
+    pad = curses.newpad(len(src_lines), wi - 2 )
+    pad.keypad(True)
+    for lineno, line in enumerate(src_lines):
+        pad.addstr(lineno, 0, line)
+    y = 0
+    win.refresh()
+
+    # The loop draws the pad before it reads the first key, so no separate
+    # initial refresh is needed.
+    key_help = 0
+    while key_help not in HELP and key_help not in QUIT:
+        # SCROLL_UP / SCROLL_DOWN are sets of key codes: membership, not
+        # equality, is the correct test (equality never matched, so the
+        # help text could not be scrolled).
+        if key_help in SCROLL_UP and y > 0:
+            y -= 1
+        if key_help in SCROLL_DOWN and y < len(src_lines) - hi + 4:
+            y += 1
+        if key_help == curses.KEY_RESIZE:
+            break
+        pad.refresh(y,0, 6,5, rows - 5, cols - 5)
+        key_help = win.getch()
+
+    win.clear()
+    win.refresh()
+    return
+
+def to_text(src, width):
+    """Convert one XHTML chapter into a list of display lines.
+
+    Args:
+        src: the chapter document as UTF-8 encoded bytes.
+        width: column count paragraphs are wrapped to and headings are
+            centred in.
+
+    Returns:
+        A list of strings: each text node of an h0-h9 element right-justified
+        so it appears centred, each paragraph wrapped to `width`, a blank
+        line after every heading line and paragraph, and one extra trailing
+        blank line (so the list is never empty).
+
+    Raises:
+        xml.etree.ElementTree.ParseError: the document is malformed for a
+            reason other than a known HTML5 named entity.
+    """
+    while True:
+        try:
+            root = ET.fromstring(src)
+            break
+        except ET.ParseError as err:
+            # XML only predefines five entities; substitute HTML ones
+            # (&nbsp; ...) one name per pass and retry.  Any other parse
+            # error is re-raised unchanged instead of being masked.
+            found = re.search("(?<=undefined entity &).*?;(?=:)", str(err))
+            if found is None or found.group() not in html5:
+                raise
+            ent = found.group()
+            # Some names map to markup characters (e.g. "AMP;" -> "&");
+            # re-escape them so the substituted document stays well-formed.
+            repl = html5[ent].replace("&", "&amp;").replace("<", "&lt;")
+            # Plain str.replace: neither the entity name nor its replacement
+            # is a regular expression.
+            src = src.decode("utf-8").replace("&" + ent, repl).encode("utf-8")
+
+    body = root.find("XHTML:body", NS)
+    if body is None:
+        # No namespaced <body>: nothing to render.
+        return [""]
+
+    heading = re.compile("{" + NS["XHTML"] + "}h[0-9]")
+    # Prefix match, as before: also picks up tags that merely start with
+    # "p", such as <pre>.
+    para_prefix = "{" + NS["XHTML"] + "}p"
+
+    text = []
+    for el in body.findall(".//*"):
+        if heading.match(el.tag) is not None:
+            for j in el.itertext():
+                text.append(j.rjust(width//2 + len(j)//2 - RIGHTPADDING))
+                text.append("")
+        elif el.tag.startswith(para_prefix):
+            # itertext() yields already-unescaped text, so "&" and "<" show
+            # as themselves rather than as "&amp;" / "&lt;".  The tail is
+            # appended because the previous serialise-and-strip approach
+            # included it as well.
+            par = "".join(el.itertext()) + (el.tail or "")
+            par = par.replace("\t", "")
+            par = textwrap.fill(par, width)
+            text += par.splitlines() + [""]
+
+    return text + [""]
+
+def _save_state_and_exit(ebook, index, width, y):
+    """Record the reading position of `ebook` in the state file and exit."""
+    for entry in state.values():
+        entry["lastread"] = str(0)
+    current = state.setdefault(ebook.path, {})
+    current["lastread"] = str(1)
+    current["index"] = str(index)
+    current["width"] = str(width)
+    current["pos"] = str(y)
+    # The directory holding the state file may not exist yet.
+    os.makedirs(os.path.dirname(statefile), exist_ok=True)
+    with open(statefile, "w") as f:
+        json.dump(state, f, indent=4)
+    sys.exit()
+
+def _read_chapter(stdscr, ebook, index, width, y):
+    """Display one chapter until the view must be rebuilt.
+
+    Returns the ``(index, width, y)`` to display next: after a chapter
+    change, a width change or a terminal resize.  Quitting saves the state
+    and exits the process instead of returning.
+    """
+    rows, cols = stdscr.getmaxyx()
+    x = (cols - width) // 2
+    stdscr.clear()
+    stdscr.refresh()
+
+    contents = ebook.get_contents()
+    # A stale or defaulted index must not run past the last chapter.
+    index = max(0, min(index, len(contents) - 1))
+    with ebook.file.open(contents[index][1]) as chapter:
+        src_lines = to_text(chapter.read(), width)
+
+    pad = curses.newpad(len(src_lines), width + 2) # + 2 unnecessary
+    pad.keypad(True)
+    for lineno, line in enumerate(src_lines):
+        pad.addstr(lineno, 0, line)
+    # to_text() always ends with a blank line; the marker goes there.  The
+    # column is clamped because it turns negative for widths below 24.
+    marker_x = max(0, width//2 - 10 - RIGHTPADDING)
+    pad.addstr(len(src_lines) - 1, marker_x, "-- End of Chapter --", curses.A_REVERSE)
+
+    last_top = max(0, len(src_lines) - rows)    # top row of the last page
+    page = rows - LINEPRSRV
+    y = max(0, min(y, last_top))
+
+    k = 0
+    while True:
+        if k in QUIT:
+            _save_state_and_exit(ebook, index, width, y)
+        elif k in SCROLL_UP:
+            y = max(0, y - 1)
+        elif k in PAGE_UP:
+            y = max(0, y - page)
+        elif k in SCROLL_DOWN:
+            y = min(last_top, y + 1)
+        elif k in PAGE_DOWN:
+            # Clamp so a page step never scrolls past the last page.
+            y = min(last_top, y + page)
+        elif k in CH_NEXT and index < len(contents) - 1:
+            return index + 1, width, 0
+        elif k in CH_PREV and index > 0:
+            return index - 1, width, 0
+        elif k in CH_HOME:
+            y = 0
+        elif k in CH_END:
+            y = last_top
+        elif k == TOC:
+            toc(stdscr, ebook, index, width)
+        elif k == META:
+            meta(stdscr, ebook)
+        elif k in HELP:
+            help(stdscr)
+        elif k == WIDEN and (width + 2) < cols:
+            return index, width + 2, 0
+        elif k == SHRINK and width >= 22:
+            return index, width - 2, 0
+        elif k == curses.KEY_RESIZE:
+            curses.resize_term(rows, cols)
+            rows, cols = stdscr.getmaxyx()
+            # TODO
+            if cols <= width:
+                width = cols - 2
+            return index, width, 0
+
+        pad.refresh(y,0, 0,x, rows-1,x+width)
+        k = pad.getch()
+
+def reader(stdscr, ebook, index, width, y=0):
+    """Run the interactive reading loop; does not return normally.
+
+    Args:
+        stdscr: the main curses screen.
+        ebook: the Epub being read.
+        index: chapter index (position in ebook.get_contents()) to open.
+        width: text column width.
+        y: first line of the chapter to show.
+
+    Chapter, width and resize changes rebuild the view in a loop rather than
+    by recursion, so the call stack does not grow with every navigation
+    step.  A QUIT key saves the position to the state file and exits the
+    process.
+    """
+    while True:
+        index, width, y = _read_chapter(stdscr, ebook, index, width, y)
+
+def main(stdscr, file):
+    """curses.wrapper entry point: open `file` and start reading.
+
+    Args:
+        stdscr: the screen object supplied by curses.wrapper.
+        file: path of the EPUB to open.
+
+    Restores chapter index, width and position from the saved state when the
+    book has been read before; otherwise starts with width 80 on the second
+    spine item (or the first, when the book has only one).
+    """
+    stdscr.keypad(True)
+    try:
+        curses.curs_set(0)
+    except curses.error:
+        # Some terminals cannot change cursor visibility; not fatal.
+        pass
+    stdscr.clear()
+    stdscr.refresh()
+    rows, cols = stdscr.getmaxyx()
+    epub = Epub(file)
+
+    if epub.path in state:
+        saved = state[epub.path]
+        idx = int(saved["index"])
+        width = int(saved["width"])
+        y = int(saved["pos"])
+    else:
+        state[epub.path] = {}
+        # Default to index 1, but never past the last chapter: a book with a
+        # single spine item would otherwise fail with an IndexError.
+        idx = min(1, max(0, len(epub.get_contents()) - 1))
+        y = 0
+        width = 80
+
+    # The saved width may no longer fit the current terminal.
+    if cols <= width:
+        width = cols - 2
+        y = 0
+    reader(stdscr, epub, idx, width, y)
+
+if __name__ == "__main__":
+    if len(sys.argv) == 1:
+        # No argument: reopen the book flagged as last read, if it still exists.
+        file = False
+        # Iterate over a snapshot of the keys: deleting from a dict while
+        # iterating it raises RuntimeError.
+        for i in list(state):
+            if not os.path.exists(i):
+                del state[i]
+            elif state[i].get("lastread") == str(1):
+                file = i
+        if not file:
+            print("ERROR: Found no last read file.")
+            print(__doc__)
+        else:
+            curses.wrapper(main, file)
+    elif len(sys.argv) == 2 and sys.argv[1] not in ("-h", "--help"):
+        curses.wrapper(main, sys.argv[1])
+    else:
+        print(__doc__)
\ No newline at end of file

	dotfiles My personal shell configs and stuff
	git clone git://git.alex.balgavy.eu/dotfiles.git
	Log | Files | Refs | Submodules | README | LICENSE