dotfiles

My personal shell configs and stuff
git clone git://git.alex.balgavy.eu/dotfiles.git
Log | Files | Refs | Submodules | README | LICENSE

epr.py (13159B)


      1 #!/usr/bin/env python3
      2 """
      3 Usage:
      4     epr.py [EPUBFILE]
      5 
      6 Key binding:
      7     Help            : ?
      8     Quit            : q
      9     Scroll down     : ARROW DOWN    j
     10     Scroll up       : ARROW UP      k
     11     Page down       : PGDN          J   SPC
     12     Page up         : PGUP          K
     13     Next chapter    : ARROW RIGHT   l
     14     Prev chapter    : ARROW LEFT    h
     15     Beginning of ch : HOME          g
     16     End of ch       : END           G
     17     Shrink          : -
     18     Enlarge         : =
     19     TOC             : t
     20     Metadata        : m
     21 
     22 Source:
     23     https://github.com/wustho/epr.git
     24 
     25 """
     26 
     27 import curses
     28 import zipfile
     29 import locale
     30 import sys
     31 import re
     32 import os
     33 import textwrap
     34 import json
     35 import xml.etree.ElementTree as ET
     36 from urllib.parse import unquote
     37 from html.entities import html5
     38 
     39 locale.setlocale(locale.LC_ALL, "")
     40 # code = locale.getpreferredencoding()
     41 
     42 statefile = os.path.join(os.getenv("HOME"), ".config/.epr")
     43 if os.path.exists(statefile):
     44     with open(statefile, "r") as f:
     45         state = json.load(f)
     46 else:
     47     state = {}
     48 
     49 # key bindings
     50 SCROLL_DOWN = {curses.KEY_DOWN, ord("j")}
     51 SCROLL_UP = {curses.KEY_UP, ord("k")}
     52 PAGE_DOWN = {curses.KEY_NPAGE, ord("J"), ord(" ")}
     53 PAGE_UP = {curses.KEY_PPAGE, ord("K")}
     54 CH_NEXT = {curses.KEY_RIGHT, ord("l")}
     55 CH_PREV = {curses.KEY_LEFT, ord("h")}
     56 CH_HOME = {curses.KEY_HOME, ord("g")}
     57 CH_END = {curses.KEY_END, ord("G")}
     58 SHRINK = ord("-")
     59 WIDEN = ord("=")
     60 META = ord("m")
     61 TOC = ord("t")
     62 FOLLOW = 10
     63 QUIT = {ord("q"), 3}
     64 HELP = {ord("?")}
     65 
     66 NS = {"DAISY" : "http://www.daisy.org/z3986/2005/ncx/",
     67       "OPF" : "http://www.idpf.org/2007/opf",
     68       "CONT" : "urn:oasis:names:tc:opendocument:xmlns:container",
     69       "XHTML" : "http://www.w3.org/1999/xhtml",
     70       "EPUB" : "http://www.idpf.org/2007/ops"}
     71 
     72 RIGHTPADDING = 2
     73 LINEPRSRV = 0 # default = 2
     74 
     75 class Epub:
     76     def __init__(self, fileepub):
     77         self.path = os.path.abspath(fileepub)
     78         self.file = zipfile.ZipFile(fileepub, "r")
     79         cont = ET.parse(self.file.open("META-INF/container.xml"))
     80         self.rootfile = cont.find("CONT:rootfiles/CONT:rootfile", NS).attrib["full-path"]
     81         self.rootdir = os.path.dirname(self.rootfile) + "/" if os.path.dirname(self.rootfile) != "" else ""
     82         cont = ET.parse(self.file.open(self.rootfile))
     83         # EPUB3
     84         self.version = cont.getroot().get("version")
     85         if self.version == "2.0":
     86             self.toc = self.rootdir + cont.find("OPF:manifest/*[@id='ncx']", NS).get("href")
     87         elif self.version == "3.0":
     88             self.toc = self.rootdir + cont.find("OPF:manifest/*[@properties='nav']", NS).get("href")
     89 
     90     def get_meta(self):
     91         meta = []
     92         # why self.file.read(self.rootfile) problematic
     93         cont = ET.fromstring(self.file.open(self.rootfile).read()) 
     94         for i in cont.findall("OPF:metadata/*", NS):
     95             if i.text != None:
     96                 meta.append([re.sub("{.*?}", "", i.tag), i.text])
     97         return meta
     98 
     99     def get_contents(self):
    100         contents = []
    101         cont = ET.parse(self.file.open(self.rootfile)).getroot()
    102         manifest = []
    103         for i in cont.findall("OPF:manifest/*", NS):
    104             # EPUB3
    105             if i.get("id") != "ncx" and i.get("properties") != "nav":
    106                 manifest.append([
    107                     i.get("id"),
    108                     i.get("href")
    109                 ])
    110             else:
    111                 toc = self.rootdir + unquote(i.get("href"))
    112 
    113         spine = []
    114         for i in cont.findall("OPF:spine/*", NS):
    115             spine.append(i.get("idref"))
    116         for i in spine:
    117             for j in manifest:
    118                 if i == j[0]:
    119                     contents.append(unquote(j[1]))
    120                     manifest.remove(j)
    121                     # TODO: test is break necessary
    122                     break
    123 
    124         namedcontents = []
    125         toc = ET.parse(self.file.open(toc)).getroot()
    126         # EPUB3
    127         if self.version == "2.0":
    128             navPoints = toc.findall("DAISY:navMap//DAISY:navPoint", NS)
    129         elif self.version == "3.0":
    130             navPoints = toc.findall("XHTML:body/XHTML:nav[@EPUB:type='toc']//XHTML:a", NS)
    131         for i in contents:
    132             name = "unknown"
    133             for j in navPoints:
    134                 # EPUB3
    135                 if self.version == "2.0":
    136                     if i == unquote(j.find("DAISY:content", NS).get("src")):
    137                         name = j.find("DAISY:navLabel/DAISY:text", NS).text
    138                         break
    139                 elif self.version == "3.0":
    140                     if i == unquote(j.get("href")):
    141                         name = "".join(list(j.itertext()))
    142                         break
    143 
    144             namedcontents.append([
    145                 name,
    146                 self.rootdir + i
    147             ])
    148 
    149         return namedcontents
    150 
    151 def toc(stdscr, ebook, index, width):
    152     rows, cols = stdscr.getmaxyx()
    153     hi, wi = rows - 4, cols - 4
    154     Y, X = 2, 2
    155     toc = curses.newwin(hi, wi, Y, X)
    156     toc.box()
    157     toc.keypad(True)
    158     toc.addstr(1,2, "Table of Contents")
    159     toc.addstr(2,2, "-----------------")
    160     key_toc = 0
    161 
    162     def pad(src, id, top=0):
    163         pad = curses.newpad(len(src), wi - 2 )
    164         pad.keypad(True)
    165         pad.clear()
    166         for i in range(len(src)):
    167             if i == id:
    168                 pad.addstr(i, 0, "> " + src[i][0], curses.A_REVERSE)
    169             else:
    170                 pad.addstr(i, 0, " " + src[i][0])
    171         # scrolling up
    172         if top == id and top > 0:
    173             top = top - 1
    174         # steady
    175         elif id - top <= rows - Y -9:
    176             top = top
    177         # scrolling down
    178         else:
    179             top = id - rows + Y + 9
    180 
    181         pad.refresh(top,0, Y+4,X+4, rows - 5, cols - 6)
    182         return top
    183 
    184     src = ebook.get_contents()
    185     toc.refresh()
    186     top = pad(src, index)
    187 
    188     while key_toc != TOC and key_toc not in QUIT:
    189         if key_toc in SCROLL_UP and index > 0:
    190             index -= 1
    191             top = pad(src, index, top)
    192         if key_toc in SCROLL_DOWN and index + 1 < len(src):
    193             index += 1
    194             top = pad(src, index, top)
    195         if key_toc == FOLLOW:
    196             reader(stdscr, ebook, index, width, 0)
    197         key_toc = toc.getch()
    198 
    199     toc.clear()
    200     toc.refresh()
    201     return
    202 
    203 def meta(stdscr, ebook):
    204     rows, cols = stdscr.getmaxyx()
    205     hi, wi = rows - 4, cols - 4
    206     Y, X = 2, 2
    207     meta = curses.newwin(hi, wi, Y, X)
    208     meta.box()
    209     meta.keypad(True)
    210     meta.addstr(1,2, "Metadata")
    211     meta.addstr(2,2, "--------")
    212     key_meta = 0
    213 
    214     mdata = []
    215     src = ""
    216     for i in ebook.get_meta():
    217         data = re.sub("<[^>]*>", "", i[1])
    218         data = re.sub("\t", "", data)
    219         mdata += textwrap.fill(i[0] + " : " + data, wi - 6).splitlines()
    220     src_lines = mdata
    221 
    222     pad = curses.newpad(len(src_lines), wi - 2 )
    223     pad.keypad(True)
    224     for i in range(len(src_lines)):
    225         pad.addstr(i, 0, src_lines[i])
    226     y = 0
    227     meta.refresh()
    228     pad.refresh(y,0, Y+4,X+4, rows - 5, cols - 6)
    229 
    230     while key_meta != META and key_meta not in QUIT:
    231         if key_meta in SCROLL_UP and y > 0:
    232             y -= 1
    233         if key_meta in SCROLL_DOWN and y < len(src_lines) - hi + 4:
    234             y += 1
    235         pad.refresh(y,0, 6,5, rows - 5, cols - 5)
    236         key_meta = meta.getch()
    237 
    238     meta.clear()
    239     meta.refresh()
    240     return
    241 
    242 def help(stdscr):
    243     rows, cols = stdscr.getmaxyx()
    244     hi, wi = rows - 4, cols - 4
    245     Y, X = 2, 2
    246     help = curses.newwin(hi, wi, Y, X)
    247     help.box()
    248     help.keypad(True)
    249     help.addstr(1,2, "Help")
    250     help.addstr(2,2, "----")
    251     key_help = 0
    252 
    253     src = __doc__
    254     src_lines = src.split("\n")
    255 
    256     pad = curses.newpad(len(src_lines), wi - 2 )
    257     pad.keypad(True)
    258     for i in range(len(src_lines)):
    259         pad.addstr(i, 0, src_lines[i])
    260     y = 0
    261     help.refresh()
    262     pad.refresh(y,0, Y+4,X+4, rows - 5, cols - 6)
    263 
    264     while key_help not in HELP and key_help not in QUIT:
    265         if key_help == SCROLL_UP and y > 0:
    266             y -= 1
    267         if key_help == SCROLL_DOWN and y < len(src_lines) - hi + 4:
    268             y += 1
    269         if key_help == curses.KEY_RESIZE:
    270             break
    271         pad.refresh(y,0, 6,5, rows - 5, cols - 5)
    272         key_help = help.getch()
    273 
    274     help.clear()
    275     help.refresh()
    276     return
    277 
    278 def to_text(src, width):
    279     while True:
    280         try:
    281             root = ET.fromstring(src)
    282             break
    283         except Exception as ent:
    284             ent = str(ent)
    285             ent = re.search("(?<=undefined entity &).*?;(?=:)", ent).group()
    286             src = re.sub("&" + ent, html5[ent], src.decode("utf-8")).encode("utf-8")
    287         
    288     body = root.find("XHTML:body", NS)
    289     text = []
    290     # for i in body.findall("*", NS):
    291     # for i in body.findall(".//XHTML:p", NS):
    292     for i in body.findall(".//*"):
    293         if re.match("{"+NS["XHTML"]+"}h[0-9]", i.tag) != None:
    294             for j in i.itertext():
    295                 text.append(j.rjust(width//2 + len(j)//2 - RIGHTPADDING))
    296                 text.append("")
    297         elif re.match("{"+NS["XHTML"]+"}p", i.tag) != None:
    298             par = ET.tostring(i, encoding="utf-8").decode("utf-8")
    299             par = re.sub("<[^>]*>", "", par)
    300             par = re.sub("\t", "", par)
    301             par = textwrap.fill(par, width)
    302             text += par.splitlines() + [""]
    303 
    304     return text + [""]
    305 
    306 def reader(stdscr, ebook, index, width, y=0):
    307     k = 0
    308     rows, cols = stdscr.getmaxyx()
    309     x = (cols - width) // 2
    310     stdscr.clear()
    311     stdscr.refresh()
    312 
    313     content = ebook.file.open(ebook.get_contents()[index][1]).read()
    314 
    315     src_lines = to_text(content, width)
    316 
    317     pad = curses.newpad(len(src_lines), width + 2) # + 2 unnecessary
    318     pad.keypad(True)
    319     for i in range(len(src_lines)):
    320         pad.addstr(i, 0, src_lines[i])
    321     pad.addstr(i, width//2 - 10 - RIGHTPADDING, "-- End of Chapter --", curses.A_REVERSE)
    322     pad.refresh(y,0, 0,x, rows-1,x+width)
    323 
    324     while True:
    325         # if k == QUIT or k == 3:
    326         if k in QUIT:
    327             for i in state:
    328                 state[i]["lastread"] = str(0)
    329             state[ebook.path]["lastread"] = str(1)
    330             state[ebook.path]["index"] = str(index)
    331             state[ebook.path]["width"] = str(width)
    332             state[ebook.path]["pos"] = str(y)
    333             with open(statefile, "w") as f:
    334                 json.dump(state, f, indent=4)
    335             exit()
    336         if k in SCROLL_UP:
    337             if y > 0:
    338                 y -= 1
    339             # if y == 0 and index > 0:
    340             #     reader(stdscr, ebook, index-1, width)
    341         if k in PAGE_UP:
    342             if y >= rows - LINEPRSRV:
    343                 y -= rows - LINEPRSRV
    344             else:
    345                 y = 0
    346         if k in SCROLL_DOWN:
    347             if y < len(src_lines) - rows:
    348                 y += 1
    349             # if y + rows >= len(src_lines):
    350             #     reader(stdscr, ebook, index+1, width)
    351         if k in PAGE_DOWN:
    352             if y + rows - 2 <= len(src_lines) - rows:
    353                 y += rows - LINEPRSRV
    354             else:
    355                 y = len(src_lines) - rows
    356                 if y < 0:
    357                     y = 0
    358         if k in CH_NEXT and index < len(ebook.get_contents()) - 1:
    359             reader(stdscr, ebook, index+1, width)
    360         if k in CH_PREV and index > 0:
    361             reader(stdscr, ebook, index-1, width)
    362         if k in CH_HOME:
    363             y = 0
    364         if k in CH_END:
    365             y = len(src_lines) - rows
    366             if y < 0:
    367                 y = 0
    368         if k == TOC:
    369             toc(stdscr, ebook, index, width)
    370         if k == META:
    371             meta(stdscr, ebook)
    372         if k in HELP:
    373             help(stdscr)
    374         if k == WIDEN and (width + 2) < cols:
    375             width += 2
    376             reader(stdscr, ebook, index, width)
    377             return
    378         if k == SHRINK and width >= 22:
    379             width -= 2
    380             reader(stdscr, ebook, index, width)
    381             return
    382         if k == curses.KEY_RESIZE:
    383             curses.resize_term(rows, cols)
    384             rows, cols = stdscr.getmaxyx()
    385             # TODO
    386             if cols <= width:
    387                 width = cols - 2
    388             reader(stdscr, ebook, index, width)
    389 
    390         pad.refresh(y,0, 0,x, rows-1,x+width)
    391         k = pad.getch()
    392 
    393 def main(stdscr, file):
    394     stdscr.keypad(True)
    395     curses.curs_set(0)
    396     stdscr.clear()
    397     stdscr.refresh()
    398     rows, cols = stdscr.getmaxyx()
    399     epub = Epub(file)
    400 
    401     if epub.path in state:
    402         idx = int(state[epub.path]["index"])
    403         width = int(state[epub.path]["width"])
    404         y = int(state[epub.path]["pos"])
    405     else:
    406         state[epub.path] = {}
    407         idx = 1
    408         y = 0
    409         width = 80
    410 
    411     if cols <= width:
    412         width = cols - 2
    413         y = 0
    414     reader(stdscr, epub, idx, width, y)
    415 
    416 if __name__ == "__main__":
    417     if len(sys.argv) == 1:
    418         file = False
    419         for i in state:
    420             if not os.path.exists(i):
    421                 del state[i]
    422             elif state[i]["lastread"] == str(1):
    423                 file = i
    424         if not file:
    425             print("ERROR: Found no last read file.")
    426             print(__doc__)
    427         else:
    428             curses.wrapper(main, file)
    429     elif len(sys.argv) == 2 and sys.argv[1] not in ("-h", "--help"):
    430         curses.wrapper(main, sys.argv[1])
    431     else:
    432         print(__doc__)