#!/usr/bin/env python -Ss
# coding: utf-8
# Some systems do not accept shebang with args. Use python -Ss mdvl.py then.
'''
# Lightweight Simple Markdown Renderer for the Terminal

## Usage

    mdvl <markdown source | markdown file>
    cat <markdown file> | mdvl

## Config

```
%s
```

### Colors

```
%%s
```

## Debugging Parsing Errors

    export mdvl_debug=1

See also https://github.com/axiros/mdvl

'''
__version__ = "2017.07.16.7"  # count up for new pip versions
__author__ = "Gunther Klessinger"

from textwrap import fill
# Kept for backward compatibility of the module namespace only. It shadows the
# builtin `set`, so the code below assigns to dicts directly instead.
from operator import setitem as set
import re, os

debug = os.environ.get('mdvl_debug')

# check environ for value and cast into bools if necessary:
_b = {'True': True, 'False': False}


def env(k, d=None):
    '''
    Return the environment value for key `k`, or `d` when it is not set.

    The literal strings 'True' / 'False' are converted into real booleans, so
    that e.g. `export no_print=False` is not evaluated as `bool('False')`.
    '''
    v = os.environ.get(k, d)
    if isinstance(v, str) and v in _b:
        return _b[v]
    return v


# ----------------------------------------------------------------- Config Mgmt
class Cfg:
    '''
    Base class for osenv and kw configurable instances.

    - We have defaults, overridable by environ keys, overridable by **kws
    - Problem is that color codes should be giveable as ints - but on the
      terminal we usually have them as full ansi escape str.
      That is why `get_val` is present and adapts for that in the Colors cls.
    '''
    _parms = None  # our (relevant) keys and their default values

    def setup(self, kw):
        '''Find all our key value defaults and override with env and **kw.'''
        kv = [(k, getattr(self, k))
              for k in dir(self) if not k.startswith('_')]
        # everything carrying a __code__ is a method, not a config value:
        self._parms = [(k, v) for k, v in kv if not hasattr(v, '__code__')]
        for k, v in self._parms:
            setattr(self, k, self.get_val(k, v, kw))

    def get_val(self, k, dflt, kw):
        '''Return the value for `k` (kw beats env beats default), cast to the
        type of the default. Raises Exception when the cast fails.'''
        try:
            return type(dflt)(kw.get(k, env(k, dflt)))
        except Exception as ex:
            # show reason clearer:
            raise Exception(
                'Could not cast type %s - have %s %s %s %s' % (
                    type(dflt), k, dflt, kw, env(k, dflt)))


class Colors(Cfg):
    '''
    Color namespace with default color scheme (greenish).
    The 'C' in the main method.
    After `setup` every attribute holds a full ansi escape sequence,
    with kw and env taking precedence over the defaults below.
    '''
    O = '\x1B[0m'
    GRAY = 240
    CODE = 245
    L = env('L', 66)
    H1 = env('I', 158)
    H2 = env('G', 115)
    H3 = env('M', 72)
    H4 = env('CODE', 66)
    emph = env('I', 158)
    ital = env('M', 72)

    def get_val(self, k, dflt, kw):
        '''Return the color for `k` as ansi escape string (see Cfg).'''
        v = str(kw.get(k, env(k, dflt)))
        if '\x1B' in v:
            # already a real escape sequence
            pass
        elif '[' in v:
            # textual escape notation: the first two chars are replaced by ESC
            v = '\x1B' + v[2:]
        else:
            # plain 256 color number
            v = '\x1B[1;38;5;%sm' % v
        return v

    def H(s, lev):
        '''Return the color for header level `lev`, falling back to `L`.'''
        return getattr(s, 'H%s' % lev, s.L)


class Facts(Cfg):
    ''' features config '''
    debug = False
    term_width = 80
    no_print = False
    bq_mark = '┃'
    code_mark = '│'
    light_bg = False
    no_smart_indent = False
    horiz_rule = '─'
    single_line_mode = False
    # left and right global indents:
    indent = 0
    rindent = 0
    width = 0  # if set > 0 we set rindent accordingly
    header_numbering = 50  # -1: off, min number of lines to do autonumbering
    header_numb_level_min = 1  # min header level to show the numbers
    header_numb_level_max = 6  # max header level to show the numbers
    header_underlining = '*'  # e.g. '*-' to underline H1 with *** and H2 with ---
    opts_tbl_start = '-'
    opts_tbl_end = ':'

    def __init__(f, md, **kw):
        '''Build the config for rendering `md`: defaults, overridden by env,
        overridden by `kw`. Color keys within `kw` end up in `f.colr`.'''
        if md.split('\n', 1)[0] == md:
            # no line break at all -> render without surrounding empty lines
            f.single_line_mode = True
            f.indent = 0
        f.setup(kw)
        f.colr = Colors()
        f.colr.setup(kw)


# ------------------------------------------------ end config - begin rendering
# helper funcs:
def get_subseq_light_table_indent(l0):
    '''
    Return the subsequent-lines indent for a line starting with a starred
    keyword (`*key* text` or `**key** text`): the column at which the text
    after the keyword starts once the stars are removed.

    Returns 0 when the opening star(s) are never closed.
    '''
    p = '**' if l0.startswith('**') else '*'
    _, closer, l1 = l0[2:].partition(p)
    if not closer:
        return 0
    offs = 1 if l1 and l1[0] == ' ' else 2
    return len(l0) - len(l1[offs:].lstrip()) - (2 * len(p))


def block_quote_status(l, g):
    '''
    Analyse the blockquote prefix of line `l`.

    Returns `(level, line without the prefix, prefix)`; level is 0 and the
    prefix empty for non-quoted lines. The deepest level seen so far is
    tracked in `g['max_bq_depth']`.
    '''
    if not l.startswith('>'):
        return 0, l, ''
    rest = l.lstrip('>')
    lev = len(l) - len(rest)
    if rest.startswith(' '):
        rest = rest[1:]  # exactly one separating space belongs to the mark
    g['max_bq_depth'] = max(lev, g['max_bq_depth'])
    return lev, rest, l[:lev]


h_rules_col = {'-': 'L', '_': 'H3', '*': 'H1'}  # different colors
list_markup = {'- ': ('\x03 ', 'L', '❖ '), '* ': ('\x04 ', 'H2', '▪ ')}
h_rules = '---', '___', '***'


def _main(md, f):
    '''
    Render markdown `md` into an ansi colored string, using the Facts
    instance `f`. Prints the result unless `f.no_print` is set and returns it.
    '''
    C, cur_colr = f.colr, 'cur_colr'
    cols = int(f.term_width)
    if f.width:
        f.rindent = cols - f.indent - f.width + f.rindent
    cols = cols - f.indent - f.rindent

    g = {}  # glob parsing state (current color, code blocks)

    # ------------ line tools requiring facts instance, possible ctx g as well:
    def is_opts_tbl(l, b=f.opts_tbl_start, e=f.opts_tbl_end):
        '''`-opt: text` lines: returns (line with the option emphasized,
        length of the option word) or (line, None).'''
        fw = first_word(l)
        if fw and fw.startswith(b) and fw.endswith(e):
            return l.replace(fw, '*%s*' % fw[:-len(e)]), len(fw)
        return l, None

    def is_rule(l):
        '''True if the line consists only of (3 or more) rule characters.'''
        if l[:3] not in h_rules:
            return False
        ll = len(l)
        return l in (ll * '-', ll * '*', ll * '_')

    # Line Tools:
    first_word = lambda l: l.split(' ', 1)[0]
    is_header = lambda l: l.startswith('#')
    is_list = lambda l: l.lstrip()[:2] in list_markup
    is_empty = lambda l: l.strip() == ''
    # slicing instead of indexing: must not fail for empty lines
    is_md_link = lambda l: l[:1] == '[' and 'http' in l and ']' in l

    is_new_block = lambda l: (
        is_header(l) or
        is_list(l) or
        is_opts_tbl(l)[1] or
        is_empty(l) or
        is_md_link(l) or
        l[0] in ('\x02', ) or
        is_rule(l)
    )
    # -------------------------------------------------------------------------

    md = md.strip()

    # FENCED CODE BLOCKS:
    # we take them out before all parsing,see http://stackoverflow.com/a/587518
    apo, apos = chr(96), chr(96) * 3  # chr 96 is backtick.
    _ = r'^({apos}[^\n]+)\n((?:[^{apo}]+\n{apo}{apo})+)'.format(apos=apos, apo=apo)
    fncd = re.compile(_, re.MULTILINE)  # finds fenced code
    # alternative markup for fenced. The fence has to stay at the start of
    # its line, otherwise the regex above can never match it:
    md = re.sub(r'(?m)^~~~', apos, md)
    # remembering the blocks by their occurrence number (len(g))
    for m in fncd.finditer(md):
        g[len(g)] = '\n'.join(m.groups()) + apo
    blocks = len(g)
    for i in range(blocks):
        md = md.replace(g[i], '\x02%s' % i)

    g['max_bq_depth'] = 0

    # LINESPROCESSOR:
    lines, out = md.splitlines(), []

    g['header_numbering'] = False
    if f.header_numbering > -1 and len(lines) > f.header_numbering:
        g['header_numbering'] = True
        g['header_level'] = {}  # storing the current header numberings

    # remove boundary effects:
    lines.insert(0, '')
    lines.append('')

    while lines:

        line = lines.pop(0)
        if is_empty(line):
            out.append('')
            continue
        if debug:
            print('procesing: ', line)
        if is_rule(line):
            out.append(getattr(C, h_rules_col[line[0]]) + (cols * f.horiz_rule))
            continue

        cb = None  # indented code blocks (4 spaces):
        while line.startswith('    '):
            cb = cb or []
            cb.append(line[4:])
            line = lines.pop(0)  # safe: the last line is the '' sentinel
        if cb:
            if out[-1] == '':
                out.pop()
            g[blocks] = '\n%s\n' % '\n'.join(cb)
            out.append('\x02%s' % blocks)
            blocks += 1
            lines.insert(0, line)
            continue

        ssi = None  # subseq indent for textwrap

        # TEXTBLOCKS: Concat lines which must be wrapped:
        # bqm is the blockquote mark. e.g. '>>'.
        bq_lev, line, bqm = block_quote_status(line, g)

        src_line_nr = 0

        # we derive the (static) opts table ssi for a new textblock:
        line, opts_tbl_ssi = is_opts_tbl(line)
        # now we find all other lines belonging to that text block and
        # concat (pop from lines) all of them.
        # Two trailing spaces are the markdown hard line break:
        while (lines and not line.endswith('  ')
               and not is_header(line)):

            src_line_nr += 1
            nl, l0 = lines[0], line.lstrip()  # next line, this line

            bqnl = block_quote_status(nl, g)
            if bqnl[0] == bq_lev:
                lines[0] = nl = bqnl[1]  # remove redundant '>'

            elif bqnl[0] != bq_lev and bqnl[0] > 0:
                break  # next line different blockquote level -> new text block

            # finding subseq. indent for textwrap.fill:

            # Little md violation: If first word is starred, we set a ssi to
            # position: first line second word start.
            # Gives easy 2 col wrappable tables when first col is hilited.
            if ssi is None:
                if is_list(l0):
                    # replace "- " and "* " with tags:
                    line = list_markup[l0[:2]][0] + l0[2:]
                    ssi = 2
                elif opts_tbl_ssi:
                    ssi = opts_tbl_ssi
                elif (l0.startswith('*') and
                      not f.no_smart_indent and
                      src_line_nr == 1):
                    ssi = get_subseq_light_table_indent(l0)

            if is_new_block(nl):
                # line is now one wrapable textblock
                if bqnl[0]:  # block quote new line
                    # adapt next line to parse:
                    lines[0] = (bqnl[2] + ' ') + lines[0]
                break
            else:
                line = line.rstrip() + ' ' + lines.pop(0).lstrip()

        ssi = 0 if ssi is None else ssi
        # lines are now blocks

        g[cur_colr] = C.O  # reset color
        ind = len(line) - len(line.lstrip())
        if bqm:
            bqm += ' '
            line = bqm + line

        if is_header(line):
            cont = line.lstrip('#')
            level = len(line) - len(cont)
            line = cont.lstrip()

            u = getattr(f, 'header_underlining', '')
            if len(u) >= level:
                # the underlining is realized as a rule, parsed next:
                lines.insert(0, 3 * u[level - 1])

            if g['header_numbering']:
                hl = g['header_level']
                hl[level] = hl.get(level, 0) + 1
                for i in hl:
                    if i > level:
                        hl[i] = 0  # a new parent resets all deeper counters
                # .get: documents may skip levels (e.g. start with H2)
                nr = '.'.join([str(hl.get(ll, 0))
                               for ll in range(1, level + 1)])
                if f.header_numb_level_max > level - 1:
                    if f.header_numb_level_min > 1:
                        nr = nr.split('.')[f.header_numb_level_min - 1:]
                        nr = '.'.join(nr)
                    if nr:
                        line = nr + ' ' + line

            g[cur_colr] = C.H(level)

        # WRAP:
        if len(line) > cols:
            s = (bqm + ' ' * (ind + ssi))
            line = fill(line, subsequent_indent=s, width=cols)
        if is_md_link(line):
            g[cur_colr] = C.GRAY
        out.append(g[cur_colr] + line)

    # --------------- Leaving line/block scanning, reWork complete document now
    g[cur_colr] = C.O
    out = '\n'.join(out)

    # INLINE MARKUP, *, **, backticks
    # Alternating replacements, e.g. code, emph. requires a first space char:
    def altern(s, c, r):
        return re.sub(
            r'([^{c}]+){c}([^{c}]+){c}?'.format(c=c),
            r'\1%s\2%s' % (r, g[cur_colr]), ' ' + s)[1:]  # removing space again

    # Star must be replaced, else the re would not work :((
    # currently no way to find single stars and not process them..
    out = out.replace('*', '\x01')
    out = altern(out, apo, C.CODE)  # code
    out = altern(out, '\x01\x01', C.emph)  # **
    out = altern(out, '\x01', C.ital)  # *

    # rearrange resets, to be *before* the line breaks, not after...
    out = out.replace('\n' + C.O, C.O + '\n')
    # ... so that we can look for blockquotes:
    for i in range(g['max_bq_depth'], 0, -1):
        # coloring, take header levels. bq_mark is "|":
        m = ''
        for j in range(1, i + 1):
            m += C.H(j) + f.bq_mark
        m += C.O
        out = out.replace('\n' + '>' * i, '\n' + m)

    # Insert back the stored code blocks:
    code_fmt = lambda c: c.replace(
        '\n', '\n%s%s %s' % (C.L, f.code_mark, C.CODE)).rsplit('\n', 1)[0]
    # highest number first: the marker of block 1 is a prefix of the markers
    # of blocks 10..19 and would corrupt them when replaced before those
    for i in reversed(range(blocks)):
        out = out.replace('\x02%s' % i,
                          '%s%s%s' % (C.CODE, code_fmt(g[i]), C.O))
        out = out.replace(apos + '\n', '')  # before
        out = out.replace(apos, '')  # after

    for k, v in list_markup.items():
        out = out.replace(v[0], getattr(C, v[1]) + v[2] + C.O)

    out = strip_it(out, C.O)
    if not f.single_line_mode:
        out = '\n' + out + '\n'
    li, ri = f.indent * ' ', f.rindent * ' '
    if li or ri:
        out = li + out.replace('\n', '%s\n%s' % (ri, li))
    out += C.O  # reset
    if not f.no_print:
        print (out)
    return out


def strip_it(out, rst):
    '''Strip spaces, line breaks and color resets (`rst`) from both ends of
    `out`, repeatedly, until none of them is left at either end.'''
    tokens = [t for t in (' ', rst, '\n') if t]  # an empty token never ends
    changed = True
    while changed:
        changed = False
        for t in tokens:
            if out.startswith(t):
                out = out[len(t):]
                changed = True
            if out.endswith(t):
                out = out[:-len(t)]
                changed = True
    return out


def main(md, **kw):
    '''
    Render markdown `md`, configured via `kw` (see Facts and Colors).

    Returns `(rendered, facts)`. When rendering fails and debugging is off,
    the clear text and the error are printed and `(md, facts)` is returned,
    so that callers indexing the result keep working.
    '''
    f = Facts(md, **kw)
    if debug or f.debug:
        # no error handling wanted, show the traceback:
        return _main(md, f), f  # we also return to the client the config
    try:
        return _main(md, f), f  # we also return to the client the config
    except Exception as ex:
        print (md)  # clear text
        print ('md error: %s %s ' % (f.colr.CODE, ex))
        return md, f


def render(md, cols, **kw):
    '''Render `md` for a terminal width of `cols`, return the result str.'''
    kw['term_width'] = cols
    return main(md, **kw)[0]


def get_help(cols, PY2):
    '''Return the module docstring (markdown), with the current config values
    and colors filled into its two placeholders.'''
    ff = Facts('\n', term_width=cols)
    md, C = __doc__, ff.colr
    # first round fills the config, second one (%%s -> %s) the colors:
    for o in ff, C:
        mmd = ()
        for k, d in sorted(o._parms):
            v = getattr(o, k)
            if o == ff:  # need the perceived len here:
                v = C.H2 + (str(u'%5s' % str(v)) if PY2 else '%5s' % v) + C.O
            mmd += ('%s %s [%s]' % (v, k, d),)
        md = md % ('\n'.join(mmd))
    return md


def get_cols():
    '''Return the terminal width. Allows to adapt $COLUMNS by setting
    $term_width, else asks tput, else falls back to '80'.'''
    return (env('term_width') or
            os.popen('tput cols 2>/dev/null').read().strip() or '80')


def sys_main():
    '''Command line entry point: renders stdin (pipe mode), a file, a
    markdown string given as first argument, or the help.'''
    import sys
    PY2 = sys.version_info[0] == 2
    if PY2:
        reload(sys); sys.setdefaultencoding('utf-8')  # noqa - python2 only
    import os
    from stat import S_ISFIFO
    err = None
    try:
        cols = get_cols()
    except Exception as ex:
        err = str(ex)
        cols = 80
    if S_ISFIFO(os.fstat(0).st_mode):  # pipe mode
        md = sys.stdin.read()
    else:
        if not len(sys.argv) > 1 or '-h' in sys.argv:
            md = get_help(cols, PY2)
        else:
            md = sys.argv[1]
            if os.path.exists(md):
                with open(md) as fd:
                    md = fd.read()
    if err:
        print(err)
        print(md)
    else:
        main(md, term_width=cols)

# ============================================== Script Formatters ===========


def format_bash(dev_help, cols, lines, script, *args):
    '''
    Renders help for a bash script nicely, given it follows some conventions.

    These are:

    1. An `md_doc` function is required, returning general docu as markdown,
       containing the string "<auto_command_doc>"

    2. All public functions must be in this format:

        : 'optional doculines before...'
        function myfunc {
            : 'optional inner doculines (params...)'
            <code lines>
        }

    3. In the command parsing part then this: `show_help $sourced $0 $*`
       with that function elsewhere in your tools:

        show_help () {
            local sourced=$1; shift
            local dev_help=false
            test "${2:-}x" == "make_docx" && { md_doc; exit 0; }
            test "${@: -1}" == "-hh" 2>/dev/null && dev_help=true || {
                test "${@: -1}" == "-h" 2>/dev/null || return 0
            }
            local cols=`stty size | cut -d ' ' -f 2`
            mdvl -f $dev_help $cols "$*"; $sourced && return 1 || exit
        }

    `lines` are the lines of the script, `args[0]` is either a help switch
    or a (part of a) function name whose docu is wanted.
    '''
    dev_help = True if str(dev_help) in ('True', 'true', '1') else False
    single_func_doc = False
    l = lines
    funcs = []
    start = ": '"
    is_func = lambda l: l.startswith('function ')
    is_cmt_end = lambda l: l.rstrip().endswith("'")
    is_cmt_start = lambda l: l.lstrip().startswith(start)

    def clean(s, head_sub):
        '''Remove the bash comment quoting and push headers down.'''
        s = s.strip()
        s = s[len(start):] if s.startswith(start) else s
        s = s[:-1] if s.endswith("'") else s
        s = (('\n' + s).replace('\n#', '\n%s#' % head_sub))[1:]
        return s

    def render_func(m, single_func_doc):
        '''Return the markdown for one `{name: [nr, pre, post, code]}`.'''
        fn = list(m)[0]
        hf, hs = ('# `Function` **%s**', '##') if single_func_doc else (
            '### %s', '###')
        nr, pre, post, code = list(m.values())[0]
        md = [hf % fn]
        if pre:
            md.append(clean('\n'.join(pre), hs))
        if post:
            md.append(clean('\n'.join(post), hs))
        if code:
            code = '\n'.join(code)
            if post or pre:
                md.append('')
            md.append(code)
        md.extend(['---', ''])
        return '\n'.join(md)

    for i, src in enumerate(l):
        if is_func(src):
            fn = (src + ' ').split(' ', 2)[1]
            # line nr, doc lines before, doc lines within, code lines:
            funcs.append({fn: [i, [], [], []]})

    # without any argument we show the full doc:
    arg0 = args[0] if args else '-h'
    if '-h' not in arg0:
        f = [m for m in funcs if arg0 in list(m)[0]]
        if f:
            funcs = f
            single_func_doc = True

    for m in funcs:
        nr, pre, post, code = list(m.values())[0]
        # pre:
        if is_cmt_end(l[nr - 1]):
            i = nr
            while True:
                i -= 1
                pre.insert(0, l[i])
                if is_cmt_start(l[i]):
                    break
                if i > 1 and l[i - 1].rstrip().endswith('}'):
                    # err: ran into the previous function. Empty the stored
                    # list in place - rebinding the name would keep the junk
                    del pre[:]
                    break

        # post:
        i = nr
        if nr + 1 < len(l) and is_cmt_start(l[nr + 1]):
            while True:
                i += 1
                post.append(l[i][4:])
                if is_cmt_end(l[i]) and not l[i].strip() == start:
                    break
                if l[i + 1].rstrip().endswith('}'):
                    del post[:]  # err, see above
                    i = nr
                    break
        if dev_help:
            i += 1
            while True:
                if l[i].strip() == '}':
                    break
                code.append(l[i])
                i += 1

    Facts.indent = 0
    if single_func_doc:
        for m in funcs:
            md = render_func(m, single_func_doc)
            main(md, term_width=cols)
            print('')
        return

    # now the full doc. convention is to call with make_doc arg:
    Facts.no_print = True
    Facts.header_numbering = 10
    Facts.header_numb_level_min = 2
    Facts.header_numb_level_max = 2

    # NOTE: this runs the given script through the shell - only use it on
    # scripts you trust.
    full = os.popen(script.split(' ')[0] + ' make_doc').read()
    acd = '<auto_command_doc>'
    full = full.replace(acd, '## Commands\n\n' + acd)
    md = main(full, term_width=cols)[0]

    Facts.header_numbering = -1
    rfuncs, sep = '', '\n\n'
    if dev_help:
        sep = ''
    for m in funcs:
        rfuncs = rfuncs + sep + render_func(m, single_func_doc)
    mdf = main(rfuncs, term_width=cols)[0]
    print(md.replace(acd, mdf))


def format_file(dev_help, cols, fn, *args):
    '''Render the help of script file `fn`. Only bash scripts (first line
    mentions bash) are supported. Raises Exception for anything else.'''
    if not os.path.exists(fn):
        raise Exception('Not found' + fn)
    with open(fn) as fd:
        lines = fd.read().splitlines()
    if lines and 'bash' in lines[0]:
        format_bash(dev_help, cols, lines, fn, *args)
    else:
        raise Exception('Not supported format')


if __name__ == '__main__':
    import os, sys
    if len(sys.argv) > 1 and sys.argv[1] == '-f':
        format_file(*sys.argv[2:])
    else:
        sys_main()