mirror of
https://github.com/stedolan/jq.git
synced 2024-05-11 05:55:39 +00:00
236 lines
7.5 KiB
Python
Executable File
#!/usr/bin/env python3
from datetime import date
from io import StringIO
import os
import re
import sys
import time

from lxml import etree
import markdown
from markdown.extensions import Extension
import yaml


# Prevent our markdown parser from trying to help by interpreting things in angle brackets as HTML tags.
class EscapeHtml(Extension):
    """Markdown extension that switches off raw-HTML handling.

    It removes the ``html_block`` preprocessor and the ``html`` inline
    pattern from the Markdown instance it is attached to.
    """

    def extendMarkdown(self, md, md_globals=None):
        """Remove the HTML processors from *md*.

        Args:
            md: the ``markdown.Markdown`` instance being configured.
            md_globals: unused. Accepted (and optional) because older
                Python-Markdown releases pass it while newer ones call
                ``extendMarkdown(md)`` with a single argument.
        """
        targets = (
            (md.preprocessors, 'html_block'),
            (md.inlinePatterns, 'html'),
        )
        for registry, name in targets:
            if hasattr(registry, 'deregister'):
                # Python-Markdown 3.x Registry API; ``del registry[name]``
                # is no longer supported by recent releases.
                registry.deregister(name)
            else:
                # Older releases expose a plain ordered mapping.
                del registry[name]


class RoffWalker(object):
    """Walk a parsed HTML tree and emit equivalent roff (man page) source.

    Nodes must offer the lxml element API used below: ``tag``, ``text``,
    ``tail``, ``getnext()``, ``itertext()``, ``len()`` and indexing.
    Output is collected in an in-memory buffer while walking and is only
    written to the target stream by :meth:`walk`.
    """

    # One-pass escaping of the characters that need protecting in roff text.
    # Equivalent to escaping the backslash first and the other characters
    # afterwards, because inserted backslashes are never re-examined.
    _ROFF_ESCAPES = str.maketrans({
        '\\': '\\e',
        '.': '\\.',
        "'": "\\'",
        '-': '\\-',
    })

    # roff font letter used for each inline HTML tag (B = bold, I = italic).
    _INLINE_FONTS = {'strong': 'B', 'em': 'I', 'code': 'B'}

    def __init__(self, tree, output=None):
        """Store the tree to render and the stream to write to.

        Args:
            tree: first node to render (may be ``None`` for an empty document).
            output: writable text stream; defaults to ``sys.stdout`` as it is
                at construction time, so a redirected stdout is honoured.
        """
        self.tree = tree
        self.target = sys.stdout if output is None else output
        self.f = StringIO()

    def walk(self):
        """Render the whole tree and write the result to the target stream."""
        self._walk(self.tree, parent_tag=None)
        # We don't want to start lines with \. because that can confuse man
        # For lines that start with \., we need to prefix them with \& so it
        # knows not to treat that line as a directive
        data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE)
        self.target.write(data)

    def _manpage_date(self):
        """Return the ``Month YYYY`` date placed in the ``.TH`` header.

        When the ``SOURCE_DATE_EPOCH`` environment variable is set to a
        non-empty value it is parsed as integer seconds since the Unix epoch
        and interpreted in UTC, so rebuilding the same sources gives the same
        page. Otherwise today's date is used.

        Raises:
            ValueError: if ``SOURCE_DATE_EPOCH`` is set but not an integer.
        """
        epoch = os.environ.get('SOURCE_DATE_EPOCH')
        if epoch:
            when = date(*time.gmtime(int(epoch))[:3])
        else:
            when = date.today()
        return when.strftime('%B %Y')

    def _ul_is_special(self, root):
        """Return True for a list holding exactly one ``li`` whose text ends with ':'."""
        if len(root) != 1:
            return False
        child = root[0]
        if child.tag != 'li':
            return False
        msg = ''.join(child.itertext()).strip()
        return msg.endswith(':')

    def _walk_child(self, root):
        """Render the children of *root*, if it has any."""
        if len(root) > 0:
            # _walk() follows the sibling chain itself, so starting at the
            # first child covers all of them.
            self._walk(root[0], parent_tag=root.tag)

    def _write_element(self, root, ensure_newline=True):
        """Write the text of *root*, then its children, then its tail."""
        if root.text is not None:
            self.__write_raw(self._sanitize(root.text))
        self._walk_child(root)
        self._write_tail(root, ensure_newline=ensure_newline)

    def _write_tail(self, root, ensure_newline=False, inline=False):
        """Write the text that follows *root*'s closing tag.

        Args:
            root: element whose ``tail`` is written.
            ensure_newline: finish with a newline unless the tail supplied one.
            inline: also write a tail that is only a newline (it is emitted
                as a single space after sanitizing).
        """
        if root.tail is not None:
            if inline or root.tail != '\n':
                text = self._sanitize(root.tail)
                # NOTE: _sanitize() turns every newline into a space, so this
                # guard cannot currently trigger.
                if text.endswith('\n'):
                    ensure_newline = False
                self.__write_raw(text)
        if ensure_newline:
            self.__write_raw('\n')

    def _walk(self, root, parent_tag=None):
        """Render *root* and every following sibling.

        Args:
            root: first node to render, or ``None`` to render nothing.
            parent_tag: tag name of the enclosing element, if any.
        """
        last_tag = None
        while root is not None:
            if root.tag == 'h1':
                self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(self._manpage_date()))
                self.__write_cmd('.SH "NAME"')
                # TODO: properly parse this
                self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' + "\n")

            elif root.tag == 'h2':
                self.__write_cmd('.SH "{}"'.format(''.join(root.itertext()).strip()))

            elif root.tag == 'h3':
                text = ''.join(root.itertext()).strip()
                self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text)))

            elif root.tag == 'p':
                # No paragraph break directly after a heading or inside a list item.
                if last_tag not in ('h2', 'h3') and parent_tag != 'li':
                    self.__write_cmd('.P')
                self._write_element(root, ensure_newline=(parent_tag != 'li'))

            elif root.tag == 'ul':
                if self._ul_is_special(root):
                    # A lone "label:" item becomes a tagged paragraph, and the
                    # paragraphs that follow it become its indented body.
                    self.__write_cmd('.TP')
                    self._write_element(root[0])
                    sibling = root.getnext()
                    while sibling is not None and sibling.tag == 'p':
                        after = sibling.getnext()
                        if after is not None and after.tag == 'pre':
                            # we don't want to .IP these, because it'll look funny with the code indent
                            break
                        self.__write_cmd('.IP')
                        self._write_element(sibling)
                        # Consume the paragraph so the outer loop skips it.
                        root = sibling
                        sibling = root.getnext()
                else:
                    self._walk_child(root)
                    self._write_tail(root)
                    # A pre tag after the end of a list doesn't want two of the indentation commands
                    following = root.getnext()
                    if following is None or following.tag != 'pre':
                        self.__write_cmd('.IP "" 0')

            elif root.tag == 'li':
                self.__write_cmd(r'.IP "\(bu" 4')
                if root.text is not None and root.text.strip() != '':
                    self.__write_raw(self._sanitize(root.text))
                self._walk_child(root)
                self._write_tail(root, ensure_newline=True)

            elif root.tag in self._INLINE_FONTS:
                # strong / em / code: only the element's own text is written,
                # wrapped in a font change; child elements are not visited.
                if root.text is not None:
                    if root.tag == 'code':
                        text = self._code_sanitize(root.text)
                    else:
                        text = self._sanitize(root.text)
                    self.__write_raw('\\f{}{}\\fR'.format(self._INLINE_FONTS[root.tag], text))
                self._write_tail(root, inline=True)

            elif root.tag == 'pre':
                self.__write_cmd('.IP "" 4')
                self.__write_cmd('.nf\n')  # extra newline for spacing reasons
                # Merge a run of consecutive <pre> siblings into one no-fill
                # region, separated by blank lines.
                block = root
                first = True
                while block is not None and block.tag == 'pre':
                    if not first:
                        self.__write_raw('\n')
                    text = ''.join(block.itertext(with_tail=False))
                    self.__write_raw(self._pre_sanitize(text))
                    first = False
                    root = block
                    block = block.getnext()
                self.__write_cmd('.fi')
                self.__write_cmd('.IP "" 0')

            else:
                # Unknown container (html, body, ...): just descend.
                self._walk_child(root)

            last_tag = root.tag
            root = root.getnext()

    def _base_sanitize(self, text):
        """Escape backslash, '.', "'" and '-' so roff prints them literally."""
        return text.translate(self._ROFF_ESCAPES)

    def _pre_sanitize(self, text):
        """Escape preformatted text; whitespace is left untouched."""
        return self._base_sanitize(text)

    def _code_sanitize(self, text):
        """Escape inline code and turn every whitespace character into a space."""
        text = self._base_sanitize(text)
        return re.sub(r'\s', ' ', text)

    def _h3_sanitize(self, text):
        """Escape a subsection title and fold it onto a single line."""
        text = self._base_sanitize(text)
        # A newline next to a space collapses into that one space ...
        text = re.sub(' \n|\n ', ' ', text)
        # ... and any remaining newline becomes a space.
        return re.sub('\n', ' ', text)

    def _sanitize(self, text):
        """Escape running text.

        ``<name>`` placeholders are set in italics, runs of spaces are
        collapsed to one, and newlines become spaces.
        """
        text = self._base_sanitize(text)
        text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text)
        text = re.sub(r' +', ' ', text)
        return re.sub('\n', ' ', text)

    def __write_cmd(self, dat):
        """Buffer a roff request on its own line, preceded by a lone '.' line."""
        self.f.write('.\n{}\n'.format(dat))

    def __write_raw(self, dat):
        """Buffer *dat* verbatim, without adding a newline."""
        self.f.write(str(dat))


def load_yml_file(fn):
    """Parse the YAML file at path *fn* and return the resulting object.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the machine running the build. ``yaml.safe_load`` is used,
    so only plain YAML data types are constructed.
    """
    with open(fn, encoding='utf-8') as f:
        return yaml.safe_load(f)


def dedent_body(body):
    """Strip the leading indent from every line of *body* that carries it.

    A line is changed only when it starts with the indent immediately
    followed by a non-whitespace character; deeper-indented and
    whitespace-only lines are returned untouched.

    Args:
        body: text whose lines are separated by ``'\\n'``.

    Returns:
        The text with the indent removed from matching lines.
    """
    # NOTE(review): this file was recovered from a page that may have
    # collapsed runs of spaces -- confirm the indent width in this pattern.
    # re.MULTILINE anchors '^' after every '\n', which matches the effect of
    # splitting on '\n' and substituting line by line.
    return re.sub(r'^ (\S)', r'\1', body, flags=re.MULTILINE)


def convert_manual_to_markdown(manual_path="content/manual/manual.yml"):
    """Flatten the YAML manual into one markdown document.

    The document consists of the ``manpage_intro``, the top-level ``body``,
    then for every section an upper-cased ``##`` heading with its body, and
    for every entry a ``###`` heading, its body and (when present) its
    examples inside a ``~~~~`` fenced block, followed by
    ``manpage_epilogue``.

    Args:
        manual_path: path of the manual YAML file; defaults to the location
            relative to the current working directory.

    Returns:
        The markdown text.
    """
    manual = load_yml_file(manual_path)
    out = StringIO()
    out.write(manual.get('manpage_intro', '\n'))
    out.write(dedent_body(manual.get('body', '\n')))

    for section in manual.get('sections', []):
        out.write('## {}\n'.format(section.get('title', '').upper()))
        out.write(dedent_body(section.get('body', '\n')))
        out.write('\n')

        for entry in section.get('entries', []):
            out.write('### {}\n'.format(entry.get('title', '')))
            out.write(dedent_body(entry.get('body', '\n')))
            out.write('\n')

            examples = entry.get('examples')
            if examples is not None:
                out.write("~~~~\n")
                for index, example in enumerate(examples):
                    if index:
                        # Blank line between consecutive examples.
                        out.write('\n')
                    out.write("jq '{}'\n".format(example.get('program', '')))
                    # NOTE(review): recovered text may have had runs of
                    # spaces collapsed -- confirm the indent of this line.
                    out.write(" {}\n".format(example.get('input', '')))
                    results = [str(x) for x in example.get('output', [])]
                    out.write("=> {}\n".format(', '.join(results)))
                out.write("~~~~\n")

        # Blank line after each section (only affects blank-line spacing).
        out.write('\n')

    out.write(manual.get('manpage_epilogue', ''))
    return out.getvalue()


# Convert manual.yml to our special markdown format
markdown_data = convert_manual_to_markdown()

# Convert markdown to html, with raw-HTML interpretation disabled and
# ~~~~ fenced blocks enabled
html_data = markdown.markdown(markdown_data, extensions=[EscapeHtml(), 'fenced_code'])

# Parse the html into a tree so we can walk it
tr = etree.HTML(html_data, etree.HTMLParser())

# Convert the html tree to ROFF and write it to standard output
RoffWalker(tr).walk()