1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00

docs: Fix coding style of Python scripts using YAPF (#2606)

This commit is contained in:
itchyny
2023-06-08 19:04:19 +09:00
committed by GitHub
parent fd9da6647c
commit e5414e6828
3 changed files with 247 additions and 227 deletions

View File

@@ -8,225 +8,238 @@ import re
import sys import sys
import yaml import yaml
# Prevent our markdown parser from trying to help by interpreting things in angle brackets as HTML tags. # Prevent our markdown parser from trying to help by interpreting things in angle brackets as HTML tags.
class EscapeHtml(Extension): class EscapeHtml(Extension):
def extendMarkdown(self, md, md_globals):
del md.preprocessors['html_block'] def extendMarkdown(self, md, md_globals):
del md.inlinePatterns['html'] del md.preprocessors['html_block']
del md.inlinePatterns['html']
class RoffWalker(object): class RoffWalker(object):
def __init__(self, tree, output=sys.stdout):
self.tree = tree
self.target = output
self.f = StringIO()
def walk(self): def __init__(self, tree, output=sys.stdout):
self._walk(self.tree, parent_tag=None) self.tree = tree
# We don't want to start lines with \. because that can confuse man self.target = output
# For lines that start with \., we need to prefix them with \& so it self.f = StringIO()
# knows not to treat that line as a directive
data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE)
self.target.write(data)
def _ul_is_special(self, root): def walk(self):
if len(root) != 1: self._walk(self.tree, parent_tag=None)
return False # We don't want to start lines with \. because that can confuse man
child = root[0] # For lines that start with \., we need to prefix them with \& so it
if child.tag != 'li': # knows not to treat that line as a directive
return False data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE)
msg = ''.join(child.itertext()).strip() self.target.write(data)
return msg.endswith(':')
def _walk_child(self, root): def _ul_is_special(self, root):
if len(root) > 0: if len(root) != 1:
self._walk(root[0], parent_tag=root.tag) return False
child = root[0]
if child.tag != 'li':
return False
msg = ''.join(child.itertext()).strip()
return msg.endswith(':')
def _write_element(self, root, ensure_newline=True): def _walk_child(self, root):
if root.text is not None: if len(root) > 0:
text = self._sanitize(root.text) self._walk(root[0], parent_tag=root.tag)
self.__write_raw(text)
self._walk_child(root)
self._write_tail(root, ensure_newline=ensure_newline)
def _write_tail(self, root, ensure_newline=False, inline=False): def _write_element(self, root, ensure_newline=True):
if root.tail is not None: if root.text is not None:
if inline or root.tail != '\n': text = self._sanitize(root.text)
text = self._sanitize(root.tail) self.__write_raw(text)
if text.endswith('\n'):
ensure_newline = False
self.__write_raw(text)
if ensure_newline:
self.__write_raw('\n')
def _walk(self, root, parent_tag=None):
last_tag = None
while root is not None:
if root.tag == 'h1':
self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(date.today().strftime('%B %Y')))
self.__write_cmd('.SH "NAME"')
# TODO: properly parse this
self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' + "\n")
elif root.tag == 'h2':
self.__write_cmd('.SH "{}"'.format(''.join(root.itertext()).strip()))
elif root.tag == 'h3':
text = ''.join(root.itertext()).strip()
self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text)))
elif root.tag == 'p':
if last_tag not in ['h2', 'h3'] and parent_tag not in ['li']:
self.__write_cmd('.P')
self._write_element(root, ensure_newline=(parent_tag != 'li'))
elif root.tag == 'ul':
if self._ul_is_special(root):
li = root[0]
self.__write_cmd('.TP')
self._write_element(li)
next = root.getnext()
while next is not None and next.tag == 'p':
if next.getnext() is not None and next.getnext().tag == 'pre':
# we don't want to .IP these, because it'll look funny with the code indent
break
self.__write_cmd('.IP')
self._write_element(next)
root = next
next = root.getnext()
else:
self._walk_child(root)
self._write_tail(root)
# A pre tag after the end of a list doesn't want two of the indentation commands
if root.getnext() is None or root.getnext().tag != 'pre':
self.__write_cmd('.IP "" 0')
elif root.tag == 'li':
self.__write_cmd(r'.IP "\(bu" 4')
if root.text is not None and root.text.strip() != '':
text = self._sanitize(root.text)
self.__write_raw(text)
self._walk_child(root) self._walk_child(root)
self._write_tail(root, ensure_newline=True) self._write_tail(root, ensure_newline=ensure_newline)
elif root.tag == 'strong': def _write_tail(self, root, ensure_newline=False, inline=False):
if root.text is not None: if root.tail is not None:
text = self._sanitize(root.text) if inline or root.tail != '\n':
self.__write_raw('\\fB{}\\fR'.format(text)) text = self._sanitize(root.tail)
if text.endswith('\n'):
self._write_tail(root, inline=True) ensure_newline = False
self.__write_raw(text)
elif root.tag == 'em': if ensure_newline:
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw('\\fI{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'code':
if root.text is not None:
text = self._code_sanitize(root.text)
self.__write_raw('\\fB{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'pre':
self.__write_cmd('.IP "" 4')
self.__write_cmd('.nf\n') # extra newline for spacing reasons
next = root
first = True
while next is not None and next.tag == 'pre':
if not first:
self.__write_raw('\n') self.__write_raw('\n')
text = ''.join(next.itertext(with_tail=False))
self.__write_raw(self._pre_sanitize(text))
first = False
root = next
next = next.getnext()
self.__write_cmd('.fi')
self.__write_cmd('.IP "" 0')
else: def _walk(self, root, parent_tag=None):
self._walk_child(root) last_tag = None
while root is not None:
if root.tag == 'h1':
self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(
date.today().strftime('%B %Y')))
self.__write_cmd('.SH "NAME"')
# TODO: properly parse this
self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' +
"\n")
last_tag = root.tag elif root.tag == 'h2':
root = root.getnext() self.__write_cmd('.SH "{}"'.format(''.join(
root.itertext()).strip()))
def _base_sanitize(self, text): elif root.tag == 'h3':
text = re.sub(r'\\', r'\\e', text) text = ''.join(root.itertext()).strip()
text = re.sub(r'\.', r'\\.', text) self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text)))
text = re.sub("'", r"\'", text)
text = re.sub('-', r'\-', text)
return text
def _pre_sanitize(self, text): elif root.tag == 'p':
return self._base_sanitize(text) if last_tag not in ['h2', 'h3'] and parent_tag not in ['li']:
self.__write_cmd('.P')
self._write_element(root, ensure_newline=(parent_tag != 'li'))
def _code_sanitize(self, text): elif root.tag == 'ul':
text = self._base_sanitize(text) if self._ul_is_special(root):
text = re.sub(r'\s', ' ', text) li = root[0]
return text self.__write_cmd('.TP')
self._write_element(li)
next = root.getnext()
while next is not None and next.tag == 'p':
if next.getnext() is not None and next.getnext(
).tag == 'pre':
# we don't want to .IP these, because it'll look funny with the code indent
break
self.__write_cmd('.IP')
self._write_element(next)
root = next
next = root.getnext()
else:
self._walk_child(root)
self._write_tail(root)
# A pre tag after the end of a list doesn't want two of the indentation commands
if root.getnext() is None or root.getnext().tag != 'pre':
self.__write_cmd('.IP "" 0')
def _h3_sanitize(self, text): elif root.tag == 'li':
text = self._base_sanitize(text) self.__write_cmd(r'.IP "\(bu" 4')
text = re.sub(' \n|\n ', ' ', text) if root.text is not None and root.text.strip() != '':
text = re.sub('\n', ' ', text) text = self._sanitize(root.text)
return text self.__write_raw(text)
self._walk_child(root)
self._write_tail(root, ensure_newline=True)
def _sanitize(self, text): elif root.tag == 'strong':
text = self._base_sanitize(text) if root.text is not None:
text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text) text = self._sanitize(root.text)
text = re.sub(r' +', ' ', text) self.__write_raw('\\fB{}\\fR'.format(text))
text = re.sub('\n', ' ', text)
return text
def __write_cmd(self, dat): self._write_tail(root, inline=True)
print('.', dat, sep='\n', file=self.f)
pass elif root.tag == 'em':
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw('\\fI{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'code':
if root.text is not None:
text = self._code_sanitize(root.text)
self.__write_raw('\\fB{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'pre':
self.__write_cmd('.IP "" 4')
self.__write_cmd('.nf\n') # extra newline for spacing reasons
next = root
first = True
while next is not None and next.tag == 'pre':
if not first:
self.__write_raw('\n')
text = ''.join(next.itertext(with_tail=False))
self.__write_raw(self._pre_sanitize(text))
first = False
root = next
next = next.getnext()
self.__write_cmd('.fi')
self.__write_cmd('.IP "" 0')
else:
self._walk_child(root)
last_tag = root.tag
root = root.getnext()
def _base_sanitize(self, text):
text = re.sub(r'\\', r'\\e', text)
text = re.sub(r'\.', r'\\.', text)
text = re.sub("'", r"\'", text)
text = re.sub('-', r'\-', text)
return text
def _pre_sanitize(self, text):
return self._base_sanitize(text)
def _code_sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(r'\s', ' ', text)
return text
def _h3_sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(' \n|\n ', ' ', text)
text = re.sub('\n', ' ', text)
return text
def _sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text)
text = re.sub(r' +', ' ', text)
text = re.sub('\n', ' ', text)
return text
def __write_cmd(self, dat):
print('.', dat, sep='\n', file=self.f)
pass
def __write_raw(self, dat):
print(dat, sep='', end='', file=self.f)
pass
def __write_raw(self, dat):
print(dat, sep='', end='', file=self.f)
pass
def load_yml_file(fn): def load_yml_file(fn):
with open(fn) as f: with open(fn) as f:
return yaml.safe_load(f) return yaml.safe_load(f)
def dedent_body(body): def dedent_body(body):
lines = [re.sub(r'^ (\S)', r'\1', l) for l in body.split('\n')] lines = [re.sub(r'^ (\S)', r'\1', l) for l in body.split('\n')]
return '\n'.join(lines) return '\n'.join(lines)
def convert_manual_to_markdown(): def convert_manual_to_markdown():
f = StringIO() f = StringIO()
manual = load_yml_file("content/manual/manual.yml") manual = load_yml_file("content/manual/manual.yml")
f.write(manual.get('manpage_intro', '\n')) f.write(manual.get('manpage_intro', '\n'))
f.write(dedent_body(manual.get('body', '\n'))) f.write(dedent_body(manual.get('body', '\n')))
for section in manual.get('sections', []): for section in manual.get('sections', []):
f.write('## {}\n'.format(section.get('title', '').upper())) f.write('## {}\n'.format(section.get('title', '').upper()))
f.write(dedent_body(section.get('body', '\n'))) f.write(dedent_body(section.get('body', '\n')))
f.write('\n') f.write('\n')
for entry in section.get('entries', []): for entry in section.get('entries', []):
f.write('### {}\n'.format(entry.get('title', ''))) f.write('### {}\n'.format(entry.get('title', '')))
f.write(dedent_body(entry.get('body', '\n'))) f.write(dedent_body(entry.get('body', '\n')))
f.write('\n')
if entry.get('examples') is not None:
f.write("~~~~\n")
first = True
for example in entry.get('examples'):
if not first:
f.write('\n') f.write('\n')
f.write("jq '{}'\n".format(example.get('program', ''))) if entry.get('examples') is not None:
f.write(" {}\n".format(example.get('input', ''))) f.write("~~~~\n")
output = [str(x) for x in example.get('output', [])] first = True
f.write("=> {}\n".format(', '.join(output))) for example in entry.get('examples'):
first = False if not first:
f.write("~~~~\n") f.write('\n')
f.write('\n') f.write("jq '{}'\n".format(example.get('program', '')))
f.write(manual.get('manpage_epilogue', '')) f.write(" {}\n".format(example.get('input', '')))
return f.getvalue() output = [str(x) for x in example.get('output', [])]
f.write("=> {}\n".format(', '.join(output)))
first = False
f.write("~~~~\n")
f.write('\n')
f.write(manual.get('manpage_epilogue', ''))
return f.getvalue()
# Convert manual.yml to our special markdown format # Convert manual.yml to our special markdown format
markdown_data = convert_manual_to_markdown() markdown_data = convert_manual_to_markdown()
# Convert markdown to html # Convert markdown to html
html_data = markdown.markdown(markdown_data, extensions=[EscapeHtml(), 'fenced_code']) html_data = markdown.markdown(markdown_data,
extensions=[EscapeHtml(), 'fenced_code'])
# Parse the html into a tree so we can walk it # Parse the html into a tree so we can walk it
tr = etree.HTML(html_data, etree.HTMLParser()) tr = etree.HTML(html_data, etree.HTMLParser())

View File

@@ -2,12 +2,12 @@
import yaml import yaml
with open("content/manual/manual.yml") as f: with open("content/manual/manual.yml") as f:
manual = yaml.safe_load(f) manual = yaml.safe_load(f)
for section in manual.get('sections', []): for section in manual.get('sections', []):
for entry in section.get('entries', []): for entry in section.get('entries', []):
for example in entry.get('examples', []): for example in entry.get('examples', []):
print(example.get('program', '').replace('\n', ' ')) print(example.get('program', '').replace('\n', ' '))
print(example.get('input', '')) print(example.get('input', ''))
for s in example.get('output', []): for s in example.get('output', []):
print(s) print(s)
print('') print('')

View File

@@ -10,13 +10,15 @@ import shutil
import yaml import yaml
env = Environment( env = Environment(
loader=FileSystemLoader('templates'), loader=FileSystemLoader('templates'),
autoescape=select_autoescape(['html.j2']), autoescape=select_autoescape(['html.j2']),
) )
def load_yml_file(fn): def load_yml_file(fn):
with open(fn) as f: with open(fn) as f:
return yaml.safe_load(f) return yaml.safe_load(f)
env.filters['search_id'] = lambda input: input.replace(r'`', '') env.filters['search_id'] = lambda input: input.replace(r'`', '')
env.filters['section_id'] = lambda input: re.sub(r"[^a-zA-Z0-9_]", '', input) env.filters['section_id'] = lambda input: re.sub(r"[^a-zA-Z0-9_]", '', input)
@@ -24,47 +26,52 @@ env.filters['entry_id'] = lambda input: re.sub(r"[ `]", '', input)
env.filters['markdownify'] = lambda input: Markup(markdown(input)) env.filters['markdownify'] = lambda input: Markup(markdown(input))
env.filters['no_paragraph'] = lambda input: Markup(re.sub(r"</?p>", '', input)) env.filters['no_paragraph'] = lambda input: Markup(re.sub(r"</?p>", '', input))
env.globals['unique_id'] = contextfunction(lambda ctx: str(next(ctx['unique_ctr']))) env.globals['unique_id'] = contextfunction(
lambda ctx: str(next(ctx['unique_ctr'])))
env.globals.update(load_yml_file('site.yml')) env.globals.update(load_yml_file('site.yml'))
env.globals['navigation'] = ['tutorial', 'download', 'manual'] env.globals['navigation'] = ['tutorial', 'download', 'manual']
def generate_file(env, fname='content/1.tutorial/default.yml'): def generate_file(env, fname='content/1.tutorial/default.yml'):
path, base = os.path.split(fname) path, base = os.path.split(fname)
path = os.path.relpath(path, 'content') path = os.path.relpath(path, 'content')
if path == '.': if path == '.':
path = '' path = ''
slug = 'index' slug = 'index'
permalink = '' permalink = ''
else: else:
slug = os.path.basename(path) slug = os.path.basename(path)
permalink = path + '/' permalink = path + '/'
output_dir = os.path.join('output', path) output_dir = os.path.join('output', path)
output_path = os.path.join(output_dir, 'index.html') output_path = os.path.join(output_dir, 'index.html')
template_name = re.sub(r".yml$", '.html.j2', base) template_name = re.sub(r".yml$", '.html.j2', base)
ctx = load_yml_file(fname) ctx = load_yml_file(fname)
ctx.update(unique_ctr=itertools.count(1), permalink=permalink, slug=slug, navitem=path) ctx.update(unique_ctr=itertools.count(1),
os.makedirs(output_dir, exist_ok=True) permalink=permalink,
env.get_template(template_name).stream(ctx).dump(output_path, encoding='utf-8') slug=slug,
navitem=path)
os.makedirs(output_dir, exist_ok=True)
env.get_template(template_name).stream(ctx).dump(output_path,
encoding='utf-8')
def copy_public_files(root=''): def copy_public_files(root=''):
for f in os.scandir(os.path.join('public', root)): for f in os.scandir(os.path.join('public', root)):
src = os.path.join(root, f.name) src = os.path.join(root, f.name)
dst = os.path.join('output', src) dst = os.path.join('output', src)
if f.is_dir(): if f.is_dir():
os.makedirs(dst, exist_ok=True) os.makedirs(dst, exist_ok=True)
copy_public_files(src) copy_public_files(src)
else: else:
shutil.copyfile(f.path, dst) shutil.copyfile(f.path, dst)
copy_public_files() copy_public_files()
for fn in glob.glob('content/**/*.yml', recursive=True): for fn in glob.glob('content/**/*.yml', recursive=True):
generate_file(env, fn) generate_file(env, fn)