1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00

docs: Fix coding style of Python scripts using YAPF (#2606)

This commit is contained in:
itchyny
2023-06-08 19:04:19 +09:00
committed by GitHub
parent fd9da6647c
commit e5414e6828
3 changed files with 247 additions and 227 deletions

View File

@@ -8,225 +8,238 @@ import re
import sys
import yaml
# Prevent our markdown parser from trying to help by interpreting things in angle brackets as HTML tags.
class EscapeHtml(Extension):
def extendMarkdown(self, md, md_globals):
del md.preprocessors['html_block']
del md.inlinePatterns['html']
def extendMarkdown(self, md, md_globals):
del md.preprocessors['html_block']
del md.inlinePatterns['html']
class RoffWalker(object):
def __init__(self, tree, output=sys.stdout):
self.tree = tree
self.target = output
self.f = StringIO()
def walk(self):
self._walk(self.tree, parent_tag=None)
# We don't want to start lines with \. because that can confuse man
# For lines that start with \., we need to prefix them with \& so it
# knows not to treat that line as a directive
data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE)
self.target.write(data)
def __init__(self, tree, output=sys.stdout):
self.tree = tree
self.target = output
self.f = StringIO()
def _ul_is_special(self, root):
if len(root) != 1:
return False
child = root[0]
if child.tag != 'li':
return False
msg = ''.join(child.itertext()).strip()
return msg.endswith(':')
def walk(self):
self._walk(self.tree, parent_tag=None)
# We don't want to start lines with \. because that can confuse man
# For lines that start with \., we need to prefix them with \& so it
# knows not to treat that line as a directive
data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE)
self.target.write(data)
def _walk_child(self, root):
if len(root) > 0:
self._walk(root[0], parent_tag=root.tag)
def _ul_is_special(self, root):
if len(root) != 1:
return False
child = root[0]
if child.tag != 'li':
return False
msg = ''.join(child.itertext()).strip()
return msg.endswith(':')
def _write_element(self, root, ensure_newline=True):
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw(text)
self._walk_child(root)
self._write_tail(root, ensure_newline=ensure_newline)
def _walk_child(self, root):
if len(root) > 0:
self._walk(root[0], parent_tag=root.tag)
def _write_tail(self, root, ensure_newline=False, inline=False):
if root.tail is not None:
if inline or root.tail != '\n':
text = self._sanitize(root.tail)
if text.endswith('\n'):
ensure_newline = False
self.__write_raw(text)
if ensure_newline:
self.__write_raw('\n')
def _walk(self, root, parent_tag=None):
last_tag = None
while root is not None:
if root.tag == 'h1':
self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(date.today().strftime('%B %Y')))
self.__write_cmd('.SH "NAME"')
# TODO: properly parse this
self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' + "\n")
elif root.tag == 'h2':
self.__write_cmd('.SH "{}"'.format(''.join(root.itertext()).strip()))
elif root.tag == 'h3':
text = ''.join(root.itertext()).strip()
self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text)))
elif root.tag == 'p':
if last_tag not in ['h2', 'h3'] and parent_tag not in ['li']:
self.__write_cmd('.P')
self._write_element(root, ensure_newline=(parent_tag != 'li'))
elif root.tag == 'ul':
if self._ul_is_special(root):
li = root[0]
self.__write_cmd('.TP')
self._write_element(li)
next = root.getnext()
while next is not None and next.tag == 'p':
if next.getnext() is not None and next.getnext().tag == 'pre':
# we don't want to .IP these, because it'll look funny with the code indent
break
self.__write_cmd('.IP')
self._write_element(next)
root = next
next = root.getnext()
else:
self._walk_child(root)
self._write_tail(root)
# A pre tag after the end of a list doesn't want two of the indentation commands
if root.getnext() is None or root.getnext().tag != 'pre':
self.__write_cmd('.IP "" 0')
elif root.tag == 'li':
self.__write_cmd(r'.IP "\(bu" 4')
if root.text is not None and root.text.strip() != '':
text = self._sanitize(root.text)
self.__write_raw(text)
def _write_element(self, root, ensure_newline=True):
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw(text)
self._walk_child(root)
self._write_tail(root, ensure_newline=True)
self._write_tail(root, ensure_newline=ensure_newline)
elif root.tag == 'strong':
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw('\\fB{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'em':
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw('\\fI{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'code':
if root.text is not None:
text = self._code_sanitize(root.text)
self.__write_raw('\\fB{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'pre':
self.__write_cmd('.IP "" 4')
self.__write_cmd('.nf\n') # extra newline for spacing reasons
next = root
first = True
while next is not None and next.tag == 'pre':
if not first:
def _write_tail(self, root, ensure_newline=False, inline=False):
if root.tail is not None:
if inline or root.tail != '\n':
text = self._sanitize(root.tail)
if text.endswith('\n'):
ensure_newline = False
self.__write_raw(text)
if ensure_newline:
self.__write_raw('\n')
text = ''.join(next.itertext(with_tail=False))
self.__write_raw(self._pre_sanitize(text))
first = False
root = next
next = next.getnext()
self.__write_cmd('.fi')
self.__write_cmd('.IP "" 0')
else:
self._walk_child(root)
def _walk(self, root, parent_tag=None):
last_tag = None
while root is not None:
if root.tag == 'h1':
self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(
date.today().strftime('%B %Y')))
self.__write_cmd('.SH "NAME"')
# TODO: properly parse this
self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' +
"\n")
last_tag = root.tag
root = root.getnext()
elif root.tag == 'h2':
self.__write_cmd('.SH "{}"'.format(''.join(
root.itertext()).strip()))
def _base_sanitize(self, text):
text = re.sub(r'\\', r'\\e', text)
text = re.sub(r'\.', r'\\.', text)
text = re.sub("'", r"\'", text)
text = re.sub('-', r'\-', text)
return text
elif root.tag == 'h3':
text = ''.join(root.itertext()).strip()
self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text)))
def _pre_sanitize(self, text):
return self._base_sanitize(text)
elif root.tag == 'p':
if last_tag not in ['h2', 'h3'] and parent_tag not in ['li']:
self.__write_cmd('.P')
self._write_element(root, ensure_newline=(parent_tag != 'li'))
def _code_sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(r'\s', ' ', text)
return text
elif root.tag == 'ul':
if self._ul_is_special(root):
li = root[0]
self.__write_cmd('.TP')
self._write_element(li)
next = root.getnext()
while next is not None and next.tag == 'p':
if next.getnext() is not None and next.getnext(
).tag == 'pre':
# we don't want to .IP these, because it'll look funny with the code indent
break
self.__write_cmd('.IP')
self._write_element(next)
root = next
next = root.getnext()
else:
self._walk_child(root)
self._write_tail(root)
# A pre tag after the end of a list doesn't want two of the indentation commands
if root.getnext() is None or root.getnext().tag != 'pre':
self.__write_cmd('.IP "" 0')
def _h3_sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(' \n|\n ', ' ', text)
text = re.sub('\n', ' ', text)
return text
elif root.tag == 'li':
self.__write_cmd(r'.IP "\(bu" 4')
if root.text is not None and root.text.strip() != '':
text = self._sanitize(root.text)
self.__write_raw(text)
self._walk_child(root)
self._write_tail(root, ensure_newline=True)
def _sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text)
text = re.sub(r' +', ' ', text)
text = re.sub('\n', ' ', text)
return text
elif root.tag == 'strong':
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw('\\fB{}\\fR'.format(text))
def __write_cmd(self, dat):
print('.', dat, sep='\n', file=self.f)
pass
self._write_tail(root, inline=True)
elif root.tag == 'em':
if root.text is not None:
text = self._sanitize(root.text)
self.__write_raw('\\fI{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'code':
if root.text is not None:
text = self._code_sanitize(root.text)
self.__write_raw('\\fB{}\\fR'.format(text))
self._write_tail(root, inline=True)
elif root.tag == 'pre':
self.__write_cmd('.IP "" 4')
self.__write_cmd('.nf\n') # extra newline for spacing reasons
next = root
first = True
while next is not None and next.tag == 'pre':
if not first:
self.__write_raw('\n')
text = ''.join(next.itertext(with_tail=False))
self.__write_raw(self._pre_sanitize(text))
first = False
root = next
next = next.getnext()
self.__write_cmd('.fi')
self.__write_cmd('.IP "" 0')
else:
self._walk_child(root)
last_tag = root.tag
root = root.getnext()
def _base_sanitize(self, text):
text = re.sub(r'\\', r'\\e', text)
text = re.sub(r'\.', r'\\.', text)
text = re.sub("'", r"\'", text)
text = re.sub('-', r'\-', text)
return text
def _pre_sanitize(self, text):
return self._base_sanitize(text)
def _code_sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(r'\s', ' ', text)
return text
def _h3_sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(' \n|\n ', ' ', text)
text = re.sub('\n', ' ', text)
return text
def _sanitize(self, text):
text = self._base_sanitize(text)
text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text)
text = re.sub(r' +', ' ', text)
text = re.sub('\n', ' ', text)
return text
def __write_cmd(self, dat):
print('.', dat, sep='\n', file=self.f)
pass
def __write_raw(self, dat):
print(dat, sep='', end='', file=self.f)
pass
def __write_raw(self, dat):
print(dat, sep='', end='', file=self.f)
pass
def load_yml_file(fn):
with open(fn) as f:
return yaml.safe_load(f)
with open(fn) as f:
return yaml.safe_load(f)
def dedent_body(body):
lines = [re.sub(r'^ (\S)', r'\1', l) for l in body.split('\n')]
return '\n'.join(lines)
lines = [re.sub(r'^ (\S)', r'\1', l) for l in body.split('\n')]
return '\n'.join(lines)
def convert_manual_to_markdown():
f = StringIO()
manual = load_yml_file("content/manual/manual.yml")
f.write(manual.get('manpage_intro', '\n'))
f.write(dedent_body(manual.get('body', '\n')))
for section in manual.get('sections', []):
f.write('## {}\n'.format(section.get('title', '').upper()))
f.write(dedent_body(section.get('body', '\n')))
f.write('\n')
for entry in section.get('entries', []):
f.write('### {}\n'.format(entry.get('title', '')))
f.write(dedent_body(entry.get('body', '\n')))
f.write('\n')
if entry.get('examples') is not None:
f.write("~~~~\n")
first = True
for example in entry.get('examples'):
if not first:
f = StringIO()
manual = load_yml_file("content/manual/manual.yml")
f.write(manual.get('manpage_intro', '\n'))
f.write(dedent_body(manual.get('body', '\n')))
for section in manual.get('sections', []):
f.write('## {}\n'.format(section.get('title', '').upper()))
f.write(dedent_body(section.get('body', '\n')))
f.write('\n')
for entry in section.get('entries', []):
f.write('### {}\n'.format(entry.get('title', '')))
f.write(dedent_body(entry.get('body', '\n')))
f.write('\n')
f.write("jq '{}'\n".format(example.get('program', '')))
f.write(" {}\n".format(example.get('input', '')))
output = [str(x) for x in example.get('output', [])]
f.write("=> {}\n".format(', '.join(output)))
first = False
f.write("~~~~\n")
f.write('\n')
f.write(manual.get('manpage_epilogue', ''))
return f.getvalue()
if entry.get('examples') is not None:
f.write("~~~~\n")
first = True
for example in entry.get('examples'):
if not first:
f.write('\n')
f.write("jq '{}'\n".format(example.get('program', '')))
f.write(" {}\n".format(example.get('input', '')))
output = [str(x) for x in example.get('output', [])]
f.write("=> {}\n".format(', '.join(output)))
first = False
f.write("~~~~\n")
f.write('\n')
f.write(manual.get('manpage_epilogue', ''))
return f.getvalue()
# Convert manual.yml to our special markdown format
markdown_data = convert_manual_to_markdown()
# Convert markdown to html
html_data = markdown.markdown(markdown_data, extensions=[EscapeHtml(), 'fenced_code'])
html_data = markdown.markdown(markdown_data,
extensions=[EscapeHtml(), 'fenced_code'])
# Parse the html into a tree so we can walk it
tr = etree.HTML(html_data, etree.HTMLParser())

View File

@@ -2,12 +2,12 @@
import yaml
with open("content/manual/manual.yml") as f:
manual = yaml.safe_load(f)
for section in manual.get('sections', []):
for entry in section.get('entries', []):
for example in entry.get('examples', []):
print(example.get('program', '').replace('\n', ' '))
print(example.get('input', ''))
for s in example.get('output', []):
print(s)
print('')
manual = yaml.safe_load(f)
for section in manual.get('sections', []):
for entry in section.get('entries', []):
for example in entry.get('examples', []):
print(example.get('program', '').replace('\n', ' '))
print(example.get('input', ''))
for s in example.get('output', []):
print(s)
print('')

View File

@@ -10,13 +10,15 @@ import shutil
import yaml
env = Environment(
loader=FileSystemLoader('templates'),
autoescape=select_autoescape(['html.j2']),
loader=FileSystemLoader('templates'),
autoescape=select_autoescape(['html.j2']),
)
def load_yml_file(fn):
with open(fn) as f:
return yaml.safe_load(f)
with open(fn) as f:
return yaml.safe_load(f)
env.filters['search_id'] = lambda input: input.replace(r'`', '')
env.filters['section_id'] = lambda input: re.sub(r"[^a-zA-Z0-9_]", '', input)
@@ -24,47 +26,52 @@ env.filters['entry_id'] = lambda input: re.sub(r"[ `]", '', input)
env.filters['markdownify'] = lambda input: Markup(markdown(input))
env.filters['no_paragraph'] = lambda input: Markup(re.sub(r"</?p>", '', input))
env.globals['unique_id'] = contextfunction(lambda ctx: str(next(ctx['unique_ctr'])))
env.globals['unique_id'] = contextfunction(
lambda ctx: str(next(ctx['unique_ctr'])))
env.globals.update(load_yml_file('site.yml'))
env.globals['navigation'] = ['tutorial', 'download', 'manual']
def generate_file(env, fname='content/1.tutorial/default.yml'):
path, base = os.path.split(fname)
path = os.path.relpath(path, 'content')
if path == '.':
path = ''
slug = 'index'
permalink = ''
else:
slug = os.path.basename(path)
permalink = path + '/'
path, base = os.path.split(fname)
path = os.path.relpath(path, 'content')
if path == '.':
path = ''
slug = 'index'
permalink = ''
else:
slug = os.path.basename(path)
permalink = path + '/'
output_dir = os.path.join('output', path)
output_path = os.path.join(output_dir, 'index.html')
output_dir = os.path.join('output', path)
output_path = os.path.join(output_dir, 'index.html')
template_name = re.sub(r".yml$", '.html.j2', base)
template_name = re.sub(r".yml$", '.html.j2', base)
ctx = load_yml_file(fname)
ctx.update(unique_ctr=itertools.count(1), permalink=permalink, slug=slug, navitem=path)
os.makedirs(output_dir, exist_ok=True)
env.get_template(template_name).stream(ctx).dump(output_path, encoding='utf-8')
ctx = load_yml_file(fname)
ctx.update(unique_ctr=itertools.count(1),
permalink=permalink,
slug=slug,
navitem=path)
os.makedirs(output_dir, exist_ok=True)
env.get_template(template_name).stream(ctx).dump(output_path,
encoding='utf-8')
def copy_public_files(root=''):
for f in os.scandir(os.path.join('public', root)):
src = os.path.join(root, f.name)
dst = os.path.join('output', src)
if f.is_dir():
os.makedirs(dst, exist_ok=True)
copy_public_files(src)
else:
shutil.copyfile(f.path, dst)
for f in os.scandir(os.path.join('public', root)):
src = os.path.join(root, f.name)
dst = os.path.join('output', src)
if f.is_dir():
os.makedirs(dst, exist_ok=True)
copy_public_files(src)
else:
shutil.copyfile(f.path, dst)
copy_public_files()
for fn in glob.glob('content/**/*.yml', recursive=True):
generate_file(env, fn)
generate_file(env, fn)