1
0
mirror of https://github.com/becarpenter/book6.git synced 2024-05-07 02:54:53 +00:00
Brian E Carpenter e2a37499ab Update utilities for command line use
All utilities now allow a directory name if invoked by a shell command
2024-04-28 16:21:51 +12:00

714 lines
26 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Reconcile book6 chapters with contents, and set up inter-section
and inter-chapter links as far as possible."""
# Version: 2022-09-18 - original
# Version: 2022-09-26 - added {{{ }}} citations
# Version: 2022-10-05 - fencepost error when adding section to contents
# Version: 2022-10-06 - added citation expansion for chapter base file
# Version: 2022-11-09 - allow {{ }} as well as {{{ }}}
# - added citation of I-D. or draft-
# Version: 2022-11-15 - check that cited references exist (partial)
# Version: 2022-11-16 - improved reference checks (but still partial)
# Version: 2022-11-18 - small oversight in reference check
# Version: 2022-11-19 - cosmetic
# Version: 2022-11-20 - now checks I-D, BCP and STD refs
# Version: 2022-11-22 - fix oversights/nits in contents updating
# Version: 2022-11-27 - {{ }} now puts [ ] round citation
# - {{{ }}} does not put [ ]
# - fix missing newline when adding new section
# Version: 2023-01-10 - fix bug when adding new chapter name to Contents.md
# - enormous simplification of Contents creation
# Version: 2023-05-20 - skip on-line check for RFC bibliography
# Version: 2023-07-19 - apply mdformat to changed files
# - add global mdformat option
# - add mitigations for SSL certs for URL checking
# Version: 2023-08-03 - correctly ignore ``` blocks
# Version: 2023-08-10 - changed to use RFC index for existence checking
# Version: 2024-01-01 - changed default text for empty sections
# Version: 2024-04-12 - improved optics of RFC citations
# Version: 2024-04-28 - handle directory on command line
########################################################
# Copyright (C) 2022-2024 Brian E. Carpenter.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with
# or without modification, are permitted provided that the
# following conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
# AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
########################################################
from tkinter import Tk
from tkinter.filedialog import askdirectory
from tkinter.messagebox import askokcancel, askyesno, showinfo
import time
import sys
import os
import urllib.request
import ssl
import certifi
import requests
# mdformat is optional: when it cannot be imported the script still runs,
# it just leaves the markdown files it writes unformatted.
try:
    import mdformat
    formatter = True
except Exception:
    # Any import-time failure means "no formatter", but unlike a bare
    # except this lets KeyboardInterrupt and SystemExit through.
    formatter = False
def show(msg):
    """Report msg to the user.

    In command-line mode (cmd_line true) the text is printed; otherwise
    it is shown in an info box whose title is the global T.
    """
    if not cmd_line:
        showinfo(title=T, message = msg)
        return
    print(msg)
def logit(msg):
    """Write msg as one line to the open log file (global flog) and,
    when the global printing flag is set, echo it to standard output."""
    flog.write(msg + "\n")
    if not printing:
        return
    print(msg)
def logitw(msg):
    """Log msg prefixed with "WARNING: " and bump the global
    warnings counter by one."""
    global warnings
    logit("WARNING: " + msg)
    warnings = warnings + 1
def dprint(*msg):
    """Diagnostic print: forward all arguments to print(), but only
    when the global printing flag is set."""
    if not printing:
        return
    print(*msg)
def crash(msg):
    """Log a fatal message, close the log file and stop the program.

    msg is written to the log prefixed with "CRASH ". Diagnostic
    printing is switched on first so that the message also reaches
    the console. Does not return (raises SystemExit).
    """
    # Without this declaration the assignment below would only create a
    # local variable and logit() would never see it.
    global printing
    printing = True
    logit("CRASH "+msg)
    flog.close()
    # sys.exit() rather than exit(): the latter is a helper installed by
    # the site module and is not guaranteed to exist.
    sys.exit()
def rf(f):
    """Return the text file f as a list of lines.

    The file is read as UTF-8 with undecodable bytes replaced. Every
    returned line, including the last, ends with a newline. An empty
    file yields an empty list.
    """
    # 'with' closes the file even if reading raises
    with open(f, "r", encoding='utf-8', errors='replace') as file:
        l = file.readlines()
    #ensure last line has a newline (nothing to fix for an empty file)
    if l and not l[-1].endswith("\n"):
        l[-1] += "\n"
    return l
def wf(f,l, mdf = True):
"""Write list of strings to file.

f   -- path of the file to (over)write, encoded as UTF-8
l   -- list of strings, written one after another with nothing
       added between them (each is expected to carry its own newline)
mdf -- when true, and mdformat was imported, and f ends in ".md",
       the written file is re-formatted with mdformat (wrap at 72)

Each step is recorded in the log; the global counter 'written'
is incremented.
"""
global written
file = open(f, "w",encoding='utf-8')
for line in l:
file.write(line)
file.close()
logit("'"+f+"' written")
# 'formatter' is the module flag set by the optional mdformat import
if mdf and formatter and f.endswith(".md"):
mdformat.file(f, options={"wrap":72})
logit("'"+f+"' md formatted")
# NOTE(review): presumably counts every file written, not only the
# formatted ones - confirm the nesting against an indented copy.
written +=1
def uncase(l):
    """Return a new list holding the lower-cased form of each string
    in l, in the same order. The input list is left untouched."""
    return [item.lower() for item in l]
def make_basenames():
    """Make or refresh the global list base_names.

    Scans the global chapter base file (base, a list of lines) for
    second-level headings. A heading of the form "## [Name](...)"
    contributes the link text Name; a plain "## Name" heading
    contributes Name (its section file is created later by the caller).
    Lines shorter than four characters and lines containing "###" are
    not treated as plain section headings.
    """
    global base_names, base
    base_names = []
    for bline in base:
        if len(bline) < 4:
            continue
        bline = bline.strip("\n")
        if bline.startswith("## ["):
            # existing section reference: keep the text between [ and ]
            sname,_ = bline.split("[", maxsplit = 1)[1].split("]", maxsplit = 1)
            base_names.append(sname)
        elif bline.startswith("##") and "###" not in bline:
            #possible section
            try:
                _,sname = bline.split(" ", maxsplit=1)
            except ValueError:
                # no space after the marker, so there is no name to take
                continue
            #treat as new section (will create file later)
            base_names.append(sname)
    dprint("Base names: ", base_names)
def link_text(prev, nxt, chapter):
    """Build the navigation line that ends a section file.

    prev and nxt are the neighbouring section names (falsy when there
    is none); chapter is the chapter name. Spaces in names are encoded
    as %20 in the link targets. The result has no trailing newline.
    """
    def encoded(name):
        return name.replace(" ","%20")

    pieces = ["###"]
    if prev:
        pieces.append(" [<ins>Previous</ins>](" + encoded(prev) + ".md)")
    if nxt:
        pieces.append(" [<ins>Next</ins>](" + encoded(nxt) + ".md)")
    pieces.append(" [<ins>Chapter Contents</ins>](" + encoded(chapter) + ".md)")
    return "".join(pieces)


# Marker comment written immediately above generated link lines
link_warn = "<!-- Link lines generated automatically; do not delete -->\n"
def url_ok(url):
    """Check if a URL is OK.

    Issues a request for url using the module-wide headers and SSL
    context, with a 30 second timeout. Returns True only when the
    response status is 200; any failure (including a malformed URL)
    returns False.
    """
    global headers, context
    try:
        request = urllib.request.Request(url, headers=headers)
        # 'with' releases the connection as soon as the status is known
        with urllib.request.urlopen(request, context=context, timeout=30) as response:
            return response.getcode() == 200
    except Exception:
        return False #URL doesn't work
def rfc_ok(s):
    """Check if an RFC, BCP or STD named like "RFC8200" is real.

    Looks for the matching <doc-id> entry (number zero-padded to at
    least four digits) in the cached RFC index lines held in the global
    'whole'. Returns True without looking when the index is not
    available (rfcs_checkable false), and False for any other prefix.
    """
    global rfcs_checkable
    if not rfcs_checkable:
        return True #because we can't check on line right now
    dprint("Checking", s)
    entry_starts = {
        "BCP": "<bcp-entry><doc-id>BCP",
        "STD": "<std-entry><doc-id>STD",
        "RFC": "<rfc-entry><doc-id>RFC",
    }
    start = entry_starts.get(s[:3])
    if start is None:
        return False #invalid call
    wanted = start + s[3:].zfill(4) + "</doc-id>"
    return any(wanted in index_line for index_line in whole)
def draft_ok(s):
    """Check if a draft is real.

    s is the draft name without its "draft-" or "I-D." prefix; a
    trailing two-digit revision ("-NN") is dropped before looking the
    draft up on bib.ietf.org. Returns True without checking when the
    site is not reachable (drafts_checkable false).
    """
    global drafts_checkable
    if not drafts_checkable:
        return True #because we can't check on line right now
    dprint("Checking", s)
    #remove revision number if present
    #(length test avoids an IndexError on very short names)
    if len(s) >= 3 and s[-3] == '-' and s[-2].isdigit() and s[-1].isdigit():
        s = s[:-3]
    url = 'https://bib.ietf.org/public/rfc/bibxml3/reference.I-D.'+s+'.xml'
    return url_ok(url)
def file_ok(fn):
    """Tell whether the local file named by link target fn exists.

    For a target that starts with "../", every "../" is removed first;
    "%20" is turned back into a space before the path is tested
    relative to the current directory.
    """
    local = fn.replace("../","") if fn.startswith("../") else fn
    return os.path.exists(local.replace("%20"," "))
def expand_cites():
    """Look for kramdown-style citations and expand them.

    Works in place on the global 'section' (a list of lines).
    {{X}} becomes a markdown link wrapped in escaped square brackets,
    {{{X}}} becomes a bare link. X may be:
      - an RFC, BCP or STD number (e.g. RFC8200),
      - a draft name starting with "I-D." or "draft-",
      - "<chapter number>. <section name>" for another chapter,
      - the name of a section file in the current chapter (file_names).
    Citations that cannot be resolved are linked to "TBD" and logged as
    warnings. Lines inside ``` fences are left alone.

    Also reads the globals contents, file_names and topic_file.
    Returns True if any line of section was changed.
    """
    global section, contents, file_names, topic_file
    schange = False
    inlit = False
    for i, line in enumerate(section):
        lchange = False
        newcite = False
        if not inlit and line.startswith("```"):
            inlit = True #start of literal text - ignore
            continue
        if inlit:
            if line.startswith("```"):
                inlit = False #end of literal text - stop ignoring
            continue
        try:
            #convert {{ }} to \[{{ }}\]
            #(raw strings: "\[" is an invalid escape sequence otherwise)
            line = line.replace("{{{","{?x{").replace("}}}","}?y}")
            line = line.replace("{{",r"\[{{").replace("}}",r"}}\]")
            line = line.replace("{?x{","{{").replace("}?y}","}}")
            if line.count("{{") != line.count("}}"):
                logitw("Malformed reference in "+topic_file)
            while "{{" in line and "}}" in line:
                #found an expandable citation
                head, body = line.split("{{", maxsplit=1)
                bracketed = head.endswith(r"\[")
                newcite = True
                cite, tail = body.split("}}", maxsplit=1)
                if cite.startswith(("RFC", "BCP", "STD")):
                    if topic_file != "RFC bibliography":
                        if not rfc_ok(cite):
                            logitw(cite+" not found on line")
                    cite = "["+cite+"](https://www.rfc-editor.org/info/"+cite.lower()+")"
                    if not bracketed:
                        #citation in noun form
                        #(the URL part is lower case, so only the link text changes)
                        cite = cite.replace("RFC", "RFC ").replace("BCP", "BCP ").replace("STD", "STD ")
                    line = head + cite + tail
                    lchange = True
                elif cite.startswith("I-D."):
                    draft_name = cite[4:]
                    cite = "["+cite+"](https://datatracker.ietf.org/doc/draft-"+draft_name+"/)"
                    if not draft_ok(draft_name):
                        logitw(draft_name+" not found on line")
                    line = head + cite + tail
                    lchange = True
                elif cite.startswith("draft-"):
                    draft_name = cite[6:]
                    if not draft_ok(draft_name):
                        logitw(cite+" not found on line")
                    cite = "["+cite+"](https://datatracker.ietf.org/doc/"+cite+"/)"
                    line = head + cite + tail
                    lchange = True
                elif cite[0].isdigit():
                    found_c = False
                    #extract chapter number
                    if ". " in cite:
                        cnum, sname = cite.split(". ", maxsplit=1)
                        #derive chapter name
                        for cline in contents:
                            if "["+cnum+"." in cline:
                                chap = cline.split("(")[1].split("/")[0]
                                fn = "../"+chap+"/"+sname.replace(" ","%20")+".md"
                                if not file_ok(fn):
                                    logitw('"'+cite+'" not found')
                                cite = "["+cite+"]("+fn+")"
                                line = head + cite + tail
                                lchange = True
                                found_c = True
                                break
                    if not found_c:
                        #Bogus chapter number
                        line = head + "[" + cite + "](TBD)" + tail
                        lchange = True
                        logitw('"'+cite+'" reference could not be resolved.')
                else:
                    #maybe it's a section name
                    if cite in file_names:
                        cite = "["+cite+"]("+cite.replace(" ","%20")+".md)"
                        line = head + cite + tail
                        lchange = True
                    else:
                        line = head + "[" + cite + "](TBD)" + tail
                        lchange = True
                        logitw('"'+cite+'" reference could not be resolved.')
        except Exception:
            #malformed line: keep whatever was expanded before the
            #problem and carry on (best effort, as before)
            pass
        #string bracketed citations together
        if newcite and r")\]" in line:
            line = line.replace(r")\]\[", "), ")
            line = line.replace(r")\] \[", "), ")
            line = line.replace(r")\], \[", "), ")
            line = line.replace(r")\],\[", "), ")
            lchange = True
        if lchange:
            section[i] = line
            schange = True
    return schange
######### Startup
#Module-wide state shared with the functions above
printing = False        # True for extra diagnostic prints
base, base_names = [], []   # chapter base file lines / section names found in it
warnings = written = 0  # warnings logged / files written so far
default_text = "If you know what should be written here, please write it! [How to contribute.](https://github.com/becarpenter/book6/blob/main/1.%20Introduction%20and%20Foreword/How%20to%20contribute.md#how-to-contribute)"
#Horrible hack to avoid spurious 403 errors on redirected URLs
# - we pretend to be a browser. Thank you StackOverflow!
_s = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17'
headers = {'User-Agent': _s}
#Ensure certificates available. Again, thank you StackOverflow!
#(the SSL context uses the CA bundle shipped with certifi)
context = ssl.create_default_context(cafile=certifi.where())
#Title for all dialogs. Defined in both modes because later code
#refers to it whether or not a GUI is in use.
T = "Book reconciler and link maker."
#Has the user supplied a directory on the command line?
cmd_line = False
if len(sys.argv) > 1 and os.path.isdir(sys.argv[1]):
    #assume user has provided directory
    #and set all options to defaults
    os.chdir(sys.argv[1])
    cmd_line = True
#Announce
if not cmd_line:
    Tk().withdraw() # we don't want a full GUI
    printing = askyesno(title=T,
                        message = "Diagnostic printing?")
    book_dir = askdirectory(title = "Select main book directory")
    if not book_dir:
        #dialog cancelled: there is no directory to work in, so stop
        #cleanly instead of failing inside os.chdir()
        showinfo(title=T, message = "No directory selected, nothing done.")
        sys.exit()
    os.chdir(book_dir)
#Open log file
flog = open("makeBook.log", "w",encoding='utf-8')
logit("makeBook run at "
      +time.strftime("%Y-%m-%d %H:%M:%S UTC%z",time.localtime()))
logit("Running in directory "+ os.getcwd())
#Global option: run the markdown formatter on every file, not only on
#changed ones. Bound up front so that the later tests of 'formatting'
#also work when mdformat is not available.
formatting = False
if not formatter:
    logitw("No markdown formatting (mdformat not imported)")
elif not cmd_line:
    #only asked in GUI mode; command-line runs keep the default
    formatting = askyesno(title=T,
                 message = "Rarely needed option!\nRun md formatter on all files?",
                 default='no')
if formatting:
    logit("User requested mdformat on all files.")
show("Will read in current contents and RFC index.\nTouch no files until done!")
#Can we check RFCs?
#The RFC index is cached locally and refreshed when it is missing or
#more than 30 days old.
fp = "rfc-index.xml"
rfcs_checkable = True
if (not os.path.exists(fp)) or (time.time()-os.path.getmtime(fp) > 60*60*24*30):
    #need fresh copy of index
    try:
        if cmd_line or askyesno(title=T, message = "OK to download RFC index?\n(15 MB file)"):
            response = requests.get("https://www.rfc-editor.org/rfc/rfc-index.xml",
                                    timeout=30)
            #do not cache an error page as if it were the index
            response.raise_for_status()
            with open(fp, "wb") as index_file:
                index_file.write(response.content)
            logit("Downloaded and cached RFC index")
        else:
            rfcs_checkable = False
    except Exception as E:
        logitw("Cannot get RFC index: "+str(E))
        rfcs_checkable = False
if rfcs_checkable:
    whole = rf(fp)
    # hack to make subsequent search more efficient: join each entry's
    # opening tag with the line after it, so that rfc_ok() can match
    # "<xxx-entry><doc-id>..." within a single string
    # (stopping one short of the end keeps whole[i+1] in range)
    for i in range(len(whole)-1):
        l = whole[i]
        if "<bcp-entry>" in l or "<rfc-entry>" in l or "<std-entry>" in l:
            whole[i] = l.strip() + whole[i+1].strip() + "\n"
            whole[i+1] = "\n"
else:
    logitw("Cannot check RFC existence on-line")
drafts_checkable = url_ok("https://bib.ietf.org")
if not drafts_checkable:
    logitw("Cannot check drafts' existence on-line")
### For testing on-line existence checks
##print("RFC8200:",rfc_ok("RFC8200"))
##print("RFC711:",rfc_ok("RFC711"))
##print("RFC9999:",rfc_ok("RFC9999"))
##print("RFC12345:",rfc_ok("RFC12345"))
##print("BCP97:",rfc_ok("BCP97"))
##print("BCP9876:",rfc_ok("BCP9876"))
##print("STD24:",rfc_ok("STD24"))
##print("STD9875:",rfc_ok("STD9875"))
######### Read previous contents
contents = rf("Contents.md")
######### Scan contents and decorate any plain chapter headings
#Get rid of blank lines in the working copy
contents[:] = (l for l in contents if l != "\n")
for i, l in enumerate(contents):
    if not l[0].isdigit():
        continue
    # Found a plain chapter title - change to link format
    l = l[:-1] #remove newline
    if " " not in l:
        #a chapter title is expected to be a number followed by a name
        if cmd_line:
            #nobody to ask and no GUI (the dialog title is not even
            #defined in this mode): warn and carry on, which is what
            #accepting the dialog would do
            logitw("Suspect chapter title: "+l)
        elif not askokcancel(title=T,
                 message = "Suspect chapter title: "+l+"\nOK to continue?"):
            crash(l+": bad chapter title, abandoned make")
    url_frag = l.replace(" ","%20")
    l = "["+l+"]("+url_frag+"/"+url_frag+".md)\n"
    contents[i] = l
######### Scan contents and create any missing directories,
######### build chapter list, extract sections lists
# For each decorated chapter line in contents this loop:
#  - makes sure the chapter directory and its base file exist,
#  - replaces the "* section" lines under the chapter with the
#    section names currently found in the base file,
#  - reconciles the base file's section names with the .md files
#    present in the directory (adding headings or creating files),
#  - rewrites the navigation link line at the end of every section
#    file and expands citations in sections and in the base file.
# NOTE(review): nesting is inferred from statement order here; confirm
# against a correctly indented copy before restructuring.
chapters = []
contentx = -1 # Note that contents may expand or contract
while contentx < len(contents)-1: # dynamically, so we control the loop count
contentx += 1 # explicitly as we go.
cline = contents[contentx]
if cline[0] == "[" and cline[1].isdigit():
# Found a decorated chapter title - extract directory name
# (the text between "(" and the first "/", with %20 decoded)
dname = cline.split("(")[1].split("/")[0].replace("%20"," ")
chapters.append(dname)
#Need to create directory?
if not os.path.isdir(dname):
os.mkdir(dname) #create empty directory
logit("Created directory "+dname)
#create base file
base = []
base.append("# "+dname+"\n\n")
base.append("General introduction to this chapter.\n\n")
base.append(default_text+"\n\n")
base.append("<!-- ## Name (add plain section names like that) -->\n\n")
base.append(link_warn)
base.append("### [<ins>Back to main Contents</ins>](../Contents.md)\n")
wf(dname+"/"+dname+".md", base)
else:
# read the base file
base = rf(dname+"/"+dname+".md")
logit("Processing '"+dname+"'")
base_changed = False
#Does the base end with the contents link?
if not "### [<ins>Back to main" in base[-1]:
base.append(link_warn)
base.append("### [<ins>Back to main Contents</ins>](../Contents.md)\n")
base_changed = True
#extract section names from base file
make_basenames()
#extract section names for existing files
#(every .md file in the directory except the chapter base itself)
file_names = []
for fname in os.listdir(dname):
if os.path.isfile(os.path.join(dname, fname)):
if ".md" in fname and fname[-3:] == ".md" \
and fname[:-3].lower() != dname.lower():
file_names.append(fname[:-3])
dprint("Files", file_names)
#replace section names in Contents.md
#(it doesn't matter whether they've changed, the list will
# end up current)
#N.B. loop within loop on contents list
contentx +=1
while contentx < len(contents):
cline = contents[contentx]
if contents[contentx].startswith("* "):
#found a section name to remove
del contents[contentx]
else:
break
#old sections have gone, contentx points where the
#new sections belong
for sname in base_names:
contents[contentx:contentx] = ["* "+sname+"\n"]
contentx += 1
contentx -= 1 #so that the outer loop search doesn't skip a line
#Maybe update base_names
if base_changed:
make_basenames()
#Make uncased versions for comparisons
u_base_names = uncase(base_names)
u_file_names = uncase(file_names)
if set(base_names) != set(file_names):
#reconciliation needed
logit("Reconciling base and files for '"+dname+"'")
#Create a dictionary in case of file-name case discrepancies
#(maps lower-cased section name to the actual file name)
# NOTE(review): fndict appears to be (re)created only when the two
# name sets differ, while the lookup further down runs for every
# chapter - check whether entries from an earlier chapter can be
# picked up there.
fndict = {}
#Look for discrepant or missing filenames
for topic in file_names:
if (not topic in base_names) and topic.lower() in u_base_names:
#we have a file-name case discrepancy
logitw("File-name case discrepancy for '"+dname+"/"+topic+"'")
fndict[topic.lower()] = topic
elif not topic in base_names:
#found a new topic
logit("New section '"+topic+"' added to base '"+dname+"'")
new_sec = "\n## ["+topic+"]("+topic.replace(" ","%20")+".md)\n"
#insert the new heading one line above the "Back" link line
for bx in range(len(base)):
if "### [<ins>Back" in base[bx]:
base[bx-1:bx-1] = [new_sec]
base_changed = True
break
logitw("Run makeBook again to update main contents with new section")
#Maybe update base_names
if base_changed:
make_basenames()
u_base_names = uncase(base_names)
#Look for runt sections in base and create files
for topic in base_names:
if not topic.lower() in u_file_names:
#There is no file, make it
new_md = []
new_md.append('## '+topic+"\n\n")
new_md.append(default_text+"\n\n")
new_md.append(link_warn)
#placeholder link line; the real one is written by the link
#fix-up pass below
new_md.append(link_text("PREVIOUS","NEXT",dname))
wf(dname+"/"+topic+".md", new_md)
#Add link to file in base
for bx in range(len(base)):
if "## "+topic in base[bx]:
base[bx] = "## ["+topic+"]("+topic.replace(" ","%20")+".md)\n"
base_changed = True
break
#Add file name
file_names.append(topic)
u_file_names = uncase(file_names)
if base_changed or formatting :
wf(dname+"/"+dname+".md", base)
#Now fixup link lines in section files. The only safe way
#is to read them all and write back if fixed.
#Assertion: base names and file names now match except for any case discrepancies
if set(u_base_names) != set(u_file_names):
dprint(dname, "Base names", base_names)
dprint(dname, "File names", file_names)
crash("Fatal base and file names mismatch in '"+dname+"'")
#The sections are by definition in the order shown in the chapter base
#Make a list of file names sorted like the base names
#(Necessary because of possible case discrepancies)
sorted_file_names = []
for topic in base_names:
try:
#get actual file name from dictionary
sorted_file_names.append(fndict[topic.lower()])
except:
#not in dictionary, so no case discrepancy
sorted_file_names.append(topic)
#Make the link line for each section
#and update section file if necessary.
#Also expand "kramdown" citations.
for bx in range(len(base_names)):
topic = base_names[bx]
topic_file = sorted_file_names[bx]
#is there a previous topic?
if bx == 0:
previous = None
else:
previous = sorted_file_names[bx-1]
#is there a subsequent topic?
if bx == len(base_names)-1:
nxt = None
else:
nxt = sorted_file_names[bx+1]
link_line = link_text(previous, nxt, dname)
#'section' and 'topic_file' are the globals expand_cites() works on
section = rf(dname+"/"+topic_file+".md")
section_changed = expand_cites()
if "### [<ins>" in section[-1]:
#replace existing link line if necessary
if section[-1].strip("\n") != link_line:
section[-1] = link_line
section_changed = True
else:
#add new link line
section.append(link_warn)
section.append(link_line)
section_changed = True
if section_changed or formatting:
wf(dname+"/"+topic_file+".md", section)
#Expand citations for chapter base file itself
section = rf(dname+"/"+dname+".md")
topic_file = dname #used by expand_cites()
if expand_cites() or formatting:
wf(dname+"/"+dname+".md", section)
######### Rewrite contents
#Restore the blank lines removed from the working copy: one ahead of
#each link or # title (the very first line excepted) and one after
#the logo image
for idx in range(1, len(contents)):
    entry = contents[idx]
    if entry.startswith(("[", "#")):
        contents[idx] = "\n" + entry
    elif entry.startswith("<img src="):
        contents[idx] = entry + "\n"
#and write it back (never run through the markdown formatter)
wf("Contents.md", contents, mdf=False)
######### Close log and exit
flog.close()
#Summarise the run: number of files written (or a clean run) and any warnings
warn = str(warnings)+" warning(s)\n" if warnings else ""
wrote = str(written)+" file(s) written.\n" if written else "Clean run.\n"
show(wrote+warn+"Check makeBook.log.")