#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Reconcile book6 chapters with contents, and set up inter-section
and inter-chapter links as far as possible."""

# Version: 2022-09-18 - original
# Version: 2022-09-26 - added {{{ }}} citations
# Version: 2022-10-05 - fencepost error when adding section to contents
# Version: 2022-10-06 - added citation expansion for chapter base file
# Version: 2022-11-09 - allow {{ }} as well as {{{ }}}
#                     - added citation of I-D. or draft-
# Version: 2022-11-15 - check that cited references exist (partial)
# Version: 2022-11-16 - improved reference checks (but still partial)
# Version: 2022-11-18 - small oversight in reference check
# Version: 2022-11-19 - cosmetic
# Version: 2022-11-20 - now checks I-D, BCP and STD refs
# Version: 2022-11-22 - fix oversights/nits in contents updating
# Version: 2022-11-27 - {{ }} now puts [ ] round citation
#                     - {{{ }}} does not put [ ]
#                     - fix missing newline when adding new section
# Version: 2023-01-10 - fix bug when adding new chapter name to Contents.md
#                     - enormous simplification of Contents creation
# Version: 2023-05-20 - skip on-line check for RFC bibliography
# Version: 2023-07-19 - apply mdformat to changed files
#                     - add global mdformat option
#                     - add mitigations for SSL certs for URL checking
# Version: 2023-08-03 - correctly ignore ``` blocks
# Version: 2023-08-10 - changed to use RFC index for existence checking
# Version: 2024-01-01 - changed default text for empty sections

########################################################
# Copyright (C) 2022-2024 Brian E. Carpenter.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with
# or without modification, are permitted provided that the
# following conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
# AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
# THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
########################################################
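
# Citation syntax handled by this script (added summary; the names used in
# the examples are placeholders, see expand_cites() for the exact behaviour):
#   {{RFC8200}}          -> \[[RFC8200](https://www.rfc-editor.org/info/rfc8200)]
#   {{{RFC8200}}}        -> [RFC8200](https://www.rfc-editor.org/info/rfc8200)
#   {{I-D.some-name}}    -> link to https://datatracker.ietf.org/doc/draft-some-name/
#   {{draft-some-name}}  -> link to https://datatracker.ietf.org/doc/draft-some-name/
#   {{2. Some section}}  -> link to "Some section" in the chapter numbered 2
#   {{Some section}}     -> link to "Some section" in the current chapter
# Citations that cannot be resolved are linked to (TBD) and logged as warnings.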

from tkinter import Tk
from tkinter.filedialog import askdirectory
from tkinter.messagebox import askokcancel, askyesno, showinfo

import time
import os
import urllib.request
import ssl
import certifi
import requests

try:
    import mdformat
    formatter = True
except:
    formatter = False


def logit(msg):
    """Add a message to the log file"""
    global flog, printing
    flog.write(msg+"\n")
    if printing:
        print(msg)


def logitw(msg):
    """Add a warning message to the log file"""
    global warnings
    logit("WARNING: "+msg)
    warnings += 1


def dprint(*msg):
    """ Diagnostic print """
    global printing
    if printing:
        print(*msg)


def crash(msg):
    """Log and crash"""
    global printing   #needed so that logit() actually prints the crash message
    printing = True
    logit("CRASH "+msg)
    flog.close()
    exit()


def rf(f):
    """Return a file as a list of strings"""
    file = open(f, "r", encoding='utf-8', errors='replace')
    l = file.readlines()
    file.close()
    #ensure last line has a newline
    if l[-1][-1] != "\n":
        l[-1] += "\n"
    return l


def wf(f, l, mdf=True):
    """Write list of strings to file"""
    global written
    file = open(f, "w", encoding='utf-8')
    for line in l:
        file.write(line)
    file.close()
    logit("'"+f+"' written")
    if mdf and formatter and f.endswith(".md"):
        mdformat.file(f, options={"wrap": 72})
        logit("'"+f+"' md formatted")
    written += 1


def uncase(l):
    """Return lower case version of a list of strings"""
    u = []
    for s in l:
        u.append(s.lower())
    return u


def make_basenames():
    """Make or refresh base names"""
    global base_names, base
    base_names = []
    for bline in base:
        if len(bline) < 4:
            continue
        bline = bline.strip("\n")
        if bline.startswith("## ["):
            # existing section reference
            sname, _ = bline.split("[", maxsplit=1)[1].split("]", maxsplit=1)
            base_names.append(sname)
        elif bline.startswith("##") and not "###" in bline:
            #possible section
            try:
                _, sname = bline.split(" ", maxsplit=1)
            except:
                continue
            #treat as new section (will create file later)
            base_names.append(sname)
    dprint("Base names: ", base_names)


def link_text(prev, nxt, chapter):
    """Construct link for end of a section"""
    part1 = ""
    part2 = ""
    if prev:
        part1 = " [Previous]("+prev.replace(" ","%20")+".md)"
    if nxt:
        part2 = " [Next]("+nxt.replace(" ","%20")+".md)"
    return "###"+part1+part2+" [Chapter Contents]("+chapter.replace(" ","%20")+".md)"


#HTML warning comment written before generated link lines (exact wording assumed)
link_warn = "<!-- Link lines generated automatically; do not delete -->\n"


def url_ok(url):
    """Check if a URL is OK"""
    global headers, context
    request = urllib.request.Request(url, headers=headers)
    try:
        response = urllib.request.urlopen(request, context=context, timeout=30).getcode()
    except Exception as E:
        #logitw(url+": "+str(E))
        return False   #URL doesn't work
    return response == 200


def rfc_ok(s):
    """Check if an RFC etc. is real"""
    global rfcs_checkable
    if not rfcs_checkable:
        return True   #because we can't check on line right now
    dprint("Checking", s)
    #look for the literal <doc-id> element in the cached RFC index
    if s[:3] == "BCP":
        found = [i for i in whole if "<doc-id>BCP"+s[3:].zfill(4)+"</doc-id>" in i]
        #print(found)
        return(bool(found))
    elif s[:3] == "STD":
        found = [i for i in whole if "<doc-id>STD"+s[3:].zfill(4)+"</doc-id>" in i]
        #print(found)
        return(bool(found))
    elif s[:3] == "RFC":
        found = [i for i in whole if "<doc-id>RFC"+s[3:].zfill(4)+"</doc-id>" in i]
        #print(found)
        return(bool(found))
    else:
        return(False)   #invalid call


def draft_ok(s):
    """Check if a draft is real"""
    global drafts_checkable
    if not drafts_checkable:
        return True   #because we can't check on line right now
    dprint("Checking", s)
    #remove revision number if present
    if s[-3] == '-' and s[-2].isdigit() and s[-1].isdigit():
        s = s[:-3]
    url = 'https://bib.ietf.org/public/rfc/bibxml3/reference.I-D.'+s+'.xml'
    return url_ok(url)


def file_ok(fn):
    """Check if a local file is OK"""
    if fn.startswith("../"):
        fn = fn.replace("../","")
    fn = fn.replace("%20"," ")
    return os.path.exists(fn)


def expand_cites():
    """Look for kramdown-style citations and expand them"""
    global section, contents, file_names, topic_file
    schange = False
    inlit = False
    for i in range(len(section)):
        lchange = False
        line = section[i]
        if not inlit and line.startswith("```"):
            inlit = True   #start of literal text - ignore
            continue
        if inlit:
            if line.startswith("```"):
                inlit = False   #end of literal text - stop ignoring
            continue
        try:
            #convert {{ }} to \[{{ }}], protecting {{{ }}} first
            line = line.replace("{{{","{?x{").replace("}}}","}?y}")
            line = line.replace("{{","\[{{").replace("}}","}}]")
            line = line.replace("{?x{","{{").replace("}?y}","}}")
            if line.count("{{") != line.count("}}"):
                logitw("Malformed reference in "+topic_file)
            while "{{" in line and "}}" in line:
                #dprint("Citation in:", line)
                #found an expandable citation
                head, body = line.split("{{", maxsplit=1)
                cite, tail = body.split("}}", maxsplit=1)
                if cite.startswith("RFC") or cite.startswith("BCP") or cite.startswith("STD"):
                    if topic_file != "RFC bibliography":
                        if not rfc_ok(cite):
                            logitw(cite+" not found on line")
                    cite = "["+cite+"](https://www.rfc-editor.org/info/"+cite.lower()+")"
                    line = head + cite + tail
                    lchange = True
                elif cite.startswith("I-D."):
                    draft_name = cite[4:]
                    cite = "["+cite+"](https://datatracker.ietf.org/doc/draft-"+draft_name+"/)"
                    if not draft_ok(draft_name):
                        logitw(draft_name+" not found on line")
                    line = head + cite + tail
                    lchange = True
                elif cite.startswith("draft-"):
                    draft_name = cite[6:]
                    if not draft_ok(draft_name):
                        logitw(cite+" not found on line")
                    cite = "["+cite+"](https://datatracker.ietf.org/doc/"+cite+"/)"
                    line = head + cite + tail
                    lchange = True
                elif cite[0].isdigit():
                    #print("Found chapter?", cite)
                    found_c = False
                    #extract chapter number
                    if ". " in cite:
                        cnum, sname = cite.split(". ", maxsplit=1)
                        #derive chapter name
                        for cline in contents:
                            if "["+cnum+"." in cline:
                                chap = cline.split("(")[1].split("/")[0]
                                fn = "../"+chap+"/"+sname.replace(" ","%20")+".md"
                                if not file_ok(fn):
                                    logitw('"'+cite+'" not found')
                                cite = "["+cite+"]("+fn+")"
                                line = head + cite + tail
                                lchange = True
                                found_c = True
                                break
                    if not found_c:
                        #Bogus chapter number
                        line = head + "[" + cite + "](TBD)" + tail
                        lchange = True
                        logitw('"'+cite+'" reference could not be resolved.')
                else:
                    #maybe it's a section name
                    #print("Found section?", cite)
                    if cite in file_names:
                        cite = "["+cite+"]("+cite.replace(" ","%20")+".md)"
                        line = head + cite + tail
                        lchange = True
                    else:
                        #print("Found nothing")
                        line = head + "[" + cite + "](TBD)" + tail
                        lchange = True
                        logitw('"'+cite+'" reference could not be resolved.')
        except:
            #malformed line, do nothing
            pass
        if lchange:
            section[i] = line
            schange = True
    return schange


######### Startup

#Define some globals

printing = False       # True for extra diagnostic prints
base = []              # the base file for each chapter
base_names = []        # the section names extracted from the base file
warnings = 0           # counts warnings in the log file
written = 0            # counts files written
default_text = "If you know what should be written here, please write it! [How to contribute.](https://github.com/becarpenter/book6/blob/main/1.%20Introduction%20and%20Foreword/How%20to%20contribute.md#how-to-contribute)"

#Horrible hack to avoid spurious 403 errors on redirected URLs
# - we pretend to be a browser. Thank you StackOverflow!
headers = {}
_s = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17'
headers['User-Agent'] = _s

#Ensure certificates available. Again, thank you StackOverflow!
#print("CA file", certifi.where())
context = ssl.create_default_context(cafile=certifi.where())

#Announce

Tk().withdraw()   # we don't want a full GUI
T = "Book reconciler and link maker."
printing = askyesno(title=T, message = "Diagnostic printing?")
where = askdirectory(title = "Select main book directory")
os.chdir(where)

#Open log file

flog = open("makeBook.log", "w", encoding='utf-8')
logit("makeBook run at "
      +time.strftime("%Y-%m-%d %H:%M:%S UTC%z",time.localtime()))
logit("Running in directory "+ os.getcwd())

formatting = False   #global mdformat option; may be set by the user below
if not formatter:
    logitw("No markdown formatting (mdformat not imported)")
else:
    formatting = askyesno(title=T,
                          message = "Rarely needed option!\nRun md formatter on all files?",
                          default='no')
    if formatting:
        logit("User requested mdformat on all files.")

showinfo(title=T,
         message = "Will read in current contents and RFC index.\nTouch no files until done!")
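
# The RFC existence checks (rfc_ok() above and the index scan below) match
# literal element text in a cached copy of
# https://www.rfc-editor.org/rfc/rfc-index.xml. The element names used here
# (<rfc-entry>, <bcp-entry>, <std-entry>, <doc-id>) are assumed from the
# published index format; adjust them if the index schema ever changes.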
fp = "rfc-index.xml" rfcs_checkable = True if (not os.path.exists(fp)) or (time.time()-os.path.getmtime(fp) > 60*60*24*30): #need fresh copy of index try: if askyesno(title=T, message = "OK to download RFC index?\n(15 MB file)"): response = requests.get("https://www.rfc-editor.org/rfc/rfc-index.xml") open(fp, "wb").write(response.content) logit("Downloaded and cached RFC index") else: rfcs_checkable = False except Exception as E: logitw("Cannot get RFC index: "+str(E)) rfcs_checkable = False if rfcs_checkable: whole = rf(fp) for i in range(len(whole)): l = whole[i] # hack to make subsequent search more efficient if "" in l or "" in l or "" in l: whole[i] = l.strip() + whole[i+1].strip() + "\n" whole[i+1] = "\n" else: logitw("Cannot check RFC existence on-line") drafts_checkable = url_ok("https://bib.ietf.org") if not drafts_checkable: logitw("Cannot check drafts' existence on-line") ### For testing on-line existence checks ##print("RFC8200:",rfc_ok("RFC8200")) ##print("RFC711:",rfc_ok("RFC711")) ##print("RFC9999:",rfc_ok("RFC9999")) ##print("RFC12345:",rfc_ok("RFC12345")) ##print("BCP97:",rfc_ok("BCP97")) ##print("BCP9876:",rfc_ok("BCP9876")) ##print("STD24:",rfc_ok("STD24")) ##print("STD9875:",rfc_ok("STD9875")) ######### Read previous contents contents = rf("Contents.md") ######### Scan contents and decorate any plain chapter headings #Get rid of blank lines in the working copy contents[:] = (l for l in contents if l != "\n") for i in range(len(contents)): l = contents[i] if l[0].isdigit(): # Found a plain chapter title - change to link format l = l[:-1] #remove newline try: _, _ = l.split(" ", maxsplit = 1) except: if not askokcancel(title=T, message = "Suspect chapter title: "+l+"\nOK to continue?"): crash(l+": bad chapter title, abandoned make") url_frag = l.replace(" ","%20") l = "["+l+"]("+url_frag+"/"+url_frag+".md)\n" contents[i] = l ######### Scan contents and create any missing directories, ######### build chapter list, extract sections lists chapters = [] contentx = -1 # Note that contents may expand or contract while contentx < len(contents)-1: # dynamically, so we control the loop count contentx += 1 # explicitly as we go. cline = contents[contentx] if cline[0] == "[" and cline[1].isdigit(): # Found a decorated chapter title - extract directory name dname = cline.split("(")[1].split("/")[0].replace("%20"," ") chapters.append(dname) #Need to create directory? if not os.path.isdir(dname): os.mkdir(dname) #create empty directory logit("Created directory "+dname) #create base file base = [] base.append("# "+dname+"\n\n") base.append("General introduction to this chapter.\n\n") base.append(default_text+"\n\n") base.append("\n\n") base.append(link_warn) base.append("### [Back to main Contents](../Contents.md)\n") wf(dname+"/"+dname+".md", base) else: # read the base file base = rf(dname+"/"+dname+".md") logit("Processing '"+dname+"'") base_changed = False #Does the base end with the contents link? 
if not "### [Back to main" in base[-1]: base.append(link_warn) base.append("### [Back to main Contents](../Contents.md)\n") base_changed = True #extract section names from base file make_basenames() #extract section names for existing files file_names = [] for fname in os.listdir(dname): if os.path.isfile(os.path.join(dname, fname)): if ".md" in fname and fname[-3:] == ".md" \ and fname[:-3].lower() != dname.lower(): file_names.append(fname[:-3]) dprint("Files", file_names) #replace section names in Contents.md #(it doesn't matter whether they've changed, the list will # end up current) #N.B. loop within loop on contents list contentx +=1 while contentx < len(contents): cline = contents[contentx] if contents[contentx].startswith("* "): #found a section name to remove del contents[contentx] else: break #old sections have gone, contentx points where the #new sections belong for sname in base_names: contents[contentx:contentx] = ["* "+sname+"\n"] contentx += 1 contentx -= 1 #so that the outer loop search doesn't skip a line #Maybe update base_names if base_changed: make_basenames() #Make uncased versions for comparisons u_base_names = uncase(base_names) u_file_names = uncase(file_names) if set(base_names) != set(file_names): #reconciliation needed logit("Reconciling base and files for '"+dname+"'") #Create a dictionary in case of file-name case discrepancies fndict = {} #Look for discrepant or missing filenames for topic in file_names: if (not topic in base_names) and topic.lower() in u_base_names: #we have a file-name case discrepancy logitw("File-name case discrepancy for '"+dname+"/"+topic+"'") fndict[topic.lower()] = topic elif not topic in base_names: #found a new topic logit("New section '"+topic+"' added to base '"+dname+"'") new_sec = "\n## ["+topic+"]("+topic.replace(" ","%20")+".md)\n" for bx in range(len(base)): if "### [Back" in base[bx]: base[bx-1:bx-1] = [new_sec] base_changed = True break logitw("Run makeBook again to update main contents with new section") #Maybe update base_names if base_changed: make_basenames() u_base_names = uncase(base_names) #Look for runt sections in base and create files for topic in base_names: if not topic.lower() in u_file_names: #There is no file, make it new_md = [] new_md.append('## '+topic+"\n\n") new_md.append(default_text+"\n\n") new_md.append(link_warn) new_md.append(link_text("PREVIOUS","NEXT",dname)) wf(dname+"/"+topic+".md", new_md) #Add link to file in base for bx in range(len(base)): if "## "+topic in base[bx]: base[bx] = "## ["+topic+"]("+topic.replace(" ","%20")+".md)\n" base_changed = True break #Add file name file_names.append(topic) u_file_names = uncase(file_names) if base_changed or formatting : wf(dname+"/"+dname+".md", base) #Now fixup link lines in section files. The only safe way #is to read them all and write back if fixed. 
        #Assertion: base names and file names now match except for any case discrepancies
        if set(u_base_names) != set(u_file_names):
            dprint(dname, "Base names", base_names)
            dprint(dname, "File names", file_names)
            crash("Fatal base and file names mismatch in '"+dname+"'")

        #The sections are by definition in the order shown in the chapter base
        #Make a list of file names sorted like the base names
        #(Necessary because of possible case discrepancies)
        sorted_file_names = []
        for topic in base_names:
            try:
                #get actual file name from dictionary
                sorted_file_names.append(fndict[topic.lower()])
            except:
                #not in dictionary, so no case discrepancy
                sorted_file_names.append(topic)

        #Make the link line for each section
        #and update section file if necessary.
        #Also expand "kramdown" citations.
        for bx in range(len(base_names)):
            topic = base_names[bx]
            topic_file = sorted_file_names[bx]
            #is there a previous topic?
            if bx == 0:
                previous = None
            else:
                previous = sorted_file_names[bx-1]
            #is there a subsequent topic?
            if bx == len(base_names)-1:
                nxt = None
            else:
                nxt = sorted_file_names[bx+1]
            link_line = link_text(previous, nxt, dname)
            section = rf(dname+"/"+topic_file+".md")
            section_changed = expand_cites()
            if "### [" in section[-1]:
                #replace existing link line if necessary
                if section[-1].strip("\n") != link_line:
                    section[-1] = link_line
                    section_changed = True
            else:
                #add new link line
                section.append(link_warn)
                section.append(link_line)
                section_changed = True
            if section_changed or formatting:
                wf(dname+"/"+topic_file+".md", section)

        #Expand citations for chapter base file itself
        section = rf(dname+"/"+dname+".md")
        topic_file = dname   #used by expand_cites()
        if expand_cites() or formatting:
            wf(dname+"/"+dname+".md", section)

######### Rewrite contents

#ensure there is a blank line before each link or # title
#and that logo is followed by blank line
for i in range(1,len(contents)):
    if contents[i].startswith("[") or contents[i].startswith("#"):
        contents[i] = "\n"+contents[i]
    elif contents[i].startswith("