#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Reconcile book6 chapters with contents, and set up inter-section and inter-chapter links as far as possible."""

# Version: 2022-09-18 - original
# Version: 2022-09-26 - added {{{ }}} citations
# Version: 2022-10-05 - fixed fencepost error when adding section to contents
# Version: 2022-10-06 - added citation expansion for chapter base file
# Version: 2022-11-09 - allow {{ }} as well as {{{ }}}
#                      - added citation of I-D. or draft-
# Version: 2022-11-15 - check that cited references exist (partial)
# Version: 2022-11-16 - improved reference checks (but still partial)
# Version: 2022-11-18 - fixed small oversight in reference check
# Version: 2022-11-19 - cosmetic
# Version: 2022-11-20 - now checks I-D, BCP and STD refs
# Version: 2022-11-22 - fix oversights/nits in contents updating
# Version: 2022-11-27 - {{ }} now puts [ ] round citation
#                      - {{{ }}} does not put [ ]
#                      - fix missing newline when adding new section
# Version: 2023-01-10 - fix bug when adding new chapter name to Contents.md
#                      - enormous simplification of Contents creation
# Version: 2023-05-20 - skip on-line check for RFC bibliography
# Version: 2023-07-19 - apply mdformat to changed files
#                      - add global mdformat option
#                      - add mitigations for SSL certs for URL checking
# Version: 2023-08-03 - correctly ignore ``` blocks
# Version: 2023-08-10 - changed to use RFC index for existence checking
# Version: 2024-01-01 - changed default text for empty sections

######################################################## ignore continue if inlit: if line.startswith("```"): inlit = False #end of literal text - stop ignoring continue try: #convert {{ }} to \[{{ }}] line = line.replace("{{{","{?x{").replace("}}}","}?y}") line = line.replace("{{","\[{{").replace("}}","}}]") line = line.replace("{?x{","{{").replace("}?y}","}}") if line.count("{{") != line.count("}}"): logitw("Malformed reference in "+topic_file) while "{{" in line and "}}" in line: #dprint("Citation in:", line) #found an expandable citation head, body = line.split("{{", maxsplit=1) cite, tail = body.split("}}", maxsplit=1) if cite.startswith("RFC") or cite.startswith("BCP") or cite.startswith("STD"): if topic_file != "RFC bibliography": if not rfc_ok(cite): logitw(cite+" not found on line") cite = "["+cite+"](https://www.rfc-editor.org/info/"+cite.lower()+")" line = head + cite + tail lchange = True elif cite.startswith("I-D."): draft_name = cite[4:] cite = "["+cite+"](https://datatracker.ietf.org/doc/draft-"+draft_name+"/)" if not draft_ok(draft_name): logitw(draft_name+" not found on line") line = head + cite + tail lchange = True elif cite.startswith("draft-"): draft_name = cite[6:] if not draft_ok(draft_name): logitw(cite+" not found on line") cite = "["+cite+"](https://datatracker.ietf.org/doc/"+cite+"/)" line = head + cite + tail lchange = True elif cite[0].isdigit(): #print("Found chapter?", cite) found_c = False #extract chapter number if ". " in cite: cnum, sname = cite.split(". ", maxsplit=1) #derive chapter name for cline in contents: if "["+cnum+"." 
in cline: chap = cline.split("(")[1].split("/")[0] fn = "../"+chap+"/"+sname.replace(" ","%20")+".md" if not file_ok(fn): logitw('"'+cite+'" not found') cite = "["+cite+"]("+fn+")" line = head + cite + tail lchange = True found_c = True break if not found_c: #Bogus chapter number line = head + "[" + cite + "](TBD)" + tail lchange = True logitw('"'+cite+'" reference could not be resolved.') else: #maybe it's a section name #print("Found section?", cite) if cite in file_names: cite = "["+cite+"]("+cite.replace(" ","%20")+".md)" line = head + cite + tail lchange = True else: #print("Found nothing") line = head + "[" + cite + "](TBD)" + tail lchange = True logitw('"'+cite+'" reference could not be resolved.') except: #malformed line, do nothing pass if lchange: section[i] = line schange = True return schange ######### Startup #Define some globals printing = False # True for extra diagnostic prints base = [] # the base file for each chapter base_names = [] # the section names extracted from the base file warnings = 0 # counts warnings in the log file written = 0 # counts files written default_text = "If you know what should be written here, please write it! [How to contribute.](https://github.com/becarpenter/book6/blob/main/1.%20Introduction%20and%20Foreword/How%20to%20contribute.md#how-to-contribute)" #Horrible hack to avoid spurious 403 errors on redirected URLs # - we pretend to be a browser. Thank you StackOverflow! headers = {} _s = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17' headers['User-Agent'] = _s #Ensure certificates available. Again, thank you StackOverflow! #print("CA file", certifi.where()) context = ssl.create_default_context(cafile=certifi.where()) #Announce Tk().withdraw() # we don't want a full GUI T = "Book reconciler and link maker." 
printing = askyesno(title=T, message = "Diagnostic printing?") where = askdirectory(title = "Select main book directory") os.chdir(where) #Open log file flog = open("makeBook.log", "w",encoding='utf-8') logit("makeBook run at " +time.strftime("%Y-%m-%d %H:%M:%S UTC%z",time.localtime())) logit("Running in directory "+ os.getcwd()) if not formatter: logitw("No markdown formatting (mdformat not imported)") else: formatting = askyesno(title=T, message = "Rarely needed option!\nRun md formatter on all files?", default='no') if formatting: logit("User requested mdformat on all files.") showinfo(title=T, message = "Will read in current contents and RFC index.\nTouch no files until done!") #Can we check RFCs? fp = "rfc-index.xml" rfcs_checkable = True if (not os.path.exists(fp)) or (time.time()-os.path.getmtime(fp) > 60*60*24*30): #need fresh copy of index try: if askyesno(title=T, message = "OK to download RFC index?\n(15 MB file)"): response = requests.get("https://www.rfc-editor.org/rfc/rfc-index.xml") open(fp, "wb").write(response.content) logit("Downloaded and cached RFC index") else: rfcs_checkable = False except Exception as E: logitw("Cannot get RFC index: "+str(E)) rfcs_checkable = False if rfcs_checkable: whole = rf(fp) for i in range(len(whole)): l = whole[i] # hack to make subsequent search more efficient if "" in l or "" in l or "" in l: whole[i] = l.strip() + whole[i+1].strip() + "\n" whole[i+1] = "\n" else: logitw("Cannot check RFC existence on-line") drafts_checkable = url_ok("https://bib.ietf.org") if not drafts_checkable: logitw("Cannot check drafts' existence on-line") ### For testing on-line existence checks ##print("RFC8200:",rfc_ok("RFC8200")) ##print("RFC711:",rfc_ok("RFC711")) ##print("RFC9999:",rfc_ok("RFC9999")) ##print("RFC12345:",rfc_ok("RFC12345")) ##print("BCP97:",rfc_ok("BCP97")) ##print("BCP9876:",rfc_ok("BCP9876")) ##print("STD24:",rfc_ok("STD24")) ##print("STD9875:",rfc_ok("STD9875")) ######### Read previous contents contents = 
rf("Contents.md") ######### Scan contents and decorate any plain chapter headings #Get rid of blank lines in the working copy contents[:] = (l for l in contents if l != "\n") for i in range(len(contents)): l = contents[i] if l[0].isdigit(): # Found a plain chapter title - change to link format l = l[:-1] #remove newline try: _, _ = l.split(" ", maxsplit = 1) except: if not askokcancel(title=T, message = "Suspect chapter title: "+l+"\nOK to continue?"): crash(l+": bad chapter title, abandoned make") url_frag = l.replace(" ","%20") l = "["+l+"]("+url_frag+"/"+url_frag+".md)\n" contents[i] = l ######### Scan contents and create any missing directories, ######### build chapter list, extract sections lists chapters = [] contentx = -1 # Note that contents may expand or contract while contentx < len(contents)-1: # dynamically, so we control the loop count contentx += 1 # explicitly as we go. cline = contents[contentx] if cline[0] == "[" and cline[1].isdigit(): # Found a decorated chapter title - extract directory name dname = cline.split("(")[1].split("/")[0].replace("%20"," ") chapters.append(dname) #Need to create directory? if not os.path.isdir(dname): os.mkdir(dname) #create empty directory logit("Created directory "+dname) #create base file base = [] base.append("# "+dname+"\n\n") base.append("General introduction to this chapter.\n\n") base.append(default_text+"\n\n") base.append("\n\n") base.append(link_warn) base.append("### [Back to main Contents](../Contents.md)\n") wf(dname+"/"+dname+".md", base) else: # read the base file base = rf(dname+"/"+dname+".md") logit("Processing '"+dname+"'") base_changed = False #Does the base end with the contents link? 
if not "### [Back to main" in base[-1]: base.append(link_warn) base.append("### [Back to main Contents](../Contents.md)\n") base_changed = True #extract section names from base file make_basenames() #extract section names for existing files file_names = [] for fname in os.listdir(dname): if os.path.isfile(os.path.join(dname, fname)): if ".md" in fname and fname[-3:] == ".md" \ and fname[:-3].lower() != dname.lower(): file_names.append(fname[:-3]) dprint("Files", file_names) #replace section names in Contents.md #(it doesn't matter whether they've changed, the list will # end up current) #N.B. loop within loop on contents list contentx +=1 while contentx < len(contents): cline = contents[contentx] if contents[contentx].startswith("* "): #found a section name to remove del contents[contentx] else: break #old sections have gone, contentx points where the #new sections belong for sname in base_names: contents[contentx:contentx] = ["* "+sname+"\n"] contentx += 1 contentx -= 1 #so that the outer loop search doesn't skip a line #Maybe update base_names if base_changed: make_basenames() #Make uncased versions for comparisons u_base_names = uncase(base_names) u_file_names = uncase(file_names) if set(base_names) != set(file_names): #reconciliation needed logit("Reconciling base and files for '"+dname+"'") #Create a dictionary in case of file-name case discrepancies fndict = {} #Look for discrepant or missing filenames for topic in file_names: if (not topic in base_names) and topic.lower() in u_base_names: #we have a file-name case discrepancy logitw("File-name case discrepancy for '"+dname+"/"+topic+"'") fndict[topic.lower()] = topic elif not topic in base_names: #found a new topic logit("New section '"+topic+"' added to base '"+dname+"'") new_sec = "\n## ["+topic+"]("+topic.replace(" ","%20")+".md)\n" for bx in range(len(base)): if "### [Back" in base[bx]: base[bx-1:bx-1] = [new_sec] base_changed = True break logitw("Run makeBook again to update main contents with new 
section") #Maybe update base_names if base_changed: make_basenames() u_base_names = uncase(base_names) #Look for runt sections in base and create files for topic in base_names: if not topic.lower() in u_file_names: #There is no file, make it new_md = [] new_md.append('## '+topic+"\n\n") new_md.append(default_text+"\n\n") new_md.append(link_warn) new_md.append(link_text("PREVIOUS","NEXT",dname)) wf(dname+"/"+topic+".md", new_md) #Add link to file in base for bx in range(len(base)): if "## "+topic in base[bx]: base[bx] = "## ["+topic+"]("+topic.replace(" ","%20")+".md)\n" base_changed = True break #Add file name file_names.append(topic) u_file_names = uncase(file_names) if base_changed or formatting : wf(dname+"/"+dname+".md", base) #Now fixup link lines in section files. The only safe way #is to read them all and write back if fixed. #Assertion: base names and file names now match except for any case discrepancies if set(u_base_names) != set(u_file_names): dprint(dname, "Base names", base_names) dprint(dname, "File names", file_names) crash("Fatal base and file names mismatch in '"+dname+"'") #The sections are by definition in the order shown in the chapter base #Make a list of file names sorted like the base names #(Necessary because of possible case discrepancies) sorted_file_names = [] for topic in base_names: try: #get actual file name from dictionary sorted_file_names.append(fndict[topic.lower()]) except: #not in dictionary, so no case discrepancy sorted_file_names.append(topic) #Make the link line for each section #and update section file if necessary. #Also expand "kramdown" citations. for bx in range(len(base_names)): topic = base_names[bx] topic_file = sorted_file_names[bx] #is there a previous topic? if bx == 0: previous = None else: previous = sorted_file_names[bx-1] #is there a subsequent topic? 
if bx == len(base_names)-1: nxt = None else: nxt = sorted_file_names[bx+1] link_line = link_text(previous, nxt, dname) section = rf(dname+"/"+topic_file+".md") section_changed = expand_cites() if "### [" in section[-1]: #replace existing link line if necessary if section[-1].strip("\n") != link_line: section[-1] = link_line section_changed = True else: #add new link line section.append(link_warn) section.append(link_line) section_changed = True if section_changed or formatting: wf(dname+"/"+topic_file+".md", section) #Expand citations for chapter base file itself section = rf(dname+"/"+dname+".md") topic_file = dname #used by expand_cites() if expand_cites() or formatting: wf(dname+"/"+dname+".md", section) ######### Rewrite contents #ensure there is a blank line before each link or # title #and that logo is followed by blank line for i in range(1,len(contents)): if contents[i].startswith("[") or contents[i].startswith("#"): contents[i] = "\n"+contents[i] elif contents[i].startswith("