1
0
mirror of https://github.com/StackExchange/dnscontrol.git synced 2024-05-11 05:55:12 +00:00

Update vendored packages (#326)

* update all vendored packages
This commit is contained in:
Tom Limoncelli
2018-02-27 18:24:11 -05:00
committed by GitHub
parent 54de1ff698
commit 43dc9ac92f
55 changed files with 12608 additions and 13551 deletions

View File

@ -1,713 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates table.go and table_test.go based on the authoritative
// public suffix list at https://publicsuffix.org/list/effective_tld_names.dat
//
// The version is derived from
// https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat
// and a human-readable form is at
// https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat
//
// To fetch a particular git revision, such as 5c70ccd250, pass
// -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat"
// and -version "an explicit version string".
import (
"bufio"
"bytes"
"flag"
"fmt"
"go/format"
"io"
"io/ioutil"
"net/http"
"os"
"regexp"
"sort"
"strings"
"golang.org/x/net/idna"
)
const (
// These sum of these four values must be no greater than 32.
nodesBitsChildren = 10
nodesBitsICANN = 1
nodesBitsTextOffset = 15
nodesBitsTextLength = 6
// These sum of these four values must be no greater than 32.
childrenBitsWildcard = 1
childrenBitsNodeType = 2
childrenBitsHi = 14
childrenBitsLo = 14
)
var (
maxChildren int
maxTextOffset int
maxTextLength int
maxHi uint32
maxLo uint32
)
func max(a, b int) int {
if a < b {
return b
}
return a
}
func u32max(a, b uint32) uint32 {
if a < b {
return b
}
return a
}
const (
nodeTypeNormal = 0
nodeTypeException = 1
nodeTypeParentOnly = 2
numNodeType = 3
)
func nodeTypeStr(n int) string {
switch n {
case nodeTypeNormal:
return "+"
case nodeTypeException:
return "!"
case nodeTypeParentOnly:
return "o"
}
panic("unreachable")
}
const (
defaultURL = "https://publicsuffix.org/list/effective_tld_names.dat"
gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat"
)
var (
labelEncoding = map[string]uint32{}
labelsList = []string{}
labelsMap = map[string]bool{}
rules = []string{}
// validSuffixRE is used to check that the entries in the public suffix
// list are in canonical form (after Punycode encoding). Specifically,
// capital letters are not allowed.
validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`)
shaRE = regexp.MustCompile(`"sha":"([^"]+)"`)
dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`)
comments = flag.Bool("comments", false, "generate table.go comments, for debugging")
subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
url = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead")
v = flag.Bool("v", false, "verbose output (to stderr)")
version = flag.String("version", "", "the effective_tld_names.dat version")
)
func main() {
if err := main1(); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}
func main1() error {
flag.Parse()
if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
return fmt.Errorf("not enough bits to encode the nodes table")
}
if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
return fmt.Errorf("not enough bits to encode the children table")
}
if *version == "" {
if *url != defaultURL {
return fmt.Errorf("-version was not specified, and the -url is not the default one")
}
sha, date, err := gitCommit()
if err != nil {
return err
}
*version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date)
}
var r io.Reader = os.Stdin
if *url != "" {
res, err := http.Get(*url)
if err != nil {
return err
}
if res.StatusCode != http.StatusOK {
return fmt.Errorf("bad GET status for %s: %d", *url, res.Status)
}
r = res.Body
defer res.Body.Close()
}
var root node
icann := false
br := bufio.NewReader(r)
for {
s, err := br.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
return err
}
s = strings.TrimSpace(s)
if strings.Contains(s, "BEGIN ICANN DOMAINS") {
icann = true
continue
}
if strings.Contains(s, "END ICANN DOMAINS") {
icann = false
continue
}
if s == "" || strings.HasPrefix(s, "//") {
continue
}
s, err = idna.ToASCII(s)
if err != nil {
return err
}
if !validSuffixRE.MatchString(s) {
return fmt.Errorf("bad publicsuffix.org list data: %q", s)
}
if *subset {
switch {
case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
case s == "ao" || strings.HasSuffix(s, ".ao"):
case s == "ar" || strings.HasSuffix(s, ".ar"):
case s == "arpa" || strings.HasSuffix(s, ".arpa"):
case s == "cy" || strings.HasSuffix(s, ".cy"):
case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
case s == "jp":
case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
case s == "om" || strings.HasSuffix(s, ".om"):
case s == "uk" || strings.HasSuffix(s, ".uk"):
case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
case s == "tw" || strings.HasSuffix(s, ".tw"):
case s == "zw" || strings.HasSuffix(s, ".zw"):
case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
// xn--p1ai is Russian-Cyrillic "рф".
default:
continue
}
}
rules = append(rules, s)
nt, wildcard := nodeTypeNormal, false
switch {
case strings.HasPrefix(s, "*."):
s, nt = s[2:], nodeTypeParentOnly
wildcard = true
case strings.HasPrefix(s, "!"):
s, nt = s[1:], nodeTypeException
}
labels := strings.Split(s, ".")
for n, i := &root, len(labels)-1; i >= 0; i-- {
label := labels[i]
n = n.child(label)
if i == 0 {
if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
n.nodeType = nt
}
n.icann = n.icann && icann
n.wildcard = n.wildcard || wildcard
}
labelsMap[label] = true
}
}
labelsList = make([]string, 0, len(labelsMap))
for label := range labelsMap {
labelsList = append(labelsList, label)
}
sort.Strings(labelsList)
if err := generate(printReal, &root, "table.go"); err != nil {
return err
}
if err := generate(printTest, &root, "table_test.go"); err != nil {
return err
}
return nil
}
func generate(p func(io.Writer, *node) error, root *node, filename string) error {
buf := new(bytes.Buffer)
if err := p(buf, root); err != nil {
return err
}
b, err := format.Source(buf.Bytes())
if err != nil {
return err
}
return ioutil.WriteFile(filename, b, 0644)
}
func gitCommit() (sha, date string, retErr error) {
res, err := http.Get(gitCommitURL)
if err != nil {
return "", "", err
}
if res.StatusCode != http.StatusOK {
return "", "", fmt.Errorf("bad GET status for %s: %d", gitCommitURL, res.Status)
}
defer res.Body.Close()
b, err := ioutil.ReadAll(res.Body)
if err != nil {
return "", "", err
}
if m := shaRE.FindSubmatch(b); m != nil {
sha = string(m[1])
}
if m := dateRE.FindSubmatch(b); m != nil {
date = string(m[1])
}
if sha == "" || date == "" {
retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL)
}
return sha, date, retErr
}
func printTest(w io.Writer, n *node) error {
fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n")
for _, rule := range rules {
fmt.Fprintf(w, "%q,\n", rule)
}
fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n")
if err := n.walk(w, printNodeLabel); err != nil {
return err
}
fmt.Fprintf(w, "}\n")
return nil
}
func printReal(w io.Writer, n *node) error {
const header = `// generated by go run gen.go; DO NOT EDIT
package publicsuffix
const version = %q
const (
nodesBitsChildren = %d
nodesBitsICANN = %d
nodesBitsTextOffset = %d
nodesBitsTextLength = %d
childrenBitsWildcard = %d
childrenBitsNodeType = %d
childrenBitsHi = %d
childrenBitsLo = %d
)
const (
nodeTypeNormal = %d
nodeTypeException = %d
nodeTypeParentOnly = %d
)
// numTLD is the number of top level domains.
const numTLD = %d
`
fmt.Fprintf(w, header, *version,
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
text := combineText(labelsList)
if text == "" {
return fmt.Errorf("internal error: makeText returned no text")
}
for _, label := range labelsList {
offset, length := strings.Index(text, label), len(label)
if offset < 0 {
return fmt.Errorf("internal error: could not find %q in text %q", label, text)
}
maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length)
if offset >= 1<<nodesBitsTextOffset {
return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset)
}
if length >= 1<<nodesBitsTextLength {
return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length)
}
labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
}
fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
for len(text) > 0 {
n, plus := len(text), ""
if n > 64 {
n, plus = 64, " +"
}
fmt.Fprintf(w, "%q%s\n", text[:n], plus)
text = text[n:]
}
if err := n.walk(w, assignIndexes); err != nil {
return err
}
fmt.Fprintf(w, `
// nodes is the list of nodes. Each node is represented as a uint32, which
// encodes the node's children, wildcard bit and node type (as an index into
// the children array), ICANN bit and text.
//
// If the table was generated with the -comments flag, there is a //-comment
// after each node's data. In it is the nodes-array indexes of the children,
// formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The
// nodeType is printed as + for normal, ! for exception, and o for parent-only
// nodes that have children but don't match a domain label in their own right.
// An I denotes an ICANN domain.
//
// The layout within the uint32, from MSB to LSB, is:
// [%2d bits] unused
// [%2d bits] children index
// [%2d bits] ICANN bit
// [%2d bits] text index
// [%2d bits] text length
var nodes = [...]uint32{
`,
32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
if err := n.walk(w, printNode); err != nil {
return err
}
fmt.Fprintf(w, `}
// children is the list of nodes' children, the parent's wildcard bit and the
// parent's node type. If a node has no children then their children index
// will be in the range [0, 6), depending on the wildcard bit and node type.
//
// The layout within the uint32, from MSB to LSB, is:
// [%2d bits] unused
// [%2d bits] wildcard bit
// [%2d bits] node type
// [%2d bits] high nodes index (exclusive) of children
// [%2d bits] low nodes index (inclusive) of children
var children=[...]uint32{
`,
32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo,
childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo)
for i, c := range childrenEncoding {
s := "---------------"
lo := c & (1<<childrenBitsLo - 1)
hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1)
if lo != hi {
s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi)
}
nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1)
wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0
if *comments {
fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n",
c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType))
} else {
fmt.Fprintf(w, "0x%x,\n", c)
}
}
fmt.Fprintf(w, "}\n\n")
fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1)
fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1)
fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1)
fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1)
fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1)
return nil
}
type node struct {
label string
nodeType int
icann bool
wildcard bool
// nodesIndex and childrenIndex are the index of this node in the nodes
// and the index of its children offset/length in the children arrays.
nodesIndex, childrenIndex int
// firstChild is the index of this node's first child, or zero if this
// node has no children.
firstChild int
// children are the node's children, in strictly increasing node label order.
children []*node
}
func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
if err := f(w, n); err != nil {
return err
}
for _, c := range n.children {
if err := c.walk(w, f); err != nil {
return err
}
}
return nil
}
// child returns the child of n with the given label. The child is created if
// it did not exist beforehand.
func (n *node) child(label string) *node {
for _, c := range n.children {
if c.label == label {
return c
}
}
c := &node{
label: label,
nodeType: nodeTypeParentOnly,
icann: true,
}
n.children = append(n.children, c)
sort.Sort(byLabel(n.children))
return c
}
type byLabel []*node
func (b byLabel) Len() int { return len(b) }
func (b byLabel) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label }
var nextNodesIndex int
// childrenEncoding are the encoded entries in the generated children array.
// All these pre-defined entries have no children.
var childrenEncoding = []uint32{
0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal.
1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException.
2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly.
4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal.
5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException.
6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly.
}
var firstCallToAssignIndexes = true
func assignIndexes(w io.Writer, n *node) error {
if len(n.children) != 0 {
// Assign nodesIndex.
n.firstChild = nextNodesIndex
for _, c := range n.children {
c.nodesIndex = nextNodesIndex
nextNodesIndex++
}
// The root node's children is implicit.
if firstCallToAssignIndexes {
firstCallToAssignIndexes = false
return nil
}
// Assign childrenIndex.
maxChildren = max(maxChildren, len(childrenEncoding))
if len(childrenEncoding) >= 1<<nodesBitsChildren {
return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding))
}
n.childrenIndex = len(childrenEncoding)
lo := uint32(n.firstChild)
hi := lo + uint32(len(n.children))
maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi)
if lo >= 1<<childrenBitsLo {
return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo)
}
if hi >= 1<<childrenBitsHi {
return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi)
}
enc := hi<<childrenBitsLo | lo
enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi)
if n.wildcard {
enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType)
}
childrenEncoding = append(childrenEncoding, enc)
} else {
n.childrenIndex = n.nodeType
if n.wildcard {
n.childrenIndex += numNodeType
}
}
return nil
}
func printNode(w io.Writer, n *node) error {
for _, c := range n.children {
s := "---------------"
if len(c.children) != 0 {
s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children))
}
encoding := labelEncoding[c.label]
if c.icann {
encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
}
encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
if *comments {
fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
)
} else {
fmt.Fprintf(w, "0x%x,\n", encoding)
}
}
return nil
}
func printNodeLabel(w io.Writer, n *node) error {
for _, c := range n.children {
fmt.Fprintf(w, "%q,\n", c.label)
}
return nil
}
func icannStr(icann bool) string {
if icann {
return "I"
}
return " "
}
func wildcardStr(wildcard bool) string {
if wildcard {
return "*"
}
return " "
}
// combineText combines all the strings in labelsList to form one giant string.
// Overlapping strings will be merged: "arpa" and "parliament" could yield
// "arparliament".
func combineText(labelsList []string) string {
beforeLength := 0
for _, s := range labelsList {
beforeLength += len(s)
}
text := crush(removeSubstrings(labelsList))
if *v {
fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text))
}
return text
}
type byLength []string
func (s byLength) Len() int { return len(s) }
func (s byLength) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s byLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) }
// removeSubstrings returns a copy of its input with any strings removed
// that are substrings of other provided strings.
func removeSubstrings(input []string) []string {
// Make a copy of input.
ss := append(make([]string, 0, len(input)), input...)
sort.Sort(byLength(ss))
for i, shortString := range ss {
// For each string, only consider strings higher than it in sort order, i.e.
// of equal length or greater.
for _, longString := range ss[i+1:] {
if strings.Contains(longString, shortString) {
ss[i] = ""
break
}
}
}
// Remove the empty strings.
sort.Strings(ss)
for len(ss) > 0 && ss[0] == "" {
ss = ss[1:]
}
return ss
}
// crush combines a list of strings, taking advantage of overlaps. It returns a
// single string that contains each input string as a substring.
func crush(ss []string) string {
maxLabelLen := 0
for _, s := range ss {
if maxLabelLen < len(s) {
maxLabelLen = len(s)
}
}
for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- {
prefixes := makePrefixMap(ss, prefixLen)
for i, s := range ss {
if len(s) <= prefixLen {
continue
}
mergeLabel(ss, i, prefixLen, prefixes)
}
}
return strings.Join(ss, "")
}
// mergeLabel merges the label at ss[i] with the first available matching label
// in prefixMap, where the last "prefixLen" characters in ss[i] match the first
// "prefixLen" characters in the matching label.
// It will merge ss[i] repeatedly until no more matches are available.
// All matching labels merged into ss[i] are replaced by "".
func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) {
s := ss[i]
suffix := s[len(s)-prefixLen:]
for _, j := range prefixes[suffix] {
// Empty strings mean "already used." Also avoid merging with self.
if ss[j] == "" || i == j {
continue
}
if *v {
fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n",
prefixLen, i, j, ss[i], ss[j], suffix)
}
ss[i] += ss[j][prefixLen:]
ss[j] = ""
// ss[i] has a new suffix, so merge again if possible.
// Note: we only have to merge again at the same prefix length. Shorter
// prefix lengths will be handled in the next iteration of crush's for loop.
// Can there be matches for longer prefix lengths, introduced by the merge?
// I believe that any such matches would by necessity have been eliminated
// during substring removal or merged at a higher prefix length. For
// instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde"
// would yield "abcde", which could be merged with "bcdef." However, in
// practice "cde" would already have been elimintated by removeSubstrings.
mergeLabel(ss, i, prefixLen, prefixes)
return
}
}
// prefixMap maps from a prefix to a list of strings containing that prefix. The
// list of strings is represented as indexes into a slice of strings stored
// elsewhere.
type prefixMap map[string][]int
// makePrefixMap constructs a prefixMap from a slice of strings.
func makePrefixMap(ss []string, prefixLen int) prefixMap {
prefixes := make(prefixMap)
for i, s := range ss {
// We use < rather than <= because if a label matches on a prefix equal to
// its full length, that's actually a substring match handled by
// removeSubstrings.
if prefixLen < len(s) {
prefix := s[:prefixLen]
prefixes[prefix] = append(prefixes[prefix], i)
}
}
return prefixes
}

File diff suppressed because it is too large Load Diff

View File

@ -33,32 +33,32 @@ var (
ISO8859_8I encoding.Encoding = &iso8859_8I
iso8859_6E = internal.Encoding{
Encoding: ISO8859_6,
Name: "ISO-8859-6E",
MIB: identifier.ISO88596E,
ISO8859_6,
"ISO-8859-6E",
identifier.ISO88596E,
}
iso8859_6I = internal.Encoding{
Encoding: ISO8859_6,
Name: "ISO-8859-6I",
MIB: identifier.ISO88596I,
ISO8859_6,
"ISO-8859-6I",
identifier.ISO88596I,
}
iso8859_8E = internal.Encoding{
Encoding: ISO8859_8,
Name: "ISO-8859-8E",
MIB: identifier.ISO88598E,
ISO8859_8,
"ISO-8859-8E",
identifier.ISO88598E,
}
iso8859_8I = internal.Encoding{
Encoding: ISO8859_8,
Name: "ISO-8859-8I",
MIB: identifier.ISO88598I,
ISO8859_8,
"ISO-8859-8I",
identifier.ISO88598I,
}
)
// All is a list of all defined encodings in this package.
var All []encoding.Encoding = listAll
var All = listAll
// TODO: implement these encodings, in order of importance.
// ASCII, ISO8859_1: Rather common. Close to Windows 1252.
@ -70,8 +70,8 @@ type utf8Enc struct {
data [3]byte
}
// Charmap is an 8-bit character set encoding.
type Charmap struct {
// charmap describes an 8-bit character set encoding.
type charmap struct {
// name is the encoding's name.
name string
// mib is the encoding type of this encoder.
@ -79,7 +79,7 @@ type Charmap struct {
// asciiSuperset states whether the encoding is a superset of ASCII.
asciiSuperset bool
// low is the lower bound of the encoded byte for a non-ASCII rune. If
// Charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
// charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
low uint8
// replacement is the encoded replacement character.
replacement byte
@ -91,30 +91,26 @@ type Charmap struct {
encode [256]uint32
}
// NewDecoder implements the encoding.Encoding interface.
func (m *Charmap) NewDecoder() *encoding.Decoder {
func (m *charmap) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: charmapDecoder{charmap: m}}
}
// NewEncoder implements the encoding.Encoding interface.
func (m *Charmap) NewEncoder() *encoding.Encoder {
func (m *charmap) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: charmapEncoder{charmap: m}}
}
// String returns the Charmap's name.
func (m *Charmap) String() string {
func (m *charmap) String() string {
return m.name
}
// ID implements an internal interface.
func (m *Charmap) ID() (mib identifier.MIB, other string) {
func (m *charmap) ID() (mib identifier.MIB, other string) {
return m.mib, ""
}
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
type charmapDecoder struct {
transform.NopResetter
charmap *Charmap
charmap *charmap
}
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -146,22 +142,10 @@ func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int,
return nDst, nSrc, err
}
// DecodeByte returns the Charmap's rune decoding of the byte b.
func (m *Charmap) DecodeByte(b byte) rune {
switch x := &m.decode[b]; x.len {
case 1:
return rune(x.data[0])
case 2:
return rune(x.data[0]&0x1f)<<6 | rune(x.data[1]&0x3f)
default:
return rune(x.data[0]&0x0f)<<12 | rune(x.data[1]&0x3f)<<6 | rune(x.data[2]&0x3f)
}
}
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
type charmapEncoder struct {
transform.NopResetter
charmap *Charmap
charmap *charmap
}
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -223,27 +207,3 @@ loop:
}
return nDst, nSrc, err
}
// EncodeRune returns the Charmap's byte encoding of the rune r. ok is whether
// r is in the Charmap's repertoire. If not, b is set to the Charmap's
// replacement byte. This is often the ASCII substitute character '\x1a'.
func (m *Charmap) EncodeRune(r rune) (b byte, ok bool) {
if r < utf8.RuneSelf && m.asciiSuperset {
return byte(r), true
}
for low, high := int(m.low), 0x100; ; {
if low >= high {
return m.replacement, false
}
mid := (low + high) / 2
got := m.encode[mid]
gotRune := rune(got & (1<<24 - 1))
if gotRune < r {
low = mid + 1
} else if gotRune > r {
high = mid
} else {
return byte(got >> 24), true
}
}
}

View File

@ -1,556 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/internal/gen"
)
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
` !"#$%&'()*+,-./0123456789:;<=>?` +
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
var encodings = []struct {
name string
mib string
comment string
varName string
replacement byte
mapping string
}{
{
"IBM Code Page 037",
"IBM037",
"",
"CodePage037",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
},
{
"IBM Code Page 437",
"PC8CodePage437",
"",
"CodePage437",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
},
{
"IBM Code Page 850",
"PC850Multilingual",
"",
"CodePage850",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
},
{
"IBM Code Page 852",
"PCp852",
"",
"CodePage852",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
},
{
"IBM Code Page 855",
"IBM855",
"",
"CodePage855",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
},
{
"Windows Code Page 858", // PC latin1 with Euro
"IBM00858",
"",
"CodePage858",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
},
{
"IBM Code Page 860",
"IBM860",
"",
"CodePage860",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
},
{
"IBM Code Page 862",
"PC862LatinHebrew",
"",
"CodePage862",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
},
{
"IBM Code Page 863",
"IBM863",
"",
"CodePage863",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
},
{
"IBM Code Page 865",
"IBM865",
"",
"CodePage865",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
},
{
"IBM Code Page 866",
"IBM866",
"",
"CodePage866",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-ibm866.txt",
},
{
"IBM Code Page 1047",
"IBM1047",
"",
"CodePage1047",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
},
{
"IBM Code Page 1140",
"IBM01140",
"",
"CodePage1140",
0x3f,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
},
{
"ISO 8859-1",
"ISOLatin1",
"",
"ISO8859_1",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
},
{
"ISO 8859-2",
"ISOLatin2",
"",
"ISO8859_2",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
},
{
"ISO 8859-3",
"ISOLatin3",
"",
"ISO8859_3",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
},
{
"ISO 8859-4",
"ISOLatin4",
"",
"ISO8859_4",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
},
{
"ISO 8859-5",
"ISOLatinCyrillic",
"",
"ISO8859_5",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
},
{
"ISO 8859-6",
"ISOLatinArabic",
"",
"ISO8859_6,ISO8859_6E,ISO8859_6I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
},
{
"ISO 8859-7",
"ISOLatinGreek",
"",
"ISO8859_7",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
},
{
"ISO 8859-8",
"ISOLatinHebrew",
"",
"ISO8859_8,ISO8859_8E,ISO8859_8I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
},
{
"ISO 8859-9",
"ISOLatin5",
"",
"ISO8859_9",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
},
{
"ISO 8859-10",
"ISOLatin6",
"",
"ISO8859_10",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
},
{
"ISO 8859-13",
"ISO885913",
"",
"ISO8859_13",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
},
{
"ISO 8859-14",
"ISO885914",
"",
"ISO8859_14",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
},
{
"ISO 8859-15",
"ISO885915",
"",
"ISO8859_15",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
},
{
"ISO 8859-16",
"ISO885916",
"",
"ISO8859_16",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
},
{
"KOI8-R",
"KOI8R",
"",
"KOI8R",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
},
{
"KOI8-U",
"KOI8U",
"",
"KOI8U",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
},
{
"Macintosh",
"Macintosh",
"",
"Macintosh",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-macintosh.txt",
},
{
"Macintosh Cyrillic",
"MacintoshCyrillic",
"",
"MacintoshCyrillic",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
},
{
"Windows 874",
"Windows874",
"",
"Windows874",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-874.txt",
},
{
"Windows 1250",
"Windows1250",
"",
"Windows1250",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
},
{
"Windows 1251",
"Windows1251",
"",
"Windows1251",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
},
{
"Windows 1252",
"Windows1252",
"",
"Windows1252",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
},
{
"Windows 1253",
"Windows1253",
"",
"Windows1253",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
},
{
"Windows 1254",
"Windows1254",
"",
"Windows1254",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
},
{
"Windows 1255",
"Windows1255",
"",
"Windows1255",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
},
{
"Windows 1256",
"Windows1256",
"",
"Windows1256",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
},
{
"Windows 1257",
"Windows1257",
"",
"Windows1257",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
},
{
"Windows 1258",
"Windows1258",
"",
"Windows1258",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
},
{
"X-User-Defined",
"XUserDefined",
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
"XUserDefined",
encoding.ASCIISub,
ascii +
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
},
}
func getWHATWG(url string) string {
res, err := http.Get(url)
if err != nil {
log.Fatalf("%q: Get: %v", url, err)
}
defer res.Body.Close()
mapping := make([]rune, 128)
for i := range mapping {
mapping[i] = '\ufffd'
}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := 0, 0
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 128 <= x {
log.Fatalf("code %d is out of range", x)
}
if 0x80 <= y && y < 0xa0 {
// We diverge from the WHATWG spec by mapping control characters
// in the range [0x80, 0xa0) to U+FFFD.
continue
}
mapping[x] = rune(y)
}
return ascii + string(mapping)
}
func getUCM(url string) string {
res, err := http.Get(url)
if err != nil {
log.Fatalf("%q: Get: %v", url, err)
}
defer res.Body.Close()
mapping := make([]rune, 256)
for i := range mapping {
mapping[i] = '\ufffd'
}
charsFound := 0
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
var c byte
var r rune
if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
continue
}
mapping[c] = r
charsFound++
}
if charsFound < 200 {
log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
}
return string(mapping)
}
func main() {
mibs := map[string]bool{}
all := []string{}
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "charmap")
printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
printf("import (\n")
printf("\t\"golang.org/x/text/encoding\"\n")
printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
printf(")\n\n")
for _, e := range encodings {
varNames := strings.Split(e.varName, ",")
all = append(all, varNames...)
varName := varNames[0]
switch {
case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
e.mapping = getWHATWG(e.mapping)
case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
e.mapping = getUCM(e.mapping)
}
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
if asciiSuperset {
low = 0x80
}
lvn := 1
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
lvn = 3
}
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
printf("// %s is the %s encoding.\n", varName, e.name)
if e.comment != "" {
printf("//\n// %s\n", e.comment)
}
printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
varName, lowerVarName, lowerVarName, e.name)
if mibs[e.mib] {
log.Fatalf("MIB type %q declared multiple times.", e.mib)
}
printf("mib: identifier.%s,\n", e.mib)
printf("asciiSuperset: %t,\n", asciiSuperset)
printf("low: 0x%02x,\n", low)
printf("replacement: 0x%02x,\n", e.replacement)
printf("decode: [256]utf8Enc{\n")
i, backMapping := 0, map[rune]byte{}
for _, c := range e.mapping {
if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
backMapping[c] = byte(i)
}
var buf [8]byte
n := utf8.EncodeRune(buf[:], c)
if n > 3 {
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
}
printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
if i%2 == 1 {
printf("\n")
}
i++
}
printf("},\n")
printf("encode: [256]uint32{\n")
encode := make([]uint32, 0, 256)
for c, i := range backMapping {
encode = append(encode, uint32(i)<<24|uint32(c))
}
sort.Sort(byRune(encode))
for len(encode) < cap(encode) {
encode = append(encode, encode[len(encode)-1])
}
for i, enc := range encode {
printf("0x%08x,", enc)
if i%8 == 7 {
printf("\n")
}
}
printf("},\n}\n")
// Add an estimate of the size of a single Charmap{} struct value, which
// includes two 256 elem arrays of 4 bytes and some extra fields, which
// align to 3 uint64s on 64-bit architectures.
w.Size += 2*4*256 + 3*8
}
// TODO: add proper line breaking.
printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
}
type byRune []uint32
func (b byRune) Len() int { return len(b) }
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

View File

@ -1,4 +1,4 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// This file was generated by go generate; DO NOT EDIT
package charmap
@ -7,185 +7,10 @@ import (
"golang.org/x/text/encoding/internal/identifier"
)
// CodePage037 is the IBM Code Page 037 encoding.
var CodePage037 *Charmap = &codePage037
var codePage037 = Charmap{
name: "IBM Code Page 037",
mib: identifier.IBM037,
asciiSuperset: false,
low: 0x00,
replacement: 0x3f,
decode: [256]utf8Enc{
{1, [3]byte{0x00, 0x00, 0x00}}, {1, [3]byte{0x01, 0x00, 0x00}},
{1, [3]byte{0x02, 0x00, 0x00}}, {1, [3]byte{0x03, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x9c, 0x00}}, {1, [3]byte{0x09, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x86, 0x00}}, {1, [3]byte{0x7f, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x97, 0x00}}, {2, [3]byte{0xc2, 0x8d, 0x00}},
{2, [3]byte{0xc2, 0x8e, 0x00}}, {1, [3]byte{0x0b, 0x00, 0x00}},
{1, [3]byte{0x0c, 0x00, 0x00}}, {1, [3]byte{0x0d, 0x00, 0x00}},
{1, [3]byte{0x0e, 0x00, 0x00}}, {1, [3]byte{0x0f, 0x00, 0x00}},
{1, [3]byte{0x10, 0x00, 0x00}}, {1, [3]byte{0x11, 0x00, 0x00}},
{1, [3]byte{0x12, 0x00, 0x00}}, {1, [3]byte{0x13, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x9d, 0x00}}, {2, [3]byte{0xc2, 0x85, 0x00}},
{1, [3]byte{0x08, 0x00, 0x00}}, {2, [3]byte{0xc2, 0x87, 0x00}},
{1, [3]byte{0x18, 0x00, 0x00}}, {1, [3]byte{0x19, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x92, 0x00}}, {2, [3]byte{0xc2, 0x8f, 0x00}},
{1, [3]byte{0x1c, 0x00, 0x00}}, {1, [3]byte{0x1d, 0x00, 0x00}},
{1, [3]byte{0x1e, 0x00, 0x00}}, {1, [3]byte{0x1f, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x80, 0x00}}, {2, [3]byte{0xc2, 0x81, 0x00}},
{2, [3]byte{0xc2, 0x82, 0x00}}, {2, [3]byte{0xc2, 0x83, 0x00}},
{2, [3]byte{0xc2, 0x84, 0x00}}, {1, [3]byte{0x0a, 0x00, 0x00}},
{1, [3]byte{0x17, 0x00, 0x00}}, {1, [3]byte{0x1b, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x88, 0x00}}, {2, [3]byte{0xc2, 0x89, 0x00}},
{2, [3]byte{0xc2, 0x8a, 0x00}}, {2, [3]byte{0xc2, 0x8b, 0x00}},
{2, [3]byte{0xc2, 0x8c, 0x00}}, {1, [3]byte{0x05, 0x00, 0x00}},
{1, [3]byte{0x06, 0x00, 0x00}}, {1, [3]byte{0x07, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x90, 0x00}}, {2, [3]byte{0xc2, 0x91, 0x00}},
{1, [3]byte{0x16, 0x00, 0x00}}, {2, [3]byte{0xc2, 0x93, 0x00}},
{2, [3]byte{0xc2, 0x94, 0x00}}, {2, [3]byte{0xc2, 0x95, 0x00}},
{2, [3]byte{0xc2, 0x96, 0x00}}, {1, [3]byte{0x04, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x98, 0x00}}, {2, [3]byte{0xc2, 0x99, 0x00}},
{2, [3]byte{0xc2, 0x9a, 0x00}}, {2, [3]byte{0xc2, 0x9b, 0x00}},
{1, [3]byte{0x14, 0x00, 0x00}}, {1, [3]byte{0x15, 0x00, 0x00}},
{2, [3]byte{0xc2, 0x9e, 0x00}}, {1, [3]byte{0x1a, 0x00, 0x00}},
{1, [3]byte{0x20, 0x00, 0x00}}, {2, [3]byte{0xc2, 0xa0, 0x00}},
{2, [3]byte{0xc3, 0xa2, 0x00}}, {2, [3]byte{0xc3, 0xa4, 0x00}},
{2, [3]byte{0xc3, 0xa0, 0x00}}, {2, [3]byte{0xc3, 0xa1, 0x00}},
{2, [3]byte{0xc3, 0xa3, 0x00}}, {2, [3]byte{0xc3, 0xa5, 0x00}},
{2, [3]byte{0xc3, 0xa7, 0x00}}, {2, [3]byte{0xc3, 0xb1, 0x00}},
{2, [3]byte{0xc2, 0xa2, 0x00}}, {1, [3]byte{0x2e, 0x00, 0x00}},
{1, [3]byte{0x3c, 0x00, 0x00}}, {1, [3]byte{0x28, 0x00, 0x00}},
{1, [3]byte{0x2b, 0x00, 0x00}}, {1, [3]byte{0x7c, 0x00, 0x00}},
{1, [3]byte{0x26, 0x00, 0x00}}, {2, [3]byte{0xc3, 0xa9, 0x00}},
{2, [3]byte{0xc3, 0xaa, 0x00}}, {2, [3]byte{0xc3, 0xab, 0x00}},
{2, [3]byte{0xc3, 0xa8, 0x00}}, {2, [3]byte{0xc3, 0xad, 0x00}},
{2, [3]byte{0xc3, 0xae, 0x00}}, {2, [3]byte{0xc3, 0xaf, 0x00}},
{2, [3]byte{0xc3, 0xac, 0x00}}, {2, [3]byte{0xc3, 0x9f, 0x00}},
{1, [3]byte{0x21, 0x00, 0x00}}, {1, [3]byte{0x24, 0x00, 0x00}},
{1, [3]byte{0x2a, 0x00, 0x00}}, {1, [3]byte{0x29, 0x00, 0x00}},
{1, [3]byte{0x3b, 0x00, 0x00}}, {2, [3]byte{0xc2, 0xac, 0x00}},
{1, [3]byte{0x2d, 0x00, 0x00}}, {1, [3]byte{0x2f, 0x00, 0x00}},
{2, [3]byte{0xc3, 0x82, 0x00}}, {2, [3]byte{0xc3, 0x84, 0x00}},
{2, [3]byte{0xc3, 0x80, 0x00}}, {2, [3]byte{0xc3, 0x81, 0x00}},
{2, [3]byte{0xc3, 0x83, 0x00}}, {2, [3]byte{0xc3, 0x85, 0x00}},
{2, [3]byte{0xc3, 0x87, 0x00}}, {2, [3]byte{0xc3, 0x91, 0x00}},
{2, [3]byte{0xc2, 0xa6, 0x00}}, {1, [3]byte{0x2c, 0x00, 0x00}},
{1, [3]byte{0x25, 0x00, 0x00}}, {1, [3]byte{0x5f, 0x00, 0x00}},
{1, [3]byte{0x3e, 0x00, 0x00}}, {1, [3]byte{0x3f, 0x00, 0x00}},
{2, [3]byte{0xc3, 0xb8, 0x00}}, {2, [3]byte{0xc3, 0x89, 0x00}},
{2, [3]byte{0xc3, 0x8a, 0x00}}, {2, [3]byte{0xc3, 0x8b, 0x00}},
{2, [3]byte{0xc3, 0x88, 0x00}}, {2, [3]byte{0xc3, 0x8d, 0x00}},
{2, [3]byte{0xc3, 0x8e, 0x00}}, {2, [3]byte{0xc3, 0x8f, 0x00}},
{2, [3]byte{0xc3, 0x8c, 0x00}}, {1, [3]byte{0x60, 0x00, 0x00}},
{1, [3]byte{0x3a, 0x00, 0x00}}, {1, [3]byte{0x23, 0x00, 0x00}},
{1, [3]byte{0x40, 0x00, 0x00}}, {1, [3]byte{0x27, 0x00, 0x00}},
{1, [3]byte{0x3d, 0x00, 0x00}}, {1, [3]byte{0x22, 0x00, 0x00}},
{2, [3]byte{0xc3, 0x98, 0x00}}, {1, [3]byte{0x61, 0x00, 0x00}},
{1, [3]byte{0x62, 0x00, 0x00}}, {1, [3]byte{0x63, 0x00, 0x00}},
{1, [3]byte{0x64, 0x00, 0x00}}, {1, [3]byte{0x65, 0x00, 0x00}},
{1, [3]byte{0x66, 0x00, 0x00}}, {1, [3]byte{0x67, 0x00, 0x00}},
{1, [3]byte{0x68, 0x00, 0x00}}, {1, [3]byte{0x69, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xab, 0x00}}, {2, [3]byte{0xc2, 0xbb, 0x00}},
{2, [3]byte{0xc3, 0xb0, 0x00}}, {2, [3]byte{0xc3, 0xbd, 0x00}},
{2, [3]byte{0xc3, 0xbe, 0x00}}, {2, [3]byte{0xc2, 0xb1, 0x00}},
{2, [3]byte{0xc2, 0xb0, 0x00}}, {1, [3]byte{0x6a, 0x00, 0x00}},
{1, [3]byte{0x6b, 0x00, 0x00}}, {1, [3]byte{0x6c, 0x00, 0x00}},
{1, [3]byte{0x6d, 0x00, 0x00}}, {1, [3]byte{0x6e, 0x00, 0x00}},
{1, [3]byte{0x6f, 0x00, 0x00}}, {1, [3]byte{0x70, 0x00, 0x00}},
{1, [3]byte{0x71, 0x00, 0x00}}, {1, [3]byte{0x72, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xaa, 0x00}}, {2, [3]byte{0xc2, 0xba, 0x00}},
{2, [3]byte{0xc3, 0xa6, 0x00}}, {2, [3]byte{0xc2, 0xb8, 0x00}},
{2, [3]byte{0xc3, 0x86, 0x00}}, {2, [3]byte{0xc2, 0xa4, 0x00}},
{2, [3]byte{0xc2, 0xb5, 0x00}}, {1, [3]byte{0x7e, 0x00, 0x00}},
{1, [3]byte{0x73, 0x00, 0x00}}, {1, [3]byte{0x74, 0x00, 0x00}},
{1, [3]byte{0x75, 0x00, 0x00}}, {1, [3]byte{0x76, 0x00, 0x00}},
{1, [3]byte{0x77, 0x00, 0x00}}, {1, [3]byte{0x78, 0x00, 0x00}},
{1, [3]byte{0x79, 0x00, 0x00}}, {1, [3]byte{0x7a, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xa1, 0x00}}, {2, [3]byte{0xc2, 0xbf, 0x00}},
{2, [3]byte{0xc3, 0x90, 0x00}}, {2, [3]byte{0xc3, 0x9d, 0x00}},
{2, [3]byte{0xc3, 0x9e, 0x00}}, {2, [3]byte{0xc2, 0xae, 0x00}},
{1, [3]byte{0x5e, 0x00, 0x00}}, {2, [3]byte{0xc2, 0xa3, 0x00}},
{2, [3]byte{0xc2, 0xa5, 0x00}}, {2, [3]byte{0xc2, 0xb7, 0x00}},
{2, [3]byte{0xc2, 0xa9, 0x00}}, {2, [3]byte{0xc2, 0xa7, 0x00}},
{2, [3]byte{0xc2, 0xb6, 0x00}}, {2, [3]byte{0xc2, 0xbc, 0x00}},
{2, [3]byte{0xc2, 0xbd, 0x00}}, {2, [3]byte{0xc2, 0xbe, 0x00}},
{1, [3]byte{0x5b, 0x00, 0x00}}, {1, [3]byte{0x5d, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xaf, 0x00}}, {2, [3]byte{0xc2, 0xa8, 0x00}},
{2, [3]byte{0xc2, 0xb4, 0x00}}, {2, [3]byte{0xc3, 0x97, 0x00}},
{1, [3]byte{0x7b, 0x00, 0x00}}, {1, [3]byte{0x41, 0x00, 0x00}},
{1, [3]byte{0x42, 0x00, 0x00}}, {1, [3]byte{0x43, 0x00, 0x00}},
{1, [3]byte{0x44, 0x00, 0x00}}, {1, [3]byte{0x45, 0x00, 0x00}},
{1, [3]byte{0x46, 0x00, 0x00}}, {1, [3]byte{0x47, 0x00, 0x00}},
{1, [3]byte{0x48, 0x00, 0x00}}, {1, [3]byte{0x49, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xad, 0x00}}, {2, [3]byte{0xc3, 0xb4, 0x00}},
{2, [3]byte{0xc3, 0xb6, 0x00}}, {2, [3]byte{0xc3, 0xb2, 0x00}},
{2, [3]byte{0xc3, 0xb3, 0x00}}, {2, [3]byte{0xc3, 0xb5, 0x00}},
{1, [3]byte{0x7d, 0x00, 0x00}}, {1, [3]byte{0x4a, 0x00, 0x00}},
{1, [3]byte{0x4b, 0x00, 0x00}}, {1, [3]byte{0x4c, 0x00, 0x00}},
{1, [3]byte{0x4d, 0x00, 0x00}}, {1, [3]byte{0x4e, 0x00, 0x00}},
{1, [3]byte{0x4f, 0x00, 0x00}}, {1, [3]byte{0x50, 0x00, 0x00}},
{1, [3]byte{0x51, 0x00, 0x00}}, {1, [3]byte{0x52, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xb9, 0x00}}, {2, [3]byte{0xc3, 0xbb, 0x00}},
{2, [3]byte{0xc3, 0xbc, 0x00}}, {2, [3]byte{0xc3, 0xb9, 0x00}},
{2, [3]byte{0xc3, 0xba, 0x00}}, {2, [3]byte{0xc3, 0xbf, 0x00}},
{1, [3]byte{0x5c, 0x00, 0x00}}, {2, [3]byte{0xc3, 0xb7, 0x00}},
{1, [3]byte{0x53, 0x00, 0x00}}, {1, [3]byte{0x54, 0x00, 0x00}},
{1, [3]byte{0x55, 0x00, 0x00}}, {1, [3]byte{0x56, 0x00, 0x00}},
{1, [3]byte{0x57, 0x00, 0x00}}, {1, [3]byte{0x58, 0x00, 0x00}},
{1, [3]byte{0x59, 0x00, 0x00}}, {1, [3]byte{0x5a, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xb2, 0x00}}, {2, [3]byte{0xc3, 0x94, 0x00}},
{2, [3]byte{0xc3, 0x96, 0x00}}, {2, [3]byte{0xc3, 0x92, 0x00}},
{2, [3]byte{0xc3, 0x93, 0x00}}, {2, [3]byte{0xc3, 0x95, 0x00}},
{1, [3]byte{0x30, 0x00, 0x00}}, {1, [3]byte{0x31, 0x00, 0x00}},
{1, [3]byte{0x32, 0x00, 0x00}}, {1, [3]byte{0x33, 0x00, 0x00}},
{1, [3]byte{0x34, 0x00, 0x00}}, {1, [3]byte{0x35, 0x00, 0x00}},
{1, [3]byte{0x36, 0x00, 0x00}}, {1, [3]byte{0x37, 0x00, 0x00}},
{1, [3]byte{0x38, 0x00, 0x00}}, {1, [3]byte{0x39, 0x00, 0x00}},
{2, [3]byte{0xc2, 0xb3, 0x00}}, {2, [3]byte{0xc3, 0x9b, 0x00}},
{2, [3]byte{0xc3, 0x9c, 0x00}}, {2, [3]byte{0xc3, 0x99, 0x00}},
{2, [3]byte{0xc3, 0x9a, 0x00}}, {2, [3]byte{0xc2, 0x9f, 0x00}},
},
encode: [256]uint32{
0x00000000, 0x01000001, 0x02000002, 0x03000003, 0x37000004, 0x2d000005, 0x2e000006, 0x2f000007,
0x16000008, 0x05000009, 0x2500000a, 0x0b00000b, 0x0c00000c, 0x0d00000d, 0x0e00000e, 0x0f00000f,
0x10000010, 0x11000011, 0x12000012, 0x13000013, 0x3c000014, 0x3d000015, 0x32000016, 0x26000017,
0x18000018, 0x19000019, 0x3f00001a, 0x2700001b, 0x1c00001c, 0x1d00001d, 0x1e00001e, 0x1f00001f,
0x40000020, 0x5a000021, 0x7f000022, 0x7b000023, 0x5b000024, 0x6c000025, 0x50000026, 0x7d000027,
0x4d000028, 0x5d000029, 0x5c00002a, 0x4e00002b, 0x6b00002c, 0x6000002d, 0x4b00002e, 0x6100002f,
0xf0000030, 0xf1000031, 0xf2000032, 0xf3000033, 0xf4000034, 0xf5000035, 0xf6000036, 0xf7000037,
0xf8000038, 0xf9000039, 0x7a00003a, 0x5e00003b, 0x4c00003c, 0x7e00003d, 0x6e00003e, 0x6f00003f,
0x7c000040, 0xc1000041, 0xc2000042, 0xc3000043, 0xc4000044, 0xc5000045, 0xc6000046, 0xc7000047,
0xc8000048, 0xc9000049, 0xd100004a, 0xd200004b, 0xd300004c, 0xd400004d, 0xd500004e, 0xd600004f,
0xd7000050, 0xd8000051, 0xd9000052, 0xe2000053, 0xe3000054, 0xe4000055, 0xe5000056, 0xe6000057,
0xe7000058, 0xe8000059, 0xe900005a, 0xba00005b, 0xe000005c, 0xbb00005d, 0xb000005e, 0x6d00005f,
0x79000060, 0x81000061, 0x82000062, 0x83000063, 0x84000064, 0x85000065, 0x86000066, 0x87000067,
0x88000068, 0x89000069, 0x9100006a, 0x9200006b, 0x9300006c, 0x9400006d, 0x9500006e, 0x9600006f,
0x97000070, 0x98000071, 0x99000072, 0xa2000073, 0xa3000074, 0xa4000075, 0xa5000076, 0xa6000077,
0xa7000078, 0xa8000079, 0xa900007a, 0xc000007b, 0x4f00007c, 0xd000007d, 0xa100007e, 0x0700007f,
0x20000080, 0x21000081, 0x22000082, 0x23000083, 0x24000084, 0x15000085, 0x06000086, 0x17000087,
0x28000088, 0x29000089, 0x2a00008a, 0x2b00008b, 0x2c00008c, 0x0900008d, 0x0a00008e, 0x1b00008f,
0x30000090, 0x31000091, 0x1a000092, 0x33000093, 0x34000094, 0x35000095, 0x36000096, 0x08000097,
0x38000098, 0x39000099, 0x3a00009a, 0x3b00009b, 0x0400009c, 0x1400009d, 0x3e00009e, 0xff00009f,
0x410000a0, 0xaa0000a1, 0x4a0000a2, 0xb10000a3, 0x9f0000a4, 0xb20000a5, 0x6a0000a6, 0xb50000a7,
0xbd0000a8, 0xb40000a9, 0x9a0000aa, 0x8a0000ab, 0x5f0000ac, 0xca0000ad, 0xaf0000ae, 0xbc0000af,
0x900000b0, 0x8f0000b1, 0xea0000b2, 0xfa0000b3, 0xbe0000b4, 0xa00000b5, 0xb60000b6, 0xb30000b7,
0x9d0000b8, 0xda0000b9, 0x9b0000ba, 0x8b0000bb, 0xb70000bc, 0xb80000bd, 0xb90000be, 0xab0000bf,
0x640000c0, 0x650000c1, 0x620000c2, 0x660000c3, 0x630000c4, 0x670000c5, 0x9e0000c6, 0x680000c7,
0x740000c8, 0x710000c9, 0x720000ca, 0x730000cb, 0x780000cc, 0x750000cd, 0x760000ce, 0x770000cf,
0xac0000d0, 0x690000d1, 0xed0000d2, 0xee0000d3, 0xeb0000d4, 0xef0000d5, 0xec0000d6, 0xbf0000d7,
0x800000d8, 0xfd0000d9, 0xfe0000da, 0xfb0000db, 0xfc0000dc, 0xad0000dd, 0xae0000de, 0x590000df,
0x440000e0, 0x450000e1, 0x420000e2, 0x460000e3, 0x430000e4, 0x470000e5, 0x9c0000e6, 0x480000e7,
0x540000e8, 0x510000e9, 0x520000ea, 0x530000eb, 0x580000ec, 0x550000ed, 0x560000ee, 0x570000ef,
0x8c0000f0, 0x490000f1, 0xcd0000f2, 0xce0000f3, 0xcb0000f4, 0xcf0000f5, 0xcc0000f6, 0xe10000f7,
0x700000f8, 0xdd0000f9, 0xde0000fa, 0xdb0000fb, 0xdc0000fc, 0x8d0000fd, 0x8e0000fe, 0xdf0000ff,
},
}
// CodePage437 is the IBM Code Page 437 encoding.
var CodePage437 *Charmap = &codePage437
var CodePage437 encoding.Encoding = &codePage437
var codePage437 = Charmap{
var codePage437 = charmap{
name: "IBM Code Page 437",
mib: identifier.PC8CodePage437,
asciiSuperset: true,
@ -358,9 +183,9 @@ var codePage437 = Charmap{
}
// CodePage850 is the IBM Code Page 850 encoding.
var CodePage850 *Charmap = &codePage850
var CodePage850 encoding.Encoding = &codePage850
var codePage850 = Charmap{
var codePage850 = charmap{
name: "IBM Code Page 850",
mib: identifier.PC850Multilingual,
asciiSuperset: true,
@ -533,9 +358,9 @@ var codePage850 = Charmap{
}
// CodePage852 is the IBM Code Page 852 encoding.
var CodePage852 *Charmap = &codePage852
var CodePage852 encoding.Encoding = &codePage852
var codePage852 = Charmap{
var codePage852 = charmap{
name: "IBM Code Page 852",
mib: identifier.PCp852,
asciiSuperset: true,
@ -708,9 +533,9 @@ var codePage852 = Charmap{
}
// CodePage855 is the IBM Code Page 855 encoding.
var CodePage855 *Charmap = &codePage855
var CodePage855 encoding.Encoding = &codePage855
var codePage855 = Charmap{
var codePage855 = charmap{
name: "IBM Code Page 855",
mib: identifier.IBM855,
asciiSuperset: true,
@ -883,9 +708,9 @@ var codePage855 = Charmap{
}
// CodePage858 is the Windows Code Page 858 encoding.
var CodePage858 *Charmap = &codePage858
var CodePage858 encoding.Encoding = &codePage858
var codePage858 = Charmap{
var codePage858 = charmap{
name: "Windows Code Page 858",
mib: identifier.IBM00858,
asciiSuperset: true,
@ -1058,9 +883,9 @@ var codePage858 = Charmap{
}
// CodePage860 is the IBM Code Page 860 encoding.
var CodePage860 *Charmap = &codePage860
var CodePage860 encoding.Encoding = &codePage860
var codePage860 = Charmap{
var codePage860 = charmap{
name: "IBM Code Page 860",
mib: identifier.IBM860,
asciiSuperset: true,
@ -1233,9 +1058,9 @@ var codePage860 = Charmap{
}
// CodePage862 is the IBM Code Page 862 encoding.
var CodePage862 *Charmap = &codePage862
var CodePage862 encoding.Encoding = &codePage862
var codePage862 = Charmap{
var codePage862 = charmap{
name: "IBM Code Page 862",
mib: identifier.PC862LatinHebrew,
asciiSuperset: true,
@ -1408,9 +1233,9 @@ var codePage862 = Charmap{
}
// CodePage863 is the IBM Code Page 863 encoding.
var CodePage863 *Charmap = &codePage863
var CodePage863 encoding.Encoding = &codePage863
var codePage863 = Charmap{
var codePage863 = charmap{
name: "IBM Code Page 863",
mib: identifier.IBM863,
asciiSuperset: true,
@ -1583,9 +1408,9 @@ var codePage863 = Charmap{
}
// CodePage865 is the IBM Code Page 865 encoding.
var CodePage865 *Charmap = &codePage865
var CodePage865 encoding.Encoding = &codePage865
var codePage865 = Charmap{
var codePage865 = charmap{
name: "IBM Code Page 865",
mib: identifier.IBM865,
asciiSuperset: true,
@ -1758,9 +1583,9 @@ var codePage865 = Charmap{
}
// CodePage866 is the IBM Code Page 866 encoding.
var CodePage866 *Charmap = &codePage866
var CodePage866 encoding.Encoding = &codePage866
var codePage866 = Charmap{
var codePage866 = charmap{
name: "IBM Code Page 866",
mib: identifier.IBM866,
asciiSuperset: true,
@ -1933,9 +1758,9 @@ var codePage866 = Charmap{
}
// CodePage1047 is the IBM Code Page 1047 encoding.
var CodePage1047 *Charmap = &codePage1047
var CodePage1047 encoding.Encoding = &codePage1047
var codePage1047 = Charmap{
var codePage1047 = charmap{
name: "IBM Code Page 1047",
mib: identifier.IBM1047,
asciiSuperset: false,
@ -2108,9 +1933,9 @@ var codePage1047 = Charmap{
}
// CodePage1140 is the IBM Code Page 1140 encoding.
var CodePage1140 *Charmap = &codePage1140
var CodePage1140 encoding.Encoding = &codePage1140
var codePage1140 = Charmap{
var codePage1140 = charmap{
name: "IBM Code Page 1140",
mib: identifier.IBM01140,
asciiSuperset: false,
@ -2283,9 +2108,9 @@ var codePage1140 = Charmap{
}
// ISO8859_1 is the ISO 8859-1 encoding.
var ISO8859_1 *Charmap = &iso8859_1
var ISO8859_1 encoding.Encoding = &iso8859_1
var iso8859_1 = Charmap{
var iso8859_1 = charmap{
name: "ISO 8859-1",
mib: identifier.ISOLatin1,
asciiSuperset: true,
@ -2458,9 +2283,9 @@ var iso8859_1 = Charmap{
}
// ISO8859_2 is the ISO 8859-2 encoding.
var ISO8859_2 *Charmap = &iso8859_2
var ISO8859_2 encoding.Encoding = &iso8859_2
var iso8859_2 = Charmap{
var iso8859_2 = charmap{
name: "ISO 8859-2",
mib: identifier.ISOLatin2,
asciiSuperset: true,
@ -2633,9 +2458,9 @@ var iso8859_2 = Charmap{
}
// ISO8859_3 is the ISO 8859-3 encoding.
var ISO8859_3 *Charmap = &iso8859_3
var ISO8859_3 encoding.Encoding = &iso8859_3
var iso8859_3 = Charmap{
var iso8859_3 = charmap{
name: "ISO 8859-3",
mib: identifier.ISOLatin3,
asciiSuperset: true,
@ -2808,9 +2633,9 @@ var iso8859_3 = Charmap{
}
// ISO8859_4 is the ISO 8859-4 encoding.
var ISO8859_4 *Charmap = &iso8859_4
var ISO8859_4 encoding.Encoding = &iso8859_4
var iso8859_4 = Charmap{
var iso8859_4 = charmap{
name: "ISO 8859-4",
mib: identifier.ISOLatin4,
asciiSuperset: true,
@ -2983,9 +2808,9 @@ var iso8859_4 = Charmap{
}
// ISO8859_5 is the ISO 8859-5 encoding.
var ISO8859_5 *Charmap = &iso8859_5
var ISO8859_5 encoding.Encoding = &iso8859_5
var iso8859_5 = Charmap{
var iso8859_5 = charmap{
name: "ISO 8859-5",
mib: identifier.ISOLatinCyrillic,
asciiSuperset: true,
@ -3158,9 +2983,9 @@ var iso8859_5 = Charmap{
}
// ISO8859_6 is the ISO 8859-6 encoding.
var ISO8859_6 *Charmap = &iso8859_6
var ISO8859_6 encoding.Encoding = &iso8859_6
var iso8859_6 = Charmap{
var iso8859_6 = charmap{
name: "ISO 8859-6",
mib: identifier.ISOLatinArabic,
asciiSuperset: true,
@ -3333,9 +3158,9 @@ var iso8859_6 = Charmap{
}
// ISO8859_7 is the ISO 8859-7 encoding.
var ISO8859_7 *Charmap = &iso8859_7
var ISO8859_7 encoding.Encoding = &iso8859_7
var iso8859_7 = Charmap{
var iso8859_7 = charmap{
name: "ISO 8859-7",
mib: identifier.ISOLatinGreek,
asciiSuperset: true,
@ -3508,9 +3333,9 @@ var iso8859_7 = Charmap{
}
// ISO8859_8 is the ISO 8859-8 encoding.
var ISO8859_8 *Charmap = &iso8859_8
var ISO8859_8 encoding.Encoding = &iso8859_8
var iso8859_8 = Charmap{
var iso8859_8 = charmap{
name: "ISO 8859-8",
mib: identifier.ISOLatinHebrew,
asciiSuperset: true,
@ -3683,9 +3508,9 @@ var iso8859_8 = Charmap{
}
// ISO8859_9 is the ISO 8859-9 encoding.
var ISO8859_9 *Charmap = &iso8859_9
var ISO8859_9 encoding.Encoding = &iso8859_9
var iso8859_9 = Charmap{
var iso8859_9 = charmap{
name: "ISO 8859-9",
mib: identifier.ISOLatin5,
asciiSuperset: true,
@ -3858,9 +3683,9 @@ var iso8859_9 = Charmap{
}
// ISO8859_10 is the ISO 8859-10 encoding.
var ISO8859_10 *Charmap = &iso8859_10
var ISO8859_10 encoding.Encoding = &iso8859_10
var iso8859_10 = Charmap{
var iso8859_10 = charmap{
name: "ISO 8859-10",
mib: identifier.ISOLatin6,
asciiSuperset: true,
@ -4033,9 +3858,9 @@ var iso8859_10 = Charmap{
}
// ISO8859_13 is the ISO 8859-13 encoding.
var ISO8859_13 *Charmap = &iso8859_13
var ISO8859_13 encoding.Encoding = &iso8859_13
var iso8859_13 = Charmap{
var iso8859_13 = charmap{
name: "ISO 8859-13",
mib: identifier.ISO885913,
asciiSuperset: true,
@ -4208,9 +4033,9 @@ var iso8859_13 = Charmap{
}
// ISO8859_14 is the ISO 8859-14 encoding.
var ISO8859_14 *Charmap = &iso8859_14
var ISO8859_14 encoding.Encoding = &iso8859_14
var iso8859_14 = Charmap{
var iso8859_14 = charmap{
name: "ISO 8859-14",
mib: identifier.ISO885914,
asciiSuperset: true,
@ -4383,9 +4208,9 @@ var iso8859_14 = Charmap{
}
// ISO8859_15 is the ISO 8859-15 encoding.
var ISO8859_15 *Charmap = &iso8859_15
var ISO8859_15 encoding.Encoding = &iso8859_15
var iso8859_15 = Charmap{
var iso8859_15 = charmap{
name: "ISO 8859-15",
mib: identifier.ISO885915,
asciiSuperset: true,
@ -4558,9 +4383,9 @@ var iso8859_15 = Charmap{
}
// ISO8859_16 is the ISO 8859-16 encoding.
var ISO8859_16 *Charmap = &iso8859_16
var ISO8859_16 encoding.Encoding = &iso8859_16
var iso8859_16 = Charmap{
var iso8859_16 = charmap{
name: "ISO 8859-16",
mib: identifier.ISO885916,
asciiSuperset: true,
@ -4733,9 +4558,9 @@ var iso8859_16 = Charmap{
}
// KOI8R is the KOI8-R encoding.
var KOI8R *Charmap = &koi8R
var KOI8R encoding.Encoding = &koi8R
var koi8R = Charmap{
var koi8R = charmap{
name: "KOI8-R",
mib: identifier.KOI8R,
asciiSuperset: true,
@ -4908,9 +4733,9 @@ var koi8R = Charmap{
}
// KOI8U is the KOI8-U encoding.
var KOI8U *Charmap = &koi8U
var KOI8U encoding.Encoding = &koi8U
var koi8U = Charmap{
var koi8U = charmap{
name: "KOI8-U",
mib: identifier.KOI8U,
asciiSuperset: true,
@ -5083,9 +4908,9 @@ var koi8U = Charmap{
}
// Macintosh is the Macintosh encoding.
var Macintosh *Charmap = &macintosh
var Macintosh encoding.Encoding = &macintosh
var macintosh = Charmap{
var macintosh = charmap{
name: "Macintosh",
mib: identifier.Macintosh,
asciiSuperset: true,
@ -5258,9 +5083,9 @@ var macintosh = Charmap{
}
// MacintoshCyrillic is the Macintosh Cyrillic encoding.
var MacintoshCyrillic *Charmap = &macintoshCyrillic
var MacintoshCyrillic encoding.Encoding = &macintoshCyrillic
var macintoshCyrillic = Charmap{
var macintoshCyrillic = charmap{
name: "Macintosh Cyrillic",
mib: identifier.MacintoshCyrillic,
asciiSuperset: true,
@ -5433,9 +5258,9 @@ var macintoshCyrillic = Charmap{
}
// Windows874 is the Windows 874 encoding.
var Windows874 *Charmap = &windows874
var Windows874 encoding.Encoding = &windows874
var windows874 = Charmap{
var windows874 = charmap{
name: "Windows 874",
mib: identifier.Windows874,
asciiSuperset: true,
@ -5608,9 +5433,9 @@ var windows874 = Charmap{
}
// Windows1250 is the Windows 1250 encoding.
var Windows1250 *Charmap = &windows1250
var Windows1250 encoding.Encoding = &windows1250
var windows1250 = Charmap{
var windows1250 = charmap{
name: "Windows 1250",
mib: identifier.Windows1250,
asciiSuperset: true,
@ -5783,9 +5608,9 @@ var windows1250 = Charmap{
}
// Windows1251 is the Windows 1251 encoding.
var Windows1251 *Charmap = &windows1251
var Windows1251 encoding.Encoding = &windows1251
var windows1251 = Charmap{
var windows1251 = charmap{
name: "Windows 1251",
mib: identifier.Windows1251,
asciiSuperset: true,
@ -5958,9 +5783,9 @@ var windows1251 = Charmap{
}
// Windows1252 is the Windows 1252 encoding.
var Windows1252 *Charmap = &windows1252
var Windows1252 encoding.Encoding = &windows1252
var windows1252 = Charmap{
var windows1252 = charmap{
name: "Windows 1252",
mib: identifier.Windows1252,
asciiSuperset: true,
@ -6133,9 +5958,9 @@ var windows1252 = Charmap{
}
// Windows1253 is the Windows 1253 encoding.
var Windows1253 *Charmap = &windows1253
var Windows1253 encoding.Encoding = &windows1253
var windows1253 = Charmap{
var windows1253 = charmap{
name: "Windows 1253",
mib: identifier.Windows1253,
asciiSuperset: true,
@ -6308,9 +6133,9 @@ var windows1253 = Charmap{
}
// Windows1254 is the Windows 1254 encoding.
var Windows1254 *Charmap = &windows1254
var Windows1254 encoding.Encoding = &windows1254
var windows1254 = Charmap{
var windows1254 = charmap{
name: "Windows 1254",
mib: identifier.Windows1254,
asciiSuperset: true,
@ -6483,9 +6308,9 @@ var windows1254 = Charmap{
}
// Windows1255 is the Windows 1255 encoding.
var Windows1255 *Charmap = &windows1255
var Windows1255 encoding.Encoding = &windows1255
var windows1255 = Charmap{
var windows1255 = charmap{
name: "Windows 1255",
mib: identifier.Windows1255,
asciiSuperset: true,
@ -6593,7 +6418,7 @@ var windows1255 = Charmap{
{2, [3]byte{0xd6, 0xb4, 0x00}}, {2, [3]byte{0xd6, 0xb5, 0x00}},
{2, [3]byte{0xd6, 0xb6, 0x00}}, {2, [3]byte{0xd6, 0xb7, 0x00}},
{2, [3]byte{0xd6, 0xb8, 0x00}}, {2, [3]byte{0xd6, 0xb9, 0x00}},
{2, [3]byte{0xd6, 0xba, 0x00}}, {2, [3]byte{0xd6, 0xbb, 0x00}},
{3, [3]byte{0xef, 0xbf, 0xbd}}, {2, [3]byte{0xd6, 0xbb, 0x00}},
{2, [3]byte{0xd6, 0xbc, 0x00}}, {2, [3]byte{0xd6, 0xbd, 0x00}},
{2, [3]byte{0xd6, 0xbe, 0x00}}, {2, [3]byte{0xd6, 0xbf, 0x00}},
{2, [3]byte{0xd7, 0x80, 0x00}}, {2, [3]byte{0xd7, 0x81, 0x00}},
@ -6643,24 +6468,24 @@ var windows1255 = Charmap{
0xb20000b2, 0xb30000b3, 0xb40000b4, 0xb50000b5, 0xb60000b6, 0xb70000b7, 0xb80000b8, 0xb90000b9,
0xbb0000bb, 0xbc0000bc, 0xbd0000bd, 0xbe0000be, 0xbf0000bf, 0xaa0000d7, 0xba0000f7, 0x83000192,
0x880002c6, 0x980002dc, 0xc00005b0, 0xc10005b1, 0xc20005b2, 0xc30005b3, 0xc40005b4, 0xc50005b5,
0xc60005b6, 0xc70005b7, 0xc80005b8, 0xc90005b9, 0xca0005ba, 0xcb0005bb, 0xcc0005bc, 0xcd0005bd,
0xce0005be, 0xcf0005bf, 0xd00005c0, 0xd10005c1, 0xd20005c2, 0xd30005c3, 0xe00005d0, 0xe10005d1,
0xe20005d2, 0xe30005d3, 0xe40005d4, 0xe50005d5, 0xe60005d6, 0xe70005d7, 0xe80005d8, 0xe90005d9,
0xea0005da, 0xeb0005db, 0xec0005dc, 0xed0005dd, 0xee0005de, 0xef0005df, 0xf00005e0, 0xf10005e1,
0xf20005e2, 0xf30005e3, 0xf40005e4, 0xf50005e5, 0xf60005e6, 0xf70005e7, 0xf80005e8, 0xf90005e9,
0xfa0005ea, 0xd40005f0, 0xd50005f1, 0xd60005f2, 0xd70005f3, 0xd80005f4, 0xfd00200e, 0xfe00200f,
0x96002013, 0x97002014, 0x91002018, 0x92002019, 0x8200201a, 0x9300201c, 0x9400201d, 0x8400201e,
0x86002020, 0x87002021, 0x95002022, 0x85002026, 0x89002030, 0x8b002039, 0x9b00203a, 0xa40020aa,
0x800020ac, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0xc60005b6, 0xc70005b7, 0xc80005b8, 0xc90005b9, 0xcb0005bb, 0xcc0005bc, 0xcd0005bd, 0xce0005be,
0xcf0005bf, 0xd00005c0, 0xd10005c1, 0xd20005c2, 0xd30005c3, 0xe00005d0, 0xe10005d1, 0xe20005d2,
0xe30005d3, 0xe40005d4, 0xe50005d5, 0xe60005d6, 0xe70005d7, 0xe80005d8, 0xe90005d9, 0xea0005da,
0xeb0005db, 0xec0005dc, 0xed0005dd, 0xee0005de, 0xef0005df, 0xf00005e0, 0xf10005e1, 0xf20005e2,
0xf30005e3, 0xf40005e4, 0xf50005e5, 0xf60005e6, 0xf70005e7, 0xf80005e8, 0xf90005e9, 0xfa0005ea,
0xd40005f0, 0xd50005f1, 0xd60005f2, 0xd70005f3, 0xd80005f4, 0xfd00200e, 0xfe00200f, 0x96002013,
0x97002014, 0x91002018, 0x92002019, 0x8200201a, 0x9300201c, 0x9400201d, 0x8400201e, 0x86002020,
0x87002021, 0x95002022, 0x85002026, 0x89002030, 0x8b002039, 0x9b00203a, 0xa40020aa, 0x800020ac,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
},
}
// Windows1256 is the Windows 1256 encoding.
var Windows1256 *Charmap = &windows1256
var Windows1256 encoding.Encoding = &windows1256
var windows1256 = Charmap{
var windows1256 = charmap{
name: "Windows 1256",
mib: identifier.Windows1256,
asciiSuperset: true,
@ -6833,9 +6658,9 @@ var windows1256 = Charmap{
}
// Windows1257 is the Windows 1257 encoding.
var Windows1257 *Charmap = &windows1257
var Windows1257 encoding.Encoding = &windows1257
var windows1257 = Charmap{
var windows1257 = charmap{
name: "Windows 1257",
mib: identifier.Windows1257,
asciiSuperset: true,
@ -7008,9 +6833,9 @@ var windows1257 = Charmap{
}
// Windows1258 is the Windows 1258 encoding.
var Windows1258 *Charmap = &windows1258
var Windows1258 encoding.Encoding = &windows1258
var windows1258 = Charmap{
var windows1258 = charmap{
name: "Windows 1258",
mib: identifier.Windows1258,
asciiSuperset: true,
@ -7185,9 +7010,9 @@ var windows1258 = Charmap{
// XUserDefined is the X-User-Defined encoding.
//
// It is defined at http://encoding.spec.whatwg.org/#x-user-defined
var XUserDefined *Charmap = &xUserDefined
var XUserDefined encoding.Encoding = &xUserDefined
var xUserDefined = Charmap{
var xUserDefined = charmap{
name: "X-User-Defined",
mib: identifier.XUserDefined,
asciiSuperset: true,
@ -7359,7 +7184,6 @@ var xUserDefined = Charmap{
},
}
var listAll = []encoding.Encoding{
CodePage037,
CodePage437,
CodePage850,
CodePage852,
@ -7407,4 +7231,4 @@ var listAll = []encoding.Encoding{
XUserDefined,
}
// Total table size 87024 bytes (84KiB); checksum: 811C9DC5
// Total table size 84952 bytes (82KiB); checksum: 811C9DC5

View File

@ -52,7 +52,7 @@ type Decoder struct {
}
// Bytes converts the given encoded bytes to UTF-8. It returns the converted
// bytes or 0, err if any error occurred.
// bytes or nil, err if any error occurred.
func (d *Decoder) Bytes(b []byte) ([]byte, error) {
b, _, err := transform.Bytes(d, b)
if err != nil {
@ -62,7 +62,7 @@ func (d *Decoder) Bytes(b []byte) ([]byte, error) {
}
// String converts the given encoded string to UTF-8. It returns the converted
// string or 0, err if any error occurred.
// string or "", err if any error occurred.
func (d *Decoder) String(s string) (string, error) {
s, _, err := transform.String(d, s)
if err != nil {
@ -95,7 +95,7 @@ type Encoder struct {
_ struct{}
}
// Bytes converts bytes from UTF-8. It returns the converted bytes or 0, err if
// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
// any error occurred.
func (e *Encoder) Bytes(b []byte) ([]byte, error) {
b, _, err := transform.Bytes(e, b)
@ -106,7 +106,7 @@ func (e *Encoder) Bytes(b []byte) ([]byte, error) {
}
// String converts a string from UTF-8. It returns the converted string or
// 0, err if any error occurred.
// "", err if any error occurred.
func (e *Encoder) String(s string) (string, error) {
s, _, err := transform.String(e, s)
if err != nil {

View File

@ -1,137 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
type registry struct {
XMLName xml.Name `xml:"registry"`
Updated string `xml:"updated"`
Registry []struct {
ID string `xml:"id,attr"`
Record []struct {
Name string `xml:"name"`
Xref []struct {
Type string `xml:"type,attr"`
Data string `xml:"data,attr"`
} `xml:"xref"`
Desc struct {
Data string `xml:",innerxml"`
// Any []struct {
// Data string `xml:",chardata"`
// } `xml:",any"`
// Data string `xml:",chardata"`
} `xml:"description,"`
MIB string `xml:"value"`
Alias []string `xml:"alias"`
MIME string `xml:"preferred_alias"`
} `xml:"record"`
} `xml:"registry"`
}
func main() {
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
reg := &registry{}
if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
log.Fatalf("Error decoding charset registry: %v", err)
}
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
}
w := &bytes.Buffer{}
fmt.Fprintf(w, "const (\n")
for _, rec := range reg.Registry[0].Record {
constName := ""
for _, a := range rec.Alias {
if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
// Some of the constant definitions have comments in them. Strip those.
constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
}
}
if constName == "" {
switch rec.MIB {
case "2085":
constName = "HZGB2312" // Not listed as alias for some reason.
default:
log.Fatalf("No cs alias defined for %s.", rec.MIB)
}
}
if rec.MIME != "" {
rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
}
fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
if len(rec.Desc.Data) > 0 {
fmt.Fprint(w, "// ")
d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
inElem := true
attr := ""
for {
t, err := d.Token()
if err != nil {
if err != io.EOF {
log.Fatal(err)
}
break
}
switch x := t.(type) {
case xml.CharData:
attr = "" // Don't need attribute info.
a := bytes.Split([]byte(x), []byte("\n"))
for i, b := range a {
if b = bytes.TrimSpace(b); len(b) != 0 {
if !inElem && i > 0 {
fmt.Fprint(w, "\n// ")
}
inElem = false
fmt.Fprintf(w, "%s ", string(b))
}
}
case xml.StartElement:
if x.Name.Local == "xref" {
inElem = true
use := false
for _, a := range x.Attr {
if a.Name.Local == "type" {
use = use || a.Value != "person"
}
if a.Name.Local == "data" && use {
attr = a.Value + " "
}
}
}
case xml.EndElement:
inElem = false
fmt.Fprint(w, attr)
}
}
fmt.Fprint(w, "\n")
}
for _, x := range rec.Xref {
switch x.Type {
case "rfc":
fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
case "uri":
fmt.Fprintf(w, "// Reference: %s\n", x.Data)
}
}
fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
fmt.Fprintln(w)
}
fmt.Fprintln(w, ")")
gen.WriteGoFile("mib.go", "identifier", w.Bytes())
}

View File

@ -41,20 +41,35 @@ func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
if tNotIn == nil {
tNotIn = transform.Nop
}
sIn, ok := tIn.(transform.SpanningTransformer)
if !ok {
sIn = dummySpan{tIn}
}
sNotIn, ok := tNotIn.(transform.SpanningTransformer)
if !ok {
sNotIn = dummySpan{tNotIn}
}
a := &cond{
tIn: tIn,
tNotIn: tNotIn,
tIn: sIn,
tNotIn: sNotIn,
f: s.Contains,
}
a.Reset()
return Transformer{a}
}
type dummySpan struct{ transform.Transformer }
func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
return 0, transform.ErrEndOfSpan
}
type cond struct {
tIn, tNotIn transform.Transformer
tIn, tNotIn transform.SpanningTransformer
f func(rune) bool
check func(rune) bool // current check to perform
t transform.Transformer // current transformer to use
check func(rune) bool // current check to perform
t transform.SpanningTransformer // current transformer to use
}
// Reset implements transform.Transformer.
@ -84,6 +99,51 @@ func (t *cond) isNot(r rune) bool {
return false
}
// This implementation of Span doesn't help all too much, but it needs to be
// there to satisfy this package's Transformer interface.
// TODO: there are certainly room for improvements, though. For example, if
// t.t == transform.Nop (which will a common occurrence) it will save a bundle
// to special-case that loop.
func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
p := 0
for n < len(src) && err == nil {
// Don't process too much at a time as the Spanner that will be
// called on this block may terminate early.
const maxChunk = 4096
max := len(src)
if v := n + maxChunk; v < max {
max = v
}
atEnd := false
size := 0
current := t.t
for ; p < max; p += size {
r := rune(src[p])
if r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
if !atEOF && !utf8.FullRune(src[p:]) {
err = transform.ErrShortSrc
break
}
}
if !t.check(r) {
// The next rune will be the start of a new run.
atEnd = true
break
}
}
n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
n += n2
if err2 != nil {
return n, err2
}
// At this point either err != nil or t.check will pass for the rune at p.
p = n + size
}
return n, err
}
func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
p := 0
for nSrc < len(src) && err == nil {
@ -99,9 +159,10 @@ func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error
size := 0
current := t.t
for ; p < max; p += size {
var r rune
r, size = utf8.DecodeRune(src[p:])
if r == utf8.RuneError && size == 1 {
r := rune(src[p])
if r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
if !atEOF && !utf8.FullRune(src[p:]) {
err = transform.ErrShortSrc
break

View File

@ -46,9 +46,19 @@ func Predicate(f func(rune) bool) Set {
// Transformer implements the transform.Transformer interface.
type Transformer struct {
transform.Transformer
t transform.SpanningTransformer
}
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return t.t.Transform(dst, src, atEOF)
}
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
return t.t.Span(b, atEOF)
}
func (t Transformer) Reset() { t.t.Reset() }
// Bytes returns a new byte slice with the result of converting b using t. It
// calls Reset on t. It returns nil if any error was found. This can only happen
// if an error-producing Transformer is passed to If.
@ -96,39 +106,57 @@ type remove func(r rune) bool
func (remove) Reset() {}
// Span implements transform.Spanner.
func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
for r, size := rune(0), 0; n < len(src); {
if r = rune(src[n]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
} else {
err = transform.ErrEndOfSpan
}
break
}
if t(r) {
err = transform.ErrEndOfSpan
break
}
n += size
}
return
}
// Transform implements transform.Transformer.
func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for r, size := rune(0), 0; nSrc < len(src); {
if r = rune(src[nSrc]); r < utf8.RuneSelf {
size = 1
} else {
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(utf8.RuneError) {
if nDst+3 > len(dst) {
err = transform.ErrShortDst
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(utf8.RuneError) {
if nDst+3 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = runeErrorString[0]
dst[nDst+1] = runeErrorString[1]
dst[nDst+2] = runeErrorString[2]
nDst += 3
}
nSrc++
continue
dst[nDst+0] = runeErrorString[0]
dst[nDst+1] = runeErrorString[1]
dst[nDst+2] = runeErrorString[2]
nDst += 3
}
nSrc++
continue
}
if t(r) {
nSrc += size
continue
@ -157,6 +185,28 @@ type mapper func(rune) rune
func (mapper) Reset() {}
// Span implements transform.Spanner.
func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
for r, size := rune(0), 0; n < len(src); n += size {
if r = rune(src[n]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
} else {
err = transform.ErrEndOfSpan
}
break
}
if t(r) != r {
err = transform.ErrEndOfSpan
break
}
}
return n, err
}
// Transform implements transform.Transformer.
func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
var replacement rune
@ -230,24 +280,51 @@ func ReplaceIllFormed() Transformer {
type replaceIllFormed struct{ transform.NopResetter }
func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
for n < len(src) {
// ASCII fast path.
if src[n] < utf8.RuneSelf {
n++
continue
}
r, size := utf8.DecodeRune(src[n:])
// Look for a valid non-ASCII rune.
if r != utf8.RuneError || size != 1 {
n += size
continue
}
// Look for short source data.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
break
}
// We have an invalid rune.
err = transform.ErrEndOfSpan
break
}
return n, err
}
func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for nSrc < len(src) {
r, size := utf8.DecodeRune(src[nSrc:])
// Look for an ASCII rune.
if r < utf8.RuneSelf {
// ASCII fast path.
if r := src[nSrc]; r < utf8.RuneSelf {
if nDst == len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst] = byte(r)
dst[nDst] = r
nDst++
nSrc++
continue
}
// Look for a valid non-ASCII rune.
if r != utf8.RuneError || size != 1 {
if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
err = transform.ErrShortDst
break

View File

@ -24,6 +24,10 @@ var (
// complete the transformation.
ErrShortSrc = errors.New("transform: short source buffer")
// ErrEndOfSpan means that the input and output (the transformed input)
// are not identical.
ErrEndOfSpan = errors.New("transform: input and output are not identical")
// errInconsistentByteCount means that Transform returned success (nil
// error) but also returned nSrc inconsistent with the src argument.
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
@ -60,6 +64,41 @@ type Transformer interface {
Reset()
}
// SpanningTransformer extends the Transformer interface with a Span method
// that determines how much of the input already conforms to the Transformer.
type SpanningTransformer interface {
Transformer
// Span returns a position in src such that transforming src[:n] results in
// identical output src[:n] for these bytes. It does not necessarily return
// the largest such n. The atEOF argument tells whether src represents the
// last bytes of the input.
//
// Callers should always account for the n bytes consumed before
// considering the error err.
//
// A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be be returned
// regardless of whether atEOF is true. If err is nil, then then n must
// equal len(src); the converse is not necessarily true.
//
// ErrEndOfSpan means that the Transformer output may differ from the
// input after n bytes. Note that n may be len(src), meaning that the output
// would contain additional bytes after otherwise identical output.
// ErrShortSrc means that src had insufficient data to determine whether the
// remaining bytes would change. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
//
// Calling Span can modify the Transformer state as a side effect. In
// effect, it does the transformation just as calling Transform would, only
// without copying to a destination buffer and only up to a point it can
// determine the input and output bytes are the same. This is obviously more
// limited than calling Transform, but can be more efficient in terms of
// copying and allocating buffers. Calls to Span and Transform may be
// interleaved.
Span(src []byte, atEOF bool) (n int, err error)
}
// NopResetter can be embedded by implementations of Transformer to add a nop
// Reset method.
type NopResetter struct{}
@ -278,6 +317,10 @@ func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return n, n, err
}
func (nop) Span(src []byte, atEOF bool) (n int, err error) {
return len(src), nil
}
type discard struct{ NopResetter }
func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@ -289,8 +332,8 @@ var (
// by consuming all bytes and writing nothing.
Discard Transformer = discard{}
// Nop is a Transformer that copies src to dst.
Nop Transformer = nop{}
// Nop is a SpanningTransformer that copies src to dst.
Nop SpanningTransformer = nop{}
)
// chain is a sequence of links. A chain with N Transformers has N+1 links and
@ -358,6 +401,8 @@ func (c *chain) Reset() {
}
}
// TODO: make chain use Span (is going to be fun to implement!)
// Transform applies the transformers of c in sequence.
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Set up src and dst in the chain.
@ -448,8 +493,7 @@ func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err erro
return dstL.n, srcL.p, err
}
// RemoveFunc returns a Transformer that removes from the input all runes r for
// which f(r) is true. Illegal bytes in the input are replaced by RuneError.
// Deprecated: use runes.Remove instead.
func RemoveFunc(f func(r rune) bool) Transformer {
return removeF(f)
}