From 9ff8f6926f46728a630e739e7824255c923e1bc5 Mon Sep 17 00:00:00 2001 From: Tom Limoncelli Date: Fri, 28 Jun 2019 11:38:35 -0400 Subject: [PATCH] Update github.com/TomOnTime/utfutil (#512) --- .../github.com/TomOnTime/utfutil/utfutil.go | 121 ++++++++++++++++-- vendor/vendor.json | 6 +- 2 files changed, 112 insertions(+), 15 deletions(-) diff --git a/vendor/github.com/TomOnTime/utfutil/utfutil.go b/vendor/github.com/TomOnTime/utfutil/utfutil.go index 42ad551d6..61c1031e8 100644 --- a/vendor/github.com/TomOnTime/utfutil/utfutil.go +++ b/vendor/github.com/TomOnTime/utfutil/utfutil.go @@ -41,14 +41,97 @@ import ( type EncodingHint int const ( - UTF8 EncodingHint = iota // UTF-8 - UTF16LE // UTF 16 Little Endian - UTF16BE // UTF 16 Big Endian - WINDOWS = UTF16LE // File came from a MS-Windows system - POSIX = UTF8 // File came from Unix or Unix-like systems - HTML5 = UTF8 // File came from the web + // UTF8 indicates the specified encoding. + UTF8 EncodingHint = iota + // UTF16LE indicates the specified encoding. + UTF16LE + // UTF16BE indicates the specified encoding. + UTF16BE + // WINDOWS indicates that the file came from a MS-Windows system + WINDOWS = UTF16LE + // POSIX indicates that the file came from Unix or Unix-like systems + POSIX = UTF8 + // HTML5 indicates that the file came from the web + HTML5 = UTF8 ) +// UTFReadCloser describes the utfutil ReadCloser structure. +type UTFReadCloser interface { + Read(p []byte) (n int, err error) + Close() error +} + +// ReadCloser is a readcloser for the UTFUtil package. +type readCloser struct { + file *os.File + reader io.Reader +} + +// Read implements the standard Reader interface. +func (u readCloser) Read(p []byte) (n int, err error) { + return u.reader.Read(p) +} + +// Close implements the standard Closer interface. +func (u readCloser) Close() error { + if u.file != nil { + return u.file.Close() + } + return nil +} + +// UTFScanCloser describes a new utfutil ScanCloser structure. +// It's similar to ReadCloser, but with a scanner instead of a reader. +type UTFScanCloser interface { + Buffer(buf []byte, max int) + Bytes() []byte + Err() error + Scan() bool + Split(split bufio.SplitFunc) + Text() string + Close() error +} + +type scanCloser struct { + file UTFReadCloser + scanner *bufio.Scanner +} + +// Buffer will run the Buffer function on the underlying bufio.Scanner. +func (sc scanCloser) Buffer(buf []byte, max int) { + sc.scanner.Buffer(buf, max) +} + +// Bytes will run the Bytes function on the underlying bufio.Scanner. +func (sc scanCloser) Bytes() []byte { + return sc.scanner.Bytes() +} + +// Err will run the Err function on the underlying bufio.Scanner. +func (sc scanCloser) Err() error { + return sc.scanner.Err() +} + +// Scan will run the Scan function on the underlying bufio.Scanner. +func (sc scanCloser) Scan() bool { + return sc.scanner.Scan() +} + +// Split will run the Split function on the underlying bufio.Scanner. +func (sc scanCloser) Split(split bufio.SplitFunc) { + sc.scanner.Split(split) +} + +// Text will return the text from the underlying bufio.Scanner. +func (sc scanCloser) Text() string { + return sc.scanner.Text() +} + +// Close will close the underlying file handle. +func (sc scanCloser) Close() error { + return sc.file.Close() +} + // About utfutil.HTML5: // This technique is recommended by the W3C for use in HTML 5: // "For compatibility with deployed content, the byte order @@ -56,12 +139,14 @@ const ( // than anything else." http://www.w3.org/TR/encoding/#specification-hooks // OpenFile is the equivalent of os.Open(). -func OpenFile(name string, d EncodingHint) (io.Reader, error) { +func OpenFile(name string, d EncodingHint) (UTFReadCloser, error) { f, err := os.Open(name) if err != nil { return nil, err } - return NewReader(f, d), nil + + rc := readCloser{file: f} + return NewReader(rc, d), nil } // ReadFile is the equivalent of ioutil.ReadFile() @@ -70,20 +155,25 @@ func ReadFile(name string, d EncodingHint) ([]byte, error) { if err != nil { return nil, err } + defer file.Close() return ioutil.ReadAll(file) } // NewScanner is a convenience function that takes a filename and returns a scanner. -func NewScanner(name string, d EncodingHint) (*bufio.Scanner, error) { +func NewScanner(name string, d EncodingHint) (UTFScanCloser, error) { f, err := OpenFile(name, d) if err != nil { return nil, err } - return bufio.NewScanner(f), nil + + return scanCloser{ + scanner: bufio.NewScanner(f), + file: f, + }, nil } // NewReader wraps a Reader to decode Unicode to UTF-8 as it reads. -func NewReader(r io.Reader, d EncodingHint) io.Reader { +func NewReader(r io.Reader, d EncodingHint) UTFReadCloser { var decoder *encoding.Decoder switch d { case UTF8: @@ -102,7 +192,14 @@ func NewReader(r io.Reader, d EncodingHint) io.Reader { } // Make a Reader that uses utf16bom: - return transform.NewReader(r, unicode.BOMOverride(decoder)) + if rc, ok := r.(readCloser); ok { + rc.reader = transform.NewReader(rc.file, unicode.BOMOverride(decoder)) + return rc + } + + return readCloser{ + reader: transform.NewReader(r, unicode.BOMOverride(decoder)), + } } // BytesReader is a convenience function that takes a []byte and decodes them to UTF-8. diff --git a/vendor/vendor.json b/vendor/vendor.json index 791943e1d..1ae24199d 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -26,10 +26,10 @@ "revisionTime": "2017-08-08T19:54:39Z" }, { - "checksumSHA1": "cksQ/Vucu9mWJrsLG1bjUvg4ao8=", + "checksumSHA1": "dxnNQI0lrvE1S4lNRzJI647hQDY=", "path": "github.com/TomOnTime/utfutil", - "revision": "1916859f26bb476f86a4c47c272f7c881f92619a", - "revisionTime": "2017-11-03T00:04:35Z" + "revision": "09c41003ee1d5015b75f331e52215512e7145b8d", + "revisionTime": "2018-05-11T10:42:25Z" }, { "checksumSHA1": "irAylH0FbfVgkCcXP9U+Fkou9+U=",