mirror of
https://github.com/StackExchange/dnscontrol.git
synced 2024-05-11 05:55:12 +00:00
migrate code for github
This commit is contained in:
27
vendor/github.com/TomOnTime/utfutil/LICENSE
generated
vendored
Normal file
27
vendor/github.com/TomOnTime/utfutil/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
Copyright (c) 2016, Tom Limoncelli
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of utfutil nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
70
vendor/github.com/TomOnTime/utfutil/README.md
generated
vendored
Normal file
70
vendor/github.com/TomOnTime/utfutil/README.md
generated
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
# utfutil
|
||||
|
||||
Utilities to make it easier to read text encoded as UTF-16.
|
||||
|
||||
## Dealing with UTF-16 files from Windows.
|
||||
|
||||
Ever have code that worked for years until you received a file from a MS-Windows system that just didn't work at all? Looking at a hex dump you realize every other byte is \0. WTF? No, UTF. More specifically UTF-16LE with an optional BOM.
|
||||
|
||||
What does all that mean? Well, first you should read ["The Absolute Minimum Every Software Developer Absolutely, Positively Must Know About Unicode and Character Sets (No Excuses!)"](http://www.joelonsoftware.com/articles/Unicode.html) by Joel Spolsky.
|
||||
|
||||
Now you are an expert. You can spend an afternoon trying to figure out how the heck to put all that together and use `golang.org/x/text/encoding/unicode` to decode UTF-16LE. However, I've already done that for you. Now you can take the easy way out: change `ioutil.ReadFile()` to `utfutil.ReadFile()`. Everything will just work.
|
||||
|
||||
### utfutil.ReadFile() is the equivalent of ioutil.ReadFile()
|
||||
|
||||
OLD: Works with UTF8 and ASCII files:
|
||||
|
||||
```
|
||||
data, err := ioutil.ReadFile(filename)
|
||||
```
|
||||
|
||||
NEW: Works if someone gives you a Windows UTF-16LE file occasionally but normally you are processing UTF8 files:
|
||||
|
||||
```
|
||||
data, err := utfutil.ReadFile(filename, utfutil.UTF8)
|
||||
```
|
||||
|
||||
### utfutil.OpenFile() is the equivalent of os.Open().
|
||||
|
||||
OLD: Works with UTF8 and ASCII files:
|
||||
|
||||
```
|
||||
data, err := os.Open(filename)
|
||||
```
|
||||
|
||||
NEW: Works if someone gives you a file with a BOM:
|
||||
|
||||
```
|
||||
data, err := utfutil.OpenFile(filename, utfutil.HTML5)
|
||||
```
|
||||
|
||||
### utfutil.NewScanner() is for reading files line-by-line
|
||||
|
||||
It works like os.Open():
|
||||
|
||||
```
|
||||
s, err := utfutil.NewScanner(filename, utfutil.HTML5)
|
||||
```
|
||||
|
||||
|
||||
## Encoding hints:
|
||||
|
||||
What's that second argument all about?
|
||||
|
||||
Since it is impossible to guess 100% correctly if there is no BOM,
|
||||
the functions take a 2nd parameter of type "EncodingHint" where you
|
||||
specify the default encoding for BOM-less files.
|
||||
|
||||
```
|
||||
UTF8 No BOM? Assume UTF-8
|
||||
UTF16LE No BOM? Assume UTF 16 Little Endian
|
||||
UTF16BE No BOM? Assume UTF 16 Big Endian
|
||||
WINDOWS = UTF16LE (i.e. a reasonable guess if file is from MS-Windows)
|
||||
POSIX = UTF8 (i.e. a reasonable guess if file is from Unix or Unix-like systems)
|
||||
HTML5 = UTF8 (i.e. a reasonable guess if file is from the web)
|
||||
```
|
||||
|
||||
## Future Directions
|
||||
|
||||
If someone writes a golang equivalent of uchardet, I'll add a hint
|
||||
called "AUTO" which uses it. That would be awesome. Volunteers?
|
110
vendor/github.com/TomOnTime/utfutil/utfutil.go
generated
vendored
Normal file
110
vendor/github.com/TomOnTime/utfutil/utfutil.go
generated
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
// Package utfutil provides functions that make it easy to read data in a UTF-encoding-agnostic way.
|
||||
package utfutil
|
||||
|
||||
// These functions autodetect UTF BOM and return UTF-8. If no
|
||||
// BOM is found, a hint is provided as to which encoding to assume.
|
||||
// You can use them as replacements for os.Open() and ioutil.ReadFile()
|
||||
// when the encoding of the file is unknown.
|
||||
|
||||
// utfutil.OpenFile() is a replacement for os.Open().
|
||||
// utfutil.ReadFile() is a replacement for ioutil.ReadFile().
|
||||
// utfutil.NewScanner() takes a filename and returns a Scanner.
|
||||
// utfutil.NewReader() wraps an existing io.Reader to make it UTF-encoding agnostic.
|
||||
// utfutil.BytesReader() takes a []byte and decodes it to UTF-8.
|
||||
|
||||
// Since it is impossible to guess 100% correctly if there is no BOM,
|
||||
// the functions take a 2nd parameter of type "EncodingHint" where you
|
||||
// specify the default encoding for BOM-less data.
|
||||
|
||||
// If someone writes a golang equivalent of uchardet, I'll add
|
||||
// a hint called "AUTO" which uses it.
|
||||
|
||||
// Inspiration: I wrote this after spending half a day trying
|
||||
// to figure out how to use unicode.BOMOverride.
|
||||
// Hopefully this will save other golang newbies from the same.
|
||||
// (golang.org/x/text/encoding/unicode)
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// EncodingHint tells the decoding functions which encoding to assume
// when the input does not start with a byte order mark (BOM).
type EncodingHint int

// Base hints: one value per supported fallback encoding.
const (
	UTF8    EncodingHint = iota // No BOM? Assume UTF-8
	UTF16LE                     // No BOM? Assume UTF-16 Little Endian
	UTF16BE                     // No BOM? Assume UTF-16 Big Endian
)

// Aliases for the base hints, named after where the data came from.
const (
	WINDOWS = UTF16LE // File came from a MS-Windows system
	POSIX   = UTF8    // File came from Unix or Unix-like systems
	HTML5   = UTF8    // File came from the web
)
|
||||
|
||||
// About utfutil.HTML5:
|
||||
// This technique is recommended by the W3C for use in HTML 5:
|
||||
// "For compatibility with deployed content, the byte order
|
||||
// mark (also known as BOM) is considered more authoritative
|
||||
// than anything else." http://www.w3.org/TR/encoding/#specification-hooks
|
||||
|
||||
// OpenFile is the equivalent of os.Open().
|
||||
func OpenFile(name string, d EncodingHint) (io.Reader, error) {
|
||||
f, err := os.Open(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewReader(f, d), nil
|
||||
}
|
||||
|
||||
// ReadFile is the equivalent of ioutil.ReadFile()
|
||||
func ReadFile(name string, d EncodingHint) ([]byte, error) {
|
||||
file, err := OpenFile(name, d)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ioutil.ReadAll(file)
|
||||
}
|
||||
|
||||
// NewScanner is a convenience function that takes a filename and returns a scanner.
|
||||
func NewScanner(name string, d EncodingHint) (*bufio.Scanner, error) {
|
||||
f, err := OpenFile(name, d)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return bufio.NewScanner(f), nil
|
||||
}
|
||||
|
||||
// NewReader wraps a Reader to decode Unicode to UTF-8 as it reads.
|
||||
func NewReader(r io.Reader, d EncodingHint) io.Reader {
|
||||
var decoder *encoding.Decoder
|
||||
switch d {
|
||||
case UTF8:
|
||||
// Make a transformer that assumes UTF-8 but abides by the BOM.
|
||||
decoder = unicode.UTF8.NewDecoder()
|
||||
case UTF16LE:
|
||||
// Make an tranformer that decodes MS-Windows (16LE) UTF files:
|
||||
winutf := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
|
||||
// Make a transformer that is like winutf, but abides by BOM if found:
|
||||
decoder = winutf.NewDecoder()
|
||||
case UTF16BE:
|
||||
// Make an tranformer that decodes UTF-16BE files:
|
||||
utf16be := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
|
||||
// Make a transformer that is like utf16be, but abides by BOM if found:
|
||||
decoder = utf16be.NewDecoder()
|
||||
}
|
||||
|
||||
// Make a Reader that uses utf16bom:
|
||||
return transform.NewReader(r, unicode.BOMOverride(decoder))
|
||||
}
|
||||
|
||||
// BytesReader is a convenience function that takes a []byte and decodes them to UTF-8.
|
||||
func BytesReader(b []byte, d EncodingHint) io.Reader {
|
||||
return NewReader(bytes.NewReader(b), d)
|
||||
}
|
Reference in New Issue
Block a user