V2EX = way to explore
V2EX 是一个关于分享和探索的地方
Sign Up Now
For Existing Member  Sign In
The Go Programming Language
http://golang.org/
Go Playground
Go Projects
Revel Web Framework
freehere

golang 分享一个将 windows 记事本各种编码文字转成 utf8

  •  
  •   freehere ·
    freehere107 · Jun 27, 2017 · 2057 views
    This topic created in 3235 days ago, the information mentioned may be changed or developed.

    起初想用 unoconv 将各类文档转成 pdf,txt 确实是一头包。这个方法基本可以将 txt 4 种格式都统一起来。 注意 linux 下需设置

    LANG C.UTF-8

    package until
    
    import (
    	"unicode/utf16"
    	"bytes"
    	"golang.org/x/text/encoding/simplifiedchinese"
    	"golang.org/x/text/transform"
    	"io/ioutil"
    )
    
    func Utf16toString(b []uint8) (string) {
    	if len(b)&1 != 0 {
    		return string(b)
    	}
    	var bom int
    	if len(b) >= 2 {
    		switch n := int(b[0])<<8 | int(b[1]); n {
    		case 0xfffe:
    			bom = 1
    			fallthrough
    		case 0xfeff:
    			b = b[2:]
    			w := make([]uint16, len(b)/2)
    			for i := range w {
    				w[i] = uint16(b[2*i+bom&1])<<8 | uint16(b[2*i+(bom+1)&1])
    			}
    			return string(utf16.Decode(w))
    		default:
    			gbk, _ := GbkToUtf8(b)
    			return string(gbk)
    		}
    	} else {
    		return string(b)
    	}
    
    }
    
    func GbkToUtf8(s []byte) ([]byte, error) {
    	reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
    	d, e := ioutil.ReadAll(reader)
    	if e != nil {
    		return nil, e
    	}
    	return d, nil
    }
    
    
    Supplement 1  ·  Jun 29, 2017

    utf8识别不是很好,今天做了一些修改

    func Utf16toString(b []uint8) (string) {
    	var bom int
    	if len(b) >= 2 {
    		switch n := int(b[0])<<8 | int(b[1]); n {
    		case 0xfffe:
    			bom = 1
    			fallthrough
    		case 0xfeff:
    			b = b[2:]
    			w := make([]uint16, len(b)/2)
    			for i := range w {
    				w[i] = uint16(b[2*i+bom&1])<<8 | uint16(b[2*i+(bom+1)&1])
    			}
    			return string(utf16.Decode(w))
    		case 0x564d:
    			gbk, _ := GbkToUtf8(b)
    			return string(gbk)
    		default:
    			return string(b)
    		}
    	} else {
    		return string(b)
    	}
    
    }
    
    No Comments Yet
    About   ·   Help   ·   Advertise   ·   Blog   ·   API   ·   FAQ   ·   Solana   ·   5503 Online   Highest 6679   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 34ms · UTC 01:13 · PVG 09:13 · LAX 18:13 · JFK 21:13
    ♥ Do have faith in what you're doing.