逐步优化实现一个高效的随机字符串算法
来自stackoverflow上的一个问题:How to generate a random string of a fixed length in Go?
问题描述是:我想要一个随机的字符串(大写或小写,没有数字),用Go实现, 哪种方法最快最简单?
下面内容来自Paul’s solution回答
常见做法(Runes)
随机获取字符串
1
2
3
4
5
6
7
8
9
10
11
12
13
|
func init() {
rand.Seed(time.Now().UnixNano())
}
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
func RandStringRunes(n int) string {
b := make([]rune, n)
for i := range b {
b[i] = letterRunes[rand.Intn(len(letterRunes))]
}
return string(b)
}
|
Bytes改进
因为随机字符串只包含大小写字母, 在Golang UTF-8
编码中,英文字母和byte
字节是一对一的。
byte
的本质是uint8
类型,rune
本质是int32
类型,没有必要使用rune
类型存储。
1
2
3
4
5
6
7
8
9
|
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
func RandStringBytes(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = letterBytes[rand.Intn(len(letterBytes))]
}
return string(b)
}
|
余数改进
rand.Intn()
相比直接调用rand.Int63()
要慢很多,
把rand.Intn()
换成rand.Int63()
来提高效率。
1
2
3
4
5
6
7
|
func RandStringBytesRmndr(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = letterBytes[rand.Int63() % int64(len(letterBytes))]
}
return string(b)
}
|
Masking 掩码
使用随机数的最低位保证字母的均等分配,也就是掩码的方式。现有52个字母,52用二进制表示就是52==110100b
,
所以我们可以只使用rand.Int63()
返回最低的6位数就可以。为了保证平均分配,如果返回的值大于len(letterBytes)-1
,则舍弃不用
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
letterIdxBits = 6 // 6 bits to represent a letter index
letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
)
func RandStringBytesMask(n int) string {
b := make([]byte, n)
for i := 0; i < n; {
if idx := int(rand.Int63() & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i++
}
}
return string(b)
}
|
Masking 掩码改进
使用rand.Int63()
可以生成63个随机位的数,把63分为10份(63/6=10)充分利用,rand.Int63()
次数将大大降低,进而提升效率。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
letterIdxBits = 6 // 6 bits to represent a letter index
letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
letterIdxMax = 63 / letterIdxBits // # of letter indices fitting in 63 bits
)
func RandStringBytesMaskImpr(n int) string {
b := make([]byte, n)
// A rand.Int63() generates 63 random bits, enough for letterIdxMax letters!
for i, cache, remain := n-1, rand.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = rand.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i--
}
cache >>= letterIdxBits
remain--
}
return string(b)
}
|
rand Source 优化
原来的rand.Int63()
是整个rand
包全局的,而且支持安全高并发,所以速度比较慢。
使用rand.Source
替换全局rand来提高效率。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
var src = rand.NewSource(time.Now().UnixNano())
func RandStringBytesMaskImprSrc(n int) string {
b := make([]byte, n)
// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i--
}
cache >>= letterIdxBits
remain--
}
return string(b)
}
|
strings.Builder 改进
使用G0 1.10
新增的功能strings.Builder
,提升字符串拼接的效率。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
func RandStringBytesMaskImprSrcSB(n int) string {
sb := strings.Builder{}
sb.Grow(n)
// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
sb.WriteByte(letterBytes[idx])
i--
}
cache >>= letterIdxBits
remain--
}
return sb.String()
}
|
使用unsafe包模拟 strings.Builder
strings.Builder
内置了一个[]byte
存储字符,最终转换为string
的时候为了避免拷贝,使用了unsafe
包。
1
2
3
4
|
// String returns the accumulated string.
func (b *Builder) String() string {
return *(*string)(unsafe.Pointer(&b.buf))
}
|
直接替换strings.Builder()
更简洁
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
func RandStringBytesMaskImprSrcUnsafe(n int) string {
b := make([]byte, n)
// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i--
}
cache >>= letterIdxBits
remain--
}
return *(*string)(unsafe.Pointer(&b))
}
|
Benchmark 性能测试
Benchmark测试这些方法的效果:
1
2
3
4
5
6
7
8
|
BenchmarkRunes-4 2000000 723 ns/op 96 B/op 2 allocs/op
BenchmarkBytes-4 3000000 550 ns/op 32 B/op 2 allocs/op
BenchmarkBytesRmndr-4 3000000 438 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMask-4 3000000 534 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMaskImpr-4 10000000 176 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMaskImprSrc-4 10000000 139 ns/op 32 B/op 2 allocs/op
BenchmarkBytesMaskImprSrcSB-4 10000000 134 ns/op 16 B/op 1 allocs/op
BenchmarkBytesMaskImprSrcUnsafe-4 10000000 115 ns/op 16 B/op 1 allocs/op
|
Benchmark代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
package main
import (
"math/rand"
"testing"
"time"
)
// Implementations
func init() {
rand.Seed(time.Now().UnixNano())
}
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
func RandStringRunes(n int) string {
b := make([]rune, n)
for i := range b {
b[i] = letterRunes[rand.Intn(len(letterRunes))]
}
return string(b)
}
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
letterIdxBits = 6 // 6 bits to represent a letter index
letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
letterIdxMax = 63 / letterIdxBits // # of letter indices fitting in 63 bits
)
func RandStringBytes(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = letterBytes[rand.Intn(len(letterBytes))]
}
return string(b)
}
func RandStringBytesRmndr(n int) string {
b := make([]byte, n)
for i := range b {
b[i] = letterBytes[rand.Int63()%int64(len(letterBytes))]
}
return string(b)
}
func RandStringBytesMask(n int) string {
b := make([]byte, n)
for i := 0; i < n; {
if idx := int(rand.Int63() & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i++
}
}
return string(b)
}
func RandStringBytesMaskImpr(n int) string {
b := make([]byte, n)
// A rand.Int63() generates 63 random bits, enough for letterIdxMax letters!
for i, cache, remain := n-1, rand.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = rand.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i--
}
cache >>= letterIdxBits
remain--
}
return string(b)
}
var src = rand.NewSource(time.Now().UnixNano())
func RandStringBytesMaskImprSrc(n int) string {
b := make([]byte, n)
// A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
if remain == 0 {
cache, remain = src.Int63(), letterIdxMax
}
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
b[i] = letterBytes[idx]
i--
}
cache >>= letterIdxBits
remain--
}
return string(b)
}
// Benchmark functions
const n = 16
func BenchmarkRunes(b *testing.B) {
for i := 0; i < b.N; i++ {
RandStringRunes(n)
}
}
func BenchmarkBytes(b *testing.B) {
for i := 0; i < b.N; i++ {
RandStringBytes(n)
}
}
func BenchmarkBytesRmndr(b *testing.B) {
for i := 0; i < b.N; i++ {
RandStringBytesRmndr(n)
}
}
func BenchmarkBytesMask(b *testing.B) {
for i := 0; i < b.N; i++ {
RandStringBytesMask(n)
}
}
func BenchmarkBytesMaskImpr(b *testing.B) {
for i := 0; i < b.N; i++ {
RandStringBytesMaskImpr(n)
}
}
func BenchmarkBytesMaskImprSrc(b *testing.B) {
for i := 0; i < b.N; i++ {
RandStringBytesMaskImprSrc(n)
}
}
|
参考