逐步优化实现一个高效的随机字符串算法

来自stackoverflow上的一个问题:How to generate a random string of a fixed length in Go?

问题描述是:我想要一个随机的字符串(大写或小写,没有数字),用Go实现, 哪种方法最快最简单?

下面内容来自Paul’s solution回答

常见做法(Runes)

随机获取字符串

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
func init() {
    rand.Seed(time.Now().UnixNano())
}

var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

func RandStringRunes(n int) string {
    b := make([]rune, n)
    for i := range b {
        b[i] = letterRunes[rand.Intn(len(letterRunes))]
    }
    return string(b)
}

Bytes改进

因为随机字符串只包含大小写字母, 在Golang UTF-8编码中,英文字母和byte字节是一对一的。 byte的本质是uint8类型,rune本质是int32类型,没有必要使用rune类型存储。

1
2
3
4
5
6
7
8
9
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

func RandStringBytes(n int) string {
    b := make([]byte, n)
    for i := range b {
        b[i] = letterBytes[rand.Intn(len(letterBytes))]
    }
    return string(b)
}

余数改进

rand.Intn()相比直接调用rand.Int63()要慢很多, 把rand.Intn()换成rand.Int63()来提高效率。

1
2
3
4
5
6
7
func RandStringBytesRmndr(n int) string {
    b := make([]byte, n)
    for i := range b {
        b[i] = letterBytes[rand.Int63() % int64(len(letterBytes))]
    }
    return string(b)
}

Masking 掩码

使用随机数的最低位保证字母的均等分配,也就是掩码的方式。现有52个字母,52用二进制表示就是52==110100b, 所以我们可以只使用rand.Int63()返回最低的6位数就可以。为了保证平均分配,如果返回的值大于len(letterBytes)-1,则舍弃不用

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
    letterIdxBits = 6                    // 6 bits to represent a letter index
    letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
)

func RandStringBytesMask(n int) string {
    b := make([]byte, n)
    for i := 0; i < n; {
        if idx := int(rand.Int63() & letterIdxMask); idx < len(letterBytes) {
            b[i] = letterBytes[idx]
            i++
        }
    }
    return string(b)
}

Masking 掩码改进

使用rand.Int63()可以生成63个随机位的数,把63分为10份(63/6=10)充分利用,rand.Int63()次数将大大降低,进而提升效率。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
    letterIdxBits = 6                    // 6 bits to represent a letter index
    letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
    letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
)

func RandStringBytesMaskImpr(n int) string {
    b := make([]byte, n)
    // A rand.Int63() generates 63 random bits, enough for letterIdxMax letters!
    for i, cache, remain := n-1, rand.Int63(), letterIdxMax; i >= 0; {
        if remain == 0 {
            cache, remain = rand.Int63(), letterIdxMax
        }
        if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
            b[i] = letterBytes[idx]
            i--
        }
        cache >>= letterIdxBits
        remain--
    }

    return string(b)
}

rand Source 优化

原来的rand.Int63()是整个rand包全局的,而且支持安全高并发,所以速度比较慢。 使用rand.Source替换全局rand来提高效率。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
var src = rand.NewSource(time.Now().UnixNano())

func RandStringBytesMaskImprSrc(n int) string {
    b := make([]byte, n)
    // A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
    for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
        if remain == 0 {
            cache, remain = src.Int63(), letterIdxMax
        }
        if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
            b[i] = letterBytes[idx]
            i--
        }
        cache >>= letterIdxBits
        remain--
    }

    return string(b)
}

strings.Builder 改进

使用G0 1.10新增的功能strings.Builder,提升字符串拼接的效率。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
func RandStringBytesMaskImprSrcSB(n int) string {
    sb := strings.Builder{}
    sb.Grow(n)
    // A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
    for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
        if remain == 0 {
            cache, remain = src.Int63(), letterIdxMax
        }
        if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
            sb.WriteByte(letterBytes[idx])
            i--
        }
        cache >>= letterIdxBits
        remain--
    }

    return sb.String()
}

使用unsafe包模拟 strings.Builder

strings.Builder内置了一个[]byte存储字符,最终转换为string的时候为了避免拷贝,使用了unsafe包。

1
2
3
4
// String returns the accumulated string.
func (b *Builder) String() string {
    return *(*string)(unsafe.Pointer(&b.buf))
}

直接替换strings.Builder()更简洁

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
func RandStringBytesMaskImprSrcUnsafe(n int) string {
    b := make([]byte, n)
    // A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
    for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
        if remain == 0 {
            cache, remain = src.Int63(), letterIdxMax
        }
        if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
            b[i] = letterBytes[idx]
            i--
        }
        cache >>= letterIdxBits
        remain--
    }

    return *(*string)(unsafe.Pointer(&b))
}

Benchmark 性能测试

Benchmark测试这些方法的效果:

1
2
3
4
5
6
7
8
BenchmarkRunes-4                     2000000    723 ns/op   96 B/op   2 allocs/op
BenchmarkBytes-4                     3000000    550 ns/op   32 B/op   2 allocs/op
BenchmarkBytesRmndr-4                3000000    438 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMask-4                 3000000    534 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMaskImpr-4            10000000    176 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMaskImprSrc-4         10000000    139 ns/op   32 B/op   2 allocs/op
BenchmarkBytesMaskImprSrcSB-4       10000000    134 ns/op   16 B/op   1 allocs/op
BenchmarkBytesMaskImprSrcUnsafe-4   10000000    115 ns/op   16 B/op   1 allocs/op

Benchmark代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package main
import (
    "math/rand"
    "testing"
    "time"
)
// Implementations
func init() {
    rand.Seed(time.Now().UnixNano())
}
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
func RandStringRunes(n int) string {
    b := make([]rune, n)
    for i := range b {
        b[i] = letterRunes[rand.Intn(len(letterRunes))]
    }
    return string(b)
}
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
const (
    letterIdxBits = 6                    // 6 bits to represent a letter index
    letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
    letterIdxMax  = 63 / letterIdxBits   // # of letter indices fitting in 63 bits
)
func RandStringBytes(n int) string {
    b := make([]byte, n)
    for i := range b {
        b[i] = letterBytes[rand.Intn(len(letterBytes))]
    }
    return string(b)
}
func RandStringBytesRmndr(n int) string {
    b := make([]byte, n)
    for i := range b {
        b[i] = letterBytes[rand.Int63()%int64(len(letterBytes))]
    }
    return string(b)
}
func RandStringBytesMask(n int) string {
    b := make([]byte, n)
    for i := 0; i < n; {
        if idx := int(rand.Int63() & letterIdxMask); idx < len(letterBytes) {
            b[i] = letterBytes[idx]
            i++
        }
    }
    return string(b)
}
func RandStringBytesMaskImpr(n int) string {
    b := make([]byte, n)
    // A rand.Int63() generates 63 random bits, enough for letterIdxMax letters!
    for i, cache, remain := n-1, rand.Int63(), letterIdxMax; i >= 0; {
        if remain == 0 {
            cache, remain = rand.Int63(), letterIdxMax
        }
        if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
            b[i] = letterBytes[idx]
            i--
        }
        cache >>= letterIdxBits
        remain--
    }
    return string(b)
}
var src = rand.NewSource(time.Now().UnixNano())
func RandStringBytesMaskImprSrc(n int) string {
    b := make([]byte, n)
    // A src.Int63() generates 63 random bits, enough for letterIdxMax characters!
    for i, cache, remain := n-1, src.Int63(), letterIdxMax; i >= 0; {
        if remain == 0 {
            cache, remain = src.Int63(), letterIdxMax
        }
        if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
            b[i] = letterBytes[idx]
            i--
        }
        cache >>= letterIdxBits
        remain--
    }
    return string(b)
}
// Benchmark functions
const n = 16
func BenchmarkRunes(b *testing.B) {
    for i := 0; i < b.N; i++ {
        RandStringRunes(n)
    }
}
func BenchmarkBytes(b *testing.B) {
    for i := 0; i < b.N; i++ {
        RandStringBytes(n)
    }
}
func BenchmarkBytesRmndr(b *testing.B) {
    for i := 0; i < b.N; i++ {
        RandStringBytesRmndr(n)
    }
}
func BenchmarkBytesMask(b *testing.B) {
    for i := 0; i < b.N; i++ {
        RandStringBytesMask(n)
    }
}
func BenchmarkBytesMaskImpr(b *testing.B) {
    for i := 0; i < b.N; i++ {
        RandStringBytesMaskImpr(n)
    }
}
func BenchmarkBytesMaskImprSrc(b *testing.B) {
    for i := 0; i < b.N; i++ {
        RandStringBytesMaskImprSrc(n)
    }
}

参考