打包解包协议中应用adler32校验和

adler32 介绍

Adler-32是Mark Adler发明的校验和算法,和32位CRC校验算法一样,都是保护数据防止意外更改的算法,
和CRC一样不能完全可靠的校验数据,存在伪造风险,但是比CRC计算快。

Adler-32通过求解两个16位的数值A、B实现,并将结果连结成一个32位整数;
A是所有字节的和,B是A在相加时每一阶段值的和。两个累积值都取65521的余数;
在Adler-32开始运行时,A初始化为1,B初始化为0,两个累积值都要对65521取模(65521是小于2^16的最大素数);
Adler-32校验和保存为B*65536 + A。(最高有效字节在前/大端在前)
1
2
3
4
5
A = 1 + D1 + D2 + ... + Dn (mod 65521)
B = (1 + D1) + (1 + D1 + D2) + ... + (1 + D1 + D2 + ... + Dn) (mod 65521)
   = n×D1 + (n-1)×D2 + (n-2)×D3 + ... + Dn + n (mod 65521)
Adler-32(D) = B × 65536 + A
其中D为字符串的字节,n是D的字节长度

举例使用Adler-32校验算法产生字符串"Wikipedia"的校验和:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
ASCII code          A                   B
   W: 87           1 +  87 =  88        0 +  88 =   88
   i: 105         88 + 105 = 193       88 + 193 =  281
   k: 107        193 + 107 = 300      281 + 300 =  581
   i: 105        300 + 105 = 405      581 + 405 =  986
   p: 112        405 + 112 = 517      986 + 517 = 1503
   e: 101        517 + 101 = 618     1503 + 618 = 2121
   d: 100        618 + 100 = 718     2121 + 718 = 2839
   i: 105        718 + 105 = 823     2839 + 823 = 3662
   a: 97         823 +  97 = 920     3662 + 920 = 4582
   A =  920 = 0x398  hex(base 16)
   B = 4582 = 0x11E6 hex
   Output = 0x11E6 << 16 + 0x398 = 0x11E60398 hex

C语言实现

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
const uint32_t MOD_ADLER = 65521;

/*
 * Compute the Adler-32 checksum of a byte buffer.
 *
 * data: address of the first input byte.
 * len:  number of bytes to process.
 *
 * Returns the checksum: the running sum of the byte sums in the upper
 * 16 bits and the byte sum itself in the lower 16 bits.
 */
uint32_t adler32(unsigned char *data, size_t len)
{
    uint32_t low = 1;   /* 1 + sum of all bytes seen so far, mod MOD_ADLER */
    uint32_t high = 0;  /* sum of every intermediate value of low, mod MOD_ADLER */
    size_t remaining;

    /* Consume the input front to back, one byte per iteration. */
    for (remaining = len; remaining > 0; --remaining)
    {
        low = (low + *data++) % MOD_ADLER;
        high = (high + low) % MOD_ADLER;
    }

    return low | (high << 16);
}

adler32 应用打包解包协议

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package main

import (
    "encoding/binary"
    "fmt"
    "hash/adler32"
    "io"
)

// RPC_MAGIC is the 4-byte marker ("piao") that EncodePacket writes right after
// the size field and that DecodePacket requires at the same position.
var RPC_MAGIC = [4]byte{'p', 'i', 'a', 'o'}

// Packet describes the framing of one message on the wire:
//
//    | 32 bit length | 32 bit magic | length - 64 bit payload | 32 bit sum |
//
// The length field counts the magic, payload and sum that follow it, so the
// payload size is the length minus 64 bits (8 bytes).
//
// NOTE(review): EncodePacket and DecodePacket in this file do not reference
// this type; it appears to serve as documentation of the layout.
type Packet struct {
    // TotalSize is the value of the leading 32 bit length field.
    TotalSize uint32
    // Magic is the 32 bit marker that follows the length field.
    Magic     [4]byte
    // Payload holds the message body.
    Payload   []byte
    // Checksum is the trailing 32 bit sum.
    Checksum  uint32
}

// EncodePacket frames payload and writes it to w in big-endian order:
//
//    | 32 bit total size | 4 byte magic | payload | 32 bit Adler-32 checksum |
//
// The total size counts the magic, the payload and the checksum (it does not
// include the size field itself). The checksum covers the magic and the
// payload only.
//
// It returns an error if the payload does not fit the 32 bit size field or if
// any write to w fails; write errors are wrapped so errors.Is/As still match
// the underlying cause. On error a partial packet may already have been
// written to w.
func EncodePacket(w io.Writer, payload []byte) error {
    const (
        checksumSize = 4
        maxTotalSize = 1<<32 - 1
    )

    // Compute in 64 bits so an oversized payload is rejected instead of being
    // silently truncated by the uint32 conversion.
    total := uint64(len(RPC_MAGIC)) + uint64(len(payload)) + checksumSize
    if total > maxTotalSize {
        return fmt.Errorf("payload too large: %d bytes", len(payload))
    }

    // The size field is not part of the checksum, so it goes straight to w.
    if err := binary.Write(w, binary.BigEndian, uint32(total)); err != nil {
        return fmt.Errorf("write total size err %w", err)
    }

    // Everything written through ww is both hashed and forwarded to w.
    sum := adler32.New()
    ww := io.MultiWriter(sum, w)

    if err := binary.Write(ww, binary.BigEndian, RPC_MAGIC); err != nil {
        return fmt.Errorf("write magic err %w", err)
    }
    if _, err := ww.Write(payload); err != nil {
        return fmt.Errorf("write payload err %w", err)
    }

    // The checksum itself bypasses the hasher.
    if err := binary.Write(w, binary.BigEndian, sum.Sum32()); err != nil {
        return fmt.Errorf("write checksum err %w", err)
    }
    return nil
}

// DecodePacket reads one framed packet from r and returns its payload.
//
// Expected layout (big-endian):
//
//    | 32 bit total size | 4 byte magic | payload | 32 bit Adler-32 checksum |
//
// The total size counts the magic, payload and checksum. The checksum is
// computed over the magic and the payload and compared with the trailing
// value read from r. An error is returned on a short read, a size field
// below 8, a magic mismatch or a checksum mismatch.
func DecodePacket(r io.Reader) ([]byte, error) {
    // magic (4 bytes) + checksum (4 bytes): the smallest legal size field.
    const minSize = 8

    var size uint32
    if err := binary.Read(r, binary.BigEndian, &size); err != nil {
        return nil, fmt.Errorf("read total size err %w", err)
    }
    if size < minSize {
        return nil, fmt.Errorf("bad packet. header:%d", size)
    }

    // Bytes pulled through hashed are fed to the digest as they are read.
    digest := adler32.New()
    hashed := io.TeeReader(r, digest)

    var got [4]byte
    if _, err := io.ReadFull(hashed, got[:]); err != nil {
        return nil, fmt.Errorf("read magic err %w", err)
    }
    if got != RPC_MAGIC {
        return nil, fmt.Errorf("bad rpc magic:%v", got)
    }

    body := make([]byte, size-minSize)
    if _, err := io.ReadFull(hashed, body); err != nil {
        return nil, fmt.Errorf("read payload err %w", err)
    }

    // The trailing checksum is read from r directly so it is not hashed.
    var remote uint32
    if err := binary.Read(r, binary.BigEndian, &remote); err != nil {
        return nil, fmt.Errorf("read checksum err %w", err)
    }

    if local := digest.Sum32(); local != remote {
        return nil, fmt.Errorf("checkSum error, %d(calc) %d(remote)", local, remote)
    }
    return body, nil
}

参考