xhys121zero2025-04-18文章来源:SecHub网络安全社区
SM4国密算法的前身是SMS4。SMS4分组加密算法是中国无线局域网标准中使用的分组加密算法,在2012年被确定为国家密码行业标准,同时改名为SM4算法。
SM4密码算法是一个分组算法,该算法的分组长度为<u>128比特</u>,密钥长度为<u>128比特</u>。加密算法与密钥扩展算法均采用非线性迭代结构,运算轮数均为<u>32轮</u>。数据解密和数据加密的算法结构相同,只是轮密钥的使用顺序相反,解密轮密钥是加密轮密钥的逆序。
字 word
加密过程由32次迭代运算和一次反序变换组成。
当
F其实是轮函数
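设输入为 $(X_0, X_1, X_2, X_3)$(每个 $X_i$ 为32比特的字),轮密钥为 $rk$(32比特),则轮函数F见式(1):
$$F(X_0, X_1, X_2, X_3, rk) = X_0 \oplus T(X_1 \oplus X_2 \oplus X_3 \oplus rk) \tag{1}$$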
T:是一个可逆变换,由非线性变换r和线性变换L复合而成,即 $T(\cdot) = L(r(\cdot))$
r由4个并行的S盒构成。
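设输入为 $A = (a_0, a_1, a_2, a_3)$,输出为 $B = (b_0, b_1, b_2, b_3)$,则见式(2):
$$(b_0, b_1, b_2, b_3) = r(A) = (Sbox(a_0), Sbox(a_1), Sbox(a_2), Sbox(a_3)) \tag{2}$$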
a0,a1,a2,a3都是8bit的字符串,每个都可以用两位十六进制数字来表示(一位十六进制数字对应4bit),其中高4位作为S盒的行号,低4位作为S盒的列号。
总结一下,我们在加密的时候,有以下加密流程:
所以我们要生成
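加密过程中使用的轮密钥由加密密钥生成,其中加密密钥 $MK = (MK_0, MK_1, MK_2, MK_3)$(每个 $MK_i$ 为32比特的字),加密过程中的轮密钥生成方式见式(6)和式(7):
$$(K_0, K_1, K_2, K_3) = (MK_0 \oplus FK_0,\ MK_1 \oplus FK_1,\ MK_2 \oplus FK_2,\ MK_3 \oplus FK_3) \tag{6}$$
$$rk_i = K_{i+4} = K_i \oplus T'(K_{i+1} \oplus K_{i+2} \oplus K_{i+3} \oplus CK_i),\quad i = 0, 1, \dots, 31 \tag{7}$$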
下面是SM4分组密码算法对一组明文进行加密的运算示例
那根据
脚本如下:
# System parameter FK and the 128-bit encryption key MK, split into four 32-bit words.
FK = [0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc]
MK = [0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210]

# K_i = MK_i xor FK_i for i = 0..3.
# The whole XOR is parenthesized before masking: in Python `&` binds tighter
# than `^`, so `FK[i] ^ MK[i] & 0xFFFFFFFF` would mask MK[i] only.
K = [(mk ^ fk) & 0xFFFFFFFF for mk, fk in zip(MK, FK)]

for word in K:
    print(hex(word))
#0xa292ffa1 K0
#0xdf01febf K1
#0x99a12b0f K2
#0xc42410cc K3
接下来我们要根据
依次类推
脚本如下:
# Fixed key-schedule constants CK_0 .. CK_31.
CK = [
    0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
    0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
    0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
    0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
    0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
    0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
    0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
    0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279,
]
# K_0 .. K_3 obtained from MK xor FK in the previous step.
K = [0xa292ffa1, 0xdf01febf, 0x99a12b0f, 0xc42410cc]

# Input of T' for round 0: K_1 xor K_2 xor K_3 xor CK_0.
t_input = CK[0]
for word in K[1:]:
    t_input ^= word
print(hex(t_input))  #0x8283cb69
置换脚本如下:
# SM4 S-box as a 16x16 table: row = high nibble, column = low nibble of the input byte.
Sbox = [
    [0xd6,0x90,0xe9,0xfe,0xcc,0xe1,0x3d,0xb7,0x16,0xb6,0x14,0xc2,0x28,0xfb,0x2c,0x05],
    [0x2b,0x67,0x9a,0x76,0x2a,0xbe,0x04,0xc3,0xaa,0x44,0x13,0x26,0x49,0x86,0x06,0x99],
    [0x9c,0x42,0x50,0xf4,0x91,0xef,0x98,0x7a,0x33,0x54,0x0b,0x43,0xed,0xcf,0xac,0x62],
    [0xe4,0xb3,0x1c,0xa9,0xc9,0x08,0xe8,0x95,0x80,0xdf,0x94,0xfa,0x75,0x8f,0x3f,0xa6],
    [0x47,0x07,0xa7,0xfc,0xf3,0x73,0x17,0xba,0x83,0x59,0x3c,0x19,0xe6,0x85,0x4f,0xa8],
    [0x68,0x6b,0x81,0xb2,0x71,0x64,0xda,0x8b,0xf8,0xeb,0x0f,0x4b,0x70,0x56,0x9d,0x35],
    [0x1e,0x24,0x0e,0x5e,0x63,0x58,0xd1,0xa2,0x25,0x22,0x7c,0x3b,0x01,0x21,0x78,0x87],
    [0xd4,0x00,0x46,0x57,0x9f,0xd3,0x27,0x52,0x4c,0x36,0x02,0xe7,0xa0,0xc4,0xc8,0x9e],
    [0xea,0xbf,0x8a,0xd2,0x40,0xc7,0x38,0xb5,0xa3,0xf7,0xf2,0xce,0xf9,0x61,0x15,0xa1],
    [0xe0,0xae,0x5d,0xa4,0x9b,0x34,0x1a,0x55,0xad,0x93,0x32,0x30,0xf5,0x8c,0xb1,0xe3],
    [0x1d,0xf6,0xe2,0x2e,0x82,0x66,0xca,0x60,0xc0,0x29,0x23,0xab,0x0d,0x53,0x4e,0x6f],
    [0xd5,0xdb,0x37,0x45,0xde,0xfd,0x8e,0x2f,0x03,0xff,0x6a,0x72,0x6d,0x6c,0x5b,0x51],
    [0x8d,0x1b,0xaf,0x92,0xbb,0xdd,0xbc,0x7f,0x11,0xd9,0x5c,0x41,0x1f,0x10,0x5a,0xd8],
    [0x0a,0xc1,0x31,0x88,0xa5,0xcd,0x7b,0xbd,0x2d,0x74,0xd0,0x12,0xb8,0xe5,0xb4,0xb0],
    [0x89,0x69,0x97,0x4a,0x0c,0x96,0x77,0x7e,0x65,0xb9,0xf1,0x09,0xc5,0x6e,0xc6,0x84],
    [0x18,0xf0,0x7d,0xec,0x3a,0xdc,0x4d,0x20,0x79,0xee,0x5f,0x3e,0xd7,0xcb,0x39,0x48],
]


def sbox_substitute(data):
    """Replace every byte of `data` by its S-box image (row = high nibble, column = low nibble)."""
    return [Sbox[value >> 4][value & 0xF] for value in data]


def to_hex(data):
    """Render a list of bytes as lowercase hex, always two digits per byte.

    `hex(x).strip('0x')` is not used because `strip` removes every leading and
    trailing '0'/'x' character: 0x10 would print as "1" and 0x00 as "".
    """
    return "".join("{:02x}".format(value) for value in data)


# Bytes of 0x8283cb69, the T' input computed for round 0 of the key schedule.
a = [0x82, 0x83, 0xCB, 0x69]
b = sbox_substitute(a)
print(to_hex(b))  #8ad24122
所以简化后的式子如下:
而原型如下所示:
脚本如下:
/*
 * 32-bit shift / rotate-left helpers.
 * SHL keeps only the low 32 bits of the shifted value, and ROTL masks x before
 * the right shift, so both yield a clean 32-bit result even where
 * unsigned long is wider than 32 bits. n must be in the range 1..31.
 */
#define SHL(x,n) ((((x) & 0xFFFFFFFFUL) << (n)) & 0xFFFFFFFFUL)
#define ROTL(x,n) (SHL((x),(n)) | (((x) & 0xFFFFFFFFUL) >> (32 - (n))))
#include <stdio.h>
#include <string.h>
/*
 * Worked example of the key-schedule linear transform
 *   L'(B) = B ^ (B <<< 13) ^ (B <<< 23)
 * applied to the S-box output 0x8AD24122 of round 0.
 * Expected output: "rk = 53B37958".
 */
void calc()
{
    unsigned long a = 0x8AD24122;
    unsigned long rk = 0;

    /* Keep only the low 32 bits: unsigned long may be wider than 32 bits. */
    rk = (a ^ ROTL(a, 13) ^ ROTL(a, 23)) & 0xFFFFFFFFUL;
    /* %lX matches the unsigned long argument (plain %X expects unsigned int). */
    printf("rk = %lX\n", rk);
}

/* Program entry point: run the worked example once. */
int main(void)
{
    calc();
    return 0;
}
#rk = 53B37958
所以简化后的式子如下:
最后直接异或即可,脚本如下:
from ctypes import c_uint32

# rk_0 = K_0 xor T'(K_1 xor K_2 xor K_3 xor CK_0)
k0 = c_uint32(0xA292FFA1)       # K_0
t_out = c_uint32(0x53B37958)    # output of T' computed above
# Parenthesize the XOR before masking: `&` binds tighter than `^` in Python.
rk0 = (k0.value ^ t_out.value) & 0xFFFFFFFF
print(hex(rk0))  #0xf12186f9
那我们现在已经求出来了之后,我们回到一开始的轮函数加密的地方
然后接下来我们用脚本算一下即可
# Plaintext words X_0 .. X_3 and the first round key rk_0.
X = [0x01234567, 0x89ABCDEF, 0xFEDCBA98, 0x76543210]
RK_0 = 0xF12186F9

# Input of T in round 0: X_1 xor X_2 xor X_3 xor rk_0.
# The XOR chain is parenthesized before masking because `&` binds tighter than `^`.
t_input = (X[1] ^ X[2] ^ X[3] ^ RK_0) & 0xFFFFFFFF
print(hex(t_input))  #0xf002c39e
我们化简一下可以得出:
而我们又知道加密过程中的T是由非线性变换r和线性变换L所组成的,<u>这里的L和上面加密密钥生成轮密钥的是不同的!!!</u>
还是跟之前密钥的计算一样的,先进行一个r的S盒置换,然后进行一个L的线性变换:
r的S盒置换脚本如下:
# SM4 S-box as a 16x16 table: row = high nibble, column = low nibble of the input byte.
Sbox = [
    [0xd6,0x90,0xe9,0xfe,0xcc,0xe1,0x3d,0xb7,0x16,0xb6,0x14,0xc2,0x28,0xfb,0x2c,0x05],
    [0x2b,0x67,0x9a,0x76,0x2a,0xbe,0x04,0xc3,0xaa,0x44,0x13,0x26,0x49,0x86,0x06,0x99],
    [0x9c,0x42,0x50,0xf4,0x91,0xef,0x98,0x7a,0x33,0x54,0x0b,0x43,0xed,0xcf,0xac,0x62],
    [0xe4,0xb3,0x1c,0xa9,0xc9,0x08,0xe8,0x95,0x80,0xdf,0x94,0xfa,0x75,0x8f,0x3f,0xa6],
    [0x47,0x07,0xa7,0xfc,0xf3,0x73,0x17,0xba,0x83,0x59,0x3c,0x19,0xe6,0x85,0x4f,0xa8],
    [0x68,0x6b,0x81,0xb2,0x71,0x64,0xda,0x8b,0xf8,0xeb,0x0f,0x4b,0x70,0x56,0x9d,0x35],
    [0x1e,0x24,0x0e,0x5e,0x63,0x58,0xd1,0xa2,0x25,0x22,0x7c,0x3b,0x01,0x21,0x78,0x87],
    [0xd4,0x00,0x46,0x57,0x9f,0xd3,0x27,0x52,0x4c,0x36,0x02,0xe7,0xa0,0xc4,0xc8,0x9e],
    [0xea,0xbf,0x8a,0xd2,0x40,0xc7,0x38,0xb5,0xa3,0xf7,0xf2,0xce,0xf9,0x61,0x15,0xa1],
    [0xe0,0xae,0x5d,0xa4,0x9b,0x34,0x1a,0x55,0xad,0x93,0x32,0x30,0xf5,0x8c,0xb1,0xe3],
    [0x1d,0xf6,0xe2,0x2e,0x82,0x66,0xca,0x60,0xc0,0x29,0x23,0xab,0x0d,0x53,0x4e,0x6f],
    [0xd5,0xdb,0x37,0x45,0xde,0xfd,0x8e,0x2f,0x03,0xff,0x6a,0x72,0x6d,0x6c,0x5b,0x51],
    [0x8d,0x1b,0xaf,0x92,0xbb,0xdd,0xbc,0x7f,0x11,0xd9,0x5c,0x41,0x1f,0x10,0x5a,0xd8],
    [0x0a,0xc1,0x31,0x88,0xa5,0xcd,0x7b,0xbd,0x2d,0x74,0xd0,0x12,0xb8,0xe5,0xb4,0xb0],
    [0x89,0x69,0x97,0x4a,0x0c,0x96,0x77,0x7e,0x65,0xb9,0xf1,0x09,0xc5,0x6e,0xc6,0x84],
    [0x18,0xf0,0x7d,0xec,0x3a,0xdc,0x4d,0x20,0x79,0xee,0x5f,0x3e,0xd7,0xcb,0x39,0x48],
]


def sbox_substitute(data):
    """Replace every byte of `data` by its S-box image (row = high nibble, column = low nibble)."""
    return [Sbox[value >> 4][value & 0xF] for value in data]


def to_hex(data):
    """Render a list of bytes as lowercase hex, always two digits per byte.

    `hex(x).strip('0x')` is not used because `strip` removes every leading and
    trailing '0'/'x' character: 0x10 would print as "1" and 0x00 as "".
    """
    return "".join("{:02x}".format(value) for value in data)


# Bytes of 0xf002c39e, the T input computed for encryption round 0.
a = [0xF0, 0x02, 0xC3, 0x9E]
b = sbox_substitute(a)
print(to_hex(b))  #18e992b1
化简之后我们可以得出:
这里的L运算如下所示:
代码如下所示:
/*
 * 32-bit shift / rotate-left helpers.
 * SHL keeps only the low 32 bits of the shifted value, and ROTL masks x before
 * the right shift, so both yield a clean 32-bit result even where
 * unsigned long is wider than 32 bits. n must be in the range 1..31.
 */
#define SHL(x,n) ((((x) & 0xFFFFFFFFUL) << (n)) & 0xFFFFFFFFUL)
#define ROTL(x,n) (SHL((x),(n)) | (((x) & 0xFFFFFFFFUL) >> (32 - (n))))
#include <stdio.h>
#include <string.h>
/*
 * Worked example of the encryption linear transform
 *   L(B) = B ^ (B <<< 2) ^ (B <<< 10) ^ (B <<< 18) ^ (B <<< 24)
 * applied to the S-box output 0x18E992B1 of round 0.
 * Expected output: "rk = 26D99622".
 */
void calc()
{
    unsigned long a = 0x18E992B1;
    unsigned long rk = 0;

    /* Keep only the low 32 bits: unsigned long may be wider than 32 bits. */
    rk = (a ^ ROTL(a, 2) ^ ROTL(a, 10) ^ ROTL(a, 18) ^ ROTL(a, 24)) & 0xFFFFFFFFUL;
    /* %lX matches the unsigned long argument (plain %X expects unsigned int). */
    printf("rk = %lX\n", rk); /* rk = 26D99622 */
}

/* Program entry point: run the worked example once. */
int main(void)
{
    calc();
    return 0;
}
化简后得:
最后再进行一次异或即可得出.
脚本如下:
from ctypes import c_uint32

# X_4 = X_0 xor T(X_1 xor X_2 xor X_3 xor rk_0)
x0 = c_uint32(0x01234567)       # X_0
t_out = c_uint32(0x26D99622)    # output of T computed above
# Parenthesize the XOR before masking: `&` binds tighter than `^` in Python.
x4 = (x0.value ^ t_out.value) & 0xFFFFFFFF
print(hex(x4))  #0x27fad345
那么对照第一轮的输出状态的话,可以看出我们成功算出了第一轮的结果。
这里要注意的是,最后的密文是反序的,即 $(Y_0, Y_1, Y_2, Y_3) = (X_{35}, X_{34}, X_{33}, X_{32})$,依次类推输出16个字节即可。
/**
* file sm4.h
*/
#ifndef XYSSL_SM4_H
#define XYSSL_SM4_H
#define SM4_ENCRYPT 1
#define SM4_DECRYPT 0
#ifndef GET_ULONG_BE
/*
 * Load the four bytes b[i] .. b[i+3] as one big-endian 32-bit value into n
 * (b[i] becomes the most significant byte).
 * Each byte is converted to unsigned char first, so a buffer of plain or
 * signed char is not sign-extended into the upper bits of the result.
 * Expands to a brace block, so it can be used with or without a trailing ';'.
 */
#define GET_ULONG_BE(n,b,i)                                          \
{                                                                    \
    (n) = ( (unsigned long) (unsigned char) (b)[(i)    ] << 24 )     \
        | ( (unsigned long) (unsigned char) (b)[(i) + 1] << 16 )     \
        | ( (unsigned long) (unsigned char) (b)[(i) + 2] <<  8 )     \
        | ( (unsigned long) (unsigned char) (b)[(i) + 3]       );    \
}
#endif
/*
 * Store the low 32 bits of n into b[i] .. b[i+3] in big-endian order
 * (most significant byte goes to b[i]).
 * Expands to a brace block, so it can be used with or without a trailing ';'.
 */
#ifndef PUT_ULONG_BE
#define PUT_ULONG_BE(n,b,i)                                   \
{                                                             \
    (b)[(i) + 3] = (unsigned char) ( ((n)      ) & 0xFF );    \
    (b)[(i) + 2] = (unsigned char) ( ((n) >>  8) & 0xFF );    \
    (b)[(i) + 1] = (unsigned char) ( ((n) >> 16) & 0xFF );    \
    (b)[(i)    ] = (unsigned char) ( ((n) >> 24) & 0xFF );    \
}
#endif
/*
 * 32-bit rotate-left: SHL(x,n) gives x shifted left by n bits, and the bits
 * that fell off the top are OR-ed back in from x >> (32 - n).
 * SHL keeps only the low 32 bits of the shifted value, and ROTL masks x before
 * the right shift, so both yield a clean 32-bit result even where
 * unsigned long is wider than 32 bits. n must be in the range 1..31.
 * SWAP exchanges two unsigned long lvalues and clears its temporary.
 */
#define SHL(x,n) ((((x) & 0xFFFFFFFFUL) << (n)) & 0xFFFFFFFFUL)
#define ROTL(x,n) (SHL((x),(n)) | (((x) & 0xFFFFFFFFUL) >> (32 - (n))))
#define SWAP(a,b) {unsigned long t = (a); (a) = (b); (b) = t; t = 0;}
/**
* \brief SM4 context structure
*
* Holds the direction flag written by the key-setup functions and the
* 32 round keys consumed, one per round, by the block routine.
*/
typedef struct
{
int mode; /*!< direction flag: SM4_ENCRYPT or SM4_DECRYPT */
unsigned long sk[32]; /*!< SM4 subkeys (round keys), indexed by round number */
}
sm4_context;
#ifdef __cplusplus
extern "C" {
#endif
/**
* \brief SM4 key schedule (128-bit, encryption)
* \param ctx SM4 context to be initialized
* \param key 16-byte secret key
*/
void sm4_setkey_enc(sm4_context* ctx, unsigned char key[16]);
/**
* \brief SM4 key schedule (128-bit, decryption)
*
* Same expansion as for encryption, with the 32 round keys stored in
* reverse order.
* \param ctx SM4 context to be initialized
* \param key 16-byte secret key
*/
void sm4_setkey_dec(sm4_context* ctx, unsigned char key[16]);
/**
* \brief SM4-ECB block encryption/decryption
*
* Processes the data in 16-byte blocks while length > 0, so length is
* expected to be a multiple of 16. The direction is decided by the round
* keys stored in ctx (sm4_setkey_enc vs sm4_setkey_dec).
* \param ctx SM4 context
* \param mode SM4_ENCRYPT or SM4_DECRYPT (not read by the implementation in this file)
* \param length length of the input data in bytes
* \param input input block(s)
* \param output output block(s)
*/
void sm4_crypt_ecb(sm4_context* ctx, int mode, int length, unsigned char* input, unsigned char* output);
/**
* \brief SM4-CBC buffer encryption/decryption
*
* Processes the data in 16-byte blocks while length > 0, so length is
* expected to be a multiple of 16.
* \param ctx SM4 context
* \param mode SM4_ENCRYPT or SM4_DECRYPT
* \param length length of the input data in bytes
* \param iv initialization vector (updated after use)
* \param input buffer holding the input data
* \param output buffer holding the output data
*/
void sm4_crypt_cbc(sm4_context* ctx, int mode, int length, unsigned char iv[16], unsigned char* input, unsigned char* output);
#ifdef __cplusplus
}
#endif
#endif/* sm4.h */
#include <iostream>
#include <stdio.h>
#include <string.h>
#include "sm4.h"
using namespace std;
// SM4 S-box: 256-entry byte substitution table, indexed directly by the input byte.
const unsigned char Sbox[256] = {
0xd6,0x90,0xe9,0xfe,0xcc,0xe1,0x3d,0xb7,0x16,0xb6,0x14,0xc2,0x28,0xfb,0x2c,0x05,
0x2b,0x67,0x9a,0x76,0x2a,0xbe,0x04,0xc3,0xaa,0x44,0x13,0x26,0x49,0x86,0x06,0x99,
0x9c,0x42,0x50,0xf4,0x91,0xef,0x98,0x7a,0x33,0x54,0x0b,0x43,0xed,0xcf,0xac,0x62,
0xe4,0xb3,0x1c,0xa9,0xc9,0x08,0xe8,0x95,0x80,0xdf,0x94,0xfa,0x75,0x8f,0x3f,0xa6,
0x47,0x07,0xa7,0xfc,0xf3,0x73,0x17,0xba,0x83,0x59,0x3c,0x19,0xe6,0x85,0x4f,0xa8,
0x68,0x6b,0x81,0xb2,0x71,0x64,0xda,0x8b,0xf8,0xeb,0x0f,0x4b,0x70,0x56,0x9d,0x35,
0x1e,0x24,0x0e,0x5e,0x63,0x58,0xd1,0xa2,0x25,0x22,0x7c,0x3b,0x01,0x21,0x78,0x87,
0xd4,0x00,0x46,0x57,0x9f,0xd3,0x27,0x52,0x4c,0x36,0x02,0xe7,0xa0,0xc4,0xc8,0x9e,
0xea,0xbf,0x8a,0xd2,0x40,0xc7,0x38,0xb5,0xa3,0xf7,0xf2,0xce,0xf9,0x61,0x15,0xa1,
0xe0,0xae,0x5d,0xa4,0x9b,0x34,0x1a,0x55,0xad,0x93,0x32,0x30,0xf5,0x8c,0xb1,0xe3,
0x1d,0xf6,0xe2,0x2e,0x82,0x66,0xca,0x60,0xc0,0x29,0x23,0xab,0x0d,0x53,0x4e,0x6f,
0xd5,0xdb,0x37,0x45,0xde,0xfd,0x8e,0x2f,0x03,0xff,0x6a,0x72,0x6d,0x6c,0x5b,0x51,
0x8d,0x1b,0xaf,0x92,0xbb,0xdd,0xbc,0x7f,0x11,0xd9,0x5c,0x41,0x1f,0x10,0x5a,0xd8,
0x0a,0xc1,0x31,0x88,0xa5,0xcd,0x7b,0xbd,0x2d,0x74,0xd0,0x12,0xb8,0xe5,0xb4,0xb0,
0x89,0x69,0x97,0x4a,0x0c,0x96,0x77,0x7e,0x65,0xb9,0xf1,0x09,0xc5,0x6e,0xc6,0x84,
0x18,0xf0,0x7d,0xec,0x3a,0xdc,0x4d,0x20,0x79,0xee,0x5f,0x3e,0xd7,0xcb,0x39,0x48
};
// CK: fixed parameters of the key schedule; CK[i] is XOR-ed into the T' input of round i.
const unsigned int CK[32] = {
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 };
// FK: system parameters; FK[i] is XOR-ed with key word MK[i] to form K[i] in sm4_setkey.
static const unsigned long FK[4] = { 0xa3b1bac6,0x56aa3350,0x677d9197,0xb27022dc };
/*
 * S-box lookup: returns the substitution-table entry for the byte inch.
 */
static unsigned char sm4Sbox(unsigned char inch)
{
    return Sbox[inch];
}
//已知加密密钥MK,求轮转密钥rk
static unsigned long sm4CaliRk(unsigned long ka) { //复合变换T
unsigned long bb = 0; //unsigned long 4字节( 32bit )
unsigned long rk = 0;
unsigned char a[4];
unsigned char b[4];
PUT_ULONG_BE(ka, a, 0) //换转成8bit一个字符
b[0] = sm4Sbox(a[0]);
b[1] = sm4Sbox(a[1]);
b[2] = sm4Sbox(a[2]);
b[3] = sm4Sbox(a[3]);
GET_ULONG_BE(bb, b, 0) //将变换结果转换为32bit的整数
//对得到的32位整数bb进行线性变换
rk = bb ^ ROTL(bb, 13) ^ ROTL(bb, 23);
return rk;
}
static void sm4_setkey(unsigned long SK[32], unsigned char key[16]) {
unsigned long MK[4];
unsigned long k[36];
unsigned long i = 0;
GET_ULONG_BE(MK[0], key, 0);
GET_ULONG_BE(MK[1], key, 4);
GET_ULONG_BE(MK[2], key, 8);
GET_ULONG_BE(MK[3], key, 12);
k[0] = MK[0] ^ FK[0];
k[1] = MK[1] ^ FK[1];
k[2] = MK[2] ^ FK[2];
k[3] = MK[3] ^ FK[3];
for (; i < 32; i++) {
k[i + 4] = k[i] ^ sm4CaliRk(k[i + 1] ^ k[i + 2] ^ k[i + 3] ^ CK[i]);
SK[i] = k[i + 4];
}
}
/*
 * Prepare ctx for encryption: expand key into ctx->sk and flag the
 * context as SM4_ENCRYPT.
 */
void sm4_setkey_enc(sm4_context* ctx, unsigned char key[16])
{
    sm4_setkey(ctx->sk, key);
    ctx->mode = SM4_ENCRYPT;
}
static unsigned long sm4Lt(unsigned long ka)
{
unsigned long bb = 0;
unsigned long c = 0;
unsigned char a[4];
unsigned char b[4];
PUT_ULONG_BE(ka, a, 0)
// b[0] = sm4Sbox(a[0]);
// b[1] = sm4Sbox(a[1]);
// b[2] = sm4Sbox(a[2]);
// b[3] = sm4Sbox(a[3]);
b[0] = Sbox[a[0]];
b[1] = Sbox[a[1]];
b[2] = Sbox[a[2]];
b[3] = Sbox[a[3]];
GET_ULONG_BE(bb, b, 0)
c = bb ^ (ROTL(bb, 2)) ^ (ROTL(bb, 10)) ^ (ROTL(bb, 18)) ^ (ROTL(bb, 24));
return c;
}
/*
 * Round function: F(x0, x1, x2, x3, rk) = x0 ^ T(x1 ^ x2 ^ x3 ^ rk).
 */
static unsigned long sm4F(unsigned long x0, unsigned long x1, unsigned long x2, unsigned long x3, unsigned long rk)
{
    unsigned long t_in = x1 ^ x2 ^ x3 ^ rk;
    unsigned long t_out = sm4Lt(t_in);

    return x0 ^ t_out;
}
static void sm4_one_round(unsigned long sk[32],
unsigned char input[16],
unsigned char output[16])
{
unsigned long i = 0;
unsigned long ulbuf[36];
memset(ulbuf, 0, sizeof(ulbuf));
GET_ULONG_BE(ulbuf[0], input, 0)
GET_ULONG_BE(ulbuf[1], input, 4)
GET_ULONG_BE(ulbuf[2], input, 8)
GET_ULONG_BE(ulbuf[3], input, 12)
while (i < 32)
{
ulbuf[i + 4] = sm4F(ulbuf[i], ulbuf[i + 1], ulbuf[i + 2], ulbuf[i + 3], sk[i]);
// #ifdef _DEBUG
// printf("rk(%02d) = 0x%08x, X(%02d) = 0x%08x \n",i,sk[i], i, ulbuf[i+4] );
// #endif
i++;
}
PUT_ULONG_BE(ulbuf[35], output, 0);
PUT_ULONG_BE(ulbuf[34], output, 4);
PUT_ULONG_BE(ulbuf[33], output, 8);
PUT_ULONG_BE(ulbuf[32], output, 12);
}
/*
 * ECB mode: run the block routine over the data in 16-byte steps while
 * length is positive. The mode argument is not read here; the direction
 * depends on the round keys stored in ctx.
 */
void sm4_crypt_ecb(sm4_context* ctx,
    int mode,
    int length,
    unsigned char* input,
    unsigned char* output)
{
    for (; length > 0; length -= 16, input += 16, output += 16)
    {
        sm4_one_round(ctx->sk, input, output);
    }
}
/*
 * Prepare ctx for decryption.
 *
 * Decryption uses the same round function as encryption with the round keys
 * in reverse order, so the key is expanded as for encryption and then
 * sk[i] is swapped with sk[31 - i] for i = 0..15.
 * The context is flagged as SM4_DECRYPT (it was previously flagged as
 * SM4_ENCRYPT by mistake).
 */
void sm4_setkey_dec(sm4_context* ctx, unsigned char key[16])
{
    int i;

    ctx->mode = SM4_DECRYPT;
    sm4_setkey(ctx->sk, key);
    for (i = 0; i < 16; i++)
    {
        SWAP(ctx->sk[i], ctx->sk[31 - i]);
    }
}
/*
 * CBC mode encryption/decryption over 16-byte blocks while length is positive.
 *
 * Encrypt (mode == SM4_ENCRYPT): each block is XOR-ed with iv, processed,
 * and the result becomes the next iv.
 * Decrypt (any other mode): the input block is saved, processed, XOR-ed with
 * iv, and the saved input block becomes the next iv.
 * iv is updated in place.
 */
void sm4_crypt_cbc(sm4_context* ctx,
    int mode,
    int length,
    unsigned char iv[16],
    unsigned char* input,
    unsigned char* output)
{
    unsigned char saved[16];
    int n;
    const int encrypting = (mode == SM4_ENCRYPT);

    for (; length > 0; length -= 16, input += 16, output += 16)
    {
        if (encrypting)
        {
            for (n = 0; n < 16; n++)
            {
                output[n] = (unsigned char)(input[n] ^ iv[n]);
            }
            sm4_one_round(ctx->sk, output, output);
            memcpy(iv, output, 16);
        }
        else
        {
            /* Keep the ciphertext block: output may alias input. */
            memcpy(saved, input, 16);
            sm4_one_round(ctx->sk, input, output);
            for (n = 0; n < 16; n++)
            {
                output[n] = (unsigned char)(output[n] ^ iv[n]);
            }
            memcpy(iv, saved, 16);
        }
    }
}
int main()
{
unsigned char key[16] = { 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 };
unsigned char input[16] = { 0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10 };
unsigned char output[16];
sm4_context ctx;
unsigned long i;
//encrypt
sm4_setkey_enc(&ctx, key);
sm4_crypt_ecb(&ctx, 1, 16, input, output);
//加密结果
printf("加密结果:\n");
for (i = 0; i < 16; i++) {
printf("%02x ", output[i]);
}
printf("\n");
sm4_setkey_dec(&ctx, key);
sm4_crypt_ecb(&ctx, 0, 16, output, output);
//解密结果
printf("解密结果:\n");
for (i = 0; i < 16; i++) {
printf("%02x ", output[i]);
}
printf("\n");
return 0;
}
函数分析:
总结函数结构如下:
打开后,进入main函数,main函数代码如下:
// Hex-Rays pseudocode of the sample's main().
// v13 receives the 16 key bytes and v12 the 16 plaintext bytes (both 01 23 45 67 ... 32 10),
// v10 is the buffer passed as the context to the key-setup and crypt calls,
// v11 is the 16-byte output buffer that is printed.
// sub_4110CD is called like printf (format string first) - presumably printf.
// v4..v7 are never assigned; they look like decompiler artifacts of the variadic calls.
int __cdecl main_0(int argc, const char **argv, const char **envp)
{
char v4; // [esp+0h] [ebp-1B0h]
char v5; // [esp+0h] [ebp-1B0h]
char v6; // [esp+0h] [ebp-1B0h]
char v7; // [esp+0h] [ebp-1B0h]
unsigned int i; // [esp+D0h] [ebp-E0h]
unsigned int j; // [esp+D0h] [ebp-E0h]
char v10[140]; // [esp+DCh] [ebp-D4h] BYREF
char v11[24]; // [esp+168h] [ebp-48h] BYREF
char v12[24]; // [esp+180h] [ebp-30h] BYREF
char v13[20]; // [esp+198h] [ebp-18h] BYREF
__CheckForDebuggerJustMyCode(&unk_41D070);
// v13: the 16-byte key
v13[0] = 1;
v13[1] = 0x23;
v13[2] = 0x45;
v13[3] = 0x67;
v13[4] = 0x89;
v13[5] = 0xAB;
v13[6] = 0xCD;
v13[7] = 0xEF;
v13[8] = 0xFE;
v13[9] = 0xDC;
v13[10] = 0xBA;
v13[11] = 0x98;
v13[12] = 0x76;
v13[13] = 0x54;
v13[14] = 0x32;
v13[15] = 0x10;
// v12: the 16-byte input block
v12[0] = 1;
v12[1] = 0x23;
v12[2] = 0x45;
v12[3] = 0x67;
v12[4] = 0x89;
v12[5] = 0xAB;
v12[6] = 0xCD;
v12[7] = 0xEF;
v12[8] = 0xFE;
v12[9] = 0xDC;
v12[10] = 0xBA;
v12[11] = 0x98;
v12[12] = 0x76;
v12[13] = 0x54;
v12[14] = 0x32;
v12[15] = 0x10;
// key setup for encryption (context, key)
sub_4112AD((int)v10, (int)v13);
// ECB call: (context, mode = 1, length = 16, input, output)
sub_41113B(v10, 1, 16, v12, v11);
sub_4110CD((char *)&byte_418D40, v4);
for ( i = 0; i < 0x10; ++i )
sub_4110CD("%02x ", v11[i]);
sub_4110CD("\n", v5);
// second key setup - presumably the decryption variant (its body is not shown here)
sub_411212(v10, v13);
// ECB call in place on v11: (context, mode = 0, length = 16, input, output)
sub_41113B(v10, 0, 16, v11, v11);
sub_4110CD((char *)&byte_418D5C, v6);
for ( j = 0; j < 0x10; ++j )
sub_4110CD("%02x ", v11[j]);
sub_4110CD("\n", v7);
return 0;
}
可以看到v13是一个数组,不过在IDA其实是被分割成了多个小数组的,而v13实际上是我们的加密密钥,即:
看到这4组密钥,我们可以粗略的认为是SM4加密.
sub_4112AD((int)v10, (int)v13);
// Thunk: forwards (context, key) unchanged to sub_412EE0.
int __cdecl sub_4112AD(int a1, int a2)
{
return sub_412EE0(a1, a2);
}
// Encryption key setup: writes 1 into the first dword of the context
// (the value of SM4_ENCRYPT), then runs the key schedule with
// a1 + 1 (the dword right after that field, i.e. the round-key array) and the key.
int __cdecl sub_412EE0(_DWORD *a1, int a2)
{
__CheckForDebuggerJustMyCode(&unk_41D070);
*a1 = 1;
return sub_412330(a1 + 1, a2);
}
// Key schedule: a1 = destination round-key array, a2 = 16-byte key.
// v8..v11 are the four key words read big-endian (via _byteswap_ulong).
// v4, v5, v6 and v7[] occupy consecutive stack slots (ebp-ACh, -A8h, -A4h, -A0h...),
// so together they behave as one array K: K0 = v4, K1 = v5, K2 = v6, K3 = v7[0], K4 = v7[1], ...
// That is why the loop indexes through &v4 / &v5 and reads v7[v3 - 1] (which is v6 when v3 == 0).
int __cdecl sub_412330(int a1, unsigned int *a2)
{
int result; // eax
unsigned int v3; // [esp+D0h] [ebp-B8h]
unsigned int v4; // [esp+DCh] [ebp-ACh]
unsigned int v5; // [esp+E0h] [ebp-A8h]
unsigned int v6; // [esp+E4h] [ebp-A4h]
int v7[35]; // [esp+E8h] [ebp-A0h]
unsigned int v8; // [esp+174h] [ebp-14h]
unsigned int v9; // [esp+178h] [ebp-10h]
unsigned int v10; // [esp+17Ch] [ebp-Ch]
unsigned int v11; // [esp+180h] [ebp-8h]
__CheckForDebuggerJustMyCode(&unk_41D070);
v3 = 0;
v8 = _byteswap_ulong(*a2);
v9 = _byteswap_ulong(a2[1]);
v10 = _byteswap_ulong(a2[2]);
v11 = _byteswap_ulong(a2[3]);
// K0..K3 = key words XOR-ed with four constants; 0xA3B1BAC6 equals FK[0] of the source,
// so dword_418CB0 is presumably the FK table
v4 = v8 ^ 0xA3B1BAC6;
v5 = dword_418CB0[1] ^ v9;
v6 = dword_418CB0[2] ^ v10;
result = 12;
v7[0] = dword_418CB0[3] ^ v11;
// 32 iterations (v3 = 0..31): K[v3+4] = K[v3] ^ sub_411850(K[v3+1] ^ K[v3+2] ^ K[v3+3] ^ dword_418C30[v3]);
// dword_418C30 is indexed by the round number - presumably the CK table
while ( v3 < 0x20 )
{
v7[v3 + 1] = *(&v4 + v3) ^ sub_411850(dword_418C30[v3] ^ v7[v3] ^ v7[v3 - 1] ^ *(&v5 + v3));
// store the new word as round key number v3 (4 bytes per entry)
*(_DWORD *)(a1 + 4 * v3) = v7[v3 + 1];
result = ++v3;
}
return result;
}
那么传入的a1是什么呢?在SM4加密算法中,首先我们要通过加密密钥来生成轮密钥,自然我们肯定要设置加解密模式,而这里的v10其实就是我们的结构体,这里在函数sub_412EE0被设置成1,其实就是设置为ENCRYPT模式,即加密.
设置完加密函数以后,传入了a1 + 1和a2,刚刚讲过a2传入的其实是加密密钥,那么a1+1呢?
a1+1实际上是SM4结构体定义的32位无符号整型密钥存储空间。
我们现在主要来分析里面最重要的一个子函数即sub_412330,传入了密钥缓冲区+密钥:
_byteswap_ulong
来交换字节顺序。这里相当于把指向key的指针指向一个无符号整型的数组里面去(从unsigned int v8到v11可以看出来)后面就是一个循环:
循环里面其实是一个求轮密钥的过程,为什么这样说,因为轮密钥等于 $rk_i = K_{i+4} = K_i \oplus T'(K_{i+1} \oplus K_{i+2} \oplus K_{i+3} \oplus CK_i)$
所以这里的 a1 + 4 * v3 其实是第 v3 个轮密钥的存储地址(每个轮密钥占4字节,相当于 SK[v3]),而这里的函数sub_411850其实是 $T'$ 的一个结构,由非线性变换r和由线性变换L衍化成的 $L'$ 所组成。
这里的 v7[v3 + 1] = *(&v4 + v3) ^ sub_411850(dword_418C30[v3] ^ v7[v3] ^ v7[v3 - 1] ^ *(&v5 + v3));我们可以这样理解:
// Key-schedule transform: each byte of a1 is substituted through sub_411D20,
// the four results are assembled so that (after _byteswap_ulong) the substituted
// most significant byte of a1 is again the most significant byte of v3,
// and the function returns v3 ^ (v3 <<< 23) ^ (v3 <<< 13).
int __cdecl sub_411850(int a1)
{
unsigned int v2; // [esp+D0h] [ebp-2Ch]
unsigned int v3; // [esp+F4h] [ebp-8h]
__CheckForDebuggerJustMyCode(&unk_41D070);
LOBYTE(v2) = sub_411D20(SHIBYTE(a1));
BYTE1(v2) = sub_411D20(SBYTE2(a1));
BYTE2(v2) = sub_411D20(SBYTE1(a1));
HIBYTE(v2) = sub_411D20(a1);
v3 = _byteswap_ulong(v2);
// (v3 >> 9) | (v3 << 23) is a rotate-left by 23; (v3 >> 19) | (v3 << 13) is a rotate-left by 13
return ((v3 >> 9) | (v3 << 23)) ^ v3 ^ ((v3 >> 19) | (v3 << 13));
}
// Byte substitution: returns the entry of the table byte_418B30 indexed by a1
// (presumably the SM4 S-box; the table contents are not shown here).
char __cdecl sub_411D20(unsigned __int8 a1)
{
__CheckForDebuggerJustMyCode(&unk_41D070);
return byte_418B30[a1];
}
所以这里的sub_411D20其实就是将传入的dword_418C30[v3] ^ v7[v3] ^ v7[v3 - 1] ^ *(&v5 + v3)进行一个下标在S盒之中的置换,可以理解为跟RC4里面的置换S盒差不多的意思.
return ((v3 >> 9) | (v3 << 23)) ^ v3 ^ ((v3 >> 19) | (v3 << 13))
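这里的线性变换,其实就是SM4密钥扩展算法中的 $L'$:把自身分别循环左移23位和13位(右移9位与左移23位拼成一次循环左移23位,右移19位与左移13位拼成一次循环左移13位),再与自身进行异或,即 $L'(B) = B \oplus (B \lll 13) \oplus (B \lll 23)$。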
即如下代码:
/*
 * 32-bit shift / rotate-left helpers.
 * SHL keeps only the low 32 bits of the shifted value, and ROTL masks x before
 * the right shift, so both yield a clean 32-bit result even where
 * unsigned long is wider than 32 bits. n must be in the range 1..31.
 */
#define SHL(x,n) ((((x) & 0xFFFFFFFFUL) << (n)) & 0xFFFFFFFFUL)
#define ROTL(x,n) (SHL((x),(n)) | (((x) & 0xFFFFFFFFUL) >> (32 - (n))))
左移和右移凑齐32即可.
那通过上述运算,我们就可以不断求出32轮的所有轮的轮密钥,因为while循环的条件是 v3 < 0x20(十进制为32),v3从0取到31,所以是32轮循环。
sub_41113B(ctx, 1, 16, input, output);
// Thunk: forwards all five arguments unchanged to sub_412D70.
int __cdecl sub_41113B(int a1, int a2, int a3, int a4, int a5)
{
return sub_412D70(a1, a2, a3, a4, a5);
}
// ECB loop: a1 = context, a2 = mode (not read here), a3 = length in bytes,
// a4 = input pointer, a5 = output pointer.
// While a3 > 0, one 16-byte block is processed by sub_411D90 and both pointers advance by 16.
// a1 + 4 skips the first 4 bytes of the context, i.e. it points at the round-key array.
int __cdecl sub_412D70(int a1, int a2, int a3, int a4, int a5)
{
int result; // eax
result = __CheckForDebuggerJustMyCode(&unk_41D070);
while ( a3 > 0 )
{
sub_411D90(a1 + 4, a4, a5);
a4 += 16;
a5 += 16;
result = a3 - 16;
a3 -= 16;
}
return result;
}
首先的话,我们一共传入5个参数,分别是:ctx(SM4结构体)、mode(加解密模式,这里为1)、length(输入数据的长度,这里为16)、input(输入数据的缓冲区)、output(输出数据的缓冲区)。
sub_412D70函数里面存在一个while循环,它是通过数据长度a3来控制循环的(每处理完一个16字节的分组,长度减16),而这里面最终的函数其实是sub_411D90,传入了我们生成的轮密钥、输入数据的缓冲区、输出数据的缓冲区。
// Processes one 16-byte block: a1 = round-key array, a2 = input block, a3 = output block.
// v5[0..3] are the four input words read big-endian; 32 iterations compute
// v5[i + 4] = sub_411A80(v5[i], v5[i + 1], v5[i + 2], v5[i + 3], round key i);
// the output is v5[35], v5[34], v5[33], v5[32], each written most significant byte first.
char __cdecl sub_411D90(int a1, unsigned int *a2, _BYTE *a3)
{
int v3; // eax
char result; // al
int v5[38]; // [esp+D0h] [ebp-A4h] BYREF
char v6[4]; // [esp+168h] [ebp-Ch]
__CheckForDebuggerJustMyCode(&unk_41D070);
// v6 is used as a 32-bit loop counter through the _DWORD casts
*(_DWORD *)v6 = 0;
// 0x90 = 144 bytes = 36 dwords are cleared
j_memset(v5, 0, 0x90u);
v5[0] = _byteswap_ulong(*a2);
v5[1] = _byteswap_ulong(a2[1]);
v5[2] = _byteswap_ulong(a2[2]);
v5[3] = _byteswap_ulong(a2[3]);
while ( *(_DWORD *)v6 < 0x20u )
{
v3 = sub_411A80(
v5[*(_DWORD *)v6],
v5[*(_DWORD *)v6 + 1],
v5[*(_DWORD *)v6 + 2],
v5[*(_DWORD *)v6 + 3],
*(_DWORD *)(a1 + 4 * *(_DWORD *)v6));
v5[*(_DWORD *)v6 + 4] = v3;
// per-round debug print; the format has four conversions but only one argument is shown,
// presumably because the decompiler did not recover the remaining variadic arguments
printf("rk(%02d) = 0x%08x, X(%02d) = 0x%08x \n", v6[0]);
++*(_DWORD *)v6;
}
// output in reverse word order: v5[35] first, v5[32] last
*a3 = HIBYTE(v5[35]);
a3[1] = BYTE2(v5[35]);
a3[2] = BYTE1(v5[35]);
a3[3] = v5[35];
a3[4] = HIBYTE(v5[34]);
a3[5] = BYTE2(v5[34]);
a3[6] = BYTE1(v5[34]);
a3[7] = v5[34];
a3[8] = HIBYTE(v5[33]);
a3[9] = BYTE2(v5[33]);
a3[10] = BYTE1(v5[33]);
a3[11] = v5[33];
a3[12] = HIBYTE(v5[32]);
a3[13] = BYTE2(v5[32]);
a3[14] = BYTE1(v5[32]);
result = v5[32];
a3[15] = v5[32];
return result;
}
然后又来到了一个while循环,循环次数又是32次,多半是加密,这里传入的是相邻的4个参数(即明文)还有一个就是SK(即前面我们计算好的轮密钥)
// Round function: returns a1 ^ sub_411AE0(a2 ^ a3 ^ a4 ^ a5),
// where a1..a4 are four consecutive state words and a5 is the round key.
int __cdecl sub_411A80(int a1, int a2, int a3, int a4, int a5)
{
__CheckForDebuggerJustMyCode(&unk_41D070);
return a1 ^ sub_411AE0(a5 ^ a4 ^ a3 ^ a2);
}
// Encryption transform: each byte of a1 is substituted through the table byte_418B30,
// the four results are assembled so that (after _byteswap_ulong) the substituted
// most significant byte of a1 is again the most significant byte of v3,
// and the function returns v3 ^ (v3 <<< 2) ^ (v3 <<< 10) ^ (v3 <<< 18) ^ (v3 <<< 24).
int __cdecl sub_411AE0(int a1)
{
unsigned int v2; // [esp+D0h] [ebp-2Ch]
unsigned int v3; // [esp+F4h] [ebp-8h]
__CheckForDebuggerJustMyCode(&unk_41D070);
LOBYTE(v2) = byte_418B30[HIBYTE(a1)];
BYTE1(v2) = byte_418B30[BYTE2(a1)];
BYTE2(v2) = byte_418B30[BYTE1(a1)];
HIBYTE(v2) = byte_418B30[(unsigned __int8)a1];
v3 = _byteswap_ulong(v2);
// rotate-left by 24, 18, 10 and 2 respectively; (v3 >> 30) | (4 * v3) is the rotate-left by 2
return ((v3 >> 8) | (v3 << 24)) ^ ((v3 >> 14) | (v3 << 18)) ^ ((v3 >> 22) | (v3 << 10)) ^ v3 ^ ((v3 >> 30) | (4 * v3));
}
return a1 ^ sub_411AE0(a5 ^ a4 ^ a3 ^ a2);
a1 为 $X_i$,即刚刚通过_byteswap_ulong函数由输入字节转换成的32位整数(第一轮时就是明文的第一个字 $X_0$)
而后面则是一个T(合成置换)的结构.
LOBYTE(v2) = byte_418B30[HIBYTE(a1)];
BYTE1(v2) = byte_418B30[BYTE2(a1)];
BYTE2(v2) = byte_418B30[BYTE1(a1)];
HIBYTE(v2) = byte_418B30[(unsigned __int8)a1];
S盒置换,跟前面生成轮密钥的S盒置换一样
return ((v3 >> 8) | (v3 << 24)) ^ ((v3 >> 14) | (v3 << 18)) ^ ((v3 >> 22) | (v3 << 10)) ^ v3 ^ ((v3 >> 30) | (4 * v3));
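这里的线性变换,其实就是SM4加密算法中的 $L$:把自身分别循环左移2、10、18、24位(例如右移8位与左移24位拼成一次循环左移24位,右移30位与乘以4即左移2位拼成一次循环左移2位),再与自身进行异或,即 $L(B) = B \oplus (B \lll 2) \oplus (B \lll 10) \oplus (B \lll 18) \oplus (B \lll 24)$。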
即如下代码:
/*
 * 32-bit shift / rotate-left helpers.
 * SHL keeps only the low 32 bits of the shifted value, and ROTL masks x before
 * the right shift, so both yield a clean 32-bit result even where
 * unsigned long is wider than 32 bits. n must be in the range 1..31.
 */
#define SHL(x,n) ((((x) & 0xFFFFFFFFUL) << (n)) & 0xFFFFFFFFUL)
#define ROTL(x,n) (SHL((x),(n)) | (((x) & 0xFFFFFFFFUL) >> (32 - (n))))
左移和右移凑齐32即可.
那通过上述运算,我们就可以依次求出32轮的所有轮输出 $X_4$~$X_{35}$,因为while循环的条件是 < 0x20(十进制为32),循环变量从0取到31,所以是32轮循环。
*a3 = HIBYTE(v5[35]);
a3[1] = BYTE2(v5[35]);
a3[2] = BYTE1(v5[35]);
a3[3] = v5[35];
a3[4] = HIBYTE(v5[34]);
a3[5] = BYTE2(v5[34]);
a3[6] = BYTE1(v5[34]);
a3[7] = v5[34];
a3[8] = HIBYTE(v5[33]);
a3[9] = BYTE2(v5[33]);
a3[10] = BYTE1(v5[33]);
a3[11] = v5[33];
a3[12] = HIBYTE(v5[32]);
a3[13] = BYTE2(v5[32]);
a3[14] = BYTE1(v5[32]);
result = v5[32];
a3[15] = v5[32];
这里要注意,密文是要反序取得.
初始密钥可以自行定义,上述只是常见的SM4初始密钥特征
如上面这种,而且上面v4,v5,v6,v7分别异或了FK常量,基本就可以推断这部分的函数是为了生成轮密钥而准备的.
经典的先通过S盒置换,立马后面就是自身左移异或自身右移.