-
Notifications
You must be signed in to change notification settings - Fork 0
/
hc128_opt32.h
300 lines (262 loc) · 15.5 KB
/
hc128_opt32.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/* This program gives the optimized implementation of stream cipher HC-128 for 32-bit platform
The documentation of HC-128 is available at
1) Hongjun Wu. ``The Stream Cipher HC-128.'' New Stream Cipher Designs -- The eSTREAM Finalists, LNCS 4986, pp. 39-47, Springer-Verlag, 2008.
2) eSTREAM website: http://www.ecrypt.eu.org/stream/hcp3.html
-----------------------------------------
Performance:
Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile P9400 2.53GHz)
Operating System: 32-bit Debian 5.0 (Linux kernel 2.6.26-2-686)
Speed of encrypting long message:
1) 2.1 cycles/byte compiler: Intel C++ compiler 11.1 compilation option: icc -O2
2) 3.9 cycles/byte compiler: gcc 4.3.2 compilation option: gcc -O3
Microprocessor: Intel CORE 2 processor (Core 2 Quad Q6600 2.4GHz)
Operating System: 32-bit Windows Vista Business
Speed of encrypting long message:
3) 2.2 cycles/byte compiler: Intel C++ compiler 11.1 compilation option: icl /O2
4) 3.4 cycles/byte compiler: Microsoft Visual C++ 2008 compilation option: release
------------------------------------------
In this simplified optimization program, loop unrolling is applied to the description of HC-128 directly.
16 steps are executed in each loop.
------------------------------------------
Written by: Hongjun Wu
Last Modified: December 15, 2009
*/
#include <string.h>
/* Integer aliases used throughout the cipher. */
typedef unsigned char uint8;
typedef unsigned long long uint64;
/* On LP64 targets "int" is the 32-bit integer and "long" is 64 bits wide, so
   uint32 has to be "unsigned int" there; every other target keeps
   "unsigned long".  Some toolchains predefine only __LP64__, so both
   spellings of the macro are tested. */
#if defined(_LP64) || defined(__LP64__)
typedef unsigned int uint32;
#else
typedef unsigned long uint32;
#endif
/* Compile-time guard: the rotation macros in this file compute (32 - n), so
   they are only correct when uint32 is 4 bytes wide.  If the selection above
   picked a wider type, the negative array size below stops the build instead
   of letting the cipher silently produce wrong output. */
typedef char hc128_uint32_must_be_4_bytes[(sizeof(uint32) == 4) ? 1 : -1];
/* DATA_ALIGN16(decl) wraps a declaration so that the declared object is
   aligned on a 16-byte boundary.  Compilers that define __GNUC__ get the
   trailing attribute form; every other compiler gets the __declspec prefix
   form. */
#ifndef __GNUC__
#define DATA_ALIGN16(x) __declspec(align(16)) x
#else
#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
#endif
/* Cipher state of one HC-128 instance.
   The three arrays are declared through DATA_ALIGN16. */
typedef struct {
DATA_ALIGN16(uint32 P[512]); /*table P, 512 32-bit words; the step functions update it while counter1024 < 512*/
DATA_ALIGN16(uint32 Q[512]); /*table Q, 512 32-bit words; the step functions update it while counter1024 >= 512*/
DATA_ALIGN16(uint32 keystream[16]); /*the 16 32-bit keystream words written by the most recent SixteenSteps call*/
uint32 counter1024; /*step counter modulo 1024 (counter1024 = i mod 1024); advanced by 16 per 16-step call*/
} HC128_State;
/* ROTR32 / ROTL32 rotate the 32-bit word x right / left by n bit positions.
   x must have a 32-bit unsigned type and n must lie in 1..31: with n == 0 the
   complementary shift would be by 32 bits, which C leaves undefined. */
#define ROTR32(x,n) ( ((x) >> (n)) | ((x) << (32 - (n))) )
#define ROTL32(x,n) ( ((x) << (n)) | ((x) >> (32 - (n))) )

/* The statement macros below are not self-contained.  The scope in which they
   are expanded must provide:
     state          pointer to a structure holding the tables P and Q
     a, c           scratch variables of type uint8
     tem0 .. tem3   scratch variables of type uint32
   Each macro is wrapped in do { ... } while (0) so that it expands to exactly
   one statement and can be used safely as the body of an if/else; the caller
   supplies the terminating semicolon. */

/*the h1 function in HC-128: y = Q[byte 0 of x] + Q[256 + byte 2 of x]*/
#define h1(x,y) do { \
    a = (uint8)(x); \
    c = (uint8)((x) >> 16); \
    (y) = state->Q[a] + state->Q[256 + c]; \
} while (0)

/*the h2 function in HC-128: y = P[byte 0 of x] + P[256 + byte 2 of x]*/
#define h2(x,y) do { \
    a = (uint8)(x); \
    c = (uint8)((x) >> 16); \
    (y) = state->P[a] + state->P[256 + c]; \
} while (0)

/*one step of HC-128, update P and generate 32 bits keystream.
  m0 is the table word being updated (read and written); m511, m3, m10 and m12
  are the other table words read by the step; s receives the keystream word.*/
#define step_P(m0,m511,m3,m10,m12,s) do { \
    tem0 = ROTR32((m511),23); \
    tem1 = ROTR32((m3),10); \
    tem2 = ROTR32((m10),8); \
    (m0) += tem2 + (tem0 ^ tem1); \
    h1((m12),tem3); \
    (s) = tem3 ^ (m0); \
} while (0)

/*one step of HC-128, update Q and generate 32 bits keystream.
  Same argument roles as step_P, but rotating left and using h2.*/
#define step_Q(m0,m511,m3,m10,m12,s) do { \
    tem0 = ROTL32((m511),23); \
    tem1 = ROTL32((m3),10); \
    tem2 = ROTL32((m10),8); \
    (m0) += tem2 + (tem0 ^ tem1); \
    h2((m12),tem3); \
    (s) = tem3 ^ (m0); \
} while (0)
/* Compute 16 consecutive steps of HC-128.
   The table selected by state->counter1024 (P while the counter is below 512,
   Q otherwise) is updated in place at 16 consecutive positions, and the
   16 32-bit keystream words produced by those steps are stored in
   state->keystream[0..15].  Finally counter1024 is advanced by 16 modulo 1024.
   The steps are written out one by one (loop unrolling); each step may read
   table words written by earlier steps of the same call, so their order
   matters.
   The indexing assumes counter1024 is a multiple of 16, so that cc+15 and
   ee+15 never exceed 511.
*/
void SixteenSteps(HC128_State *state)
{
/* scratch variables assigned inside the step_P/step_Q and h1/h2 macro expansions */
uint32 tem0,tem1,tem2,tem3;
uint8 a,c;
uint32 cc,dd,ee;
/* cc: position inside the 512-word table of the first word updated by this call */
cc = state->counter1024 & 0x1ff;
/* dd: position of the word following the 16-word window, wrapping from 511 to 0 */
dd = (cc+16) & 0x1ff;
/* ee: position 16 words before cc, wrapping below 0; ee+k addresses the word at cc-16+k */
ee = (cc-16) & 0x1ff;
if (state->counter1024 < 512)
{
/* arguments of each step: word being updated, the word one position ahead,
   the words 3, 10 and 12 positions behind, and the keystream slot to fill */
step_P(state->P[cc+0], state->P[cc+1], state->P[ee+13],state->P[ee+6], state->P[ee+4], state->keystream[0]);
step_P(state->P[cc+1], state->P[cc+2], state->P[ee+14],state->P[ee+7], state->P[ee+5], state->keystream[1]);
step_P(state->P[cc+2], state->P[cc+3], state->P[ee+15],state->P[ee+8], state->P[ee+6], state->keystream[2]);
step_P(state->P[cc+3], state->P[cc+4], state->P[cc+0], state->P[ee+9], state->P[ee+7], state->keystream[3]);
step_P(state->P[cc+4], state->P[cc+5], state->P[cc+1], state->P[ee+10],state->P[ee+8], state->keystream[4]);
step_P(state->P[cc+5], state->P[cc+6], state->P[cc+2], state->P[ee+11],state->P[ee+9], state->keystream[5]);
step_P(state->P[cc+6], state->P[cc+7], state->P[cc+3], state->P[ee+12],state->P[ee+10],state->keystream[6]);
step_P(state->P[cc+7], state->P[cc+8], state->P[cc+4], state->P[ee+13],state->P[ee+11],state->keystream[7]);
step_P(state->P[cc+8], state->P[cc+9], state->P[cc+5], state->P[ee+14],state->P[ee+12],state->keystream[8]);
step_P(state->P[cc+9], state->P[cc+10],state->P[cc+6], state->P[ee+15],state->P[ee+13],state->keystream[9]);
step_P(state->P[cc+10],state->P[cc+11],state->P[cc+7], state->P[cc+0], state->P[ee+14],state->keystream[10]);
step_P(state->P[cc+11],state->P[cc+12],state->P[cc+8], state->P[cc+1], state->P[ee+15],state->keystream[11]);
step_P(state->P[cc+12],state->P[cc+13],state->P[cc+9], state->P[cc+2], state->P[cc+0], state->keystream[12]);
step_P(state->P[cc+13],state->P[cc+14],state->P[cc+10],state->P[cc+3], state->P[cc+1], state->keystream[13]);
step_P(state->P[cc+14],state->P[cc+15],state->P[cc+11],state->P[cc+4], state->P[cc+2], state->keystream[14]);
/* the last step looks one word past the window, hence dd instead of cc+16 */
step_P(state->P[cc+15],state->P[dd+0], state->P[cc+12],state->P[cc+5], state->P[cc+3], state->keystream[15]);
}
else
{
/* same index pattern as above, applied to table Q with step_Q */
step_Q(state->Q[cc+0], state->Q[cc+1], state->Q[ee+13],state->Q[ee+6], state->Q[ee+4], state->keystream[0]);
step_Q(state->Q[cc+1], state->Q[cc+2], state->Q[ee+14],state->Q[ee+7], state->Q[ee+5], state->keystream[1]);
step_Q(state->Q[cc+2], state->Q[cc+3], state->Q[ee+15],state->Q[ee+8], state->Q[ee+6], state->keystream[2]);
step_Q(state->Q[cc+3], state->Q[cc+4], state->Q[cc+0], state->Q[ee+9], state->Q[ee+7], state->keystream[3]);
step_Q(state->Q[cc+4], state->Q[cc+5], state->Q[cc+1], state->Q[ee+10],state->Q[ee+8], state->keystream[4]);
step_Q(state->Q[cc+5], state->Q[cc+6], state->Q[cc+2], state->Q[ee+11],state->Q[ee+9], state->keystream[5]);
step_Q(state->Q[cc+6], state->Q[cc+7], state->Q[cc+3], state->Q[ee+12],state->Q[ee+10],state->keystream[6]);
step_Q(state->Q[cc+7], state->Q[cc+8], state->Q[cc+4], state->Q[ee+13],state->Q[ee+11],state->keystream[7]);
step_Q(state->Q[cc+8], state->Q[cc+9], state->Q[cc+5], state->Q[ee+14],state->Q[ee+12],state->keystream[8]);
step_Q(state->Q[cc+9], state->Q[cc+10],state->Q[cc+6], state->Q[ee+15],state->Q[ee+13],state->keystream[9]);
step_Q(state->Q[cc+10],state->Q[cc+11],state->Q[cc+7], state->Q[cc+0], state->Q[ee+14],state->keystream[10]);
step_Q(state->Q[cc+11],state->Q[cc+12],state->Q[cc+8], state->Q[cc+1], state->Q[ee+15],state->keystream[11]);
step_Q(state->Q[cc+12],state->Q[cc+13],state->Q[cc+9], state->Q[cc+2], state->Q[cc+0], state->keystream[12]);
step_Q(state->Q[cc+13],state->Q[cc+14],state->Q[cc+10],state->Q[cc+3], state->Q[cc+1], state->keystream[13]);
step_Q(state->Q[cc+14],state->Q[cc+15],state->Q[cc+11],state->Q[cc+4], state->Q[cc+2], state->keystream[14]);
step_Q(state->Q[cc+15],state->Q[dd+0], state->Q[cc+12],state->Q[cc+5], state->Q[cc+3], state->keystream[15]);
}
/* advance the step counter, wrapping at 1024 */
state->counter1024 = (state->counter1024 + 16) & 0x3ff;
}
/*The following defines the initialization functions*/
/*the functions used for expanding the key and iv.
  f1 and f2 combine two right-rotations and one right-shift of x with XOR;
  f adds f2(a), b, f1(c) and d.  Every argument of f is parenthesized so that
  an argument expression containing a low-precedence operator is still
  evaluated as a unit.*/
#define f1(x) (ROTR32((x),7) ^ ROTR32((x),18) ^ ((x) >> 3))
#define f2(x) (ROTR32((x),17) ^ ROTR32((x),19) ^ ((x) >> 10))
#define f(a,b,c,d) (f2((a)) + (b) + f1((c)) + (d))

/* update_P / update_Q perform the same table update as one cipher step, but
   the word that a cipher step would emit as keystream is written back into
   the table word m0 instead.
   Like the step macros they rely on names from the expanding scope: the
   scratch words tem0..tem3 and whatever h1/h2 themselves require.  They are
   wrapped in do { ... } while (0) so that each expands to a single statement;
   the caller supplies the terminating semicolon. */

/*update one element in table P*/
#define update_P(m0,m511,m3,m10,m12) do { \
    tem0 = ROTR32((m511),23); \
    tem1 = ROTR32((m3),10); \
    tem2 = ROTR32((m10),8); \
    (m0) += tem2 + (tem0 ^ tem1); \
    h1((m12),tem3); \
    (m0) = tem3 ^ (m0); \
} while (0)

/*update one element in table Q*/
#define update_Q(m0,m511,m3,m10,m12) do { \
    tem0 = ROTL32((m511),23); \
    tem1 = ROTL32((m3),10); \
    tem2 = ROTL32((m10),8); \
    (m0) += tem2 + (tem0 ^ tem1); \
    h2((m12),tem3); \
    (m0) = tem3 ^ (m0); \
} while (0)
/* Update the state for 16 steps without generating keystream.
   Uses the same table selection and index pattern as SixteenSteps, but through
   update_P/update_Q, which write each step's result back into the table word
   being updated; state->keystream is not touched.  counter1024 is advanced by
   16 modulo 1024.  The steps are unrolled and order-dependent: a step may read
   table words written by earlier steps of the same call.
   The indexing assumes counter1024 is a multiple of 16, so that cc+15 and
   ee+15 never exceed 511.
*/
void UpdateSixteenSteps(HC128_State *state)
{
/* scratch variables assigned inside the update_P/update_Q and h1/h2 macro expansions */
uint32 tem0,tem1,tem2,tem3;
uint8 a,c;
uint32 cc,dd,ee;
/* cc: position inside the 512-word table of the first word updated by this call */
cc = state->counter1024 & 0x1ff;
/* dd: position of the word following the 16-word window, wrapping from 511 to 0 */
dd = (cc+16) & 0x1ff;
/* ee: position 16 words before cc, wrapping below 0; ee+k addresses the word at cc-16+k */
ee = (cc-16) & 0x1ff;
if (state->counter1024 < 512)
{
/* arguments of each update: word being updated, the word one position ahead,
   and the words 3, 10 and 12 positions behind */
update_P(state->P[cc+0], state->P[cc+1], state->P[ee+13],state->P[ee+6], state->P[ee+4]);
update_P(state->P[cc+1], state->P[cc+2], state->P[ee+14],state->P[ee+7], state->P[ee+5]);
update_P(state->P[cc+2], state->P[cc+3], state->P[ee+15],state->P[ee+8], state->P[ee+6]);
update_P(state->P[cc+3], state->P[cc+4], state->P[cc+0], state->P[ee+9], state->P[ee+7]);
update_P(state->P[cc+4], state->P[cc+5], state->P[cc+1], state->P[ee+10],state->P[ee+8]);
update_P(state->P[cc+5], state->P[cc+6], state->P[cc+2], state->P[ee+11],state->P[ee+9]);
update_P(state->P[cc+6], state->P[cc+7], state->P[cc+3], state->P[ee+12],state->P[ee+10]);
update_P(state->P[cc+7], state->P[cc+8], state->P[cc+4], state->P[ee+13],state->P[ee+11]);
update_P(state->P[cc+8], state->P[cc+9], state->P[cc+5], state->P[ee+14],state->P[ee+12]);
update_P(state->P[cc+9], state->P[cc+10],state->P[cc+6], state->P[ee+15],state->P[ee+13]);
update_P(state->P[cc+10],state->P[cc+11],state->P[cc+7], state->P[cc+0], state->P[ee+14]);
update_P(state->P[cc+11],state->P[cc+12],state->P[cc+8], state->P[cc+1], state->P[ee+15]);
update_P(state->P[cc+12],state->P[cc+13],state->P[cc+9], state->P[cc+2], state->P[cc+0]);
update_P(state->P[cc+13],state->P[cc+14],state->P[cc+10],state->P[cc+3], state->P[cc+1]);
update_P(state->P[cc+14],state->P[cc+15],state->P[cc+11],state->P[cc+4], state->P[cc+2]);
/* the last update looks one word past the window, hence dd instead of cc+16 */
update_P(state->P[cc+15],state->P[dd+0], state->P[cc+12],state->P[cc+5], state->P[cc+3]);
}
else
{
/* same index pattern as above, applied to table Q with update_Q */
update_Q(state->Q[cc+0], state->Q[cc+1], state->Q[ee+13],state->Q[ee+6], state->Q[ee+4]);
update_Q(state->Q[cc+1], state->Q[cc+2], state->Q[ee+14],state->Q[ee+7], state->Q[ee+5]);
update_Q(state->Q[cc+2], state->Q[cc+3], state->Q[ee+15],state->Q[ee+8], state->Q[ee+6]);
update_Q(state->Q[cc+3], state->Q[cc+4], state->Q[cc+0], state->Q[ee+9], state->Q[ee+7]);
update_Q(state->Q[cc+4], state->Q[cc+5], state->Q[cc+1], state->Q[ee+10],state->Q[ee+8]);
update_Q(state->Q[cc+5], state->Q[cc+6], state->Q[cc+2], state->Q[ee+11],state->Q[ee+9]);
update_Q(state->Q[cc+6], state->Q[cc+7], state->Q[cc+3], state->Q[ee+12],state->Q[ee+10]);
update_Q(state->Q[cc+7], state->Q[cc+8], state->Q[cc+4], state->Q[ee+13],state->Q[ee+11]);
update_Q(state->Q[cc+8], state->Q[cc+9], state->Q[cc+5], state->Q[ee+14],state->Q[ee+12]);
update_Q(state->Q[cc+9], state->Q[cc+10],state->Q[cc+6], state->Q[ee+15],state->Q[ee+13]);
update_Q(state->Q[cc+10],state->Q[cc+11],state->Q[cc+7], state->Q[cc+0], state->Q[ee+14]);
update_Q(state->Q[cc+11],state->Q[cc+12],state->Q[cc+8], state->Q[cc+1], state->Q[ee+15]);
update_Q(state->Q[cc+12],state->Q[cc+13],state->Q[cc+9], state->Q[cc+2], state->Q[cc+0]);
update_Q(state->Q[cc+13],state->Q[cc+14],state->Q[cc+10],state->Q[cc+3], state->Q[cc+1]);
update_Q(state->Q[cc+14],state->Q[cc+15],state->Q[cc+11],state->Q[cc+4], state->Q[cc+2]);
update_Q(state->Q[cc+15],state->Q[dd+0], state->Q[cc+12],state->Q[cc+5], state->Q[cc+3]);
}
/* advance the step counter, wrapping at 1024 */
state->counter1024 = (state->counter1024 + 16) & 0x3ff;
}
/* Initialization of the cipher: the key and iv are expanded into the tables
   P and Q, then the cipher is run for 1024 steps without output.

   state  cipher state to set up; every field is overwritten
   key    16 bytes of key material, read as four uint32 words in host byte order
   iv     16 bytes of initialization vector, read the same way

   The key and iv bytes are fetched with memcpy rather than through a
   (uint32 *) cast: the byte buffers carry no alignment guarantee and must not
   be accessed through an incompatible pointer type.  The values loaded are the
   same as a direct word load from the same address would give.
*/
void Initialization(HC128_State *state, uint8 *key, uint8 *iv)
{
    uint32 i;
    uint32 word;

    /*expand the key and iv into P and Q*/
    /* W[0..7]: the four key words, stored twice */
    for (i = 0; i < 4; i++) {
        memcpy(&word, key + i * sizeof word, sizeof word);
        state->P[i] = word;
        state->P[i+4] = word;
    }
    /* W[8..15]: the four iv words, stored twice */
    for (i = 0; i < 4; i++) {
        memcpy(&word, iv + i * sizeof word, sizeof word);
        state->P[i+8] = word;
        state->P[i+12] = word;
    }

    /* generate W[16] ... W[256+16-1], using P as working storage */
    for (i = 16; i < 256+16; i++)
        state->P[i] = f(state->P[i-2],state->P[i-7],state->P[i-15],state->P[i-16])+i;

    /* move W[256..271] to the front so that P[i] ends up holding W[256+i] */
    for (i = 0; i < 16; i++)
        state->P[i] = state->P[i+256];

    /* generate W[256+16] ... W[256+512-1] */
    for (i = 16; i < 512; i++)
        state->P[i] = f(state->P[i-2],state->P[i-7],state->P[i-15],state->P[i-16])+256+i;

    /* seed Q with the last 16 words of P, i.e. W[752..767] */
    for (i = 0; i < 16; i++)
        state->Q[i] = state->P[512-16+i];

    /* generate W[256+512] ... W[256+512+16-1] */
    for (i = 16; i < 32; i++)
        state->Q[i] = f(state->Q[i-2],state->Q[i-7],state->Q[i-15],state->Q[i-16])+256+512+(i-16);

    /* move them to the front so that Q[i] ends up holding W[768+i] */
    for (i = 0; i < 16; i++)
        state->Q[i] = state->Q[i+16];

    /* generate W[256+512+16] ... W[256+512+512-1] */
    for (i = 16; i < 512; i++)
        state->Q[i] = f(state->Q[i-2],state->Q[i-7],state->Q[i-15],state->Q[i-16])+768+i;

    /*initialize counter1024*/
    state->counter1024 = 0;

    /*run the cipher 1024 steps (64 calls of 16 steps) without generating keystream*/
    for (i = 0; i < 64; i++)
        UpdateSixteenSteps(state);
}
/* Encrypt a message by XORing it with keystream.

   state       initialized cipher state; advanced by one 16-step call for
               every 64-byte block, including a trailing partial block
   message     input bytes (msglength of them)
   ciphertext  output buffer, at least msglength bytes
   msglength   message length in bytes

   Full 64-byte blocks are processed one keystream word at a time.  The words
   are moved with memcpy instead of (uint32 *) casts because the byte buffers
   carry no alignment guarantee and must not be accessed through an
   incompatible pointer type; the bytes produced are the same as a direct word
   XOR would give.

   When msglength is not a multiple of 64, a whole keystream block is still
   generated for the tail and its unused bytes are not kept, so a later call
   with the same state starts on a fresh keystream block.
*/
void EncryptMessage(HC128_State *state, uint8 *message, uint8 *ciphertext, uint64 msglength)
{
    uint64 remaining;
    uint32 j;
    uint32 tail;
    uint32 word;
    const uint8 *ks;

    /*encrypt a message, each time 64 bytes are encrypted.
      Counting the remaining bytes down avoids computing i+64, which could
      wrap around for lengths close to the maximum of uint64.*/
    for (remaining = msglength; remaining >= 64;
         remaining -= 64, message += 64, ciphertext += 64) {
        /*generate 16 32-bit keystream words and store them in state->keystream*/
        SixteenSteps(state);
        /*encrypt 64 bytes of the message*/
        for (j = 0; j < 16; j++) {
            memcpy(&word, message + j * sizeof word, sizeof word);
            word ^= state->keystream[j];
            memcpy(ciphertext + j * sizeof word, &word, sizeof word);
        }
    }

    /*encrypt the last message block if the message length is not multiple of 64 bytes*/
    tail = (uint32)(msglength & 0x3f);
    if (tail != 0) {
        SixteenSteps(state);
        /* byte view of the keystream words, in their in-memory order */
        ks = (const uint8 *)state->keystream;
        for (j = 0; j < tail; j++) {
            ciphertext[j] = message[j] ^ ks[j];
        }
    }
}
/* One-shot encryption of a message.

   key         128-bit key (16 bytes)
   iv          128-bit initialization vector (16 bytes)
   message     input bytes (msglength of them)
   ciphertext  output buffer, at least msglength bytes
   msglength   message length in bytes

   A temporary cipher state is set up from key and iv, used to encrypt the
   message, and then overwritten with zeros so that the key-derived tables do
   not linger on the stack after the function returns.
*/
void HC128(uint8 *key, uint8 *iv, uint8 *message,uint8 *ciphertext, uint64 msglength)
{
    HC128_State state;
    volatile uint8 *scrub;
    size_t k;

    /*initializing the state*/
    Initialization(&state,key,iv);

    /*encrypt a message*/
    EncryptMessage(&state,message,ciphertext,msglength);

    /* wipe the state; the stores go through a volatile pointer so that the
       compiler does not drop them as dead writes to a dying object */
    scrub = (volatile uint8 *)&state;
    for (k = 0; k < sizeof state; k++) {
        scrub[k] = 0;
    }
}