forked from microsoft/SymCrypt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parhash.c
517 lines (460 loc) · 20.3 KB
/
parhash.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
//
// ParHash.c
// Code shared with all the parallel hash implementations
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//
#include "precomp.h"
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptParallelHashProcess_serial(
_In_ PCSYMCRYPT_PARALLEL_HASH pParHash,
_Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates,
SIZE_T nStates,
_Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations,
SIZE_T nOperations,
_Out_writes_( cbScratch ) PBYTE pbScratch,
SIZE_T cbScratch )
{
SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
SIZE_T i;
PSYMCRYPT_PARALLEL_HASH_OPERATION op;
PCSYMCRYPT_HASH pHash;
pHash = pParHash->pHash;
op = pOperations;
//
// Wipe the scratch space to detect erroneous callers.
// We do this so that callers that test on a non-parallel platform will work on a platform that does support
// parallel operations.
//
if( cbScratch < pParHash->parScratchFixed + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH )
{
scError = SYMCRYPT_BUFFER_TOO_SMALL;
goto cleanup;
}
SymCryptWipeKnownSize( pbScratch, pParHash->parScratchFixed + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH );
for( i=0; i<nOperations; i++ )
{
if( op->iHash >= nStates )
{
scError = SYMCRYPT_INVALID_ARGUMENT;
goto cleanup;
}
switch( op->hashOperation )
{
case SYMCRYPT_HASH_OPERATION_APPEND:
(*pHash->appendFunc)( (PBYTE)pStates + pHash->stateSize * op->iHash, op->pbBuffer, op->cbBuffer );
break;
case SYMCRYPT_HASH_OPERATION_RESULT:
if( op->cbBuffer != pHash->resultSize )
{
scError = SYMCRYPT_INVALID_ARGUMENT;
goto cleanup;
}
(*pHash->resultFunc)( (PBYTE)pStates + pHash->stateSize * op->iHash, op->pbBuffer );
break;
default:
scError = SYMCRYPT_INVALID_ARGUMENT;
goto cleanup;
}
op++;
}
cleanup:
return scError;
}
//
// This function looks at a state and decides what to do.
// If it returns FALSE, then this state is done and no further processing is required.
// If it returns TRUE, the pbData/cbData have to be processed in parallel.
// This function is called again on the same state after the pbData/cbData have been processed.
//
// Internally, it keeps track of the next step to be taken for this state.
// the processingState keeps track of the next action to take.
//
//
// An enum to keep track of the state of a request block
//
BOOLEAN
SYMCRYPT_CALL
SymCryptParallelHashSetNextWork( PCSYMCRYPT_PARALLEL_HASH pParHash, PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch )
{
PSYMCRYPT_COMMON_HASH_STATE pState;
PCSYMCRYPT_HASH pHash;
PCSYMRYPT_PARALLEL_HASH_OPERATION pOp;
SIZE_T bytesInBuffer;
SIZE_T todo;
BOOLEAN res;
// Retrieve the state we will operate on.
pState = (PSYMCRYPT_COMMON_HASH_STATE) pScratch->hashState;
pHash = pParHash->pHash;
//
// This is a state machine where some states have to iterate
// The loop allows them to use 'continue' for that.
//
#pragma warning( suppress: 4127 ) // conditional expression is constant
while( TRUE )
{
//
// At this point, the processing state, pbData/cbData, and next pointer define what needs to be done.
// STATE_NEXT: cbData == 0 and we have to process the remaining operations.
// STATE_DATA_START: We are working on the next operation; the first BytesAlreadyProcessed have been hashed,
// and the hash state has an empty buffer.
// STATE_DATA_END: We are working on the next operation (an append), and pbData/cbData have whatever partial block remains
// after all the whole blocks have been processed.
// STATE_PAD2: We are working on the next operation (a result), and have processed the first half of a 2-block padding.
// STATE_RESULT: We are working on the next operation (a result), and have processed all the padding.
//
// The pState->dataLength is updated whenver we copy bytes from the append into the state's buffer, or when
// we return TRUE and process bulk data.
//
pOp = pScratch->next;
switch( pScratch->processingState )
{
case STATE_NEXT:
if( pOp == NULL )
{
return FALSE;
}
bytesInBuffer = pState->bytesInBuffer;
// SYMCRYPT_ASSERT( pOp->cbBuffer < ((SIZE_T)-1)/2 ); // used during testing
if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND )
{
pState->dataLengthL += pOp->cbBuffer;
if( pState->dataLengthL < pOp->cbBuffer ) {
pState->dataLengthH ++; // This is almost-unreachable code as it requires 2^64 bytes to be hashed.
}
if( bytesInBuffer > 0 )
{
SYMCRYPT_ASSERT( pHash->inputBlockSize > bytesInBuffer );
todo = SYMCRYPT_MIN( pHash->inputBlockSize - bytesInBuffer, pOp->cbBuffer );
memcpy( &pState->buffer[bytesInBuffer], pOp->pbBuffer, todo );
pState->bytesInBuffer += (UINT32) todo;
if( pState->bytesInBuffer == pHash->inputBlockSize )
{
//
// We filled the buffer; set it for processing.
// Remember the # bytes we did and set the next state to process the rest of the request.
//
pScratch->pbData = &pState->buffer[0];
pScratch->cbData = pHash->inputBlockSize;
if( todo == pOp->cbBuffer )
{
//
// We finished the request after the pbData processing
//
pScratch->next = pOp->next;
// pScratch->processingState = STATE_NEXT // already has that value
} else {
pScratch->processingState = STATE_DATA_START;
SYMCRYPT_ASSERT( todo <= 0xff );
pScratch->bytesAlreadyProcessed = (BYTE) todo;
}
pState->bytesInBuffer = 0; // it will be after we process the block
return TRUE;
} else {
//
// We finished the operation; skip to the next one.
//
pScratch->next = pOp->next;
// pScratch->processingState = STATE_NEXT // already has that value
continue;
}
} else {
//
// Buffer is empty; process the bulk data
//
pScratch->pbData = pOp->pbBuffer;
pScratch->cbData = pOp->cbBuffer;
pScratch->processingState = STATE_DATA_END;
//
// Return TRUE if there is real data to process, and just re-run the state
// machine if we should copy the partial block to the buffer.
//
if( pScratch->cbData >= pHash->inputBlockSize )
{
return TRUE;
} else {
continue;
}
}
} else {
SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT );
if( (*pParHash->parResult1Func)( pParHash, pState, pScratch, &res ) )
{
return res;
}
}
break;
case STATE_DATA_START:
//
// The next operation is an append, and the first few bytes of that operation have already been copied to
// the buffer and processed. We need to process the rest.
// Note that the # bytes remaining is never zero.
//
SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND && pOp->cbBuffer >= pScratch->bytesAlreadyProcessed );
pScratch->pbData = pOp->pbBuffer + pScratch->bytesAlreadyProcessed;
pScratch->cbData = pOp->cbBuffer - pScratch->bytesAlreadyProcessed;
if( pScratch->cbData >= pHash->inputBlockSize )
{
pScratch->processingState = STATE_DATA_END;
return TRUE;
}
//
// We have less than one block left; this is exactly the same state as we have at the end of
// a normal append. Fall through to that code.
//
// FALLTHROUGH!
case STATE_DATA_END:
//
// We finished processing the whole blocks of the pScratch->pbData, and have to process the rest.
// The current append is already popped off the work list.
//
if( pScratch->cbData > 0 )
{
SYMCRYPT_ASSERT( pScratch->cbData < pHash->inputBlockSize );
memcpy( &pState->buffer[0], pScratch->pbData, pScratch->cbData );
pState->bytesInBuffer = (UINT32) pScratch->cbData;
}
pScratch->next = pOp->next;
pScratch->processingState = STATE_NEXT;
continue;
case STATE_RESULT2:
if( (*pParHash->parResult2Func)( pParHash, pState, pScratch, &res ) )
{
return res;
}
continue;
case STATE_RESULT_DONE:
(*pParHash->parResultDoneFunc)( pParHash, pState, pOp );
pScratch->next = pOp->next;
pScratch->processingState = STATE_NEXT;
continue;
}
}
return FALSE;
}
//
// Comparison function used to sort the work into largest-first order.
//
int SYMCRYPT_CDECL
compareRequestSize( PCVOID p1, PCVOID p2 )
{
PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pp1 = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) p1;
PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pp2 = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) p2;
UINT64 c1 = (*pp1)->bytes;
UINT64 c2 = (*pp2)->bytes;
//
// This is 'reverse' compare function as we want the largest item first.
//
if( c1 < c2 )
{
return 1;
} else if( c1 > c2 )
{
return -1;
} else {
return 0;
}
}
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptParallelHashProcess(
_In_ PCSYMCRYPT_PARALLEL_HASH pParHash,
_Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates,
SIZE_T nStates,
_Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations,
SIZE_T nOperations,
_Out_writes_( cbScratch ) PBYTE pbScratch,
SIZE_T cbScratch,
UINT32 maxParallel )
{
SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratchState;
PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork;
SIZE_T nWork;
PSYMCRYPT_PARALLEL_HASH_OPERATION pOp;
PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pSc;
SIZE_T i;
UINT64 singleSize;
BOOLEAN sameSize;
SIZE_T nPar;
PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pNextWork;
SIZE_T todo;
SIZE_T nBytes;
PBYTE pbScratchEnd;
PBYTE pbFixedScratch;
SIZE_T cbFixedScratch;
PCSYMCRYPT_HASH pHash;
if( nOperations == 0 )
{
goto cleanup;
}
pHash = pParHash->pHash;
//
// The caller passes us a scratch buffer. We split that into the following pieces:
//
// <alignment space to SYMCRYPT_ALIGN_VALUE>
// SYMCRYPT_PARALLEL_HASH_SCRATCH pScratchState[ nStates ]
// PSYMCRYPT_PARALLEL_HASH_SCRATCH pWork[ nStates ]
// <alignment space to SYMCRYPT_SIMD_ELEMENT_SIZE>
// scratch space for parallel function
//
pbScratchEnd = pbScratch + cbScratch;
pScratchState = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE) SYMCRYPT_ALIGN_UP( pbScratch );
pWork = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) (pScratchState + nStates);
pbFixedScratch = (PBYTE)((((UINT_PTR)(pWork + nStates)) + SYMCRYPT_SIMD_ELEMENT_SIZE - 1) & ~(SYMCRYPT_SIMD_ELEMENT_SIZE - 1));
cbFixedScratch = pParHash->parScratchFixed;
if( pbFixedScratch + cbFixedScratch > pbScratchEnd )
{
scError = SYMCRYPT_BUFFER_TOO_SMALL;
goto cleanup;
}
//
// Wipe the scratch state; this sets the pointers to NULL, and the byte counts to 0.
//
memset( pScratchState, 0, nStates * sizeof( *pScratchState ));
nWork = 0;
//
// The general data structure is as follows.
// For each hash state, we keep our administration in the pScratchState[i]. This contains a pointer to the actual
// hash state, a pointer to a linked list of operations to be performed on this state, pointer/length of the
// current data to be processed, and a few more administrative items.
// We also keep the pWork array of pointers to our scratch states, which contains all the states that still need
// work to be done.
//
// We process over the operations in reverse order to make it easy to build a forward single-linked list
//
pOp = &pOperations[ nOperations ];
while( pOp > pOperations )
{
pOp--;
if( pOp->iHash >= nStates )
{
scError = SYMCRYPT_INVALID_ARGUMENT;
goto cleanup;
}
pSc = &pScratchState[ pOp->iHash ];
if( pSc->hashState == NULL )
{
//
// We found a new state that is being modified by this set of operations.
// Set the pointer to the hash state, and add it to the work list.
//
SYMCRYPT_ASSERT( nWork < nStates );
pSc->hashState = (PBYTE) pStates + pHash->stateSize * pOp->iHash;
pWork[nWork] = pSc;
nWork++;
}
//
// We estimate how much work we have to do on each state, so that we can start on the largest ones
// and be more efficient.
//
if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND )
{
pSc->bytes += pOp->cbBuffer;
} else if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT )
{
//
// The result could be a 1 or 2-block operation; but it is mostly a 1-block one so that is what we budget.
//
pSc->bytes += pHash->inputBlockSize;
} else {
scError = SYMCRYPT_INVALID_ARGUMENT;
goto cleanup;
}
//
// Add the operation to the list of operations for this state
//
pOp->next = pSc->next;
pSc->next = pOp;
}
//
// We have built all the structures.
// Run the SetNextWork on each of them, and drop the ones that don't have work.
// Also detect whether they are all the same size so that we can avoid the sorting cost.
//
SYMCRYPT_ASSERT( nWork > 0 );
singleSize = (*pWork)->bytes;
sameSize = TRUE;
i = 0;
while( i < nWork )
{
if( !SymCryptParallelHashSetNextWork( pParHash, pWork[i] ) )
{
pWork[i] = pWork[nWork-1];
nWork--;
continue;
}
if( pWork[i]->bytes != singleSize )
{
sameSize = FALSE;
}
i++;
}
if( !sameSize )
{
qsort( pWork, nWork, sizeof( *pWork ), &compareRequestSize );
}
nPar = SYMCRYPT_MIN( nWork, maxParallel ); // # parallel states we currently work on
pNextWork = pWork + nPar; // next work pointer.
while( nWork > 0 )
{
todo = pWork[0]->cbData;
for( i=1; i<nPar; i++ )
{
todo = SYMCRYPT_MIN( todo, pWork[i]->cbData );
}
nBytes = todo & ~(pHash->inputBlockSize - 1 );
(*pParHash->parAppendFunc)( pWork, nPar, nBytes, pbFixedScratch, cbFixedScratch );
for( i=0; i<nPar; i++ )
{
if( pWork[i]->cbData < pHash->inputBlockSize )
{
//
// Once we start a request we finish it; this is not optimal.
// It would be better to switch things around a bit, but that is much more complicated.
// Example: suppose we can do 4-parallel and have requests of size
// 9 8 7 6 6 6
// Our code does
// Process first 4 of # blocks Resulting state
// 9 8 7 6 / 6 6 6 3 2 1 - / 6 6
// 6 3 2 1 / 6 1 5 2 1 0 / 6
// 6 more to finish for a total of 13 blocks.
//
// Better would be:
// Process first 4 of # blocks Resulting state
// 9 8 7 6 / 6 6 6 3 2 1 - / 6 6
// 6 6 3 2 / 1 - 2 4 4 1 0 / 1
// 4 more to finish for total of 12 blocks.
//
// Or even better:
// Process first 4 of # blocks Resulting state
// 9 8 7 6 / 6 6 5 4 3 2 1 / 6 6
// 6 6 4 3 / 2 1 3 3 3 1 - / 2 1
// 3 3 2 1 / 1 - 1 2 2 1 - / 1 -
// 2 more to finish for a total of 11 blocks.
// Note that this last one requires the interruption of a started hash computation.
//
if( !SymCryptParallelHashSetNextWork( pParHash, pWork[i] ))
{
if( nWork > nPar )
{
pWork[i] = *pNextWork++;
nWork--;
} else {
//
// Ugly: copy the last item here, and wind back the loop counter
// by one so that we will process the last item again.
//
pWork[i] = pWork[ --nPar ];
i--;
nWork--;
}
}
}
}
}
SymCryptWipe( pbFixedScratch, cbFixedScratch );
cleanup:
return scError;
}