2 * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
3 * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
4 * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
8 * This file has helper routines for doing basic GF operations with any
9 * legal value of w. The problem is that w <= 32, w=64 and w=128 all have
10 * different data types, which is a pain. The procedures in this file try
11 * to alleviate that pain. They are used in gf_unit and gf_time.
22 #include "gf_complete.h"
24 #include "gf_method.h"
26 #include "gf_general.h"
28 void gf_general_set_zero(gf_general_t *v, int w)
40 void gf_general_set_one(gf_general_t *v, int w)
52 void gf_general_set_two(gf_general_t *v, int w)
64 int gf_general_is_zero(gf_general_t *v, int w)
71 return (v->w128[0] == 0 && v->w128[1] == 0);
75 int gf_general_is_one(gf_general_t *v, int w)
82 return (v->w128[0] == 0 && v->w128[1] == 1);
86 void gf_general_set_random(gf_general_t *v, int w, int zero_ok)
89 v->w32 = MOA_Random_W(w, zero_ok);
92 v->w64 = MOA_Random_64();
93 if (v->w64 != 0 || zero_ok) return;
97 MOA_Random_128(v->w128);
98 if (v->w128[0] != 0 || v->w128[1] != 0 || zero_ok) return;
103 void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex)
107 sprintf(s, "%x", v->w32);
109 sprintf(s, "%u", v->w32);
111 } else if (w <= 64) {
113 sprintf(s, "%llx", (long long unsigned int) v->w64);
115 sprintf(s, "%lld", (long long unsigned int) v->w64);
118 if (v->w128[0] == 0) {
119 sprintf(s, "%llx", (long long unsigned int) v->w128[1]);
121 sprintf(s, "%llx%016llx", (long long unsigned int) v->w128[0],
122 (long long unsigned int) v->w128[1]);
127 int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex)
134 if (sscanf(s, "%x", &(v->w32)) == 0) return 0;
136 if (sscanf(s, "%u", &(v->w32)) == 0) return 0;
138 if (w == 32) return 1;
140 if (v->w32 & ((gf_val_32_t)1 << 31)) return 0;
143 if (v->w32 & ~((1 << w)-1)) return 0;
145 } else if (w <= 64) {
146 if (hex) return (sscanf(s, "%llx", (long long unsigned int *) (&(v->w64))) == 1);
147 return (sscanf(s, "%lld", (long long int *) (&(v->w64))) == 1);
153 return (sscanf(s, "%llx", (long long unsigned int *) (&(v->w128[1]))) == 1);
155 if (l > 32) return 0;
158 if (sscanf(s, "%llx", (long long unsigned int *) (&(v->w128[0]))) == 0) {
162 return (sscanf(s+(l-16), "%llx", (long long unsigned int *) (&(v->w128[1]))) == 1);
167 void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
172 h = (gf_internal_t *) gf->scratch;
176 c->w32 = a->w32 ^ b->w32;
177 } else if (w <= 64) {
178 c->w64 = a->w64 ^ b->w64;
180 c->w128[0] = a->w128[0] ^ b->w128[0];
181 c->w128[1] = a->w128[1] ^ b->w128[1];
185 void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
190 h = (gf_internal_t *) gf->scratch;
194 c->w32 = gf->multiply.w32(gf, a->w32, b->w32);
195 } else if (w <= 64) {
196 c->w64 = gf->multiply.w64(gf, a->w64, b->w64);
198 gf->multiply.w128(gf, a->w128, b->w128, c->w128);
202 void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
207 h = (gf_internal_t *) gf->scratch;
211 c->w32 = gf->divide.w32(gf, a->w32, b->w32);
212 } else if (w <= 64) {
213 c->w64 = gf->divide.w64(gf, a->w64, b->w64);
215 gf->divide.w128(gf, a->w128, b->w128, c->w128);
219 void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b)
224 h = (gf_internal_t *) gf->scratch;
228 b->w32 = gf->inverse.w32(gf, a->w32);
229 } else if (w <= 64) {
230 b->w64 = gf->inverse.w64(gf, a->w64);
232 gf->inverse.w128(gf, a->w128, b->w128);
236 int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w)
239 return (v1->w32 == v2->w32);
240 } else if (w <= 64) {
241 return (v1->w64 == v2->w64);
243 return (v1->w128[0] == v2->w128[0] &&
244 v1->w128[1] == v2->w128[1]);
248 void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a, void *ra, void *rb, int bytes, int xor)
253 h = (gf_internal_t *) gf->scratch;
257 gf->multiply_region.w32(gf, ra, rb, a->w32, bytes, xor);
258 } else if (w <= 64) {
259 gf->multiply_region.w64(gf, ra, rb, a->w64, bytes, xor);
261 gf->multiply_region.w128(gf, ra, rb, a->w128, bytes, xor);
265 void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *orig_target, void *final_target, int bytes, int xor)
269 gf_general_t oa, ot, ft, sb;
270 char sa[50], soa[50], sot[50], sft[50], ssb[50];
272 h = (gf_internal_t *) gf->scratch;
275 words = (bytes * 8) / w;
276 for (i = 0; i < words; i++) {
278 oa.w32 = gf->extract_word.w32(gf, orig_a, bytes, i);
279 ot.w32 = gf->extract_word.w32(gf, orig_target, bytes, i);
280 ft.w32 = gf->extract_word.w32(gf, final_target, bytes, i);
281 sb.w32 = gf->multiply.w32(gf, a->w32, oa.w32);
282 if (xor) sb.w32 ^= ot.w32;
283 } else if (w <= 64) {
284 oa.w64 = gf->extract_word.w64(gf, orig_a, bytes, i);
285 ot.w64 = gf->extract_word.w64(gf, orig_target, bytes, i);
286 ft.w64 = gf->extract_word.w64(gf, final_target, bytes, i);
287 sb.w64 = gf->multiply.w64(gf, a->w64, oa.w64);
288 if (xor) sb.w64 ^= ot.w64;
290 gf->extract_word.w128(gf, orig_a, bytes, i, oa.w128);
291 gf->extract_word.w128(gf, orig_target, bytes, i, ot.w128);
292 gf->extract_word.w128(gf, final_target, bytes, i, ft.w128);
293 gf->multiply.w128(gf, a->w128, oa.w128, sb.w128);
295 sb.w128[0] ^= ot.w128[0];
296 sb.w128[1] ^= ot.w128[1];
300 if (!gf_general_are_equal(&ft, &sb, w)) {
302 fprintf(stderr,"Problem with region multiply (all values in hex):\n");
303 fprintf(stderr," Target address base: 0x%lx. Word 0x%x of 0x%x. Xor: %d\n",
304 (unsigned long) final_target, i, words, xor);
305 gf_general_val_to_s(a, w, sa, 1);
306 gf_general_val_to_s(&oa, w, soa, 1);
307 gf_general_val_to_s(&ot, w, sot, 1);
308 gf_general_val_to_s(&ft, w, sft, 1);
309 gf_general_val_to_s(&sb, w, ssb, 1);
310 fprintf(stderr," Value: %s\n", sa);
311 fprintf(stderr," Original source word: %s\n", soa);
312 if (xor) fprintf(stderr," XOR with target word: %s\n", sot);
313 fprintf(stderr," Product word: %s\n", sft);
314 fprintf(stderr," It should be: %s\n", ssb);
320 void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
330 top = (uint8_t *)rb+size;
332 /* If w is 8, 16, 32, 64 or 128, fill the regions with random bytes.
333 However, don't allow for zeros in rb, because that will screw up
336 When w is 4, you fill the regions with random 4-bit words in each byte.
338 Otherwise, treat every four bytes as an uint32_t
339 and fill it with a random value mod (1 << w).
342 if (w == 8 || w == 16 || w == 32 || w == 64 || w == 128) {
343 MOA_Fill_Random_Region (ra, size);
345 gf_general_set_random(&g, w, 0);
352 r16 = (uint16_t *) rb;
356 r32 = (uint32_t *) rb;
360 r64 = (uint64_t *) rb;
364 r64 = (uint64_t *) rb;
369 rb = (uint8_t *)rb + (w/8);
372 r8a = (uint8_t *) ra;
374 while (r8 < (uint8_t *) top) {
375 gf_general_set_random(&g, w, 1);
377 gf_general_set_random(&g, w, 0);
383 r32 = (uint32_t *) ra;
384 for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 1);
385 r32 = (uint32_t *) rb;
386 for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 0);
390 /* This sucks, but in order to time, you really need to avoid putting ifs in
391 the inner loops. So, I'm doing a separate timing test for each w:
392 (4 & 8), 16, 32, 64, 128 and everything else. Fortunately, the "everything else"
393 tests can be equivalent to w=32.
395 I'm also putting the results back into ra, because otherwise, the optimizer might
396 figure out that we're not really doing anything in the inner loops and it
399 int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char test)
403 uint8_t *r8a, *r8b, *top8;
404 uint16_t *r16a, *r16b, *top16;
405 uint32_t *r32a, *r32b, *top32;
406 uint64_t *r64a, *r64b, *top64, *r64c;
409 h = (gf_internal_t *) gf->scratch;
411 top = (uint8_t *)ra + size;
413 if (w == 8 || w == 4) {
414 r8a = (uint8_t *) ra;
415 r8b = (uint8_t *) rb;
416 top8 = (uint8_t *) top;
419 *r8a = gf->multiply.w32(gf, *r8a, *r8b);
423 } else if (test == 'D') {
425 *r8a = gf->divide.w32(gf, *r8a, *r8b);
429 } else if (test == 'I') {
431 *r8a = gf->inverse.w32(gf, *r8a);
435 return (top8 - (uint8_t *) ra);
439 r16a = (uint16_t *) ra;
440 r16b = (uint16_t *) rb;
441 top16 = (uint16_t *) top;
443 while (r16a < top16) {
444 *r16a = gf->multiply.w32(gf, *r16a, *r16b);
448 } else if (test == 'D') {
449 while (r16a < top16) {
450 *r16a = gf->divide.w32(gf, *r16a, *r16b);
454 } else if (test == 'I') {
455 while (r16a < top16) {
456 *r16a = gf->inverse.w32(gf, *r16a);
460 return (top16 - (uint16_t *) ra);
463 r32a = (uint32_t *) ra;
464 r32b = (uint32_t *) rb;
465 top32 = (uint32_t *) ra + (size/4); /* This is for the "everything elses" */
468 while (r32a < top32) {
469 *r32a = gf->multiply.w32(gf, *r32a, *r32b);
473 } else if (test == 'D') {
474 while (r32a < top32) {
475 *r32a = gf->divide.w32(gf, *r32a, *r32b);
479 } else if (test == 'I') {
480 while (r32a < top32) {
481 *r32a = gf->inverse.w32(gf, *r32a);
485 return (top32 - (uint32_t *) ra);
488 r64a = (uint64_t *) ra;
489 r64b = (uint64_t *) rb;
490 top64 = (uint64_t *) top;
492 while (r64a < top64) {
493 *r64a = gf->multiply.w64(gf, *r64a, *r64b);
497 } else if (test == 'D') {
498 while (r64a < top64) {
499 *r64a = gf->divide.w64(gf, *r64a, *r64b);
503 } else if (test == 'I') {
504 while (r64a < top64) {
505 *r64a = gf->inverse.w64(gf, *r64a);
509 return (top64 - (uint64_t *) ra);
512 r64a = (uint64_t *) ra;
515 r64b = (uint64_t *) rb;
516 top64 = (uint64_t *) top;
517 rv = (top64 - r64a)/2;
519 while (r64a < top64) {
520 gf->multiply.w128(gf, r64a, r64b, r64c);
524 } else if (test == 'D') {
525 while (r64a < top64) {
526 gf->divide.w128(gf, r64a, r64b, r64c);
530 } else if (test == 'I') {
531 while (r64a < top64) {
532 gf->inverse.w128(gf, r64a, r64c);