2 * Copyright (c) 2014, James S. Plank and Kevin Greenan
5 * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
8 * Revision 2.0: Galois Field backend now links to GF-Complete
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
14 * - Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
17 * - Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
22 * - Neither the name of the University of Tennessee nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
33 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
34 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
36 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
40 /* Jerasure's authors:
42 Revision 2.x - 2014: James S. Plank and Kevin M. Greenan
43 Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman.
44 Revision 1.0 - 2007: James S. Plank
55 #define talloc(type, num) (type *) malloc(sizeof(type)*(num))
57 static double jerasure_total_xor_bytes = 0;
58 static double jerasure_total_gf_bytes = 0;
59 static double jerasure_total_memcpy_bytes = 0;
61 void jerasure_print_matrix(int *m, int rows, int cols, int w)
72 sprintf(s, "%u", w2-1);
76 for (i = 0; i < rows; i++) {
77 for (j = 0; j < cols; j++) {
78 if (j != 0) printf(" ");
79 printf("%*u", fw, m[i*cols+j]);
85 void jerasure_print_bitmatrix(int *m, int rows, int cols, int w)
89 for (i = 0; i < rows; i++) {
90 if (i != 0 && i%w == 0) printf("\n");
91 for (j = 0; j < cols; j++) {
92 if (j != 0 && j%w == 0) printf(" ");
93 printf("%d", m[i*cols+j]);
99 int jerasure_make_decoding_matrix(int k, int m, int w, int *matrix, int *erased, int *decoding_matrix, int *dm_ids)
104 for (i = 0; j < k; i++) {
105 if (erased[i] == 0) {
111 tmpmat = talloc(int, k*k);
112 if (tmpmat == NULL) { return -1; }
113 for (i = 0; i < k; i++) {
115 for (j = 0; j < k; j++) tmpmat[i*k+j] = 0;
116 tmpmat[i*k+dm_ids[i]] = 1;
118 for (j = 0; j < k; j++) {
119 tmpmat[i*k+j] = matrix[(dm_ids[i]-k)*k+j];
124 i = jerasure_invert_matrix(tmpmat, decoding_matrix, k, w);
129 /* Internal Routine */
130 int jerasure_make_decoding_bitmatrix(int k, int m, int w, int *matrix, int *erased, int *decoding_matrix, int *dm_ids)
136 for (i = 0; j < k; i++) {
137 if (erased[i] == 0) {
143 tmpmat = talloc(int, k*k*w*w);
144 if (tmpmat == NULL) { return -1; }
145 for (i = 0; i < k; i++) {
148 for (j = 0; j < k*w*w; j++) tmpmat[index+j] = 0;
149 index = i*k*w*w+dm_ids[i]*w;
150 for (j = 0; j < w; j++) {
156 mindex = (dm_ids[i]-k)*k*w*w;
157 for (j = 0; j < k*w*w; j++) {
158 tmpmat[index+j] = matrix[mindex+j];
163 i = jerasure_invert_bitmatrix(tmpmat, decoding_matrix, k*w);
168 int jerasure_matrix_decode(int k, int m, int w, int *matrix, int row_k_ones, int *erasures,
169 char **data_ptrs, char **coding_ptrs, int size)
171 int i, edd, lastdrive;
173 int *erased, *decoding_matrix, *dm_ids;
175 if (w != 8 && w != 16 && w != 32) return -1;
177 erased = jerasure_erasures_to_erased(k, m, erasures);
178 if (erased == NULL) return -1;
180 /* Find the number of data drives failed */
185 for (i = 0; i < k; i++) {
192 /* You only need to create the decoding matrix in the following cases:
194 1. edd > 0 and row_k_ones is false.
195 2. edd > 0 and row_k_ones is true and coding device 0 has been erased.
198 We're going to use lastdrive to denote when to stop decoding data.
199 At this point in the code, it is equal to the last erased data device.
200 However, if we can't use the parity row to decode it (i.e. row_k_ones=0
201 or erased[k] = 1, we're going to set it to k so that the decoding
202 pass will decode all data.
205 if (!row_k_ones || erased[k]) lastdrive = k;
208 decoding_matrix = NULL;
210 if (edd > 1 || (edd > 0 && (!row_k_ones || erased[k]))) {
211 dm_ids = talloc(int, k);
212 if (dm_ids == NULL) {
217 decoding_matrix = talloc(int, k*k);
218 if (decoding_matrix == NULL) {
224 if (jerasure_make_decoding_matrix(k, m, w, matrix, erased, decoding_matrix, dm_ids) < 0) {
227 free(decoding_matrix);
232 /* Decode the data drives.
233 If row_k_ones is true and coding device 0 is intact, then only decode edd-1 drives.
234 This is done by stopping at lastdrive.
235 We test whether edd > 0 so that we can exit the loop early if we're done.
238 for (i = 0; edd > 0 && i < lastdrive; i++) {
240 jerasure_matrix_dotprod(k, w, decoding_matrix+(i*k), dm_ids, i, data_ptrs, coding_ptrs, size);
245 /* Then if necessary, decode drive lastdrive */
248 tmpids = talloc(int, k);
252 free(decoding_matrix);
255 for (i = 0; i < k; i++) {
256 tmpids[i] = (i < lastdrive) ? i : i+1;
258 jerasure_matrix_dotprod(k, w, matrix, tmpids, lastdrive, data_ptrs, coding_ptrs, size);
262 /* Finally, re-encode any erased coding devices */
264 for (i = 0; i < m; i++) {
266 jerasure_matrix_dotprod(k, w, matrix+(i*k), NULL, i+k, data_ptrs, coding_ptrs, size);
271 if (dm_ids != NULL) free(dm_ids);
272 if (decoding_matrix != NULL) free(decoding_matrix);
278 int *jerasure_matrix_to_bitmatrix(int k, int m, int w, int *matrix)
281 int rowelts, rowindex, colindex, elt, i, j, l, x;
283 if (matrix == NULL) { return NULL; }
285 bitmatrix = talloc(int, k*m*w*w);
286 if (!bitmatrix) return NULL;
291 for (i = 0; i < m; i++) {
293 for (j = 0; j < k; j++) {
295 for (x = 0; x < w; x++) {
296 for (l = 0; l < w; l++) {
297 bitmatrix[colindex+x+l*rowelts] = ((elt & (1 << l)) ? 1 : 0);
299 elt = galois_single_multiply(elt, 2, w);
303 rowindex += rowelts * w;
308 void jerasure_matrix_encode(int k, int m, int w, int *matrix,
309 char **data_ptrs, char **coding_ptrs, int size)
313 if (w != 8 && w != 16 && w != 32) {
314 fprintf(stderr, "ERROR: jerasure_matrix_encode() and w is not 8, 16 or 32\n");
318 for (i = 0; i < m; i++) {
319 jerasure_matrix_dotprod(k, w, matrix+(i*k), NULL, k+i, data_ptrs, coding_ptrs, size);
323 void jerasure_bitmatrix_dotprod(int k, int w, int *bitmatrix_row,
324 int *src_ids, int dest_id,
325 char **data_ptrs, char **coding_ptrs, int size, int packetsize)
327 int j, sindex, pstarted, index, x, y;
328 char *dptr, *pptr, *bdptr, *bpptr;
330 if (size%(w*packetsize) != 0) {
331 fprintf(stderr, "jerasure_bitmatrix_dotprod - size%c(w*packetsize)) must = 0\n", '%');
335 bpptr = (dest_id < k) ? data_ptrs[dest_id] : coding_ptrs[dest_id-k];
337 for (sindex = 0; sindex < size; sindex += (packetsize*w)) {
339 for (j = 0; j < w; j++) {
341 pptr = bpptr + sindex + j*packetsize;
342 for (x = 0; x < k; x++) {
343 if (src_ids == NULL) {
344 bdptr = data_ptrs[x];
345 } else if (src_ids[x] < k) {
346 bdptr = data_ptrs[src_ids[x]];
348 bdptr = coding_ptrs[src_ids[x]-k];
350 for (y = 0; y < w; y++) {
351 if (bitmatrix_row[index]) {
352 dptr = bdptr + sindex + y*packetsize;
354 memcpy(pptr, dptr, packetsize);
355 jerasure_total_memcpy_bytes += packetsize;
358 galois_region_xor(dptr, pptr, packetsize);
359 jerasure_total_xor_bytes += packetsize;
369 void jerasure_do_parity(int k, char **data_ptrs, char *parity_ptr, int size)
373 memcpy(parity_ptr, data_ptrs[0], size);
374 jerasure_total_memcpy_bytes += size;
376 for (i = 1; i < k; i++) {
377 galois_region_xor(data_ptrs[i], parity_ptr, size);
378 jerasure_total_xor_bytes += size;
382 int jerasure_invert_matrix(int *mat, int *inv, int rows, int w)
384 int cols, i, j, k, x, rs2;
385 int row_start, tmp, inverse;
390 for (i = 0; i < rows; i++) {
391 for (j = 0; j < cols; j++) {
392 inv[k] = (i == j) ? 1 : 0;
397 /* First -- convert into upper triangular */
398 for (i = 0; i < cols; i++) {
401 /* Swap rows if we ave a zero i,i element. If we can't swap, then the
402 matrix was not invertible */
404 if (mat[row_start+i] == 0) {
405 for (j = i+1; j < rows && mat[cols*j+i] == 0; j++) ;
406 if (j == rows) return -1;
408 for (k = 0; k < cols; k++) {
409 tmp = mat[row_start+k];
410 mat[row_start+k] = mat[rs2+k];
412 tmp = inv[row_start+k];
413 inv[row_start+k] = inv[rs2+k];
418 /* Multiply the row by 1/element i,i */
419 tmp = mat[row_start+i];
421 inverse = galois_single_divide(1, tmp, w);
422 for (j = 0; j < cols; j++) {
423 mat[row_start+j] = galois_single_multiply(mat[row_start+j], inverse, w);
424 inv[row_start+j] = galois_single_multiply(inv[row_start+j], inverse, w);
428 /* Now for each j>i, add A_ji*Ai to Aj */
430 for (j = i+1; j != cols; j++) {
435 for (x = 0; x < cols; x++) {
436 mat[rs2+x] ^= mat[row_start+x];
437 inv[rs2+x] ^= inv[row_start+x];
442 for (x = 0; x < cols; x++) {
443 mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w);
444 inv[rs2+x] ^= galois_single_multiply(tmp, inv[row_start+x], w);
451 /* Now the matrix is upper triangular. Start at the top and multiply down */
453 for (i = rows-1; i >= 0; i--) {
455 for (j = 0; j < i; j++) {
457 if (mat[rs2+i] != 0) {
460 for (k = 0; k < cols; k++) {
461 inv[rs2+k] ^= galois_single_multiply(tmp, inv[row_start+k], w);
469 int jerasure_invertible_matrix(int *mat, int rows, int w)
471 int cols, i, j, k, x, rs2;
472 int row_start, tmp, inverse;
476 /* First -- convert into upper triangular */
477 for (i = 0; i < cols; i++) {
480 /* Swap rows if we ave a zero i,i element. If we can't swap, then the
481 matrix was not invertible */
483 if (mat[row_start+i] == 0) {
484 for (j = i+1; j < rows && mat[cols*j+i] == 0; j++) ;
485 if (j == rows) return 0;
487 for (k = 0; k < cols; k++) {
488 tmp = mat[row_start+k];
489 mat[row_start+k] = mat[rs2+k];
494 /* Multiply the row by 1/element i,i */
495 tmp = mat[row_start+i];
497 inverse = galois_single_divide(1, tmp, w);
498 for (j = 0; j < cols; j++) {
499 mat[row_start+j] = galois_single_multiply(mat[row_start+j], inverse, w);
503 /* Now for each j>i, add A_ji*Ai to Aj */
505 for (j = i+1; j != cols; j++) {
510 for (x = 0; x < cols; x++) {
511 mat[rs2+x] ^= mat[row_start+x];
516 for (x = 0; x < cols; x++) {
517 mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w);
526 /* Converts a list-style version of the erasures into an array of k+m elements
527 where the element = 1 if the index has been erased, and zero otherwise */
529 int *jerasure_erasures_to_erased(int k, int m, int *erasures)
537 erased = talloc(int, td);
538 if (erased == NULL) return NULL;
541 for (i = 0; i < td; i++) erased[i] = 0;
543 for (i = 0; erasures[i] != -1; i++) {
544 if (erased[erasures[i]] == 0) {
545 erased[erasures[i]] = 1;
547 if (t_non_erased < k) {
556 void jerasure_free_schedule(int **schedule)
560 for (i = 0; schedule[i][0] >= 0; i++) free(schedule[i]);
565 void jerasure_free_schedule_cache(int k, int m, int ***cache)
570 fprintf(stderr, "jerasure_free_schedule_cache(): m must equal 2\n");
574 for (e1 = 0; e1 < k+m; e1++) {
575 for (e2 = 0; e2 < e1; e2++) {
576 jerasure_free_schedule(cache[e1*(k+m)+e2]);
578 jerasure_free_schedule(cache[e1*(k+m)+e1]);
583 void jerasure_matrix_dotprod(int k, int w, int *matrix_row,
584 int *src_ids, int dest_id,
585 char **data_ptrs, char **coding_ptrs, int size)
591 if (w != 1 && w != 8 && w != 16 && w != 32) {
592 fprintf(stderr, "ERROR: jerasure_matrix_dotprod() called and w is not 1, 8, 16 or 32\n");
598 dptr = (dest_id < k) ? data_ptrs[dest_id] : coding_ptrs[dest_id-k];
600 /* First copy or xor any data that does not need to be multiplied by a factor */
602 for (i = 0; i < k; i++) {
603 if (matrix_row[i] == 1) {
604 if (src_ids == NULL) {
606 } else if (src_ids[i] < k) {
607 sptr = data_ptrs[src_ids[i]];
609 sptr = coding_ptrs[src_ids[i]-k];
612 memcpy(dptr, sptr, size);
613 jerasure_total_memcpy_bytes += size;
616 galois_region_xor(sptr, dptr, size);
617 jerasure_total_xor_bytes += size;
622 /* Now do the data that needs to be multiplied by a factor */
624 for (i = 0; i < k; i++) {
625 if (matrix_row[i] != 0 && matrix_row[i] != 1) {
626 if (src_ids == NULL) {
628 } else if (src_ids[i] < k) {
629 sptr = data_ptrs[src_ids[i]];
631 sptr = coding_ptrs[src_ids[i]-k];
634 case 8: galois_w08_region_multiply(sptr, matrix_row[i], size, dptr, init); break;
635 case 16: galois_w16_region_multiply(sptr, matrix_row[i], size, dptr, init); break;
636 case 32: galois_w32_region_multiply(sptr, matrix_row[i], size, dptr, init); break;
638 jerasure_total_gf_bytes += size;
645 int jerasure_bitmatrix_decode(int k, int m, int w, int *bitmatrix, int row_k_ones, int *erasures,
646 char **data_ptrs, char **coding_ptrs, int size, int packetsize)
650 int *decoding_matrix;
652 int edd, *tmpids, lastdrive;
654 erased = jerasure_erasures_to_erased(k, m, erasures);
655 if (erased == NULL) return -1;
657 /* See jerasure_matrix_decode for the logic of this routine. This one works just like
658 it, but calls the bitmatrix ops instead */
663 for (i = 0; i < k; i++) {
670 if (row_k_ones != 1 || erased[k]) lastdrive = k;
673 decoding_matrix = NULL;
675 if (edd > 1 || (edd > 0 && (row_k_ones != 1 || erased[k]))) {
677 dm_ids = talloc(int, k);
678 if (dm_ids == NULL) {
683 decoding_matrix = talloc(int, k*k*w*w);
684 if (decoding_matrix == NULL) {
690 if (jerasure_make_decoding_bitmatrix(k, m, w, bitmatrix, erased, decoding_matrix, dm_ids) < 0) {
693 free(decoding_matrix);
698 for (i = 0; edd > 0 && i < lastdrive; i++) {
700 jerasure_bitmatrix_dotprod(k, w, decoding_matrix+i*k*w*w, dm_ids, i, data_ptrs, coding_ptrs, size, packetsize);
706 tmpids = talloc(int, k);
710 free(decoding_matrix);
713 for (i = 0; i < k; i++) {
714 tmpids[i] = (i < lastdrive) ? i : i+1;
716 jerasure_bitmatrix_dotprod(k, w, bitmatrix, tmpids, lastdrive, data_ptrs, coding_ptrs, size, packetsize);
720 for (i = 0; i < m; i++) {
722 jerasure_bitmatrix_dotprod(k, w, bitmatrix+i*k*w*w, NULL, k+i, data_ptrs, coding_ptrs, size, packetsize);
727 if (dm_ids != NULL) free(dm_ids);
728 if (decoding_matrix != NULL) free(decoding_matrix);
733 static char **set_up_ptrs_for_scheduled_decoding(int k, int m, int *erasures, char **data_ptrs, char **coding_ptrs)
742 for (i = 0; erasures[i] != -1; i++) {
743 if (erasures[i] < k) ddf++; else cdf++;
746 erased = jerasure_erasures_to_erased(k, m, erasures);
747 if (erased == NULL) return NULL;
749 /* Set up ptrs. It will be as follows:
751 - If data drive i has not failed, then ptrs[i] = data_ptrs[i].
752 - If data drive i has failed, then ptrs[i] = coding_ptrs[j], where j is the
753 lowest unused non-failed coding drive.
754 - Elements k to k+ddf-1 are data_ptrs[] of the failed data drives.
755 - Elements k+ddf to k+ddf+cdf-1 are coding_ptrs[] of the failed data drives.
757 The array row_ids contains the ids of ptrs.
758 The array ind_to_row_ids contains the row_id of drive i.
760 However, we're going to set row_ids and ind_to_row in a different procedure.
763 ptrs = talloc(char *, k+m);
771 for (i = 0; i < k; i++) {
772 if (erased[i] == 0) {
773 ptrs[i] = data_ptrs[i];
775 while (erased[j]) j++;
776 ptrs[i] = coding_ptrs[j-k];
778 ptrs[x] = data_ptrs[i];
782 for (i = k; i < k+m; i++) {
784 ptrs[x] = coding_ptrs[i-k];
792 static int set_up_ids_for_scheduled_decoding(int k, int m, int *erasures, int *row_ids, int *ind_to_row)
800 for (i = 0; erasures[i] != -1; i++) {
801 if (erasures[i] < k) ddf++; else cdf++;
804 erased = jerasure_erasures_to_erased(k, m, erasures);
805 if (erased == NULL) return -1;
807 /* See set_up_ptrs_for_scheduled_decoding for how these are set */
811 for (i = 0; i < k; i++) {
812 if (erased[i] == 0) {
816 while (erased[j]) j++;
825 for (i = k; i < k+m; i++) {
836 static int **jerasure_generate_decoding_schedule(int k, int m, int w, int *bitmatrix, int *erasures, int smart)
838 int i, j, x, drive, y, index, z;
839 int *decoding_matrix, *inverse, *real_decoding_matrix;
847 /* First, figure out the number of data drives that have failed, and the
848 number of coding drives that have failed: ddf and cdf */
852 for (i = 0; erasures[i] != -1; i++) {
853 if (erasures[i] < k) ddf++; else cdf++;
856 row_ids = talloc(int, k+m);
857 if (!row_ids) return NULL;
858 ind_to_row = talloc(int, k+m);
864 if (set_up_ids_for_scheduled_decoding(k, m, erasures, row_ids, ind_to_row) < 0) {
870 /* Now, we're going to create one decoding matrix which is going to
871 decode everything with one call. The hope is that the scheduler
872 will do a good job. This matrix has w*e rows, where e is the
873 number of erasures (ddf+cdf) */
875 real_decoding_matrix = talloc(int, k*w*(cdf+ddf)*w);
876 if (!real_decoding_matrix) {
882 /* First, if any data drives have failed, then initialize the first
883 ddf*w rows of the decoding matrix from the standard decoding
888 decoding_matrix = talloc(int, k*k*w*w);
889 if (!decoding_matrix) {
894 ptr = decoding_matrix;
895 for (i = 0; i < k; i++) {
896 if (row_ids[i] == i) {
897 bzero(ptr, k*w*w*sizeof(int));
898 for (x = 0; x < w; x++) {
899 ptr[x+i*w+x*k*w] = 1;
902 memcpy(ptr, bitmatrix+k*w*w*(row_ids[i]-k), k*w*w*sizeof(int));
906 inverse = talloc(int, k*k*w*w);
910 free(decoding_matrix);
913 jerasure_invert_bitmatrix(decoding_matrix, inverse, k*w);
915 /* printf("\nMatrix to invert\n");
916 jerasure_print_bitmatrix(decoding_matrix, k*w, k*w, w);
918 printf("\nInverse\n");
919 jerasure_print_bitmatrix(inverse, k*w, k*w, w);
922 free(decoding_matrix);
923 ptr = real_decoding_matrix;
924 for (i = 0; i < ddf; i++) {
925 memcpy(ptr, inverse+k*w*w*row_ids[k+i], sizeof(int)*k*w*w);
931 /* Next, here comes the hard part. For each coding node that needs
932 to be decoded, you start by putting its rows of the distribution
933 matrix into the decoding matrix. If there were no failed data
934 nodes, then you're done. However, if there have been failed
935 data nodes, then you need to modify the columns that correspond
936 to the data nodes. You do that by first zeroing them. Then
937 whereever there is a one in the distribution matrix, you XOR
938 in the corresponding row from the failed data node's entry in
939 the decoding matrix. The whole process kind of makes my head
943 for (x = 0; x < cdf; x++) {
944 drive = row_ids[x+ddf+k]-k;
945 ptr = real_decoding_matrix + k*w*w*(ddf+x);
946 memcpy(ptr, bitmatrix+drive*k*w*w, sizeof(int)*k*w*w);
948 for (i = 0; i < k; i++) {
949 if (row_ids[i] != i) {
950 for (j = 0; j < w; j++) {
951 bzero(ptr+j*k*w+i*w, sizeof(int)*w);
956 /* There's the yucky part */
959 for (i = 0; i < k; i++) {
960 if (row_ids[i] != i) {
961 b1 = real_decoding_matrix+(ind_to_row[i]-k)*k*w*w;
962 for (j = 0; j < w; j++) {
964 for (y = 0; y < w; y++) {
965 if (bitmatrix[index+j*k*w+i*w+y]) {
966 for (z = 0; z < k*w; z++) {
967 b2[z] = b2[z] ^ b1[z+y*k*w];
977 printf("\n\nReal Decoding Matrix\n\n");
978 jerasure_print_bitmatrix(real_decoding_matrix, (ddf+cdf)*w, k*w, w);
981 schedule = jerasure_smart_bitmatrix_to_schedule(k, ddf+cdf, w, real_decoding_matrix);
983 schedule = jerasure_dumb_bitmatrix_to_schedule(k, ddf+cdf, w, real_decoding_matrix);
987 free(real_decoding_matrix);
991 int jerasure_schedule_decode_lazy(int k, int m, int w, int *bitmatrix, int *erasures,
992 char **data_ptrs, char **coding_ptrs, int size, int packetsize,
999 ptrs = set_up_ptrs_for_scheduled_decoding(k, m, erasures, data_ptrs, coding_ptrs);
1000 if (ptrs == NULL) return -1;
1002 schedule = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart);
1003 if (schedule == NULL) {
1008 for (tdone = 0; tdone < size; tdone += packetsize*w) {
1009 jerasure_do_scheduled_operations(ptrs, schedule, packetsize);
1010 for (i = 0; i < k+m; i++) ptrs[i] += (packetsize*w);
1013 jerasure_free_schedule(schedule);
1019 int jerasure_schedule_decode_cache(int k, int m, int w, int ***scache, int *erasures,
1020 char **data_ptrs, char **coding_ptrs, int size, int packetsize)
1027 if (erasures[1] == -1) {
1028 index = erasures[0]*(k+m) + erasures[0];
1029 } else if (erasures[2] == -1) {
1030 index = erasures[0]*(k+m) + erasures[1];
1035 schedule = scache[index];
1037 ptrs = set_up_ptrs_for_scheduled_decoding(k, m, erasures, data_ptrs, coding_ptrs);
1038 if (ptrs == NULL) return -1;
1041 for (tdone = 0; tdone < size; tdone += packetsize*w) {
1042 jerasure_do_scheduled_operations(ptrs, schedule, packetsize);
1043 for (i = 0; i < k+m; i++) ptrs[i] += (packetsize*w);
1051 /* This only works when m = 2 */
1053 int ***jerasure_generate_schedule_cache(int k, int m, int w, int *bitmatrix, int smart)
1059 /* Ok -- this is yucky, but it's how I'm doing it. You will make an index out
1060 of erasures, which will be e1*(k+m)+(e2). If there is no e2, then e2 = e1.
1061 Isn't that clever and confusing. Sorry.
1063 We're not going to worry about ordering -- in other words, the schedule for
1064 e1,e2 will be the same as e2,e1. They will have the same pointer -- the
1065 schedule will not be duplicated. */
1067 if (m != 2) return NULL;
1069 scache = talloc(int **, (k+m)*(k+m+1));
1070 if (scache == NULL) return NULL;
1072 for (e1 = 0; e1 < k+m; e1++) {
1074 for (e2 = 0; e2 < e1; e2++) {
1077 scache[e1*(k+m)+e2] = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart);
1078 scache[e2*(k+m)+e1] = scache[e1*(k+m)+e2];
1081 scache[e1*(k+m)+e1] = jerasure_generate_decoding_schedule(k, m, w, bitmatrix, erasures, smart);
1087 int jerasure_invert_bitmatrix(int *mat, int *inv, int rows)
1095 for (i = 0; i < rows; i++) {
1096 for (j = 0; j < cols; j++) {
1097 inv[k] = (i == j) ? 1 : 0;
1102 /* First -- convert into upper triangular */
1104 for (i = 0; i < cols; i++) {
1106 /* Swap rows if we have a zero i,i element. If we can't swap, then the
1107 matrix was not invertible */
1109 if ((mat[i*cols+i]) == 0) {
1110 for (j = i+1; j < rows && (mat[j*cols+i]) == 0; j++) ;
1111 if (j == rows) return -1;
1112 for (k = 0; k < cols; k++) {
1113 tmp = mat[i*cols+k]; mat[i*cols+k] = mat[j*cols+k]; mat[j*cols+k] = tmp;
1114 tmp = inv[i*cols+k]; inv[i*cols+k] = inv[j*cols+k]; inv[j*cols+k] = tmp;
1118 /* Now for each j>i, add A_ji*Ai to Aj */
1119 for (j = i+1; j != rows; j++) {
1120 if (mat[j*cols+i] != 0) {
1121 for (k = 0; k < cols; k++) {
1122 mat[j*cols+k] ^= mat[i*cols+k];
1123 inv[j*cols+k] ^= inv[i*cols+k];
1129 /* Now the matrix is upper triangular. Start at the top and multiply down */
1131 for (i = rows-1; i >= 0; i--) {
1132 for (j = 0; j < i; j++) {
1133 if (mat[j*cols+i]) {
1134 for (k = 0; k < cols; k++) {
1135 mat[j*cols+k] ^= mat[i*cols+k];
1136 inv[j*cols+k] ^= inv[i*cols+k];
1144 int jerasure_invertible_bitmatrix(int *mat, int rows)
1151 /* First -- convert into upper triangular */
1153 for (i = 0; i < cols; i++) {
1155 /* Swap rows if we have a zero i,i element. If we can't swap, then the
1156 matrix was not invertible */
1158 if ((mat[i*cols+i]) == 0) {
1159 for (j = i+1; j < rows && (mat[j*cols+i]) == 0; j++) ;
1160 if (j == rows) return 0;
1161 for (k = 0; k < cols; k++) {
1162 tmp = mat[i*cols+k]; mat[i*cols+k] = mat[j*cols+k]; mat[j*cols+k] = tmp;
1166 /* Now for each j>i, add A_ji*Ai to Aj */
1167 for (j = i+1; j != rows; j++) {
1168 if (mat[j*cols+i] != 0) {
1169 for (k = 0; k < cols; k++) {
1170 mat[j*cols+k] ^= mat[i*cols+k];
1179 int *jerasure_matrix_multiply(int *m1, int *m2, int r1, int c1, int r2, int c2, int w)
1181 int *product, i, j, k;
1183 product = (int *) malloc(sizeof(int)*r1*c2);
1184 for (i = 0; i < r1*c2; i++) product[i] = 0;
1186 for (i = 0; i < r1; i++) {
1187 for (j = 0; j < c2; j++) {
1188 for (k = 0; k < r2; k++) {
1189 product[i*c2+j] ^= galois_single_multiply(m1[i*c1+k], m2[k*c2+j], w);
1196 void jerasure_get_stats(double *fill_in)
1198 fill_in[0] = jerasure_total_xor_bytes;
1199 fill_in[1] = jerasure_total_gf_bytes;
1200 fill_in[2] = jerasure_total_memcpy_bytes;
1201 jerasure_total_xor_bytes = 0;
1202 jerasure_total_gf_bytes = 0;
1203 jerasure_total_memcpy_bytes = 0;
1206 void jerasure_do_scheduled_operations(char **ptrs, int **operations, int packetsize)
1212 for (op = 0; operations[op][0] >= 0; op++) {
1213 sptr = ptrs[operations[op][0]] + operations[op][1]*packetsize;
1214 dptr = ptrs[operations[op][2]] + operations[op][3]*packetsize;
1215 if (operations[op][4]) {
1216 /* printf("%d,%d %d,%d\n", operations[op][0],
1220 printf("xor(0x%x, 0x%x -> 0x%x, %d)\n", sptr, dptr, dptr, packetsize); */
1221 galois_region_xor(sptr, dptr, packetsize);
1222 jerasure_total_xor_bytes += packetsize;
1224 /* printf("memcpy(0x%x <- 0x%x)\n", dptr, sptr); */
1225 memcpy(dptr, sptr, packetsize);
1226 jerasure_total_memcpy_bytes += packetsize;
1231 void jerasure_schedule_encode(int k, int m, int w, int **schedule,
1232 char **data_ptrs, char **coding_ptrs, int size, int packetsize)
1237 ptr_copy = talloc(char *, (k+m));
1238 for (i = 0; i < k; i++) ptr_copy[i] = data_ptrs[i];
1239 for (i = 0; i < m; i++) ptr_copy[i+k] = coding_ptrs[i];
1240 for (tdone = 0; tdone < size; tdone += packetsize*w) {
1241 jerasure_do_scheduled_operations(ptr_copy, schedule, packetsize);
1242 for (i = 0; i < k+m; i++) ptr_copy[i] += (packetsize*w);
1247 int **jerasure_dumb_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix)
1251 int index, optodo, i, j;
1253 operations = talloc(int *, k*m*w*w+1);
1254 if (!operations) return NULL;
1258 for (i = 0; i < m*w; i++) {
1260 for (j = 0; j < k*w; j++) {
1261 if (bitmatrix[index]) {
1262 operations[op] = talloc(int, 5);
1263 if (!operations[op]) {
1267 operations[op][4] = optodo;
1268 operations[op][0] = j/w;
1269 operations[op][1] = j%w;
1270 operations[op][2] = k+i/w;
1271 operations[op][3] = i%w;
1279 operations[op] = talloc(int, 5);
1280 if (!operations[op]) {
1284 operations[op][0] = -1;
1288 for (i = 0; i <= op; i++) {
1289 free(operations[op]);
1295 int **jerasure_smart_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix)
1300 int *diff, *from, *b1, *flink, *blink;
1303 int bestrow = 0, bestdiff, top;
1305 /* printf("Scheduling:\n\n");
1306 jerasure_print_bitmatrix(bitmatrix, m*w, k*w, w); */
1308 operations = talloc(int *, k*m*w*w+1);
1309 if (!operations) return NULL;
1312 diff = talloc(int, m*w);
1317 from = talloc(int, m*w);
1323 flink = talloc(int, m*w);
1330 blink = talloc(int, m*w);
1343 for (i = 0; i < m*w; i++) {
1345 for (j = 0; j < k*w; j++) {
1353 if (no < bestdiff) {
1363 /* printf("Doing row %d - %d from %d\n", row, diff[row], from[row]); */
1365 if (blink[row] == -1) {
1367 if (top != -1) blink[top] = -1;
1369 flink[blink[row]] = flink[row];
1370 if (flink[row] != -1) {
1371 blink[flink[row]] = blink[row];
1375 ptr = bitmatrix + row*k*w;
1376 if (from[row] == -1) {
1378 for (j = 0; j < k*w; j++) {
1380 operations[op] = talloc(int, 5);
1381 if (!operations[op]) goto error;
1382 operations[op][4] = optodo;
1383 operations[op][0] = j/w;
1384 operations[op][1] = j%w;
1385 operations[op][2] = k+row/w;
1386 operations[op][3] = row%w;
1392 operations[op] = talloc(int, 5);
1393 if (!operations[op]) goto error;
1394 operations[op][4] = 0;
1395 operations[op][0] = k+from[row]/w;
1396 operations[op][1] = from[row]%w;
1397 operations[op][2] = k+row/w;
1398 operations[op][3] = row%w;
1400 b1 = bitmatrix + from[row]*k*w;
1401 for (j = 0; j < k*w; j++) {
1402 if (ptr[j] ^ b1[j]) {
1403 operations[op] = talloc(int, 5);
1404 if (!operations[op]) goto error;
1405 operations[op][4] = 1;
1406 operations[op][0] = j/w;
1407 operations[op][1] = j%w;
1408 operations[op][2] = k+row/w;
1409 operations[op][3] = row%w;
1416 for (i = top; i != -1; i = flink[i]) {
1418 b1 = bitmatrix + i*k*w;
1419 for (j = 0; j < k*w; j++) no += (ptr[j] ^ b1[j]);
1424 if (diff[i] < bestdiff) {
1431 operations[op] = talloc(int, 5);
1432 if (!operations[op]) goto error;
1433 operations[op][0] = -1;
1442 for (i = 0; i <= op; i++) {
1443 free(operations[op]);
1453 void jerasure_bitmatrix_encode(int k, int m, int w, int *bitmatrix,
1454 char **data_ptrs, char **coding_ptrs, int size, int packetsize)
1458 if (packetsize%sizeof(long) != 0) {
1459 fprintf(stderr, "jerasure_bitmatrix_encode - packetsize(%d) %c sizeof(long) != 0\n", packetsize, '%');
1462 if (size%(packetsize*w) != 0) {
1463 fprintf(stderr, "jerasure_bitmatrix_encode - size(%d) %c (packetsize(%d)*w(%d))) != 0\n",
1464 size, '%', packetsize, w);
1468 for (i = 0; i < m; i++) {
1469 jerasure_bitmatrix_dotprod(k, w, bitmatrix+i*k*w*w, NULL, k+i, data_ptrs, coding_ptrs, size, packetsize);
1474 * Exported function for use by autoconf to perform quick
1477 int jerasure_autoconf_test()
1479 int x = galois_single_multiply(1, 2, 8);