2 * Copyright (c) 2014, James S. Plank and Kevin Greenan
5 * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
8 * Revision 2.0: Galois Field backend now links to GF-Complete
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
14 * - Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
17 * - Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
22 * - Neither the name of the University of Tennessee nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
33 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
34 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
36 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
40 /* Jerasure's authors:
42 Revision 2.x - 2014: James S. Plank and Kevin M. Greenan.
43 Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman.
44 Revision 1.0 - 2007: James S. Plank.
49 This program takes as input an inputfile, k, m, a coding
50 technique, w, and packetsize. It creates k+m files from
51 the original file so that k of these files are parts of
52 the original file and m of the files are encoded based on
53 the given coding technique. The format of the created files
54 is the file name with "_k#" or "_m#" and then the extension.
55 (For example, inputfile test.txt would yield file "test_k1.txt".)
73 #include "liberation.h"
// Coding techniques known to the encoder. A value of this type (the global
// method below) is used by ctrl_bs_handler as the index into Methods[].
78 enum Coding_Technique {Reed_Sol_Van, Reed_Sol_R6_Op, Cauchy_Orig, Cauchy_Good, Liberation, Blaum_Roth, Liber8tion, RDP, EVENODD, No_Coding};
// Printable technique names, looked up as Methods[method].
// NOTE(review): the enum above lists ten enumerators but only eight names are
// given here (nothing for RDP or EVENODD), so the no_coding string sits at
// index 7 while No_Coding has value 9 - confirm the intended mapping.
80 char *Methods[N] = {"reed_sol_van", "reed_sol_r6_op", "cauchy_orig", "cauchy_good", "liberation", "blaum_roth", "liber8tion", "no_coding"};
82 /* Global variables for signal handler */
// Technique reported by ctrl_bs_handler through Methods[method].
84 enum Coding_Technique method;
86 /* Function prototypes */
88 void ctrl_bs_handler(int dummy);
/*
 * Read wrapper used by the encoding loop in main.
 * With a non-NULL stream it forwards to fread and returns that result.
 * With a NULL stream it does not read anything and instead calls
 * MOA_Fill_Random_Region on ptr (presumably filling it with random bytes -
 * confirm against that helper).
 * NOTE(review): the fill call is given size only, not size*nmembers, while
 * the callers in this file pass sizeof(char) as size and the byte count as
 * nmembers - confirm how many bytes are meant to be filled.
 * NOTE(review): fread returns size_t, which is narrowed to int here.
 */
90 int jfread(void *ptr, int size, int nmembers, FILE *stream)
92 if (stream != NULL) return fread(ptr, size, nmembers, stream);
94 MOA_Fill_Random_Region(ptr, size);
/*
 * Encoder entry point.
 * Expects the arguments named in the usage text below: inputfile k m
 * coding_technique w packetsize buffersize. It validates them, pads the
 * input size up to a multiple the chosen code can work with, builds the
 * matrix, bitmatrix or schedule for the technique, then encodes the input
 * one buffer at a time, writing the data pieces, the coding pieces and a
 * metadata file below the Coding directory, and finally prints throughput.
 */
99 int main (int argc, char **argv) {
100 FILE *fp, *fp2; // file pointers
101 char *block; // padding file
102 int size, newsize; // size of file and temp size
103 struct stat status; // finding file size
106 enum Coding_Technique tech; // coding technique (parameter)
107 int k, m, w, packetsize; // parameters
108 int buffersize; // parameter
109 int i; // loop control variables
110 int blocksize; // size of k+m files
114 /* Jerasure Arguments */
121 /* Creation of file name variables */
123 char *s1, *s2, *extension;
128 /* Timing variables */
129 struct timing t1, t2, t3, t4;
134 /* Find buffersize */
138 signal(SIGQUIT, ctrl_bs_handler);
147 /* Error check Arguments*/
149 fprintf(stderr, "usage: inputfile k m coding_technique w packetsize buffersize\n");
150 fprintf(stderr, "\nChoose one of the following coding techniques: \nreed_sol_van, \nreed_sol_r6_op, \ncauchy_orig, \ncauchy_good, \nliberation, \nblaum_roth, \nliber8tion");
151 fprintf(stderr, "\n\nPacketsize is ignored for the reed_sol's");
152 fprintf(stderr, "\nBuffersize of 0 means the buffersize is chosen automatically.\n");
153 fprintf(stderr, "\nIf you just want to test speed, use an inputfile of \"-number\" where number is the size of the fake file you want to test.\n\n");
156 /* Conversion of parameters and error checking */
/* NOTE(review): the checks below compare the sscanf result with 0, so an EOF
   return (for example on an empty argument) is not rejected; the size parse
   further down tests for != 1 instead. */
157 if (sscanf(argv[2], "%d", &k) == 0 || k <= 0) {
158 fprintf(stderr, "Invalid value for k\n");
161 if (sscanf(argv[3], "%d", &m) == 0 || m < 0) {
162 fprintf(stderr, "Invalid value for m\n");
165 if (sscanf(argv[5],"%d", &w) == 0 || w <= 0) {
166 fprintf(stderr, "Invalid value for w.\n");
173 if (sscanf(argv[6], "%d", &packetsize) == 0 || packetsize < 0) {
174 fprintf(stderr, "Invalid value for packetsize.\n");
182 if (sscanf(argv[7], "%d", &buffersize) == 0 || buffersize < 0) {
183 fprintf(stderr, "Invalid value for buffersize\n");
189 /* Determine proper buffersize by finding the closest valid buffersize to the input value */
/* A valid buffersize is a multiple of sizeof(long)*w*k*packetsize, or of
   sizeof(long)*w*k when packetsize is 0; each loop below runs until either
   up or down is such a multiple. */
190 if (buffersize != 0) {
191 if (packetsize != 0 && buffersize%(sizeof(long)*w*k*packetsize) != 0) {
194 while (up%(sizeof(long)*w*k*packetsize) != 0 && (down%(sizeof(long)*w*k*packetsize) != 0)) {
200 if (up%(sizeof(long)*w*k*packetsize) == 0) {
209 else if (packetsize == 0 && buffersize%(sizeof(long)*w*k) != 0) {
212 while (up%(sizeof(long)*w*k) != 0 && down%(sizeof(long)*w*k) != 0) {
216 if (up%(sizeof(long)*w*k) == 0) {
225 /* Setting of coding technique and error checking */
227 if (strcmp(argv[4], "no_coding") == 0) {
230 else if (strcmp(argv[4], "reed_sol_van") == 0) {
232 if (w != 8 && w != 16 && w != 32) {
233 fprintf(stderr, "w must be one of {8, 16, 32}\n");
237 else if (strcmp(argv[4], "reed_sol_r6_op") == 0) {
239 fprintf(stderr, "m must be equal to 2\n");
242 if (w != 8 && w != 16 && w != 32) {
243 fprintf(stderr, "w must be one of {8, 16, 32}\n");
246 tech = Reed_Sol_R6_Op;
248 else if (strcmp(argv[4], "cauchy_orig") == 0) {
250 if (packetsize == 0) {
251 fprintf(stderr, "Must include packetsize.\n");
255 else if (strcmp(argv[4], "cauchy_good") == 0) {
257 if (packetsize == 0) {
258 fprintf(stderr, "Must include packetsize.\n");
262 else if (strcmp(argv[4], "liberation") == 0) {
264 fprintf(stderr, "k must be less than or equal to w\n");
267 if (w <= 2 || !(w%2) || !is_prime(w)) {
268 fprintf(stderr, "w must be greater than two and w must be prime\n");
271 if (packetsize == 0) {
272 fprintf(stderr, "Must include packetsize.\n");
275 if ((packetsize%(sizeof(long))) != 0) {
276 fprintf(stderr, "packetsize must be a multiple of sizeof(long)\n");
281 else if (strcmp(argv[4], "blaum_roth") == 0) {
283 fprintf(stderr, "k must be less than or equal to w\n");
286 if (w <= 2 || !((w+1)%2) || !is_prime(w+1)) {
287 fprintf(stderr, "w must be greater than two and w+1 must be prime\n");
290 if (packetsize == 0) {
291 fprintf(stderr, "Must include packetsize.\n");
294 if ((packetsize%(sizeof(long))) != 0) {
295 fprintf(stderr, "packetsize must be a multiple of sizeof(long)\n");
300 else if (strcmp(argv[4], "liber8tion") == 0) {
301 if (packetsize == 0) {
302 fprintf(stderr, "Must include packetsize\n");
306 fprintf(stderr, "w must equal 8\n");
310 fprintf(stderr, "m must equal 2\n");
314 fprintf(stderr, "k must be less than or equal to w\n");
320 fprintf(stderr, "Not a valid coding technique. Choose one of the following: reed_sol_van, reed_sol_r6_op, cauchy_orig, cauchy_good, liberation, blaum_roth, liber8tion, no_coding\n");
324 /* Set global variable method for signal handler */
327 /* Get current working directory for construction of file names */
/* NOTE(review): the malloc result is not checked, and getcwd is invoked
   inside assert, so a build with NDEBUG defined would drop the call and
   leave curdir unfilled. */
328 curdir = (char*)malloc(sizeof(char)*1000);
329 assert(curdir == getcwd(curdir, 1000));
/* A first argument starting with a dash is treated as a byte count for
   generated input instead of a file name (see the size parse below). */
331 if (argv[1][0] != '-') {
333 /* Open file and error check */
334 fp = fopen(argv[1], "rb");
336 fprintf(stderr, "Unable to open file.\n");
340 /* Create Coding directory */
341 i = mkdir("Coding", S_IRWXU);
342 if (i == -1 && errno != EEXIST) {
343 fprintf(stderr, "Unable to create Coding directory.\n");
347 /* Determine original size of file */
/* NOTE(review): the stat return value is ignored and st_size is stored in
   an int, so very large files would not be represented correctly. */
348 stat(argv[1], &status);
349 size = status.st_size;
351 if (sscanf(argv[1]+1, "%d", &size) != 1 || size <= 0) {
352 fprintf(stderr, "Files starting with '-' should be sizes for randomly created input\n");
361 /* Find new size by determining next closest multiple */
362 if (packetsize != 0) {
363 if (size%(k*w*packetsize*sizeof(long)) != 0) {
364 while (newsize%(k*w*packetsize*sizeof(long)) != 0)
369 if (size%(k*w*sizeof(long)) != 0) {
370 while (newsize%(k*w*sizeof(long)) != 0)
375 if (buffersize != 0) {
376 while (newsize%buffersize != 0) {
382 /* Determine size of k+m files */
383 blocksize = newsize/k;
385 /* Allow for buffersize and determine number of read-ins */
/* NOTE(review): both assignments to readins below use the same expression
   regardless of the remainder test. */
386 if (size > buffersize && buffersize != 0) {
387 if (newsize%buffersize != 0) {
388 readins = newsize/buffersize;
391 readins = newsize/buffersize;
393 block = (char *)malloc(sizeof(char)*buffersize);
394 blocksize = buffersize/k;
399 block = (char *)malloc(sizeof(char)*newsize);
402 /* Break inputfile name into the filename and extension */
403 s1 = (char*)malloc(sizeof(char)*(strlen(argv[1])+20));
404 s2 = strrchr(argv[1], '/');
412 s2 = strchr(s1, '.');
414 extension = strdup(s2);
417 extension = strdup("");
420 /* Allocate for full file name */
421 fname = (char*)malloc(sizeof(char)*(strlen(argv[1])+strlen(curdir)+20));
/* temp receives k in decimal. NOTE(review): presumably its length becomes
   md, the zero-pad width used in the output file names - confirm. */
422 sprintf(temp, "%d", k);
425 /* Allocate data and coding */
/* NOTE(review): data and coding themselves are not checked for NULL; only
   each coding[i] is. */
426 data = (char **)malloc(sizeof(char*)*k);
427 coding = (char **)malloc(sizeof(char*)*m);
428 for (i = 0; i < m; i++) {
429 coding[i] = (char *)malloc(sizeof(char)*blocksize);
430 if (coding[i] == NULL) { perror("malloc"); exit(1); }
435 /* Create coding matrix or bitmatrix and schedule */
441 matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
446 matrix = cauchy_original_coding_matrix(k, m, w);
447 bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
448 schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
451 matrix = cauchy_good_general_coding_matrix(k, m, w);
452 bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
453 schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
456 bitmatrix = liberation_coding_bitmatrix(k, w);
457 schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
460 bitmatrix = blaum_roth_coding_bitmatrix(k, w);
461 schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
464 bitmatrix = liber8tion_coding_bitmatrix(k);
465 schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
473 totalsec += timing_delta(&t3, &t4);
477 /* Read in data until finished */
/* One pass per read-in; n and readins are also reported by
   ctrl_bs_handler. */
481 while (n <= readins) {
482 /* Check if padding is needed, if so, add appropriate
484 if (total < size && total+buffersize <= size) {
485 total += jfread(block, sizeof(char), buffersize, fp);
487 else if (total < size && total+buffersize > size) {
488 extra = jfread(block, sizeof(char), buffersize, fp);
489 for (i = extra; i < buffersize; i++) {
493 else if (total == size) {
494 for (i = 0; i < buffersize; i++) {
499 /* Set pointers to point to file data */
500 for (i = 0; i < k; i++) {
501 data[i] = block+(i*blocksize);
505 /* Encode according to coding method */
510 jerasure_matrix_encode(k, m, w, matrix, data, coding, blocksize);
513 reed_sol_r6_encode(k, w, data, coding, blocksize);
516 jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize);
519 jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize);
522 jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize);
525 jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize);
528 jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize);
536 /* Write data and encoded data to k+m files */
/* Data pieces go to files tagged _k, with the index zero-padded to md
   digits. */
537 for (i = 1; i <= k; i++) {
539 bzero(data[i-1], blocksize);
541 sprintf(fname, "%s/Coding/%s_k%0*d%s", curdir, s1, md, i, extension);
543 fp2 = fopen(fname, "wb");
546 fp2 = fopen(fname, "ab");
548 fwrite(data[i-1], sizeof(char), blocksize, fp2);
/* Coding pieces go to files tagged _m.
   NOTE(review): this loop writes coding[i-1] but the bzero below clears
   data[i-1]; data has only k entries, so coding[i-1] looks like the
   intended target - confirm. */
553 for (i = 1; i <= m; i++) {
555 bzero(data[i-1], blocksize);
557 sprintf(fname, "%s/Coding/%s_m%0*d%s", curdir, s1, md, i, extension);
559 fp2 = fopen(fname, "wb");
562 fp2 = fopen(fname, "ab");
564 fwrite(coding[i-1], sizeof(char), blocksize, fp2);
569 /* Calculate encoding time */
570 totalsec += timing_delta(&t3, &t4);
573 /* Create metadata file */
/* The metadata file records the input name, original size, the numeric
   parameters, the technique name and number, and the read-in count.
   NOTE(review): fp2 is passed to fprintf without a NULL check after
   fopen. */
575 sprintf(fname, "%s/Coding/%s_meta.txt", curdir, s1);
576 fp2 = fopen(fname, "wb");
577 fprintf(fp2, "%s\n", argv[1]);
578 fprintf(fp2, "%d\n", size);
579 fprintf(fp2, "%d %d %d %d %d\n", k, m, w, packetsize, buffersize);
580 fprintf(fp2, "%s\n", argv[4]);
581 fprintf(fp2, "%d\n", tech);
582 fprintf(fp2, "%d\n", readins);
587 /* Free allocated memory */
593 /* Calculate rate in MB/sec and print */
/* Both rates divide size by an elapsed time: totalsec is the sum of the
   t3/t4 deltas accumulated above, tsec is the t1/t2 delta. */
595 tsec = timing_delta(&t1, &t2);
596 printf("Encoding (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/totalsec);
597 printf("En_Total (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/tsec);
602 /* is_prime returns 1 if number is prime, 0 if not prime.
   The test walks a table of the 55 primes from 2 through 257; at the first
   table entry that divides w, w is reported prime only when it equals that
   entry.
   NOTE(review): the result for a w that no table entry divides is not
   covered by this description - verify before calling with large w. */
603 int is_prime(int w) {
604 int prime55[] = {2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,
605 73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,
606 181,191,193,197,199,211,223,227,229,233,239,241,251,257};
608 for (i = 0; i < 55; i++) {
609 if (w%prime55[i] == 0) {
610 if (w == prime55[i]) return 1;
617 /* Handles ctrl-\ event (installed for SIGQUIT through signal).
   Prints a timestamp (mytime is presumably the current time - confirm), the
   total and current read-in counts and the method name to stderr, then
   installs itself again as the SIGQUIT handler. The dummy argument is
   not used by the statements below.
   NOTE(review): fprintf and ctime are not on the POSIX list of
   async-signal-safe functions.
   NOTE(review): Methods[] holds fewer names than enum Coding_Technique has
   values, so Methods[method] may not name the technique in use - confirm. */
618 void ctrl_bs_handler(int dummy) {
621 fprintf(stderr, "\n%s\n", ctime(&mytime));
622 fprintf(stderr, "You just typed ctrl-\\ in encoder.c.\n");
623 fprintf(stderr, "Total number of read ins = %d\n", readins);
624 fprintf(stderr, "Current read in: %d\n", n);
625 fprintf(stderr, "Method: %s\n\n", Methods[method]);
626 signal(SIGQUIT, ctrl_bs_handler);