/* *
 * Copyright (c) 2013, James S. Plank and Kevin Greenan
 * All rights reserved.
 *
 * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
 * Coding Techniques
 *
 * Revision 2.0: Galois Field backend now links to GF-Complete
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 *  - Neither the name of the University of Tennessee nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/* This uses procedures from the Galois Field arithmetic library */
/* ------------------------------------------------------------ */
/* In all of the routines below:

   k = Number of data devices
   m = Number of coding devices
   w = Word size

   data_ptrs = An array of k pointers to data which is size bytes.
               Size must be a multiple of sizeof(long).
               Pointers must also be longword aligned.

   coding_ptrs = An array of m pointers to coding data which is size bytes.

   packetsize = The size of a coding block with bitmatrix coding.
                When you code with a bitmatrix, you will use w packets
                of size packetsize.

   matrix = an array of k*m integers.
            It represents an m by k matrix.
            Element i,j is in matrix[i*k+j];

   bitmatrix = an array of k*m*w*w integers.
               It represents an mw by kw matrix.
               Element i,j is in bitmatrix[i*k*w+j];

   erasures = an array of id's of erased devices.
              Id's are integers between 0 and k+m-1.
              Id's 0 to k-1 are id's of data devices.
              Id's k to k+m-1 are id's of coding devices:
              Coding device id = id-k.
              If there are e erasures, erasures[e] = -1.

   schedule = an array of schedule operations.
              If there are m operations, then schedule[m][0] = -1.

   operation = an array of 5 integers:

          0 = operation: 0 for copy, 1 for xor (-1 for end)
          1 = source device (0 - k+m-1)
          2 = source packet (0 - w-1)
          3 = destination device (0 - k+m-1)
          4 = destination packet (0 - w-1)

   A layout sketch follows this comment.
 */
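/* A layout sketch (not part of the API; the values k = 4, m = 2,
   size = 4096 are arbitrary assumptions for illustration):

      #include <stdlib.h>

      int k = 4, m = 2, size = 4096;        // size is a multiple of sizeof(long)
      char *data_ptrs[4], *coding_ptrs[2];
      int erasures[3];
      int i;

      for (i = 0; i < k; i++) data_ptrs[i] = calloc(size, 1);
      for (i = 0; i < m; i++) coding_ptrs[i] = calloc(size, 1);  // calloc'd blocks are longword aligned

      erasures[0] = 2;        // data device 2 is erased
      erasures[1] = k;        // coding device 0 is erased (id k maps to coding device 0)
      erasures[2] = -1;       // terminator: with e erasures, erasures[e] = -1
*/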
/* --------------------------------------------------------------- */
/* Bitmatrices / schedules ---------------------------------------- */
/*
 - jerasure_matrix_to_bitmatrix turns an m X k matrix in GF(2^w) into a
                              wm X wk bitmatrix (in GF(2)). This is
                              explained in the Cauchy Reed-Solomon coding
                              paper.

 - jerasure_dumb_bitmatrix_to_schedule turns a bitmatrix into a schedule
                              using the straightforward algorithm -- just
                              schedule the dot products defined by each
                              row of the bitmatrix.

 - jerasure_smart_bitmatrix_to_schedule turns a bitmatrix into a schedule,
                              but tries to use previous dot products to
                              calculate new ones. This is the optimization
                              explained in the original Liberation code paper.

 - jerasure_generate_schedule_cache precalculates all of the schedules for
                              the given distribution bitmatrix. m must equal 2.

 - jerasure_free_schedule frees a schedule that was allocated with
                              jerasure_XXX_bitmatrix_to_schedule.

 - jerasure_free_schedule_cache frees a schedule cache that was created with
                              jerasure_generate_schedule_cache.
 */
int *jerasure_matrix_to_bitmatrix(int k, int m, int w, int *matrix);
int **jerasure_dumb_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix);
int **jerasure_smart_bitmatrix_to_schedule(int k, int m, int w, int *bitmatrix);
int ***jerasure_generate_schedule_cache(int k, int m, int w, int *bitmatrix, int smart);

void jerasure_free_schedule(int **schedule);
void jerasure_free_schedule_cache(int k, int m, int ***cache);
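/* A hedged usage sketch for the routines above. It assumes
   cauchy_original_coding_matrix() from the companion cauchy.h header in
   this distribution; any m X k matrix over GF(2^w) would serve.

      #include <stdlib.h>
      #include "cauchy.h"

      int k = 4, m = 2, w = 8;
      int *matrix    = cauchy_original_coding_matrix(k, m, w);
      int *bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
      int **schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);

      // pass schedule to jerasure_schedule_encode(), declared below

      jerasure_free_schedule(schedule);
      free(bitmatrix);
      free(matrix);
*/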
/* ------------------------------------------------------------ */
/* Encoding - these are all straightforward. jerasure_matrix_encode only
   works with w = 8|16|32. */
void jerasure_do_parity(int k, char **data_ptrs, char *parity_ptr, int size);

void jerasure_matrix_encode(int k, int m, int w, int *matrix,
                            char **data_ptrs, char **coding_ptrs, int size);

void jerasure_bitmatrix_encode(int k, int m, int w, int *bitmatrix,
                            char **data_ptrs, char **coding_ptrs, int size, int packetsize);

void jerasure_schedule_encode(int k, int m, int w, int **schedule,
                            char **data_ptrs, char **coding_ptrs, int size, int packetsize);
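/* A hedged encoding sketch, reusing the buffers from the layout example
   near the top of this file. The matrix generator is an assumption: it
   comes from the companion reed_sol.h, not from this header.

      #include "reed_sol.h"

      int w = 8;
      int *matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
      jerasure_matrix_encode(k, m, w, matrix, data_ptrs, coding_ptrs, size);
*/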
/* ------------------------------------------------------------ */
/* Decoding. -------------------------------------------------- */
/* These return integers, because the matrix may not be invertible.

   The parameter row_k_ones should be set to 1 if row k of the matrix
   (or rows kw to (k+1)w-1 of the distribution bitmatrix) are all ones
   (or all identity matrices). Then you can improve the performance
   of decoding when there is more than one failure and the parity
   device didn't fail: you decode all but one of the data devices,
   and then decode the last data device from the other data devices
   and the parity device.

   jerasure_schedule_decode_lazy generates the schedule on the fly.

   jerasure_matrix_decode only works when w = 8|16|32.

   jerasure_make_decoding_matrix/bitmatrix make the k*k decoding matrix
   (or wk*wk bitmatrix) by taking the rows corresponding to k
   non-erased devices of the distribution matrix, and then
   inverting that matrix.

   You should already have allocated the decoding matrix and
   dm_ids, which is a vector of k integers. These will be
   filled in appropriately. dm_ids[i] is the id of element
   i of the survivors vector, i.e. row i of the decoding matrix
   times the devices named by dm_ids yields data device i.

   Both of these routines take "erased" instead of "erasures".
   Erased is a vector with k+m elements, which has 0 or 1 for
   each device's id, according to whether the device is erased.

   jerasure_erasures_to_erased allocates and returns erased from erasures.
 */
int jerasure_matrix_decode(int k, int m, int w,
                          int *matrix, int row_k_ones, int *erasures,
                          char **data_ptrs, char **coding_ptrs, int size);

int jerasure_bitmatrix_decode(int k, int m, int w,
                          int *bitmatrix, int row_k_ones, int *erasures,
                          char **data_ptrs, char **coding_ptrs, int size, int packetsize);
int jerasure_schedule_decode_lazy(int k, int m, int w, int *bitmatrix, int *erasures,
                          char **data_ptrs, char **coding_ptrs, int size, int packetsize,
                          int smart);
int jerasure_schedule_decode_cache(int k, int m, int w, int ***scache, int *erasures,
                          char **data_ptrs, char **coding_ptrs, int size, int packetsize);

int jerasure_make_decoding_matrix(int k, int m, int w, int *matrix, int *erased,
                          int *decoding_matrix, int *dm_ids);

int jerasure_make_decoding_bitmatrix(int k, int m, int w, int *matrix, int *erased,
                          int *decoding_matrix, int *dm_ids);

int *jerasure_erasures_to_erased(int k, int m, int *erasures);
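/* A hedged decoding sketch, continuing the encode example above: recover
   data device 2 and coding device 0 from the survivors. row_k_ones = 1 is
   an assumption that holds when the first coding row of the matrix is all
   ones, as with the reed_sol Vandermonde generator; pass 0 if unsure.

      #include <stdio.h>

      int ret = jerasure_matrix_decode(k, m, w, matrix, 1, erasures,
                                       data_ptrs, coding_ptrs, size);
      if (ret == -1) fprintf(stderr, "decode failed: matrix not invertible\n");
*/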
/* ------------------------------------------------------------ */
/* These perform dot products and schedules. -------------------*/
/*
   src_ids is a vector of k id's (0 - k-1 for data devices, k - k+m-1
   for coding devices) that identify the source devices. Dest_id is
   the id of the destination device.

   jerasure_matrix_dotprod only works when w = 8|16|32.

   jerasure_do_scheduled_operations executes the schedule on w*packetsize worth of
   bytes from each device. ptrs is an array of pointers which should have as many
   elements as the highest referenced device in the schedule.
 */
void jerasure_matrix_dotprod(int k, int w, int *matrix_row,
                          int *src_ids, int dest_id,
                          char **data_ptrs, char **coding_ptrs, int size);

void jerasure_bitmatrix_dotprod(int k, int w, int *bitmatrix_row,
                          int *src_ids, int dest_id,
                          char **data_ptrs, char **coding_ptrs, int size, int packetsize);

void jerasure_do_scheduled_operations(char **ptrs, int **schedule, int packetsize);
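/* A hedged sketch: recompute coding device 0 as the dot product of row 0
   of the coding matrix (matrix points at its first k entries) with all k
   data devices. dest_id = k names coding device 0. Variables continue the
   examples above.

      int i, src_ids[4];                    // k = 4, as above
      for (i = 0; i < k; i++) src_ids[i] = i;
      jerasure_matrix_dotprod(k, w, matrix, src_ids, k,
                              data_ptrs, coding_ptrs, size);
*/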
/* ------------------------------------------------------------ */
/* Matrix Inversion ------------------------------------------- */
/*
   The two matrix inversion functions work on rows*rows matrices of
   ints. If a bitmatrix, then each int will just be zero or one.
   Otherwise, they will be elements of GF(2^w). Obviously, you can
   invert bitmatrices with jerasure_invert_matrix() and w = 1, but
   jerasure_invert_bitmatrix() will be more efficient.

   The two invertible functions return whether a matrix is invertible.
   They are more efficient than the inversion functions.

   Mat will be destroyed when the matrix inversion or invertible
   testing is done. Sorry.

   Inv must be allocated by the caller.

   The two invert_matrix functions return 0 on success, and -1 if the
   matrix is uninvertible.

   The two invertible functions simply return whether the matrix is
   invertible (0 or 1). Mat will be destroyed.
 */
int jerasure_invert_matrix(int *mat, int *inv, int rows, int w);
int jerasure_invert_bitmatrix(int *mat, int *inv, int rows);
int jerasure_invertible_matrix(int *mat, int rows, int w);
int jerasure_invertible_bitmatrix(int *mat, int rows);
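/* A hedged sketch: invert a 3 X 3 matrix in GF(2^8). The entries are
   arbitrary; since mat is destroyed by the call, it operates on a copy.

      #include <string.h>

      int mat[9] = { 1, 2, 3,  4, 5, 6,  7, 8, 10 };
      int inv[9], copy[9];                  // inv is caller-allocated

      memcpy(copy, mat, sizeof(mat));
      if (jerasure_invert_matrix(copy, inv, 3, 8) == 0) {
          // inv now holds the inverse of mat over GF(2^8)
      }
*/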
/* ------------------------------------------------------------ */
/* Basic matrix operations -------------------------------------*/
/*
   Each of the print_matrix routines requires a w. In jerasure_print_matrix,
   it is used to calculate the field width. In jerasure_print_bitmatrix, it
   is used to put spaces between the bits.

   jerasure_matrix_multiply is a simple matrix multiplier in GF(2^w). It
   returns an r1*c2 matrix, which is the product of the two input matrices.
   It allocates the product. Obviously, c1 should equal r2. However, this
   is not validated by the procedure.
 */
void jerasure_print_matrix(int *matrix, int rows, int cols, int w);
void jerasure_print_bitmatrix(int *matrix, int rows, int cols, int w);

int *jerasure_matrix_multiply(int *m1, int *m2, int r1, int c1, int r2, int c2, int w);
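/* A hedged sketch, continuing the inversion example above: the product of
   mat and its inverse should print as the 3 X 3 identity.

      #include <stdlib.h>

      int *product = jerasure_matrix_multiply(mat, inv, 3, 3, 3, 3, 8);
      jerasure_print_matrix(product, 3, 3, 8);
      free(product);                        // the product is allocated by the call
*/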
/* ------------------------------------------------------------ */
/* Stats ------------------------------------------------------ */
/*
   jerasure_get_stats fills in a vector of three doubles:

      fill_in[0] is the number of bytes that have been XOR'd
      fill_in[1] is the number of bytes that have been copied
      fill_in[2] is the number of bytes that have been multiplied
                 by a constant in GF(2^w)

   When jerasure_get_stats() is called, it resets its values.
 */
void jerasure_get_stats(double *fill_in);
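/* A hedged sketch: read (and thereby reset) the counters after a coding
   call.

      #include <stdio.h>

      double stats[3];
      jerasure_get_stats(stats);
      printf("XOR'd %.0f bytes, copied %.0f, multiplied %.0f\n",
             stats[0], stats[1], stats[2]);
*/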
int jerasure_autoconf_test();