2 // MEX-File: dv_embed
\r
4 // [double scalar price, uint8 vector stego] = dv_embed(uint8 vector cover, uint8 vector message, uint8/double vector pricevec, int scalar constrLength = 10)
\r
6 typedef unsigned int u32;
\r
7 typedef unsigned short u16;
\r
8 typedef unsigned char u8;
\r
15 #include <emmintrin.h>
\r
19 #define MATLAB_in_cover 0
\r
20 #define MATLAB_in_message 1
\r
21 #define MATLAB_in_pricevec 2
\r
22 #define MATLAB_in_constrLength 3
\r
24 #define MATLAB_out_price 0
\r
25 #define MATLAB_out_stego 1
\r
27 void *aligned_malloc(unsigned int bytes, int align)
\r
30 char *temp = (char *)malloc(bytes + align);
\r
34 shift = align - (int)(((unsigned long long)temp) & (align - 1));
\r
35 temp = temp + shift;
\r
37 return (void *)temp;
\r
40 void aligned_free(void *vptr)
\r
42 char *ptr = (char *)vptr;
\r
43 free(ptr - ptr[-1]);
\r
47 inline __m128i maxLessThan255(const __m128i v1, const __m128i v2)
\r
49 register __m128i mask = _mm_set1_epi32(0xffffffff);
\r
50 return _mm_max_epu8(_mm_andnot_si128(_mm_cmpeq_epi8(v1, mask), v1), _mm_andnot_si128(_mm_cmpeq_epi8(v2, mask), v2));
\r
53 inline u8 max16B(__m128i maxp)
\r
56 maxp = _mm_max_epu8(maxp, _mm_srli_si128(maxp, 8));
\r
57 maxp = _mm_max_epu8(maxp, _mm_srli_si128(maxp, 4));
\r
58 *((int*)mtemp) = _mm_cvtsi128_si32(maxp);
\r
59 if(mtemp[2] > mtemp[0])
\r
60 mtemp[0] = mtemp[2];
\r
61 if(mtemp[3] > mtemp[1])
\r
62 mtemp[1] = mtemp[3];
\r
63 if(mtemp[1] > mtemp[0])
\r
69 inline u8 min16B(__m128i minp)
\r
72 minp = _mm_min_epu8(minp, _mm_srli_si128(minp, 8));
\r
73 minp = _mm_min_epu8(minp, _mm_srli_si128(minp, 4));
\r
74 *((int*)mtemp) = _mm_cvtsi128_si32(minp);
\r
75 if(mtemp[2] < mtemp[0])
\r
76 mtemp[0] = mtemp[2];
\r
77 if(mtemp[3] < mtemp[1])
\r
78 mtemp[1] = mtemp[3];
\r
79 if(mtemp[1] < mtemp[0])
\r
85 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
\r
87 int matrixheight, vectorlength, height, i, j, k, l, index, index2, parts, m, sseheight, altm, pathindex, syndromelength, matrixwidth;
\r
88 u32 column, colmask, state;
\r
92 void *temp, *pricevectorv;
\r
93 u8 *vector, *syndrome, *closestData, *ssedone, *stepdown;
\r
94 u32 *path, *columns[2];
\r
95 int *matrices, *widths;
\r
98 mexErrMsgTxt("Too few input parameters (3 or 4 expected)");
\r
102 if(!mxIsUint8(prhs[MATLAB_in_cover])) {
\r
103 mexErrMsgTxt("The input vector must be of type uint8.");
\r
106 vector = (u8*)mxGetPr(prhs[MATLAB_in_cover]);
\r
107 vectorlength = (int)mxGetM(prhs[MATLAB_in_cover]);
\r
109 if(!mxIsUint8(prhs[MATLAB_in_message])) {
\r
110 mexErrMsgTxt("The syndrome must be of type uint8.");
\r
113 syndrome = (u8*)mxGetPr(prhs[MATLAB_in_message]);
\r
114 syndromelength = (int)mxGetM(prhs[MATLAB_in_message]);
\r
116 if(!mxIsDouble(prhs[MATLAB_in_pricevec])) {
\r
117 if(!mxIsUint8(prhs[MATLAB_in_pricevec])) {
\r
118 mexErrMsgTxt("The price vector must be of type double or uint8.");
\r
126 pricevectorv = (void*)mxGetPr(prhs[MATLAB_in_pricevec]);
\r
129 matrixheight = (int)mxGetScalar(prhs[MATLAB_in_constrLength]);
\r
133 // end of MATLAB interface
\r
135 if(matrixheight > 31) {
\r
136 mexErrMsgTxt("Submatrix height must not exceed 31.");
\r
140 height = 1 << matrixheight;
\r
141 colmask = height - 1;
\r
142 height = (height + 31) & (~31);
\r
144 parts = height >> 5;
\r
147 path = (u32*)malloc(vectorlength * parts * sizeof(u32));
\r
150 sprintf(errmsg, "Not enough memory (%u byte array could not be allocated).\n", vectorlength * parts * sizeof(u32));
\r
151 mexErrMsgTxt(errmsg);
\r
158 int shorter, longer, worm;
\r
161 matrices = (int *)malloc(syndromelength * sizeof(int));
\r
162 widths = (int *)malloc(syndromelength * sizeof(int));
\r
164 invalpha = (double)vectorlength / syndromelength;
\r
170 mexErrMsgTxt("The message cannot be longer than the cover object.\n");
\r
174 mexWarnMsgTxt("The relative payload is greater than 1/2. This may result in poor embedding efficiency.\n");
\r
176 shorter = (int)floor(invalpha);
\r
177 longer = (int)ceil(invalpha);
\r
178 if((columns[0] = getMatrix(shorter, matrixheight)) == NULL) {
\r
185 if((columns[1] = getMatrix(longer, matrixheight)) == NULL) {
\r
194 for(i = 0; i < syndromelength; i++) {
\r
195 if(worm + longer <= (i + 1) * invalpha + 0.5) {
\r
197 widths[i] = longer;
\r
201 widths[i] = shorter;
\r
211 int pathindex8 = 0;
\r
212 int shift[2] = {0, 4};
\r
213 u8 mask[2] = {0xf0, 0x0f};
\r
215 u8 *path8 = (u8*)path;
\r
216 double *pricevector = (double*)pricevectorv;
\r
218 double inf = std::numeric_limits<double>::infinity();
\r
220 sseheight = height >> 2;
\r
221 ssedone = (u8*)malloc(sseheight * sizeof(u8));
\r
222 prices = (float*)aligned_malloc(height * sizeof(float), 16);
\r
225 __m128 fillval = _mm_set1_ps(inf);
\r
226 for(i = 0; i < height; i+= 4) {
\r
227 _mm_store_ps(&prices[i], fillval);
\r
228 ssedone[i >> 2] = 0;
\r
234 for(index = 0, index2 = 0; index2 < syndromelength; index2++) {
\r
235 register __m128 c1, c2;
\r
237 for(k = 0; k < widths[index2]; k++, index++) {
\r
238 column = columns[matrices[index2]][k] & colmask;
\r
240 if(vector[index] == 0) {
\r
241 c1 = _mm_setzero_ps();
\r
242 c2 = _mm_set1_ps((float)pricevector[index]);
\r
244 c1 = _mm_set1_ps((float)pricevector[index]);
\r
245 c2 = _mm_setzero_ps();
\r
248 total += pricevector[index];
\r
250 for(m = 0; m < sseheight; m++) {
\r
252 register __m128 v1, v2, v3, v4;
\r
253 altm = (m ^ (column >> 2));
\r
254 v1 = _mm_load_ps(&prices[m << 2]);
\r
255 v2 = _mm_load_ps(&prices[altm << 2]);
\r
260 switch(column & 3) {
\r
264 v2 = _mm_shuffle_ps(v2, v2, 0xb1);
\r
265 v3 = _mm_shuffle_ps(v3, v3, 0xb1);
\r
268 v2 = _mm_shuffle_ps(v2, v2, 0x4e);
\r
269 v3 = _mm_shuffle_ps(v3, v3, 0x4e);
\r
272 v2 = _mm_shuffle_ps(v2, v2, 0x1b);
\r
273 v3 = _mm_shuffle_ps(v3, v3, 0x1b);
\r
276 v1 = _mm_add_ps(v1, c1);
\r
277 v2 = _mm_add_ps(v2, c2);
\r
278 v3 = _mm_add_ps(v3, c2);
\r
279 v4 = _mm_add_ps(v4, c1);
\r
281 v1 = _mm_min_ps(v1, v2);
\r
282 v4 = _mm_min_ps(v3, v4);
\r
284 _mm_store_ps(&prices[m << 2], v1);
\r
285 _mm_store_ps(&prices[altm << 2], v4);
\r
288 v2 = _mm_cmpeq_ps(v1, v2);
\r
289 v3 = _mm_cmpeq_ps(v3, v4);
\r
290 path8[pathindex8 + (m >> 1)] = (path8[pathindex8 + (m >> 1)] & mask[m & 1]) | (_mm_movemask_ps(v2) << shift[m & 1]);
\r
291 path8[pathindex8 + (altm >> 1)] = (path8[pathindex8 + (altm >> 1)] & mask[altm & 1]) | (_mm_movemask_ps(v3) << shift[altm & 1]);
\r
296 for(i = 0; i < sseheight; i++) {
\r
300 pathindex += parts;
\r
301 pathindex8 += parts << 2;
\r
304 if(syndrome[index2] == 0) {
\r
305 for(i = 0, l = 0; i < sseheight; i+= 2, l += 4) {
\r
306 _mm_store_ps(&prices[l], _mm_shuffle_ps(_mm_load_ps(&prices[i << 2]), _mm_load_ps(&prices[(i + 1) << 2]), 0x88));
\r
309 for(i = 0, l = 0; i < sseheight; i+= 2, l += 4) {
\r
310 _mm_store_ps(&prices[l], _mm_shuffle_ps(_mm_load_ps(&prices[i << 2]), _mm_load_ps(&prices[(i + 1) << 2]), 0xdd));
\r
314 if(syndromelength - index2 <= matrixheight)
\r
318 register __m128 fillval = _mm_set1_ps(inf);
\r
319 for(l >>= 2; l < sseheight; l++) {
\r
320 _mm_store_ps(&prices[l << 2], fillval);
\r
325 totalprice = prices[0];
\r
327 aligned_free(prices);
\r
330 if(totalprice >= total) {
\r
337 mexErrMsgTxt("The syndrome is not in the range of the syndrome matrix.\n");
\r
344 int pathindex16 = 0, subprice = 0;
\r
345 u8 maxc = 0, minc = 0;
\r
346 u8 *prices, *pricevector = (u8*)pricevectorv;
\r
347 u16 *path16 = (u16 *)path;
\r
348 __m128i *prices16B;
\r
350 sseheight = height >> 4;
\r
351 ssedone = (u8*)malloc(sseheight * sizeof(u8));
\r
352 prices = (u8*)aligned_malloc(height * sizeof(u8), 16);
\r
353 prices16B = (__m128i*)prices;
\r
356 __m128i napln = _mm_set1_epi32(0xffffffff);
\r
357 for(i = 0; i < sseheight; i++) {
\r
358 _mm_store_si128(&prices16B[i], napln);
\r
365 for(index = 0, index2 = 0; index2 < syndromelength; index2++) {
\r
366 register __m128i c1, c2, maxp, minp;
\r
368 if((u32)maxc + pricevector[index] >= 254) {
\r
369 aligned_free(path);
\r
377 mexErrMsgTxt("Price vector limit exceeded.");
\r
381 for(k = 0; k < widths[index2]; k++, index++) {
\r
382 column = columns[matrices[index2]][k] & colmask;
\r
384 if(vector[index] == 0) {
\r
385 c1 = _mm_setzero_si128();
\r
386 c2 = _mm_set1_epi8(pricevector[index]);
\r
388 c1 = _mm_set1_epi8(pricevector[index]);
\r
389 c2 = _mm_setzero_si128();
\r
392 minp = _mm_set1_epi8(-1);
\r
393 maxp = _mm_setzero_si128();
\r
395 for(m = 0; m < sseheight; m++) {
\r
397 register __m128i v1, v2, v3, v4;
\r
398 altm = (m ^ (column >> 4));
\r
399 v1 = _mm_load_si128(&prices16B[m]);
\r
400 v2 = _mm_load_si128(&prices16B[altm]);
\r
406 v2 = _mm_shuffle_epi32(v2, 0x4e);
\r
407 v3 = _mm_shuffle_epi32(v3, 0x4e);
\r
410 v2 = _mm_shuffle_epi32(v2, 0xb1);
\r
411 v3 = _mm_shuffle_epi32(v3, 0xb1);
\r
414 v2 = _mm_shufflehi_epi16(v2, 0xb1);
\r
415 v3 = _mm_shufflehi_epi16(v3, 0xb1);
\r
416 v2 = _mm_shufflelo_epi16(v2, 0xb1);
\r
417 v3 = _mm_shufflelo_epi16(v3, 0xb1);
\r
420 v2 = _mm_or_si128(_mm_srli_epi16(v2, 8), _mm_slli_epi16(v2, 8));
\r
421 v3 = _mm_or_si128(_mm_srli_epi16(v3, 8), _mm_slli_epi16(v3, 8));
\r
423 v1 = _mm_adds_epu8(v1, c1);
\r
424 v2 = _mm_adds_epu8(v2, c2);
\r
425 v3 = _mm_adds_epu8(v3, c2);
\r
426 v4 = _mm_adds_epu8(v4, c1);
\r
428 v1 = _mm_min_epu8(v1, v2);
\r
429 v4 = _mm_min_epu8(v3, v4);
\r
431 _mm_store_si128(&prices16B[m], v1);
\r
432 _mm_store_si128(&prices16B[altm], v4);
\r
434 minp = _mm_min_epu8(minp, _mm_min_epu8(v1, v4));
\r
435 maxp = _mm_max_epu8(maxp, maxLessThan255(v1, v4));
\r
438 v2 = _mm_cmpeq_epi8(v1, v2);
\r
439 v3 = _mm_cmpeq_epi8(v3, v4);
\r
440 path16[pathindex16 + m] = (u16)_mm_movemask_epi8(v2);
\r
441 path16[pathindex16 + altm] = (u16)_mm_movemask_epi8(v3);
\r
446 maxc = max16B(maxp);
\r
447 minc = min16B(minp);
\r
452 register __m128i mask = _mm_set1_epi32(0xffffffff);
\r
453 register __m128i m = _mm_set1_epi8(minc);
\r
454 for(i = 0; i < sseheight; i++) {
\r
455 register __m128i res;
\r
456 register __m128i pr = prices16B[i];
\r
457 res = _mm_andnot_si128(_mm_cmpeq_epi8(pr, mask), m);
\r
458 prices16B[i] = _mm_sub_epi8(pr, res);
\r
463 pathindex += parts;
\r
464 pathindex16 += parts << 1;
\r
468 register __m128i mask = _mm_set1_epi32(0x00ff00ff);
\r
471 aligned_free(path);
\r
479 mexErrMsgTxt("The syndrome is not in the syndrome matrix range.\n");
\r
483 if(syndrome[index2] == 0) {
\r
484 for(i = 0, l = 0; i < sseheight; i += 2, l++) {
\r
485 _mm_store_si128(&prices16B[l], _mm_packus_epi16(_mm_and_si128(_mm_load_si128(&prices16B[i]), mask),
\r
486 _mm_and_si128(_mm_load_si128(&prices16B[i + 1]), mask)));
\r
489 for(i = 0, l = 0; i < sseheight; i += 2, l++) {
\r
490 _mm_store_si128(&prices16B[l], _mm_packus_epi16(_mm_and_si128(_mm_srli_si128(_mm_load_si128(&prices16B[i]), 1), mask),
\r
491 _mm_and_si128(_mm_srli_si128(_mm_load_si128(&prices16B[i + 1]), 1), mask)));
\r
495 if(syndromelength - index2 <= matrixheight)
\r
498 register __m128i fillval = _mm_set1_epi32(0xffffffff);
\r
499 for(; l < sseheight; l++)
\r
500 _mm_store_si128(&prices16B[l], fillval);
\r
504 totalprice = subprice + prices[0];
\r
506 aligned_free(prices);
\r
515 size[0] = vectorlength;
\r
518 closest = mxCreateNumericArray(2, size, mxUINT8_CLASS, mxREAL);
\r
519 closestData = (u8*)mxGetPr(closest);
\r
521 pathindex -= parts;
\r
526 int h = syndromelength;
\r
529 for(; index2 >= 0; index2--) {
\r
530 for(k = widths[index2] - 1; k >= 0; k--, index--) {
\r
531 if(k == widths[index2] - 1) {
\r
532 state = (state << 1) | syndrome[index2];
\r
533 if(syndromelength - index2 <= matrixheight)
\r
534 colmask = (colmask << 1) | 1;
\r
537 if(path[pathindex + (state >> 5)] & (1 << (state & 31))) {
\r
538 closestData[index] = 1;
\r
539 state = state ^ (columns[matrices[index2]][k] & colmask);
\r
541 closestData[index] = 0;
\r
544 pathindex -= parts;
\r
547 plhs[MATLAB_out_stego] = closest;
\r
560 size[0] = size[1] = 1;
\r
561 arr = mxCreateNumericArray(2, size, mxDOUBLE_CLASS, mxREAL);
\r
562 (mxGetPr(arr))[0] = totalprice;
\r
563 plhs[MATLAB_out_price] = arr;
\r