// Build: gcc aes1.c pixmap_io.o -o aes1 -maes -msse2 -msse4 -O3
#include <stdint.h>    // for uint8_t
#include <stdio.h>     // for printf
#include <stdlib.h>    // for malloc, atoi, lrand48
#include <string.h>    // for memcmp, strncmp
#include <sys/time.h>  // for gettimeofday
#include <wmmintrin.h> // for AES-NI intrinsics
// Debug build with gcc, using the arguments: -g -O0 -Wall -msse2 -msse -march=native -maes
#include "pixmap_io.h"
// Shorthand for a raw byte, used by the pixmap and RC4 helpers.
typedef unsigned char uchar;
// Debug helper: print the 16 bytes of a 128-bit register as decimal values,
// lowest-addressed byte first, followed by a newline.
void print128_num(__m128i var)
{
    // Viewing the register through a byte pointer is valid aliasing in C.
    const uint8_t *val = (const uint8_t *)&var;

    printf("Numerical: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n",
           val[0], val[1], val[2], val[3], val[4], val[5],
           val[6], val[7], val[8], val[9], val[10], val[11], val[12], val[13],
           val[14], val[15]);
}
// Return the current wall-clock time in seconds, with microsecond
// resolution, as reported by gettimeofday().
double TimeStart(void)
{
    struct timeval tstart;

    gettimeofday(&tstart, NULL);
    return (double)tstart.tv_sec + (double)tstart.tv_usec * 1e-6;
}
// Return the number of seconds elapsed since t, where t is a timestamp in
// seconds on the gettimeofday() clock.
double TimeStop(double t)
{
    struct timeval tend;

    gettimeofday(&tend, NULL);
    return ((double)tend.tv_sec + (double)tend.tv_usec * 1e-6) - t;
}
// Encrypt one 128-bit block in place with AES-128.
//   m: __m128i lvalue; plaintext on entry, ciphertext on exit
//   k: array of round keys; k[0]..k[10] are read
// Wrapped in do { } while (0) so the expansion is a single statement.
#define DO_ENC_BLOCK(m,k) \
    do { \
        (m) = _mm_xor_si128       ((m), (k)[ 0]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 1]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 2]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 3]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 4]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 5]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 6]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 7]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 8]); \
        (m) = _mm_aesenc_si128    ((m), (k)[ 9]); \
        (m) = _mm_aesenclast_si128((m), (k)[10]); \
    } while (0)
// Decrypt one 128-bit block in place with AES-128.
//   m: __m128i lvalue; ciphertext on entry, plaintext on exit
//   k: combined key schedule; k[10]..k[19] are the decryption round keys in
//      the order they are applied, and k[0] is used for the final round
// Wrapped in do { } while (0) so the expansion is a single statement.
#define DO_DEC_BLOCK(m,k) \
    do { \
        (m) = _mm_xor_si128       ((m), (k)[10+0]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+1]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+2]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+3]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+4]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+5]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+6]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+7]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+8]); \
        (m) = _mm_aesdec_si128    ((m), (k)[10+9]); \
        (m) = _mm_aesdeclast_si128((m), (k)[0]); \
    } while (0)
// Derive the next AES-128 round key from round key k.
// rcon must be a compile-time constant: _mm_aeskeygenassist_si128 takes it
// as an immediate operand.
#define AES_128_key_exp(k, rcon) aes_128_key_expansion((k), _mm_aeskeygenassist_si128((k), (rcon)))
// Combine the previous round key with the output of
// _mm_aeskeygenassist_si128 to produce the next AES-128 round key.
//   key:      previous round key
//   keygened: keygenassist result; only its top 32-bit word is used
// Returns the next round key.
static __m128i aes_128_key_expansion(__m128i key, __m128i keygened)
{
    // Broadcast the top word to all four lanes.
    keygened = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3,3,3,3));
    // Three shift-and-xor steps leave word n holding w0 ^ ... ^ wn.
    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
    return _mm_xor_si128(key, keygened);
}
97 static void aes128_load_key_enc_only(uint8_t *enc_key, __m128i *key_schedule){
98 key_schedule[0] = _mm_loadu_si128((const __m128i*) enc_key);
99 // print128_num( key_schedule[0]);
100 key_schedule[1] = AES_128_key_exp(key_schedule[0], 0x01);
101 // print128_num( key_schedule[1]);
102 key_schedule[2] = AES_128_key_exp(key_schedule[1], 0x02);
103 // print128_num( key_schedule[2]);
104 key_schedule[3] = AES_128_key_exp(key_schedule[2], 0x04);
105 // print128_num( key_schedule[3]);
106 key_schedule[4] = AES_128_key_exp(key_schedule[3], 0x08);
107 // print128_num( key_schedule[4]);
108 key_schedule[5] = AES_128_key_exp(key_schedule[4], 0x10);
109 // print128_num( key_schedule[5]);
110 key_schedule[6] = AES_128_key_exp(key_schedule[5], 0x20);
111 // print128_num( key_schedule[6]);
112 key_schedule[7] = AES_128_key_exp(key_schedule[6], 0x40);
113 // print128_num( key_schedule[7]);
114 key_schedule[8] = AES_128_key_exp(key_schedule[7], 0x80);
115 // print128_num( key_schedule[8]);
116 key_schedule[9] = AES_128_key_exp(key_schedule[8], 0x1B);
117 // print128_num( key_schedule[9]);
118 key_schedule[10] = AES_128_key_exp(key_schedule[9], 0x36);
119 // print128_num( key_schedule[10]);
// Derive the decryption round keys from an existing encryption schedule.
//   key_schedule: entries [0]..[10] are read-only here; entries [11]..[19]
//                 are written as InvMixColumns of entries [9]..[1], i.e. in
//                 the order the decryption rounds consume them.
static void aes128_load_key_dec_only(__m128i *key_schedule){
    for (int i = 1; i <= 9; i++) {
        key_schedule[10 + i] = _mm_aesimc_si128(key_schedule[10 - i]);
    }
}
// Build the full 20-entry AES-128 key schedule from a 16-byte key.
//   enc_key:      16 key bytes
//   key_schedule: output array of at least 20 __m128i entries
static void aes128_load_key(uint8_t *enc_key, __m128i *key_schedule){
    aes128_load_key_enc_only(enc_key, key_schedule);

    // Generate the decryption keys in reverse order.
    // k[10] is shared by the last encryption and first decryption rounds.
    // k[0] is shared by the first encryption round and the last decryption
    // round (and is the original user key).
    // The decryption schedule is NOT simply the encryption schedule reversed:
    // the middle round keys are passed through _mm_aesimc_si128 first.
    aes128_load_key_dec_only(key_schedule);
}
// Encrypt a single 16-byte block with AES-128.
//   key_schedule: schedule whose entries [0]..[10] are the encryption keys
//   plainText:    16 input bytes (unaligned access is fine)
//   cipherText:   16 output bytes
static void aes128_enc(const __m128i *key_schedule, const uint8_t *plainText, uint8_t *cipherText){
    __m128i m = _mm_loadu_si128((const __m128i *) plainText);

    DO_ENC_BLOCK(m,key_schedule);

    _mm_storeu_si128((__m128i *) cipherText, m);
}
// Decrypt a single 16-byte block with AES-128.
//   key_schedule: full schedule; entries [0] and [10]..[19] are read
//   cipherText:   16 input bytes (unaligned access is fine)
//   plainText:    16 output bytes
static void aes128_dec(const __m128i *key_schedule, const uint8_t *cipherText, uint8_t *plainText){
    __m128i m = _mm_loadu_si128((const __m128i *) cipherText);

    DO_DEC_BLOCK(m,key_schedule);

    _mm_storeu_si128((__m128i *) plainText, m);
}
// Known-answer test: encrypt and decrypt one fixed block and compare both
// results with the expected values.
// Returns a bit mask:
//   0 if no error
//   bit 0 (1) set if encryption failed
//   bit 1 (2) set if decryption failed
static int aes128_self_test(void){
    uint8_t plain[]   = {0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x07, 0x34};
    uint8_t enc_key[] = {0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c};
    uint8_t cipher[]  = {0x39, 0x25, 0x84, 0x1d, 0x02, 0xdc, 0x09, 0xfb, 0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a, 0x0b, 0x32};
    uint8_t computed_cipher[16];
    uint8_t computed_plain[16];
    int out = 0;
    __m128i key_schedule[20];

    aes128_load_key(enc_key,key_schedule);
    aes128_enc(key_schedule,plain,computed_cipher);
    aes128_dec(key_schedule,cipher,computed_plain);
    if(memcmp(cipher,computed_cipher,sizeof(cipher))) out=1;
    if(memcmp(plain,computed_plain,sizeof(plain))) out|=2;
    return out;
}
// Substitution table, initialised to the AES S-box and indexed by byte value.
// Left non-const because the file passes it to rc4key() as a non-const
// buffer.
// NOTE(review): the declarator line was missing from this view; the name and
// element type are taken from the uses of sbox elsewhere in the file.
unsigned char sbox[256] = {
    //0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //0
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //1
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //2
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //3
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //4
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //5
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //6
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //7
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //8
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //9
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //A
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //B
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //C
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //D
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //E
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; //F
// Round-constant table indexed by round number; KeyExpansion reads
// Rcon[j/Nk]. Entry 0 is not a valid round constant and is only a filler.
// NOTE(review): the declarator line was missing from this view; the name is
// taken from KeyExpansion and the element type is the narrowest one that
// holds every listed value.
unsigned char Rcon[255] = {
    0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,
    0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
    0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,
    0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,
    0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,
    0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,
    0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,
    0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,
    0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,
    0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,
    0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,
    0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,
    0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,
    0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,
    0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,
    0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb
};
255 void KeyExpansion( unsigned char *RoundKey, unsigned char Key[], int Nr)
258 unsigned char temp[4],k;
261 // The first round key is the key itself.
264 RoundKey[j*4]=Key[j*4];
265 RoundKey[j*4+1]=Key[j*4+1];
266 RoundKey[j*4+2]=Key[j*4+2];
267 RoundKey[j*4+3]=Key[j*4+3];
270 // All other round keys are found from the previous round keys.
271 while (j < (4 * (Nr+1)))
275 temp[i]=RoundKey[(j-1) * 4 + i];
279 // This function rotates the 4 bytes in a word to the left once.
280 // [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
282 // Function RotWord()
291 // SubWord() is a function that takes a four-byte input word and
292 // applies the S-box to each of the four bytes to produce an output word.
294 // Function Subword()
296 temp[0]=sbox[temp[0]];
297 temp[1]=sbox[temp[1]];
298 temp[2]=sbox[temp[2]];
299 temp[3]=sbox[temp[3]];
302 temp[0] = temp[0] ^ Rcon[j/Nk];
304 else if ((Nk > 6) && ((j % Nk) == 4))
306 // Function Subword()
308 temp[0]=sbox[temp[0]];
309 temp[1]=sbox[temp[1]];
310 temp[2]=sbox[temp[2]];
311 temp[3]=sbox[temp[3]];
314 RoundKey[j*4+0] = RoundKey[(j-Nk)*4+0] ^ temp[0];
315 RoundKey[j*4+1] = RoundKey[(j-Nk)*4+1] ^ temp[1];
316 RoundKey[j*4+2] = RoundKey[(j-Nk)*4+2] ^ temp[2];
317 RoundKey[j*4+3] = RoundKey[(j-Nk)*4+3] ^ temp[3];
// RC4 key scheduling: fill sc with a key-dependent permutation of 0..255.
//   key:     key bytes
//   sc:      output table of 256 bytes
//   size_DK: key length in bytes; the index is reduced with
//            (size_DK - 1) as a bit mask, so it must be a power of two
// NOTE(review): the table initialisation, the declaration of j0 and the swap
// were missing from this view and have been restored as the standard RC4
// key-scheduling steps; confirm against the upstream file.
void rc4key(unsigned char *key, unsigned char *sc, int size_DK) {
    for(int i=0;i<256;i++) {
        sc[i] = (unsigned char)i;
    }

    // j0 is a byte so the running index wraps modulo 256.
    unsigned char j0 = 0;
    for(int i0=0; i0<256; i0++) {
        j0 = (unsigned char)(j0 + sc[i0] + key[i0&(size_DK-1)]);
        unsigned char tmp = sc[i0];
        sc[i0] = sc[j0];
        sc[j0] = tmp;
    }
}
// Benchmark driver: loads an RGB image, times several key-setup variants,
// then times block-wise encryption and decryption of the pixel data and
// writes the encrypted and decrypted images back out.
// NOTE(review): several lines of this function (declarations of width,
// height, enc_key and time, plus all closing braces) are absent from this
// view; the comments below describe only the statements that are visible.
345 static void test_aes128(void){
348 uchar *data_R, *data_G, *data_B;
349 // load_RGB_pixmap("lena.ppm", &width, &height, &data_R, &data_G, &data_B);
350 // load_RGB_pixmap("No_ecb_mode_picture.ppm", &width, &height, &data_R, &data_G, &data_B);
// presumably fills width/height and the three channel planes -- confirm in pixmap_io.h
351 load_RGB_pixmap("lena.ppm", &width, &height, &data_R, &data_G, &data_B);
// One byte per channel sample, three channels.
355 int size=width*height*3;
// NOTE(review): none of the visible lines checks these malloc results or frees the buffers.
357 uint8_t *plain=malloc(sizeof(uint8_t)*size);
358 uint8_t *cipher=malloc(sizeof(uint8_t)*size);
359 uint8_t *decipher=malloc(sizeof(uint8_t)*size);
// The working buffer is planar: the G plane starts at offset oneD and the B plane at 2*oneD.
362 int oneD=width*height;
363 for(int i=0;i<oneD;i++) {
365 plain[oneD+i]=data_G[i];
366 plain[2*oneD+i]=data_B[i];
// Fill the 16 key bytes from lrand48() and print them.
374 for(int i=0;i<16;i++) {
375 enc_key[i]=lrand48();
376 printf("%d ",enc_key[i]);
// Schedule produced by KeyExpansion (entries 0..10) plus derived decryption keys (11..19).
383 __m128i my_key_schedule[20];
384 unsigned char RoundKey[240];
385 /* for(int i=0;i<240;i++) {
391 const int nb_times_init=1000000;
// Timing 1: repeated rc4key() calls, with the global sbox passed as the table argument.
394 double t=TimeStart();
395 for(int i=0;i<nb_times_init;i++) {
396 rc4key(enc_key,sbox,16);
399 printf("init Sbox generation %f\n",time);
// Timing 2: KeyExpansion with Nr=10, then the 11 round keys are loaded into
// my_key_schedule and the decryption keys are derived from them.
403 for(int i=0;i<nb_times_init;i++) {
404 KeyExpansion(RoundKey, enc_key, 10);
405 // printf("MY KEYS\n");
407 for (size_t i=0; i<11; ++i) {
408 my_key_schedule[i] = _mm_loadu_si128((const __m128i*) &RoundKey[16*i]);
409 //print128_num(my_key_schedule[i]);
411 aes128_load_key_dec_only(my_key_schedule);
414 printf("init our Key schedule aes %f\n",time);
// Dump the 11 round keys, 16 bytes each.
418 printf("key generated with sbox :\n");
419 for(int a=0;a<11;a++) {
420 for(int i=0;i<16;i++) {
421 printf("%d ",RoundKey[16*a+i]);
// Timing 3: the AES-NI key setup. key_schedule is not used by the visible
// encryption/decryption loops below, which use my_key_schedule.
427 __m128i key_schedule[20];
434 // printf("AES KEYS\n");
438 for(int i=0;i<nb_times_init;i++) {
439 aes128_load_key(enc_key,key_schedule);
442 printf("init normal aes %f\n",time);
// Encryption timing: nb_test passes over the buffer, each 16-byte block
// processed independently with the same schedule.
// NOTE(review): aes128_enc always reads and writes 16 bytes, so if size is not
// a multiple of 16 the last call runs past the end of plain and cipher.
447 for(int a=0;a<nb_test;a++) {
449 for(int i=0;i<size;i+=16) {
450 aes128_enc(my_key_schedule,&plain[i],&cipher[i]);
457 printf("Time encrypt %f\n",time);
// Copy the ciphertext planes back into the channel buffers and save the image.
459 for(int i=0;i<oneD;i++) {
461 data_G[i]=cipher[oneD+i];
462 data_B[i]=cipher[2*oneD+i];
464 store_RGB_pixmap("lena2.ppm", data_R, data_G, data_B, width, height);
// Decryption timing: same block-wise traversal as the encryption loop above
// (same multiple-of-16 caveat).
472 for(int a=0;a<nb_test;a++) {
473 for(int i=0;i<size;i+=16) {
474 aes128_dec(my_key_schedule,&cipher[i],&decipher[i]);
479 printf("Time decrypt %f\n",time);
// Copy the decrypted planes back into the channel buffers and save the image.
482 for(int i=0;i<oneD;i++) {
483 data_R[i]=decipher[i];
484 data_G[i]=decipher[oneD+i];
485 data_B[i]=decipher[2*oneD+i];
487 store_RGB_pixmap("lena3.ppm", data_R, data_G, data_B, width, height);
// Program entry point. The visible part scans the command-line arguments.
// NOTE(review): the rest of main lies outside this view.
498 int main(int argc, char** argv) {
500 for(int i=1; i<argc; i++){
// An argument starting with "nb" sets nb_test from the text that follows the prefix.
501 if(strncmp(argv[i],"nb",2)==0) nb_test = atoi(&(argv[i][2])); //nb of test
// NOTE(review): only the first 2 characters of "seed" are compared, so any
// argument starting with "se" matches while the value is parsed from offset 4;
// an argument shorter than 4 characters makes atoi read past its terminator.
// The length should most likely be 4. The trailing comment also looks copied
// from the line above.
502 if(strncmp(argv[i],"seed",2)==0) seed = atoi(&(argv[i][4])); //nb of test