2 //gcc aes1.c pixmap_io.o -o aes1 -maes -msse2 -msse4 -O3
8 #include <stdint.h> //for int8_t
9 #include <string.h> //for memcmp
10 #include <wmmintrin.h> //for intrinsics for AES-NI
12 //compile using gcc with the following arguments: -g;-O0;-Wall;-msse2;-msse;-march=native;-maes
13 #include "pixmap_io.h"
15 typedef unsigned char uchar;
// Debug helper: dumps the 16 bytes of a 128-bit SSE value to stdout.
//   var: the vector to print.
22 void print128_num(__m128i var)
// View the vector as an array of bytes so each one can be printed individually.
24 uint8_t *val = (uint8_t*) &var;
// Older hex dump, kept commented out: it lists the bytes of each 4-byte group
// in reverse order (3,2,1,0, 7,6,5,4, ...).
25 /* printf("Numerical: %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x %.2x \n", val[3], val[2], val[1], val[0], val[7], val[6],
26 val[5], val[4], val[11], val[10], val[9], val[8], val[15], val[14],
// Decimal dump of the bytes in ascending index order, prefixed with Numerical:
// NOTE(review): the trailing arguments of this call are not visible here - confirm all 16 bytes are passed.
28 printf("Numerical: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n",
29 val[0], val[1], val[2], val[3], val[4], val[5],
30 val[6], val[7], val[8], val[9], val[10], val[11], val[12], val[13],
// Samples the wall clock with gettimeofday and returns it as seconds in a double,
// with the microsecond field folded in as the fractional part.
// NOTE(review): the enclosing function header is not visible here; test_aes128 calls
// TimeStart() for its start timestamp, so this is presumably that function - confirm.
37 struct timeval tstart;
38 gettimeofday(&tstart,0);
39 return( (double) (tstart.tv_sec + tstart.tv_usec*1e-6) );
// Computes the time elapsed, in seconds, since a start timestamp.
//   t: start timestamp in seconds (same representation as tv_sec + tv_usec*1e-6).
// NOTE(review): the return statement is not visible here; presumably the updated t is returned - confirm.
42 double TimeStop(double t)
// Sample the current time (the declaration of tend is not visible in this extract).
46 gettimeofday(&tend,0);
// Current time in seconds minus the start timestamp; t is reused to hold the difference.
47 t = (double) (tend.tv_sec + tend.tv_usec*1e-6) - t;
// Encrypts one 128-bit block in place using eleven round keys.
//   m: __m128i lvalue holding the block; it is overwritten with the result.
//   k: indexable array of __m128i round keys. k[0] is XORed in first,
//      k[1]..k[9] feed nine AESENC rounds, and k[10] feeds the final AESENCLAST round.
// Both m and k are expanded several times, so pass plain variables, not expressions with side effects.
56 #define DO_ENC_BLOCK(m,k) \
58 m = _mm_xor_si128 (m, k[ 0]); \
59 m = _mm_aesenc_si128 (m, k[ 1]); \
60 m = _mm_aesenc_si128 (m, k[ 2]); \
61 m = _mm_aesenc_si128 (m, k[ 3]); \
62 m = _mm_aesenc_si128 (m, k[ 4]); \
63 m = _mm_aesenc_si128 (m, k[ 5]); \
64 m = _mm_aesenc_si128 (m, k[ 6]); \
65 m = _mm_aesenc_si128 (m, k[ 7]); \
66 m = _mm_aesenc_si128 (m, k[ 8]); \
67 m = _mm_aesenc_si128 (m, k[ 9]); \
68 m = _mm_aesenclast_si128(m, k[10]);\
// Decrypts one 128-bit block in place using the second half of a 20-entry key schedule.
//   m: __m128i lvalue holding the block; it is overwritten with the result.
//   k: indexable array of __m128i round keys. k[10] is XORed in first,
//      k[11]..k[19] feed nine AESDEC rounds, and k[0] feeds the final AESDECLAST round.
// Entries k[11]..k[19] must therefore be populated before use (see aes128_load_key_dec_only).
// Both m and k are expanded several times, so pass plain variables, not expressions with side effects.
71 #define DO_DEC_BLOCK(m,k) \
73 m = _mm_xor_si128 (m, k[10+0]); \
74 m = _mm_aesdec_si128 (m, k[10+1]); \
75 m = _mm_aesdec_si128 (m, k[10+2]); \
76 m = _mm_aesdec_si128 (m, k[10+3]); \
77 m = _mm_aesdec_si128 (m, k[10+4]); \
78 m = _mm_aesdec_si128 (m, k[10+5]); \
79 m = _mm_aesdec_si128 (m, k[10+6]); \
80 m = _mm_aesdec_si128 (m, k[10+7]); \
81 m = _mm_aesdec_si128 (m, k[10+8]); \
82 m = _mm_aesdec_si128 (m, k[10+9]); \
83 m = _mm_aesdeclast_si128(m, k[0]);\
// Derives the next round key from round key k: runs AESKEYGENASSIST on k with round
// constant rcon and hands both to aes_128_key_expansion.
// rcon is forwarded to _mm_aeskeygenassist_si128, whose second operand is an immediate,
// so it has to be a compile-time constant. k is expanded twice.
86 #define AES_128_key_exp(k, rcon) aes_128_key_expansion(k, _mm_aeskeygenassist_si128(k, rcon))
// Combines a round key with its AESKEYGENASSIST output to produce the next round key.
//   key:      the previous round key.
//   keygened: result of _mm_aeskeygenassist_si128 on that key (see AES_128_key_exp).
// Returns the new round key.
88 static __m128i aes_128_key_expansion(__m128i key, __m128i keygened){
// Broadcast the highest 32-bit lane of keygened into all four lanes.
89 keygened = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3,3,3,3));
// Three rounds of shift-left-by-4-bytes and XOR: afterwards each 32-bit lane holds
// the XOR of its original value and all lower lanes.
90 key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
91 key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
92 key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
// Mix the broadcast keygen word into every lane.
93 return _mm_xor_si128(key, keygened);
// Builds the eleven encryption round keys from a 16-byte key.
//   enc_key:      16 bytes of key material; read with an unaligned load.
//   key_schedule: output array; entries [0]..[10] are written.
// Entry [0] is the key itself; each following entry is derived from the previous one
// with the round constants 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36.
// The commented-out print128_num calls are leftover debug dumps of each round key.
97 static void aes128_load_key_enc_only(uint8_t *enc_key, __m128i *key_schedule){
98 key_schedule[0] = _mm_loadu_si128((const __m128i*) enc_key);
99 // print128_num( key_schedule[0]);
100 key_schedule[1] = AES_128_key_exp(key_schedule[0], 0x01);
101 // print128_num( key_schedule[1]);
102 key_schedule[2] = AES_128_key_exp(key_schedule[1], 0x02);
103 // print128_num( key_schedule[2]);
104 key_schedule[3] = AES_128_key_exp(key_schedule[2], 0x04);
105 // print128_num( key_schedule[3]);
106 key_schedule[4] = AES_128_key_exp(key_schedule[3], 0x08);
107 // print128_num( key_schedule[4]);
108 key_schedule[5] = AES_128_key_exp(key_schedule[4], 0x10);
109 // print128_num( key_schedule[5]);
110 key_schedule[6] = AES_128_key_exp(key_schedule[5], 0x20);
111 // print128_num( key_schedule[6]);
112 key_schedule[7] = AES_128_key_exp(key_schedule[6], 0x40);
113 // print128_num( key_schedule[7]);
114 key_schedule[8] = AES_128_key_exp(key_schedule[7], 0x80);
115 // print128_num( key_schedule[8]);
116 key_schedule[9] = AES_128_key_exp(key_schedule[8], 0x1B);
117 // print128_num( key_schedule[9]);
118 key_schedule[10] = AES_128_key_exp(key_schedule[9], 0x36);
119 // print128_num( key_schedule[10]);
// Fills the decryption half of a 20-entry key schedule.
//   key_schedule: entries [1]..[9] must already hold encryption round keys;
//                 entries [11]..[19] are written.
// Entry [10+n] receives _mm_aesimc_si128 (the AES InvMixColumns transform) of entry [10-n],
// for n = 1..9, so the middle encryption keys are stored transformed and in reverse order.
// Entries [0] and [10] are not touched; DO_DEC_BLOCK uses them as they are.
124 static void aes128_load_key_dec_only(__m128i *key_schedule){
126 key_schedule[11] = _mm_aesimc_si128(key_schedule[9]);
127 // print128_num( key_schedule[11]);
128 key_schedule[12] = _mm_aesimc_si128(key_schedule[8]);
129 // print128_num( key_schedule[12]);
130 key_schedule[13] = _mm_aesimc_si128(key_schedule[7]);
131 // print128_num( key_schedule[13]);
132 key_schedule[14] = _mm_aesimc_si128(key_schedule[6]);
133 // print128_num( key_schedule[14]);
134 key_schedule[15] = _mm_aesimc_si128(key_schedule[5]);
135 // print128_num( key_schedule[15]);
136 key_schedule[16] = _mm_aesimc_si128(key_schedule[4]);
137 // print128_num( key_schedule[16]);
138 key_schedule[17] = _mm_aesimc_si128(key_schedule[3]);
139 // print128_num( key_schedule[17]);
140 key_schedule[18] = _mm_aesimc_si128(key_schedule[2]);
141 // print128_num( key_schedule[18]);
142 key_schedule[19] = _mm_aesimc_si128(key_schedule[1]);
143 // print128_num( key_schedule[19]);
// Builds the full 20-entry key schedule (encryption and decryption) from a 16-byte key.
//   enc_key:      16 bytes of key material.
//   key_schedule: output array with room for 20 entries; [0]..[10] are filled by
//                 aes128_load_key_enc_only and [11]..[19] by aes128_load_key_dec_only.
147 static void aes128_load_key(uint8_t *enc_key, __m128i *key_schedule){
148 aes128_load_key_enc_only(enc_key, key_schedule);
150 // Generate the decryption keys in reverse order.
151 // k[10] is shared by the last encryption round and the first decryption round.
152 // k[0] is shared by the first encryption round and the last decryption round (and is the original user key).
153 // The decryption key schedule is NOT just the encryption key schedule reversed: k[1]..k[9] are also passed through _mm_aesimc_si128.
155 aes128_load_key_dec_only(key_schedule);
// Encrypts a single 16-byte block.
//   key_schedule: round keys; entries [0]..[10] are read.
//   plainText:    16 input bytes, read with an unaligned load.
//   cipherText:   16 output bytes, written with an unaligned store.
163 static void aes128_enc(__m128i *key_schedule, uint8_t *plainText,uint8_t *cipherText){
164 __m128i m = _mm_loadu_si128((__m128i *) plainText);
// Run the block through the encryption rounds in place.
166 DO_ENC_BLOCK(m,key_schedule);
168 _mm_storeu_si128((__m128i *) cipherText, m);
// Decrypts a single 16-byte block.
//   key_schedule: round keys; entries [10]..[19] and [0] are read, so the
//                 decryption half must have been generated.
//   cipherText:   16 input bytes, read with an unaligned load.
//   plainText:    16 output bytes, written with an unaligned store.
171 static void aes128_dec(__m128i *key_schedule, uint8_t *cipherText,uint8_t *plainText){
172 __m128i m = _mm_loadu_si128((__m128i *) cipherText);
// Run the block through the decryption rounds in place.
174 DO_DEC_BLOCK(m,key_schedule);
176 _mm_storeu_si128((__m128i *) plainText, m);
// Known-answer self test: encrypts and decrypts one fixed block and compares against fixed expected bytes.
179 //return 0 if no error
180 //1 if encryption failed
181 //2 if decryption failed
// (the two failures are combined as bits, so both failing gives 3)
// NOTE(review): the declaration of out and the return statement are not visible here - confirm out starts at 0.
183 static int aes128_self_test(void){
// Fixed plaintext / key / expected ciphertext triple; the values appear to be the
// worked AES-128 example from FIPS-197 Appendix B - confirm against the standard.
184 uint8_t plain[] = {0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x07, 0x34};
185 uint8_t enc_key[] = {0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c};
186 uint8_t cipher[] = {0x39, 0x25, 0x84, 0x1d, 0x02, 0xdc, 0x09, 0xfb, 0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a, 0x0b, 0x32};
187 uint8_t computed_cipher[16];
188 uint8_t computed_plain[16];
190 __m128i key_schedule[20];
191 aes128_load_key(enc_key,key_schedule);
// Encrypt the reference plaintext and, independently, decrypt the reference ciphertext.
192 aes128_enc(key_schedule,plain,computed_cipher);
193 aes128_dec(key_schedule,cipher,computed_plain);
// Any mismatch sets the corresponding failure bit.
194 if(memcmp(cipher,computed_cipher,sizeof(cipher))) out=1;
195 if(memcmp(plain,computed_plain,sizeof(plain))) out|=2;
// 256-entry byte substitution table, laid out as 16 rows of 16 entries.
// The column header and the row labels give the low and high hex digit of the index.
// The initial values match the standard AES S-box.
// NOTE(review): the declaration line is not visible here; KeyExpansion indexes a table
// named sbox and test_aes128 passes sbox to rc4key, so this is presumably that table - confirm.
204 //0 1 2 3 4 5 6 7 8 9 A B C D E F
205 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, //0
206 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, //1
207 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, //2
208 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, //3
209 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, //4
210 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, //5
211 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, //6
212 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, //7
213 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, //8
214 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, //9
215 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, //A
216 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, //B
217 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, //C
218 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, //D
219 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, //E
220 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; //F
// Round-constant lookup table with 255 visible entries (15 rows of 16 plus one row of 15).
// Entries 1..10 are 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, the same
// constants aes128_load_key_enc_only passes to AES_128_key_exp. The sequence repeats every 51 entries.
// NOTE(review): the declaration line is not visible here; KeyExpansion reads Rcon[j/Nk],
// so this is presumably that table - confirm.
224 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,
225 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
226 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,
227 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,
228 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,
229 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,
230 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,
231 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,
232 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,
233 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,
234 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,
235 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,
236 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,
237 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,
238 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,
239 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb
// Table-driven key expansion: derives the round keys from Key using the sbox and Rcon tables.
//   RoundKey: output buffer, written four bytes (one word) at a time; the main loop stops
//             at word 4*(Nr+1), so 16*(Nr+1) bytes are produced.
//   Key:      input key bytes, copied unchanged into the start of RoundKey.
//   Nr:       number of cipher rounds (test_aes128 passes 10).
// NOTE(review): Nk (used below) and the loop counters are not declared in the visible lines -
// presumably Nk is the key length in words, defined at file level; confirm.
245 void KeyExpansion( unsigned char *RoundKey, unsigned char Key[], int Nr)
248 unsigned char temp[4],k;
251 // The first round key is the key itself.
254 RoundKey[j*4]=Key[j*4];
255 RoundKey[j*4+1]=Key[j*4+1];
256 RoundKey[j*4+2]=Key[j*4+2];
257 RoundKey[j*4+3]=Key[j*4+3];
260 // All other round keys are found from the previous round keys.
261 while (j < (4 * (Nr+1)))
// Start from the previous word (word j-1).
265 temp[i]=RoundKey[(j-1) * 4 + i];
269 // This function rotates the 4 bytes in a word to the left once.
270 // [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
272 // Function RotWord()
281 // SubWord() is a function that takes a four-byte input word and
282 // applies the S-box to each of the four bytes to produce an output word.
284 // Function Subword()
286 temp[0]=sbox[temp[0]];
287 temp[1]=sbox[temp[1]];
288 temp[2]=sbox[temp[2]];
289 temp[3]=sbox[temp[3]];
// Fold the round constant for this key into the first byte only.
292 temp[0] = temp[0] ^ Rcon[j/Nk];
// Extra substitution step that only applies when Nk is greater than 6.
294 else if ((Nk > 6) && ((j % Nk) == 4))
296 // Function Subword()
298 temp[0]=sbox[temp[0]];
299 temp[1]=sbox[temp[1]];
300 temp[2]=sbox[temp[2]];
301 temp[3]=sbox[temp[3]];
// New word j = word (j-Nk) XOR the transformed previous word.
304 RoundKey[j*4+0] = RoundKey[(j-Nk)*4+0] ^ temp[0];
305 RoundKey[j*4+1] = RoundKey[(j-Nk)*4+1] ^ temp[1];
306 RoundKey[j*4+2] = RoundKey[(j-Nk)*4+2] ^ temp[2];
307 RoundKey[j*4+3] = RoundKey[(j-Nk)*4+3] ^ temp[3];
// Key-driven pass over a 256-entry byte table.
//   key:     key bytes; indexed with i0 & (size_DK-1).
//   sc:      256-entry table that is read here (test_aes128 passes sbox).
//   size_DK: key length in bytes.
// NOTE(review): the name and the j0 update resemble the RC4 key schedule, but most of both
// loop bodies are not visible here - confirm what is written to sc.
314 void rc4key(uchar *key, uchar *sc, int size_DK) {
316 for(int i=0;i<256;i++) {
322 for(int i0=0; i0<256; i0++) {
// NOTE(review): masking with size_DK-1 equals i0 % size_DK only when size_DK is a power of two.
323 j0 = (j0 + sc[i0] + key[i0&(size_DK-1)] );
// Benchmark and demo: encrypts and decrypts the pixels of lena.ppm block by block,
// prints timings, and writes the encrypted image to lena2.ppm and the decrypted one to lena3.ppm.
// Several declarations (width, height, enc_key, time, nb_test) and closing braces are not visible in this extract.
335 static void test_aes128(void){
338 uchar *data_R, *data_G, *data_B;
339 // load_RGB_pixmap("lena.ppm", &width, &height, &data_R, &data_G, &data_B);
340 // load_RGB_pixmap("No_ecb_mode_picture.ppm", &width, &height, &data_R, &data_G, &data_B);
// Load the image as three separate colour planes.
341 load_RGB_pixmap("lena.ppm", &width, &height, &data_R, &data_G, &data_B);
// Three bytes per pixel in total.
345 int size=width*height*3;
// NOTE(review): no check of these allocations is visible here - confirm.
347 uint8_t *plain=malloc(sizeof(uint8_t)*size);
348 uint8_t *cipher=malloc(sizeof(uint8_t)*size);
349 uint8_t *decipher=malloc(sizeof(uint8_t)*size);
// Pack the planes one after another into plain: G at offset oneD, B at offset 2*oneD
// (the R copy is presumably on a line not visible here).
352 int oneD=width*height;
353 for(int i=0;i<oneD;i++) {
355 plain[oneD+i]=data_G[i];
356 plain[2*oneD+i]=data_B[i];
// Draw a 16-byte key from lrand48 and print it.
364 for(int i=0;i<16;i++) {
365 enc_key[i]=lrand48();
366 printf("%d ",enc_key[i]);
// Key schedule built by the table-driven path (rc4key + KeyExpansion).
373 __m128i my_key_schedule[20];
374 unsigned char RoundKey[240];
375 /* for(int i=0;i<240;i++) {
381 const int nb_times_init=1;
// Time the table-driven key setup.
384 double t=TimeStart();
385 for(int i=0;i<nb_times_init;i++) {
// rc4key is handed the global sbox together with the key before the round keys are derived.
386 rc4key(enc_key,sbox,16);
387 KeyExpansion(RoundKey, enc_key, 10);
388 // printf("MY KEYS\n");
// Load the 11 round keys (16 bytes each) from RoundKey into the vector schedule.
390 for (size_t i=0; i<11; ++i) {
391 my_key_schedule[i] = _mm_loadu_si128((const __m128i*) &RoundKey[16*i]);
392 //print128_num(my_key_schedule[i]);
// Derive the decryption entries [11]..[19] from the loaded round keys.
394 aes128_load_key_dec_only(my_key_schedule);
397 printf("init our aes %f\n",time);
// Dump the 11 round keys as decimal bytes.
401 printf("key generated with sbox :\n");
402 for(int a=0;a<11;a++) {
403 for(int i=0;i<16;i++) {
404 printf("%d ",RoundKey[16*a+i]);
// Reference schedule built with the AES-NI key expansion; in the visible lines it is only built (for timing), not used to encrypt.
410 __m128i key_schedule[20];
417 // printf("AES KEYS\n");
421 for(int i=0;i<nb_times_init;i++) {
422 aes128_load_key(enc_key,key_schedule);
425 printf("init normal aes %f\n",time);
// Encrypt the whole buffer nb_test times; every 16-byte block is encrypted independently with my_key_schedule.
// NOTE(review): the stride is 16 bytes, so if size is not a multiple of 16 the last block reads and writes past the buffers.
430 for(int a=0;a<nb_test;a++) {
432 for(int i=0;i<size;i+=16) {
433 aes128_enc(my_key_schedule,&plain[i],&cipher[i]);
440 printf("Time encrypt %f\n",time);
// Unpack the ciphertext back into the colour planes and save it as an image.
442 for(int i=0;i<oneD;i++) {
444 data_G[i]=cipher[oneD+i];
445 data_B[i]=cipher[2*oneD+i];
447 store_RGB_pixmap("lena2.ppm", data_R, data_G, data_B, width, height);
// Decrypt the whole buffer nb_test times with the same schedule (same 16-byte stride caveat as above).
455 for(int a=0;a<nb_test;a++) {
456 for(int i=0;i<size;i+=16) {
457 aes128_dec(my_key_schedule,&cipher[i],&decipher[i]);
462 printf("Time decrypt %f\n",time);
// Unpack the decrypted bytes into the colour planes and save them as an image.
465 for(int i=0;i<oneD;i++) {
466 data_R[i]=decipher[i];
467 data_G[i]=decipher[oneD+i];
468 data_B[i]=decipher[2*oneD+i];
470 store_RGB_pixmap("lena3.ppm", data_R, data_G, data_B, width, height);
// Entry point. The visible part parses command-line arguments of the form nb<N> and seed<N>;
// the rest of the function is not visible in this extract.
481 int main(int argc, char** argv) {
483 for(int i=1; i<argc; i++){
// An argument starting with nb sets nb_test from the digits that follow the prefix.
484 if(strncmp(argv[i],"nb",2)==0) nb_test = atoi(&(argv[i][2])); //nb of test
// NOTE(review): only the first 2 characters are compared, so any argument starting with se matches,
// yet the value is parsed from offset 4 - for arguments shorter than 4 characters that reads past the string.
485 if(strncmp(argv[i],"seed",2)==0) seed = atoi(&(argv[i][4])); //seed value