19 #if (CRYPTOPP_SSSE3_AVAILABLE) 21 # include <pmmintrin.h> 22 # include <tmmintrin.h> 26 # include <ammintrin.h> 27 # if defined(__GNUC__) 28 # include <x86intrin.h> 32 #if (CRYPTOPP_ARM_NEON_HEADER) 34 # include <arm_neon.h> 37 #if (CRYPTOPP_ARM_ACLE_HEADER) 39 # include <arm_acle.h> 60 #undef CRYPTOPP_POWER8_AVAILABLE 61 #if defined(CRYPTOPP_POWER8_AVAILABLE) 67 extern const char LEA_SIMD_FNAME[] = __FILE__;
69 ANONYMOUS_NAMESPACE_BEGIN
75 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 77 inline uint32x4_t Xor(
const uint32x4_t& a,
const uint32x4_t& b)
79 return veorq_u32(a, b);
82 inline uint32x4_t Add(
const uint32x4_t& a,
const uint32x4_t& b)
84 return vaddq_u32(a, b);
87 inline uint32x4_t Sub(
const uint32x4_t& a,
const uint32x4_t& b)
89 return vsubq_u32(a, b);
92 template <
unsigned int R>
93 inline uint32x4_t RotateLeft(
const uint32x4_t& val)
95 const uint32x4_t a(vshlq_n_u32(val, R));
96 const uint32x4_t b(vshrq_n_u32(val, 32 - R));
97 return vorrq_u32(a, b);
100 template <
unsigned int R>
101 inline uint32x4_t RotateRight(
const uint32x4_t& val)
103 const uint32x4_t a(vshlq_n_u32(val, 32 - R));
104 const uint32x4_t b(vshrq_n_u32(val, R));
105 return vorrq_u32(a, b);
108 #if defined(__aarch32__) || defined(__aarch64__) 110 inline uint32x4_t RotateLeft<8>(
const uint32x4_t& val)
112 #if (CRYPTOPP_BIG_ENDIAN) 113 const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
114 const uint8x16_t mask = vld1q_u8(maskb);
116 const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
117 const uint8x16_t mask = vld1q_u8(maskb);
120 return vreinterpretq_u32_u8(
121 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
125 inline uint32x4_t RotateRight<8>(
const uint32x4_t& val)
127 #if (CRYPTOPP_BIG_ENDIAN) 128 const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
129 const uint8x16_t mask = vld1q_u8(maskb);
131 const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,14,12 };
132 const uint8x16_t mask = vld1q_u8(maskb);
135 return vreinterpretq_u32_u8(
136 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
140 uint32x4_t UnpackLow32(uint32x4_t a, uint32x4_t b)
142 uint32x2_t a1 = vget_low_u32(a);
143 uint32x2_t b1 = vget_low_u32(b);
144 uint32x2x2_t result = vzip_u32(a1, b1);
145 return vcombine_u32(result.val[0], result.val[1]);
148 uint32x4_t UnpackHigh32(uint32x4_t a, uint32x4_t b)
150 uint32x2_t a1 = vget_high_u32(a);
151 uint32x2_t b1 = vget_high_u32(b);
152 uint32x2x2_t result = vzip_u32(a1, b1);
153 return vcombine_u32(result.val[0], result.val[1]);
156 uint32x4_t UnpackLow64(uint32x4_t a, uint32x4_t b)
158 uint64x1_t a1 = vget_low_u64((uint64x2_t)a);
159 uint64x1_t b1 = vget_low_u64((uint64x2_t)b);
160 return (uint32x4_t)vcombine_u64(a1, b1);
163 uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
165 uint64x1_t a1 = vget_high_u64((uint64x2_t)a);
166 uint64x1_t b1 = vget_high_u64((uint64x2_t)b);
167 return (uint32x4_t)vcombine_u64(a1, b1);
170 template <
unsigned int IDX>
171 inline uint32x4_t LoadKey(
const word32 rkey[])
173 return vdupq_n_u32(rkey[IDX]);
176 template <
unsigned int IDX>
177 inline uint32x4_t UnpackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
182 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
183 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
184 return vmovq_n_u32(0);
188 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
190 const uint32x4_t r1 = UnpackLow32(a, b);
191 const uint32x4_t r2 = UnpackLow32(c, d);
192 return UnpackLow64(r1, r2);
196 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
198 const uint32x4_t r1 = UnpackLow32(a, b);
199 const uint32x4_t r2 = UnpackLow32(c, d);
200 return UnpackHigh64(r1, r2);
204 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
206 const uint32x4_t r1 = UnpackHigh32(a, b);
207 const uint32x4_t r2 = UnpackHigh32(c, d);
208 return UnpackLow64(r1, r2);
212 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
214 const uint32x4_t r1 = UnpackHigh32(a, b);
215 const uint32x4_t r2 = UnpackHigh32(c, d);
216 return UnpackHigh64(r1, r2);
219 template <
unsigned int IDX>
220 inline uint32x4_t UnpackNEON(
const uint32x4_t& v)
226 return vmovq_n_u32(0);
230 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& v)
233 return vdupq_n_u32(vgetq_lane_u32(v, 0));
237 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& v)
240 return vdupq_n_u32(vgetq_lane_u32(v, 1));
244 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& v)
247 return vdupq_n_u32(vgetq_lane_u32(v, 2));
251 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& v)
254 return vdupq_n_u32(vgetq_lane_u32(v, 3));
257 template <
unsigned int IDX>
258 inline uint32x4_t RepackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
260 return UnpackNEON<IDX>(a, b, c, d);
263 template <
unsigned int IDX>
264 inline uint32x4_t RepackNEON(
const uint32x4_t& v)
266 return UnpackNEON<IDX>(v);
269 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 273 #if (CRYPTOPP_SSSE3_AVAILABLE) 275 inline __m128i Xor(
const __m128i& a,
const __m128i& b)
277 return _mm_xor_si128(a, b);
280 inline __m128i Add(
const __m128i& a,
const __m128i& b)
282 return _mm_add_epi32(a, b);
285 inline __m128i Sub(
const __m128i& a,
const __m128i& b)
287 return _mm_sub_epi32(a, b);
// Rotates each 32-bit element of val left by R bits.
// NOTE(review): two alternative forms are visible below: a direct
// _mm_roti_epi32 call, and a shift-left by R paired with a shift-right by
// 32-R. The preprocessor lines that choose between them and the start of
// the second return expression are not visible here; confirm against the
// complete file before editing.
290 template <
unsigned int R>
291 inline __m128i RotateLeft(
const __m128i& val)
294 return _mm_roti_epi32(val, R);
297 _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
// Rotates each 32-bit element of val right by R bits.
// NOTE(review): two alternative forms are visible below: a _mm_roti_epi32
// call with a count of 32-R, and a shift-left by 32-R paired with a
// shift-right by R. The preprocessor lines that choose between them and the
// start of the second return expression are not visible here; confirm
// against the complete file before editing.
301 template <
unsigned int R>
302 inline __m128i RotateRight(
const __m128i& val)
305 return _mm_roti_epi32(val, 32-R);
308 _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
// RotateLeft specialised for R == 8.
// NOTE(review): two return statements are visible; the conditional that
// selects between the _mm_roti_epi32 form and the byte-shuffle form is not
// visible here. Confirm against the complete file.
314 inline __m128i RotateLeft<8>(
const __m128i& val)
317 return _mm_roti_epi32(val, 8);
// Byte-shuffle control: a rotation by a whole byte only permutes the four
// bytes inside each 32-bit element.
319 const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
320 return _mm_shuffle_epi8(val, mask);
// RotateRight specialised for R == 8.
// NOTE(review): two return statements are visible; the conditional that
// selects between the _mm_roti_epi32 form and the byte-shuffle form is not
// visible here. Confirm against the complete file.
326 inline __m128i RotateRight<8>(
const __m128i& val)
329 return _mm_roti_epi32(val, 32-8);
// Byte-shuffle control: a rotation by a whole byte only permutes the four
// bytes inside each 32-bit element.
331 const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
332 return _mm_shuffle_epi8(val, mask);
336 template <
unsigned int IDX>
337 inline __m128i LoadKey(
const word32 rkey[])
339 float rk; std::memcpy(&rk, rkey+IDX,
sizeof(rk));
340 return _mm_castps_si128(_mm_load_ps1(&rk));
343 template <
unsigned int IDX>
344 inline __m128i UnpackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
347 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
348 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
350 return _mm_setzero_si128();
354 inline __m128i UnpackXMM<0>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
357 const __m128i r1 = _mm_unpacklo_epi32(a, b);
358 const __m128i r2 = _mm_unpacklo_epi32(c, d);
359 return _mm_unpacklo_epi64(r1, r2);
363 inline __m128i UnpackXMM<1>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
366 const __m128i r1 = _mm_unpacklo_epi32(a, b);
367 const __m128i r2 = _mm_unpacklo_epi32(c, d);
368 return _mm_unpackhi_epi64(r1, r2);
372 inline __m128i UnpackXMM<2>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
375 const __m128i r1 = _mm_unpackhi_epi32(a, b);
376 const __m128i r2 = _mm_unpackhi_epi32(c, d);
377 return _mm_unpacklo_epi64(r1, r2);
381 inline __m128i UnpackXMM<3>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
384 const __m128i r1 = _mm_unpackhi_epi32(a, b);
385 const __m128i r2 = _mm_unpackhi_epi32(c, d);
386 return _mm_unpackhi_epi64(r1, r2);
389 template <
unsigned int IDX>
390 inline __m128i UnpackXMM(
const __m128i& v)
394 return _mm_setzero_si128();
398 inline __m128i UnpackXMM<0>(
const __m128i& v)
401 return _mm_shuffle_epi8(v, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
405 inline __m128i UnpackXMM<1>(
const __m128i& v)
408 return _mm_shuffle_epi8(v, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
412 inline __m128i UnpackXMM<2>(
const __m128i& v)
415 return _mm_shuffle_epi8(v, _mm_set_epi8(11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8));
419 inline __m128i UnpackXMM<3>(
const __m128i& v)
422 return _mm_shuffle_epi8(v, _mm_set_epi8(15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12));
425 template <
unsigned int IDX>
426 inline __m128i RepackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
428 return UnpackXMM<IDX>(a, b, c, d);
431 template <
unsigned int IDX>
432 inline __m128i RepackXMM(
const __m128i& v)
434 return UnpackXMM<IDX>(v);
437 #endif // CRYPTOPP_SSSE3_AVAILABLE 441 #if (CRYPTOPP_POWER8_AVAILABLE) 462 template <
unsigned int R>
466 return vec_rl(val, m);
469 template <
unsigned int R>
472 const uint32x4_p m = {32-R, 32-R, 32-R, 32-R};
473 return vec_rl(val, m);
476 template <
unsigned int IDX>
479 return vec_splats(rkey[IDX]);
482 template <
unsigned int IDX>
486 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
487 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
524 template <
unsigned int IDX>
536 const uint8x16_p m = {3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0};
544 const uint8x16_p m = {7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4};
552 const uint8x16_p m = {11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8};
560 const uint8x16_p m = {15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12};
564 template <
unsigned int IDX>
567 return UnpackSIMD<IDX>(a, b, c, d);
570 template <
unsigned int IDX>
573 return UnpackSIMD<IDX>(v);
// LEA_Encryption: transforms the four state words temp[0..3] in place.
// Every statement replaces one word with a rotation of the sum of two
// adjacent words, each first XORed with a key word fetched through
// LoadKey<N>(subkeys). The rotations repeat in the order RotateRight<3>,
// RotateRight<5>, RotateLeft<9>, and the word being written steps down by
// one position (3, 2, 1, 0, 3, ...) from statement to statement. Key indices
// 0 through 191 are used.
// NOTE(review): `rounds` is not referenced by any line visible here, and the
// declaration of the element type W is not visible either. The embedded
// numbering skips extra lines before the statements that use keys 144 and
// 168, so conditionals on `rounds` may exist in the complete file; confirm
// before relying on this description.
576 #endif // CRYPTOPP_POWER8_AVAILABLE 580 #if (CRYPTOPP_ARM_NEON_AVAILABLE || CRYPTOPP_SSSE3_AVAILABLE) 583 inline void LEA_Encryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
585 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<4>(subkeys)), Xor(temp[3], LoadKey<5>(subkeys))));
586 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<2>(subkeys)), Xor(temp[2], LoadKey<3>(subkeys))));
587 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<0>(subkeys)), Xor(temp[1], LoadKey<1>(subkeys))));
588 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<10>(subkeys)), Xor(temp[0], LoadKey<11>(subkeys))));
589 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<8>(subkeys)), Xor(temp[3], LoadKey<9>(subkeys))));
590 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<6>(subkeys)), Xor(temp[2], LoadKey<7>(subkeys))));
591 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<16>(subkeys)), Xor(temp[1], LoadKey<17>(subkeys))));
592 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<14>(subkeys)), Xor(temp[0], LoadKey<15>(subkeys))));
593 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<12>(subkeys)), Xor(temp[3], LoadKey<13>(subkeys))));
594 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<22>(subkeys)), Xor(temp[2], LoadKey<23>(subkeys))));
595 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<20>(subkeys)), Xor(temp[1], LoadKey<21>(subkeys))));
596 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<18>(subkeys)), Xor(temp[0], LoadKey<19>(subkeys))));
598 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<28>(subkeys)), Xor(temp[3], LoadKey<29>(subkeys))));
599 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<26>(subkeys)), Xor(temp[2], LoadKey<27>(subkeys))));
600 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<24>(subkeys)), Xor(temp[1], LoadKey<25>(subkeys))));
601 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<34>(subkeys)), Xor(temp[0], LoadKey<35>(subkeys))));
602 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<32>(subkeys)), Xor(temp[3], LoadKey<33>(subkeys))));
603 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<30>(subkeys)), Xor(temp[2], LoadKey<31>(subkeys))));
604 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<40>(subkeys)), Xor(temp[1], LoadKey<41>(subkeys))));
605 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<38>(subkeys)), Xor(temp[0], LoadKey<39>(subkeys))));
606 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<36>(subkeys)), Xor(temp[3], LoadKey<37>(subkeys))));
607 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<46>(subkeys)), Xor(temp[2], LoadKey<47>(subkeys))));
608 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<44>(subkeys)), Xor(temp[1], LoadKey<45>(subkeys))));
609 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<42>(subkeys)), Xor(temp[0], LoadKey<43>(subkeys))));
611 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<52>(subkeys)), Xor(temp[3], LoadKey<53>(subkeys))));
612 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<50>(subkeys)), Xor(temp[2], LoadKey<51>(subkeys))));
613 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<48>(subkeys)), Xor(temp[1], LoadKey<49>(subkeys))));
614 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<58>(subkeys)), Xor(temp[0], LoadKey<59>(subkeys))));
615 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<56>(subkeys)), Xor(temp[3], LoadKey<57>(subkeys))));
616 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<54>(subkeys)), Xor(temp[2], LoadKey<55>(subkeys))));
617 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<64>(subkeys)), Xor(temp[1], LoadKey<65>(subkeys))));
618 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<62>(subkeys)), Xor(temp[0], LoadKey<63>(subkeys))));
619 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<60>(subkeys)), Xor(temp[3], LoadKey<61>(subkeys))));
620 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<70>(subkeys)), Xor(temp[2], LoadKey<71>(subkeys))));
621 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<68>(subkeys)), Xor(temp[1], LoadKey<69>(subkeys))));
622 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<66>(subkeys)), Xor(temp[0], LoadKey<67>(subkeys))));
624 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<76>(subkeys)), Xor(temp[3], LoadKey<77>(subkeys))));
625 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<74>(subkeys)), Xor(temp[2], LoadKey<75>(subkeys))));
626 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<72>(subkeys)), Xor(temp[1], LoadKey<73>(subkeys))));
627 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<82>(subkeys)), Xor(temp[0], LoadKey<83>(subkeys))));
628 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<80>(subkeys)), Xor(temp[3], LoadKey<81>(subkeys))));
629 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<78>(subkeys)), Xor(temp[2], LoadKey<79>(subkeys))));
630 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<88>(subkeys)), Xor(temp[1], LoadKey<89>(subkeys))));
631 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<86>(subkeys)), Xor(temp[0], LoadKey<87>(subkeys))));
632 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<84>(subkeys)), Xor(temp[3], LoadKey<85>(subkeys))));
633 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<94>(subkeys)), Xor(temp[2], LoadKey<95>(subkeys))));
634 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<92>(subkeys)), Xor(temp[1], LoadKey<93>(subkeys))));
635 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<90>(subkeys)), Xor(temp[0], LoadKey<91>(subkeys))));
637 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<100>(subkeys)), Xor(temp[3], LoadKey<101>(subkeys))));
638 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<98>(subkeys)), Xor(temp[2], LoadKey<99>(subkeys))));
639 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<96>(subkeys)), Xor(temp[1], LoadKey<97>(subkeys))));
640 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<106>(subkeys)), Xor(temp[0], LoadKey<107>(subkeys))));
641 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<104>(subkeys)), Xor(temp[3], LoadKey<105>(subkeys))));
642 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<102>(subkeys)), Xor(temp[2], LoadKey<103>(subkeys))));
643 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<112>(subkeys)), Xor(temp[1], LoadKey<113>(subkeys))));
644 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<110>(subkeys)), Xor(temp[0], LoadKey<111>(subkeys))));
645 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<108>(subkeys)), Xor(temp[3], LoadKey<109>(subkeys))));
646 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<118>(subkeys)), Xor(temp[2], LoadKey<119>(subkeys))));
647 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<116>(subkeys)), Xor(temp[1], LoadKey<117>(subkeys))));
648 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<114>(subkeys)), Xor(temp[0], LoadKey<115>(subkeys))));
650 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<124>(subkeys)), Xor(temp[3], LoadKey<125>(subkeys))));
651 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<122>(subkeys)), Xor(temp[2], LoadKey<123>(subkeys))));
652 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<120>(subkeys)), Xor(temp[1], LoadKey<121>(subkeys))));
653 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<130>(subkeys)), Xor(temp[0], LoadKey<131>(subkeys))));
654 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<128>(subkeys)), Xor(temp[3], LoadKey<129>(subkeys))));
655 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<126>(subkeys)), Xor(temp[2], LoadKey<127>(subkeys))));
656 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<136>(subkeys)), Xor(temp[1], LoadKey<137>(subkeys))));
657 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<134>(subkeys)), Xor(temp[0], LoadKey<135>(subkeys))));
658 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<132>(subkeys)), Xor(temp[3], LoadKey<133>(subkeys))));
659 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<142>(subkeys)), Xor(temp[2], LoadKey<143>(subkeys))));
660 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<140>(subkeys)), Xor(temp[1], LoadKey<141>(subkeys))));
661 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<138>(subkeys)), Xor(temp[0], LoadKey<139>(subkeys))));
// The next twelve statements use key indices 144..167.
// NOTE(review): lines are missing just above this point; see header note.
665 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<148>(subkeys)), Xor(temp[3], LoadKey<149>(subkeys))));
666 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<146>(subkeys)), Xor(temp[2], LoadKey<147>(subkeys))));
667 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<144>(subkeys)), Xor(temp[1], LoadKey<145>(subkeys))));
668 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<154>(subkeys)), Xor(temp[0], LoadKey<155>(subkeys))));
669 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<152>(subkeys)), Xor(temp[3], LoadKey<153>(subkeys))));
670 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<150>(subkeys)), Xor(temp[2], LoadKey<151>(subkeys))));
671 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<160>(subkeys)), Xor(temp[1], LoadKey<161>(subkeys))));
672 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<158>(subkeys)), Xor(temp[0], LoadKey<159>(subkeys))));
673 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<156>(subkeys)), Xor(temp[3], LoadKey<157>(subkeys))));
674 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<166>(subkeys)), Xor(temp[2], LoadKey<167>(subkeys))));
675 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<164>(subkeys)), Xor(temp[1], LoadKey<165>(subkeys))));
676 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<162>(subkeys)), Xor(temp[0], LoadKey<163>(subkeys))));
// The final twelve statements use key indices 168..191.
// NOTE(review): lines are missing just above this point; see header note.
681 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<172>(subkeys)), Xor(temp[3], LoadKey<173>(subkeys))));
682 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<170>(subkeys)), Xor(temp[2], LoadKey<171>(subkeys))));
683 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<168>(subkeys)), Xor(temp[1], LoadKey<169>(subkeys))));
684 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<178>(subkeys)), Xor(temp[0], LoadKey<179>(subkeys))));
685 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<176>(subkeys)), Xor(temp[3], LoadKey<177>(subkeys))));
686 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<174>(subkeys)), Xor(temp[2], LoadKey<175>(subkeys))));
687 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<184>(subkeys)), Xor(temp[1], LoadKey<185>(subkeys))));
688 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<182>(subkeys)), Xor(temp[0], LoadKey<183>(subkeys))));
689 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<180>(subkeys)), Xor(temp[3], LoadKey<181>(subkeys))));
690 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<190>(subkeys)), Xor(temp[2], LoadKey<191>(subkeys))));
691 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<188>(subkeys)), Xor(temp[1], LoadKey<189>(subkeys))));
692 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<186>(subkeys)), Xor(temp[0], LoadKey<187>(subkeys))));
// LEA_Decryption: transforms the four state words temp[0..3] in place,
// walking the key indices from 191 down to 0. Every statement rotates one
// word (RotateRight<9>, RotateLeft<5> or RotateLeft<3>, in that repeating
// order), subtracts the preceding word XORed with one key word, and XORs the
// result with a second key word. Key words are fetched with
// LoadKey<N>(subkeys).
// NOTE(review): `rounds` is not referenced by any line visible here, and the
// declaration of the element type W is not visible either. The embedded
// numbering skips extra lines before the first statement and before the
// statements that use keys 162 and 138, so conditionals on `rounds` may
// exist in the complete file; confirm before relying on this description.
699 inline void LEA_Decryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
// The first twelve statements use key indices 168..191.
703 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<186>(subkeys))), LoadKey<187>(subkeys));
704 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<188>(subkeys))), LoadKey<189>(subkeys));
705 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<190>(subkeys))), LoadKey<191>(subkeys));
706 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<180>(subkeys))), LoadKey<181>(subkeys));
707 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<182>(subkeys))), LoadKey<183>(subkeys));
708 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<184>(subkeys))), LoadKey<185>(subkeys));
709 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<174>(subkeys))), LoadKey<175>(subkeys));
710 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<176>(subkeys))), LoadKey<177>(subkeys));
711 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<178>(subkeys))), LoadKey<179>(subkeys));
712 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<168>(subkeys))), LoadKey<169>(subkeys));
713 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<170>(subkeys))), LoadKey<171>(subkeys));
714 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<172>(subkeys))), LoadKey<173>(subkeys));
// The next twelve statements use key indices 144..167.
719 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<162>(subkeys))), LoadKey<163>(subkeys));
720 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<164>(subkeys))), LoadKey<165>(subkeys));
721 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<166>(subkeys))), LoadKey<167>(subkeys));
722 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<156>(subkeys))), LoadKey<157>(subkeys));
723 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<158>(subkeys))), LoadKey<159>(subkeys));
724 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<160>(subkeys))), LoadKey<161>(subkeys));
725 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<150>(subkeys))), LoadKey<151>(subkeys));
726 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<152>(subkeys))), LoadKey<153>(subkeys));
727 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<154>(subkeys))), LoadKey<155>(subkeys));
728 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<144>(subkeys))), LoadKey<145>(subkeys));
729 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<146>(subkeys))), LoadKey<147>(subkeys));
730 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<148>(subkeys))), LoadKey<149>(subkeys));
// The remaining statements use key indices 143 down to 0.
733 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<138>(subkeys))), LoadKey<139>(subkeys));
734 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<140>(subkeys))), LoadKey<141>(subkeys));
735 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<142>(subkeys))), LoadKey<143>(subkeys));
736 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<132>(subkeys))), LoadKey<133>(subkeys));
737 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<134>(subkeys))), LoadKey<135>(subkeys));
738 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<136>(subkeys))), LoadKey<137>(subkeys));
739 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<126>(subkeys))), LoadKey<127>(subkeys));
740 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<128>(subkeys))), LoadKey<129>(subkeys));
741 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<130>(subkeys))), LoadKey<131>(subkeys));
742 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<120>(subkeys))), LoadKey<121>(subkeys));
743 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<122>(subkeys))), LoadKey<123>(subkeys));
744 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<124>(subkeys))), LoadKey<125>(subkeys));
746 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<114>(subkeys))), LoadKey<115>(subkeys));
747 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<116>(subkeys))), LoadKey<117>(subkeys));
748 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<118>(subkeys))), LoadKey<119>(subkeys));
749 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<108>(subkeys))), LoadKey<109>(subkeys));
750 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<110>(subkeys))), LoadKey<111>(subkeys));
751 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<112>(subkeys))), LoadKey<113>(subkeys));
752 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<102>(subkeys))), LoadKey<103>(subkeys));
753 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<104>(subkeys))), LoadKey<105>(subkeys));
754 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<106>(subkeys))), LoadKey<107>(subkeys));
755 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<96>(subkeys))), LoadKey<97>(subkeys));
756 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<98>(subkeys))), LoadKey<99>(subkeys));
757 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<100>(subkeys))), LoadKey<101>(subkeys));
759 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<90>(subkeys))), LoadKey<91>(subkeys));
760 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<92>(subkeys))), LoadKey<93>(subkeys));
761 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<94>(subkeys))), LoadKey<95>(subkeys));
762 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<84>(subkeys))), LoadKey<85>(subkeys));
763 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<86>(subkeys))), LoadKey<87>(subkeys));
764 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<88>(subkeys))), LoadKey<89>(subkeys));
765 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<78>(subkeys))), LoadKey<79>(subkeys));
766 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<80>(subkeys))), LoadKey<81>(subkeys));
767 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<82>(subkeys))), LoadKey<83>(subkeys));
768 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<72>(subkeys))), LoadKey<73>(subkeys));
769 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<74>(subkeys))), LoadKey<75>(subkeys));
770 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<76>(subkeys))), LoadKey<77>(subkeys));
772 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<66>(subkeys))), LoadKey<67>(subkeys));
773 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<68>(subkeys))), LoadKey<69>(subkeys));
774 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<70>(subkeys))), LoadKey<71>(subkeys));
775 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<60>(subkeys))), LoadKey<61>(subkeys));
776 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<62>(subkeys))), LoadKey<63>(subkeys));
777 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<64>(subkeys))), LoadKey<65>(subkeys));
778 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<54>(subkeys))), LoadKey<55>(subkeys));
779 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<56>(subkeys))), LoadKey<57>(subkeys));
780 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<58>(subkeys))), LoadKey<59>(subkeys));
781 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<48>(subkeys))), LoadKey<49>(subkeys));
782 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<50>(subkeys))), LoadKey<51>(subkeys));
783 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<52>(subkeys))), LoadKey<53>(subkeys));
785 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<42>(subkeys))), LoadKey<43>(subkeys));
786 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<44>(subkeys))), LoadKey<45>(subkeys));
787 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<46>(subkeys))), LoadKey<47>(subkeys));
788 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<36>(subkeys))), LoadKey<37>(subkeys));
789 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<38>(subkeys))), LoadKey<39>(subkeys));
790 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<40>(subkeys))), LoadKey<41>(subkeys));
791 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<30>(subkeys))), LoadKey<31>(subkeys));
792 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<32>(subkeys))), LoadKey<33>(subkeys));
793 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<34>(subkeys))), LoadKey<35>(subkeys));
794 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<24>(subkeys))), LoadKey<25>(subkeys));
795 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<26>(subkeys))), LoadKey<27>(subkeys));
796 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<28>(subkeys))), LoadKey<29>(subkeys));
798 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<18>(subkeys))), LoadKey<19>(subkeys));
799 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<20>(subkeys))), LoadKey<21>(subkeys));
800 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<22>(subkeys))), LoadKey<23>(subkeys));
801 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<12>(subkeys))), LoadKey<13>(subkeys));
802 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<14>(subkeys))), LoadKey<15>(subkeys));
803 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<16>(subkeys))), LoadKey<17>(subkeys));
804 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<6>(subkeys))), LoadKey<7>(subkeys));
805 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<8>(subkeys))), LoadKey<9>(subkeys));
806 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<10>(subkeys))), LoadKey<11>(subkeys));
807 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<0>(subkeys))), LoadKey<1>(subkeys));
808 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<2>(subkeys))), LoadKey<3>(subkeys));
809 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<4>(subkeys))), LoadKey<5>(subkeys));
812 #endif // LEA Encryption and Decryption 816 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 818 inline void LEA_Enc_Block(uint32x4_t &block0,
819 const word32 *subkeys,
unsigned int rounds)
822 temp[0] = UnpackNEON<0>(block0);
823 temp[1] = UnpackNEON<1>(block0);
824 temp[2] = UnpackNEON<2>(block0);
825 temp[3] = UnpackNEON<3>(block0);
827 LEA_Encryption(temp, subkeys, rounds);
829 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
832 inline void LEA_Dec_Block(uint32x4_t &block0,
833 const word32 *subkeys,
unsigned int rounds)
836 temp[0] = UnpackNEON<0>(block0);
837 temp[1] = UnpackNEON<1>(block0);
838 temp[2] = UnpackNEON<2>(block0);
839 temp[3] = UnpackNEON<3>(block0);
841 LEA_Decryption(temp, subkeys, rounds);
843 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
846 inline void LEA_Enc_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
847 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
850 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
851 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
852 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
853 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
855 LEA_Encryption(temp, subkeys, rounds);
857 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
858 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
859 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
860 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
863 inline void LEA_Dec_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
864 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
867 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
868 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
869 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
870 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
872 LEA_Decryption(temp, subkeys, rounds);
874 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
875 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
876 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
877 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
880 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 884 #if (CRYPTOPP_SSSE3_AVAILABLE) 886 inline void LEA_Enc_Block(__m128i &block0,
887 const word32 *subkeys,
unsigned int rounds)
890 temp[0] = UnpackXMM<0>(block0);
891 temp[1] = UnpackXMM<1>(block0);
892 temp[2] = UnpackXMM<2>(block0);
893 temp[3] = UnpackXMM<3>(block0);
895 LEA_Encryption(temp, subkeys, rounds);
897 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
900 inline void LEA_Dec_Block(__m128i &block0,
901 const word32 *subkeys,
unsigned int rounds)
904 temp[0] = UnpackXMM<0>(block0);
905 temp[1] = UnpackXMM<1>(block0);
906 temp[2] = UnpackXMM<2>(block0);
907 temp[3] = UnpackXMM<3>(block0);
909 LEA_Decryption(temp, subkeys, rounds);
911 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
914 inline void LEA_Enc_4_Blocks(__m128i &block0, __m128i &block1,
915 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
918 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
919 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
920 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
921 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
923 LEA_Encryption(temp, subkeys, rounds);
925 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
926 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
927 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
928 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
931 inline void LEA_Dec_4_Blocks(__m128i &block0, __m128i &block1,
932 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
935 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
936 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
937 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
938 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
940 LEA_Decryption(temp, subkeys, rounds);
942 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
943 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
944 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
945 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
948 #endif // CRYPTOPP_SSSE3_AVAILABLE 952 #if (CRYPTOPP_POWER8_AVAILABLE) 955 const word32 *subkeys,
unsigned int rounds)
// POWER8 single-vector encryption path: block0 is split into four working
// vectors with UnpackSIMD<0..3>, LEA_Encryption runs over them with the given
// subkeys and round count, and RepackSIMD<0> is written back to block0.
// NOTE(review): the header line carrying the function name and the block0
// parameter is missing here, as are the braces and the temp[] declaration.
// By analogy with the NEON and SSSE3 sections this is presumably
// LEA_Enc_Block taking a POWER8 vector reference -- confirm against upstream.
// NOTE(review): the file head appears to #undef CRYPTOPP_POWER8_AVAILABLE, so
// this section looks compiled out -- confirm.
958 temp[0] = UnpackSIMD<0>(block0);
959 temp[1] = UnpackSIMD<1>(block0);
960 temp[2] = UnpackSIMD<2>(block0);
961 temp[3] = UnpackSIMD<3>(block0);
963 LEA_Encryption(temp, subkeys, rounds);
// Only selector 0 is repacked because there is a single output vector.
965 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
969 const word32 *subkeys,
unsigned int rounds)
// POWER8 single-vector decryption path: block0 is split into four working
// vectors with UnpackSIMD<0..3>, LEA_Decryption runs over them with the given
// subkeys and round count, and RepackSIMD<0> is written back to block0.
// NOTE(review): the header line carrying the function name and the block0
// parameter is missing here, as are the braces and the temp[] declaration.
// Presumably this is LEA_Dec_Block taking a POWER8 vector reference --
// confirm against upstream.
972 temp[0] = UnpackSIMD<0>(block0);
973 temp[1] = UnpackSIMD<1>(block0);
974 temp[2] = UnpackSIMD<2>(block0);
975 temp[3] = UnpackSIMD<3>(block0);
977 LEA_Decryption(temp, subkeys, rounds);
// Only selector 0 is repacked because there is a single output vector.
979 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-vector encryption path: UnpackSIMD<i> combines block0..block3
// into working vector temp[i], LEA_Encryption runs over the working vectors,
// and RepackSIMD<j> rebuilds block j from them.
// NOTE(review): the entire signature, the braces and the temp[] declaration
// are missing here. Presumably this is LEA_Enc_4_Blocks for POWER8 vectors --
// confirm against upstream.
986 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
987 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
988 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
989 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
991 LEA_Encryption(temp, subkeys, rounds);
993 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
994 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
995 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
996 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-vector decryption path: UnpackSIMD<i> combines block0..block3
// into working vector temp[i], LEA_Decryption runs over the working vectors,
// and RepackSIMD<j> rebuilds block j from them.
// NOTE(review): the entire signature, the braces and the temp[] declaration
// are missing here. Presumably this is LEA_Dec_4_Blocks for POWER8 vectors --
// confirm against upstream.
1003 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
1004 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
1005 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
1006 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
1008 LEA_Decryption(temp, subkeys, rounds);
1010 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
1011 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
1012 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
1013 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
1016 #endif // CRYPTOPP_POWER8_AVAILABLE 1018 ANONYMOUS_NAMESPACE_END
1024 #if defined(CRYPTOPP_SSSE3_AVAILABLE) 1025 size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1026 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1029 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1032 size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1033 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1036 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1038 #endif // CRYPTOPP_SSSE3_AVAILABLE 1040 #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) 1041 size_t LEA_Enc_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1042 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1045 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1048 size_t LEA_Dec_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1049 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
1052 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1054 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 1056 #if defined(CRYPTOPP_POWER8_AVAILABLE) 1057 size_t LEA_Enc_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1058 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
// POWER8 entry point for LEA encryption. The visible line below is the tail
// of a call that passes all seven parameters through unchanged.
// NOTE(review): the braces and the line that starts the call are missing
// here. Presumably the callee is AdvancedProcessBlocks128_4x1_ALTIVEC with
// the POWER8 one-block and four-block encryption helpers as callbacks --
// confirm against upstream.
1061 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1064 size_t LEA_Dec_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1065 const byte *inBlocks,
const byte *xorBlocks,
byte *outBlocks,
size_t length,
word32 flags)
// POWER8 entry point for LEA decryption. The visible line below is the tail
// of a call that passes all seven parameters through unchanged.
// NOTE(review): the braces and the line that starts the call are missing
// here. Presumably the callee is AdvancedProcessBlocks128_4x1_ALTIVEC with
// the POWER8 one-block and four-block decryption helpers as callbacks --
// confirm against upstream.
1068 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif // CRYPTOPP_POWER8_AVAILABLE

Utility functions for the Crypto++ library.
Classes for the LEA block cipher.
unsigned int word32
32-bit unsigned datatype
T1 VecSub(const T1 vec1, const T2 vec2)
Subtract two vectors.
Library configuration file.
T1 VecAdd(const T1 vec1, const T2 vec2)
Add two vectors.
T1 VecPermute(const T1 vec, const T2 mask)
Permutes a vector.
__vector unsigned int uint32x4_p
Vector of 32-bit elements.
Support functions for PowerPC and vector operations.
Template for AdvancedProcessBlocks and SIMD processing.
size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
T1 VecXor(const T1 vec1, const T2 vec2)
XOR two vectors.
unsigned char byte
8-bit unsigned datatype
size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
__vector unsigned long long uint64x2_p
Vector of 64-bit elements.
Crypto++ library namespace.
__vector unsigned char uint8x16_p
Vector of 8-bit elements.