7 #ifndef CRYPTOPP_GENERATE_X64_MASM
13 NAMESPACE_BEGIN(CryptoPP)
// Clear every byte of the state buffer.
18 memset(m_state, 0, m_state.SizeInBytes());
19 #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
// Store the run-time SSSE3 capability in state word 17. The assembly routine
// in this file loads this word and, under the same SSSE3 guard, tests its
// low bit; the C++ Iterate code reads the same word as its buffer offset.
20 m_state[17] = HasSSSE3();
24 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
26 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
// Prototype of the Panama_SSE2_Pull routine for builds where
// CRYPTOPP_X64_MASM_AVAILABLE is defined.
//   count - callers in this file pass an iteration count (32 or iterationCount)
//   state - pointer to the word32 state array (callers pass m_state)
//   z     - callers pass the output buffer cast to word32*, or NULL
//   y     - callers pass the input buffer cast to const word32*, or NULL
28 void Panama_SSE2_Pull(
size_t count, word32 *state, word32 *z,
const word32 *y);
30 #elif CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
// Panama_SSE2_Pull: SSE2 (optionally SSSE3) assembly implementation. The same
// instruction stream is emitted either as a MASM procedure or as a C++
// function containing inline assembly, depending on the build macros.
// NOTE(review): the embedded line numbers in this listing are not contiguous,
// so jumps, labels and other instructions between the shown lines may be
// missing. The comments below describe only the visible instructions.
32 #ifdef CRYPTOPP_GENERATE_X64_MASM
33 Panama_SSE2_Pull PROC FRAME
40 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
41 void CRYPTOPP_NOINLINE Panama_SSE2_Pull(
size_t count, word32 *state, word32 *z,
const word32 *y)
43 #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
// GNU-style inline assembly: switch the assembler to Intel syntax.
46 ".intel_syntax noprefix;"
// Bring the count and state arguments into working registers.
49 AS2( mov AS_REG_1, count)
50 AS2( mov AS_REG_2, state)
// REG_loopEnd names the location of the loop bound: a stack slot ([esp]),
// rdi, or r8, depending on the active preprocessor branch.
57 #define REG_loopEnd [esp]
58 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
59 #define REG_loopEnd rdi
61 #define REG_loopEnd r8
// AS_REG_6 is loaded from state word 17 and added into AS_REG_1; AS_REG_1 is
// then saved as the loop bound.
66 AS2( mov AS_REG_6d, [AS_REG_2+4*17])
67 AS2( add AS_REG_1, AS_REG_6)
70 AS2( mov REG_loopEnd, AS_REG_1)
// Load the first 64 bytes of the state into xmm0..xmm3 and the dword at byte
// offset 4*16 into eax.
76 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
77 AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
78 AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
79 AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
80 AS2( mov eax, dword ptr [AS_REG_2+4*16])
// SSSE3 builds test bit 0 of AS_REG_6 here — presumably to choose between the
// movss/pshufd sequence and the palignr sequence that follow; confirm against
// the jump targets, which are not part of this listing.
84 #
if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
85 AS2( test AS_REG_6, 1)
// movss + pshufd sequence producing xmm5 and xmm6.
88 AS2( movdqa xmm6, xmm2)
89 AS2( movss xmm6, xmm3)
90 ASS( pshufd xmm5, xmm6, 0, 3, 2, 1)
92 AS2( movdqa xmm7, xmm3)
93 AS2( movss xmm7, xmm6)
94 ASS( pshufd xmm6, xmm7, 0, 3, 2, 1)
95 #
if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
// SSSE3-guarded sequence using palignr with a 4-byte shift.
98 AS2( movdqa xmm5, xmm3)
99 AS3( palignr xmm5, xmm2, 4)
101 AS3( palignr xmm6, xmm3, 4)
// The low dwords of xmm2 and xmm3 are moved to general registers, OR-ed
// together, and the result is XOR-ed into eax.
105 AS2( movd AS_REG_1d, xmm2)
107 AS2( movd AS_REG_7d, xmm3)
108 AS2( or AS_REG_1d, AS_REG_7d)
109 AS2( xor eax, AS_REG_1d)
// SSE2_Index(i) maps i to ((i)*13+16) mod 17, the same index mapping the
// a(i)/c(i) macros use in the C++ Iterate code.
111 #define SSE2_Index(i) ASM_MOD(((i)*13+16), 17)
// Lines of a multi-line macro body (its #define line is not shown): move the
// low dword of xmm7 to a register, rotate it left, and store it to the state
// word selected by SSE2_Index.
114 AS2( movd AS_REG_1d, xmm7)\
115 AS2( rol AS_REG_1d, ASM_MOD((ASM_MOD(5*i,17)*(ASM_MOD(5*i,17)+1)/2), 32))\
116 AS2( mov [AS_REG_2+SSE2_Index(ASM_MOD(5*(i), 17))*4], AS_REG_1d)
// pi4(x, y, z, a, b, c, d): multi-line macro of which only some lines appear
// here. It is invoked four times below with rotating register arguments and
// the index groups (1,5,9,13), (2,6,10,14), (3,7,11,15), (4,8,12,16).
118 #define pi4(x, y, z, a, b, c, d) \
119 AS2( pcmpeqb xmm7, xmm7)\
124 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
126 AS2( punpckhqdq xmm7, xmm7)\
128 ASS( pshuflw xmm7, xmm7, 1, 0, 3, 2)\
131 pi4(xmm1, xmm2, xmm3, 1, 5, 9, 13)
132 pi4(xmm0, xmm1, xmm2, 2, 6, 10, 14)
133 pi4(xmm6, xmm0, xmm1, 3, 7, 11, 15)
134 pi4(xmm5, xmm6, xmm0, 4, 8, 12, 16)
// Rearrange the vectors with unpack instructions (results in xmm1..xmm4).
137 AS2( movdqa xmm4, xmm3)
138 AS2( punpcklqdq xmm3, xmm2)
139 AS2( punpckhdq xmm4, xmm2)
140 AS2( movdqa xmm2, xmm1)
141 AS2( punpcklqdq xmm1, xmm0)
142 AS2( punpckhdq xmm2, xmm0)
// Zero test on AS_REG_3 — presumably skips the output section when no
// destination pointer was supplied (callers may pass NULL); TODO confirm.
145 AS2( test AS_REG_3, AS_REG_3)
147 AS2( movdqa xmm6, xmm4)
148 AS2( punpcklqdq xmm4, xmm2)
149 AS2( punpckhqdq xmm6, xmm2)
// Alignment test (low 4 bits) and zero test on AS_REG_4.
150 AS2( test AS_REG_4, 15)
152 AS2( test AS_REG_4, AS_REG_4)
// Variant with memory operands: XOR 32 bytes at [AS_REG_4] into xmm4/xmm6,
// then advance AS_REG_4 by 32.
154 AS2( pxor xmm4, [AS_REG_4])
155 AS2( pxor xmm6, [AS_REG_4+16])
156 AS2( add AS_REG_4, 32)
// Variant with unaligned loads (movdqu) before the XOR.
159 AS2( movdqu xmm0, [AS_REG_4])
160 AS2( movdqu xmm2, [AS_REG_4+16])
161 AS2( pxor xmm4, xmm0)
162 AS2( pxor xmm6, xmm2)
163 AS2( add AS_REG_4, 32)
// Alignment test on AS_REG_3; the 32 bytes in xmm4/xmm6 are stored with
// movdqa in one variant and movdqu in the other, each advancing AS_REG_3.
165 AS2( test AS_REG_3, 15)
167 AS2( movdqa XMMWORD_PTR [AS_REG_3], xmm4)
168 AS2( movdqa XMMWORD_PTR [AS_REG_3+16], xmm6)
169 AS2( add AS_REG_3, 32)
172 AS2( movdqu XMMWORD_PTR [AS_REG_3], xmm4)
173 AS2( movdqu XMMWORD_PTR [AS_REG_3+16], xmm6)
174 AS2( add AS_REG_3, 32)
// Buffer offsets: AS_REG_1 = (AS_REG_6 + 32) & (31*32) and
// AS_REG_7 = (AS_REG_6 + (32-24)*32) & (31*32). The buffer is addressed from
// byte offset 20*4 of the state (compare bPtr in the C++ Iterate code).
178 AS2( lea AS_REG_1, [AS_REG_6 + 32])
179 AS2( and AS_REG_1, 31*32)
180 AS2( lea AS_REG_7, [AS_REG_6 + (32-24)*32])
181 AS2( and AS_REG_7, 31*32)
// First 16 bytes at offset AS_REG_1: XOR into xmm3 and store xmm3 back; the
// previously loaded entry is shuffled and XOR-ed into the entry at
// AS_REG_7+2*8.
183 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8])
184 AS2( pxor xmm3, xmm0)
185 ASS( pshufd xmm0, xmm0, 2, 3, 0, 1)
186 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3)
187 AS2( pxor xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8])
188 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0)
// Second 16 bytes at offset AS_REG_1: same pattern with xmm1, without the
// shuffle, updating the entry at AS_REG_7+0*8.
190 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8])
191 AS2( pxor xmm1, xmm4)
192 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1)
193 AS2( pxor xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8])
194 AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4)
// Reload xmm0..xmm3 from the first 64 bytes of the state.
197 AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16])
198 AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16])
199 AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16])
200 AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16])
// As earlier, SSSE3 builds test bit 0 of AS_REG_6 ahead of two alternative
// instruction sequences that fill xmm4..xmm7.
202 #
if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
203 AS2( test AS_REG_6, 1)
// movss + pshufd sequence.
207 AS2( movdqa xmm7, xmm3)
208 AS2( movss xmm7, xmm6)
209 AS2( movdqa xmm6, xmm2)
210 AS2( movss xmm6, xmm3)
211 AS2( movdqa xmm5, xmm1)
212 AS2( movss xmm5, xmm2)
213 AS2( movdqa xmm4, xmm0)
214 AS2( movss xmm4, xmm1)
215 ASS( pshufd xmm7, xmm7, 0, 3, 2, 1)
216 ASS( pshufd xmm6, xmm6, 0, 3, 2, 1)
217 ASS( pshufd xmm5, xmm5, 0, 3, 2, 1)
218 ASS( pshufd xmm4, xmm4, 0, 3, 2, 1)
// SSSE3-guarded sequence using movq + palignr.
219 #
if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
223 AS3( palignr xmm7, xmm3, 4)
224 AS2( movq xmm6, xmm3)
225 AS3( palignr xmm6, xmm2, 4)
226 AS2( movq xmm5, xmm2)
227 AS3( palignr xmm5, xmm1, 4)
228 AS2( movq xmm4, xmm1)
229 AS3( palignr xmm4, xmm0, 4)
// XOR the low dwords of xmm0 and xmm3 into eax.
234 AS2( movd AS_REG_1d, xmm0)
235 AS2( xor eax, AS_REG_1d)
236 AS2( movd AS_REG_1d, xmm3)
237 AS2( xor eax, AS_REG_1d)
// XOR each vector with its neighbour, then with the copies in xmm4..xmm7.
239 AS2( pxor xmm3, xmm2)
240 AS2( pxor xmm2, xmm1)
241 AS2( pxor xmm1, xmm0)
242 AS2( pxor xmm0, xmm7)
243 AS2( pxor xmm3, xmm7)
244 AS2( pxor xmm2, xmm6)
245 AS2( pxor xmm1, xmm5)
246 AS2( pxor xmm0, xmm4)
// Buffer offsets (AS_REG_6 + (32-4)*32) & (31*32) and
// (AS_REG_6 + 16*32) & (31*32); these match the b4 and b16 pointers computed
// in the C++ Iterate code.
249 AS2( lea AS_REG_1, [AS_REG_6 + (32-4)*32])
250 AS2( and AS_REG_1, 31*32)
251 AS2( lea AS_REG_7, [AS_REG_6 + 16*32])
252 AS2( and AS_REG_7, 31*32)
// Interleave the 64-bit halves of the first 16 bytes of both buffer entries
// and XOR them into xmm3 and xmm2.
254 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16])
255 AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16])
256 AS2( movdqa xmm6, xmm4)
257 AS2( punpcklqdq xmm4, xmm5)
258 AS2( punpckhqdq xmm6, xmm5)
259 AS2( pxor xmm3, xmm4)
260 AS2( pxor xmm2, xmm6)
// Same for the second 16 bytes, XOR-ed into xmm1 and xmm0.
262 AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16])
263 AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16])
264 AS2( movdqa xmm6, xmm4)
265 AS2( punpcklqdq xmm4, xmm5)
266 AS2( punpckhqdq xmm6, xmm5)
267 AS2( pxor xmm1, xmm4)
268 AS2( pxor xmm0, xmm6)
// Advance AS_REG_6 by 32 and compare it with the saved loop bound.
271 AS2( add AS_REG_6, 32)
272 AS2( cmp AS_REG_6, REG_loopEnd)
// Write eax and xmm0..xmm3 back to the state.
276 AS2( mov [AS_REG_2+4*16], eax)
277 AS2( movdqa XMMWORD_PTR [AS_REG_2+3*16], xmm3)
278 AS2( movdqa XMMWORD_PTR [AS_REG_2+2*16], xmm2)
279 AS2( movdqa XMMWORD_PTR [AS_REG_2+1*16], xmm1)
280 AS2( movdqa XMMWORD_PTR [AS_REG_2+0*16], xmm0)
282 #
if CRYPTOPP_BOOL_X86
// GNU-style inline assembly: restore AT&T syntax, then give the operand
// constraints and clobber list.
288 #ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
290 ".att_syntax prefix;"
// The x64 constraint set binds count/state/z/y to "D"/"S"/"d"/"c" and lists
// r8-r10, eax and xmm0-xmm7 as clobbered; the constraint set that follows it
// binds them to "c"/"d"/"S"/"D" and lists only eax, memory and cc.
292 #
if CRYPTOPP_BOOL_X64
293 :
"D" (count),
"S" (state),
"d" (z),
"c" (y)
294 :
"%r8",
"%r9",
"r10",
"%eax",
"memory",
"cc",
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7"
296 :
"c" (count),
"d" (state),
"S" (z),
"D" (y)
297 :
"%eax",
"memory",
"cc"
// MASM build: reload xmm6 and xmm7 from the stack before the procedure ends.
301 #ifdef CRYPTOPP_GENERATE_X64_MASM
302 movdqa xmm6, [rsp + 0h]
303 movdqa xmm7, [rsp + 10h]
307 Panama_SSE2_Pull ENDP
311 #endif // #ifdef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
313 #ifndef CRYPTOPP_GENERATE_X64_MASM
// Panama<B>::Iterate: C++ implementation of the iteration routine.
//   count     - iteration count supplied by the caller
//   p         - message words read by the US()/TS1S() macros (byte-reversed
//               according to B::ToEnum()); callers may pass NULL
//   output    - passed by callers that want output (e.g. a digest buffer)
//   input     - passed by the keystream caller; NULL elsewhere in this file
//   operation - keystream operation selector; presumably consumed by
//               CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH — TODO confirm
// NOTE(review): the loop and branch structure around the macro invocations is
// not part of this listing; the comments describe the visible statements only.
316 void Panama<B>::Iterate(
size_t count,
const word32 *p, byte *output,
const byte *input, KeystreamOperation operation)
// The buffer offset is kept in state word 17 between calls.
318 word32 bstart = m_state[17];
319 word32 *
const aPtr = m_state;
// The buffer begins 20 words into the state array.
322 #define bPtr ((byte *)(aPtr+20))
// a(i) / c(i): word i of the respective array, reached through the permuted
// index ((i)*13+16) % 17.
327 #define a(i) aPtr[((i)*13+16) % 17] // 13 is inverse of 4 mod 17
328 #define c(i) cPtr[((i)*13+16) % 17]
// b(i, j): word j of the buffer pointer b<i>, with the index remapped to
// (j)*2%8 + (j)/4.
330 #define b(i, j) b##i[(j)*2%8 + (j)/4]
// US(i) / UL(i): replace word i of b0 with its old value XOR-ed with a
// message word (US) or with a(i+1) (UL), and XOR the old value into word
// (i+6)%8 of b25.
333 #define US(i) {word32 t=b(0,i); b(0,i)=ConditionalByteReverse(B::ToEnum(), p[i])^t; b(25,(i+6)%8)^=t;}
334 #define UL(i) {word32 t=b(0,i); b(0,i)=a(i+1)^t; b(25,(i+6)%8)^=t;}
// GP(i): c(5*i%17) = rotate-left of a(i) ^ (a(i+1) | ~a(i+2)), indices taken
// mod 17, by ((5*i%17)*((5*i%17)+1)/2) % 32 bits.
336 #define GP(i) c(5*i%17) = rotlFixed(a(i) ^ (a((i+1)%17) | ~a((i+2)%17)), ((5*i%17)*((5*i%17)+1)/2)%32)
// T(i,x): a(i) = c(i) ^ c(i+1) ^ c(i+4) ^ x. TS1S supplies a message word,
// TS1L supplies b(4,i), and TS2 supplies b(16,i) for a(i+9).
338 #define T(i,x) a(i) = c(i) ^ c((i+1)%17) ^ c((i+4)%17) ^ x
339 #define TS1S(i) T(i+1, ConditionalByteReverse(B::ToEnum(), p[i]))
340 #define TS1L(i) T(i+1, b(4,i))
341 #define TS2(i) T(i+9, b(16,i))
// PANAMA_OUTPUT(x): hands a(9)..a(16) to CRYPTOPP_KEYSTREAM_OUTPUT_WORD as
// output words 0..7.
347 #define PANAMA_OUTPUT(x) \
348 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, a(0+9));\
349 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, a(1+9));\
350 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, a(2+9));\
351 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a(3+9));\
352 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 4, a(4+9));\
353 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 5, a(5+9));\
354 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 6, a(6+9));\
355 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 7, a(7+9));
// Output step; 4*8 is presumably the byte count produced per iteration
// (eight 4-byte words) — TODO confirm against the macro definition.
357 typedef word32 WordType;
358 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(PANAMA_OUTPUT, 4*8);
// Buffer pointers for this iteration. Each offset is relative to bstart and
// wrapped with & 31*32, i.e. one of 32 positions spaced 32 bytes apart.
361 word32 *
const b16 = (word32 *)(bPtr+((bstart+16*32) & 31*32));
362 word32 *
const b4 = (word32 *)(bPtr+((bstart+(32-4)*32) & 31*32));
364 word32 *
const b0 = (word32 *)(bPtr+((bstart) & 31*32));
365 word32 *
const b25 = (word32 *)(bPtr+((bstart+(32-25)*32) & 31*32));
// Buffer update using the message words p[0..7].
369 US(0); US(1); US(2); US(3); US(4); US(5); US(6); US(7);
// Buffer update using state words a(1)..a(8).
373 UL(0); UL(1); UL(2); UL(3); UL(4); UL(5); UL(6); UL(7);
// State words a(1)..a(8), with the message words as the extra XOR term.
398 TS1S(0); TS1S(1); TS1S(2); TS1S(3); TS1S(4); TS1S(5); TS1S(6); TS1S(7);
// State words a(1)..a(8), with buffer words b(4, i) as the extra XOR term.
403 TS1L(0); TS1L(1); TS1L(2); TS1L(3); TS1L(4); TS1L(5); TS1L(6); TS1L(7);
// State words a(9)..a(16), with buffer words b(16, i) as the extra XOR term.
406 TS2(0); TS2(1); TS2(2); TS2(3); TS2(4); TS2(5); TS2(6); TS2(7);
// Persist the buffer offset for the next call.
408 m_state[17] = bstart;
// Feed every whole block contained in `length` bytes of `input` to Iterate
// and return the number of leftover bytes (length % BLOCKSIZE).
413 size_t PanamaHash<B>::HashMultipleBlocks(
const word32 *input,
size_t length)
// Number of whole blocks = length / BLOCKSIZE.
415 this->Iterate(length / this->BLOCKSIZE, input);
416 return length % this->BLOCKSIZE;
// Validate the requested digest size (presumably throws when it is invalid).
422 this->ThrowIfInvalidTruncatedSize(size);
// Pad the last block; PadLastBlock receives BLOCKSIZE and 0x01 (presumably
// the first padding byte — TODO confirm).
424 this->PadLastBlock(this->BLOCKSIZE, 0x01);
// Process the padded block held in m_data.
426 HashEndianCorrectedBlock(this->m_data);
// One iteration with no message words, with buf as the output destination.
431 this->Iterate(1, NULL, buf.BytePtr(), NULL);
// Copy the first `size` bytes of buf into the caller's hash buffer.
433 memcpy(hash, buf, size);
// Keep a 32-byte copy of the key in m_key.
443 memcpy(m_key, key, 32);
// One iteration with the stored key as the message words.
451 this->Iterate(1, m_key);
// When an IV was supplied and is word32-aligned, feed it to Iterate directly.
452 if (iv && IsAligned<word32>(iv))
453 this->Iterate(1, (
const word32 *)iv);
// Iterate over buf instead — presumably the other branch, using a local
// copy of the IV; its set-up is not part of this listing (TODO confirm).
461 this->Iterate(1, buf);
464 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
// Assembly path: for the little-endian variant, when HasSSE2() is true and
// IsP4() is false, run 32 iterations with no output and no input buffer.
465 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2() && !IsP4())
466 Panama_SSE2_Pull(32, this->m_state, NULL, NULL);
472 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
476 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
477 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
488 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
// Keystream generation: the little-endian variant uses the assembly routine
// when SSE2 is available at run time, passing the output and input buffers
// as word32 pointers.
489 if (B::ToEnum() == LITTLE_ENDIAN_ORDER && HasSSE2())
490 Panama_SSE2_Pull(iterationCount, this->m_state, (word32 *)output, (
const word32 *)input);
// C++ path: Iterate with no message words and the same buffers and operation.
493 this->Iterate(iterationCount, NULL, output, input, operation);
507 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
void TruncatedFinal(byte *hash, size_t size)
truncated version of Final()
base class, do not use directly
interface for retrieving values given their names