poly1305/asm/poly1305-x86_64.pl: switch to vpermdd in table expansion.
[openssl.git] / crypto / idea / idea_lcl.h
1 /*
2  * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the OpenSSL license (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9
10 /*
11  * The new form of this macro (check if the a*b == 0) was suggested by Colin
12  * Plumb <colin@nyx10.cs.du.edu>
13  */
14 /* Removal of the inner if from Wei Dai 24/4/96 */
/*
 * idea_mul(r, a, b, ul): IDEA multiplication of two 16-bit operands.
 *
 *   r  - lvalue receiving the result; only its low 16 bits are meaningful,
 *        the upper bits may hold borrow residue, so mask before reuse.
 *   a  - first operand, expected to be already reduced to 16 bits.
 *   b  - second operand, expected to be already reduced to 16 bits.
 *   ul - scratch lvalue (unsigned long) that receives the full product a*b.
 *
 * A non-zero product is reduced with the low-half minus high-half trick
 * (2^16 is congruent to -1 modulo 0x10001); the second subtraction folds
 * the borrow back into the low 16 bits.  A zero product means one operand
 * was 0, and the result is then 1 - a - b.
 *
 * The expansion is a single statement, so it is safe in unbraced if/else
 * bodies; the invocation must be terminated with a semicolon.  a and b may
 * be evaluated twice, so do not pass expressions with side effects.
 */
#define idea_mul(r,a,b,ul) \
    do { \
        (ul) = (unsigned long)(a) * (b); \
        if ((ul) != 0) { \
            (r) = ((ul) & 0xffff) - ((ul) >> 16); \
            (r) -= ((r) >> 16); \
        } else { \
            /* assuming a or b is 0 and in range */ \
            (r) = (-(int)(a) - (b) + 1); \
        } \
    } while (0)
24
25 /*
26  * 7/12/95 - Many thanks to Rhys Weatherley <rweather@us.oracle.com> for
27  * pointing out that I was assuming little-endian byte order for all
28  * quantities, whereas IDEA actually uses big-endian.  Nowhere in the spec does
29  * it mention this; it is all in terms of 16-bit numbers, and even the example
30  * does not use byte streams for the input example :-(. If you byte swap each
31  * pair of input, keys and iv, the functions will produce the same output as
32  * the old version :-(.
33  */
34
/*
 * n2ln(c, l1, l2, n): partial big-endian load of n bytes (1..8) from c.
 *
 * Bytes 0-3 are packed into l1 and bytes 4-7 into l2, most significant byte
 * first; byte positions at or beyond n contribute zero.
 *
 * NOTE - unlike n2l, c is not left advanced: it is first moved to c + n and
 * then stepped back one byte per case, so it ends where it started.  For n
 * outside 1..8 no case matches: nothing is read, l1 and l2 are zero and c
 * stays at c + n.
 */
#define n2ln(c,l1,l2,n) { \
        (c) += (n); \
        (l1) = (l2) = 0; \
        /* every case deliberately falls through to the next one */ \
        switch (n) { \
        case 8: (l2)  = ((unsigned long)(*(--(c)))); \
        case 7: (l2) |= ((unsigned long)(*(--(c)))) <<  8; \
        case 6: (l2) |= ((unsigned long)(*(--(c)))) << 16; \
        case 5: (l2) |= ((unsigned long)(*(--(c)))) << 24; \
        case 4: (l1)  = ((unsigned long)(*(--(c)))); \
        case 3: (l1) |= ((unsigned long)(*(--(c)))) <<  8; \
        case 2: (l1) |= ((unsigned long)(*(--(c)))) << 16; \
        case 1: (l1) |= ((unsigned long)(*(--(c)))) << 24; \
        } \
    }
50
/*
 * l2nn(l1, l2, c, n): partial big-endian store of n bytes (1..8) to c.
 *
 * l1 supplies output bytes 0-3 and l2 supplies bytes 4-7, most significant
 * byte first; only the first n byte positions are written.
 *
 * NOTE - unlike l2n, c is not left advanced: it is first moved to c + n and
 * then stepped back one byte per case, so it ends where it started.  For n
 * outside 1..8 no case matches: nothing is written and c stays at c + n.
 */
#define l2nn(l1,l2,c,n) { \
        (c) += (n); \
        /* every case deliberately falls through to the next one */ \
        switch (n) { \
        case 8: *(--(c)) = (unsigned char)(((l2)      ) & 0xff); \
        case 7: *(--(c)) = (unsigned char)(((l2) >>  8) & 0xff); \
        case 6: *(--(c)) = (unsigned char)(((l2) >> 16) & 0xff); \
        case 5: *(--(c)) = (unsigned char)(((l2) >> 24) & 0xff); \
        case 4: *(--(c)) = (unsigned char)(((l1)      ) & 0xff); \
        case 3: *(--(c)) = (unsigned char)(((l1) >>  8) & 0xff); \
        case 2: *(--(c)) = (unsigned char)(((l1) >> 16) & 0xff); \
        case 1: *(--(c)) = (unsigned char)(((l1) >> 24) & 0xff); \
        } \
    }
65
/*
 * n2l(c, l): read four bytes at c as a big-endian value into l, most
 * significant byte first, advancing c past each byte (c ends at c + 4).
 * Expands to a single comma expression.  Any earlier definition of n2l is
 * discarded first.
 */
#undef n2l
#define n2l(c,l) \
    ((l)  = ((unsigned long)(*((c)++))) << 24, \
     (l) |= ((unsigned long)(*((c)++))) << 16, \
     (l) |= ((unsigned long)(*((c)++))) <<  8, \
     (l) |= ((unsigned long)(*((c)++))))
71
/*
 * l2n(l, c): write the low 32 bits of l to c as four bytes, most significant
 * byte first, advancing c past each byte (c ends at c + 4).  Expands to a
 * single comma expression.  Any earlier definition of l2n is discarded
 * first.
 */
#undef l2n
#define l2n(l,c) \
    (*((c)++) = (unsigned char)(((l) >> 24) & 0xff), \
     *((c)++) = (unsigned char)(((l) >> 16) & 0xff), \
     *((c)++) = (unsigned char)(((l) >>  8) & 0xff), \
     *((c)++) = (unsigned char)(((l)      ) & 0xff))
77
/*
 * s2n(l, c): write the low 16 bits of l to c as two bytes in network
 * (big-endian) order, high byte first, advancing c past each byte (c ends
 * at c + 2).  Expands to a single comma expression.  Any earlier definition
 * of s2n is discarded first.
 *
 * The previous expansion emitted the low byte first, which made it
 * little-endian and not the inverse of n2s; the byte order now matches n2s
 * and l2n.
 */
#undef s2n
#define s2n(l,c) \
    (*((c)++) = (unsigned char)(((l) >> 8) & 0xff), \
     *((c)++) = (unsigned char)(((l)     ) & 0xff))
81
/*
 * n2s(c, l): read two bytes at c as a big-endian 16-bit value into l, high
 * byte first, advancing c past each byte (c ends at c + 2).  Each byte is
 * widened to IDEA_INT (declared outside this header section) before it is
 * shifted.  Expands to a single comma expression.  Any earlier definition of
 * n2s is discarded first.
 */
#undef n2s
#define n2s(c,l) \
    ((l)  = ((IDEA_INT)(*((c)++))) << 8, \
     (l) |= ((IDEA_INT)(*((c)++))))
85
86
/*
 * E_IDEA(num): one block-mixing step, presumably a single IDEA encryption
 * round (NOTE(review): confirm against the caller).
 *
 * Works entirely on variables in the invoking scope: the data words x1..x4,
 * the temporaries t0, t1 and ul, and the subkey pointer p.  Six subkeys are
 * consumed and p is left advanced by six.  The num argument does not appear
 * in the expansion.
 *
 * x1, x4, t0 and t1 are masked to 16 bits before being multiplied; x2 and
 * x3 are only added to and XORed here, so their upper bits are not cleared
 * by this macro.  The expansion is a plain statement sequence and is not
 * safe as the body of an unbraced if/else.
 */
#define E_IDEA(num) \
        /* combine the four data words with subkeys 1-4 */ \
        x1 &= 0xffff; \
        idea_mul(x1, x1, *p, ul); \
        p++; \
        x2 += *(p++); \
        x3 += *(p++); \
        x4 &= 0xffff; \
        idea_mul(x4, x4, *p, ul); \
        p++; \
        /* multiply-add mixing with subkeys 5 and 6 */ \
        t0 = (x1 ^ x3) & 0xffff; \
        idea_mul(t0, t0, *p, ul); \
        p++; \
        t1 = (t0 + (x2 ^ x4)) & 0xffff; \
        idea_mul(t1, t1, *p, ul); \
        p++; \
        t0 += t1; \
        /* fold the mixed values back in; x2 and x3 trade places */ \
        x1 ^= t1; \
        x4 ^= t0; \
        ul = x2 ^ t0; /* do the swap to x3 */ \
        x2 = x3 ^ t1; \
        x3 = ul;