More IGE speedup.
author Ben Laurie <ben@openssl.org>
Sun, 13 May 2007 15:14:38 +0000 (15:14 +0000)
committer Ben Laurie <ben@openssl.org>
Sun, 13 May 2007 15:14:38 +0000 (15:14 +0000)
CHANGES
apps/speed.c
crypto/aes/aes_ige.c

diff --git a/CHANGES b/CHANGES
index 03b2c5a..8cc6aa1 100644 (file)
--- a/CHANGES
+++ b/CHANGES
 
  Changes between 0.9.8e and 0.9.8f  [xx XXX xxxx]
 
+  *) Squeeze another 10% out of IGE mode when in != out.
+     [Ben Laurie]
+
   *) AES IGE mode speedup.
      [Dean Gaudet (Google)]
 
diff --git a/apps/speed.c b/apps/speed.c
index 4447f73..c2ee1de 100644 (file)
--- a/apps/speed.c
+++ b/apps/speed.c
@@ -1703,7 +1703,7 @@ int MAIN(int argc, char **argv)
                        print_message(names[D_IGE_128_AES],c[D_IGE_128_AES][j],lengths[j]);
                        Time_F(START);
                        for (count=0,run=1; COND(c[D_IGE_128_AES][j]); count++)
-                               AES_ige_encrypt(buf,buf,
+                               AES_ige_encrypt(buf,buf2,
                                        (unsigned long)lengths[j],&aes_ks1,
                                        iv,AES_ENCRYPT);
                        d=Time_F(STOP);
@@ -1717,7 +1717,7 @@ int MAIN(int argc, char **argv)
                        print_message(names[D_IGE_192_AES],c[D_IGE_192_AES][j],lengths[j]);
                        Time_F(START);
                        for (count=0,run=1; COND(c[D_IGE_192_AES][j]); count++)
-                               AES_ige_encrypt(buf,buf,
+                               AES_ige_encrypt(buf,buf2,
                                        (unsigned long)lengths[j],&aes_ks2,
                                        iv,AES_ENCRYPT);
                        d=Time_F(STOP);
@@ -1731,7 +1731,7 @@ int MAIN(int argc, char **argv)
                        print_message(names[D_IGE_256_AES],c[D_IGE_256_AES][j],lengths[j]);
                        Time_F(START);
                        for (count=0,run=1; COND(c[D_IGE_256_AES][j]); count++)
-                               AES_ige_encrypt(buf,buf,
+                               AES_ige_encrypt(buf,buf2,
                                        (unsigned long)lengths[j],&aes_ks3,
                                        iv,AES_ENCRYPT);
                        d=Time_F(STOP);
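diff --git a/crypto/aes/aes_ige.c b/crypto/aes/aes_ige.c
index 337f680..aeb11db 100644 (file)
--- a/crypto/aes/aes_ige.c
+++ b/crypto/aes/aes_ige.c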
 #include <openssl/aes.h>
 #include "aes_locl.h"
 
-/*
-static void hexdump(FILE *f,const char *title,const unsigned char *s,int l)
-    {
-    int n=0;
-
-    fprintf(f,"%s",title);
-    for( ; n < l ; ++n)
-               {
-               if((n%16) == 0)
-                       fprintf(f,"\n%04x",n);
-               fprintf(f," %02x",s[n]);
-               }
-    fprintf(f,"\n");
-    }
-*/
-
 #define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
 typedef struct {
         unsigned long data[N_WORDS];
@@ -95,73 +79,123 @@ void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
                                         unsigned char *ivec, const int enc)
        {
        unsigned long n;
-        unsigned long len;
-        aes_block_t tmp, tmp2;
-        aes_block_t iv;
-        aes_block_t iv2;
+       unsigned long len;
 
        OPENSSL_assert(in && out && key && ivec);
        OPENSSL_assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
        OPENSSL_assert((length%AES_BLOCK_SIZE) == 0);
 
-        len = length / AES_BLOCK_SIZE;
-        load_block(iv, ivec);
-        load_block(iv2, ivec + AES_BLOCK_SIZE);
+       len = length / AES_BLOCK_SIZE;
 
        if (AES_ENCRYPT == enc)
                {
-               /* XXX: Do a separate case for when in != out (strictly should
-                  check for overlap, too) */
-               while (len)
+               if (in != out)
                        {
-                        load_block(tmp, in);
-                       /*                      hexdump(stdout, "in", in, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv", iv, AES_BLOCK_SIZE); */
-                        for(n=0 ; n < N_WORDS; ++n)
-                                tmp2.data[n] = tmp.data[n] ^ iv.data[n];
-                       /*                      hexdump(stdout, "in ^ iv", out, AES_BLOCK_SIZE); */
-                       AES_encrypt((unsigned char *)tmp2.data, (unsigned char *)tmp2.data, key);
-                       /*                      hexdump(stdout,"enc", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); */
-                       for(n=0 ; n < N_WORDS; ++n)
-                                tmp2.data[n] ^= iv2.data[n];
-                        store_block(out, tmp2);
-                       /*                      hexdump(stdout,"out", out, AES_BLOCK_SIZE); */
-                        iv = tmp2;
-                        iv2 = tmp;
-                       --len;
-                       in += AES_BLOCK_SIZE;
-                       out += AES_BLOCK_SIZE;
+                       aes_block_t *ivp = (aes_block_t *)ivec;
+                       aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+                       while (len)
+                               {
+                               aes_block_t *inp = (aes_block_t *)in;
+                               aes_block_t *outp = (aes_block_t *)out;
+
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       outp->data[n] = inp->data[n] ^ ivp->data[n];
+                               AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key);
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       outp->data[n] ^= iv2p->data[n];
+                               ivp = outp;
+                               iv2p = inp;
+                               --len;
+                               in += AES_BLOCK_SIZE;
+                               out += AES_BLOCK_SIZE;
+                               }
+                       memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+                       memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+                       }
+               else
+                       {
+                       aes_block_t tmp, tmp2;
+                       aes_block_t iv;
+                       aes_block_t iv2;
+
+                       load_block(iv, ivec);
+                       load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+                       while (len)
+                               {
+                               load_block(tmp, in);
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       tmp2.data[n] = tmp.data[n] ^ iv.data[n];
+                               AES_encrypt((unsigned char *)tmp2.data, (unsigned char *)tmp2.data, key);
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       tmp2.data[n] ^= iv2.data[n];
+                               store_block(out, tmp2);
+                               iv = tmp2;
+                               iv2 = tmp;
+                               --len;
+                               in += AES_BLOCK_SIZE;
+                               out += AES_BLOCK_SIZE;
+                               }
+                       memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+                       memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
                        }
-               memcpy(ivec, iv.data, AES_BLOCK_SIZE);
-               memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
                }
        else
                {
-               while (len)
+               if(in != out)
                        {
-                        load_block(tmp, in);
-                        tmp2 = tmp;
-                       /*                      hexdump(stdout, "in", in, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv2", iv2, AES_BLOCK_SIZE); */
-                       for(n=0 ; n < N_WORDS; ++n)
-                                tmp.data[n] ^= iv2.data[n];
-                       /*                      hexdump(stdout, "in ^ iv2", tmp, AES_BLOCK_SIZE); */
-                       AES_decrypt((unsigned char *)tmp.data, (unsigned char *)tmp.data, key);
-                       /*                      hexdump(stdout, "dec", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv", iv, AES_BLOCK_SIZE); */
-                       for(n=0 ; n < N_WORDS; ++n)
-                               tmp.data[n] ^= iv.data[n];
-                        store_block(out, tmp);
-                       /*                      hexdump(stdout, "out", out, AES_BLOCK_SIZE); */
-                        iv = tmp2;
-                        iv2 = tmp;
-                       --len;
-                       in += AES_BLOCK_SIZE;
-                       out += AES_BLOCK_SIZE;
+                       aes_block_t *ivp = (aes_block_t *)ivec;
+                       aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+                       while (len)
+                               {
+                               aes_block_t tmp;
+                               aes_block_t *inp = (aes_block_t *)in;
+                               aes_block_t *outp = (aes_block_t *)out;
+
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       tmp.data[n] = inp->data[n] ^ iv2p->data[n];
+                               AES_decrypt((unsigned char *)tmp.data, (unsigned char *)outp->data, key);
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       outp->data[n] ^= ivp->data[n];
+                               ivp = inp;
+                               iv2p = outp;
+                               --len;
+                               in += AES_BLOCK_SIZE;
+                               out += AES_BLOCK_SIZE;
+                               }
+                       memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+                       memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+                       }
+               else
+                       {
+                       aes_block_t tmp, tmp2;
+                       aes_block_t iv;
+                       aes_block_t iv2;
+
+                       load_block(iv, ivec);
+                       load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+                       while (len)
+                               {
+                               load_block(tmp, in);
+                               tmp2 = tmp;
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       tmp.data[n] ^= iv2.data[n];
+                               AES_decrypt((unsigned char *)tmp.data, (unsigned char *)tmp.data, key);
+                               for(n=0 ; n < N_WORDS; ++n)
+                                       tmp.data[n] ^= iv.data[n];
+                               store_block(out, tmp);
+                               iv = tmp2;
+                               iv2 = tmp;
+                               --len;
+                               in += AES_BLOCK_SIZE;
+                               out += AES_BLOCK_SIZE;
+                               }
+                       memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+                       memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
                        }
-                memcpy(ivec, iv.data, AES_BLOCK_SIZE);
-               memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
                }
        }
 
@@ -200,17 +234,11 @@ void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
                iv2 = ivec + AES_BLOCK_SIZE;
                while (len >= AES_BLOCK_SIZE)
                        {
-                       /*                      hexdump(stdout, "in", in, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv", iv, AES_BLOCK_SIZE); */
                        for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
                                out[n] = in[n] ^ iv[n];
-                       /*                      hexdump(stdout, "in ^ iv", out, AES_BLOCK_SIZE); */
                        AES_encrypt(out, out, key);
-                       /*                      hexdump(stdout,"enc", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout,"iv2", iv2, AES_BLOCK_SIZE); */
                        for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
                                out[n] ^= iv2[n];
-                       /*                      hexdump(stdout,"out", out, AES_BLOCK_SIZE); */
                        iv = out;
                        memcpy(prev, in, AES_BLOCK_SIZE);
                        iv2 = prev;
@@ -226,8 +254,6 @@ void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
                while(len >= AES_BLOCK_SIZE)
                        {
                        out -= AES_BLOCK_SIZE;
-                       /*                      hexdump(stdout, "intermediate", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv", iv, AES_BLOCK_SIZE); */
                        /* XXX: reduce copies by alternating between buffers */
                        memcpy(tmp, out, AES_BLOCK_SIZE);
                        for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
@@ -258,17 +284,11 @@ void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
                        out -= AES_BLOCK_SIZE;
                        memcpy(tmp, in, AES_BLOCK_SIZE);
                        memcpy(tmp2, in, AES_BLOCK_SIZE);
-                       /*                      hexdump(stdout, "in", in, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv2", iv2, AES_BLOCK_SIZE); */
                        for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
                                tmp[n] ^= iv2[n];
-                       /*                      hexdump(stdout, "in ^ iv2", tmp, AES_BLOCK_SIZE); */
                        AES_decrypt(tmp, out, key);
-                       /*                      hexdump(stdout, "dec", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv", iv, AES_BLOCK_SIZE); */
                        for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
                                out[n] ^= iv[n];
-                       /*                      hexdump(stdout, "out", out, AES_BLOCK_SIZE); */
                        memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
                        iv = tmp3;
                        iv2 = out;
@@ -283,17 +303,11 @@ void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
                        {
                        memcpy(tmp, out, AES_BLOCK_SIZE);
                        memcpy(tmp2, out, AES_BLOCK_SIZE);
-                       /*                      hexdump(stdout, "intermediate", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv2", iv2, AES_BLOCK_SIZE); */
                        for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
                                tmp[n] ^= iv2[n];
-                       /*                      hexdump(stdout, "out ^ iv2", tmp, AES_BLOCK_SIZE); */
                        AES_decrypt(tmp, out, key);
-                       /*                      hexdump(stdout, "dec", out, AES_BLOCK_SIZE); */
-                       /*                      hexdump(stdout, "iv", ivec, AES_BLOCK_SIZE); */
                        for(n=0 ; n < AES_BLOCK_SIZE ; ++n)
                                out[n] ^= iv[n];
-                       /*                      hexdump(stdout, "out", out, AES_BLOCK_SIZE); */
                        memcpy(tmp3, tmp2, AES_BLOCK_SIZE);
                        iv = tmp3;
                        iv2 = out;
@@ -301,6 +315,5 @@ void AES_bi_ige_encrypt(const unsigned char *in, unsigned char *out,
                        in += AES_BLOCK_SIZE;
                        out += AES_BLOCK_SIZE;
                        }
-
                }
        }