+#ifdef BSWAP4
+ /*
+  * Branch taken when BSWAP4 is defined.  For index i, each of the eight
+  * context words A..H is passed through BSWAP4 and stored in
+  * blocks[i].d[0..7]; the context word is then overwritten with the
+  * matching entry of key->tail.h[].  The read must precede the overwrite
+  * on each line, so the statement order matters.
+  * NOTE(review): looks like a finished digest being laid out as the
+  * message of a follow-up hash whose state is reseeded from key->tail --
+  * confirm against the enclosing function.
+  */
+ blocks[i].d[0] = BSWAP4(ctx->A[i]); ctx->A[i] = key->tail.h[0];
+ blocks[i].d[1] = BSWAP4(ctx->B[i]); ctx->B[i] = key->tail.h[1];
+ blocks[i].d[2] = BSWAP4(ctx->C[i]); ctx->C[i] = key->tail.h[2];
+ blocks[i].d[3] = BSWAP4(ctx->D[i]); ctx->D[i] = key->tail.h[3];
+ blocks[i].d[4] = BSWAP4(ctx->E[i]); ctx->E[i] = key->tail.h[4];
+ blocks[i].d[5] = BSWAP4(ctx->F[i]); ctx->F[i] = key->tail.h[5];
+ blocks[i].d[6] = BSWAP4(ctx->G[i]); ctx->G[i] = key->tail.h[6];
+ blocks[i].d[7] = BSWAP4(ctx->H[i]); ctx->H[i] = key->tail.h[7];
+ /*
+  * Byte 32 of the block gets 0x80.  Presumably the hash padding marker
+  * placed right after the eight words above (assumes d[] elements are
+  * 4 bytes wide and c[] aliases the same storage -- TODO confirm).
+  */
+ blocks[i].c[32] = 0x80;
+ /*
+  * Word 15 gets (64+32)*8 = 768 passed through BSWAP4.  Presumably the
+  * message length in bits: one 64-byte block plus the 32 bytes stored
+  * above -- confirm against the enclosing function.
+  */
+ blocks[i].d[15] = BSWAP4((64+32)*8);
+#else