# ====================================================================
#
# December 2014
-#
+#
# ChaCha20 for ARMv4.
#
# Performance in cycles per byte out of large buffer.
#include "arm_arch.h"
.text
-#if defined(__thumb2__)
+#if defined(__thumb2__) || defined(__clang__)
.syntax unified
+#endif
+#if defined(__thumb2__)
.thumb
#else
.code 32
vadd.i32 $d2,$d1,$t0 @ counter+2
str @t[3], [sp,#4*(16+15)]
mov @t[3],#10
- add @x[12],@x[12],#3 @ counter+3
+ add @x[12],@x[12],#3 @ counter+3
b .Loop_neon
.align 4