?lvsl $outperm,0,$out # prepare for unaligned store
?vperm $outmask,$outmask,$T0,$outperm
+ be?lvsl $T0,0,@x[0] # 0x00..0f
be?vspltisb $T1,3 # 0x03..03
- be?vxor $inpperm,$inpperm,$T1 # swap bytes within words
+ be?vxor $T0,$T0,$T1 # swap bytes within words
be?vxor $outperm,$outperm,$T1
+ be?vperm $inpperm,$inpperm,$inpperm,$T0 # reorder $inpperm's own bytes using the word-swapped index vector in $T0
b Loop_outer_vmx