Complete synchronization of aes-x86_64 with aes-586.
[openssl.git] / crypto / ia64cpuid.S
1 // Works on all IA-64 platforms: Linux, HP-UX, Win64i...
2 // On Win64i compile with ias.exe.
3 .text
4
// OPENSSL_cpuid_setup: empty stub. The whole body is a single return
// through b0; no register or memory is read or written here.
5 .global OPENSSL_cpuid_setup#
6 .proc   OPENSSL_cpuid_setup#
7 OPENSSL_cpuid_setup:
8 { .mib; br.ret.sptk.many        b0              };;     // return to the caller right away
9 .endp   OPENSSL_cpuid_setup#
10
// OPENSSL_rdtsc: copies the ar.itc application register (the Itanium
// interval time counter) into r8 and returns.
//   Out: r8 = value read from ar.itc
11 .global OPENSSL_rdtsc#
12 .proc   OPENSSL_rdtsc#
13 OPENSSL_rdtsc:
14 { .mib; mov                     r8=ar.itc       // r8 = current ar.itc
15         br.ret.sptk.many        b0              };;
16 .endp   OPENSSL_rdtsc#
17
// OPENSSL_atomic_add: adds r33 to the 32-bit word addressed by r32 with
// a compare-and-exchange retry loop and returns the resulting sum,
// sign-extended from 32 bits, in r8.
// NOTE(review): r32/r33 are presumably the first two C arguments
// (pointer to the counter, amount to add) -- confirm against the C
// prototype, which is not visible in this file.
// Scratch: r2 (value last observed in memory), r3 (value the exchange
// expected), r8 (candidate sum), p6, ar.ccv.
18 .global OPENSSL_atomic_add#
19 .proc   OPENSSL_atomic_add#
20 .align  32
21 OPENSSL_atomic_add:
22 { .mii; ld4             r2=[r32]        // r2 = current 32-bit value at [r32]
23         nop.i           0
24         nop.i           0               };;
25 .Lspin:
26 { .mii; mov             ar.ccv=r2       // value the exchange must find in memory
27         add             r8=r2,r33       // r8 = observed value + addend
28         mov             r3=r2           };;     // keep the expected value for the check below
29 { .mmi; mf                              // memory fence ahead of the exchange
30         cmpxchg4.acq    r2=[r32],r8,ar.ccv      // r2 = value read; r8 is stored only if it equals ar.ccv
31         nop.i           0               };;
32 { .mib; cmp.ne          p6,p0=r2,r3     // p6 = memory held something other than expected
33         nop.i           0
34 (p6)    br.dpnt         .Lspin          };;     // retry; r2 already holds the freshly read value
35 { .mib; nop.m           0
36         sxt4            r8=r8           // return the sum sign-extended from 32 bits
37         br.ret.sptk.many        b0      };;
38 .endp   OPENSSL_atomic_add#
39
40 // Returns a structure comprising a pointer to the top of the
41 // caller's stack and a pointer just beyond the backing storage for
42 // the current register frame. The latter is needed because wiping
43 // the backing storage for the current frame alone (as this procedure
44 // does) may be insufficient: one might have to go further, toward
45 // higher addresses, to reach the whole "retroactively" saved
46 // context...
// OPENSSL_wipe_cpu: zeroes scratch registers and a window of the
// register backing store, then returns two values.
//   Out: r8 = sp on return
//        r9 = ar.bsp as read on entry to the body, plus 96*8-8
//   Zeroed explicitly: r11, r14-r31, f6-f31.
//   Zeroed through the loop: r127/f127 once per pass while the frame,
//   allocated with 96 rotating registers, is advanced by br.ctop.
//   NOTE(review): this presumably reaches every rotating register
//   r32-r127 / f32-f127 -- confirm against the br.ctop rotation rules.
//   Saved and restored: predicates (held in r8 during the loop), ar.lc
//   (held in r3). The previous ar.pfs is kept in r2.
47 .global OPENSSL_wipe_cpu#
48 .proc   OPENSSL_wipe_cpu#
49 .align  32
50 OPENSSL_wipe_cpu:
51         .prologue
52         .fframe 0
53         .save   ar.pfs,r2
54         .save   ar.lc,r3
55 { .mib; alloc           r2=ar.pfs,0,96,0,96     // 0 in, 96 local, 0 out, 96 rotating
56         mov             r3=ar.lc                // preserve the caller's loop counter
57         brp.loop.imp    .L_wipe_top,.L_wipe_end-16
58                                         };;
59 { .mii; mov             r9=ar.bsp       // r9 = backing store pointer
60         mov             r8=pr           // stash predicates; restored after the loop
61         mov             ar.lc=96        };;     // loop count for br.ctop below
62         .body
63 { .mii; add             r9=96*8-8,r9    // start at the highest slot; the loop walks downwards
64         mov             ar.ec=1         };;
65
66 // One can sweep twice as fast, but then we can't guarantee
67 // that backing storage is wiped...
68 .L_wipe_top:
69 { .mfi; st8             [r9]=r0,-8      // zero 8 bytes at [r9], then r9 -= 8
70         mov             f127=f0         // zero the FP register currently named f127
71         mov             r127=r0         }       // zero the GR currently named r127
72 { .mfb; nop.m           0
73         nop.f           0
74         br.ctop.sptk    .L_wipe_top     };;
75 .L_wipe_end:
76
// Straight-line wipe of the non-rotating registers listed in the header.
77 { .mfi; mov             r11=r0
78         mov             f6=f0
79         mov             r14=r0          }
80 { .mfi; mov             r15=r0
81         mov             f7=f0
82         mov             r16=r0          }
83 { .mfi; mov             r17=r0
84         mov             f8=f0
85         mov             r18=r0          }
86 { .mfi; mov             r19=r0
87         mov             f9=f0
88         mov             r20=r0          }
89 { .mfi; mov             r21=r0
90         mov             f10=f0
91         mov             r22=r0          }
92 { .mfi; mov             r23=r0
93         mov             f11=f0
94         mov             r24=r0          }
95 { .mfi; mov             r25=r0
96         mov             f12=f0
97         mov             r26=r0          }
98 { .mfi; mov             r27=r0
99         mov             f13=f0
100         mov             r28=r0          }
101 { .mfi; mov             r29=r0
102         mov             f14=f0
103         mov             r30=r0          }
104 { .mfi; mov             r31=r0
105         mov             f15=f0
106         nop.i           0               }
107 { .mfi; mov             f16=f0          }
108 { .mfi; mov             f17=f0          }
109 { .mfi; mov             f18=f0          }
110 { .mfi; mov             f19=f0          }
111 { .mfi; mov             f20=f0          }
112 { .mfi; mov             f21=f0          }
113 { .mfi; mov             f22=f0          }
114 { .mfi; mov             f23=f0          }
115 { .mfi; mov             f24=f0          }
116 { .mfi; mov             f25=f0          }
117 { .mfi; mov             f26=f0          }
118 { .mfi; mov             f27=f0          }
119 { .mfi; mov             f28=f0          }
120 { .mfi; mov             f29=f0          }
121 { .mfi; mov             f30=f0          }
122 { .mfi; add             r9=96*8+8,r9    // move r9 back up to form the second return value
123         mov             f31=f0
124         mov             pr=r8,0x1ffff   }       // restore the predicates stashed in r8
125 { .mib; mov             r8=sp           // first return value: current stack pointer
126         mov             ar.lc=r3        // restore the caller's loop counter
127         br.ret.sptk     b0              };;
128 .endp   OPENSSL_wipe_cpu#
129
// OPENSSL_cleanse: overwrite a memory region with zero bytes.
// C prototype in OpenSSL's public API:
//     void OPENSSL_cleanse(void *ptr, size_t len);
//   In:  r32 = ptr, r33 = len
//   Out: none
//   Scratch: r2, p6, p7; r32/r33 are consumed as running pointer/count.
// Strategy:
//   len == 0  -> return at once, no memory is touched;
//   len <  15 -> clear one byte per pass (.Little);
//   len >= 15 -> clear single bytes until ptr is 8-byte aligned (.Lot),
//                then 8 bytes per pass (.Laligned), then hand a 1..7
//                byte remainder, if any, to .Little.
130 .global OPENSSL_cleanse#
131 .proc   OPENSSL_cleanse#
132 OPENSSL_cleanse:
// .Little stores its first byte before it looks at the count, so a
// zero length has to be filtered out up front; otherwise one byte of a
// zero-length buffer would be overwritten.
{ .mib; cmp.eq          p6,p0=0,r33         // len==0
        nop.i           0
(p6)    br.ret.spnt     b0              };;
133 { .mib; and             r2=7,r32            // r2 = ptr&7, misalignment tested at .Lot
134         cmp.leu         p6,p0=15,r33        // len>=15
135 (p6)    br.cond.dptk    .Lot            };;
136
// Byte-at-a-time path; entered only with len>=1.
137 .Little:
138 { .mib; st1             [r32]=r0,1
139         cmp.ltu         p6,p7=1,r33     }  // len>1
140 { .mbb; add             r33=-1,r33         // len--
141 (p6)    br.cond.dptk    .Little
142 (p7)    br.ret.sptk.many        b0      };;
143
// Alignment path: clear single bytes until ptr is a multiple of 8.
144 .Lot:
145 { .mib; cmp.eq          p6,p0=0,r2
146 (p6)    br.cond.dptk    .Laligned       };;
147 { .mmi; st1             [r32]=r0,1;;
148         and             r2=7,r32        }   // re-test alignment of the advanced ptr
149 { .mib; add             r33=-1,r33
150         br              .Lot            };;
151
// Bulk path: ptr is 8-byte aligned and at least 8 bytes remain.
152 .Laligned:
153 { .mmi; st8             [r32]=r0,8
154         and             r2=-8,r33           // len&~7
155         add             r33=-8,r33      };; // len-=8
156 { .mib; cmp.ltu         p6,p0=8,r2          // ((len+8)&~7)>8
157 (p6)    br.cond.dptk    .Laligned       };;
158
// Fewer than 8 bytes remain: finish in .Little, or return if none.
159 { .mbb; cmp.eq          p6,p7=r0,r33
160 (p7)    br.cond.dpnt    .Little
161 (p6)    br.ret.sptk.many        b0      };;
162 .endp   OPENSSL_cleanse#