1 // Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
2 // Permission is hereby granted, free of charge, to any person
3 // obtaining a copy of this software and associated documentation
4 // files (the "Software"), to deal in the Software without
5 // restriction, including without limitation the rights to use,
6 // copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the
8 // Software is furnished to do so, subject to the following
11 // The above copyright notice and this permission notice shall be
12 // included in all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 // OTHER DEALINGS IN THE SOFTWARE.
// Bit flags for the context's valid_regs mask. Each flag marks one saved
// item (or group of items) as holding a valid value.
58 VALID_PRIUNAT = 1 << 8 // bit 8: priunat slot
64 VALID_BRS = 0x1f << 20 // bits 20-24: five flags, presumably one per saved branch register b1-b5 (confirm)
65 VALID_BASIC4 = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM // ip, sp, bsp, cfm
66 VALID_SPEC = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC // preds + application registers
67 VALID_REGS = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS // everything except the FR flags
69 // valid_regs and valid_frs are separate unsigned int fields.
70 // In order to store them with a single st8, we need to know
// the byte order, so that each 32-bit mask lands in its own field:
// VALID_BITS packs the two masks into one 64-bit value accordingly.
72 #ifdef __LITTLE_ENDIAN__
73 VALID_BITS = (VALID_FRS << 32) | VALID_REGS // little-endian: valid_regs in the low 32 bits, valid_frs in the high 32 bits
75 VALID_BITS = (VALID_REGS << 32) | VALID_FRS // valid_regs in the high 32 bits, valid_frs in the low 32 bits
80 // int uwx_self_init_context(struct uwx_env *env);
82 // Stores a snapshot of the caller's context in the uwx_env structure.
//
// Input:  r32 = env (the single input register declared by the alloc).
// Output: ret0 = 0 (UWX_OK).
//
// The "env+N" notes on the stores are byte offsets from env. The store
// pointers rENV0/rENV1/rENV2 are advanced by the post-increment operand
// of each store, so the order of the stores matters.
84 .proc uwx_self_init_context
85 .global uwx_self_init_context
86 uwx_self_init_context:
88 alloc rPFS = ar.pfs, 1, 0, 0, 0 // 1 input, no locals/outputs/rotating; rPFS = caller's ar.pfs
91 SWIZZLE rENV0 = r0, r32 // rENV0 = &env
94 extr.u rNSLOT = rPFS, 7, 7 // nslots = pfs.sol
98 add rENV1 = 136, rENV0 // rENV1 = &env->context.gr[0]
99 add rENV2 = 144, rENV0 // rENV2 = &env->context.gr[1]
101 and rRSC0 = -4, rRSC // clear ar.rsc.mode
102 adds rNATP = 0x1f8, r0 // rNATP = 0x1f8: mask selecting address bits 8:3
// Spill r4-r7. st8.spill also records each register's NaT bit in ar.unat,
// at the bit position given by bits 8:3 of the spill address.
105 st8.spill [rENV1] = r4, 16 // env+136: r4
106 st8.spill [rENV2] = r5, 16 // env+144: r5
109 st8.spill [rENV1] = r6, 16 // env+152: r6
110 st8.spill [rENV2] = r7, 16 // env+160: r7
// Store the branch registers b1-b5 from the temporaries holding them.
113 st8 [rENV1] = rTMP1, 16 // env+168: b1
114 st8 [rENV2] = rTMP2, 16 // env+176: b2
117 st8 [rENV1] = rTMP3, 16 // env+184: b3
118 st8 [rENV2] = rTMP1, 16 // env+192: b4
121 st8 [rENV1] = rTMP2 // env+200: b5
// ar.rnat is read with ar.rsc.mode cleared; ar.rsc is put back afterwards.
122 mov ar.rsc = rRSC0 // enforced lazy mode
125 mov rRNAT = ar.rnat // get copy of ar.rnat
126 movl rTMP1 = VALID_BITS // valid_regs: ip, sp, bsp, cfm,
127 // preds, priunat, rnat, unat, fpsr,
129 // = 0x1ff3d8f00000000
131 mov ar.rsc = rRSC // restore ar.rsc
// Build priunat: rotate the UNaT value right by the bit position of the
// first spill slot (env+136), so that the NaT bit recorded for r4 lands
// in bit 0 and the bits for r5-r7 follow it.
133 add rTMP3 = 136, rENV0 // spill_loc = &env->context.gr[0]
137 extr.u rTMP3 = rTMP3, 3, 6 // bitpos = spill_loc{8:3}
139 and rBIAS = rBSP, rNATP // bias = (bsp & 0x1f8) ...
140 sub rTMP4 = 64, rTMP3 // (64 - bitpos)
141 shr rTMP5 = rTMP2, rTMP3 // (unat >> bitpos)
144 extr.u rBIAS = rBIAS, 3, 6 // ... div 8
145 shl rTMP2 = rTMP2, rTMP4 // (unat << (64 - bitpos))
147 or rTMP2 = rTMP2, rTMP5 // rotate_right(unat, bitpos)
// Compute the caller's bsp: step back nslots 8-byte slots from bsp, plus
// one extra slot if (nslots - bsp{8:3}) > 0 and another if it is > 63.
// NOTE(review): the extra slots presumably account for RNAT collection
// words lying between prev_bsp and bsp -- confirm against the RSE
// backing-store layout.
151 st8 [rENV0] = rTMP1, 16 // env+0: valid_regs mask
152 st8 [rENV1] = rRP, 24 // env+8: ip (my rp)
153 sub rBIAS = rNSLOT, rBIAS // bias = nslots - bias
155 cmp.lt p6, p0 = 0, rBIAS // if (0 < bias) ...
156 cmp.lt p7, p0 = 63, rBIAS // if (63 < bias) ...
158 st8 [rENV0] = r12, 48 // env+16: sp
159 st8 [rENV1] = rPFS, 40 // env+32: cfm (my pfs)
160 (p6) add rNSLOT = 1, rNSLOT // ... nslots++
162 st8 [rENV0] = rTMP4, 24 // env+64: preds
163 st8 [rENV1] = rTMP2, 24 // env+72: priunat
164 (p7) add rNSLOT = 1, rNSLOT // ... nslots++
166 st8 [rENV0] = rRNAT, -64 // env+88: ar.rnat
167 st8 [rENV1] = rUNAT, 8 // env+96: ar.unat
168 dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3)
170 sub rPBSP = rBSP, rTMP3 // prev_bsp = bsp - (nslots << 3)
174 st8 [rENV0] = rPBSP, 184 // env+24: bsp (my prev bsp); rENV0 -> env+208
175 st8 [rENV1] = rTMP3, 8 // env+104: ar.fpsr
176 add rENV2 = 320, rENV2 // rENV2 = &env->context.rstate
178 st8 [rENV1] = rTMP1, 112 // env+112: ar.lc; rENV1 -> env+224
179 STPTR [rENV2] = r0 // env+528: env->rstate = 0
// Spill f2-f5 and f16-f31 into consecutive 16-byte slots starting at
// env+208; rENV0 and rENV1 alternate, each advancing by 32 per store.
182 // THIS CODE NEEDS TO BE SCHEDULED!!!
183 stf.spill [rENV0] = f2, 32 // env+208: f2
184 stf.spill [rENV1] = f3, 32 // env+224: f3
186 stf.spill [rENV0] = f4, 32 // env+240: f4
187 stf.spill [rENV1] = f5, 32 // env+256: f5
189 stf.spill [rENV0] = f16, 32 // env+272: f16
190 stf.spill [rENV1] = f17, 32 // env+288: f17
192 stf.spill [rENV0] = f18, 32 // env+304: f18
193 stf.spill [rENV1] = f19, 32 // env+320: f19
195 stf.spill [rENV0] = f20, 32 // env+336: f20
196 stf.spill [rENV1] = f21, 32 // env+352: f21
198 stf.spill [rENV0] = f22, 32 // env+368: f22
199 stf.spill [rENV1] = f23, 32 // env+384: f23
201 stf.spill [rENV0] = f24, 32 // env+400: f24
202 stf.spill [rENV1] = f25, 32 // env+416: f25
204 stf.spill [rENV0] = f26, 32 // env+432: f26
205 stf.spill [rENV1] = f27, 32 // env+448: f27
207 stf.spill [rENV0] = f28, 32 // env+464: f28
208 stf.spill [rENV1] = f29, 32 // env+480: f29
210 stf.spill [rENV0] = f30, 32 // env+496: f30
211 stf.spill [rENV1] = f31, 32 // env+512: f31
214 mov ret0 = r0 // return UWX_OK
218 // uwx_self_install_context(
219 // struct uwx_env *env,
227 // Installs the given context, and sets the landing pad binding
228 // registers r15-r18 to the values given.
229 // Returns the value "ret" to the new context (for testing --
230 // when transferring to a landing pad, the new context won't
231 // care about the return value).
233 .proc uwx_self_install_context
234 .global uwx_self_install_context
235 uwx_self_install_context:
237 alloc rMYPFS = ar.pfs, 6, 0, 0, 0
239 SWIZZLE rENV0 = r0, r32 // rENV0 = &env
242 // THIS CODE NEEDS TO BE SCHEDULED!!!
244 // Restore GR 4-7 and ar.unat
245 add rENV1 = 136, rENV0 // &env->context.gr[0]
246 add rENV2 = 72, rENV0 // &env->context.priunat
248 ld8 rTMP2 = [rENV2], 24 // env+72: priunat
249 extr.u rTMP3 = rENV1, 3, 6 // bitpos = spill_loc{8:3}
251 ld8 rUNAT = [rENV2], 48 // env+96: ar.unat
252 sub rTMP4 = 64, rTMP3 // (64 - bitpos)
253 shl rTMP5 = rTMP2, rTMP3 // (unat << bitpos)
255 shr rTMP2 = rTMP2, rTMP4 // (unat >> (64 - bitpos))
257 or rTMP2 = rTMP2, rTMP5 // rotate_left(unat, bitpos)
259 mov ar.unat = rTMP2 // put priunat in place
261 ld8.fill r4 = [rENV1], 16 // env+136: r4
262 ld8.fill r5 = [rENV2], 16 // env+144: r5
264 ld8.fill r6 = [rENV1], 16 // env+152: r6
265 ld8.fill r7 = [rENV2], 16 // env+160: r7
267 mov ar.unat = rUNAT // restore real ar.unat
270 ld8 rTMP1 = [rENV1], 16 // env+168: b1
271 ld8 rTMP2 = [rENV2], 16 // env+176: b2
273 ld8 rTMP3 = [rENV1], 16 // env+184: b3
274 ld8 rTMP4 = [rENV2], -168 // env+192: b4
277 ld8 rTMP1 = [rENV1], -168 // env+200: b5
284 // Restore ar.bsp, ar.pfs, and ar.rnat
285 ld8 rPFS = [rENV1], 56 // env+32: cfm (+saved ar.ec)
287 adds rBIAS = 0x1f8, r0
290 ld8 rRNAT = [rENV1], -24 // env+88: ar.rnat
291 ld8 rPBSP = [rENV2], 88 // env+24: prev_bsp
292 and rRSC0 = -4, rRSC // clear ar.rsc.mode
294 mov ar.rsc = rRSC0 // enforced lazy mode
295 extr.u rNSLOT = rPFS, 7, 7 // nslots = pfs.sol
298 and rBIAS = rPBSP, rBIAS // bias = prev_bsp & 0x1f8 ...
300 extr.u rBIAS = rBIAS, 3, 6 // ... div 8
302 add rBIAS = rNSLOT, rBIAS // bias += nslots
304 cmp.lt p6, p0 = 63, rBIAS // if (63 < bias) ...
305 cmp.lt p7, p0 = 126, rBIAS // if (126 < bias) ...
307 (p6) add rNSLOT = 1, rNSLOT // ... nslots++
309 (p7) add rNSLOT = 1, rNSLOT // ... nslots++
311 dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3)
313 add rBSP = rPBSP, rTMP3 // bsp = prev_bsp + (nslots << 3)
315 mov ar.bspstore = rBSP // restore ar.bsp
317 mov ar.rnat = rRNAT // restore ar.rnat
318 mov ar.pfs = rPFS // restore ar.pfs
320 mov ar.rsc = rRSC // restore ar.rsc
322 // Restore preds and ar.lc
323 ld8 rTMP1 = [rENV1], -56 // env+64: preds
324 ld8 rTMP2 = [rENV2], -96 // env+112: ar.lc
329 // Get previous sp and ip
330 ld8 rRP = [rENV1], 96 // env+8: ip (my rp)
331 ld8 rPSP = [rENV2], 112 // env+16: sp
334 // Restore ar.fpsr and gp
335 ld8 rTMP1 = [rENV1], 104 // env+104: ar.fpsr
336 ld8 r1 = [rENV2], 96 // env+128: gp
338 mov ar.fpsr = rTMP1 // restore ar.fpsr
340 // Restore FR 2-5 and 16-31
341 ldf.fill f2 = [rENV1], 32 // env+208: f2
342 ldf.fill f3 = [rENV2], 32 // env+224: f3
344 ldf.fill f4 = [rENV1], 32 // env+240: f4
345 ldf.fill f5 = [rENV2], 32 // env+256: f5
347 ldf.fill f16 = [rENV1], 32 // env+272: f16
348 ldf.fill f17 = [rENV2], 32 // env+288: f17
350 ldf.fill f18 = [rENV1], 32 // env+304: f16
351 ldf.fill f19 = [rENV2], 32 // env+320: f17
353 ldf.fill f20 = [rENV1], 32 // env+336: f16
354 ldf.fill f21 = [rENV2], 32 // env+352: f17
356 ldf.fill f22 = [rENV1], 32 // env+368: f16
357 ldf.fill f23 = [rENV2], 32 // env+384: f17
359 ldf.fill f24 = [rENV1], 32 // env+400: f16
360 ldf.fill f25 = [rENV2], 32 // env+416: f17
362 ldf.fill f26 = [rENV1], 32 // env+432: f16
363 ldf.fill f27 = [rENV2], 32 // env+448: f17
365 ldf.fill f28 = [rENV1], 32 // env+464: f16
366 ldf.fill f29 = [rENV2], 32 // env+480: f17
368 ldf.fill f30 = [rENV1], 32 // env+496: f16
369 ldf.fill f31 = [rENV2], 32 // env+512: f17
371 // Set landing pad parameter registers
377 // Restore previous sp and Return