1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
32 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
46 #include "basic-block.h"
49 #include "target-def.h"
50 #include "langhooks.h"
52 #include "tree-gimple.h"
54 #include "tm-constrs.h"
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
60 /* Return index of given mode in mult and division cost tables. */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
73 struct processor_costs size_cost = { /* costs for tuning for size */
74 COSTS_N_BYTES (2), /* cost of an add instruction */
75 COSTS_N_BYTES (3), /* cost of a lea instruction */
76 COSTS_N_BYTES (2), /* variable shift costs */
77 COSTS_N_BYTES (3), /* constant shift costs */
78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
79 COSTS_N_BYTES (3), /* HI */
80 COSTS_N_BYTES (3), /* SI */
81 COSTS_N_BYTES (3), /* DI */
82 COSTS_N_BYTES (5)}, /* other */
83 0, /* cost of multiply per each bit set */
84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 COSTS_N_BYTES (3), /* cost of movsx */
90 COSTS_N_BYTES (3), /* cost of movzx */
93 2, /* cost for loading QImode using movzbl */
94 {2, 2, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 2, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {2, 2, 2}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {2, 2, 2}, /* cost of storing fp registers
102 in SFmode, DFmode and XFmode */
103 3, /* cost of moving MMX register */
104 {3, 3}, /* cost of loading MMX registers
105 in SImode and DImode */
106 {3, 3}, /* cost of storing MMX registers
107 in SImode and DImode */
108 3, /* cost of moving SSE register */
109 {3, 3, 3}, /* cost of loading SSE registers
110 in SImode, DImode and TImode */
111 {3, 3, 3}, /* cost of storing SSE registers
112 in SImode, DImode and TImode */
113 3, /* MMX or SSE register to integer */
114 0, /* size of prefetch block */
115 0, /* number of parallel prefetches */
117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
120 COSTS_N_BYTES (2), /* cost of FABS instruction. */
121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 /* Processor costs (relative to an add) */
127 struct processor_costs i386_cost = { /* 386 specific costs */
128 COSTS_N_INSNS (1), /* cost of an add instruction */
129 COSTS_N_INSNS (1), /* cost of a lea instruction */
130 COSTS_N_INSNS (3), /* variable shift costs */
131 COSTS_N_INSNS (2), /* constant shift costs */
132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
133 COSTS_N_INSNS (6), /* HI */
134 COSTS_N_INSNS (6), /* SI */
135 COSTS_N_INSNS (6), /* DI */
136 COSTS_N_INSNS (6)}, /* other */
137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
139 COSTS_N_INSNS (23), /* HI */
140 COSTS_N_INSNS (23), /* SI */
141 COSTS_N_INSNS (23), /* DI */
142 COSTS_N_INSNS (23)}, /* other */
143 COSTS_N_INSNS (3), /* cost of movsx */
144 COSTS_N_INSNS (2), /* cost of movzx */
145 15, /* "large" insn */
147 4, /* cost for loading QImode using movzbl */
148 {2, 4, 2}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 4, 2}, /* cost of storing integer registers */
152 2, /* cost of reg,reg fld/fst */
153 {8, 8, 8}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {8, 8, 8}, /* cost of storing fp registers
156 in SFmode, DFmode and XFmode */
157 2, /* cost of moving MMX register */
158 {4, 8}, /* cost of loading MMX registers
159 in SImode and DImode */
160 {4, 8}, /* cost of storing MMX registers
161 in SImode and DImode */
162 2, /* cost of moving SSE register */
163 {4, 8, 16}, /* cost of loading SSE registers
164 in SImode, DImode and TImode */
165 {4, 8, 16}, /* cost of storing SSE registers
166 in SImode, DImode and TImode */
167 3, /* MMX or SSE register to integer */
168 0, /* size of prefetch block */
169 0, /* number of parallel prefetches */
171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
174 COSTS_N_INSNS (22), /* cost of FABS instruction. */
175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
180 struct processor_costs i486_cost = { /* 486 specific costs */
181 COSTS_N_INSNS (1), /* cost of an add instruction */
182 COSTS_N_INSNS (1), /* cost of a lea instruction */
183 COSTS_N_INSNS (3), /* variable shift costs */
184 COSTS_N_INSNS (2), /* constant shift costs */
185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
186 COSTS_N_INSNS (12), /* HI */
187 COSTS_N_INSNS (12), /* SI */
188 COSTS_N_INSNS (12), /* DI */
189 COSTS_N_INSNS (12)}, /* other */
190 1, /* cost of multiply per each bit set */
191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
192 COSTS_N_INSNS (40), /* HI */
193 COSTS_N_INSNS (40), /* SI */
194 COSTS_N_INSNS (40), /* DI */
195 COSTS_N_INSNS (40)}, /* other */
196 COSTS_N_INSNS (3), /* cost of movsx */
197 COSTS_N_INSNS (2), /* cost of movzx */
198 15, /* "large" insn */
200 4, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {8, 8, 8}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {8, 8, 8}, /* cost of storing fp registers
209 in SFmode, DFmode and XFmode */
210 2, /* cost of moving MMX register */
211 {4, 8}, /* cost of loading MMX registers
212 in SImode and DImode */
213 {4, 8}, /* cost of storing MMX registers
214 in SImode and DImode */
215 2, /* cost of moving SSE register */
216 {4, 8, 16}, /* cost of loading SSE registers
217 in SImode, DImode and TImode */
218 {4, 8, 16}, /* cost of storing SSE registers
219 in SImode, DImode and TImode */
220 3, /* MMX or SSE register to integer */
221 0, /* size of prefetch block */
222 0, /* number of parallel prefetches */
224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
227 COSTS_N_INSNS (3), /* cost of FABS instruction. */
228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
233 struct processor_costs pentium_cost = {
234 COSTS_N_INSNS (1), /* cost of an add instruction */
235 COSTS_N_INSNS (1), /* cost of a lea instruction */
236 COSTS_N_INSNS (4), /* variable shift costs */
237 COSTS_N_INSNS (1), /* constant shift costs */
238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
239 COSTS_N_INSNS (11), /* HI */
240 COSTS_N_INSNS (11), /* SI */
241 COSTS_N_INSNS (11), /* DI */
242 COSTS_N_INSNS (11)}, /* other */
243 0, /* cost of multiply per each bit set */
244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
245 COSTS_N_INSNS (25), /* HI */
246 COSTS_N_INSNS (25), /* SI */
247 COSTS_N_INSNS (25), /* DI */
248 COSTS_N_INSNS (25)}, /* other */
249 COSTS_N_INSNS (3), /* cost of movsx */
250 COSTS_N_INSNS (2), /* cost of movzx */
251 8, /* "large" insn */
253 6, /* cost for loading QImode using movzbl */
254 {2, 4, 2}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 4, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
262 in SFmode, DFmode and XFmode */
263 8, /* cost of moving MMX register */
264 {8, 8}, /* cost of loading MMX registers
265 in SImode and DImode */
266 {8, 8}, /* cost of storing MMX registers
267 in SImode and DImode */
268 2, /* cost of moving SSE register */
269 {4, 8, 16}, /* cost of loading SSE registers
270 in SImode, DImode and TImode */
271 {4, 8, 16}, /* cost of storing SSE registers
272 in SImode, DImode and TImode */
273 3, /* MMX or SSE register to integer */
274 0, /* size of prefetch block */
275 0, /* number of parallel prefetches */
277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
278 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
280 COSTS_N_INSNS (1), /* cost of FABS instruction. */
281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
286 struct processor_costs pentiumpro_cost = {
287 COSTS_N_INSNS (1), /* cost of an add instruction */
288 COSTS_N_INSNS (1), /* cost of a lea instruction */
289 COSTS_N_INSNS (1), /* variable shift costs */
290 COSTS_N_INSNS (1), /* constant shift costs */
291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
292 COSTS_N_INSNS (4), /* HI */
293 COSTS_N_INSNS (4), /* SI */
294 COSTS_N_INSNS (4), /* DI */
295 COSTS_N_INSNS (4)}, /* other */
296 0, /* cost of multiply per each bit set */
297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
298 COSTS_N_INSNS (17), /* HI */
299 COSTS_N_INSNS (17), /* SI */
300 COSTS_N_INSNS (17), /* DI */
301 COSTS_N_INSNS (17)}, /* other */
302 COSTS_N_INSNS (1), /* cost of movsx */
303 COSTS_N_INSNS (1), /* cost of movzx */
304 8, /* "large" insn */
306 2, /* cost for loading QImode using movzbl */
307 {4, 4, 4}, /* cost of loading integer registers
308 in QImode, HImode and SImode.
309 Relative to reg-reg move (2). */
310 {2, 2, 2}, /* cost of storing integer registers */
311 2, /* cost of reg,reg fld/fst */
312 {2, 2, 6}, /* cost of loading fp registers
313 in SFmode, DFmode and XFmode */
314 {4, 4, 6}, /* cost of storing fp registers
315 in SFmode, DFmode and XFmode */
316 2, /* cost of moving MMX register */
317 {2, 2}, /* cost of loading MMX registers
318 in SImode and DImode */
319 {2, 2}, /* cost of storing MMX registers
320 in SImode and DImode */
321 2, /* cost of moving SSE register */
322 {2, 2, 8}, /* cost of loading SSE registers
323 in SImode, DImode and TImode */
324 {2, 2, 8}, /* cost of storing SSE registers
325 in SImode, DImode and TImode */
326 3, /* MMX or SSE register to integer */
327 32, /* size of prefetch block */
328 6, /* number of parallel prefetches */
330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
333 COSTS_N_INSNS (2), /* cost of FABS instruction. */
334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
339 struct processor_costs k6_cost = {
340 COSTS_N_INSNS (1), /* cost of an add instruction */
341 COSTS_N_INSNS (2), /* cost of a lea instruction */
342 COSTS_N_INSNS (1), /* variable shift costs */
343 COSTS_N_INSNS (1), /* constant shift costs */
344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
345 COSTS_N_INSNS (3), /* HI */
346 COSTS_N_INSNS (3), /* SI */
347 COSTS_N_INSNS (3), /* DI */
348 COSTS_N_INSNS (3)}, /* other */
349 0, /* cost of multiply per each bit set */
350 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
351 COSTS_N_INSNS (18), /* HI */
352 COSTS_N_INSNS (18), /* SI */
353 COSTS_N_INSNS (18), /* DI */
354 COSTS_N_INSNS (18)}, /* other */
355 COSTS_N_INSNS (2), /* cost of movsx */
356 COSTS_N_INSNS (2), /* cost of movzx */
357 8, /* "large" insn */
359 3, /* cost for loading QImode using movzbl */
360 {4, 5, 4}, /* cost of loading integer registers
361 in QImode, HImode and SImode.
362 Relative to reg-reg move (2). */
363 {2, 3, 2}, /* cost of storing integer registers */
364 4, /* cost of reg,reg fld/fst */
365 {6, 6, 6}, /* cost of loading fp registers
366 in SFmode, DFmode and XFmode */
367 {4, 4, 4}, /* cost of storing fp registers
368 in SFmode, DFmode and XFmode */
369 2, /* cost of moving MMX register */
370 {2, 2}, /* cost of loading MMX registers
371 in SImode and DImode */
372 {2, 2}, /* cost of storing MMX registers
373 in SImode and DImode */
374 2, /* cost of moving SSE register */
375 {2, 2, 8}, /* cost of loading SSE registers
376 in SImode, DImode and TImode */
377 {2, 2, 8}, /* cost of storing SSE registers
378 in SImode, DImode and TImode */
379 6, /* MMX or SSE register to integer */
380 32, /* size of prefetch block */
381 1, /* number of parallel prefetches */
383 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
384 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
385 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
386 COSTS_N_INSNS (2), /* cost of FABS instruction. */
387 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
388 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
392 struct processor_costs athlon_cost = {
393 COSTS_N_INSNS (1), /* cost of an add instruction */
394 COSTS_N_INSNS (2), /* cost of a lea instruction */
395 COSTS_N_INSNS (1), /* variable shift costs */
396 COSTS_N_INSNS (1), /* constant shift costs */
397 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
398 COSTS_N_INSNS (5), /* HI */
399 COSTS_N_INSNS (5), /* SI */
400 COSTS_N_INSNS (5), /* DI */
401 COSTS_N_INSNS (5)}, /* other */
402 0, /* cost of multiply per each bit set */
403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
404 COSTS_N_INSNS (26), /* HI */
405 COSTS_N_INSNS (42), /* SI */
406 COSTS_N_INSNS (74), /* DI */
407 COSTS_N_INSNS (74)}, /* other */
408 COSTS_N_INSNS (1), /* cost of movsx */
409 COSTS_N_INSNS (1), /* cost of movzx */
410 8, /* "large" insn */
412 4, /* cost for loading QImode using movzbl */
413 {3, 4, 3}, /* cost of loading integer registers
414 in QImode, HImode and SImode.
415 Relative to reg-reg move (2). */
416 {3, 4, 3}, /* cost of storing integer registers */
417 4, /* cost of reg,reg fld/fst */
418 {4, 4, 12}, /* cost of loading fp registers
419 in SFmode, DFmode and XFmode */
420 {6, 6, 8}, /* cost of storing fp registers
421 in SFmode, DFmode and XFmode */
422 2, /* cost of moving MMX register */
423 {4, 4}, /* cost of loading MMX registers
424 in SImode and DImode */
425 {4, 4}, /* cost of storing MMX registers
426 in SImode and DImode */
427 2, /* cost of moving SSE register */
428 {4, 4, 6}, /* cost of loading SSE registers
429 in SImode, DImode and TImode */
430 {4, 4, 5}, /* cost of storing SSE registers
431 in SImode, DImode and TImode */
432 5, /* MMX or SSE register to integer */
433 64, /* size of prefetch block */
434 6, /* number of parallel prefetches */
436 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
437 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
438 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
439 COSTS_N_INSNS (2), /* cost of FABS instruction. */
440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
441 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
445 struct processor_costs k8_cost = {
446 COSTS_N_INSNS (1), /* cost of an add instruction */
447 COSTS_N_INSNS (2), /* cost of a lea instruction */
448 COSTS_N_INSNS (1), /* variable shift costs */
449 COSTS_N_INSNS (1), /* constant shift costs */
450 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
451 COSTS_N_INSNS (4), /* HI */
452 COSTS_N_INSNS (3), /* SI */
453 COSTS_N_INSNS (4), /* DI */
454 COSTS_N_INSNS (5)}, /* other */
455 0, /* cost of multiply per each bit set */
456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
457 COSTS_N_INSNS (26), /* HI */
458 COSTS_N_INSNS (42), /* SI */
459 COSTS_N_INSNS (74), /* DI */
460 COSTS_N_INSNS (74)}, /* other */
461 COSTS_N_INSNS (1), /* cost of movsx */
462 COSTS_N_INSNS (1), /* cost of movzx */
463 8, /* "large" insn */
465 4, /* cost for loading QImode using movzbl */
466 {3, 4, 3}, /* cost of loading integer registers
467 in QImode, HImode and SImode.
468 Relative to reg-reg move (2). */
469 {3, 4, 3}, /* cost of storing integer registers */
470 4, /* cost of reg,reg fld/fst */
471 {4, 4, 12}, /* cost of loading fp registers
472 in SFmode, DFmode and XFmode */
473 {6, 6, 8}, /* cost of storing fp registers
474 in SFmode, DFmode and XFmode */
475 2, /* cost of moving MMX register */
476 {3, 3}, /* cost of loading MMX registers
477 in SImode and DImode */
478 {4, 4}, /* cost of storing MMX registers
479 in SImode and DImode */
480 2, /* cost of moving SSE register */
481 {4, 3, 6}, /* cost of loading SSE registers
482 in SImode, DImode and TImode */
483 {4, 4, 5}, /* cost of storing SSE registers
484 in SImode, DImode and TImode */
485 5, /* MMX or SSE register to integer */
486 64, /* size of prefetch block */
487 6, /* number of parallel prefetches */
489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
491 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
492 COSTS_N_INSNS (2), /* cost of FABS instruction. */
493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
498 struct processor_costs pentium4_cost = {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (3), /* cost of a lea instruction */
501 COSTS_N_INSNS (4), /* variable shift costs */
502 COSTS_N_INSNS (4), /* constant shift costs */
503 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (15), /* HI */
505 COSTS_N_INSNS (15), /* SI */
506 COSTS_N_INSNS (15), /* DI */
507 COSTS_N_INSNS (15)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (56), /* HI */
511 COSTS_N_INSNS (56), /* SI */
512 COSTS_N_INSNS (56), /* DI */
513 COSTS_N_INSNS (56)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 16, /* "large" insn */
518 2, /* cost for loading QImode using movzbl */
519 {4, 5, 4}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {2, 3, 2}, /* cost of storing integer registers */
523 2, /* cost of reg,reg fld/fst */
524 {2, 2, 6}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {4, 4, 6}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
528 2, /* cost of moving MMX register */
529 {2, 2}, /* cost of loading MMX registers
530 in SImode and DImode */
531 {2, 2}, /* cost of storing MMX registers
532 in SImode and DImode */
533 12, /* cost of moving SSE register */
534 {12, 12, 12}, /* cost of loading SSE registers
535 in SImode, DImode and TImode */
536 {2, 2, 8}, /* cost of storing SSE registers
537 in SImode, DImode and TImode */
538 10, /* MMX or SSE register to integer */
539 64, /* size of prefetch block */
540 6, /* number of parallel prefetches */
542 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
543 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
544 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
545 COSTS_N_INSNS (2), /* cost of FABS instruction. */
546 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
547 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
551 struct processor_costs nocona_cost = {
552 COSTS_N_INSNS (1), /* cost of an add instruction */
553 COSTS_N_INSNS (1), /* cost of a lea instruction */
554 COSTS_N_INSNS (1), /* variable shift costs */
555 COSTS_N_INSNS (1), /* constant shift costs */
556 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
557 COSTS_N_INSNS (10), /* HI */
558 COSTS_N_INSNS (10), /* SI */
559 COSTS_N_INSNS (10), /* DI */
560 COSTS_N_INSNS (10)}, /* other */
561 0, /* cost of multiply per each bit set */
562 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
563 COSTS_N_INSNS (66), /* HI */
564 COSTS_N_INSNS (66), /* SI */
565 COSTS_N_INSNS (66), /* DI */
566 COSTS_N_INSNS (66)}, /* other */
567 COSTS_N_INSNS (1), /* cost of movsx */
568 COSTS_N_INSNS (1), /* cost of movzx */
569 16, /* "large" insn */
571 4, /* cost for loading QImode using movzbl */
572 {4, 4, 4}, /* cost of loading integer registers
573 in QImode, HImode and SImode.
574 Relative to reg-reg move (2). */
575 {4, 4, 4}, /* cost of storing integer registers */
576 3, /* cost of reg,reg fld/fst */
577 {12, 12, 12}, /* cost of loading fp registers
578 in SFmode, DFmode and XFmode */
579 {4, 4, 4}, /* cost of storing fp registers
580 in SFmode, DFmode and XFmode */
581 6, /* cost of moving MMX register */
582 {12, 12}, /* cost of loading MMX registers
583 in SImode and DImode */
584 {12, 12}, /* cost of storing MMX registers
585 in SImode and DImode */
586 6, /* cost of moving SSE register */
587 {12, 12, 12}, /* cost of loading SSE registers
588 in SImode, DImode and TImode */
589 {12, 12, 12}, /* cost of storing SSE registers
590 in SImode, DImode and TImode */
591 8, /* MMX or SSE register to integer */
592 128, /* size of prefetch block */
593 8, /* number of parallel prefetches */
595 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
596 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
597 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
598 COSTS_N_INSNS (3), /* cost of FABS instruction. */
599 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
600 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
603 /* Generic64 should produce code tuned for Nocona and K8. */
605 struct processor_costs generic64_cost = {
606 COSTS_N_INSNS (1), /* cost of an add instruction */
607 /* On all chips taken into consideration lea is 2 cycles and more. With
608 this cost however our current implementation of synth_mult results in
609 use of unnecessary temporary registers causing regression on several
610 SPECfp benchmarks. */
611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (4), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (4), /* DI */
618 COSTS_N_INSNS (2)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (26), /* HI */
622 COSTS_N_INSNS (42), /* SI */
623 COSTS_N_INSNS (74), /* DI */
624 COSTS_N_INSNS (74)}, /* other */
625 COSTS_N_INSNS (1), /* cost of movsx */
626 COSTS_N_INSNS (1), /* cost of movzx */
627 8, /* "large" insn */
629 4, /* cost for loading QImode using movzbl */
630 {4, 4, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {4, 4, 4}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {12, 12, 12}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {6, 6, 8}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {8, 8}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {8, 8}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {8, 8, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {8, 8, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 5, /* MMX or SSE register to integer */
650 64, /* size of prefetch block */
651 6, /* number of parallel prefetches */
652 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
653 is increased to perhaps more appropriate value of 5. */
655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
663 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
665 struct processor_costs generic32_cost = {
666 COSTS_N_INSNS (1), /* cost of an add instruction */
667 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
668 COSTS_N_INSNS (1), /* variable shift costs */
669 COSTS_N_INSNS (1), /* constant shift costs */
670 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
671 COSTS_N_INSNS (4), /* HI */
672 COSTS_N_INSNS (3), /* SI */
673 COSTS_N_INSNS (4), /* DI */
674 COSTS_N_INSNS (2)}, /* other */
675 0, /* cost of multiply per each bit set */
676 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
677 COSTS_N_INSNS (26), /* HI */
678 COSTS_N_INSNS (42), /* SI */
679 COSTS_N_INSNS (74), /* DI */
680 COSTS_N_INSNS (74)}, /* other */
681 COSTS_N_INSNS (1), /* cost of movsx */
682 COSTS_N_INSNS (1), /* cost of movzx */
683 8, /* "large" insn */
685 4, /* cost for loading QImode using movzbl */
686 {4, 4, 4}, /* cost of loading integer registers
687 in QImode, HImode and SImode.
688 Relative to reg-reg move (2). */
689 {4, 4, 4}, /* cost of storing integer registers */
690 4, /* cost of reg,reg fld/fst */
691 {12, 12, 12}, /* cost of loading fp registers
692 in SFmode, DFmode and XFmode */
693 {6, 6, 8}, /* cost of storing fp registers
694 in SFmode, DFmode and XFmode */
695 2, /* cost of moving MMX register */
696 {8, 8}, /* cost of loading MMX registers
697 in SImode and DImode */
698 {8, 8}, /* cost of storing MMX registers
699 in SImode and DImode */
700 2, /* cost of moving SSE register */
701 {8, 8, 8}, /* cost of loading SSE registers
702 in SImode, DImode and TImode */
703 {8, 8, 8}, /* cost of storing SSE registers
704 in SImode, DImode and TImode */
705 5, /* MMX or SSE register to integer */
706 64, /* size of prefetch block */
707 6, /* number of parallel prefetches */
709 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
711 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
712 COSTS_N_INSNS (8), /* cost of FABS instruction. */
713 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
714 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
717 const struct processor_costs *ix86_cost = &pentium_cost;
719 /* Processor feature/optimization bitmasks. */
720 #define m_386 (1<<PROCESSOR_I386)
721 #define m_486 (1<<PROCESSOR_I486)
722 #define m_PENT (1<<PROCESSOR_PENTIUM)
723 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
724 #define m_K6 (1<<PROCESSOR_K6)
725 #define m_ATHLON (1<<PROCESSOR_ATHLON)
726 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
727 #define m_K8 (1<<PROCESSOR_K8)
728 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
729 #define m_NOCONA (1<<PROCESSOR_NOCONA)
730 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
731 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
732 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
734 /* Generic instruction choice should be common subset of supported CPUs
735 (PPro/PENT4/NOCONA/Athlon/K8). */
737 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
738 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
739 generic because it is not working well with PPro base chips. */
740 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
741 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
742 const int x86_zero_extend_with_and = m_486 | m_PENT;
743 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
744 const int x86_double_with_add = ~m_386;
745 const int x86_use_bit_test = m_386;
746 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
747 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
748 const int x86_3dnow_a = m_ATHLON_K8;
749 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
750 /* Branch hints were put in P4 based on simulation result. But
751 after P4 was made, no performance benefit was observed with
752 branch hints. It also increases the code size. As the result,
753 icc never generates branch hints. */
754 const int x86_branch_hints = 0;
755 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
756 /* We probably ought to watch for partial register stalls on Generic32
757 compilation setting as well. However in current implementation the
758 partial register stalls are not eliminated very well - they can
759 be introduced via subregs synthesized by combine and can happen
760 in caller/callee saving sequences.
761 Because this option pays back little on PPro based chips and is in conflict
762 with partial reg. dependencies used by Athlon/P4 based chips, it is better
763 to leave it off for generic32 for now. */
764 const int x86_partial_reg_stall = m_PPRO;
765 const int x86_partial_flag_reg_stall = m_GENERIC;
766 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
767 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
768 const int x86_use_mov0 = m_K6;
769 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
770 const int x86_read_modify_write = ~m_PENT;
771 const int x86_read_modify = ~(m_PENT | m_PPRO);
772 const int x86_split_long_moves = m_PPRO;
773 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
774 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
775 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
776 const int x86_qimode_math = ~(0);
777 const int x86_promote_qi_regs = 0;
778 /* On PPro this flag is meant to avoid partial register stalls. Just like
779 the x86_partial_reg_stall this option might be considered for Generic32
780 if our scheme for avoiding partial stalls was more effective. */
781 const int x86_himode_math = ~(m_PPRO);
782 const int x86_promote_hi_regs = m_PPRO;
783 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
786 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
788 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
789 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
790 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
791 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
792 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
793 const int x86_shift1 = ~m_486;
794 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
795 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
796 that thread 128bit SSE registers as single units versus K8 based chips that
797 divide SSE registers to two 64bit halves.
798 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
799 to allow register renaming on 128bit SSE units, but usually results in one
800 extra microop on 64bit SSE units. Experimental results shows that disabling
801 this option on P4 brings over 20% SPECfp regression, while enabling it on
802 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
804 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
805 /* Set for machines where the type and dependencies are resolved on SSE
806 register parts instead of whole registers, so we may maintain just
807 lower part of scalar values in proper format leaving the upper part
809 const int x86_sse_split_regs = m_ATHLON_K8;
810 const int x86_sse_typeless_stores = m_ATHLON_K8;
811 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
812 const int x86_use_ffreep = m_ATHLON_K8;
813 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
814 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
816 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
817 integer data in xmm registers. Which results in pretty abysmal code. */
818 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
820 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
821 /* Some CPU cores are not able to predict more than 4 branch instructions in
822 the 16 byte window. */
823 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
824 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
825 const int x86_use_bt = m_ATHLON_K8;
826 /* Compare and exchange was added for 80486. */
827 const int x86_cmpxchg = ~m_386;
828 /* Compare and exchange 8 bytes was added for pentium. */
829 const int x86_cmpxchg8b = ~(m_386 | m_486);
830 /* Compare and exchange 16 bytes was added for nocona. */
831 const int x86_cmpxchg16b = m_NOCONA;
832 /* Exchange and add was added for 80486. */
833 const int x86_xadd = ~m_386;
834 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
836 /* In case the average insn count for single function invocation is
837 lower than this constant, emit fast (but longer) prologue and
839 #define FAST_PROLOGUE_INSN_COUNT 20
841 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
842 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
843 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
844 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
846 /* Array of the smallest class containing reg number REGNO, indexed by
847 REGNO. Used by REGNO_REG_CLASS in i386.h. */
849 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
852 AREG, DREG, CREG, BREG,
854 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
856 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
857 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
860 /* flags, fpsr, dirflag, frame */
861 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
862 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
864 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
866 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
867 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
868 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
872 /* The "default" register map used in 32bit mode. */
874 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
876 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
877 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
878 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
879 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
880 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
881 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
882 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
885 static int const x86_64_int_parameter_registers[6] =
887 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
888 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
891 static int const x86_64_int_return_registers[4] =
893 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
896 /* The "default" register map used in 64bit mode. */
897 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
899 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
900 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
901 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
902 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
903 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
904 8,9,10,11,12,13,14,15, /* extended integer registers */
905 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
908 /* Define the register numbers to be used in Dwarf debugging information.
909 The SVR4 reference port C compiler uses the following register numbers
910 in its Dwarf output code:
911 0 for %eax (gcc regno = 0)
912 1 for %ecx (gcc regno = 2)
913 2 for %edx (gcc regno = 1)
914 3 for %ebx (gcc regno = 3)
915 4 for %esp (gcc regno = 7)
916 5 for %ebp (gcc regno = 6)
917 6 for %esi (gcc regno = 4)
918 7 for %edi (gcc regno = 5)
919 The following three DWARF register numbers are never generated by
920 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
921 believes these numbers have these meanings.
922 8 for %eip (no gcc equivalent)
923 9 for %eflags (gcc regno = 17)
924 10 for %trapno (no gcc equivalent)
925 It is not at all clear how we should number the FP stack registers
926 for the x86 architecture. If the version of SDB on x86/svr4 were
927 a bit less brain dead with respect to floating-point then we would
928 have a precedent to follow with respect to DWARF register numbers
929 for x86 FP registers, but the SDB on x86/svr4 is so completely
930 broken with respect to FP registers that it is hardly worth thinking
931 of it as something to strive for compatibility with.
932 The version of x86/svr4 SDB I have at the moment does (partially)
933 seem to believe that DWARF register number 11 is associated with
934 the x86 register %st(0), but that's about all. Higher DWARF
935 register numbers don't seem to be associated with anything in
936 particular, and even for DWARF regno 11, SDB only seems to under-
937 stand that it should say that a variable lives in %st(0) (when
938 asked via an `=' command) if we said it was in DWARF regno 11,
939 but SDB still prints garbage when asked for the value of the
940 variable in question (via a `/' command).
941 (Also note that the labels SDB prints for various FP stack regs
942 when doing an `x' command are all wrong.)
943 Note that these problems generally don't affect the native SVR4
944 C compiler because it doesn't allow the use of -O with -g and
945 because when it is *not* optimizing, it allocates a memory
946 location for each floating-point variable, and the memory
947 location is what gets described in the DWARF AT_location
948 attribute for the variable in question.
949 Regardless of the severe mental illness of the x86/svr4 SDB, we
950 do something sensible here and we use the following DWARF
951 register numbers. Note that these are all stack-top-relative
953 11 for %st(0) (gcc regno = 8)
954 12 for %st(1) (gcc regno = 9)
955 13 for %st(2) (gcc regno = 10)
956 14 for %st(3) (gcc regno = 11)
957 15 for %st(4) (gcc regno = 12)
958 16 for %st(5) (gcc regno = 13)
959 17 for %st(6) (gcc regno = 14)
960 18 for %st(7) (gcc regno = 15)
962 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
964 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
965 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
966 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
967 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
968 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
969 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
970 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
973 /* Test and compare insns in i386.md store the information needed to
974 generate branch and scc insns here. */
976 rtx ix86_compare_op0 = NULL_RTX;
977 rtx ix86_compare_op1 = NULL_RTX;
978 rtx ix86_compare_emitted = NULL_RTX;
980 /* Size of the register save area. */
981 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
983 /* Define the structure for the machine field in struct function. */
985 struct stack_local_entry GTY(())
990 struct stack_local_entry *next;
993 /* Structure describing stack frame layout.
994 Stack grows downward:
1000 saved frame pointer if frame_pointer_needed
1001 <- HARD_FRAME_POINTER
1006 [va_arg registers] (
1007 > to_allocate <- FRAME_POINTER
1017 HOST_WIDE_INT frame;
1019 int outgoing_arguments_size;
1022 HOST_WIDE_INT to_allocate;
1023 /* The offsets relative to ARG_POINTER. */
1024 HOST_WIDE_INT frame_pointer_offset;
1025 HOST_WIDE_INT hard_frame_pointer_offset;
1026 HOST_WIDE_INT stack_pointer_offset;
1028 /* When save_regs_using_mov is set, emit prologue using
1029 move instead of push instructions. */
1030 bool save_regs_using_mov;
1033 /* Code model option. */
1034 enum cmodel ix86_cmodel;
1036 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1038 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1040 /* Which unit we are generating floating point math for. */
1041 enum fpmath_unit ix86_fpmath;
1043 /* Which cpu are we scheduling for. */
1044 enum processor_type ix86_tune;
1045 /* Which instruction set architecture to use. */
1046 enum processor_type ix86_arch;
1048 /* true if sse prefetch instruction is not NOOP. */
1049 int x86_prefetch_sse;
1051 /* ix86_regparm_string as a number */
1052 static int ix86_regparm;
1054 /* -mstackrealign option */
1055 extern int ix86_force_align_arg_pointer;
1056 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1058 /* Preferred alignment for stack boundary in bits. */
1059 unsigned int ix86_preferred_stack_boundary;
1061 /* Values 1-5: see jump.c */
1062 int ix86_branch_cost;
1064 /* Variables which are this size or smaller are put in the data/bss
1065 or ldata/lbss sections. */
1067 int ix86_section_threshold = 65536;
1069 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1070 char internal_label_prefix[16];
1071 int internal_label_prefix_len;
1073 static bool ix86_handle_option (size_t, const char *, int);
1074 static void output_pic_addr_const (FILE *, rtx, int);
1075 static void put_condition_code (enum rtx_code, enum machine_mode,
1077 static const char *get_some_local_dynamic_name (void);
1078 static int get_some_local_dynamic_name_1 (rtx *, void *);
1079 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1080 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1082 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1083 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1085 static rtx get_thread_pointer (int);
1086 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1087 static void get_pc_thunk_name (char [32], unsigned int);
1088 static rtx gen_push (rtx);
1089 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1090 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1091 static struct machine_function * ix86_init_machine_status (void);
1092 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1093 static int ix86_nsaved_regs (void);
1094 static void ix86_emit_save_regs (void);
1095 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1096 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1097 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1098 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1099 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1100 static rtx ix86_expand_aligntest (rtx, int);
1101 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1102 static int ix86_issue_rate (void);
1103 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1104 static int ia32_multipass_dfa_lookahead (void);
1105 static void ix86_init_mmx_sse_builtins (void);
1106 static rtx x86_this_parameter (tree);
1107 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1108 HOST_WIDE_INT, tree);
1109 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1110 static void x86_file_start (void);
1111 static void ix86_reorg (void);
1112 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1113 static tree ix86_build_builtin_va_list (void);
1114 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1116 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1117 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1118 static bool ix86_vector_mode_supported_p (enum machine_mode);
1120 static int ix86_address_cost (rtx);
1121 static bool ix86_cannot_force_const_mem (rtx);
1122 static rtx ix86_delegitimize_address (rtx);
1124 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1126 struct builtin_description;
1127 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1129 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1131 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1133 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1134 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1135 static rtx safe_vector_operand (rtx, enum machine_mode);
1136 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1137 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1139 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1140 static int ix86_fp_comparison_cost (enum rtx_code code);
1141 static unsigned int ix86_select_alt_pic_regnum (void);
1142 static int ix86_save_reg (unsigned int, int);
1143 static void ix86_compute_frame_layout (struct ix86_frame *);
1144 static int ix86_comp_type_attributes (tree, tree);
1145 static int ix86_function_regparm (tree, tree);
1146 const struct attribute_spec ix86_attribute_table[];
1147 static bool ix86_function_ok_for_sibcall (tree, tree);
1148 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1149 static int ix86_value_regno (enum machine_mode, tree, tree);
1150 static bool contains_128bit_aligned_vector_p (tree);
1151 static rtx ix86_struct_value_rtx (tree, int);
1152 static bool ix86_ms_bitfield_layout_p (tree);
1153 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1154 static int extended_reg_mentioned_1 (rtx *, void *);
1155 static bool ix86_rtx_costs (rtx, int, int, int *);
1156 static int min_insn_size (rtx);
1157 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1158 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1159 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1161 static void ix86_init_builtins (void);
1162 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1163 static const char *ix86_mangle_fundamental_type (tree);
1164 static tree ix86_stack_protect_fail (void);
1165 static rtx ix86_internal_arg_pointer (void);
1166 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1168 /* This function is only used on Solaris. */
1169 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1172 /* Register class used for passing given 64bit part of the argument.
1173 These represent classes as documented by the PS ABI, with the exception
1174 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1175 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1177 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1178 whenever possible (upper half does contain padding).
1180 enum x86_64_reg_class
1183 X86_64_INTEGER_CLASS,
1184 X86_64_INTEGERSI_CLASS,
1191 X86_64_COMPLEX_X87_CLASS,
1194 static const char * const x86_64_reg_class_name[] = {
1195 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1196 "sseup", "x87", "x87up", "cplx87", "no"
1199 #define MAX_CLASSES 4
1201 /* Table of constants used by fldpi, fldln2, etc.... */
1202 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1203 static bool ext_80387_constants_init = 0;
1204 static void init_ext_80387_constants (void);
1205 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1206 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1207 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1208 static section *x86_64_elf_select_section (tree decl, int reloc,
1209 unsigned HOST_WIDE_INT align)
1212 /* Initialize the GCC target structure. */
1213 #undef TARGET_ATTRIBUTE_TABLE
1214 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1215 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1216 # undef TARGET_MERGE_DECL_ATTRIBUTES
1217 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1220 #undef TARGET_COMP_TYPE_ATTRIBUTES
1221 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1223 #undef TARGET_INIT_BUILTINS
1224 #define TARGET_INIT_BUILTINS ix86_init_builtins
1225 #undef TARGET_EXPAND_BUILTIN
1226 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1228 #undef TARGET_ASM_FUNCTION_EPILOGUE
1229 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1231 #undef TARGET_ENCODE_SECTION_INFO
1232 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1233 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1235 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1238 #undef TARGET_ASM_OPEN_PAREN
1239 #define TARGET_ASM_OPEN_PAREN ""
1240 #undef TARGET_ASM_CLOSE_PAREN
1241 #define TARGET_ASM_CLOSE_PAREN ""
1243 #undef TARGET_ASM_ALIGNED_HI_OP
1244 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1245 #undef TARGET_ASM_ALIGNED_SI_OP
1246 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1248 #undef TARGET_ASM_ALIGNED_DI_OP
1249 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1252 #undef TARGET_ASM_UNALIGNED_HI_OP
1253 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1254 #undef TARGET_ASM_UNALIGNED_SI_OP
1255 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1256 #undef TARGET_ASM_UNALIGNED_DI_OP
1257 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1259 #undef TARGET_SCHED_ADJUST_COST
1260 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1261 #undef TARGET_SCHED_ISSUE_RATE
1262 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1263 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1264 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1265 ia32_multipass_dfa_lookahead
1267 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1268 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1271 #undef TARGET_HAVE_TLS
1272 #define TARGET_HAVE_TLS true
1274 #undef TARGET_CANNOT_FORCE_CONST_MEM
1275 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1276 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1277 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1279 #undef TARGET_DELEGITIMIZE_ADDRESS
1280 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1282 #undef TARGET_MS_BITFIELD_LAYOUT_P
1283 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1286 #undef TARGET_BINDS_LOCAL_P
1287 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1290 #undef TARGET_ASM_OUTPUT_MI_THUNK
1291 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1292 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1293 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1295 #undef TARGET_ASM_FILE_START
1296 #define TARGET_ASM_FILE_START x86_file_start
1298 #undef TARGET_DEFAULT_TARGET_FLAGS
1299 #define TARGET_DEFAULT_TARGET_FLAGS \
1301 | TARGET_64BIT_DEFAULT \
1302 | TARGET_SUBTARGET_DEFAULT \
1303 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1305 #undef TARGET_HANDLE_OPTION
1306 #define TARGET_HANDLE_OPTION ix86_handle_option
1308 #undef TARGET_RTX_COSTS
1309 #define TARGET_RTX_COSTS ix86_rtx_costs
1310 #undef TARGET_ADDRESS_COST
1311 #define TARGET_ADDRESS_COST ix86_address_cost
1313 #undef TARGET_FIXED_CONDITION_CODE_REGS
1314 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1315 #undef TARGET_CC_MODES_COMPATIBLE
1316 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1318 #undef TARGET_MACHINE_DEPENDENT_REORG
1319 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1321 #undef TARGET_BUILD_BUILTIN_VA_LIST
1322 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1324 #undef TARGET_MD_ASM_CLOBBERS
1325 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1327 #undef TARGET_PROMOTE_PROTOTYPES
1328 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1329 #undef TARGET_STRUCT_VALUE_RTX
1330 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1331 #undef TARGET_SETUP_INCOMING_VARARGS
1332 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1333 #undef TARGET_MUST_PASS_IN_STACK
1334 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1335 #undef TARGET_PASS_BY_REFERENCE
1336 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1337 #undef TARGET_INTERNAL_ARG_POINTER
1338 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1339 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1340 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1342 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1343 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1345 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1346 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1348 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1349 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1352 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1353 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1356 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1357 #undef TARGET_INSERT_ATTRIBUTES
1358 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1361 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1362 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1364 #undef TARGET_STACK_PROTECT_FAIL
1365 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1367 #undef TARGET_FUNCTION_VALUE
1368 #define TARGET_FUNCTION_VALUE ix86_function_value
1370 struct gcc_target targetm = TARGET_INITIALIZER;
1373 /* The svr4 ABI for the i386 says that records and unions are returned
1375 #ifndef DEFAULT_PCC_STRUCT_RETURN
1376 #define DEFAULT_PCC_STRUCT_RETURN 1
1379 /* Implement TARGET_HANDLE_OPTION. */
1382 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1389 target_flags &= ~MASK_3DNOW_A;
1390 target_flags_explicit |= MASK_3DNOW_A;
1397 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1398 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1405 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1406 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1413 target_flags &= ~MASK_SSE3;
1414 target_flags_explicit |= MASK_SSE3;
1423 /* Sometimes certain combinations of command options do not make
1424 sense on a particular target machine. You can define a macro
1425 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1426 defined, is executed once just after all the command options have
1429 Don't use this macro to turn on various extra optimizations for
1430 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1433 override_options (void)
1436 int ix86_tune_defaulted = 0;
1438 /* Comes from final.c -- no real reason to change it. */
1439 #define MAX_CODE_ALIGN 16
1443 const struct processor_costs *cost; /* Processor costs */
1444 const int target_enable; /* Target flags to enable. */
1445 const int target_disable; /* Target flags to disable. */
1446 const int align_loop; /* Default alignments. */
1447 const int align_loop_max_skip;
1448 const int align_jump;
1449 const int align_jump_max_skip;
1450 const int align_func;
1452 const processor_target_table[PROCESSOR_max] =
1454 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1455 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1456 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1457 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1458 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1459 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1460 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1461 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1462 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1463 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1464 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1467 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1470 const char *const name; /* processor name or nickname. */
1471 const enum processor_type processor;
1472 const enum pta_flags
1478 PTA_PREFETCH_SSE = 16,
1484 const processor_alias_table[] =
1486 {"i386", PROCESSOR_I386, 0},
1487 {"i486", PROCESSOR_I486, 0},
1488 {"i586", PROCESSOR_PENTIUM, 0},
1489 {"pentium", PROCESSOR_PENTIUM, 0},
1490 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1491 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1492 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1493 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1494 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1495 {"i686", PROCESSOR_PENTIUMPRO, 0},
1496 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1497 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1498 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1499 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1500 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1501 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502 | PTA_MMX | PTA_PREFETCH_SSE},
1503 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1504 | PTA_MMX | PTA_PREFETCH_SSE},
1505 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1506 | PTA_MMX | PTA_PREFETCH_SSE},
1507 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1508 | PTA_MMX | PTA_PREFETCH_SSE},
1509 {"k6", PROCESSOR_K6, PTA_MMX},
1510 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1511 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1512 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1514 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1515 | PTA_3DNOW | PTA_3DNOW_A},
1516 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517 | PTA_3DNOW_A | PTA_SSE},
1518 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519 | PTA_3DNOW_A | PTA_SSE},
1520 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1521 | PTA_3DNOW_A | PTA_SSE},
1522 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1523 | PTA_SSE | PTA_SSE2 },
1524 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1528 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1529 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1530 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1531 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1532 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1533 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1536 int const pta_size = ARRAY_SIZE (processor_alias_table);
1538 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1539 SUBTARGET_OVERRIDE_OPTIONS;
1542 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1543 SUBSUBTARGET_OVERRIDE_OPTIONS;
1546 /* -fPIC is the default for x86_64. */
1547 if (TARGET_MACHO && TARGET_64BIT)
1550 /* Set the default values for switches whose default depends on TARGET_64BIT
1551 in case they weren't overwritten by command line options. */
1554 /* Mach-O doesn't support omitting the frame pointer for now. */
1555 if (flag_omit_frame_pointer == 2)
1556 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1557 if (flag_asynchronous_unwind_tables == 2)
1558 flag_asynchronous_unwind_tables = 1;
1559 if (flag_pcc_struct_return == 2)
1560 flag_pcc_struct_return = 0;
1564 if (flag_omit_frame_pointer == 2)
1565 flag_omit_frame_pointer = 0;
1566 if (flag_asynchronous_unwind_tables == 2)
1567 flag_asynchronous_unwind_tables = 0;
1568 if (flag_pcc_struct_return == 2)
1569 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1572 /* Need to check -mtune=generic first. */
1573 if (ix86_tune_string)
1575 if (!strcmp (ix86_tune_string, "generic")
1576 || !strcmp (ix86_tune_string, "i686")
1577 /* As special support for cross compilers we read -mtune=native
1578 as -mtune=generic. With native compilers we won't see the
1579 -mtune=native, as it was changed by the driver. */
1580 || !strcmp (ix86_tune_string, "native"))
1583 ix86_tune_string = "generic64";
1585 ix86_tune_string = "generic32";
1587 else if (!strncmp (ix86_tune_string, "generic", 7))
1588 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1592 if (ix86_arch_string)
1593 ix86_tune_string = ix86_arch_string;
1594 if (!ix86_tune_string)
1596 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1597 ix86_tune_defaulted = 1;
1600 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1601 need to use a sensible tune option. */
1602 if (!strcmp (ix86_tune_string, "generic")
1603 || !strcmp (ix86_tune_string, "x86-64")
1604 || !strcmp (ix86_tune_string, "i686"))
1607 ix86_tune_string = "generic64";
1609 ix86_tune_string = "generic32";
1612 if (!strcmp (ix86_tune_string, "x86-64"))
1613 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1614 "-mtune=generic instead as appropriate.");
1616 if (!ix86_arch_string)
1617 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1618 if (!strcmp (ix86_arch_string, "generic"))
1619 error ("generic CPU can be used only for -mtune= switch");
1620 if (!strncmp (ix86_arch_string, "generic", 7))
1621 error ("bad value (%s) for -march= switch", ix86_arch_string);
1623 if (ix86_cmodel_string != 0)
1625 if (!strcmp (ix86_cmodel_string, "small"))
1626 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1627 else if (!strcmp (ix86_cmodel_string, "medium"))
1628 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1630 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1631 else if (!strcmp (ix86_cmodel_string, "32"))
1632 ix86_cmodel = CM_32;
1633 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1634 ix86_cmodel = CM_KERNEL;
1635 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1636 ix86_cmodel = CM_LARGE;
1638 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1642 ix86_cmodel = CM_32;
1644 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1646 if (ix86_asm_string != 0)
1649 && !strcmp (ix86_asm_string, "intel"))
1650 ix86_asm_dialect = ASM_INTEL;
1651 else if (!strcmp (ix86_asm_string, "att"))
1652 ix86_asm_dialect = ASM_ATT;
1654 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1656 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1657 error ("code model %qs not supported in the %s bit mode",
1658 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1659 if (ix86_cmodel == CM_LARGE)
1660 sorry ("code model %<large%> not supported yet");
1661 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1662 sorry ("%i-bit mode not compiled in",
1663 (target_flags & MASK_64BIT) ? 64 : 32);
1665 for (i = 0; i < pta_size; i++)
1666 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1668 ix86_arch = processor_alias_table[i].processor;
1669 /* Default cpu tuning to the architecture. */
1670 ix86_tune = ix86_arch;
1671 if (processor_alias_table[i].flags & PTA_MMX
1672 && !(target_flags_explicit & MASK_MMX))
1673 target_flags |= MASK_MMX;
1674 if (processor_alias_table[i].flags & PTA_3DNOW
1675 && !(target_flags_explicit & MASK_3DNOW))
1676 target_flags |= MASK_3DNOW;
1677 if (processor_alias_table[i].flags & PTA_3DNOW_A
1678 && !(target_flags_explicit & MASK_3DNOW_A))
1679 target_flags |= MASK_3DNOW_A;
1680 if (processor_alias_table[i].flags & PTA_SSE
1681 && !(target_flags_explicit & MASK_SSE))
1682 target_flags |= MASK_SSE;
1683 if (processor_alias_table[i].flags & PTA_SSE2
1684 && !(target_flags_explicit & MASK_SSE2))
1685 target_flags |= MASK_SSE2;
1686 if (processor_alias_table[i].flags & PTA_SSE3
1687 && !(target_flags_explicit & MASK_SSE3))
1688 target_flags |= MASK_SSE3;
1689 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1690 x86_prefetch_sse = true;
1691 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1692 error ("CPU you selected does not support x86-64 "
1698 error ("bad value (%s) for -march= switch", ix86_arch_string);
1700 for (i = 0; i < pta_size; i++)
1701 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1703 ix86_tune = processor_alias_table[i].processor;
1704 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1706 if (ix86_tune_defaulted)
1708 ix86_tune_string = "x86-64";
1709 for (i = 0; i < pta_size; i++)
1710 if (! strcmp (ix86_tune_string,
1711 processor_alias_table[i].name))
1713 ix86_tune = processor_alias_table[i].processor;
1716 error ("CPU you selected does not support x86-64 "
1719 /* Intel CPUs have always interpreted SSE prefetch instructions as
1720 NOPs; so, we can enable SSE prefetch instructions even when
1721 -mtune (rather than -march) points us to a processor that has them.
1722 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1723 higher processors. */
1724 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1725 x86_prefetch_sse = true;
1729 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1732 ix86_cost = &size_cost;
1734 ix86_cost = processor_target_table[ix86_tune].cost;
1735 target_flags |= processor_target_table[ix86_tune].target_enable;
1736 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1738 /* Arrange to set up i386_stack_locals for all functions. */
1739 init_machine_status = ix86_init_machine_status;
1741 /* Validate -mregparm= value. */
1742 if (ix86_regparm_string)
1744 i = atoi (ix86_regparm_string);
1745 if (i < 0 || i > REGPARM_MAX)
1746 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1752 ix86_regparm = REGPARM_MAX;
1754 /* If the user has provided any of the -malign-* options,
1755 warn and use that value only if -falign-* is not set.
1756 Remove this code in GCC 3.2 or later. */
1757 if (ix86_align_loops_string)
1759 warning (0, "-malign-loops is obsolete, use -falign-loops");
1760 if (align_loops == 0)
1762 i = atoi (ix86_align_loops_string);
1763 if (i < 0 || i > MAX_CODE_ALIGN)
1764 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1766 align_loops = 1 << i;
1770 if (ix86_align_jumps_string)
1772 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1773 if (align_jumps == 0)
1775 i = atoi (ix86_align_jumps_string);
1776 if (i < 0 || i > MAX_CODE_ALIGN)
1777 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1779 align_jumps = 1 << i;
1783 if (ix86_align_funcs_string)
1785 warning (0, "-malign-functions is obsolete, use -falign-functions");
1786 if (align_functions == 0)
1788 i = atoi (ix86_align_funcs_string);
1789 if (i < 0 || i > MAX_CODE_ALIGN)
1790 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1792 align_functions = 1 << i;
1796 /* Default align_* from the processor table. */
1797 if (align_loops == 0)
1799 align_loops = processor_target_table[ix86_tune].align_loop;
1800 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1802 if (align_jumps == 0)
1804 align_jumps = processor_target_table[ix86_tune].align_jump;
1805 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1807 if (align_functions == 0)
1809 align_functions = processor_target_table[ix86_tune].align_func;
1812 /* Validate -mbranch-cost= value, or provide default. */
1813 ix86_branch_cost = ix86_cost->branch_cost;
1814 if (ix86_branch_cost_string)
1816 i = atoi (ix86_branch_cost_string);
1818 error ("-mbranch-cost=%d is not between 0 and 5", i);
1820 ix86_branch_cost = i;
1822 if (ix86_section_threshold_string)
1824 i = atoi (ix86_section_threshold_string);
1826 error ("-mlarge-data-threshold=%d is negative", i);
1828 ix86_section_threshold = i;
1831 if (ix86_tls_dialect_string)
1833 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1834 ix86_tls_dialect = TLS_DIALECT_GNU;
1835 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1836 ix86_tls_dialect = TLS_DIALECT_GNU2;
1837 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1838 ix86_tls_dialect = TLS_DIALECT_SUN;
1840 error ("bad value (%s) for -mtls-dialect= switch",
1841 ix86_tls_dialect_string);
1844 /* Keep nonleaf frame pointers. */
1845 if (flag_omit_frame_pointer)
1846 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1847 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1848 flag_omit_frame_pointer = 1;
1850 /* If we're doing fast math, we don't care about comparison order
1851 wrt NaNs. This lets us use a shorter comparison sequence. */
1852 if (flag_finite_math_only)
1853 target_flags &= ~MASK_IEEE_FP;
1855 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1856 since the insns won't need emulation. */
1857 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1858 target_flags &= ~MASK_NO_FANCY_MATH_387;
1860 /* Likewise, if the target doesn't have a 387, or we've specified
1861 software floating point, don't use 387 inline intrinsics. */
1863 target_flags |= MASK_NO_FANCY_MATH_387;
1865 /* Turn on SSE2 builtins for -msse3. */
1867 target_flags |= MASK_SSE2;
1869 /* Turn on SSE builtins for -msse2. */
1871 target_flags |= MASK_SSE;
1873 /* Turn on MMX builtins for -msse. */
1876 target_flags |= MASK_MMX & ~target_flags_explicit;
1877 x86_prefetch_sse = true;
1880 /* Turn on MMX builtins for 3Dnow. */
1882 target_flags |= MASK_MMX;
1886 if (TARGET_ALIGN_DOUBLE)
1887 error ("-malign-double makes no sense in the 64bit mode");
1889 error ("-mrtd calling convention not supported in the 64bit mode");
1891 /* Enable by default the SSE and MMX builtins. Do allow the user to
1892 explicitly disable any of these. In particular, disabling SSE and
1893 MMX for kernel code is extremely useful. */
1895 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1896 & ~target_flags_explicit);
1900 /* i386 ABI does not specify red zone. It still makes sense to use it
1901 when programmer takes care to stack from being destroyed. */
1902 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1903 target_flags |= MASK_NO_RED_ZONE;
1906 /* Validate -mpreferred-stack-boundary= value, or provide default.
1907 The default of 128 bits is for Pentium III's SSE __m128. We can't
1908 change it because of optimize_size. Otherwise, we can't mix object
1909 files compiled with -Os and -On. */
1910 ix86_preferred_stack_boundary = 128;
1911 if (ix86_preferred_stack_boundary_string)
1913 i = atoi (ix86_preferred_stack_boundary_string);
1914 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1915 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1916 TARGET_64BIT ? 4 : 2);
1918 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1921 /* Accept -msseregparm only if at least SSE support is enabled. */
1922 if (TARGET_SSEREGPARM
1924 error ("-msseregparm used without SSE enabled");
1926 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1928 if (ix86_fpmath_string != 0)
1930 if (! strcmp (ix86_fpmath_string, "387"))
1931 ix86_fpmath = FPMATH_387;
1932 else if (! strcmp (ix86_fpmath_string, "sse"))
1936 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1937 ix86_fpmath = FPMATH_387;
1940 ix86_fpmath = FPMATH_SSE;
1942 else if (! strcmp (ix86_fpmath_string, "387,sse")
1943 || ! strcmp (ix86_fpmath_string, "sse,387"))
1947 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1948 ix86_fpmath = FPMATH_387;
1950 else if (!TARGET_80387)
1952 warning (0, "387 instruction set disabled, using SSE arithmetics");
1953 ix86_fpmath = FPMATH_SSE;
1956 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1959 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1962 /* If the i387 is disabled, then do not return values in it. */
1964 target_flags &= ~MASK_FLOAT_RETURNS;
1966 if ((x86_accumulate_outgoing_args & TUNEMASK)
1967 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1969 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1971 /* ??? Unwind info is not correct around the CFG unless either a frame
1972 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1973 unwind info generation to be aware of the CFG and propagating states
1975 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1976 || flag_exceptions || flag_non_call_exceptions)
1977 && flag_omit_frame_pointer
1978 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1980 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1981 warning (0, "unwind tables currently require either a frame pointer "
1982 "or -maccumulate-outgoing-args for correctness");
1983 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1986 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1989 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1990 p = strchr (internal_label_prefix, 'X');
1991 internal_label_prefix_len = p - internal_label_prefix;
1995 /* When scheduling description is not available, disable scheduler pass
1996 so it won't slow down the compilation and make x87 code slower. */
1997 if (!TARGET_SCHEDULE)
1998 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2001 /* switch to the appropriate section for output of DECL.
2002 DECL is either a `VAR_DECL' node or a constant of some sort.
2003 RELOC indicates whether forming the initial value of DECL requires
2004 link-time relocations. */
2007 x86_64_elf_select_section (tree decl, int reloc,
2008 unsigned HOST_WIDE_INT align)
2010 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2011 && ix86_in_large_data_p (decl))
2013 const char *sname = NULL;
2014 unsigned int flags = SECTION_WRITE;
2015 switch (categorize_decl_for_section (decl, reloc))
2020 case SECCAT_DATA_REL:
2021 sname = ".ldata.rel";
2023 case SECCAT_DATA_REL_LOCAL:
2024 sname = ".ldata.rel.local";
2026 case SECCAT_DATA_REL_RO:
2027 sname = ".ldata.rel.ro";
2029 case SECCAT_DATA_REL_RO_LOCAL:
2030 sname = ".ldata.rel.ro.local";
2034 flags |= SECTION_BSS;
2037 case SECCAT_RODATA_MERGE_STR:
2038 case SECCAT_RODATA_MERGE_STR_INIT:
2039 case SECCAT_RODATA_MERGE_CONST:
2043 case SECCAT_SRODATA:
2050 /* We don't split these for medium model. Place them into
2051 default sections and hope for best. */
2056 /* We might get called with string constants, but get_named_section
2057 doesn't like them as they are not DECLs. Also, we need to set
2058 flags in that case. */
2060 return get_section (sname, flags, NULL);
2061 return get_named_section (decl, sname, reloc);
2064 return default_elf_select_section (decl, reloc, align);
2067 /* Build up a unique section name, expressed as a
2068 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2069 RELOC indicates whether the initial value of EXP requires
2070 link-time relocations. */
2073 x86_64_elf_unique_section (tree decl, int reloc)
2075 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2076 && ix86_in_large_data_p (decl))
2078 const char *prefix = NULL;
2079 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2080 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2082 switch (categorize_decl_for_section (decl, reloc))
2085 case SECCAT_DATA_REL:
2086 case SECCAT_DATA_REL_LOCAL:
2087 case SECCAT_DATA_REL_RO:
2088 case SECCAT_DATA_REL_RO_LOCAL:
2089 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2092 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2095 case SECCAT_RODATA_MERGE_STR:
2096 case SECCAT_RODATA_MERGE_STR_INIT:
2097 case SECCAT_RODATA_MERGE_CONST:
2098 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2100 case SECCAT_SRODATA:
2107 /* We don't split these for medium model. Place them into
2108 default sections and hope for best. */
2116 plen = strlen (prefix);
2118 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2119 name = targetm.strip_name_encoding (name);
2120 nlen = strlen (name);
2122 string = alloca (nlen + plen + 1);
2123 memcpy (string, prefix, plen);
2124 memcpy (string + plen, name, nlen + 1);
2126 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2130 default_unique_section (decl, reloc);
2133 #ifdef COMMON_ASM_OP
2134 /* This says how to output assembler code to declare an
2135 uninitialized external linkage data object.
2137 For medium model x86-64 we need to use .largecomm opcode for
2140 x86_elf_aligned_common (FILE *file,
2141 const char *name, unsigned HOST_WIDE_INT size,
2144 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2145 && size > (unsigned int)ix86_section_threshold)
2146 fprintf (file, ".largecomm\t");
2148 fprintf (file, "%s", COMMON_ASM_OP);
2149 assemble_name (file, name);
2150 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2151 size, align / BITS_PER_UNIT);
2154 /* Utility function for targets to use in implementing
2155 ASM_OUTPUT_ALIGNED_BSS. */
2158 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2159 const char *name, unsigned HOST_WIDE_INT size,
2162 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2163 && size > (unsigned int)ix86_section_threshold)
2164 switch_to_section (get_named_section (decl, ".lbss", 0));
2166 switch_to_section (bss_section);
2167 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2168 #ifdef ASM_DECLARE_OBJECT_NAME
2169 last_assemble_variable_decl = decl;
2170 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2172 /* Standard thing is just output label for the object. */
2173 ASM_OUTPUT_LABEL (file, name);
2174 #endif /* ASM_DECLARE_OBJECT_NAME */
2175 ASM_OUTPUT_SKIP (file, size ? size : 1);
2180 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2182 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2183 make the problem with not enough registers even worse. */
2184 #ifdef INSN_SCHEDULING
2186 flag_schedule_insns = 0;
2190 /* The Darwin libraries never set errno, so we might as well
2191 avoid calling them when that's the only reason we would. */
2192 flag_errno_math = 0;
2194 /* The default values of these switches depend on the TARGET_64BIT
2195 that is not known at this moment. Mark these values with 2 and
2196 let user the to override these. In case there is no command line option
2197 specifying them, we will set the defaults in override_options. */
2199 flag_omit_frame_pointer = 2;
2200 flag_pcc_struct_return = 2;
2201 flag_asynchronous_unwind_tables = 2;
2202 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2203 SUBTARGET_OPTIMIZATION_OPTIONS;
2207 /* Table of valid machine attributes. */
2208 const struct attribute_spec ix86_attribute_table[] =
2210 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2211 /* Stdcall attribute says callee is responsible for popping arguments
2212 if they are not variable. */
2213 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2214 /* Fastcall attribute says callee is responsible for popping arguments
2215 if they are not variable. */
2216 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2217 /* Cdecl attribute says the callee is a normal C declaration */
2218 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2219 /* Regparm attribute specifies how many integer arguments are to be
2220 passed in registers. */
2221 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2222 /* Sseregparm attribute says we are using x86_64 calling conventions
2223 for FP arguments. */
2224 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2225 /* force_align_arg_pointer says this function realigns the stack at entry. */
2226 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2227 false, true, true, ix86_handle_cconv_attribute },
2228 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2229 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2230 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2231 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2233 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2234 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2235 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2236 SUBTARGET_ATTRIBUTE_TABLE,
2238 { NULL, 0, 0, false, false, false, NULL }
2241 /* Decide whether we can make a sibling call to a function. DECL is the
2242 declaration of the function being targeted by the call and EXP is the
2243 CALL_EXPR representing the call. */
2246 ix86_function_ok_for_sibcall (tree decl, tree exp)
2251 /* If we are generating position-independent code, we cannot sibcall
2252 optimize any indirect call, or a direct call to a global function,
2253 as the PLT requires %ebx be live. */
2254 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2261 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2262 if (POINTER_TYPE_P (func))
2263 func = TREE_TYPE (func);
2266 /* Check that the return value locations are the same. Like
2267 if we are returning floats on the 80387 register stack, we cannot
2268 make a sibcall from a function that doesn't return a float to a
2269 function that does or, conversely, from a function that does return
2270 a float to a function that doesn't; the necessary stack adjustment
2271 would not be executed. This is also the place we notice
2272 differences in the return value ABI. Note that it is ok for one
2273 of the functions to have void return type as long as the return
2274 value of the other is passed in a register. */
2275 a = ix86_function_value (TREE_TYPE (exp), func, false);
2276 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2278 if (STACK_REG_P (a) || STACK_REG_P (b))
2280 if (!rtx_equal_p (a, b))
2283 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2285 else if (!rtx_equal_p (a, b))
2288 /* If this call is indirect, we'll need to be able to use a call-clobbered
2289 register for the address of the target function. Make sure that all
2290 such registers are not used for passing parameters. */
2291 if (!decl && !TARGET_64BIT)
2295 /* We're looking at the CALL_EXPR, we need the type of the function. */
2296 type = TREE_OPERAND (exp, 0); /* pointer expression */
2297 type = TREE_TYPE (type); /* pointer type */
2298 type = TREE_TYPE (type); /* function type */
2300 if (ix86_function_regparm (type, NULL) >= 3)
2302 /* ??? Need to count the actual number of registers to be used,
2303 not the possible number of registers. Fix later. */
2308 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2309 /* Dllimport'd functions are also called indirectly. */
2310 if (decl && DECL_DLLIMPORT_P (decl)
2311 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2315 /* If we forced aligned the stack, then sibcalling would unalign the
2316 stack, which may break the called function. */
2317 if (cfun->machine->force_align_arg_pointer)
2320 /* Otherwise okay. That also includes certain types of indirect calls. */
2324 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2325 calling convention attributes;
2326 arguments as in struct attribute_spec.handler. */
2329 ix86_handle_cconv_attribute (tree *node, tree name,
2331 int flags ATTRIBUTE_UNUSED,
2334 if (TREE_CODE (*node) != FUNCTION_TYPE
2335 && TREE_CODE (*node) != METHOD_TYPE
2336 && TREE_CODE (*node) != FIELD_DECL
2337 && TREE_CODE (*node) != TYPE_DECL)
2339 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2340 IDENTIFIER_POINTER (name));
2341 *no_add_attrs = true;
2345 /* Can combine regparm with all attributes but fastcall. */
2346 if (is_attribute_p ("regparm", name))
2350 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2352 error ("fastcall and regparm attributes are not compatible");
2355 cst = TREE_VALUE (args);
2356 if (TREE_CODE (cst) != INTEGER_CST)
2358 warning (OPT_Wattributes,
2359 "%qs attribute requires an integer constant argument",
2360 IDENTIFIER_POINTER (name));
2361 *no_add_attrs = true;
2363 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2365 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2366 IDENTIFIER_POINTER (name), REGPARM_MAX);
2367 *no_add_attrs = true;
2371 && lookup_attribute (ix86_force_align_arg_pointer_string,
2372 TYPE_ATTRIBUTES (*node))
2373 && compare_tree_int (cst, REGPARM_MAX-1))
2375 error ("%s functions limited to %d register parameters",
2376 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2384 warning (OPT_Wattributes, "%qs attribute ignored",
2385 IDENTIFIER_POINTER (name));
2386 *no_add_attrs = true;
2390 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2391 if (is_attribute_p ("fastcall", name))
2393 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2395 error ("fastcall and cdecl attributes are not compatible");
2397 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2399 error ("fastcall and stdcall attributes are not compatible");
2401 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2403 error ("fastcall and regparm attributes are not compatible");
2407 /* Can combine stdcall with fastcall (redundant), regparm and
2409 else if (is_attribute_p ("stdcall", name))
2411 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2413 error ("stdcall and cdecl attributes are not compatible");
2415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2417 error ("stdcall and fastcall attributes are not compatible");
2421 /* Can combine cdecl with regparm and sseregparm. */
2422 else if (is_attribute_p ("cdecl", name))
2424 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2426 error ("stdcall and cdecl attributes are not compatible");
2428 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2430 error ("fastcall and cdecl attributes are not compatible");
2434 /* Can combine sseregparm with all attributes. */
2439 /* Return 0 if the attributes for two types are incompatible, 1 if they
2440 are compatible, and 2 if they are nearly compatible (which causes a
2441 warning to be generated). */
2444 ix86_comp_type_attributes (tree type1, tree type2)
2446 /* Check for mismatch of non-default calling convention. */
2447 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2449 if (TREE_CODE (type1) != FUNCTION_TYPE)
2452 /* Check for mismatched fastcall/regparm types. */
2453 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2454 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2455 || (ix86_function_regparm (type1, NULL)
2456 != ix86_function_regparm (type2, NULL)))
2459 /* Check for mismatched sseregparm types. */
2460 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2461 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2464 /* Check for mismatched return types (cdecl vs stdcall). */
2465 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2466 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2472 /* Return the regparm value for a function with the indicated TYPE and DECL.
2473 DECL may be NULL when calling function indirectly
2474 or considering a libcall. */
2477 ix86_function_regparm (tree type, tree decl)
2480 int regparm = ix86_regparm;
2481 bool user_convention = false;
2485 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2488 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2489 user_convention = true;
2492 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2495 user_convention = true;
2498 /* Use register calling convention for local functions when possible. */
2499 if (!TARGET_64BIT && !user_convention && decl
2500 && flag_unit_at_a_time && !profile_flag)
2502 struct cgraph_local_info *i = cgraph_local_info (decl);
2505 int local_regparm, globals = 0, regno;
2507 /* Make sure no regparm register is taken by a global register
2509 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2510 if (global_regs[local_regparm])
2512 /* We can't use regparm(3) for nested functions as these use
2513 static chain pointer in third argument. */
2514 if (local_regparm == 3
2515 && decl_function_context (decl)
2516 && !DECL_NO_STATIC_CHAIN (decl))
2518 /* If the function realigns its stackpointer, the
2519 prologue will clobber %ecx. If we've already
2520 generated code for the callee, the callee
2521 DECL_STRUCT_FUNCTION is gone, so we fall back to
2522 scanning the attributes for the self-realigning
2524 if ((DECL_STRUCT_FUNCTION (decl)
2525 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2526 || (!DECL_STRUCT_FUNCTION (decl)
2527 && lookup_attribute (ix86_force_align_arg_pointer_string,
2528 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2530 /* Each global register variable increases register preassure,
2531 so the more global reg vars there are, the smaller regparm
2532 optimization use, unless requested by the user explicitly. */
2533 for (regno = 0; regno < 6; regno++)
2534 if (global_regs[regno])
2537 = globals < local_regparm ? local_regparm - globals : 0;
2539 if (local_regparm > regparm)
2540 regparm = local_regparm;
2547 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2548 DFmode (2) arguments in SSE registers for a function with the
2549 indicated TYPE and DECL. DECL may be NULL when calling function
2550 indirectly or considering a libcall. Otherwise return 0. */
2553 ix86_function_sseregparm (tree type, tree decl)
2555 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2556 by the sseregparm attribute. */
2557 if (TARGET_SSEREGPARM
2559 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2564 error ("Calling %qD with attribute sseregparm without "
2565 "SSE/SSE2 enabled", decl);
2567 error ("Calling %qT with attribute sseregparm without "
2568 "SSE/SSE2 enabled", type);
2575 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2576 (and DFmode for SSE2) arguments in SSE registers,
2577 even for 32-bit targets. */
2578 if (!TARGET_64BIT && decl
2579 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2581 struct cgraph_local_info *i = cgraph_local_info (decl);
2583 return TARGET_SSE2 ? 2 : 1;
2589 /* Return true if EAX is live at the start of the function. Used by
2590 ix86_expand_prologue to determine if we need special help before
2591 calling allocate_stack_worker. */
2594 ix86_eax_live_at_start_p (void)
2596 /* Cheat. Don't bother working forward from ix86_function_regparm
2597 to the function type to whether an actual argument is located in
2598 eax. Instead just look at cfg info, which is still close enough
2599 to correct at this point. This gives false positives for broken
2600 functions that might use uninitialized data that happens to be
2601 allocated in eax, but who cares? */
2602 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2605 /* Value is the number of bytes of arguments automatically
2606 popped when returning from a subroutine call.
2607 FUNDECL is the declaration node of the function (as a tree),
2608 FUNTYPE is the data type of the function (as a tree),
2609 or for a library call it is an identifier node for the subroutine name.
2610 SIZE is the number of bytes of arguments passed on the stack.
2612 On the 80386, the RTD insn may be used to pop them if the number
2613 of args is fixed, but if the number is variable then the caller
2614 must pop them all. RTD can't be used for library calls now
2615 because the library is compiled with the Unix compiler.
2616 Use of RTD is a selectable option, since it is incompatible with
2617 standard Unix calling sequences. If the option is not selected,
2618 the caller must always pop the args.
2620 The attribute stdcall is equivalent to RTD on a per module basis. */
2623 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2625 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2627 /* Cdecl functions override -mrtd, and never pop the stack. */
2628 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2630 /* Stdcall and fastcall functions will pop the stack if not
2632 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2633 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2637 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2638 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2639 == void_type_node)))
2643 /* Lose any fake structure return argument if it is passed on the stack. */
2644 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2646 && !KEEP_AGGREGATE_RETURN_POINTER)
2648 int nregs = ix86_function_regparm (funtype, fundecl);
2651 return GET_MODE_SIZE (Pmode);
2657 /* Argument support functions. */
2659 /* Return true when register may be used to pass function parameters. */
2661 ix86_function_arg_regno_p (int regno)
2667 return (regno < REGPARM_MAX
2668 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2670 return (regno < REGPARM_MAX
2671 || (TARGET_MMX && MMX_REGNO_P (regno)
2672 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2673 || (TARGET_SSE && SSE_REGNO_P (regno)
2674 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2679 if (SSE_REGNO_P (regno) && TARGET_SSE)
2684 if (TARGET_SSE && SSE_REGNO_P (regno)
2685 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2688 /* RAX is used as hidden argument to va_arg functions. */
2691 for (i = 0; i < REGPARM_MAX; i++)
2692 if (regno == x86_64_int_parameter_registers[i])
2697 /* Return if we do not know how to pass TYPE solely in registers. */
2700 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2702 if (must_pass_in_stack_var_size_or_pad (mode, type))
2705 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2706 The layout_type routine is crafty and tries to trick us into passing
2707 currently unsupported vector types on the stack by using TImode. */
2708 return (!TARGET_64BIT && mode == TImode
2709 && type && TREE_CODE (type) != VECTOR_TYPE);
2712 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2713 for a call to a function whose data type is FNTYPE.
2714 For a library call, FNTYPE is 0. */
2717 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2718 tree fntype, /* tree ptr for function decl */
2719 rtx libname, /* SYMBOL_REF of library name or 0 */
2722 static CUMULATIVE_ARGS zero_cum;
2723 tree param, next_param;
2725 if (TARGET_DEBUG_ARG)
2727 fprintf (stderr, "\ninit_cumulative_args (");
2729 fprintf (stderr, "fntype code = %s, ret code = %s",
2730 tree_code_name[(int) TREE_CODE (fntype)],
2731 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2733 fprintf (stderr, "no fntype");
2736 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2741 /* Set up the number of registers to use for passing arguments. */
2742 cum->nregs = ix86_regparm;
2744 cum->sse_nregs = SSE_REGPARM_MAX;
2746 cum->mmx_nregs = MMX_REGPARM_MAX;
2747 cum->warn_sse = true;
2748 cum->warn_mmx = true;
2749 cum->maybe_vaarg = false;
2751 /* Use ecx and edx registers if function has fastcall attribute,
2752 else look for regparm information. */
2753 if (fntype && !TARGET_64BIT)
2755 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2761 cum->nregs = ix86_function_regparm (fntype, fndecl);
2764 /* Set up the number of SSE registers used for passing SFmode
2765 and DFmode arguments. Warn for mismatching ABI. */
2766 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2768 /* Determine if this function has variable arguments. This is
2769 indicated by the last argument being 'void_type_mode' if there
2770 are no variable arguments. If there are variable arguments, then
2771 we won't pass anything in registers in 32-bit mode. */
2773 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2775 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2776 param != 0; param = next_param)
2778 next_param = TREE_CHAIN (param);
2779 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2789 cum->float_in_sse = 0;
2791 cum->maybe_vaarg = true;
2795 if ((!fntype && !libname)
2796 || (fntype && !TYPE_ARG_TYPES (fntype)))
2797 cum->maybe_vaarg = true;
2799 if (TARGET_DEBUG_ARG)
2800 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2805 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2806 But in the case of vector types, it is some vector mode.
2808 When we have only some of our vector isa extensions enabled, then there
2809 are some modes for which vector_mode_supported_p is false. For these
2810 modes, the generic vector support in gcc will choose some non-vector mode
2811 in order to implement the type. By computing the natural mode, we'll
2812 select the proper ABI location for the operand and not depend on whatever
2813 the middle-end decides to do with these vector types. */
2815 static enum machine_mode
2816 type_natural_mode (tree type)
2818 enum machine_mode mode = TYPE_MODE (type);
2820 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2822 HOST_WIDE_INT size = int_size_in_bytes (type);
2823 if ((size == 8 || size == 16)
2824 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2825 && TYPE_VECTOR_SUBPARTS (type) > 1)
2827 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2829 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2830 mode = MIN_MODE_VECTOR_FLOAT;
2832 mode = MIN_MODE_VECTOR_INT;
2834 /* Get the mode which has this inner mode and number of units. */
2835 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2836 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2837 && GET_MODE_INNER (mode) == innermode)
2847 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2848 this may not agree with the mode that the type system has chosen for the
2849 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2850 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2853 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2858 if (orig_mode != BLKmode)
2859 tmp = gen_rtx_REG (orig_mode, regno);
2862 tmp = gen_rtx_REG (mode, regno);
2863 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2864 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2870 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2871 of this code is to classify each 8bytes of incoming argument by the register
2872 class and assign registers accordingly. */
2874 /* Return the union class of CLASS1 and CLASS2.
2875 See the x86-64 PS ABI for details. */
2877 static enum x86_64_reg_class
2878 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2880 /* Rule #1: If both classes are equal, this is the resulting class. */
2881 if (class1 == class2)
2884 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2886 if (class1 == X86_64_NO_CLASS)
2888 if (class2 == X86_64_NO_CLASS)
2891 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2892 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2893 return X86_64_MEMORY_CLASS;
2895 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2896 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2897 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2898 return X86_64_INTEGERSI_CLASS;
2899 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2900 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2901 return X86_64_INTEGER_CLASS;
2903 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2905 if (class1 == X86_64_X87_CLASS
2906 || class1 == X86_64_X87UP_CLASS
2907 || class1 == X86_64_COMPLEX_X87_CLASS
2908 || class2 == X86_64_X87_CLASS
2909 || class2 == X86_64_X87UP_CLASS
2910 || class2 == X86_64_COMPLEX_X87_CLASS)
2911 return X86_64_MEMORY_CLASS;
2913 /* Rule #6: Otherwise class SSE is used. */
2914 return X86_64_SSE_CLASS;
2917 /* Classify the argument of type TYPE and mode MODE.
2918 CLASSES will be filled by the register class used to pass each word
2919 of the operand. The number of words is returned. In case the parameter
2920 should be passed in memory, 0 is returned. As a special case for zero
2921 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2923 BIT_OFFSET is used internally for handling records and specifies offset
2924 of the offset in bits modulo 256 to avoid overflow cases.
2926 See the x86-64 PS ABI for details.
2930 classify_argument (enum machine_mode mode, tree type,
2931 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2933 HOST_WIDE_INT bytes =
2934 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2935 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2937 /* Variable sized entities are always passed/returned in memory. */
2941 if (mode != VOIDmode
2942 && targetm.calls.must_pass_in_stack (mode, type))
2945 if (type && AGGREGATE_TYPE_P (type))
2949 enum x86_64_reg_class subclasses[MAX_CLASSES];
2951 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2955 for (i = 0; i < words; i++)
2956 classes[i] = X86_64_NO_CLASS;
2958 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2959 signalize memory class, so handle it as special case. */
2962 classes[0] = X86_64_NO_CLASS;
2966 /* Classify each field of record and merge classes. */
2967 switch (TREE_CODE (type))
2970 /* For classes first merge in the field of the subclasses. */
2971 if (TYPE_BINFO (type))
2973 tree binfo, base_binfo;
2976 for (binfo = TYPE_BINFO (type), basenum = 0;
2977 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2980 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2981 tree type = BINFO_TYPE (base_binfo);
2983 num = classify_argument (TYPE_MODE (type),
2985 (offset + bit_offset) % 256);
2988 for (i = 0; i < num; i++)
2990 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2992 merge_classes (subclasses[i], classes[i + pos]);
2996 /* And now merge the fields of structure. */
2997 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2999 if (TREE_CODE (field) == FIELD_DECL)
3003 if (TREE_TYPE (field) == error_mark_node)
3006 /* Bitfields are always classified as integer. Handle them
3007 early, since later code would consider them to be
3008 misaligned integers. */
3009 if (DECL_BIT_FIELD (field))
3011 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3012 i < ((int_bit_position (field) + (bit_offset % 64))
3013 + tree_low_cst (DECL_SIZE (field), 0)
3016 merge_classes (X86_64_INTEGER_CLASS,
3021 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3022 TREE_TYPE (field), subclasses,
3023 (int_bit_position (field)
3024 + bit_offset) % 256);
3027 for (i = 0; i < num; i++)
3030 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3032 merge_classes (subclasses[i], classes[i + pos]);
3040 /* Arrays are handled as small records. */
3043 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3044 TREE_TYPE (type), subclasses, bit_offset);
3048 /* The partial classes are now full classes. */
3049 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3050 subclasses[0] = X86_64_SSE_CLASS;
3051 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3052 subclasses[0] = X86_64_INTEGER_CLASS;
3054 for (i = 0; i < words; i++)
3055 classes[i] = subclasses[i % num];
3060 case QUAL_UNION_TYPE:
3061 /* Unions are similar to RECORD_TYPE but offset is always 0.
3064 /* Unions are not derived. */
3065 gcc_assert (!TYPE_BINFO (type)
3066 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3067 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3069 if (TREE_CODE (field) == FIELD_DECL)
3073 if (TREE_TYPE (field) == error_mark_node)
3076 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3077 TREE_TYPE (field), subclasses,
3081 for (i = 0; i < num; i++)
3082 classes[i] = merge_classes (subclasses[i], classes[i]);
3091 /* Final merger cleanup. */
3092 for (i = 0; i < words; i++)
3094 /* If one class is MEMORY, everything should be passed in
3096 if (classes[i] == X86_64_MEMORY_CLASS)
3099 /* The X86_64_SSEUP_CLASS should be always preceded by
3100 X86_64_SSE_CLASS. */
3101 if (classes[i] == X86_64_SSEUP_CLASS
3102 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3103 classes[i] = X86_64_SSE_CLASS;
3105 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3106 if (classes[i] == X86_64_X87UP_CLASS
3107 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3108 classes[i] = X86_64_SSE_CLASS;
3113 /* Compute alignment needed. We align all types to natural boundaries with
3114 exception of XFmode that is aligned to 64bits. */
3115 if (mode != VOIDmode && mode != BLKmode)
3117 int mode_alignment = GET_MODE_BITSIZE (mode);
3120 mode_alignment = 128;
3121 else if (mode == XCmode)
3122 mode_alignment = 256;
3123 if (COMPLEX_MODE_P (mode))
3124 mode_alignment /= 2;
3125 /* Misaligned fields are always returned in memory. */
3126 if (bit_offset % mode_alignment)
3130 /* for V1xx modes, just use the base mode */
3131 if (VECTOR_MODE_P (mode)
3132 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3133 mode = GET_MODE_INNER (mode);
3135 /* Classification of atomic types. */
3140 classes[0] = X86_64_SSE_CLASS;
3143 classes[0] = X86_64_SSE_CLASS;
3144 classes[1] = X86_64_SSEUP_CLASS;
3153 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3154 classes[0] = X86_64_INTEGERSI_CLASS;
3156 classes[0] = X86_64_INTEGER_CLASS;
3160 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3165 if (!(bit_offset % 64))
3166 classes[0] = X86_64_SSESF_CLASS;
3168 classes[0] = X86_64_SSE_CLASS;
3171 classes[0] = X86_64_SSEDF_CLASS;
3174 classes[0] = X86_64_X87_CLASS;
3175 classes[1] = X86_64_X87UP_CLASS;
3178 classes[0] = X86_64_SSE_CLASS;
3179 classes[1] = X86_64_SSEUP_CLASS;
3182 classes[0] = X86_64_SSE_CLASS;
3185 classes[0] = X86_64_SSEDF_CLASS;
3186 classes[1] = X86_64_SSEDF_CLASS;
3189 classes[0] = X86_64_COMPLEX_X87_CLASS;
3192 /* This modes is larger than 16 bytes. */
3200 classes[0] = X86_64_SSE_CLASS;
3201 classes[1] = X86_64_SSEUP_CLASS;
3207 classes[0] = X86_64_SSE_CLASS;
3213 gcc_assert (VECTOR_MODE_P (mode));
3218 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3220 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3221 classes[0] = X86_64_INTEGERSI_CLASS;
3223 classes[0] = X86_64_INTEGER_CLASS;
3224 classes[1] = X86_64_INTEGER_CLASS;
3225 return 1 + (bytes > 8);
3229 /* Examine the argument and return set number of register required in each
3230 class. Return 0 iff parameter should be passed in memory. */
3232 examine_argument (enum machine_mode mode, tree type, int in_return,
3233 int *int_nregs, int *sse_nregs)
3235 enum x86_64_reg_class class[MAX_CLASSES];
3236 int n = classify_argument (mode, type, class, 0);
3242 for (n--; n >= 0; n--)
3245 case X86_64_INTEGER_CLASS:
3246 case X86_64_INTEGERSI_CLASS:
3249 case X86_64_SSE_CLASS:
3250 case X86_64_SSESF_CLASS:
3251 case X86_64_SSEDF_CLASS:
3254 case X86_64_NO_CLASS:
3255 case X86_64_SSEUP_CLASS:
3257 case X86_64_X87_CLASS:
3258 case X86_64_X87UP_CLASS:
3262 case X86_64_COMPLEX_X87_CLASS:
3263 return in_return ? 2 : 0;
3264 case X86_64_MEMORY_CLASS:
3270 /* Construct container for the argument used by GCC interface. See
3271 FUNCTION_ARG for the detailed description. */
3274 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3275 tree type, int in_return, int nintregs, int nsseregs,
3276 const int *intreg, int sse_regno)
3278 /* The following variables hold the static issued_error state. */
3279 static bool issued_sse_arg_error;
3280 static bool issued_sse_ret_error;
3281 static bool issued_x87_ret_error;
3283 enum machine_mode tmpmode;
3285 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3286 enum x86_64_reg_class class[MAX_CLASSES];
3290 int needed_sseregs, needed_intregs;
3291 rtx exp[MAX_CLASSES];
3294 n = classify_argument (mode, type, class, 0);
3295 if (TARGET_DEBUG_ARG)
3298 fprintf (stderr, "Memory class\n");
3301 fprintf (stderr, "Classes:");
3302 for (i = 0; i < n; i++)
3304 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3306 fprintf (stderr, "\n");
3311 if (!examine_argument (mode, type, in_return, &needed_intregs,
3314 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3317 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3318 some less clueful developer tries to use floating-point anyway. */
3319 if (needed_sseregs && !TARGET_SSE)
3323 if (!issued_sse_ret_error)
3325 error ("SSE register return with SSE disabled");
3326 issued_sse_ret_error = true;
3329 else if (!issued_sse_arg_error)
3331 error ("SSE register argument with SSE disabled");
3332 issued_sse_arg_error = true;
3337 /* Likewise, error if the ABI requires us to return values in the
3338 x87 registers and the user specified -mno-80387. */
3339 if (!TARGET_80387 && in_return)
3340 for (i = 0; i < n; i++)
3341 if (class[i] == X86_64_X87_CLASS
3342 || class[i] == X86_64_X87UP_CLASS
3343 || class[i] == X86_64_COMPLEX_X87_CLASS)
3345 if (!issued_x87_ret_error)
3347 error ("x87 register return with x87 disabled");
3348 issued_x87_ret_error = true;
3353 /* First construct simple cases. Avoid SCmode, since we want to use
3354 single register to pass this type. */
3355 if (n == 1 && mode != SCmode)
3358 case X86_64_INTEGER_CLASS:
3359 case X86_64_INTEGERSI_CLASS:
3360 return gen_rtx_REG (mode, intreg[0]);
3361 case X86_64_SSE_CLASS:
3362 case X86_64_SSESF_CLASS:
3363 case X86_64_SSEDF_CLASS:
3364 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3365 case X86_64_X87_CLASS:
3366 case X86_64_COMPLEX_X87_CLASS:
3367 return gen_rtx_REG (mode, FIRST_STACK_REG);
3368 case X86_64_NO_CLASS:
3369 /* Zero sized array, struct or class. */
3374 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3376 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3378 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3379 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3380 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3381 && class[1] == X86_64_INTEGER_CLASS
3382 && (mode == CDImode || mode == TImode || mode == TFmode)
3383 && intreg[0] + 1 == intreg[1])
3384 return gen_rtx_REG (mode, intreg[0]);
3386 /* Otherwise figure out the entries of the PARALLEL. */
3387 for (i = 0; i < n; i++)
3391 case X86_64_NO_CLASS:
3393 case X86_64_INTEGER_CLASS:
3394 case X86_64_INTEGERSI_CLASS:
3395 /* Merge TImodes on aligned occasions here too. */
3396 if (i * 8 + 8 > bytes)
3397 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3398 else if (class[i] == X86_64_INTEGERSI_CLASS)
3402 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3403 if (tmpmode == BLKmode)
3405 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3406 gen_rtx_REG (tmpmode, *intreg),
3410 case X86_64_SSESF_CLASS:
3411 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3412 gen_rtx_REG (SFmode,
3413 SSE_REGNO (sse_regno)),
3417 case X86_64_SSEDF_CLASS:
3418 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3419 gen_rtx_REG (DFmode,
3420 SSE_REGNO (sse_regno)),
3424 case X86_64_SSE_CLASS:
3425 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3429 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3430 gen_rtx_REG (tmpmode,
3431 SSE_REGNO (sse_regno)),
3433 if (tmpmode == TImode)
3442 /* Empty aligned struct, union or class. */
3446 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3447 for (i = 0; i < nexps; i++)
3448 XVECEXP (ret, 0, i) = exp [i];
3452 /* Update the data in CUM to advance over an argument
3453 of mode MODE and data type TYPE.
3454 (TYPE is null for libcalls where that information may not be available.) */
3457 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3458 tree type, int named)
3461 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3462 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3465 mode = type_natural_mode (type);
3467 if (TARGET_DEBUG_ARG)
3468 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3469 "mode=%s, named=%d)\n\n",
3470 words, cum->words, cum->nregs, cum->sse_nregs,
3471 GET_MODE_NAME (mode), named);
3475 int int_nregs, sse_nregs;
3476 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3477 cum->words += words;
3478 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3480 cum->nregs -= int_nregs;
3481 cum->sse_nregs -= sse_nregs;
3482 cum->regno += int_nregs;
3483 cum->sse_regno += sse_nregs;
3486 cum->words += words;
3504 cum->words += words;
3505 cum->nregs -= words;
3506 cum->regno += words;
3508 if (cum->nregs <= 0)
3516 if (cum->float_in_sse < 2)
3519 if (cum->float_in_sse < 1)
3530 if (!type || !AGGREGATE_TYPE_P (type))
3532 cum->sse_words += words;
3533 cum->sse_nregs -= 1;
3534 cum->sse_regno += 1;
3535 if (cum->sse_nregs <= 0)
3547 if (!type || !AGGREGATE_TYPE_P (type))
3549 cum->mmx_words += words;
3550 cum->mmx_nregs -= 1;
3551 cum->mmx_regno += 1;
3552 if (cum->mmx_nregs <= 0)
3563 /* Define where to put the arguments to a function.
3564 Value is zero to push the argument on the stack,
3565 or a hard register in which to store the argument.
3567 MODE is the argument's machine mode.
3568 TYPE is the data type of the argument (as a tree).
3569 This is null for libcalls where that information may
3571 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3572 the preceding args and about the function being called.
3573 NAMED is nonzero if this argument is a named parameter
3574 (otherwise it is an extra parameter matching an ellipsis). */
3577 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3578 tree type, int named)
3580 enum machine_mode mode = orig_mode;
3583 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3584 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3585 static bool warnedsse, warnedmmx;
3587 /* To simplify the code below, represent vector types with a vector mode
3588 even if MMX/SSE are not active. */
3589 if (type && TREE_CODE (type) == VECTOR_TYPE)
3590 mode = type_natural_mode (type);
3592 /* Handle a hidden AL argument containing number of registers for varargs
3593 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3595 if (mode == VOIDmode)
3598 return GEN_INT (cum->maybe_vaarg
3599 ? (cum->sse_nregs < 0
3607 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3609 &x86_64_int_parameter_registers [cum->regno],
3614 /* For now, pass fp/complex values on the stack. */
3626 if (words <= cum->nregs)
3628 int regno = cum->regno;
3630 /* Fastcall allocates the first two DWORD (SImode) or
3631 smaller arguments to ECX and EDX. */
3634 if (mode == BLKmode || mode == DImode)
3637 /* ECX not EAX is the first allocated register. */
3641 ret = gen_rtx_REG (mode, regno);
3645 if (cum->float_in_sse < 2)
3648 if (cum->float_in_sse < 1)
3658 if (!type || !AGGREGATE_TYPE_P (type))
3660 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3663 warning (0, "SSE vector argument without SSE enabled "
3667 ret = gen_reg_or_parallel (mode, orig_mode,
3668 cum->sse_regno + FIRST_SSE_REG);
3675 if (!type || !AGGREGATE_TYPE_P (type))
3677 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3680 warning (0, "MMX vector argument without MMX enabled "
3684 ret = gen_reg_or_parallel (mode, orig_mode,
3685 cum->mmx_regno + FIRST_MMX_REG);
3690 if (TARGET_DEBUG_ARG)
3693 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3694 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3697 print_simple_rtl (stderr, ret);
3699 fprintf (stderr, ", stack");
3701 fprintf (stderr, " )\n");
3707 /* A C expression that indicates when an argument must be passed by
3708 reference. If nonzero for an argument, a copy of that argument is
3709 made in memory and a pointer to the argument is passed instead of
3710 the argument itself. The pointer is passed in whatever way is
3711 appropriate for passing a pointer to that type. */
3714 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3715 enum machine_mode mode ATTRIBUTE_UNUSED,
3716 tree type, bool named ATTRIBUTE_UNUSED)
3721 if (type && int_size_in_bytes (type) == -1)
3723 if (TARGET_DEBUG_ARG)
3724 fprintf (stderr, "function_arg_pass_by_reference\n");
3731 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3732 ABI. Only called if TARGET_SSE. */
3734 contains_128bit_aligned_vector_p (tree type)
3736 enum machine_mode mode = TYPE_MODE (type);
3737 if (SSE_REG_MODE_P (mode)
3738 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3740 if (TYPE_ALIGN (type) < 128)
3743 if (AGGREGATE_TYPE_P (type))
3745 /* Walk the aggregates recursively. */
3746 switch (TREE_CODE (type))
3750 case QUAL_UNION_TYPE:
3754 if (TYPE_BINFO (type))
3756 tree binfo, base_binfo;
3759 for (binfo = TYPE_BINFO (type), i = 0;
3760 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3761 if (contains_128bit_aligned_vector_p
3762 (BINFO_TYPE (base_binfo)))
3765 /* And now merge the fields of structure. */
3766 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3768 if (TREE_CODE (field) == FIELD_DECL
3769 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3776 /* Just for use if some languages passes arrays by value. */
3777 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3788 /* Gives the alignment boundary, in bits, of an argument with the
3789 specified mode and type. */
3792 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3796 align = TYPE_ALIGN (type);
3798 align = GET_MODE_ALIGNMENT (mode);
3799 if (align < PARM_BOUNDARY)
3800 align = PARM_BOUNDARY;
3803 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3804 make an exception for SSE modes since these require 128bit
3807 The handling here differs from field_alignment. ICC aligns MMX
3808 arguments to 4 byte boundaries, while structure fields are aligned
3809 to 8 byte boundaries. */
3811 align = PARM_BOUNDARY;
3814 if (!SSE_REG_MODE_P (mode))
3815 align = PARM_BOUNDARY;
3819 if (!contains_128bit_aligned_vector_p (type))
3820 align = PARM_BOUNDARY;
3828 /* Return true if N is a possible register number of function value. */
3830 ix86_function_value_regno_p (int regno)
3836 return ((regno) == 0
3837 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3838 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3840 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3841 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3842 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3847 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3848 || (regno == FIRST_SSE_REG && TARGET_SSE))
3852 && (regno == FIRST_MMX_REG && TARGET_MMX))
3859 /* Define how to find the value returned by a function.
3860 VALTYPE is the data type of the value (as a tree).
3861 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3862 otherwise, FUNC is 0. */
3864 ix86_function_value (tree valtype, tree fntype_or_decl,
3865 bool outgoing ATTRIBUTE_UNUSED)
3867 enum machine_mode natmode = type_natural_mode (valtype);
3871 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3872 1, REGPARM_MAX, SSE_REGPARM_MAX,
3873 x86_64_int_return_registers, 0);
3874 /* For zero sized structures, construct_container return NULL, but we
3875 need to keep rest of compiler happy by returning meaningful value. */
3877 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3882 tree fn = NULL_TREE, fntype;
3884 && DECL_P (fntype_or_decl))
3885 fn = fntype_or_decl;
3886 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3887 return gen_rtx_REG (TYPE_MODE (valtype),
3888 ix86_value_regno (natmode, fn, fntype));
3892 /* Return true iff type is returned in memory. */
3894 ix86_return_in_memory (tree type)
3896 int needed_intregs, needed_sseregs, size;
3897 enum machine_mode mode = type_natural_mode (type);
3900 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3902 if (mode == BLKmode)
3905 size = int_size_in_bytes (type);
3907 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3910 if (VECTOR_MODE_P (mode) || mode == TImode)
3912 /* User-created vectors small enough to fit in EAX. */
3916 /* MMX/3dNow values are returned in MM0,
3917 except when it doesn't exits. */
3919 return (TARGET_MMX ? 0 : 1);
3921 /* SSE values are returned in XMM0, except when it doesn't exist. */
3923 return (TARGET_SSE ? 0 : 1);
3937 /* When returning SSE vector types, we have a choice of either
3938 (1) being abi incompatible with a -march switch, or
3939 (2) generating an error.
3940 Given no good solution, I think the safest thing is one warning.
3941 The user won't be able to use -Werror, but....
3943 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3944 called in response to actually generating a caller or callee that
3945 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3946 via aggregate_value_p for general type probing from tree-ssa. */
3949 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3951 static bool warnedsse, warnedmmx;
3955 /* Look at the return type of the function, not the function type. */
3956 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3958 if (!TARGET_SSE && !warnedsse)
3961 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3964 warning (0, "SSE vector return without SSE enabled "
3969 if (!TARGET_MMX && !warnedmmx)
3971 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3974 warning (0, "MMX vector return without MMX enabled "
3983 /* Define how to find the value returned by a library function
3984 assuming the value has mode MODE. */
3986 ix86_libcall_value (enum machine_mode mode)
4000 return gen_rtx_REG (mode, FIRST_SSE_REG);
4003 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4007 return gen_rtx_REG (mode, 0);
4011 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4014 /* Given a mode, return the register to use for a return value. */
4017 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4019 gcc_assert (!TARGET_64BIT);
4021 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4022 we normally prevent this case when mmx is not available. However
4023 some ABIs may require the result to be returned like DImode. */
4024 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4025 return TARGET_MMX ? FIRST_MMX_REG : 0;
4027 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4028 we prevent this case when sse is not available. However some ABIs
4029 may require the result to be returned like integer TImode. */
4030 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4031 return TARGET_SSE ? FIRST_SSE_REG : 0;
4033 /* Decimal floating point values can go in %eax, unlike other float modes. */
4034 if (DECIMAL_FLOAT_MODE_P (mode))
4037 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4038 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4041 /* Floating point return values in %st(0), except for local functions when
4042 SSE math is enabled or for functions with sseregparm attribute. */
4043 if ((func || fntype)
4044 && (mode == SFmode || mode == DFmode))
4046 int sse_level = ix86_function_sseregparm (fntype, func);
4047 if ((sse_level >= 1 && mode == SFmode)
4048 || (sse_level == 2 && mode == DFmode))
4049 return FIRST_SSE_REG;
4052 return FIRST_FLOAT_REG;
4055 /* Create the va_list data type. */
4058 ix86_build_builtin_va_list (void)
4060 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4062 /* For i386 we use plain pointer to argument area. */
4064 return build_pointer_type (char_type_node);
4066 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4067 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4069 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4070 unsigned_type_node);
4071 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4072 unsigned_type_node);
4073 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4075 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4078 va_list_gpr_counter_field = f_gpr;
4079 va_list_fpr_counter_field = f_fpr;
4081 DECL_FIELD_CONTEXT (f_gpr) = record;
4082 DECL_FIELD_CONTEXT (f_fpr) = record;
4083 DECL_FIELD_CONTEXT (f_ovf) = record;
4084 DECL_FIELD_CONTEXT (f_sav) = record;
4086 TREE_CHAIN (record) = type_decl;
4087 TYPE_NAME (record) = type_decl;
4088 TYPE_FIELDS (record) = f_gpr;
4089 TREE_CHAIN (f_gpr) = f_fpr;
4090 TREE_CHAIN (f_fpr) = f_ovf;
4091 TREE_CHAIN (f_ovf) = f_sav;
4093 layout_type (record);
4095 /* The correct type is an array type of one element. */
4096 return build_array_type (record, build_index_type (size_zero_node));
4099 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4102 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4103 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4106 CUMULATIVE_ARGS next_cum;
4107 rtx save_area = NULL_RTX, mem;
4120 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4123 /* Indicate to allocate space on the stack for varargs save area. */
4124 ix86_save_varrargs_registers = 1;
4126 cfun->stack_alignment_needed = 128;
4128 fntype = TREE_TYPE (current_function_decl);
4129 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4130 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4131 != void_type_node));
4133 /* For varargs, we do not want to skip the dummy va_dcl argument.
4134 For stdargs, we do want to skip the last named argument. */
4137 function_arg_advance (&next_cum, mode, type, 1);
4140 save_area = frame_pointer_rtx;
4142 set = get_varargs_alias_set ();
4144 for (i = next_cum.regno;
4146 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4149 mem = gen_rtx_MEM (Pmode,
4150 plus_constant (save_area, i * UNITS_PER_WORD));
4151 MEM_NOTRAP_P (mem) = 1;
4152 set_mem_alias_set (mem, set);
4153 emit_move_insn (mem, gen_rtx_REG (Pmode,
4154 x86_64_int_parameter_registers[i]));
4157 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4159 /* Now emit code to save SSE registers. The AX parameter contains number
4160 of SSE parameter registers used to call this function. We use
4161 sse_prologue_save insn template that produces computed jump across
4162 SSE saves. We need some preparation work to get this working. */
4164 label = gen_label_rtx ();
4165 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4167 /* Compute address to jump to :
4168 label - 5*eax + nnamed_sse_arguments*5 */
4169 tmp_reg = gen_reg_rtx (Pmode);
4170 nsse_reg = gen_reg_rtx (Pmode);
4171 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4172 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4173 gen_rtx_MULT (Pmode, nsse_reg,
4175 if (next_cum.sse_regno)
4178 gen_rtx_CONST (DImode,
4179 gen_rtx_PLUS (DImode,
4181 GEN_INT (next_cum.sse_regno * 4))));
4183 emit_move_insn (nsse_reg, label_ref);
4184 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4186 /* Compute address of memory block we save into. We always use pointer
4187 pointing 127 bytes after first byte to store - this is needed to keep
4188 instruction size limited by 4 bytes. */
4189 tmp_reg = gen_reg_rtx (Pmode);
4190 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4191 plus_constant (save_area,
4192 8 * REGPARM_MAX + 127)));
4193 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4194 MEM_NOTRAP_P (mem) = 1;
4195 set_mem_alias_set (mem, set);
4196 set_mem_align (mem, BITS_PER_WORD);
4198 /* And finally do the dirty job! */
4199 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4200 GEN_INT (next_cum.sse_regno), label));
4205 /* Implement va_start. */
4208 ix86_va_start (tree valist, rtx nextarg)
4210 HOST_WIDE_INT words, n_gpr, n_fpr;
4211 tree f_gpr, f_fpr, f_ovf, f_sav;
4212 tree gpr, fpr, ovf, sav, t;
4215 /* Only 64bit target needs something special. */
4218 std_expand_builtin_va_start (valist, nextarg);
4222 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4223 f_fpr = TREE_CHAIN (f_gpr);
4224 f_ovf = TREE_CHAIN (f_fpr);
4225 f_sav = TREE_CHAIN (f_ovf);
4227 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4228 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4229 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4230 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4231 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4233 /* Count number of gp and fp argument registers used. */
4234 words = current_function_args_info.words;
4235 n_gpr = current_function_args_info.regno;
4236 n_fpr = current_function_args_info.sse_regno;
4238 if (TARGET_DEBUG_ARG)
4239 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4240 (int) words, (int) n_gpr, (int) n_fpr);
4242 if (cfun->va_list_gpr_size)
4244 type = TREE_TYPE (gpr);
4245 t = build2 (MODIFY_EXPR, type, gpr,
4246 build_int_cst (type, n_gpr * 8));
4247 TREE_SIDE_EFFECTS (t) = 1;
4248 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4251 if (cfun->va_list_fpr_size)
4253 type = TREE_TYPE (fpr);
4254 t = build2 (MODIFY_EXPR, type, fpr,
4255 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4256 TREE_SIDE_EFFECTS (t) = 1;
4257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4260 /* Find the overflow area. */
4261 type = TREE_TYPE (ovf);
4262 t = make_tree (type, virtual_incoming_args_rtx);
4264 t = build2 (PLUS_EXPR, type, t,
4265 build_int_cst (type, words * UNITS_PER_WORD));
4266 t = build2 (MODIFY_EXPR, type, ovf, t);
4267 TREE_SIDE_EFFECTS (t) = 1;
4268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4270 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4272 /* Find the register save area.
4273 Prologue of the function save it right above stack frame. */
4274 type = TREE_TYPE (sav);
4275 t = make_tree (type, frame_pointer_rtx);
4276 t = build2 (MODIFY_EXPR, type, sav, t);
4277 TREE_SIDE_EFFECTS (t) = 1;
4278 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4282 /* Implement va_arg. */
4285 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4287 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4288 tree f_gpr, f_fpr, f_ovf, f_sav;
4289 tree gpr, fpr, ovf, sav, t;
4291 tree lab_false, lab_over = NULL_TREE;
4296 enum machine_mode nat_mode;
4298 /* Only 64bit target needs something special. */
4300 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4302 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4303 f_fpr = TREE_CHAIN (f_gpr);
4304 f_ovf = TREE_CHAIN (f_fpr);
4305 f_sav = TREE_CHAIN (f_ovf);
4307 valist = build_va_arg_indirect_ref (valist);
4308 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4309 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4310 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4311 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4313 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4315 type = build_pointer_type (type);
4316 size = int_size_in_bytes (type);
4317 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4319 nat_mode = type_natural_mode (type);
4320 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4321 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4323 /* Pull the value out of the saved registers. */
4325 addr = create_tmp_var (ptr_type_node, "addr");
4326 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4330 int needed_intregs, needed_sseregs;
4332 tree int_addr, sse_addr;
4334 lab_false = create_artificial_label ();
4335 lab_over = create_artificial_label ();
4337 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4339 need_temp = (!REG_P (container)
4340 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4341 || TYPE_ALIGN (type) > 128));
4343 /* In case we are passing structure, verify that it is consecutive block
4344 on the register save area. If not we need to do moves. */
4345 if (!need_temp && !REG_P (container))
4347 /* Verify that all registers are strictly consecutive */
4348 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4352 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4354 rtx slot = XVECEXP (container, 0, i);
4355 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4356 || INTVAL (XEXP (slot, 1)) != i * 16)
4364 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4366 rtx slot = XVECEXP (container, 0, i);
4367 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4368 || INTVAL (XEXP (slot, 1)) != i * 8)
4380 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4381 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4382 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4383 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4386 /* First ensure that we fit completely in registers. */
4389 t = build_int_cst (TREE_TYPE (gpr),
4390 (REGPARM_MAX - needed_intregs + 1) * 8);
4391 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4392 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4393 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4394 gimplify_and_add (t, pre_p);
4398 t = build_int_cst (TREE_TYPE (fpr),
4399 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4401 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4402 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4403 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4404 gimplify_and_add (t, pre_p);
4407 /* Compute index to start of area used for integer regs. */
4410 /* int_addr = gpr + sav; */
4411 t = fold_convert (ptr_type_node, gpr);
4412 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4413 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4414 gimplify_and_add (t, pre_p);
4418 /* sse_addr = fpr + sav; */
4419 t = fold_convert (ptr_type_node, fpr);
4420 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4421 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4422 gimplify_and_add (t, pre_p);
4427 tree temp = create_tmp_var (type, "va_arg_tmp");
4430 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4431 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4432 gimplify_and_add (t, pre_p);
4434 for (i = 0; i < XVECLEN (container, 0); i++)
4436 rtx slot = XVECEXP (container, 0, i);
4437 rtx reg = XEXP (slot, 0);
4438 enum machine_mode mode = GET_MODE (reg);
4439 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4440 tree addr_type = build_pointer_type (piece_type);
4443 tree dest_addr, dest;
4445 if (SSE_REGNO_P (REGNO (reg)))
4447 src_addr = sse_addr;
4448 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4452 src_addr = int_addr;
4453 src_offset = REGNO (reg) * 8;
4455 src_addr = fold_convert (addr_type, src_addr);
4456 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4457 size_int (src_offset)));
4458 src = build_va_arg_indirect_ref (src_addr);
4460 dest_addr = fold_convert (addr_type, addr);
4461 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4462 size_int (INTVAL (XEXP (slot, 1)))));
4463 dest = build_va_arg_indirect_ref (dest_addr);
4465 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4466 gimplify_and_add (t, pre_p);
4472 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4473 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4474 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4475 gimplify_and_add (t, pre_p);
4479 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4480 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4481 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4482 gimplify_and_add (t, pre_p);
4485 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4486 gimplify_and_add (t, pre_p);
4488 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4489 append_to_statement_list (t, pre_p);
4492 /* ... otherwise out of the overflow area. */
4494 /* Care for on-stack alignment if needed. */
4495 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4496 || integer_zerop (TYPE_SIZE (type)))
4500 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4501 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4502 build_int_cst (TREE_TYPE (ovf), align - 1));
4503 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4504 build_int_cst (TREE_TYPE (t), -align));
4506 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4508 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4509 gimplify_and_add (t2, pre_p);
4511 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4512 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4513 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4514 gimplify_and_add (t, pre_p);
4518 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4519 append_to_statement_list (t, pre_p);
4522 ptrtype = build_pointer_type (type);
4523 addr = fold_convert (ptrtype, addr);
4526 addr = build_va_arg_indirect_ref (addr);
4527 return build_va_arg_indirect_ref (addr);
4530 /* Return nonzero if OPNUM's MEM should be matched
4531 in movabs* patterns. */
4534 ix86_check_movabs (rtx insn, int opnum)
4538 set = PATTERN (insn);
4539 if (GET_CODE (set) == PARALLEL)
4540 set = XVECEXP (set, 0, 0);
4541 gcc_assert (GET_CODE (set) == SET);
4542 mem = XEXP (set, opnum);
4543 while (GET_CODE (mem) == SUBREG)
4544 mem = SUBREG_REG (mem);
4545 gcc_assert (GET_CODE (mem) == MEM);
4546 return (volatile_ok || !MEM_VOLATILE_P (mem));
4549 /* Initialize the table of extra 80387 mathematical constants. */
4552 init_ext_80387_constants (void)
4554 static const char * cst[5] =
4556 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4557 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4558 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4559 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4560 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4564 for (i = 0; i < 5; i++)
4566 real_from_string (&ext_80387_constants_table[i], cst[i]);
4567 /* Ensure each constant is rounded to XFmode precision. */
4568 real_convert (&ext_80387_constants_table[i],
4569 XFmode, &ext_80387_constants_table[i]);
4572 ext_80387_constants_init = 1;
4575 /* Return true if the constant is something that can be loaded with
4576 a special instruction. */
4579 standard_80387_constant_p (rtx x)
4581 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4584 if (x == CONST0_RTX (GET_MODE (x)))
4586 if (x == CONST1_RTX (GET_MODE (x)))
4589 /* For XFmode constants, try to find a special 80387 instruction when
4590 optimizing for size or on those CPUs that benefit from them. */
4591 if (GET_MODE (x) == XFmode
4592 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4597 if (! ext_80387_constants_init)
4598 init_ext_80387_constants ();
4600 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4601 for (i = 0; i < 5; i++)
4602 if (real_identical (&r, &ext_80387_constants_table[i]))
4609 /* Return the opcode of the special instruction to be used to load
4613 standard_80387_constant_opcode (rtx x)
4615 switch (standard_80387_constant_p (x))
4636 /* Return the CONST_DOUBLE representing the 80387 constant that is
4637 loaded by the specified special instruction. The argument IDX
4638 matches the return value from standard_80387_constant_p. */
4641 standard_80387_constant_rtx (int idx)
4645 if (! ext_80387_constants_init)
4646 init_ext_80387_constants ();
4662 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4666 /* Return 1 if mode is a valid mode for sse. */
4668 standard_sse_mode_p (enum machine_mode mode)
4685 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4688 standard_sse_constant_p (rtx x)
4690 enum machine_mode mode = GET_MODE (x);
4692 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4694 if (vector_all_ones_operand (x, mode)
4695 && standard_sse_mode_p (mode))
4696 return TARGET_SSE2 ? 2 : -1;
4701 /* Return the opcode of the special instruction to be used to load
4705 standard_sse_constant_opcode (rtx insn, rtx x)
4707 switch (standard_sse_constant_p (x))
4710 if (get_attr_mode (insn) == MODE_V4SF)
4711 return "xorps\t%0, %0";
4712 else if (get_attr_mode (insn) == MODE_V2DF)
4713 return "xorpd\t%0, %0";
4715 return "pxor\t%0, %0";
4717 return "pcmpeqd\t%0, %0";
4722 /* Returns 1 if OP contains a symbol reference */
4725 symbolic_reference_mentioned_p (rtx op)
4730 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4733 fmt = GET_RTX_FORMAT (GET_CODE (op));
4734 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4740 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4741 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4745 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4752 /* Return 1 if it is appropriate to emit `ret' instructions in the
4753 body of a function. Do this only if the epilogue is simple, needing a
4754 couple of insns. Prior to reloading, we can't tell how many registers
4755 must be saved, so return 0 then. Return 0 if there is no frame
4756 marker to de-allocate. */
4759 ix86_can_use_return_insn_p (void)
4761 struct ix86_frame frame;
4763 if (! reload_completed || frame_pointer_needed)
4766 /* Don't allow more than 32 pop, since that's all we can do
4767 with one instruction. */
4768 if (current_function_pops_args
4769 && current_function_args_size >= 32768)
4772 ix86_compute_frame_layout (&frame);
4773 return frame.to_allocate == 0 && frame.nregs == 0;
4776 /* Value should be nonzero if functions must have frame pointers.
4777 Zero means the frame pointer need not be set up (and parms may
4778 be accessed via the stack pointer) in functions that seem suitable. */
4781 ix86_frame_pointer_required (void)
4783 /* If we accessed previous frames, then the generated code expects
4784 to be able to access the saved ebp value in our frame. */
4785 if (cfun->machine->accesses_prev_frame)
4788 /* Several x86 os'es need a frame pointer for other reasons,
4789 usually pertaining to setjmp. */
4790 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4793 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4794 the frame pointer by default. Turn it back on now if we've not
4795 got a leaf function. */
4796 if (TARGET_OMIT_LEAF_FRAME_POINTER
4797 && (!current_function_is_leaf
4798 || ix86_current_function_calls_tls_descriptor))
4801 if (current_function_profile)
4807 /* Record that the current function accesses previous call frames. */
4810 ix86_setup_frame_addresses (void)
4812 cfun->machine->accesses_prev_frame = 1;
4815 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4816 # define USE_HIDDEN_LINKONCE 1
4818 # define USE_HIDDEN_LINKONCE 0
4821 static int pic_labels_used;
4823 /* Fills in the label name that should be used for a pc thunk for
4824 the given register. */
4827 get_pc_thunk_name (char name[32], unsigned int regno)
4829 gcc_assert (!TARGET_64BIT);
4831 if (USE_HIDDEN_LINKONCE)
4832 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4834 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4838 /* This function generates code for -fpic that loads %ebx with
4839 the return address of the caller and then returns. */
4842 ix86_file_end (void)
4847 for (regno = 0; regno < 8; ++regno)
4851 if (! ((pic_labels_used >> regno) & 1))
4854 get_pc_thunk_name (name, regno);
4859 switch_to_section (darwin_sections[text_coal_section]);
4860 fputs ("\t.weak_definition\t", asm_out_file);
4861 assemble_name (asm_out_file, name);
4862 fputs ("\n\t.private_extern\t", asm_out_file);
4863 assemble_name (asm_out_file, name);
4864 fputs ("\n", asm_out_file);
4865 ASM_OUTPUT_LABEL (asm_out_file, name);
4869 if (USE_HIDDEN_LINKONCE)
4873 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4875 TREE_PUBLIC (decl) = 1;
4876 TREE_STATIC (decl) = 1;
4877 DECL_ONE_ONLY (decl) = 1;
4879 (*targetm.asm_out.unique_section) (decl, 0);
4880 switch_to_section (get_named_section (decl, NULL, 0));
4882 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4883 fputs ("\t.hidden\t", asm_out_file);
4884 assemble_name (asm_out_file, name);
4885 fputc ('\n', asm_out_file);
4886 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4890 switch_to_section (text_section);
4891 ASM_OUTPUT_LABEL (asm_out_file, name);
4894 xops[0] = gen_rtx_REG (SImode, regno);
4895 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4896 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4897 output_asm_insn ("ret", xops);
4900 if (NEED_INDICATE_EXEC_STACK)
4901 file_end_indicate_exec_stack ();
4904 /* Emit code for the SET_GOT patterns. */
4907 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4912 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4914 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4916 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4919 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4921 output_asm_insn ("call\t%a2", xops);
4924 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4925 is what will be referenced by the Mach-O PIC subsystem. */
4927 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4930 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4931 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4934 output_asm_insn ("pop{l}\t%0", xops);
4939 get_pc_thunk_name (name, REGNO (dest));
4940 pic_labels_used |= 1 << REGNO (dest);
4942 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4943 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4944 output_asm_insn ("call\t%X2", xops);
4945 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4946 is what will be referenced by the Mach-O PIC subsystem. */
4949 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4951 targetm.asm_out.internal_label (asm_out_file, "L",
4952 CODE_LABEL_NUMBER (label));
4959 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4960 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4962 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4967 /* Generate an "push" pattern for input ARG. */
4972 return gen_rtx_SET (VOIDmode,
4974 gen_rtx_PRE_DEC (Pmode,
4975 stack_pointer_rtx)),
4979 /* Return >= 0 if there is an unused call-clobbered register available
4980 for the entire function. */
4983 ix86_select_alt_pic_regnum (void)
4985 if (current_function_is_leaf && !current_function_profile
4986 && !ix86_current_function_calls_tls_descriptor)
4989 for (i = 2; i >= 0; --i)
4990 if (!regs_ever_live[i])
4994 return INVALID_REGNUM;
4997 /* Return 1 if we need to save REGNO. */
4999 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5001 if (pic_offset_table_rtx
5002 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5003 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5004 || current_function_profile
5005 || current_function_calls_eh_return
5006 || current_function_uses_const_pool))
5008 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5013 if (current_function_calls_eh_return && maybe_eh_return)
5018 unsigned test = EH_RETURN_DATA_REGNO (i);
5019 if (test == INVALID_REGNUM)
5026 if (cfun->machine->force_align_arg_pointer
5027 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5030 return (regs_ever_live[regno]
5031 && !call_used_regs[regno]
5032 && !fixed_regs[regno]
5033 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5036 /* Return number of registers to be saved on the stack. */
5039 ix86_nsaved_regs (void)
5044 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5045 if (ix86_save_reg (regno, true))
5050 /* Return the offset between two registers, one to be eliminated, and the other
5051 its replacement, at the start of a routine. */
5054 ix86_initial_elimination_offset (int from, int to)
5056 struct ix86_frame frame;
5057 ix86_compute_frame_layout (&frame);
5059 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5060 return frame.hard_frame_pointer_offset;
5061 else if (from == FRAME_POINTER_REGNUM
5062 && to == HARD_FRAME_POINTER_REGNUM)
5063 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5066 gcc_assert (to == STACK_POINTER_REGNUM);
5068 if (from == ARG_POINTER_REGNUM)
5069 return frame.stack_pointer_offset;
5071 gcc_assert (from == FRAME_POINTER_REGNUM);
5072 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5076 /* Fill structure ix86_frame about frame of currently computed function. */
5079 ix86_compute_frame_layout (struct ix86_frame *frame)
5081 HOST_WIDE_INT total_size;
5082 unsigned int stack_alignment_needed;
5083 HOST_WIDE_INT offset;
5084 unsigned int preferred_alignment;
5085 HOST_WIDE_INT size = get_frame_size ();
5087 frame->nregs = ix86_nsaved_regs ();
5090 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5091 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5093 /* During reload iteration the amount of registers saved can change.
5094 Recompute the value as needed. Do not recompute when amount of registers
5095 didn't change as reload does multiple calls to the function and does not
5096 expect the decision to change within single iteration. */
5098 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5100 int count = frame->nregs;
5102 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5103 /* The fast prologue uses move instead of push to save registers. This
5104 is significantly longer, but also executes faster as modern hardware
5105 can execute the moves in parallel, but can't do that for push/pop.
5107 Be careful about choosing what prologue to emit: When function takes
5108 many instructions to execute we may use slow version as well as in
5109 case function is known to be outside hot spot (this is known with
5110 feedback only). Weight the size of function by number of registers
5111 to save as it is cheap to use one or two push instructions but very
5112 slow to use many of them. */
5114 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5115 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5116 || (flag_branch_probabilities
5117 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5118 cfun->machine->use_fast_prologue_epilogue = false;
5120 cfun->machine->use_fast_prologue_epilogue
5121 = !expensive_function_p (count);
5123 if (TARGET_PROLOGUE_USING_MOVE
5124 && cfun->machine->use_fast_prologue_epilogue)
5125 frame->save_regs_using_mov = true;
5127 frame->save_regs_using_mov = false;
5130 /* Skip return address and saved base pointer. */
5131 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5133 frame->hard_frame_pointer_offset = offset;
5135 /* Do some sanity checking of stack_alignment_needed and
5136 preferred_alignment, since i386 port is the only using those features
5137 that may break easily. */
5139 gcc_assert (!size || stack_alignment_needed);
5140 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5141 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5142 gcc_assert (stack_alignment_needed
5143 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5145 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5146 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5148 /* Register save area */
5149 offset += frame->nregs * UNITS_PER_WORD;
5152 if (ix86_save_varrargs_registers)
5154 offset += X86_64_VARARGS_SIZE;
5155 frame->va_arg_size = X86_64_VARARGS_SIZE;
5158 frame->va_arg_size = 0;
5160 /* Align start of frame for local function. */
5161 frame->padding1 = ((offset + stack_alignment_needed - 1)
5162 & -stack_alignment_needed) - offset;
5164 offset += frame->padding1;
5166 /* Frame pointer points here. */
5167 frame->frame_pointer_offset = offset;
5171 /* Add outgoing arguments area. Can be skipped if we eliminated
5172 all the function calls as dead code.
5173 Skipping is however impossible when function calls alloca. Alloca
5174 expander assumes that last current_function_outgoing_args_size
5175 of stack frame are unused. */
5176 if (ACCUMULATE_OUTGOING_ARGS
5177 && (!current_function_is_leaf || current_function_calls_alloca
5178 || ix86_current_function_calls_tls_descriptor))
5180 offset += current_function_outgoing_args_size;
5181 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5184 frame->outgoing_arguments_size = 0;
5186 /* Align stack boundary. Only needed if we're calling another function
5188 if (!current_function_is_leaf || current_function_calls_alloca
5189 || ix86_current_function_calls_tls_descriptor)
5190 frame->padding2 = ((offset + preferred_alignment - 1)
5191 & -preferred_alignment) - offset;
5193 frame->padding2 = 0;
5195 offset += frame->padding2;
5197 /* We've reached end of stack frame. */
5198 frame->stack_pointer_offset = offset;
5200 /* Size prologue needs to allocate. */
5201 frame->to_allocate =
5202 (size + frame->padding1 + frame->padding2
5203 + frame->outgoing_arguments_size + frame->va_arg_size);
5205 if ((!frame->to_allocate && frame->nregs <= 1)
5206 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5207 frame->save_regs_using_mov = false;
5209 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5210 && current_function_is_leaf
5211 && !ix86_current_function_calls_tls_descriptor)
5213 frame->red_zone_size = frame->to_allocate;
5214 if (frame->save_regs_using_mov)
5215 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5216 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5217 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5220 frame->red_zone_size = 0;
5221 frame->to_allocate -= frame->red_zone_size;
5222 frame->stack_pointer_offset -= frame->red_zone_size;
5224 fprintf (stderr, "nregs: %i\n", frame->nregs);
5225 fprintf (stderr, "size: %i\n", size);
5226 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5227 fprintf (stderr, "padding1: %i\n", frame->padding1);
5228 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5229 fprintf (stderr, "padding2: %i\n", frame->padding2);
5230 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5231 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5232 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5233 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5234 frame->hard_frame_pointer_offset);
5235 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5239 /* Emit code to save registers in the prologue. */
5242 ix86_emit_save_regs (void)
5247 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5248 if (ix86_save_reg (regno, true))
5250 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5251 RTX_FRAME_RELATED_P (insn) = 1;
5255 /* Emit code to save registers using MOV insns. First register
5256 is restored from POINTER + OFFSET. */
5258 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5263 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5264 if (ix86_save_reg (regno, true))
5266 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5268 gen_rtx_REG (Pmode, regno));
5269 RTX_FRAME_RELATED_P (insn) = 1;
5270 offset += UNITS_PER_WORD;
5274 /* Expand prologue or epilogue stack adjustment.
5275 The pattern exist to put a dependency on all ebp-based memory accesses.
5276 STYLE should be negative if instructions should be marked as frame related,
5277 zero if %r11 register is live and cannot be freely used and positive
5281 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5286 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5287 else if (x86_64_immediate_operand (offset, DImode))
5288 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5292 /* r11 is used by indirect sibcall return as well, set before the
5293 epilogue and used after the epilogue. ATM indirect sibcall
5294 shouldn't be used together with huge frame sizes in one
5295 function because of the frame_size check in sibcall.c. */
5297 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5298 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5300 RTX_FRAME_RELATED_P (insn) = 1;
5301 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5305 RTX_FRAME_RELATED_P (insn) = 1;
5308 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5311 ix86_internal_arg_pointer (void)
5313 bool has_force_align_arg_pointer =
5314 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5315 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5316 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5317 && DECL_NAME (current_function_decl)
5318 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5319 && DECL_FILE_SCOPE_P (current_function_decl))
5320 || ix86_force_align_arg_pointer
5321 || has_force_align_arg_pointer)
5323 /* Nested functions can't realign the stack due to a register
5325 if (DECL_CONTEXT (current_function_decl)
5326 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5328 if (ix86_force_align_arg_pointer)
5329 warning (0, "-mstackrealign ignored for nested functions");
5330 if (has_force_align_arg_pointer)
5331 error ("%s not supported for nested functions",
5332 ix86_force_align_arg_pointer_string);
5333 return virtual_incoming_args_rtx;
5335 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5336 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5339 return virtual_incoming_args_rtx;
5342 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5343 This is called from dwarf2out.c to emit call frame instructions
5344 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5346 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5348 rtx unspec = SET_SRC (pattern);
5349 gcc_assert (GET_CODE (unspec) == UNSPEC);
5353 case UNSPEC_REG_SAVE:
5354 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5355 SET_DEST (pattern));
5357 case UNSPEC_DEF_CFA:
5358 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5359 INTVAL (XVECEXP (unspec, 0, 0)));
5366 /* Expand the prologue into a bunch of separate insns. */
5369 ix86_expand_prologue (void)
5373 struct ix86_frame frame;
5374 HOST_WIDE_INT allocate;
5376 ix86_compute_frame_layout (&frame);
5378 if (cfun->machine->force_align_arg_pointer)
5382 /* Grab the argument pointer. */
5383 x = plus_constant (stack_pointer_rtx, 4);
5384 y = cfun->machine->force_align_arg_pointer;
5385 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5386 RTX_FRAME_RELATED_P (insn) = 1;
5388 /* The unwind info consists of two parts: install the fafp as the cfa,
5389 and record the fafp as the "save register" of the stack pointer.
5390 The later is there in order that the unwinder can see where it
5391 should restore the stack pointer across the and insn. */
5392 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5393 x = gen_rtx_SET (VOIDmode, y, x);
5394 RTX_FRAME_RELATED_P (x) = 1;
5395 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5397 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5398 RTX_FRAME_RELATED_P (y) = 1;
5399 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5400 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5401 REG_NOTES (insn) = x;
5403 /* Align the stack. */
5404 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5407 /* And here we cheat like madmen with the unwind info. We force the
5408 cfa register back to sp+4, which is exactly what it was at the
5409 start of the function. Re-pushing the return address results in
5410 the return at the same spot relative to the cfa, and thus is
5411 correct wrt the unwind info. */
5412 x = cfun->machine->force_align_arg_pointer;
5413 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5414 insn = emit_insn (gen_push (x));
5415 RTX_FRAME_RELATED_P (insn) = 1;
5418 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5419 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5420 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5421 REG_NOTES (insn) = x;
5424 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5425 slower on all targets. Also sdb doesn't like it. */
5427 if (frame_pointer_needed)
5429 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5430 RTX_FRAME_RELATED_P (insn) = 1;
5432 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5433 RTX_FRAME_RELATED_P (insn) = 1;
5436 allocate = frame.to_allocate;
5438 if (!frame.save_regs_using_mov)
5439 ix86_emit_save_regs ();
5441 allocate += frame.nregs * UNITS_PER_WORD;
5443 /* When using red zone we may start register saving before allocating
5444 the stack frame saving one cycle of the prologue. */
5445 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5446 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5447 : stack_pointer_rtx,
5448 -frame.nregs * UNITS_PER_WORD);
5452 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5453 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5454 GEN_INT (-allocate), -1);
5457 /* Only valid for Win32. */
5458 rtx eax = gen_rtx_REG (SImode, 0);
5459 bool eax_live = ix86_eax_live_at_start_p ();
5462 gcc_assert (!TARGET_64BIT);
5466 emit_insn (gen_push (eax));
5470 emit_move_insn (eax, GEN_INT (allocate));
5472 insn = emit_insn (gen_allocate_stack_worker (eax));
5473 RTX_FRAME_RELATED_P (insn) = 1;
5474 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5475 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5476 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5477 t, REG_NOTES (insn));
5481 if (frame_pointer_needed)
5482 t = plus_constant (hard_frame_pointer_rtx,
5485 - frame.nregs * UNITS_PER_WORD);
5487 t = plus_constant (stack_pointer_rtx, allocate);
5488 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5492 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5494 if (!frame_pointer_needed || !frame.to_allocate)
5495 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5497 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5498 -frame.nregs * UNITS_PER_WORD);
5501 pic_reg_used = false;
5502 if (pic_offset_table_rtx
5503 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5504 || current_function_profile))
5506 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5508 if (alt_pic_reg_used != INVALID_REGNUM)
5509 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5511 pic_reg_used = true;
5517 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5519 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5521 /* Even with accurate pre-reload life analysis, we can wind up
5522 deleting all references to the pic register after reload.
5523 Consider if cross-jumping unifies two sides of a branch
5524 controlled by a comparison vs the only read from a global.
5525 In which case, allow the set_got to be deleted, though we're
5526 too late to do anything about the ebx save in the prologue. */
5527 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5530 /* Prevent function calls from be scheduled before the call to mcount.
5531 In the pic_reg_used case, make sure that the got load isn't deleted. */
5532 if (current_function_profile)
5533 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5536 /* Emit code to restore saved registers using MOV insns. First register
5537 is restored from POINTER + OFFSET. */
5539 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5540 int maybe_eh_return)
5543 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5545 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5546 if (ix86_save_reg (regno, maybe_eh_return))
5548 /* Ensure that adjust_address won't be forced to produce pointer
5549 out of range allowed by x86-64 instruction set. */
5550 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5554 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5555 emit_move_insn (r11, GEN_INT (offset));
5556 emit_insn (gen_adddi3 (r11, r11, pointer));
5557 base_address = gen_rtx_MEM (Pmode, r11);
5560 emit_move_insn (gen_rtx_REG (Pmode, regno),
5561 adjust_address (base_address, Pmode, offset));
5562 offset += UNITS_PER_WORD;
5566 /* Restore function stack, frame, and registers. */
5569 ix86_expand_epilogue (int style)
5572 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5573 struct ix86_frame frame;
5574 HOST_WIDE_INT offset;
5576 ix86_compute_frame_layout (&frame);
5578 /* Calculate start of saved registers relative to ebp. Special care
5579 must be taken for the normal return case of a function using
5580 eh_return: the eax and edx registers are marked as saved, but not
5581 restored along this path. */
5582 offset = frame.nregs;
5583 if (current_function_calls_eh_return && style != 2)
5585 offset *= -UNITS_PER_WORD;
5587 /* If we're only restoring one register and sp is not valid then
5588 using a move instruction to restore the register since it's
5589 less work than reloading sp and popping the register.
5591 The default code result in stack adjustment using add/lea instruction,
5592 while this code results in LEAVE instruction (or discrete equivalent),
5593 so it is profitable in some other cases as well. Especially when there
5594 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5595 and there is exactly one register to pop. This heuristic may need some
5596 tuning in future. */
5597 if ((!sp_valid && frame.nregs <= 1)
5598 || (TARGET_EPILOGUE_USING_MOVE
5599 && cfun->machine->use_fast_prologue_epilogue
5600 && (frame.nregs > 1 || frame.to_allocate))
5601 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5602 || (frame_pointer_needed && TARGET_USE_LEAVE
5603 && cfun->machine->use_fast_prologue_epilogue
5604 && frame.nregs == 1)
5605 || current_function_calls_eh_return)
5607 /* Restore registers. We can use ebp or esp to address the memory
5608 locations. If both are available, default to ebp, since offsets
5609 are known to be small. Only exception is esp pointing directly to the
5610 end of block of saved registers, where we may simplify addressing
5613 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5614 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5615 frame.to_allocate, style == 2);
5617 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5618 offset, style == 2);
5620 /* eh_return epilogues need %ecx added to the stack pointer. */
5623 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5625 if (frame_pointer_needed)
5627 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5628 tmp = plus_constant (tmp, UNITS_PER_WORD);
5629 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5631 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5632 emit_move_insn (hard_frame_pointer_rtx, tmp);
5634 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5639 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5640 tmp = plus_constant (tmp, (frame.to_allocate
5641 + frame.nregs * UNITS_PER_WORD));
5642 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5645 else if (!frame_pointer_needed)
5646 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5647 GEN_INT (frame.to_allocate
5648 + frame.nregs * UNITS_PER_WORD),
5650 /* If not an i386, mov & pop is faster than "leave". */
5651 else if (TARGET_USE_LEAVE || optimize_size
5652 || !cfun->machine->use_fast_prologue_epilogue)
5653 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5656 pro_epilogue_adjust_stack (stack_pointer_rtx,
5657 hard_frame_pointer_rtx,
5660 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5662 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5667 /* First step is to deallocate the stack frame so that we can
5668 pop the registers. */
5671 gcc_assert (frame_pointer_needed);
5672 pro_epilogue_adjust_stack (stack_pointer_rtx,
5673 hard_frame_pointer_rtx,
5674 GEN_INT (offset), style);
5676 else if (frame.to_allocate)
5677 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5678 GEN_INT (frame.to_allocate), style);
5680 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5681 if (ix86_save_reg (regno, false))
5684 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5686 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5688 if (frame_pointer_needed)
5690 /* Leave results in shorter dependency chains on CPUs that are
5691 able to grok it fast. */
5692 if (TARGET_USE_LEAVE)
5693 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5694 else if (TARGET_64BIT)
5695 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5697 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5701 if (cfun->machine->force_align_arg_pointer)
5703 emit_insn (gen_addsi3 (stack_pointer_rtx,
5704 cfun->machine->force_align_arg_pointer,
5708 /* Sibcall epilogues don't want a return instruction. */
5712 if (current_function_pops_args && current_function_args_size)
5714 rtx popc = GEN_INT (current_function_pops_args);
5716 /* i386 can only pop 64K bytes. If asked to pop more, pop
5717 return address, do explicit add, and jump indirectly to the
5720 if (current_function_pops_args >= 65536)
5722 rtx ecx = gen_rtx_REG (SImode, 2);
5724 /* There is no "pascal" calling convention in 64bit ABI. */
5725 gcc_assert (!TARGET_64BIT);
5727 emit_insn (gen_popsi1 (ecx));
5728 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5729 emit_jump_insn (gen_return_indirect_internal (ecx));
5732 emit_jump_insn (gen_return_pop_internal (popc));
5735 emit_jump_insn (gen_return_internal ());
5738 /* Reset from the function's potential modifications. */
5741 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5742 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5744 if (pic_offset_table_rtx)
5745 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5747 /* Mach-O doesn't support labels at the end of objects, so if
5748 it looks like we might want one, insert a NOP. */
5750 rtx insn = get_last_insn ();
5753 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5754 insn = PREV_INSN (insn);
5758 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5759 fputs ("\tnop\n", file);
5765 /* Extract the parts of an RTL expression that is a valid memory address
5766 for an instruction. Return 0 if the structure of the address is
5767 grossly off. Return -1 if the address contains ASHIFT, so it is not
5768 strictly valid, but still used for computing length of lea instruction. */
5771 ix86_decompose_address (rtx addr, struct ix86_address *out)
5773 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5774 rtx base_reg, index_reg;
5775 HOST_WIDE_INT scale = 1;
5776 rtx scale_rtx = NULL_RTX;
5778 enum ix86_address_seg seg = SEG_DEFAULT;
5780 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5782 else if (GET_CODE (addr) == PLUS)
5792 addends[n++] = XEXP (op, 1);
5795 while (GET_CODE (op) == PLUS);
5800 for (i = n; i >= 0; --i)
5803 switch (GET_CODE (op))
5808 index = XEXP (op, 0);
5809 scale_rtx = XEXP (op, 1);
5813 if (XINT (op, 1) == UNSPEC_TP
5814 && TARGET_TLS_DIRECT_SEG_REFS
5815 && seg == SEG_DEFAULT)
5816 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5845 else if (GET_CODE (addr) == MULT)
5847 index = XEXP (addr, 0); /* index*scale */
5848 scale_rtx = XEXP (addr, 1);
5850 else if (GET_CODE (addr) == ASHIFT)
5854 /* We're called for lea too, which implements ashift on occasion. */
5855 index = XEXP (addr, 0);
5856 tmp = XEXP (addr, 1);
5857 if (GET_CODE (tmp) != CONST_INT)
5859 scale = INTVAL (tmp);
5860 if ((unsigned HOST_WIDE_INT) scale > 3)
5866 disp = addr; /* displacement */
5868 /* Extract the integral value of scale. */
5871 if (GET_CODE (scale_rtx) != CONST_INT)
5873 scale = INTVAL (scale_rtx);
5876 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5877 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5879 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5880 if (base_reg && index_reg && scale == 1
5881 && (index_reg == arg_pointer_rtx
5882 || index_reg == frame_pointer_rtx
5883 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5886 tmp = base, base = index, index = tmp;
5887 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5890 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5891 if ((base_reg == hard_frame_pointer_rtx
5892 || base_reg == frame_pointer_rtx
5893 || base_reg == arg_pointer_rtx) && !disp)
5896 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5897 Avoid this by transforming to [%esi+0]. */
5898 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5899 && base_reg && !index_reg && !disp
5901 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5904 /* Special case: encode reg+reg instead of reg*2. */
5905 if (!base && index && scale && scale == 2)
5906 base = index, base_reg = index_reg, scale = 1;
5908 /* Special case: scaling cannot be encoded without base or displacement. */
5909 if (!base && !disp && index && scale != 1)
5921 /* Return cost of the memory address x.
5922 For i386, it is better to use a complex address than let gcc copy
5923 the address into a reg and make a new pseudo. But not if the address
5924 requires to two regs - that would mean more pseudos with longer
5927 ix86_address_cost (rtx x)
5929 struct ix86_address parts;
5931 int ok = ix86_decompose_address (x, &parts);
5935 if (parts.base && GET_CODE (parts.base) == SUBREG)
5936 parts.base = SUBREG_REG (parts.base);
5937 if (parts.index && GET_CODE (parts.index) == SUBREG)
5938 parts.index = SUBREG_REG (parts.index);
5940 /* More complex memory references are better. */
5941 if (parts.disp && parts.disp != const0_rtx)
5943 if (parts.seg != SEG_DEFAULT)
5946 /* Attempt to minimize number of registers in the address. */
5948 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5950 && (!REG_P (parts.index)
5951 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5955 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5957 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5958 && parts.base != parts.index)
5961 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5962 since it's predecode logic can't detect the length of instructions
5963 and it degenerates to vector decoded. Increase cost of such
5964 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5965 to split such addresses or even refuse such addresses at all.
5967 Following addressing modes are affected:
5972 The first and last case may be avoidable by explicitly coding the zero in
5973 memory address, but I don't have AMD-K6 machine handy to check this
5977 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5978 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5979 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5985 /* If X is a machine specific address (i.e. a symbol or label being
5986 referenced as a displacement from the GOT implemented using an
5987 UNSPEC), then return the base term. Otherwise return X. */
5990 ix86_find_base_term (rtx x)
5996 if (GET_CODE (x) != CONST)
5999 if (GET_CODE (term) == PLUS
6000 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6001 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6002 term = XEXP (term, 0);
6003 if (GET_CODE (term) != UNSPEC
6004 || XINT (term, 1) != UNSPEC_GOTPCREL)
6007 term = XVECEXP (term, 0, 0);
6009 if (GET_CODE (term) != SYMBOL_REF
6010 && GET_CODE (term) != LABEL_REF)
6016 term = ix86_delegitimize_address (x);
6018 if (GET_CODE (term) != SYMBOL_REF
6019 && GET_CODE (term) != LABEL_REF)
6025 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6026 this is used for to form addresses to local data when -fPIC is in
6030 darwin_local_data_pic (rtx disp)
6032 if (GET_CODE (disp) == MINUS)
6034 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6035 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6036 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6038 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6039 if (! strcmp (sym_name, "<pic base>"))
6047 /* Determine if a given RTX is a valid constant. We already know this
6048 satisfies CONSTANT_P. */
6051 legitimate_constant_p (rtx x)
6053 switch (GET_CODE (x))
6058 if (GET_CODE (x) == PLUS)
6060 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6065 if (TARGET_MACHO && darwin_local_data_pic (x))
6068 /* Only some unspecs are valid as "constants". */
6069 if (GET_CODE (x) == UNSPEC)
6070 switch (XINT (x, 1))
6073 return TARGET_64BIT;
6076 x = XVECEXP (x, 0, 0);
6077 return (GET_CODE (x) == SYMBOL_REF
6078 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6080 x = XVECEXP (x, 0, 0);
6081 return (GET_CODE (x) == SYMBOL_REF
6082 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6087 /* We must have drilled down to a symbol. */
6088 if (GET_CODE (x) == LABEL_REF)
6090 if (GET_CODE (x) != SYMBOL_REF)
6095 /* TLS symbols are never valid. */
6096 if (SYMBOL_REF_TLS_MODEL (x))
6101 if (GET_MODE (x) == TImode
6102 && x != CONST0_RTX (TImode)
6108 if (x == CONST0_RTX (GET_MODE (x)))
6116 /* Otherwise we handle everything else in the move patterns. */
6120 /* Determine if it's legal to put X into the constant pool. This
6121 is not possible for the address of thread-local symbols, which
6122 is checked above. */
6125 ix86_cannot_force_const_mem (rtx x)
6127 /* We can always put integral constants and vectors in memory. */
6128 switch (GET_CODE (x))
6138 return !legitimate_constant_p (x);
6141 /* Determine if a given RTX is a valid constant address. */
6144 constant_address_p (rtx x)
6146 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6149 /* Nonzero if the constant value X is a legitimate general operand
6150 when generating PIC code. It is given that flag_pic is on and
6151 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6154 legitimate_pic_operand_p (rtx x)
6158 switch (GET_CODE (x))
6161 inner = XEXP (x, 0);
6162 if (GET_CODE (inner) == PLUS
6163 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6164 inner = XEXP (inner, 0);
6166 /* Only some unspecs are valid as "constants". */
6167 if (GET_CODE (inner) == UNSPEC)
6168 switch (XINT (inner, 1))
6171 return TARGET_64BIT;
6173 x = XVECEXP (inner, 0, 0);
6174 return (GET_CODE (x) == SYMBOL_REF
6175 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6183 return legitimate_pic_address_disp_p (x);
6190 /* Determine if a given CONST RTX is a valid memory displacement
6194 legitimate_pic_address_disp_p (rtx disp)
6198 /* In 64bit mode we can allow direct addresses of symbols and labels
6199 when they are not dynamic symbols. */
6202 rtx op0 = disp, op1;
6204 switch (GET_CODE (disp))
6210 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6212 op0 = XEXP (XEXP (disp, 0), 0);
6213 op1 = XEXP (XEXP (disp, 0), 1);
6214 if (GET_CODE (op1) != CONST_INT
6215 || INTVAL (op1) >= 16*1024*1024
6216 || INTVAL (op1) < -16*1024*1024)
6218 if (GET_CODE (op0) == LABEL_REF)
6220 if (GET_CODE (op0) != SYMBOL_REF)
6225 /* TLS references should always be enclosed in UNSPEC. */
6226 if (SYMBOL_REF_TLS_MODEL (op0))
6228 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6236 if (GET_CODE (disp) != CONST)
6238 disp = XEXP (disp, 0);
6242 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6243 of GOT tables. We should not need these anyway. */
6244 if (GET_CODE (disp) != UNSPEC
6245 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6246 && XINT (disp, 1) != UNSPEC_GOTOFF))
6249 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6250 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6256 if (GET_CODE (disp) == PLUS)
6258 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6260 disp = XEXP (disp, 0);
6264 if (TARGET_MACHO && darwin_local_data_pic (disp))
6267 if (GET_CODE (disp) != UNSPEC)
6270 switch (XINT (disp, 1))
6275 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6277 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6278 While ABI specify also 32bit relocation but we don't produce it in
6279 small PIC model at all. */
6280 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6281 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6283 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6285 case UNSPEC_GOTTPOFF:
6286 case UNSPEC_GOTNTPOFF:
6287 case UNSPEC_INDNTPOFF:
6290 disp = XVECEXP (disp, 0, 0);
6291 return (GET_CODE (disp) == SYMBOL_REF
6292 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6294 disp = XVECEXP (disp, 0, 0);
6295 return (GET_CODE (disp) == SYMBOL_REF
6296 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6298 disp = XVECEXP (disp, 0, 0);
6299 return (GET_CODE (disp) == SYMBOL_REF
6300 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6306 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6307 memory address for an instruction. The MODE argument is the machine mode
6308 for the MEM expression that wants to use this address.
6310 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6311 convert common non-canonical forms to canonical form so that they will
6315 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6317 struct ix86_address parts;
6318 rtx base, index, disp;
6319 HOST_WIDE_INT scale;
6320 const char *reason = NULL;
6321 rtx reason_rtx = NULL_RTX;
6323 if (TARGET_DEBUG_ADDR)
6326 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6327 GET_MODE_NAME (mode), strict);
6331 if (ix86_decompose_address (addr, &parts) <= 0)
6333 reason = "decomposition failed";
6338 index = parts.index;
6340 scale = parts.scale;
6342 /* Validate base register.
6344 Don't allow SUBREG's that span more than a word here. It can lead to spill
6345 failures when the base is one word out of a two word structure, which is
6346 represented internally as a DImode int. */
6355 else if (GET_CODE (base) == SUBREG
6356 && REG_P (SUBREG_REG (base))
6357 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6359 reg = SUBREG_REG (base);
6362 reason = "base is not a register";
6366 if (GET_MODE (base) != Pmode)
6368 reason = "base is not in Pmode";
6372 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6373 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6375 reason = "base is not valid";
6380 /* Validate index register.
6382 Don't allow SUBREG's that span more than a word here -- same as above. */
6391 else if (GET_CODE (index) == SUBREG
6392 && REG_P (SUBREG_REG (index))
6393 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6395 reg = SUBREG_REG (index);
6398 reason = "index is not a register";
6402 if (GET_MODE (index) != Pmode)
6404 reason = "index is not in Pmode";
6408 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6409 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6411 reason = "index is not valid";
6416 /* Validate scale factor. */
6419 reason_rtx = GEN_INT (scale);
6422 reason = "scale without index";
6426 if (scale != 2 && scale != 4 && scale != 8)
6428 reason = "scale is not a valid multiplier";
6433 /* Validate displacement. */
6438 if (GET_CODE (disp) == CONST
6439 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6440 switch (XINT (XEXP (disp, 0), 1))
6442 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6443 used. While ABI specify also 32bit relocations, we don't produce
6444 them at all and use IP relative instead. */
6447 gcc_assert (flag_pic);
6449 goto is_legitimate_pic;
6450 reason = "64bit address unspec";
6453 case UNSPEC_GOTPCREL:
6454 gcc_assert (flag_pic);
6455 goto is_legitimate_pic;
6457 case UNSPEC_GOTTPOFF:
6458 case UNSPEC_GOTNTPOFF:
6459 case UNSPEC_INDNTPOFF:
6465 reason = "invalid address unspec";
6469 else if (SYMBOLIC_CONST (disp)
6473 && MACHOPIC_INDIRECT
6474 && !machopic_operand_p (disp)
6480 if (TARGET_64BIT && (index || base))
6482 /* foo@dtpoff(%rX) is ok. */
6483 if (GET_CODE (disp) != CONST
6484 || GET_CODE (XEXP (disp, 0)) != PLUS
6485 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6486 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6487 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6488 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6490 reason = "non-constant pic memory reference";
6494 else if (! legitimate_pic_address_disp_p (disp))
6496 reason = "displacement is an invalid pic construct";
6500 /* This code used to verify that a symbolic pic displacement
6501 includes the pic_offset_table_rtx register.
6503 While this is good idea, unfortunately these constructs may
6504 be created by "adds using lea" optimization for incorrect
6513 This code is nonsensical, but results in addressing
6514 GOT table with pic_offset_table_rtx base. We can't
6515 just refuse it easily, since it gets matched by
6516 "addsi3" pattern, that later gets split to lea in the
6517 case output register differs from input. While this
6518 can be handled by separate addsi pattern for this case
6519 that never results in lea, this seems to be easier and
6520 correct fix for crash to disable this test. */
6522 else if (GET_CODE (disp) != LABEL_REF
6523 && GET_CODE (disp) != CONST_INT
6524 && (GET_CODE (disp) != CONST
6525 || !legitimate_constant_p (disp))
6526 && (GET_CODE (disp) != SYMBOL_REF
6527 || !legitimate_constant_p (disp)))
6529 reason = "displacement is not constant";
6532 else if (TARGET_64BIT
6533 && !x86_64_immediate_operand (disp, VOIDmode))
6535 reason = "displacement is out of range";
6540 /* Everything looks valid. */
6541 if (TARGET_DEBUG_ADDR)
6542 fprintf (stderr, "Success.\n");
6546 if (TARGET_DEBUG_ADDR)
6548 fprintf (stderr, "Error: %s\n", reason);
6549 debug_rtx (reason_rtx);
6554 /* Return a unique alias set for the GOT. */
6556 static HOST_WIDE_INT
6557 ix86_GOT_alias_set (void)
6559 static HOST_WIDE_INT set = -1;
6561 set = new_alias_set ();
6565 /* Return a legitimate reference for ORIG (an address) using the
6566 register REG. If REG is 0, a new pseudo is generated.
6568 There are two types of references that must be handled:
6570 1. Global data references must load the address from the GOT, via
6571 the PIC reg. An insn is emitted to do this load, and the reg is
6574 2. Static data references, constant pool addresses, and code labels
6575 compute the address as an offset from the GOT, whose base is in
6576 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6577 differentiate them from global data objects. The returned
6578 address is the PIC reg + an unspec constant.
6580 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6581 reg also appears in the address. */
6584 legitimize_pic_address (rtx orig, rtx reg)
6591 if (TARGET_MACHO && !TARGET_64BIT)
6594 reg = gen_reg_rtx (Pmode);
6595 /* Use the generic Mach-O PIC machinery. */
6596 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6600 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6602 else if (TARGET_64BIT
6603 && ix86_cmodel != CM_SMALL_PIC
6604 && local_symbolic_operand (addr, Pmode))
6607 /* This symbol may be referenced via a displacement from the PIC
6608 base address (@GOTOFF). */
6610 if (reload_in_progress)
6611 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6612 if (GET_CODE (addr) == CONST)
6613 addr = XEXP (addr, 0);
6614 if (GET_CODE (addr) == PLUS)
6616 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6617 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6620 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6621 new = gen_rtx_CONST (Pmode, new);
6623 tmpreg = gen_reg_rtx (Pmode);
6626 emit_move_insn (tmpreg, new);
6630 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6631 tmpreg, 1, OPTAB_DIRECT);
6634 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6636 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6638 /* This symbol may be referenced via a displacement from the PIC
6639 base address (@GOTOFF). */
6641 if (reload_in_progress)
6642 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6643 if (GET_CODE (addr) == CONST)
6644 addr = XEXP (addr, 0);
6645 if (GET_CODE (addr) == PLUS)
6647 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6648 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6651 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6652 new = gen_rtx_CONST (Pmode, new);
6653 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6657 emit_move_insn (reg, new);
6661 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6665 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6666 new = gen_rtx_CONST (Pmode, new);
6667 new = gen_const_mem (Pmode, new);
6668 set_mem_alias_set (new, ix86_GOT_alias_set ());
6671 reg = gen_reg_rtx (Pmode);
6672 /* Use directly gen_movsi, otherwise the address is loaded
6673 into register for CSE. We don't want to CSE this addresses,
6674 instead we CSE addresses from the GOT table, so skip this. */
6675 emit_insn (gen_movsi (reg, new));
6680 /* This symbol must be referenced via a load from the
6681 Global Offset Table (@GOT). */
6683 if (reload_in_progress)
6684 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6685 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6686 new = gen_rtx_CONST (Pmode, new);
6687 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6688 new = gen_const_mem (Pmode, new);
6689 set_mem_alias_set (new, ix86_GOT_alias_set ());
6692 reg = gen_reg_rtx (Pmode);
6693 emit_move_insn (reg, new);
6699 if (GET_CODE (addr) == CONST_INT
6700 && !x86_64_immediate_operand (addr, VOIDmode))
6704 emit_move_insn (reg, addr);
6708 new = force_reg (Pmode, addr);
6710 else if (GET_CODE (addr) == CONST)
6712 addr = XEXP (addr, 0);
6714 /* We must match stuff we generate before. Assume the only
6715 unspecs that can get here are ours. Not that we could do
6716 anything with them anyway.... */
6717 if (GET_CODE (addr) == UNSPEC
6718 || (GET_CODE (addr) == PLUS
6719 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6721 gcc_assert (GET_CODE (addr) == PLUS);
6723 if (GET_CODE (addr) == PLUS)
6725 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6727 /* Check first to see if this is a constant offset from a @GOTOFF
6728 symbol reference. */
6729 if (local_symbolic_operand (op0, Pmode)
6730 && GET_CODE (op1) == CONST_INT)
6734 if (reload_in_progress)
6735 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6736 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6738 new = gen_rtx_PLUS (Pmode, new, op1);
6739 new = gen_rtx_CONST (Pmode, new);
6740 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6744 emit_move_insn (reg, new);
6750 if (INTVAL (op1) < -16*1024*1024
6751 || INTVAL (op1) >= 16*1024*1024)
6753 if (!x86_64_immediate_operand (op1, Pmode))
6754 op1 = force_reg (Pmode, op1);
6755 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6761 base = legitimize_pic_address (XEXP (addr, 0), reg);
6762 new = legitimize_pic_address (XEXP (addr, 1),
6763 base == reg ? NULL_RTX : reg);
6765 if (GET_CODE (new) == CONST_INT)
6766 new = plus_constant (base, INTVAL (new));
6769 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6771 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6772 new = XEXP (new, 1);
6774 new = gen_rtx_PLUS (Pmode, base, new);
6782 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6785 get_thread_pointer (int to_reg)
6789 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6793 reg = gen_reg_rtx (Pmode);
6794 insn = gen_rtx_SET (VOIDmode, reg, tp);
6795 insn = emit_insn (insn);
6800 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6801 false if we expect this to be used for a memory address and true if
6802 we expect to load the address into a register. */
6805 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6807 rtx dest, base, off, pic, tp;
6812 case TLS_MODEL_GLOBAL_DYNAMIC:
6813 dest = gen_reg_rtx (Pmode);
6814 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6816 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6818 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6821 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6822 insns = get_insns ();
6825 emit_libcall_block (insns, dest, rax, x);
6827 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6828 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6830 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6832 if (TARGET_GNU2_TLS)
6834 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6836 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6840 case TLS_MODEL_LOCAL_DYNAMIC:
6841 base = gen_reg_rtx (Pmode);
6842 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6844 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6846 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6849 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6850 insns = get_insns ();
6853 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6854 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6855 emit_libcall_block (insns, base, rax, note);
6857 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6858 emit_insn (gen_tls_local_dynamic_base_64 (base));
6860 emit_insn (gen_tls_local_dynamic_base_32 (base));
6862 if (TARGET_GNU2_TLS)
6864 rtx x = ix86_tls_module_base ();
6866 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6867 gen_rtx_MINUS (Pmode, x, tp));
6870 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6871 off = gen_rtx_CONST (Pmode, off);
6873 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6875 if (TARGET_GNU2_TLS)
6877 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6879 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6884 case TLS_MODEL_INITIAL_EXEC:
6888 type = UNSPEC_GOTNTPOFF;
6892 if (reload_in_progress)
6893 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6894 pic = pic_offset_table_rtx;
6895 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6897 else if (!TARGET_ANY_GNU_TLS)
6899 pic = gen_reg_rtx (Pmode);
6900 emit_insn (gen_set_got (pic));
6901 type = UNSPEC_GOTTPOFF;
6906 type = UNSPEC_INDNTPOFF;
6909 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6910 off = gen_rtx_CONST (Pmode, off);
6912 off = gen_rtx_PLUS (Pmode, pic, off);
6913 off = gen_const_mem (Pmode, off);
6914 set_mem_alias_set (off, ix86_GOT_alias_set ());
6916 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6918 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6919 off = force_reg (Pmode, off);
6920 return gen_rtx_PLUS (Pmode, base, off);
6924 base = get_thread_pointer (true);
6925 dest = gen_reg_rtx (Pmode);
6926 emit_insn (gen_subsi3 (dest, base, off));
6930 case TLS_MODEL_LOCAL_EXEC:
6931 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6932 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6933 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6934 off = gen_rtx_CONST (Pmode, off);
6936 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6938 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6939 return gen_rtx_PLUS (Pmode, base, off);
6943 base = get_thread_pointer (true);
6944 dest = gen_reg_rtx (Pmode);
6945 emit_insn (gen_subsi3 (dest, base, off));
6956 /* Try machine-dependent ways of modifying an illegitimate address
6957 to be legitimate. If we find one, return the new, valid address.
6958 This macro is used in only one place: `memory_address' in explow.c.
6960 OLDX is the address as it was before break_out_memory_refs was called.
6961 In some cases it is useful to look at this to decide what needs to be done.
6963 MODE and WIN are passed so that this macro can use
6964 GO_IF_LEGITIMATE_ADDRESS.
6966 It is always safe for this macro to do nothing. It exists to recognize
6967 opportunities to optimize the output.
6969 For the 80386, we handle X+REG by loading X into a register R and
6970 using R+REG. R will go in a general reg and indexing will be used.
6971 However, if REG is a broken-out memory address or multiplication,
6972 nothing needs to be done because REG can certainly go in a general reg.
6974 When -fpic is used, special handling is needed for symbolic references.
6975 See comments by legitimize_pic_address in i386.c for details. */
6978 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6983 if (TARGET_DEBUG_ADDR)
6985 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6986 GET_MODE_NAME (mode));
6990 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6992 return legitimize_tls_address (x, log, false);
6993 if (GET_CODE (x) == CONST
6994 && GET_CODE (XEXP (x, 0)) == PLUS
6995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6996 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6998 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6999 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7002 if (flag_pic && SYMBOLIC_CONST (x))
7003 return legitimize_pic_address (x, 0);
7005 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7006 if (GET_CODE (x) == ASHIFT
7007 && GET_CODE (XEXP (x, 1)) == CONST_INT
7008 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7011 log = INTVAL (XEXP (x, 1));
7012 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7013 GEN_INT (1 << log));
7016 if (GET_CODE (x) == PLUS)
7018 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7020 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7021 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7022 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7025 log = INTVAL (XEXP (XEXP (x, 0), 1));
7026 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7027 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7028 GEN_INT (1 << log));
7031 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7032 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7033 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7036 log = INTVAL (XEXP (XEXP (x, 1), 1));
7037 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7038 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7039 GEN_INT (1 << log));
7042 /* Put multiply first if it isn't already. */
7043 if (GET_CODE (XEXP (x, 1)) == MULT)
7045 rtx tmp = XEXP (x, 0);
7046 XEXP (x, 0) = XEXP (x, 1);
7051 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7052 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7053 created by virtual register instantiation, register elimination, and
7054 similar optimizations. */
7055 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7058 x = gen_rtx_PLUS (Pmode,
7059 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7060 XEXP (XEXP (x, 1), 0)),
7061 XEXP (XEXP (x, 1), 1));
7065 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7066 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7067 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7068 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7069 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7070 && CONSTANT_P (XEXP (x, 1)))
7073 rtx other = NULL_RTX;
7075 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7077 constant = XEXP (x, 1);
7078 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7080 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7082 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7083 other = XEXP (x, 1);
7091 x = gen_rtx_PLUS (Pmode,
7092 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7093 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7094 plus_constant (other, INTVAL (constant)));
7098 if (changed && legitimate_address_p (mode, x, FALSE))
7101 if (GET_CODE (XEXP (x, 0)) == MULT)
7104 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7107 if (GET_CODE (XEXP (x, 1)) == MULT)
7110 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7114 && GET_CODE (XEXP (x, 1)) == REG
7115 && GET_CODE (XEXP (x, 0)) == REG)
7118 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7121 x = legitimize_pic_address (x, 0);
7124 if (changed && legitimate_address_p (mode, x, FALSE))
7127 if (GET_CODE (XEXP (x, 0)) == REG)
7129 rtx temp = gen_reg_rtx (Pmode);
7130 rtx val = force_operand (XEXP (x, 1), temp);
7132 emit_move_insn (temp, val);
7138 else if (GET_CODE (XEXP (x, 1)) == REG)
7140 rtx temp = gen_reg_rtx (Pmode);
7141 rtx val = force_operand (XEXP (x, 0), temp);
7143 emit_move_insn (temp, val);
7153 /* Print an integer constant expression in assembler syntax. Addition
7154 and subtraction are the only arithmetic that may appear in these
7155 expressions. FILE is the stdio stream to write to, X is the rtx, and
7156 CODE is the operand print code from the output string. */
7159 output_pic_addr_const (FILE *file, rtx x, int code)
7163 switch (GET_CODE (x))
7166 gcc_assert (flag_pic);
7171 if (! TARGET_MACHO || TARGET_64BIT)
7172 output_addr_const (file, x);
7175 const char *name = XSTR (x, 0);
7177 /* Mark the decl as referenced so that cgraph will output the function. */
7178 if (SYMBOL_REF_DECL (x))
7179 mark_decl_referenced (SYMBOL_REF_DECL (x));
7182 if (MACHOPIC_INDIRECT
7183 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7184 name = machopic_indirection_name (x, /*stub_p=*/true);
7186 assemble_name (file, name);
7188 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7189 fputs ("@PLT", file);
7196 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7197 assemble_name (asm_out_file, buf);
7201 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7205 /* This used to output parentheses around the expression,
7206 but that does not work on the 386 (either ATT or BSD assembler). */
7207 output_pic_addr_const (file, XEXP (x, 0), code);
7211 if (GET_MODE (x) == VOIDmode)
7213 /* We can use %d if the number is <32 bits and positive. */
7214 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7215 fprintf (file, "0x%lx%08lx",
7216 (unsigned long) CONST_DOUBLE_HIGH (x),
7217 (unsigned long) CONST_DOUBLE_LOW (x));
7219 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7222 /* We can't handle floating point constants;
7223 PRINT_OPERAND must handle them. */
7224 output_operand_lossage ("floating constant misused");
7228 /* Some assemblers need integer constants to appear first. */
7229 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7231 output_pic_addr_const (file, XEXP (x, 0), code);
7233 output_pic_addr_const (file, XEXP (x, 1), code);
7237 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7238 output_pic_addr_const (file, XEXP (x, 1), code);
7240 output_pic_addr_const (file, XEXP (x, 0), code);
7246 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7247 output_pic_addr_const (file, XEXP (x, 0), code);
7249 output_pic_addr_const (file, XEXP (x, 1), code);
7251 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7255 gcc_assert (XVECLEN (x, 0) == 1);
7256 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7257 switch (XINT (x, 1))
7260 fputs ("@GOT", file);
7263 fputs ("@GOTOFF", file);
7265 case UNSPEC_GOTPCREL:
7266 fputs ("@GOTPCREL(%rip)", file);
7268 case UNSPEC_GOTTPOFF:
7269 /* FIXME: This might be @TPOFF in Sun ld too. */
7270 fputs ("@GOTTPOFF", file);
7273 fputs ("@TPOFF", file);
7277 fputs ("@TPOFF", file);
7279 fputs ("@NTPOFF", file);
7282 fputs ("@DTPOFF", file);
7284 case UNSPEC_GOTNTPOFF:
7286 fputs ("@GOTTPOFF(%rip)", file);
7288 fputs ("@GOTNTPOFF", file);
7290 case UNSPEC_INDNTPOFF:
7291 fputs ("@INDNTPOFF", file);
7294 output_operand_lossage ("invalid UNSPEC as operand");
7300 output_operand_lossage ("invalid expression as operand");
7304 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7305 We need to emit DTP-relative relocations. */
7308 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7310 fputs (ASM_LONG, file);
7311 output_addr_const (file, x);
7312 fputs ("@DTPOFF", file);
7318 fputs (", 0", file);
7325 /* In the name of slightly smaller debug output, and to cater to
7326 general assembler lossage, recognize PIC+GOTOFF and turn it back
7327 into a direct symbol reference.
7329 On Darwin, this is necessary to avoid a crash, because Darwin
7330 has a different PIC label for each routine but the DWARF debugging
7331 information is not associated with any particular routine, so it's
7332 necessary to remove references to the PIC label from RTL stored by
7333 the DWARF output code. */
7336 ix86_delegitimize_address (rtx orig_x)
7339 /* reg_addend is NULL or a multiple of some register. */
7340 rtx reg_addend = NULL_RTX;
7341 /* const_addend is NULL or a const_int. */
7342 rtx const_addend = NULL_RTX;
7343 /* This is the result, or NULL. */
7344 rtx result = NULL_RTX;
7346 if (GET_CODE (x) == MEM)
7351 if (GET_CODE (x) != CONST
7352 || GET_CODE (XEXP (x, 0)) != UNSPEC
7353 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7354 || GET_CODE (orig_x) != MEM)
7356 return XVECEXP (XEXP (x, 0), 0, 0);
7359 if (GET_CODE (x) != PLUS
7360 || GET_CODE (XEXP (x, 1)) != CONST)
7363 if (GET_CODE (XEXP (x, 0)) == REG
7364 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7365 /* %ebx + GOT/GOTOFF */
7367 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7369 /* %ebx + %reg * scale + GOT/GOTOFF */
7370 reg_addend = XEXP (x, 0);
7371 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7372 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7373 reg_addend = XEXP (reg_addend, 1);
7374 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7375 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7376 reg_addend = XEXP (reg_addend, 0);
7379 if (GET_CODE (reg_addend) != REG
7380 && GET_CODE (reg_addend) != MULT
7381 && GET_CODE (reg_addend) != ASHIFT)
7387 x = XEXP (XEXP (x, 1), 0);
7388 if (GET_CODE (x) == PLUS
7389 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7391 const_addend = XEXP (x, 1);
7395 if (GET_CODE (x) == UNSPEC
7396 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7397 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7398 result = XVECEXP (x, 0, 0);
7400 if (TARGET_MACHO && darwin_local_data_pic (x)
7401 && GET_CODE (orig_x) != MEM)
7402 result = XEXP (x, 0);
7408 result = gen_rtx_PLUS (Pmode, result, const_addend);
7410 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7415 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7420 if (mode == CCFPmode || mode == CCFPUmode)
7422 enum rtx_code second_code, bypass_code;
7423 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7424 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7425 code = ix86_fp_compare_code_to_integer (code);
7429 code = reverse_condition (code);
7440 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7444 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7445 Those same assemblers have the same but opposite lossage on cmov. */
7446 gcc_assert (mode == CCmode);
7447 suffix = fp ? "nbe" : "a";
7467 gcc_assert (mode == CCmode);
7489 gcc_assert (mode == CCmode);
7490 suffix = fp ? "nb" : "ae";
7493 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7497 gcc_assert (mode == CCmode);
7501 suffix = fp ? "u" : "p";
7504 suffix = fp ? "nu" : "np";
7509 fputs (suffix, file);
7512 /* Print the name of register X to FILE based on its machine mode and number.
7513 If CODE is 'w', pretend the mode is HImode.
7514 If CODE is 'b', pretend the mode is QImode.
7515 If CODE is 'k', pretend the mode is SImode.
7516 If CODE is 'q', pretend the mode is DImode.
7517 If CODE is 'h', pretend the reg is the 'high' byte register.
7518 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7521 print_reg (rtx x, int code, FILE *file)
7523 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7524 && REGNO (x) != FRAME_POINTER_REGNUM
7525 && REGNO (x) != FLAGS_REG
7526 && REGNO (x) != FPSR_REG);
7528 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7531 if (code == 'w' || MMX_REG_P (x))
7533 else if (code == 'b')
7535 else if (code == 'k')
7537 else if (code == 'q')
7539 else if (code == 'y')
7541 else if (code == 'h')
7544 code = GET_MODE_SIZE (GET_MODE (x));
7546 /* Irritatingly, AMD extended registers use different naming convention
7547 from the normal registers. */
7548 if (REX_INT_REG_P (x))
7550 gcc_assert (TARGET_64BIT);
7554 error ("extended registers have no high halves");
7557 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7560 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7563 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7566 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7569 error ("unsupported operand size for extended register");
7577 if (STACK_TOP_P (x))
7579 fputs ("st(0)", file);
7586 if (! ANY_FP_REG_P (x))
7587 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7592 fputs (hi_reg_name[REGNO (x)], file);
7595 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7597 fputs (qi_reg_name[REGNO (x)], file);
7600 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7602 fputs (qi_high_reg_name[REGNO (x)], file);
7609 /* Locate some local-dynamic symbol still in use by this function
7610 so that we can print its name in some tls_local_dynamic_base
7614 get_some_local_dynamic_name (void)
7618 if (cfun->machine->some_ld_name)
7619 return cfun->machine->some_ld_name;
7621 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7623 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7624 return cfun->machine->some_ld_name;
7630 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7634 if (GET_CODE (x) == SYMBOL_REF
7635 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7637 cfun->machine->some_ld_name = XSTR (x, 0);
7645 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7646 C -- print opcode suffix for set/cmov insn.
7647 c -- like C, but print reversed condition
7648 F,f -- likewise, but for floating-point.
7649 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7651 R -- print the prefix for register names.
7652 z -- print the opcode suffix for the size of the current operand.
7653 * -- print a star (in certain assembler syntax)
7654 A -- print an absolute memory reference.
7655 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7656 s -- print a shift double count, followed by the assemblers argument
7658 b -- print the QImode name of the register for the indicated operand.
7659 %b0 would print %al if operands[0] is reg 0.
7660 w -- likewise, print the HImode name of the register.
7661 k -- likewise, print the SImode name of the register.
7662 q -- likewise, print the DImode name of the register.
7663 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7664 y -- print "st(0)" instead of "st" as a register.
7665 D -- print condition for SSE cmp instruction.
7666 P -- if PIC, print an @PLT suffix.
7667 X -- don't print any sort of PIC '@' suffix for a symbol.
7668 & -- print some in-use local-dynamic symbol name.
7669 H -- print a memory address offset by 8; used for sse high-parts
7673 print_operand (FILE *file, rtx x, int code)
7680 if (ASSEMBLER_DIALECT == ASM_ATT)
7685 assemble_name (file, get_some_local_dynamic_name ());
7689 switch (ASSEMBLER_DIALECT)
7696 /* Intel syntax. For absolute addresses, registers should not
7697 be surrounded by braces. */
7698 if (GET_CODE (x) != REG)
7701 PRINT_OPERAND (file, x, 0);
7711 PRINT_OPERAND (file, x, 0);
7716 if (ASSEMBLER_DIALECT == ASM_ATT)
7721 if (ASSEMBLER_DIALECT == ASM_ATT)
7726 if (ASSEMBLER_DIALECT == ASM_ATT)
7731 if (ASSEMBLER_DIALECT == ASM_ATT)
7736 if (ASSEMBLER_DIALECT == ASM_ATT)
7741 if (ASSEMBLER_DIALECT == ASM_ATT)
7746 /* 387 opcodes don't get size suffixes if the operands are
7748 if (STACK_REG_P (x))
7751 /* Likewise if using Intel opcodes. */
7752 if (ASSEMBLER_DIALECT == ASM_INTEL)
7755 /* This is the size of op from size of operand. */
7756 switch (GET_MODE_SIZE (GET_MODE (x)))
7759 #ifdef HAVE_GAS_FILDS_FISTS
7765 if (GET_MODE (x) == SFmode)
7780 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7782 #ifdef GAS_MNEMONICS
7808 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7810 PRINT_OPERAND (file, x, 0);
7816 /* Little bit of braindamage here. The SSE compare instructions
7817 does use completely different names for the comparisons that the
7818 fp conditional moves. */
7819 switch (GET_CODE (x))
7834 fputs ("unord", file);
7838 fputs ("neq", file);
7842 fputs ("nlt", file);
7846 fputs ("nle", file);
7849 fputs ("ord", file);
7856 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7857 if (ASSEMBLER_DIALECT == ASM_ATT)
7859 switch (GET_MODE (x))
7861 case HImode: putc ('w', file); break;
7863 case SFmode: putc ('l', file); break;
7865 case DFmode: putc ('q', file); break;
7866 default: gcc_unreachable ();
7873 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7876 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7877 if (ASSEMBLER_DIALECT == ASM_ATT)
7880 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7883 /* Like above, but reverse condition */
7885 /* Check to see if argument to %c is really a constant
7886 and not a condition code which needs to be reversed. */
7887 if (!COMPARISON_P (x))
7889 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7892 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7895 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7896 if (ASSEMBLER_DIALECT == ASM_ATT)
7899 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7903 /* It doesn't actually matter what mode we use here, as we're
7904 only going to use this for printing. */
7905 x = adjust_address_nv (x, DImode, 8);
7912 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7915 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7918 int pred_val = INTVAL (XEXP (x, 0));
7920 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7921 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7923 int taken = pred_val > REG_BR_PROB_BASE / 2;
7924 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7926 /* Emit hints only in the case default branch prediction
7927 heuristics would fail. */
7928 if (taken != cputaken)
7930 /* We use 3e (DS) prefix for taken branches and
7931 2e (CS) prefix for not taken branches. */
7933 fputs ("ds ; ", file);
7935 fputs ("cs ; ", file);
7942 output_operand_lossage ("invalid operand code '%c'", code);
7946 if (GET_CODE (x) == REG)
7947 print_reg (x, code, file);
7949 else if (GET_CODE (x) == MEM)
7951 /* No `byte ptr' prefix for call instructions. */
7952 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7955 switch (GET_MODE_SIZE (GET_MODE (x)))
7957 case 1: size = "BYTE"; break;
7958 case 2: size = "WORD"; break;
7959 case 4: size = "DWORD"; break;
7960 case 8: size = "QWORD"; break;
7961 case 12: size = "XWORD"; break;
7962 case 16: size = "XMMWORD"; break;
7967 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7970 else if (code == 'w')
7972 else if (code == 'k')
7976 fputs (" PTR ", file);
7980 /* Avoid (%rip) for call operands. */
7981 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7982 && GET_CODE (x) != CONST_INT)
7983 output_addr_const (file, x);
7984 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7985 output_operand_lossage ("invalid constraints for operand");
7990 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7995 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7996 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7998 if (ASSEMBLER_DIALECT == ASM_ATT)
8000 fprintf (file, "0x%08lx", l);
8003 /* These float cases don't actually occur as immediate operands. */
8004 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8008 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8009 fprintf (file, "%s", dstr);
8012 else if (GET_CODE (x) == CONST_DOUBLE
8013 && GET_MODE (x) == XFmode)
8017 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8018 fprintf (file, "%s", dstr);
8023 /* We have patterns that allow zero sets of memory, for instance.
8024 In 64-bit mode, we should probably support all 8-byte vectors,
8025 since we can in fact encode that into an immediate. */
8026 if (GET_CODE (x) == CONST_VECTOR)
8028 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8034 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8036 if (ASSEMBLER_DIALECT == ASM_ATT)
8039 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8040 || GET_CODE (x) == LABEL_REF)
8042 if (ASSEMBLER_DIALECT == ASM_ATT)
8045 fputs ("OFFSET FLAT:", file);
8048 if (GET_CODE (x) == CONST_INT)
8049 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8051 output_pic_addr_const (file, x, code);
8053 output_addr_const (file, x);
8057 /* Print a memory operand whose address is ADDR. */
8060 print_operand_address (FILE *file, rtx addr)
8062 struct ix86_address parts;
8063 rtx base, index, disp;
8065 int ok = ix86_decompose_address (addr, &parts);
8070 index = parts.index;
8072 scale = parts.scale;
8080 if (USER_LABEL_PREFIX[0] == 0)
8082 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8088 if (!base && !index)
8090 /* Displacement only requires special attention. */
8092 if (GET_CODE (disp) == CONST_INT)
8094 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8096 if (USER_LABEL_PREFIX[0] == 0)
8098 fputs ("ds:", file);
8100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8103 output_pic_addr_const (file, disp, 0);
8105 output_addr_const (file, disp);
8107 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8110 if (GET_CODE (disp) == CONST
8111 && GET_CODE (XEXP (disp, 0)) == PLUS
8112 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8113 disp = XEXP (XEXP (disp, 0), 0);
8114 if (GET_CODE (disp) == LABEL_REF
8115 || (GET_CODE (disp) == SYMBOL_REF
8116 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8117 fputs ("(%rip)", file);
8122 if (ASSEMBLER_DIALECT == ASM_ATT)
8127 output_pic_addr_const (file, disp, 0);
8128 else if (GET_CODE (disp) == LABEL_REF)
8129 output_asm_label (disp);
8131 output_addr_const (file, disp);
8136 print_reg (base, 0, file);
8140 print_reg (index, 0, file);
8142 fprintf (file, ",%d", scale);
8148 rtx offset = NULL_RTX;
8152 /* Pull out the offset of a symbol; print any symbol itself. */
8153 if (GET_CODE (disp) == CONST
8154 && GET_CODE (XEXP (disp, 0)) == PLUS
8155 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8157 offset = XEXP (XEXP (disp, 0), 1);
8158 disp = gen_rtx_CONST (VOIDmode,
8159 XEXP (XEXP (disp, 0), 0));
8163 output_pic_addr_const (file, disp, 0);
8164 else if (GET_CODE (disp) == LABEL_REF)
8165 output_asm_label (disp);
8166 else if (GET_CODE (disp) == CONST_INT)
8169 output_addr_const (file, disp);
8175 print_reg (base, 0, file);
8178 if (INTVAL (offset) >= 0)
8180 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8191 print_reg (index, 0, file);
8193 fprintf (file, "*%d", scale);
8201 output_addr_const_extra (FILE *file, rtx x)
8205 if (GET_CODE (x) != UNSPEC)
8208 op = XVECEXP (x, 0, 0);
8209 switch (XINT (x, 1))
8211 case UNSPEC_GOTTPOFF:
8212 output_addr_const (file, op);
8213 /* FIXME: This might be @TPOFF in Sun ld. */
8214 fputs ("@GOTTPOFF", file);
8217 output_addr_const (file, op);
8218 fputs ("@TPOFF", file);
8221 output_addr_const (file, op);
8223 fputs ("@TPOFF", file);
8225 fputs ("@NTPOFF", file);
8228 output_addr_const (file, op);
8229 fputs ("@DTPOFF", file);
8231 case UNSPEC_GOTNTPOFF:
8232 output_addr_const (file, op);
8234 fputs ("@GOTTPOFF(%rip)", file);
8236 fputs ("@GOTNTPOFF", file);
8238 case UNSPEC_INDNTPOFF:
8239 output_addr_const (file, op);
8240 fputs ("@INDNTPOFF", file);
8250 /* Split one or more DImode RTL references into pairs of SImode
8251 references. The RTL can be REG, offsettable MEM, integer constant, or
8252 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8253 split and "num" is its length. lo_half and hi_half are output arrays
8254 that parallel "operands". */
8257 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8261 rtx op = operands[num];
8263 /* simplify_subreg refuse to split volatile memory addresses,
8264 but we still have to handle it. */
8265 if (GET_CODE (op) == MEM)
8267 lo_half[num] = adjust_address (op, SImode, 0);
8268 hi_half[num] = adjust_address (op, SImode, 4);
8272 lo_half[num] = simplify_gen_subreg (SImode, op,
8273 GET_MODE (op) == VOIDmode
8274 ? DImode : GET_MODE (op), 0);
8275 hi_half[num] = simplify_gen_subreg (SImode, op,
8276 GET_MODE (op) == VOIDmode
8277 ? DImode : GET_MODE (op), 4);
8281 /* Split one or more TImode RTL references into pairs of DImode
8282 references. The RTL can be REG, offsettable MEM, integer constant, or
8283 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8284 split and "num" is its length. lo_half and hi_half are output arrays
8285 that parallel "operands". */
8288 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8292 rtx op = operands[num];
8294 /* simplify_subreg refuse to split volatile memory addresses, but we
8295 still have to handle it. */
8296 if (GET_CODE (op) == MEM)
8298 lo_half[num] = adjust_address (op, DImode, 0);
8299 hi_half[num] = adjust_address (op, DImode, 8);
8303 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8304 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8309 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8310 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8311 is the expression of the binary operation. The output may either be
8312 emitted here, or returned to the caller, like all output_* functions.
8314 There is no guarantee that the operands are the same mode, as they
8315 might be within FLOAT or FLOAT_EXTEND expressions. */
8317 #ifndef SYSV386_COMPAT
8318 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8319 wants to fix the assemblers because that causes incompatibility
8320 with gcc. No-one wants to fix gcc because that causes
8321 incompatibility with assemblers... You can use the option of
8322 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8323 #define SYSV386_COMPAT 1
8327 output_387_binary_op (rtx insn, rtx *operands)
8329 static char buf[30];
8332 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8334 #ifdef ENABLE_CHECKING
8335 /* Even if we do not want to check the inputs, this documents input
8336 constraints. Which helps in understanding the following code. */
8337 if (STACK_REG_P (operands[0])
8338 && ((REG_P (operands[1])
8339 && REGNO (operands[0]) == REGNO (operands[1])
8340 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8341 || (REG_P (operands[2])
8342 && REGNO (operands[0]) == REGNO (operands[2])
8343 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8344 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8347 gcc_assert (is_sse);
8350 switch (GET_CODE (operands[3]))
8353 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8354 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8362 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8363 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8371 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8372 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8380 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8381 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8395 if (GET_MODE (operands[0]) == SFmode)
8396 strcat (buf, "ss\t{%2, %0|%0, %2}");
8398 strcat (buf, "sd\t{%2, %0|%0, %2}");
8403 switch (GET_CODE (operands[3]))
8407 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8409 rtx temp = operands[2];
8410 operands[2] = operands[1];
8414 /* know operands[0] == operands[1]. */
8416 if (GET_CODE (operands[2]) == MEM)
8422 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8424 if (STACK_TOP_P (operands[0]))
8425 /* How is it that we are storing to a dead operand[2]?
8426 Well, presumably operands[1] is dead too. We can't
8427 store the result to st(0) as st(0) gets popped on this
8428 instruction. Instead store to operands[2] (which I
8429 think has to be st(1)). st(1) will be popped later.
8430 gcc <= 2.8.1 didn't have this check and generated
8431 assembly code that the Unixware assembler rejected. */
8432 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8434 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8438 if (STACK_TOP_P (operands[0]))
8439 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8441 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8446 if (GET_CODE (operands[1]) == MEM)
8452 if (GET_CODE (operands[2]) == MEM)
8458 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8461 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8462 derived assemblers, confusingly reverse the direction of
8463 the operation for fsub{r} and fdiv{r} when the
8464 destination register is not st(0). The Intel assembler
8465 doesn't have this brain damage. Read !SYSV386_COMPAT to
8466 figure out what the hardware really does. */
8467 if (STACK_TOP_P (operands[0]))
8468 p = "{p\t%0, %2|rp\t%2, %0}";
8470 p = "{rp\t%2, %0|p\t%0, %2}";
8472 if (STACK_TOP_P (operands[0]))
8473 /* As above for fmul/fadd, we can't store to st(0). */
8474 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8476 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8481 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8484 if (STACK_TOP_P (operands[0]))
8485 p = "{rp\t%0, %1|p\t%1, %0}";
8487 p = "{p\t%1, %0|rp\t%0, %1}";
8489 if (STACK_TOP_P (operands[0]))
8490 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8492 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8497 if (STACK_TOP_P (operands[0]))
8499 if (STACK_TOP_P (operands[1]))
8500 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8502 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8505 else if (STACK_TOP_P (operands[1]))
8508 p = "{\t%1, %0|r\t%0, %1}";
8510 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8516 p = "{r\t%2, %0|\t%0, %2}";
8518 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8531 /* Return needed mode for entity in optimize_mode_switching pass. */
8534 ix86_mode_needed (int entity, rtx insn)
8536 enum attr_i387_cw mode;
8538 /* The mode UNINITIALIZED is used to store control word after a
8539 function call or ASM pattern. The mode ANY specify that function
8540 has no requirements on the control word and make no changes in the
8541 bits we are interested in. */
8544 || (NONJUMP_INSN_P (insn)
8545 && (asm_noperands (PATTERN (insn)) >= 0
8546 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8547 return I387_CW_UNINITIALIZED;
8549 if (recog_memoized (insn) < 0)
8552 mode = get_attr_i387_cw (insn);
8557 if (mode == I387_CW_TRUNC)
8562 if (mode == I387_CW_FLOOR)
8567 if (mode == I387_CW_CEIL)
8572 if (mode == I387_CW_MASK_PM)
8583 /* Output code to initialize control word copies used by trunc?f?i and
8584 rounding patterns. CURRENT_MODE is set to current control word,
8585 while NEW_MODE is set to new control word. */
8588 emit_i387_cw_initialization (int mode)
8590 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8595 rtx reg = gen_reg_rtx (HImode);
8597 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8598 emit_move_insn (reg, stored_mode);
8600 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8605 /* round toward zero (truncate) */
8606 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8607 slot = SLOT_CW_TRUNC;
8611 /* round down toward -oo */
8612 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8613 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8614 slot = SLOT_CW_FLOOR;
8618 /* round up toward +oo */
8619 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8620 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8621 slot = SLOT_CW_CEIL;
8624 case I387_CW_MASK_PM:
8625 /* mask precision exception for nearbyint() */
8626 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8627 slot = SLOT_CW_MASK_PM;
8639 /* round toward zero (truncate) */
8640 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8641 slot = SLOT_CW_TRUNC;
8645 /* round down toward -oo */
8646 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8647 slot = SLOT_CW_FLOOR;
8651 /* round up toward +oo */
8652 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8653 slot = SLOT_CW_CEIL;
8656 case I387_CW_MASK_PM:
8657 /* mask precision exception for nearbyint() */
8658 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8659 slot = SLOT_CW_MASK_PM;
8667 gcc_assert (slot < MAX_386_STACK_LOCALS);
8669 new_mode = assign_386_stack_local (HImode, slot);
8670 emit_move_insn (new_mode, reg);
8673 /* Output code for INSN to convert a float to a signed int. OPERANDS
8674 are the insn operands. The output may be [HSD]Imode and the input
8675 operand may be [SDX]Fmode. */
8678 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8680 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8681 int dimode_p = GET_MODE (operands[0]) == DImode;
8682 int round_mode = get_attr_i387_cw (insn);
8684 /* Jump through a hoop or two for DImode, since the hardware has no
8685 non-popping instruction. We used to do this a different way, but
8686 that was somewhat fragile and broke with post-reload splitters. */
8687 if ((dimode_p || fisttp) && !stack_top_dies)
8688 output_asm_insn ("fld\t%y1", operands);
8690 gcc_assert (STACK_TOP_P (operands[1]));
8691 gcc_assert (GET_CODE (operands[0]) == MEM);
8694 output_asm_insn ("fisttp%z0\t%0", operands);
8697 if (round_mode != I387_CW_ANY)
8698 output_asm_insn ("fldcw\t%3", operands);
8699 if (stack_top_dies || dimode_p)
8700 output_asm_insn ("fistp%z0\t%0", operands);
8702 output_asm_insn ("fist%z0\t%0", operands);
8703 if (round_mode != I387_CW_ANY)
8704 output_asm_insn ("fldcw\t%2", operands);
8710 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8711 have the values zero or one, indicates the ffreep insn's operand
8712 from the OPERANDS array. */
8715 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8717 if (TARGET_USE_FFREEP)
8718 #if HAVE_AS_IX86_FFREEP
8719 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8721 switch (REGNO (operands[opno]))
8723 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8724 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8725 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8726 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8727 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8728 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8729 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8730 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8734 return opno ? "fstp\t%y1" : "fstp\t%y0";
8738 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8739 should be used. UNORDERED_P is true when fucom should be used. */
8742 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8745 rtx cmp_op0, cmp_op1;
8746 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8750 cmp_op0 = operands[0];
8751 cmp_op1 = operands[1];
8755 cmp_op0 = operands[1];
8756 cmp_op1 = operands[2];
8761 if (GET_MODE (operands[0]) == SFmode)
8763 return "ucomiss\t{%1, %0|%0, %1}";
8765 return "comiss\t{%1, %0|%0, %1}";
8768 return "ucomisd\t{%1, %0|%0, %1}";
8770 return "comisd\t{%1, %0|%0, %1}";
8773 gcc_assert (STACK_TOP_P (cmp_op0));
8775 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8777 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8781 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8782 return output_387_ffreep (operands, 1);
8785 return "ftst\n\tfnstsw\t%0";
8788 if (STACK_REG_P (cmp_op1)
8790 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8791 && REGNO (cmp_op1) != FIRST_STACK_REG)
8793 /* If both the top of the 387 stack dies, and the other operand
8794 is also a stack register that dies, then this must be a
8795 `fcompp' float compare */
8799 /* There is no double popping fcomi variant. Fortunately,
8800 eflags is immune from the fstp's cc clobbering. */
8802 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8804 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8805 return output_387_ffreep (operands, 0);
8810 return "fucompp\n\tfnstsw\t%0";
8812 return "fcompp\n\tfnstsw\t%0";
8817 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8819 static const char * const alt[16] =
8821 "fcom%z2\t%y2\n\tfnstsw\t%0",
8822 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8823 "fucom%z2\t%y2\n\tfnstsw\t%0",
8824 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8826 "ficom%z2\t%y2\n\tfnstsw\t%0",
8827 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8831 "fcomi\t{%y1, %0|%0, %y1}",
8832 "fcomip\t{%y1, %0|%0, %y1}",
8833 "fucomi\t{%y1, %0|%0, %y1}",
8834 "fucomip\t{%y1, %0|%0, %y1}",
8845 mask = eflags_p << 3;
8846 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8847 mask |= unordered_p << 1;
8848 mask |= stack_top_dies;
8850 gcc_assert (mask < 16);
8859 ix86_output_addr_vec_elt (FILE *file, int value)
8861 const char *directive = ASM_LONG;
8865 directive = ASM_QUAD;
8867 gcc_assert (!TARGET_64BIT);
8870 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8874 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8877 fprintf (file, "%s%s%d-%s%d\n",
8878 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8879 else if (HAVE_AS_GOTOFF_IN_DATA)
8880 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8882 else if (TARGET_MACHO)
8884 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8885 machopic_output_function_base_name (file);
8886 fprintf(file, "\n");
8890 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8891 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8894 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8898 ix86_expand_clear (rtx dest)
8902 /* We play register width games, which are only valid after reload. */
8903 gcc_assert (reload_completed);
8905 /* Avoid HImode and its attendant prefix byte. */
8906 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8907 dest = gen_rtx_REG (SImode, REGNO (dest));
8909 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8911 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8912 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8914 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8915 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8921 /* X is an unchanging MEM. If it is a constant pool reference, return
8922 the constant pool rtx, else NULL. */
8925 maybe_get_pool_constant (rtx x)
8927 x = ix86_delegitimize_address (XEXP (x, 0));
8929 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8930 return get_pool_constant (x);
8936 ix86_expand_move (enum machine_mode mode, rtx operands[])
8938 int strict = (reload_in_progress || reload_completed);
8940 enum tls_model model;
8945 if (GET_CODE (op1) == SYMBOL_REF)
8947 model = SYMBOL_REF_TLS_MODEL (op1);
8950 op1 = legitimize_tls_address (op1, model, true);
8951 op1 = force_operand (op1, op0);
8956 else if (GET_CODE (op1) == CONST
8957 && GET_CODE (XEXP (op1, 0)) == PLUS
8958 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8960 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8963 rtx addend = XEXP (XEXP (op1, 0), 1);
8964 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8965 op1 = force_operand (op1, NULL);
8966 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8967 op0, 1, OPTAB_DIRECT);
8973 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8975 if (TARGET_MACHO && !TARGET_64BIT)
8980 rtx temp = ((reload_in_progress
8981 || ((op0 && GET_CODE (op0) == REG)
8983 ? op0 : gen_reg_rtx (Pmode));
8984 op1 = machopic_indirect_data_reference (op1, temp);
8985 op1 = machopic_legitimize_pic_address (op1, mode,
8986 temp == op1 ? 0 : temp);
8988 else if (MACHOPIC_INDIRECT)
8989 op1 = machopic_indirect_data_reference (op1, 0);
8996 if (GET_CODE (op0) == MEM)
8997 op1 = force_reg (Pmode, op1);
8999 op1 = legitimize_address (op1, op1, Pmode);
9004 if (GET_CODE (op0) == MEM
9005 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9006 || !push_operand (op0, mode))
9007 && GET_CODE (op1) == MEM)
9008 op1 = force_reg (mode, op1);
9010 if (push_operand (op0, mode)
9011 && ! general_no_elim_operand (op1, mode))
9012 op1 = copy_to_mode_reg (mode, op1);
9014 /* Force large constants in 64bit compilation into register
9015 to get them CSEed. */
9016 if (TARGET_64BIT && mode == DImode
9017 && immediate_operand (op1, mode)
9018 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9019 && !register_operand (op0, mode)
9020 && optimize && !reload_completed && !reload_in_progress)
9021 op1 = copy_to_mode_reg (mode, op1);
9023 if (FLOAT_MODE_P (mode))
9025 /* If we are loading a floating point constant to a register,
9026 force the value to memory now, since we'll get better code
9027 out the back end. */
9031 else if (GET_CODE (op1) == CONST_DOUBLE)
9033 op1 = validize_mem (force_const_mem (mode, op1));
9034 if (!register_operand (op0, mode))
9036 rtx temp = gen_reg_rtx (mode);
9037 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9038 emit_move_insn (op0, temp);
9045 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9049 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9051 rtx op0 = operands[0], op1 = operands[1];
9053 /* Force constants other than zero into memory. We do not know how
9054 the instructions used to build constants modify the upper 64 bits
9055 of the register, once we have that information we may be able
9056 to handle some of them more efficiently. */
9057 if ((reload_in_progress | reload_completed) == 0
9058 && register_operand (op0, mode)
9060 && standard_sse_constant_p (op1) <= 0)
9061 op1 = validize_mem (force_const_mem (mode, op1));
9063 /* Make operand1 a register if it isn't already. */
9065 && !register_operand (op0, mode)
9066 && !register_operand (op1, mode))
9068 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9072 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9075 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9076 straight to ix86_expand_vector_move. */
9079 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9088 /* If we're optimizing for size, movups is the smallest. */
9091 op0 = gen_lowpart (V4SFmode, op0);
9092 op1 = gen_lowpart (V4SFmode, op1);
9093 emit_insn (gen_sse_movups (op0, op1));
9097 /* ??? If we have typed data, then it would appear that using
9098 movdqu is the only way to get unaligned data loaded with
9100 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9102 op0 = gen_lowpart (V16QImode, op0);
9103 op1 = gen_lowpart (V16QImode, op1);
9104 emit_insn (gen_sse2_movdqu (op0, op1));
9108 if (TARGET_SSE2 && mode == V2DFmode)
9112 /* When SSE registers are split into halves, we can avoid
9113 writing to the top half twice. */
9114 if (TARGET_SSE_SPLIT_REGS)
9116 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9121 /* ??? Not sure about the best option for the Intel chips.
9122 The following would seem to satisfy; the register is
9123 entirely cleared, breaking the dependency chain. We
9124 then store to the upper half, with a dependency depth
9125 of one. A rumor has it that Intel recommends two movsd
9126 followed by an unpacklpd, but this is unconfirmed. And
9127 given that the dependency depth of the unpacklpd would
9128 still be one, I'm not sure why this would be better. */
9129 zero = CONST0_RTX (V2DFmode);
9132 m = adjust_address (op1, DFmode, 0);
9133 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9134 m = adjust_address (op1, DFmode, 8);
9135 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9139 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9140 emit_move_insn (op0, CONST0_RTX (mode));
9142 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9144 if (mode != V4SFmode)
9145 op0 = gen_lowpart (V4SFmode, op0);
9146 m = adjust_address (op1, V2SFmode, 0);
9147 emit_insn (gen_sse_loadlps (op0, op0, m));
9148 m = adjust_address (op1, V2SFmode, 8);
9149 emit_insn (gen_sse_loadhps (op0, op0, m));
9152 else if (MEM_P (op0))
9154 /* If we're optimizing for size, movups is the smallest. */
9157 op0 = gen_lowpart (V4SFmode, op0);
9158 op1 = gen_lowpart (V4SFmode, op1);
9159 emit_insn (gen_sse_movups (op0, op1));
9163 /* ??? Similar to above, only less clear because of quote
9164 typeless stores unquote. */
9165 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9166 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9168 op0 = gen_lowpart (V16QImode, op0);
9169 op1 = gen_lowpart (V16QImode, op1);
9170 emit_insn (gen_sse2_movdqu (op0, op1));
9174 if (TARGET_SSE2 && mode == V2DFmode)
9176 m = adjust_address (op0, DFmode, 0);
9177 emit_insn (gen_sse2_storelpd (m, op1));
9178 m = adjust_address (op0, DFmode, 8);
9179 emit_insn (gen_sse2_storehpd (m, op1));
9183 if (mode != V4SFmode)
9184 op1 = gen_lowpart (V4SFmode, op1);
9185 m = adjust_address (op0, V2SFmode, 0);
9186 emit_insn (gen_sse_storelps (m, op1));
9187 m = adjust_address (op0, V2SFmode, 8);
9188 emit_insn (gen_sse_storehps (m, op1));
9195 /* Expand a push in MODE. This is some mode for which we do not support
9196 proper push instructions, at least from the registers that we expect
9197 the value to live in. */
9200 ix86_expand_push (enum machine_mode mode, rtx x)
9204 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9205 GEN_INT (-GET_MODE_SIZE (mode)),
9206 stack_pointer_rtx, 1, OPTAB_DIRECT);
9207 if (tmp != stack_pointer_rtx)
9208 emit_move_insn (stack_pointer_rtx, tmp);
9210 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9211 emit_move_insn (tmp, x);
9214 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9215 destination to use for the operation. If different from the true
9216 destination in operands[0], a copy operation will be required. */
9219 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9222 int matching_memory;
9223 rtx src1, src2, dst;
9229 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9230 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9231 && (rtx_equal_p (dst, src2)
9232 || immediate_operand (src1, mode)))
9239 /* If the destination is memory, and we do not have matching source
9240 operands, do things in registers. */
9241 matching_memory = 0;
9242 if (GET_CODE (dst) == MEM)
9244 if (rtx_equal_p (dst, src1))
9245 matching_memory = 1;
9246 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9247 && rtx_equal_p (dst, src2))
9248 matching_memory = 2;
9250 dst = gen_reg_rtx (mode);
9253 /* Both source operands cannot be in memory. */
9254 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9256 if (matching_memory != 2)
9257 src2 = force_reg (mode, src2);
9259 src1 = force_reg (mode, src1);
9262 /* If the operation is not commutable, source 1 cannot be a constant
9263 or non-matching memory. */
9264 if ((CONSTANT_P (src1)
9265 || (!matching_memory && GET_CODE (src1) == MEM))
9266 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9267 src1 = force_reg (mode, src1);
9269 src1 = operands[1] = src1;
9270 src2 = operands[2] = src2;
9274 /* Similarly, but assume that the destination has already been
9278 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9279 enum machine_mode mode, rtx operands[])
9281 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9282 gcc_assert (dst == operands[0]);
9285 /* Attempt to expand a binary operator. Make the expansion closer to the
9286 actual machine, then just general_operand, which will allow 3 separate
9287 memory references (one output, two input) in a single insn. */
9290 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9293 rtx src1, src2, dst, op, clob;
9295 dst = ix86_fixup_binary_operands (code, mode, operands);
9299 /* Emit the instruction. */
9301 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9302 if (reload_in_progress)
9304 /* Reload doesn't know about the flags register, and doesn't know that
9305 it doesn't want to clobber it. We can only do this with PLUS. */
9306 gcc_assert (code == PLUS);
9311 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9312 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9315 /* Fix up the destination if needed. */
9316 if (dst != operands[0])
9317 emit_move_insn (operands[0], dst);
9320 /* Return TRUE or FALSE depending on whether the binary operator meets the
9321 appropriate constraints. */
9324 ix86_binary_operator_ok (enum rtx_code code,
9325 enum machine_mode mode ATTRIBUTE_UNUSED,
9328 /* Both source operands cannot be in memory. */
9329 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9331 /* If the operation is not commutable, source 1 cannot be a constant. */
9332 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9334 /* If the destination is memory, we must have a matching source operand. */
9335 if (GET_CODE (operands[0]) == MEM
9336 && ! (rtx_equal_p (operands[0], operands[1])
9337 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9338 && rtx_equal_p (operands[0], operands[2]))))
9340 /* If the operation is not commutable and the source 1 is memory, we must
9341 have a matching destination. */
9342 if (GET_CODE (operands[1]) == MEM
9343 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9344 && ! rtx_equal_p (operands[0], operands[1]))
9349 /* Attempt to expand a unary operator. Make the expansion closer to the
9350 actual machine, then just general_operand, which will allow 2 separate
9351 memory references (one output, one input) in a single insn. */
9354 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9357 int matching_memory;
9358 rtx src, dst, op, clob;
9363 /* If the destination is memory, and we do not have matching source
9364 operands, do things in registers. */
9365 matching_memory = 0;
9368 if (rtx_equal_p (dst, src))
9369 matching_memory = 1;
9371 dst = gen_reg_rtx (mode);
9374 /* When source operand is memory, destination must match. */
9375 if (MEM_P (src) && !matching_memory)
9376 src = force_reg (mode, src);
9378 /* Emit the instruction. */
9380 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9381 if (reload_in_progress || code == NOT)
9383 /* Reload doesn't know about the flags register, and doesn't know that
9384 it doesn't want to clobber it. */
9385 gcc_assert (code == NOT);
9390 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9391 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9394 /* Fix up the destination if needed. */
9395 if (dst != operands[0])
9396 emit_move_insn (operands[0], dst);
9399 /* Return TRUE or FALSE depending on whether the unary operator meets the
9400 appropriate constraints. */
9403 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9404 enum machine_mode mode ATTRIBUTE_UNUSED,
9405 rtx operands[2] ATTRIBUTE_UNUSED)
9407 /* If one of operands is memory, source and destination must match. */
9408 if ((GET_CODE (operands[0]) == MEM
9409 || GET_CODE (operands[1]) == MEM)
9410 && ! rtx_equal_p (operands[0], operands[1]))
9415 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9416 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9417 true, then replicate the mask for all elements of the vector register.
9418 If INVERT is true, then create a mask excluding the sign bit. */
9421 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9423 enum machine_mode vec_mode;
9424 HOST_WIDE_INT hi, lo;
9429 /* Find the sign bit, sign extended to 2*HWI. */
9431 lo = 0x80000000, hi = lo < 0;
9432 else if (HOST_BITS_PER_WIDE_INT >= 64)
9433 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9435 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9440 /* Force this value into the low part of a fp vector constant. */
9441 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9442 mask = gen_lowpart (mode, mask);
9447 v = gen_rtvec (4, mask, mask, mask, mask);
9449 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9450 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9451 vec_mode = V4SFmode;
9456 v = gen_rtvec (2, mask, mask);
9458 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9459 vec_mode = V2DFmode;
9462 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9465 /* Generate code for floating point ABS or NEG. */
9468 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9471 rtx mask, set, use, clob, dst, src;
9472 bool matching_memory;
9473 bool use_sse = false;
9474 bool vector_mode = VECTOR_MODE_P (mode);
9475 enum machine_mode elt_mode = mode;
9479 elt_mode = GET_MODE_INNER (mode);
9482 else if (TARGET_SSE_MATH)
9483 use_sse = SSE_FLOAT_MODE_P (mode);
9485 /* NEG and ABS performed with SSE use bitwise mask operations.
9486 Create the appropriate mask now. */
9488 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9495 /* If the destination is memory, and we don't have matching source
9496 operands or we're using the x87, do things in registers. */
9497 matching_memory = false;
9500 if (use_sse && rtx_equal_p (dst, src))
9501 matching_memory = true;
9503 dst = gen_reg_rtx (mode);
9505 if (MEM_P (src) && !matching_memory)
9506 src = force_reg (mode, src);
9510 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9511 set = gen_rtx_SET (VOIDmode, dst, set);
9516 set = gen_rtx_fmt_e (code, mode, src);
9517 set = gen_rtx_SET (VOIDmode, dst, set);
9520 use = gen_rtx_USE (VOIDmode, mask);
9521 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9522 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9523 gen_rtvec (3, set, use, clob)));
9529 if (dst != operands[0])
9530 emit_move_insn (operands[0], dst);
9533 /* Expand a copysign operation. Special case operand 0 being a constant. */
9536 ix86_expand_copysign (rtx operands[])
9538 enum machine_mode mode, vmode;
9539 rtx dest, op0, op1, mask, nmask;
9545 mode = GET_MODE (dest);
9546 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9548 if (GET_CODE (op0) == CONST_DOUBLE)
9552 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9553 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9555 if (op0 == CONST0_RTX (mode))
9556 op0 = CONST0_RTX (vmode);
9560 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9561 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9563 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9564 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9567 mask = ix86_build_signbit_mask (mode, 0, 0);
9570 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9572 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9576 nmask = ix86_build_signbit_mask (mode, 0, 1);
9577 mask = ix86_build_signbit_mask (mode, 0, 0);
9580 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9582 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9586 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9587 be a constant, and so has already been expanded into a vector constant. */
9590 ix86_split_copysign_const (rtx operands[])
9592 enum machine_mode mode, vmode;
9593 rtx dest, op0, op1, mask, x;
9600 mode = GET_MODE (dest);
9601 vmode = GET_MODE (mask);
9603 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9604 x = gen_rtx_AND (vmode, dest, mask);
9605 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9607 if (op0 != CONST0_RTX (vmode))
9609 x = gen_rtx_IOR (vmode, dest, op0);
9610 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9614 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9615 so we have to do two masks. */
9618 ix86_split_copysign_var (rtx operands[])
9620 enum machine_mode mode, vmode;
9621 rtx dest, scratch, op0, op1, mask, nmask, x;
9624 scratch = operands[1];
9627 nmask = operands[4];
9630 mode = GET_MODE (dest);
9631 vmode = GET_MODE (mask);
9633 if (rtx_equal_p (op0, op1))
9635 /* Shouldn't happen often (it's useless, obviously), but when it does
9636 we'd generate incorrect code if we continue below. */
9637 emit_move_insn (dest, op0);
9641 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9643 gcc_assert (REGNO (op1) == REGNO (scratch));
9645 x = gen_rtx_AND (vmode, scratch, mask);
9646 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9649 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9650 x = gen_rtx_NOT (vmode, dest);
9651 x = gen_rtx_AND (vmode, x, op0);
9652 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9656 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9658 x = gen_rtx_AND (vmode, scratch, mask);
9660 else /* alternative 2,4 */
9662 gcc_assert (REGNO (mask) == REGNO (scratch));
9663 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9664 x = gen_rtx_AND (vmode, scratch, op1);
9666 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9668 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9670 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9671 x = gen_rtx_AND (vmode, dest, nmask);
9673 else /* alternative 3,4 */
9675 gcc_assert (REGNO (nmask) == REGNO (dest));
9677 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9678 x = gen_rtx_AND (vmode, dest, op0);
9680 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9683 x = gen_rtx_IOR (vmode, dest, scratch);
9684 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9687 /* Return TRUE or FALSE depending on whether the first SET in INSN
9688 has source and destination with matching CC modes, and that the
9689 CC mode is at least as constrained as REQ_MODE. */
9692 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9695 enum machine_mode set_mode;
9697 set = PATTERN (insn);
9698 if (GET_CODE (set) == PARALLEL)
9699 set = XVECEXP (set, 0, 0);
9700 gcc_assert (GET_CODE (set) == SET);
9701 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9703 set_mode = GET_MODE (SET_DEST (set));
9707 if (req_mode != CCNOmode
9708 && (req_mode != CCmode
9709 || XEXP (SET_SRC (set), 1) != const0_rtx))
9713 if (req_mode == CCGCmode)
9717 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9721 if (req_mode == CCZmode)
9731 return (GET_MODE (SET_SRC (set)) == set_mode);
9734 /* Generate insn patterns to do an integer compare of OPERANDS. */
9737 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9739 enum machine_mode cmpmode;
9742 cmpmode = SELECT_CC_MODE (code, op0, op1);
9743 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9745 /* This is very simple, but making the interface the same as in the
9746 FP case makes the rest of the code easier. */
9747 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9748 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9750 /* Return the test that should be put into the flags user, i.e.
9751 the bcc, scc, or cmov instruction. */
9752 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9755 /* Figure out whether to use ordered or unordered fp comparisons.
9756 Return the appropriate mode to use. */
9759 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9761 /* ??? In order to make all comparisons reversible, we do all comparisons
9762 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9763 all forms trapping and nontrapping comparisons, we can make inequality
9764 comparisons trapping again, since it results in better code when using
9765 FCOM based compares. */
9766 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9770 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9772 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9773 return ix86_fp_compare_mode (code);
9776 /* Only zero flag is needed. */
9778 case NE: /* ZF!=0 */
9780 /* Codes needing carry flag. */
9781 case GEU: /* CF=0 */
9782 case GTU: /* CF=0 & ZF=0 */
9783 case LTU: /* CF=1 */
9784 case LEU: /* CF=1 | ZF=1 */
9786 /* Codes possibly doable only with sign flag when
9787 comparing against zero. */
9788 case GE: /* SF=OF or SF=0 */
9789 case LT: /* SF<>OF or SF=1 */
9790 if (op1 == const0_rtx)
9793 /* For other cases Carry flag is not required. */
9795 /* Codes doable only with sign flag when comparing
9796 against zero, but we miss jump instruction for it
9797 so we need to use relational tests against overflow
9798 that thus needs to be zero. */
9799 case GT: /* ZF=0 & SF=OF */
9800 case LE: /* ZF=1 | SF<>OF */
9801 if (op1 == const0_rtx)
9805 /* strcmp pattern do (use flags) and combine may ask us for proper
9814 /* Return the fixed registers used for condition codes. */
9817 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9824 /* If two condition code modes are compatible, return a condition code
9825 mode which is compatible with both. Otherwise, return
9828 static enum machine_mode
9829 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9834 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9837 if ((m1 == CCGCmode && m2 == CCGOCmode)
9838 || (m1 == CCGOCmode && m2 == CCGCmode))
9866 /* These are only compatible with themselves, which we already
9872 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9875 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9877 enum rtx_code swapped_code = swap_condition (code);
9878 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9879 || (ix86_fp_comparison_cost (swapped_code)
9880 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9883 /* Swap, force into registers, or otherwise massage the two operands
9884 to a fp comparison. The operands are updated in place; the new
9885 comparison code is returned. */
9887 static enum rtx_code
9888 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9890 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9891 rtx op0 = *pop0, op1 = *pop1;
9892 enum machine_mode op_mode = GET_MODE (op0);
9893 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9895 /* All of the unordered compare instructions only work on registers.
9896 The same is true of the fcomi compare instructions. The XFmode
9897 compare instructions require registers except when comparing
9898 against zero or when converting operand 1 from fixed point to
9902 && (fpcmp_mode == CCFPUmode
9903 || (op_mode == XFmode
9904 && ! (standard_80387_constant_p (op0) == 1
9905 || standard_80387_constant_p (op1) == 1)
9906 && GET_CODE (op1) != FLOAT)
9907 || ix86_use_fcomi_compare (code)))
9909 op0 = force_reg (op_mode, op0);
9910 op1 = force_reg (op_mode, op1);
9914 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9915 things around if they appear profitable, otherwise force op0
9918 if (standard_80387_constant_p (op0) == 0
9919 || (GET_CODE (op0) == MEM
9920 && ! (standard_80387_constant_p (op1) == 0
9921 || GET_CODE (op1) == MEM)))
9924 tmp = op0, op0 = op1, op1 = tmp;
9925 code = swap_condition (code);
9928 if (GET_CODE (op0) != REG)
9929 op0 = force_reg (op_mode, op0);
9931 if (CONSTANT_P (op1))
9933 int tmp = standard_80387_constant_p (op1);
9935 op1 = validize_mem (force_const_mem (op_mode, op1));
9939 op1 = force_reg (op_mode, op1);
9942 op1 = force_reg (op_mode, op1);
9946 /* Try to rearrange the comparison to make it cheaper. */
9947 if (ix86_fp_comparison_cost (code)
9948 > ix86_fp_comparison_cost (swap_condition (code))
9949 && (GET_CODE (op1) == REG || !no_new_pseudos))
9952 tmp = op0, op0 = op1, op1 = tmp;
9953 code = swap_condition (code);
9954 if (GET_CODE (op0) != REG)
9955 op0 = force_reg (op_mode, op0);
9963 /* Convert comparison codes we use to represent FP comparison to integer
9964 code that will result in proper branch. Return UNKNOWN if no such code
9968 ix86_fp_compare_code_to_integer (enum rtx_code code)
9997 /* Split comparison code CODE into comparisons we can do using branch
9998 instructions. BYPASS_CODE is comparison code for branch that will
9999 branch around FIRST_CODE and SECOND_CODE. If some of branches
10000 is not required, set value to UNKNOWN.
10001 We never require more than two branches. */
10004 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10005 enum rtx_code *first_code,
10006 enum rtx_code *second_code)
10008 *first_code = code;
10009 *bypass_code = UNKNOWN;
10010 *second_code = UNKNOWN;
10012 /* The fcomi comparison sets flags as follows:
10022 case GT: /* GTU - CF=0 & ZF=0 */
10023 case GE: /* GEU - CF=0 */
10024 case ORDERED: /* PF=0 */
10025 case UNORDERED: /* PF=1 */
10026 case UNEQ: /* EQ - ZF=1 */
10027 case UNLT: /* LTU - CF=1 */
10028 case UNLE: /* LEU - CF=1 | ZF=1 */
10029 case LTGT: /* EQ - ZF=0 */
10031 case LT: /* LTU - CF=1 - fails on unordered */
10032 *first_code = UNLT;
10033 *bypass_code = UNORDERED;
10035 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10036 *first_code = UNLE;
10037 *bypass_code = UNORDERED;
10039 case EQ: /* EQ - ZF=1 - fails on unordered */
10040 *first_code = UNEQ;
10041 *bypass_code = UNORDERED;
10043 case NE: /* NE - ZF=0 - fails on unordered */
10044 *first_code = LTGT;
10045 *second_code = UNORDERED;
10047 case UNGE: /* GEU - CF=0 - fails on unordered */
10049 *second_code = UNORDERED;
10051 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10053 *second_code = UNORDERED;
10056 gcc_unreachable ();
10058 if (!TARGET_IEEE_FP)
10060 *second_code = UNKNOWN;
10061 *bypass_code = UNKNOWN;
10065 /* Return cost of comparison done fcom + arithmetics operations on AX.
10066 All following functions do use number of instructions as a cost metrics.
10067 In future this should be tweaked to compute bytes for optimize_size and
10068 take into account performance of various instructions on various CPUs. */
10070 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10072 if (!TARGET_IEEE_FP)
10074 /* The cost of code output by ix86_expand_fp_compare. */
10098 gcc_unreachable ();
10102 /* Return cost of comparison done using fcomi operation.
10103 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10105 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10107 enum rtx_code bypass_code, first_code, second_code;
10108 /* Return arbitrarily high cost when instruction is not supported - this
10109 prevents gcc from using it. */
10112 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10113 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10116 /* Return cost of comparison done using sahf operation.
10117 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10119 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10121 enum rtx_code bypass_code, first_code, second_code;
10122 /* Return arbitrarily high cost when instruction is not preferred - this
10123 avoids gcc from using it. */
10124 if (!TARGET_USE_SAHF && !optimize_size)
10126 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10127 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10130 /* Compute cost of the comparison done using any method.
10131 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10133 ix86_fp_comparison_cost (enum rtx_code code)
10135 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10138 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10139 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10141 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10142 if (min > sahf_cost)
10144 if (min > fcomi_cost)
10149 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10152 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10153 rtx *second_test, rtx *bypass_test)
10155 enum machine_mode fpcmp_mode, intcmp_mode;
10157 int cost = ix86_fp_comparison_cost (code);
10158 enum rtx_code bypass_code, first_code, second_code;
10160 fpcmp_mode = ix86_fp_compare_mode (code);
10161 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10164 *second_test = NULL_RTX;
10166 *bypass_test = NULL_RTX;
10168 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10170 /* Do fcomi/sahf based test when profitable. */
10171 if ((bypass_code == UNKNOWN || bypass_test)
10172 && (second_code == UNKNOWN || second_test)
10173 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10177 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10178 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10184 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10185 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10187 scratch = gen_reg_rtx (HImode);
10188 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10189 emit_insn (gen_x86_sahf_1 (scratch));
10192 /* The FP codes work out to act like unsigned. */
10193 intcmp_mode = fpcmp_mode;
10195 if (bypass_code != UNKNOWN)
10196 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10197 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10199 if (second_code != UNKNOWN)
10200 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10201 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10206 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10207 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10208 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10210 scratch = gen_reg_rtx (HImode);
10211 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10213 /* In the unordered case, we have to check C2 for NaN's, which
10214 doesn't happen to work out to anything nice combination-wise.
10215 So do some bit twiddling on the value we've got in AH to come
10216 up with an appropriate set of condition codes. */
10218 intcmp_mode = CCNOmode;
10223 if (code == GT || !TARGET_IEEE_FP)
10225 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10230 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10231 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10232 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10233 intcmp_mode = CCmode;
10239 if (code == LT && TARGET_IEEE_FP)
10241 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10242 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10243 intcmp_mode = CCmode;
10248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10254 if (code == GE || !TARGET_IEEE_FP)
10256 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10261 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10262 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10269 if (code == LE && TARGET_IEEE_FP)
10271 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10272 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10273 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10274 intcmp_mode = CCmode;
10279 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10285 if (code == EQ && TARGET_IEEE_FP)
10287 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10288 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10289 intcmp_mode = CCmode;
10294 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10301 if (code == NE && TARGET_IEEE_FP)
10303 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10304 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10316 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10320 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10325 gcc_unreachable ();
10329 /* Return the test that should be put into the flags user, i.e.
10330 the bcc, scc, or cmov instruction. */
10331 return gen_rtx_fmt_ee (code, VOIDmode,
10332 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10337 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10340 op0 = ix86_compare_op0;
10341 op1 = ix86_compare_op1;
10344 *second_test = NULL_RTX;
10346 *bypass_test = NULL_RTX;
10348 if (ix86_compare_emitted)
10350 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10351 ix86_compare_emitted = NULL_RTX;
10353 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10354 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10355 second_test, bypass_test);
10357 ret = ix86_expand_int_compare (code, op0, op1);
10362 /* Return true if the CODE will result in nontrivial jump sequence. */
10364 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10366 enum rtx_code bypass_code, first_code, second_code;
10369 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10370 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10374 ix86_expand_branch (enum rtx_code code, rtx label)
10378 /* If we have emitted a compare insn, go straight to simple.
10379 ix86_expand_compare won't emit anything if ix86_compare_emitted
10381 if (ix86_compare_emitted)
10384 switch (GET_MODE (ix86_compare_op0))
10390 tmp = ix86_expand_compare (code, NULL, NULL);
10391 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10392 gen_rtx_LABEL_REF (VOIDmode, label),
10394 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10403 enum rtx_code bypass_code, first_code, second_code;
10405 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10406 &ix86_compare_op1);
10408 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10410 /* Check whether we will use the natural sequence with one jump. If
10411 so, we can expand jump early. Otherwise delay expansion by
10412 creating compound insn to not confuse optimizers. */
10413 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10416 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10417 gen_rtx_LABEL_REF (VOIDmode, label),
10418 pc_rtx, NULL_RTX, NULL_RTX);
10422 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10423 ix86_compare_op0, ix86_compare_op1);
10424 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10425 gen_rtx_LABEL_REF (VOIDmode, label),
10427 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10429 use_fcomi = ix86_use_fcomi_compare (code);
10430 vec = rtvec_alloc (3 + !use_fcomi);
10431 RTVEC_ELT (vec, 0) = tmp;
10433 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10435 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10438 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10440 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10449 /* Expand DImode branch into multiple compare+branch. */
10451 rtx lo[2], hi[2], label2;
10452 enum rtx_code code1, code2, code3;
10453 enum machine_mode submode;
10455 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10457 tmp = ix86_compare_op0;
10458 ix86_compare_op0 = ix86_compare_op1;
10459 ix86_compare_op1 = tmp;
10460 code = swap_condition (code);
10462 if (GET_MODE (ix86_compare_op0) == DImode)
10464 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10465 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10470 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10471 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10475 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10476 avoid two branches. This costs one extra insn, so disable when
10477 optimizing for size. */
10479 if ((code == EQ || code == NE)
10481 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10486 if (hi[1] != const0_rtx)
10487 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10488 NULL_RTX, 0, OPTAB_WIDEN);
10491 if (lo[1] != const0_rtx)
10492 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10493 NULL_RTX, 0, OPTAB_WIDEN);
10495 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10496 NULL_RTX, 0, OPTAB_WIDEN);
10498 ix86_compare_op0 = tmp;
10499 ix86_compare_op1 = const0_rtx;
10500 ix86_expand_branch (code, label);
10504 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10505 op1 is a constant and the low word is zero, then we can just
10506 examine the high word. */
10508 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10511 case LT: case LTU: case GE: case GEU:
10512 ix86_compare_op0 = hi[0];
10513 ix86_compare_op1 = hi[1];
10514 ix86_expand_branch (code, label);
10520 /* Otherwise, we need two or three jumps. */
10522 label2 = gen_label_rtx ();
10525 code2 = swap_condition (code);
10526 code3 = unsigned_condition (code);
10530 case LT: case GT: case LTU: case GTU:
10533 case LE: code1 = LT; code2 = GT; break;
10534 case GE: code1 = GT; code2 = LT; break;
10535 case LEU: code1 = LTU; code2 = GTU; break;
10536 case GEU: code1 = GTU; code2 = LTU; break;
10538 case EQ: code1 = UNKNOWN; code2 = NE; break;
10539 case NE: code2 = UNKNOWN; break;
10542 gcc_unreachable ();
10547 * if (hi(a) < hi(b)) goto true;
10548 * if (hi(a) > hi(b)) goto false;
10549 * if (lo(a) < lo(b)) goto true;
10553 ix86_compare_op0 = hi[0];
10554 ix86_compare_op1 = hi[1];
10556 if (code1 != UNKNOWN)
10557 ix86_expand_branch (code1, label);
10558 if (code2 != UNKNOWN)
10559 ix86_expand_branch (code2, label2);
10561 ix86_compare_op0 = lo[0];
10562 ix86_compare_op1 = lo[1];
10563 ix86_expand_branch (code3, label);
10565 if (code2 != UNKNOWN)
10566 emit_label (label2);
10571 gcc_unreachable ();
10575 /* Split branch based on floating point condition. */
10577 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10578 rtx target1, rtx target2, rtx tmp, rtx pushed)
10580 rtx second, bypass;
10581 rtx label = NULL_RTX;
10583 int bypass_probability = -1, second_probability = -1, probability = -1;
10586 if (target2 != pc_rtx)
10589 code = reverse_condition_maybe_unordered (code);
10594 condition = ix86_expand_fp_compare (code, op1, op2,
10595 tmp, &second, &bypass);
10597 /* Remove pushed operand from stack. */
10599 ix86_free_from_memory (GET_MODE (pushed));
10601 if (split_branch_probability >= 0)
10603 /* Distribute the probabilities across the jumps.
10604 Assume the BYPASS and SECOND to be always test
10606 probability = split_branch_probability;
10608 /* Value of 1 is low enough to make no need for probability
10609 to be updated. Later we may run some experiments and see
10610 if unordered values are more frequent in practice. */
10612 bypass_probability = 1;
10614 second_probability = 1;
10616 if (bypass != NULL_RTX)
10618 label = gen_label_rtx ();
10619 i = emit_jump_insn (gen_rtx_SET
10621 gen_rtx_IF_THEN_ELSE (VOIDmode,
10623 gen_rtx_LABEL_REF (VOIDmode,
10626 if (bypass_probability >= 0)
10628 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10629 GEN_INT (bypass_probability),
10632 i = emit_jump_insn (gen_rtx_SET
10634 gen_rtx_IF_THEN_ELSE (VOIDmode,
10635 condition, target1, target2)));
10636 if (probability >= 0)
10638 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10639 GEN_INT (probability),
10641 if (second != NULL_RTX)
10643 i = emit_jump_insn (gen_rtx_SET
10645 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10647 if (second_probability >= 0)
10649 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10650 GEN_INT (second_probability),
10653 if (label != NULL_RTX)
10654 emit_label (label);
10658 ix86_expand_setcc (enum rtx_code code, rtx dest)
10660 rtx ret, tmp, tmpreg, equiv;
10661 rtx second_test, bypass_test;
10663 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10664 return 0; /* FAIL */
10666 gcc_assert (GET_MODE (dest) == QImode);
10668 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10669 PUT_MODE (ret, QImode);
10674 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10675 if (bypass_test || second_test)
10677 rtx test = second_test;
10679 rtx tmp2 = gen_reg_rtx (QImode);
10682 gcc_assert (!second_test);
10683 test = bypass_test;
10685 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10687 PUT_MODE (test, QImode);
10688 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10691 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10693 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10696 /* Attach a REG_EQUAL note describing the comparison result. */
10697 if (ix86_compare_op0 && ix86_compare_op1)
10699 equiv = simplify_gen_relational (code, QImode,
10700 GET_MODE (ix86_compare_op0),
10701 ix86_compare_op0, ix86_compare_op1);
10702 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10705 return 1; /* DONE */
10708 /* Expand comparison setting or clearing carry flag. Return true when
10709 successful and set pop for the operation. */
10711 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10713 enum machine_mode mode =
10714 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10716 /* Do not handle DImode compares that go through special path. Also we can't
10717 deal with FP compares yet. This is possible to add. */
10718 if (mode == (TARGET_64BIT ? TImode : DImode))
10720 if (FLOAT_MODE_P (mode))
10722 rtx second_test = NULL, bypass_test = NULL;
10723 rtx compare_op, compare_seq;
10725 /* Shortcut: following common codes never translate into carry flag compares. */
10726 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10727 || code == ORDERED || code == UNORDERED)
10730 /* These comparisons require zero flag; swap operands so they won't. */
10731 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10732 && !TARGET_IEEE_FP)
10737 code = swap_condition (code);
10740 /* Try to expand the comparison and verify that we end up with carry flag
10741 based comparison. This is fails to be true only when we decide to expand
10742 comparison using arithmetic that is not too common scenario. */
10744 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10745 &second_test, &bypass_test);
10746 compare_seq = get_insns ();
10749 if (second_test || bypass_test)
10751 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10752 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10753 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10755 code = GET_CODE (compare_op);
10756 if (code != LTU && code != GEU)
10758 emit_insn (compare_seq);
10762 if (!INTEGRAL_MODE_P (mode))
10770 /* Convert a==0 into (unsigned)a<1. */
10773 if (op1 != const0_rtx)
10776 code = (code == EQ ? LTU : GEU);
10779 /* Convert a>b into b<a or a>=b-1. */
10782 if (GET_CODE (op1) == CONST_INT)
10784 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10785 /* Bail out on overflow. We still can swap operands but that
10786 would force loading of the constant into register. */
10787 if (op1 == const0_rtx
10788 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10790 code = (code == GTU ? GEU : LTU);
10797 code = (code == GTU ? LTU : GEU);
10801 /* Convert a>=0 into (unsigned)a<0x80000000. */
10804 if (mode == DImode || op1 != const0_rtx)
10806 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10807 code = (code == LT ? GEU : LTU);
10811 if (mode == DImode || op1 != constm1_rtx)
10813 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10814 code = (code == LE ? GEU : LTU);
10820 /* Swapping operands may cause constant to appear as first operand. */
10821 if (!nonimmediate_operand (op0, VOIDmode))
10823 if (no_new_pseudos)
10825 op0 = force_reg (mode, op0);
10827 ix86_compare_op0 = op0;
10828 ix86_compare_op1 = op1;
10829 *pop = ix86_expand_compare (code, NULL, NULL);
10830 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10835 ix86_expand_int_movcc (rtx operands[])
10837 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10838 rtx compare_seq, compare_op;
10839 rtx second_test, bypass_test;
10840 enum machine_mode mode = GET_MODE (operands[0]);
10841 bool sign_bit_compare_p = false;;
10844 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10845 compare_seq = get_insns ();
10848 compare_code = GET_CODE (compare_op);
10850 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10851 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10852 sign_bit_compare_p = true;
10854 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10855 HImode insns, we'd be swallowed in word prefix ops. */
10857 if ((mode != HImode || TARGET_FAST_PREFIX)
10858 && (mode != (TARGET_64BIT ? TImode : DImode))
10859 && GET_CODE (operands[2]) == CONST_INT
10860 && GET_CODE (operands[3]) == CONST_INT)
10862 rtx out = operands[0];
10863 HOST_WIDE_INT ct = INTVAL (operands[2]);
10864 HOST_WIDE_INT cf = INTVAL (operands[3]);
10865 HOST_WIDE_INT diff;
10868 /* Sign bit compares are better done using shifts than we do by using
10870 if (sign_bit_compare_p
10871 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10872 ix86_compare_op1, &compare_op))
10874 /* Detect overlap between destination and compare sources. */
10877 if (!sign_bit_compare_p)
10879 bool fpcmp = false;
10881 compare_code = GET_CODE (compare_op);
10883 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10884 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10887 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10890 /* To simplify rest of code, restrict to the GEU case. */
10891 if (compare_code == LTU)
10893 HOST_WIDE_INT tmp = ct;
10896 compare_code = reverse_condition (compare_code);
10897 code = reverse_condition (code);
10902 PUT_CODE (compare_op,
10903 reverse_condition_maybe_unordered
10904 (GET_CODE (compare_op)));
10906 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10910 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10911 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10912 tmp = gen_reg_rtx (mode);
10914 if (mode == DImode)
10915 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10917 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10921 if (code == GT || code == GE)
10922 code = reverse_condition (code);
10925 HOST_WIDE_INT tmp = ct;
10930 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10931 ix86_compare_op1, VOIDmode, 0, -1);
10944 tmp = expand_simple_binop (mode, PLUS,
10946 copy_rtx (tmp), 1, OPTAB_DIRECT);
10957 tmp = expand_simple_binop (mode, IOR,
10959 copy_rtx (tmp), 1, OPTAB_DIRECT);
10961 else if (diff == -1 && ct)
10971 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10973 tmp = expand_simple_binop (mode, PLUS,
10974 copy_rtx (tmp), GEN_INT (cf),
10975 copy_rtx (tmp), 1, OPTAB_DIRECT);
10983 * andl cf - ct, dest
10993 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10996 tmp = expand_simple_binop (mode, AND,
10998 gen_int_mode (cf - ct, mode),
10999 copy_rtx (tmp), 1, OPTAB_DIRECT);
11001 tmp = expand_simple_binop (mode, PLUS,
11002 copy_rtx (tmp), GEN_INT (ct),
11003 copy_rtx (tmp), 1, OPTAB_DIRECT);
11006 if (!rtx_equal_p (tmp, out))
11007 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11009 return 1; /* DONE */
11015 tmp = ct, ct = cf, cf = tmp;
11017 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11019 /* We may be reversing unordered compare to normal compare, that
11020 is not valid in general (we may convert non-trapping condition
11021 to trapping one), however on i386 we currently emit all
11022 comparisons unordered. */
11023 compare_code = reverse_condition_maybe_unordered (compare_code);
11024 code = reverse_condition_maybe_unordered (code);
11028 compare_code = reverse_condition (compare_code);
11029 code = reverse_condition (code);
11033 compare_code = UNKNOWN;
11034 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11035 && GET_CODE (ix86_compare_op1) == CONST_INT)
11037 if (ix86_compare_op1 == const0_rtx
11038 && (code == LT || code == GE))
11039 compare_code = code;
11040 else if (ix86_compare_op1 == constm1_rtx)
11044 else if (code == GT)
11049 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11050 if (compare_code != UNKNOWN
11051 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11052 && (cf == -1 || ct == -1))
11054 /* If lea code below could be used, only optimize
11055 if it results in a 2 insn sequence. */
11057 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11058 || diff == 3 || diff == 5 || diff == 9)
11059 || (compare_code == LT && ct == -1)
11060 || (compare_code == GE && cf == -1))
11063 * notl op1 (if necessary)
11071 code = reverse_condition (code);
11074 out = emit_store_flag (out, code, ix86_compare_op0,
11075 ix86_compare_op1, VOIDmode, 0, -1);
11077 out = expand_simple_binop (mode, IOR,
11079 out, 1, OPTAB_DIRECT);
11080 if (out != operands[0])
11081 emit_move_insn (operands[0], out);
11083 return 1; /* DONE */
11088 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11089 || diff == 3 || diff == 5 || diff == 9)
11090 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11092 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11098 * lea cf(dest*(ct-cf)),dest
11102 * This also catches the degenerate setcc-only case.
11108 out = emit_store_flag (out, code, ix86_compare_op0,
11109 ix86_compare_op1, VOIDmode, 0, 1);
11112 /* On x86_64 the lea instruction operates on Pmode, so we need
11113 to get arithmetics done in proper mode to match. */
11115 tmp = copy_rtx (out);
11119 out1 = copy_rtx (out);
11120 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11124 tmp = gen_rtx_PLUS (mode, tmp, out1);
11130 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11133 if (!rtx_equal_p (tmp, out))
11136 out = force_operand (tmp, copy_rtx (out));
11138 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11140 if (!rtx_equal_p (out, operands[0]))
11141 emit_move_insn (operands[0], copy_rtx (out));
11143 return 1; /* DONE */
11147 * General case: Jumpful:
11148 * xorl dest,dest cmpl op1, op2
11149 * cmpl op1, op2 movl ct, dest
11150 * setcc dest jcc 1f
11151 * decl dest movl cf, dest
11152 * andl (cf-ct),dest 1:
11155 * Size 20. Size 14.
11157 * This is reasonably steep, but branch mispredict costs are
11158 * high on modern cpus, so consider failing only if optimizing
11162 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11163 && BRANCH_COST >= 2)
11169 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11170 /* We may be reversing unordered compare to normal compare,
11171 that is not valid in general (we may convert non-trapping
11172 condition to trapping one), however on i386 we currently
11173 emit all comparisons unordered. */
11174 code = reverse_condition_maybe_unordered (code);
11177 code = reverse_condition (code);
11178 if (compare_code != UNKNOWN)
11179 compare_code = reverse_condition (compare_code);
11183 if (compare_code != UNKNOWN)
11185 /* notl op1 (if needed)
11190 For x < 0 (resp. x <= -1) there will be no notl,
11191 so if possible swap the constants to get rid of the
11193 True/false will be -1/0 while code below (store flag
11194 followed by decrement) is 0/-1, so the constants need
11195 to be exchanged once more. */
11197 if (compare_code == GE || !cf)
11199 code = reverse_condition (code);
11204 HOST_WIDE_INT tmp = cf;
11209 out = emit_store_flag (out, code, ix86_compare_op0,
11210 ix86_compare_op1, VOIDmode, 0, -1);
11214 out = emit_store_flag (out, code, ix86_compare_op0,
11215 ix86_compare_op1, VOIDmode, 0, 1);
11217 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11218 copy_rtx (out), 1, OPTAB_DIRECT);
11221 out = expand_simple_binop (mode, AND, copy_rtx (out),
11222 gen_int_mode (cf - ct, mode),
11223 copy_rtx (out), 1, OPTAB_DIRECT);
11225 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11226 copy_rtx (out), 1, OPTAB_DIRECT);
11227 if (!rtx_equal_p (out, operands[0]))
11228 emit_move_insn (operands[0], copy_rtx (out));
11230 return 1; /* DONE */
11234 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11236 /* Try a few things more with specific constants and a variable. */
11239 rtx var, orig_out, out, tmp;
11241 if (BRANCH_COST <= 2)
11242 return 0; /* FAIL */
11244 /* If one of the two operands is an interesting constant, load a
11245 constant with the above and mask it in with a logical operation. */
11247 if (GET_CODE (operands[2]) == CONST_INT)
11250 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11251 operands[3] = constm1_rtx, op = and_optab;
11252 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11253 operands[3] = const0_rtx, op = ior_optab;
11255 return 0; /* FAIL */
11257 else if (GET_CODE (operands[3]) == CONST_INT)
11260 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11261 operands[2] = constm1_rtx, op = and_optab;
11262 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11263 operands[2] = const0_rtx, op = ior_optab;
11265 return 0; /* FAIL */
11268 return 0; /* FAIL */
11270 orig_out = operands[0];
11271 tmp = gen_reg_rtx (mode);
11274 /* Recurse to get the constant loaded. */
11275 if (ix86_expand_int_movcc (operands) == 0)
11276 return 0; /* FAIL */
11278 /* Mask in the interesting variable. */
11279 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11281 if (!rtx_equal_p (out, orig_out))
11282 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11284 return 1; /* DONE */
11288 * For comparison with above,
11298 if (! nonimmediate_operand (operands[2], mode))
11299 operands[2] = force_reg (mode, operands[2]);
11300 if (! nonimmediate_operand (operands[3], mode))
11301 operands[3] = force_reg (mode, operands[3]);
11303 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11305 rtx tmp = gen_reg_rtx (mode);
11306 emit_move_insn (tmp, operands[3]);
11309 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11311 rtx tmp = gen_reg_rtx (mode);
11312 emit_move_insn (tmp, operands[2]);
11316 if (! register_operand (operands[2], VOIDmode)
11318 || ! register_operand (operands[3], VOIDmode)))
11319 operands[2] = force_reg (mode, operands[2]);
11322 && ! register_operand (operands[3], VOIDmode))
11323 operands[3] = force_reg (mode, operands[3]);
11325 emit_insn (compare_seq);
11326 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11327 gen_rtx_IF_THEN_ELSE (mode,
11328 compare_op, operands[2],
11331 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11332 gen_rtx_IF_THEN_ELSE (mode,
11334 copy_rtx (operands[3]),
11335 copy_rtx (operands[0]))));
11337 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11338 gen_rtx_IF_THEN_ELSE (mode,
11340 copy_rtx (operands[2]),
11341 copy_rtx (operands[0]))));
11343 return 1; /* DONE */
11346 /* Swap, force into registers, or otherwise massage the two operands
11347 to an sse comparison with a mask result. Thus we differ a bit from
11348 ix86_prepare_fp_compare_args which expects to produce a flags result.
11350 The DEST operand exists to help determine whether to commute commutative
11351 operators. The POP0/POP1 operands are updated in place. The new
11352 comparison code is returned, or UNKNOWN if not implementable. */
11354 static enum rtx_code
11355 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11356 rtx *pop0, rtx *pop1)
11364 /* We have no LTGT as an operator. We could implement it with
11365 NE & ORDERED, but this requires an extra temporary. It's
11366 not clear that it's worth it. */
11373 /* These are supported directly. */
11380 /* For commutative operators, try to canonicalize the destination
11381 operand to be first in the comparison - this helps reload to
11382 avoid extra moves. */
11383 if (!dest || !rtx_equal_p (dest, *pop1))
11391 /* These are not supported directly. Swap the comparison operands
11392 to transform into something that is supported. */
11396 code = swap_condition (code);
11400 gcc_unreachable ();
11406 /* Detect conditional moves that exactly match min/max operational
11407 semantics. Note that this is IEEE safe, as long as we don't
11408 interchange the operands.
11410 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11411 and TRUE if the operation is successful and instructions are emitted. */
11414 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11415 rtx cmp_op1, rtx if_true, rtx if_false)
11417 enum machine_mode mode;
11423 else if (code == UNGE)
11426 if_true = if_false;
11432 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11434 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11439 mode = GET_MODE (dest);
11441 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11442 but MODE may be a vector mode and thus not appropriate. */
11443 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11445 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11448 if_true = force_reg (mode, if_true);
11449 v = gen_rtvec (2, if_true, if_false);
11450 tmp = gen_rtx_UNSPEC (mode, v, u);
11454 code = is_min ? SMIN : SMAX;
11455 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11458 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11462 /* Expand an sse vector comparison. Return the register with the result. */
11465 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11466 rtx op_true, rtx op_false)
11468 enum machine_mode mode = GET_MODE (dest);
11471 cmp_op0 = force_reg (mode, cmp_op0);
11472 if (!nonimmediate_operand (cmp_op1, mode))
11473 cmp_op1 = force_reg (mode, cmp_op1);
11476 || reg_overlap_mentioned_p (dest, op_true)
11477 || reg_overlap_mentioned_p (dest, op_false))
11478 dest = gen_reg_rtx (mode);
11480 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11481 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11486 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11487 operations. This is used for both scalar and vector conditional moves. */
11490 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11492 enum machine_mode mode = GET_MODE (dest);
11495 if (op_false == CONST0_RTX (mode))
11497 op_true = force_reg (mode, op_true);
11498 x = gen_rtx_AND (mode, cmp, op_true);
11499 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11501 else if (op_true == CONST0_RTX (mode))
11503 op_false = force_reg (mode, op_false);
11504 x = gen_rtx_NOT (mode, cmp);
11505 x = gen_rtx_AND (mode, x, op_false);
11506 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11510 op_true = force_reg (mode, op_true);
11511 op_false = force_reg (mode, op_false);
11513 t2 = gen_reg_rtx (mode);
11515 t3 = gen_reg_rtx (mode);
11519 x = gen_rtx_AND (mode, op_true, cmp);
11520 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11522 x = gen_rtx_NOT (mode, cmp);
11523 x = gen_rtx_AND (mode, x, op_false);
11524 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11526 x = gen_rtx_IOR (mode, t3, t2);
11527 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11531 /* Expand a floating-point conditional move. Return true if successful. */
11534 ix86_expand_fp_movcc (rtx operands[])
11536 enum machine_mode mode = GET_MODE (operands[0]);
11537 enum rtx_code code = GET_CODE (operands[1]);
11538 rtx tmp, compare_op, second_test, bypass_test;
11540 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11542 enum machine_mode cmode;
11544 /* Since we've no cmove for sse registers, don't force bad register
11545 allocation just to gain access to it. Deny movcc when the
11546 comparison mode doesn't match the move mode. */
11547 cmode = GET_MODE (ix86_compare_op0);
11548 if (cmode == VOIDmode)
11549 cmode = GET_MODE (ix86_compare_op1);
11553 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11555 &ix86_compare_op1);
11556 if (code == UNKNOWN)
11559 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11560 ix86_compare_op1, operands[2],
11564 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11565 ix86_compare_op1, operands[2], operands[3]);
11566 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11570 /* The floating point conditional move instructions don't directly
11571 support conditions resulting from a signed integer comparison. */
11573 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11575 /* The floating point conditional move instructions don't directly
11576 support signed integer comparisons. */
11578 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11580 gcc_assert (!second_test && !bypass_test);
11581 tmp = gen_reg_rtx (QImode);
11582 ix86_expand_setcc (code, tmp);
11584 ix86_compare_op0 = tmp;
11585 ix86_compare_op1 = const0_rtx;
11586 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11588 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11590 tmp = gen_reg_rtx (mode);
11591 emit_move_insn (tmp, operands[3]);
11594 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11596 tmp = gen_reg_rtx (mode);
11597 emit_move_insn (tmp, operands[2]);
11601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11602 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11603 operands[2], operands[3])));
11605 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11606 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11607 operands[3], operands[0])));
11609 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11610 gen_rtx_IF_THEN_ELSE (mode, second_test,
11611 operands[2], operands[0])));
11616 /* Expand a floating-point vector conditional move; a vcond operation
11617 rather than a movcc operation. */
11620 ix86_expand_fp_vcond (rtx operands[])
11622 enum rtx_code code = GET_CODE (operands[3]);
11625 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11626 &operands[4], &operands[5]);
11627 if (code == UNKNOWN)
11630 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11631 operands[5], operands[1], operands[2]))
11634 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11635 operands[1], operands[2]);
11636 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11640 /* Expand a signed integral vector conditional move. */
11643 ix86_expand_int_vcond (rtx operands[])
11645 enum machine_mode mode = GET_MODE (operands[0]);
11646 enum rtx_code code = GET_CODE (operands[3]);
11647 bool negate = false;
11650 cop0 = operands[4];
11651 cop1 = operands[5];
11653 /* Canonicalize the comparison to EQ, GT, GTU. */
11664 code = reverse_condition (code);
11670 code = reverse_condition (code);
11676 code = swap_condition (code);
11677 x = cop0, cop0 = cop1, cop1 = x;
11681 gcc_unreachable ();
11684 /* Unsigned parallel compare is not supported by the hardware. Play some
11685 tricks to turn this into a signed comparison against 0. */
11688 cop0 = force_reg (mode, cop0);
11696 /* Perform a parallel modulo subtraction. */
11697 t1 = gen_reg_rtx (mode);
11698 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11700 /* Extract the original sign bit of op0. */
11701 mask = GEN_INT (-0x80000000);
11702 mask = gen_rtx_CONST_VECTOR (mode,
11703 gen_rtvec (4, mask, mask, mask, mask));
11704 mask = force_reg (mode, mask);
11705 t2 = gen_reg_rtx (mode);
11706 emit_insn (gen_andv4si3 (t2, cop0, mask));
11708 /* XOR it back into the result of the subtraction. This results
11709 in the sign bit set iff we saw unsigned underflow. */
11710 x = gen_reg_rtx (mode);
11711 emit_insn (gen_xorv4si3 (x, t1, t2));
11719 /* Perform a parallel unsigned saturating subtraction. */
11720 x = gen_reg_rtx (mode);
11721 emit_insn (gen_rtx_SET (VOIDmode, x,
11722 gen_rtx_US_MINUS (mode, cop0, cop1)));
11729 gcc_unreachable ();
11733 cop1 = CONST0_RTX (mode);
11736 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11737 operands[1+negate], operands[2-negate]);
11739 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11740 operands[2-negate]);
11744 /* Expand conditional increment or decrement using adb/sbb instructions.
11745 The default case using setcc followed by the conditional move can be
11746 done by generic code. */
11748 ix86_expand_int_addcc (rtx operands[])
11750 enum rtx_code code = GET_CODE (operands[1]);
11752 rtx val = const0_rtx;
11753 bool fpcmp = false;
11754 enum machine_mode mode = GET_MODE (operands[0]);
11756 if (operands[3] != const1_rtx
11757 && operands[3] != constm1_rtx)
11759 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11760 ix86_compare_op1, &compare_op))
11762 code = GET_CODE (compare_op);
11764 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11765 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11768 code = ix86_fp_compare_code_to_integer (code);
11775 PUT_CODE (compare_op,
11776 reverse_condition_maybe_unordered
11777 (GET_CODE (compare_op)));
11779 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11781 PUT_MODE (compare_op, mode);
11783 /* Construct either adc or sbb insn. */
11784 if ((code == LTU) == (operands[3] == constm1_rtx))
11786 switch (GET_MODE (operands[0]))
11789 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11792 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11795 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11798 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11801 gcc_unreachable ();
11806 switch (GET_MODE (operands[0]))
11809 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11812 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11815 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11818 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11821 gcc_unreachable ();
11824 return 1; /* DONE */
11828 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11829 works for floating pointer parameters and nonoffsetable memories.
11830 For pushes, it returns just stack offsets; the values will be saved
11831 in the right order. Maximally three parts are generated. */
11834 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11839 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11841 size = (GET_MODE_SIZE (mode) + 4) / 8;
11843 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11844 gcc_assert (size >= 2 && size <= 3);
11846 /* Optimize constant pool reference to immediates. This is used by fp
11847 moves, that force all constants to memory to allow combining. */
11848 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11850 rtx tmp = maybe_get_pool_constant (operand);
11855 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11857 /* The only non-offsetable memories we handle are pushes. */
11858 int ok = push_operand (operand, VOIDmode);
11862 operand = copy_rtx (operand);
11863 PUT_MODE (operand, Pmode);
11864 parts[0] = parts[1] = parts[2] = operand;
11868 if (GET_CODE (operand) == CONST_VECTOR)
11870 enum machine_mode imode = int_mode_for_mode (mode);
11871 /* Caution: if we looked through a constant pool memory above,
11872 the operand may actually have a different mode now. That's
11873 ok, since we want to pun this all the way back to an integer. */
11874 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11875 gcc_assert (operand != NULL);
11881 if (mode == DImode)
11882 split_di (&operand, 1, &parts[0], &parts[1]);
11885 if (REG_P (operand))
11887 gcc_assert (reload_completed);
11888 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11889 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11891 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11893 else if (offsettable_memref_p (operand))
11895 operand = adjust_address (operand, SImode, 0);
11896 parts[0] = operand;
11897 parts[1] = adjust_address (operand, SImode, 4);
11899 parts[2] = adjust_address (operand, SImode, 8);
11901 else if (GET_CODE (operand) == CONST_DOUBLE)
11906 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11910 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11911 parts[2] = gen_int_mode (l[2], SImode);
11914 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11917 gcc_unreachable ();
11919 parts[1] = gen_int_mode (l[1], SImode);
11920 parts[0] = gen_int_mode (l[0], SImode);
11923 gcc_unreachable ();
11928 if (mode == TImode)
11929 split_ti (&operand, 1, &parts[0], &parts[1]);
11930 if (mode == XFmode || mode == TFmode)
11932 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11933 if (REG_P (operand))
11935 gcc_assert (reload_completed);
11936 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11937 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11939 else if (offsettable_memref_p (operand))
11941 operand = adjust_address (operand, DImode, 0);
11942 parts[0] = operand;
11943 parts[1] = adjust_address (operand, upper_mode, 8);
11945 else if (GET_CODE (operand) == CONST_DOUBLE)
11950 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11951 real_to_target (l, &r, mode);
11953 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11954 if (HOST_BITS_PER_WIDE_INT >= 64)
11957 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11958 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11961 parts[0] = immed_double_const (l[0], l[1], DImode);
11963 if (upper_mode == SImode)
11964 parts[1] = gen_int_mode (l[2], SImode);
11965 else if (HOST_BITS_PER_WIDE_INT >= 64)
11968 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11969 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11972 parts[1] = immed_double_const (l[2], l[3], DImode);
11975 gcc_unreachable ();
11982 /* Emit insns to perform a move or push of DI, DF, and XF values.
11983 Return false when normal moves are needed; true when all required
11984 insns have been emitted. Operands 2-4 contain the input values
11985 int the correct order; operands 5-7 contain the output values. */
11988 ix86_split_long_move (rtx operands[])
11993 int collisions = 0;
11994 enum machine_mode mode = GET_MODE (operands[0]);
11996 /* The DFmode expanders may ask us to move double.
11997 For 64bit target this is single move. By hiding the fact
11998 here we simplify i386.md splitters. */
11999 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12001 /* Optimize constant pool reference to immediates. This is used by
12002 fp moves, that force all constants to memory to allow combining. */
12004 if (GET_CODE (operands[1]) == MEM
12005 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12006 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12007 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12008 if (push_operand (operands[0], VOIDmode))
12010 operands[0] = copy_rtx (operands[0]);
12011 PUT_MODE (operands[0], Pmode);
12014 operands[0] = gen_lowpart (DImode, operands[0]);
12015 operands[1] = gen_lowpart (DImode, operands[1]);
12016 emit_move_insn (operands[0], operands[1]);
12020 /* The only non-offsettable memory we handle is push. */
12021 if (push_operand (operands[0], VOIDmode))
12024 gcc_assert (GET_CODE (operands[0]) != MEM
12025 || offsettable_memref_p (operands[0]));
12027 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12028 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12030 /* When emitting push, take care for source operands on the stack. */
12031 if (push && GET_CODE (operands[1]) == MEM
12032 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12035 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12036 XEXP (part[1][2], 0));
12037 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12038 XEXP (part[1][1], 0));
12041 /* We need to do copy in the right order in case an address register
12042 of the source overlaps the destination. */
12043 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12045 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12047 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12050 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12053 /* Collision in the middle part can be handled by reordering. */
12054 if (collisions == 1 && nparts == 3
12055 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12058 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12059 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12062 /* If there are more collisions, we can't handle it by reordering.
12063 Do an lea to the last part and use only one colliding move. */
12064 else if (collisions > 1)
12070 base = part[0][nparts - 1];
12072 /* Handle the case when the last part isn't valid for lea.
12073 Happens in 64-bit mode storing the 12-byte XFmode. */
12074 if (GET_MODE (base) != Pmode)
12075 base = gen_rtx_REG (Pmode, REGNO (base));
12077 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12078 part[1][0] = replace_equiv_address (part[1][0], base);
12079 part[1][1] = replace_equiv_address (part[1][1],
12080 plus_constant (base, UNITS_PER_WORD));
12082 part[1][2] = replace_equiv_address (part[1][2],
12083 plus_constant (base, 8));
12093 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12094 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12095 emit_move_insn (part[0][2], part[1][2]);
12100 /* In 64bit mode we don't have 32bit push available. In case this is
12101 register, it is OK - we will just use larger counterpart. We also
12102 retype memory - these comes from attempt to avoid REX prefix on
12103 moving of second half of TFmode value. */
12104 if (GET_MODE (part[1][1]) == SImode)
12106 switch (GET_CODE (part[1][1]))
12109 part[1][1] = adjust_address (part[1][1], DImode, 0);
12113 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12117 gcc_unreachable ();
12120 if (GET_MODE (part[1][0]) == SImode)
12121 part[1][0] = part[1][1];
12124 emit_move_insn (part[0][1], part[1][1]);
12125 emit_move_insn (part[0][0], part[1][0]);
12129 /* Choose correct order to not overwrite the source before it is copied. */
12130 if ((REG_P (part[0][0])
12131 && REG_P (part[1][1])
12132 && (REGNO (part[0][0]) == REGNO (part[1][1])
12134 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12136 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12140 operands[2] = part[0][2];
12141 operands[3] = part[0][1];
12142 operands[4] = part[0][0];
12143 operands[5] = part[1][2];
12144 operands[6] = part[1][1];
12145 operands[7] = part[1][0];
12149 operands[2] = part[0][1];
12150 operands[3] = part[0][0];
12151 operands[5] = part[1][1];
12152 operands[6] = part[1][0];
12159 operands[2] = part[0][0];
12160 operands[3] = part[0][1];
12161 operands[4] = part[0][2];
12162 operands[5] = part[1][0];
12163 operands[6] = part[1][1];
12164 operands[7] = part[1][2];
12168 operands[2] = part[0][0];
12169 operands[3] = part[0][1];
12170 operands[5] = part[1][0];
12171 operands[6] = part[1][1];
12175 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12178 if (GET_CODE (operands[5]) == CONST_INT
12179 && operands[5] != const0_rtx
12180 && REG_P (operands[2]))
12182 if (GET_CODE (operands[6]) == CONST_INT
12183 && INTVAL (operands[6]) == INTVAL (operands[5]))
12184 operands[6] = operands[2];
12187 && GET_CODE (operands[7]) == CONST_INT
12188 && INTVAL (operands[7]) == INTVAL (operands[5]))
12189 operands[7] = operands[2];
12193 && GET_CODE (operands[6]) == CONST_INT
12194 && operands[6] != const0_rtx
12195 && REG_P (operands[3])
12196 && GET_CODE (operands[7]) == CONST_INT
12197 && INTVAL (operands[7]) == INTVAL (operands[6]))
12198 operands[7] = operands[3];
12201 emit_move_insn (operands[2], operands[5]);
12202 emit_move_insn (operands[3], operands[6]);
12204 emit_move_insn (operands[4], operands[7]);
12209 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12210 left shift by a constant, either using a single shift or
12211 a sequence of add instructions. */
12214 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12218 emit_insn ((mode == DImode
12220 : gen_adddi3) (operand, operand, operand));
12222 else if (!optimize_size
12223 && count * ix86_cost->add <= ix86_cost->shift_const)
12226 for (i=0; i<count; i++)
12228 emit_insn ((mode == DImode
12230 : gen_adddi3) (operand, operand, operand));
12234 emit_insn ((mode == DImode
12236 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12240 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12242 rtx low[2], high[2];
12244 const int single_width = mode == DImode ? 32 : 64;
12246 if (GET_CODE (operands[2]) == CONST_INT)
12248 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12249 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12251 if (count >= single_width)
12253 emit_move_insn (high[0], low[1]);
12254 emit_move_insn (low[0], const0_rtx);
12256 if (count > single_width)
12257 ix86_expand_ashl_const (high[0], count - single_width, mode);
12261 if (!rtx_equal_p (operands[0], operands[1]))
12262 emit_move_insn (operands[0], operands[1]);
12263 emit_insn ((mode == DImode
12265 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12266 ix86_expand_ashl_const (low[0], count, mode);
12271 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12273 if (operands[1] == const1_rtx)
12275 /* Assuming we've chosen a QImode capable registers, then 1 << N
12276 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12277 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12279 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12281 ix86_expand_clear (low[0]);
12282 ix86_expand_clear (high[0]);
12283 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12285 d = gen_lowpart (QImode, low[0]);
12286 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12287 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12288 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12290 d = gen_lowpart (QImode, high[0]);
12291 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12292 s = gen_rtx_NE (QImode, flags, const0_rtx);
12293 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12296 /* Otherwise, we can get the same results by manually performing
12297 a bit extract operation on bit 5/6, and then performing the two
12298 shifts. The two methods of getting 0/1 into low/high are exactly
12299 the same size. Avoiding the shift in the bit extract case helps
12300 pentium4 a bit; no one else seems to care much either way. */
12305 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12306 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12308 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12309 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12311 emit_insn ((mode == DImode
12313 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12314 emit_insn ((mode == DImode
12316 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12317 emit_move_insn (low[0], high[0]);
12318 emit_insn ((mode == DImode
12320 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12323 emit_insn ((mode == DImode
12325 : gen_ashldi3) (low[0], low[0], operands[2]));
12326 emit_insn ((mode == DImode
12328 : gen_ashldi3) (high[0], high[0], operands[2]));
12332 if (operands[1] == constm1_rtx)
12334 /* For -1 << N, we can avoid the shld instruction, because we
12335 know that we're shifting 0...31/63 ones into a -1. */
12336 emit_move_insn (low[0], constm1_rtx);
12338 emit_move_insn (high[0], low[0]);
12340 emit_move_insn (high[0], constm1_rtx);
12344 if (!rtx_equal_p (operands[0], operands[1]))
12345 emit_move_insn (operands[0], operands[1]);
12347 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12348 emit_insn ((mode == DImode
12350 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12353 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12355 if (TARGET_CMOVE && scratch)
12357 ix86_expand_clear (scratch);
12358 emit_insn ((mode == DImode
12359 ? gen_x86_shift_adj_1
12360 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12363 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12367 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12369 rtx low[2], high[2];
12371 const int single_width = mode == DImode ? 32 : 64;
12373 if (GET_CODE (operands[2]) == CONST_INT)
12375 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12376 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12378 if (count == single_width * 2 - 1)
12380 emit_move_insn (high[0], high[1]);
12381 emit_insn ((mode == DImode
12383 : gen_ashrdi3) (high[0], high[0],
12384 GEN_INT (single_width - 1)));
12385 emit_move_insn (low[0], high[0]);
12388 else if (count >= single_width)
12390 emit_move_insn (low[0], high[1]);
12391 emit_move_insn (high[0], low[0]);
12392 emit_insn ((mode == DImode
12394 : gen_ashrdi3) (high[0], high[0],
12395 GEN_INT (single_width - 1)));
12396 if (count > single_width)
12397 emit_insn ((mode == DImode
12399 : gen_ashrdi3) (low[0], low[0],
12400 GEN_INT (count - single_width)));
12404 if (!rtx_equal_p (operands[0], operands[1]))
12405 emit_move_insn (operands[0], operands[1]);
12406 emit_insn ((mode == DImode
12408 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12409 emit_insn ((mode == DImode
12411 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12416 if (!rtx_equal_p (operands[0], operands[1]))
12417 emit_move_insn (operands[0], operands[1]);
12419 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12421 emit_insn ((mode == DImode
12423 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12424 emit_insn ((mode == DImode
12426 : gen_ashrdi3) (high[0], high[0], operands[2]));
12428 if (TARGET_CMOVE && scratch)
12430 emit_move_insn (scratch, high[0]);
12431 emit_insn ((mode == DImode
12433 : gen_ashrdi3) (scratch, scratch,
12434 GEN_INT (single_width - 1)));
12435 emit_insn ((mode == DImode
12436 ? gen_x86_shift_adj_1
12437 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12441 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12446 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12448 rtx low[2], high[2];
12450 const int single_width = mode == DImode ? 32 : 64;
12452 if (GET_CODE (operands[2]) == CONST_INT)
12454 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12455 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12457 if (count >= single_width)
12459 emit_move_insn (low[0], high[1]);
12460 ix86_expand_clear (high[0]);
12462 if (count > single_width)
12463 emit_insn ((mode == DImode
12465 : gen_lshrdi3) (low[0], low[0],
12466 GEN_INT (count - single_width)));
12470 if (!rtx_equal_p (operands[0], operands[1]))
12471 emit_move_insn (operands[0], operands[1]);
12472 emit_insn ((mode == DImode
12474 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12475 emit_insn ((mode == DImode
12477 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12482 if (!rtx_equal_p (operands[0], operands[1]))
12483 emit_move_insn (operands[0], operands[1]);
12485 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12487 emit_insn ((mode == DImode
12489 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12490 emit_insn ((mode == DImode
12492 : gen_lshrdi3) (high[0], high[0], operands[2]));
12494 /* Heh. By reversing the arguments, we can reuse this pattern. */
12495 if (TARGET_CMOVE && scratch)
12497 ix86_expand_clear (scratch);
12498 emit_insn ((mode == DImode
12499 ? gen_x86_shift_adj_1
12500 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12504 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12508 /* Helper function for the string operations below. Dest VARIABLE whether
12509 it is aligned to VALUE bytes. If true, jump to the label. */
12511 ix86_expand_aligntest (rtx variable, int value)
12513 rtx label = gen_label_rtx ();
12514 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12515 if (GET_MODE (variable) == DImode)
12516 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12518 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12519 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12524 /* Adjust COUNTER by the VALUE. */
12526 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12528 if (GET_MODE (countreg) == DImode)
12529 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12531 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12534 /* Zero extend possibly SImode EXP to Pmode register. */
12536 ix86_zero_extend_to_Pmode (rtx exp)
12539 if (GET_MODE (exp) == VOIDmode)
12540 return force_reg (Pmode, exp);
12541 if (GET_MODE (exp) == Pmode)
12542 return copy_to_mode_reg (Pmode, exp);
12543 r = gen_reg_rtx (Pmode);
12544 emit_insn (gen_zero_extendsidi2 (r, exp));
12548 /* Expand string move (memcpy) operation. Use i386 string operations when
12549 profitable. expand_clrmem contains similar code. */
12551 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12553 rtx srcreg, destreg, countreg, srcexp, destexp;
12554 enum machine_mode counter_mode;
12555 HOST_WIDE_INT align = 0;
12556 unsigned HOST_WIDE_INT count = 0;
12558 if (GET_CODE (align_exp) == CONST_INT)
12559 align = INTVAL (align_exp);
12561 /* Can't use any of this if the user has appropriated esi or edi. */
12562 if (global_regs[4] || global_regs[5])
12565 /* This simple hack avoids all inlining code and simplifies code below. */
12566 if (!TARGET_ALIGN_STRINGOPS)
12569 if (GET_CODE (count_exp) == CONST_INT)
12571 count = INTVAL (count_exp);
12572 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12576 /* Figure out proper mode for counter. For 32bits it is always SImode,
12577 for 64bits use SImode when possible, otherwise DImode.
12578 Set count to number of bytes copied when known at compile time. */
12580 || GET_MODE (count_exp) == SImode
12581 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12582 counter_mode = SImode;
12584 counter_mode = DImode;
12586 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12588 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12589 if (destreg != XEXP (dst, 0))
12590 dst = replace_equiv_address_nv (dst, destreg);
12591 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12592 if (srcreg != XEXP (src, 0))
12593 src = replace_equiv_address_nv (src, srcreg);
12595 /* When optimizing for size emit simple rep ; movsb instruction for
12596 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12597 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12598 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12599 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12600 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12601 known to be zero or not. The rep; movsb sequence causes higher
12602 register pressure though, so take that into account. */
12604 if ((!optimize || optimize_size)
12609 || (count & 3) + count / 4 > 6))))
12611 emit_insn (gen_cld ());
12612 countreg = ix86_zero_extend_to_Pmode (count_exp);
12613 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12614 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12615 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12619 /* For constant aligned (or small unaligned) copies use rep movsl
12620 followed by code copying the rest. For PentiumPro ensure 8 byte
12621 alignment to allow rep movsl acceleration. */
12623 else if (count != 0
12625 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12626 || optimize_size || count < (unsigned int) 64))
12628 unsigned HOST_WIDE_INT offset = 0;
12629 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12630 rtx srcmem, dstmem;
12632 emit_insn (gen_cld ());
12633 if (count & ~(size - 1))
12635 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12637 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12639 while (offset < (count & ~(size - 1)))
12641 srcmem = adjust_automodify_address_nv (src, movs_mode,
12643 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12645 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12651 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12652 & (TARGET_64BIT ? -1 : 0x3fffffff));
12653 countreg = copy_to_mode_reg (counter_mode, countreg);
12654 countreg = ix86_zero_extend_to_Pmode (countreg);
12656 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12657 GEN_INT (size == 4 ? 2 : 3));
12658 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12659 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12661 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12662 countreg, destexp, srcexp));
12663 offset = count & ~(size - 1);
12666 if (size == 8 && (count & 0x04))
12668 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12670 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12672 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12677 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12679 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12681 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12686 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12688 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12690 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12693 /* The generic code based on the glibc implementation:
12694 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12695 allowing accelerated copying there)
12696 - copy the data using rep movsl
12697 - copy the rest. */
12702 rtx srcmem, dstmem;
12703 int desired_alignment = (TARGET_PENTIUMPRO
12704 && (count == 0 || count >= (unsigned int) 260)
12705 ? 8 : UNITS_PER_WORD);
12706 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12707 dst = change_address (dst, BLKmode, destreg);
12708 src = change_address (src, BLKmode, srcreg);
12710 /* In case we don't know anything about the alignment, default to
12711 library version, since it is usually equally fast and result in
12714 Also emit call when we know that the count is large and call overhead
12715 will not be important. */
12716 if (!TARGET_INLINE_ALL_STRINGOPS
12717 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12720 if (TARGET_SINGLE_STRINGOP)
12721 emit_insn (gen_cld ());
12723 countreg2 = gen_reg_rtx (Pmode);
12724 countreg = copy_to_mode_reg (counter_mode, count_exp);
12726 /* We don't use loops to align destination and to copy parts smaller
12727 than 4 bytes, because gcc is able to optimize such code better (in
12728 the case the destination or the count really is aligned, gcc is often
12729 able to predict the branches) and also it is friendlier to the
12730 hardware branch prediction.
12732 Using loops is beneficial for generic case, because we can
12733 handle small counts using the loops. Many CPUs (such as Athlon)
12734 have large REP prefix setup costs.
12736 This is quite costly. Maybe we can revisit this decision later or
12737 add some customizability to this code. */
12739 if (count == 0 && align < desired_alignment)
12741 label = gen_label_rtx ();
12742 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12743 LEU, 0, counter_mode, 1, label);
12747 rtx label = ix86_expand_aligntest (destreg, 1);
12748 srcmem = change_address (src, QImode, srcreg);
12749 dstmem = change_address (dst, QImode, destreg);
12750 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12751 ix86_adjust_counter (countreg, 1);
12752 emit_label (label);
12753 LABEL_NUSES (label) = 1;
12757 rtx label = ix86_expand_aligntest (destreg, 2);
12758 srcmem = change_address (src, HImode, srcreg);
12759 dstmem = change_address (dst, HImode, destreg);
12760 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12761 ix86_adjust_counter (countreg, 2);
12762 emit_label (label);
12763 LABEL_NUSES (label) = 1;
12765 if (align <= 4 && desired_alignment > 4)
12767 rtx label = ix86_expand_aligntest (destreg, 4);
12768 srcmem = change_address (src, SImode, srcreg);
12769 dstmem = change_address (dst, SImode, destreg);
12770 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12771 ix86_adjust_counter (countreg, 4);
12772 emit_label (label);
12773 LABEL_NUSES (label) = 1;
12776 if (label && desired_alignment > 4 && !TARGET_64BIT)
12778 emit_label (label);
12779 LABEL_NUSES (label) = 1;
12782 if (!TARGET_SINGLE_STRINGOP)
12783 emit_insn (gen_cld ());
12786 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12788 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12792 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12793 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12795 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12796 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12797 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12798 countreg2, destexp, srcexp));
12802 emit_label (label);
12803 LABEL_NUSES (label) = 1;
12805 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12807 srcmem = change_address (src, SImode, srcreg);
12808 dstmem = change_address (dst, SImode, destreg);
12809 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12811 if ((align <= 4 || count == 0) && TARGET_64BIT)
12813 rtx label = ix86_expand_aligntest (countreg, 4);
12814 srcmem = change_address (src, SImode, srcreg);
12815 dstmem = change_address (dst, SImode, destreg);
12816 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12817 emit_label (label);
12818 LABEL_NUSES (label) = 1;
12820 if (align > 2 && count != 0 && (count & 2))
12822 srcmem = change_address (src, HImode, srcreg);
12823 dstmem = change_address (dst, HImode, destreg);
12824 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12826 if (align <= 2 || count == 0)
12828 rtx label = ix86_expand_aligntest (countreg, 2);
12829 srcmem = change_address (src, HImode, srcreg);
12830 dstmem = change_address (dst, HImode, destreg);
12831 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12832 emit_label (label);
12833 LABEL_NUSES (label) = 1;
12835 if (align > 1 && count != 0 && (count & 1))
12837 srcmem = change_address (src, QImode, srcreg);
12838 dstmem = change_address (dst, QImode, destreg);
12839 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12841 if (align <= 1 || count == 0)
12843 rtx label = ix86_expand_aligntest (countreg, 1);
12844 srcmem = change_address (src, QImode, srcreg);
12845 dstmem = change_address (dst, QImode, destreg);
12846 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12847 emit_label (label);
12848 LABEL_NUSES (label) = 1;
12855 /* Expand string clear operation (bzero). Use i386 string operations when
12856 profitable. expand_movmem contains similar code. */
12858 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12860 rtx destreg, zeroreg, countreg, destexp;
12861 enum machine_mode counter_mode;
12862 HOST_WIDE_INT align = 0;
12863 unsigned HOST_WIDE_INT count = 0;
12865 if (GET_CODE (align_exp) == CONST_INT)
12866 align = INTVAL (align_exp);
12868 /* Can't use any of this if the user has appropriated esi. */
12869 if (global_regs[4])
12872 /* This simple hack avoids all inlining code and simplifies code below. */
12873 if (!TARGET_ALIGN_STRINGOPS)
12876 if (GET_CODE (count_exp) == CONST_INT)
12878 count = INTVAL (count_exp);
12879 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12882 /* Figure out proper mode for counter. For 32bits it is always SImode,
12883 for 64bits use SImode when possible, otherwise DImode.
12884 Set count to number of bytes copied when known at compile time. */
12886 || GET_MODE (count_exp) == SImode
12887 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12888 counter_mode = SImode;
12890 counter_mode = DImode;
12892 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12893 if (destreg != XEXP (dst, 0))
12894 dst = replace_equiv_address_nv (dst, destreg);
12897 /* When optimizing for size emit simple rep ; movsb instruction for
12898 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12899 sequence is 7 bytes long, so if optimizing for size and count is
12900 small enough that some stosl, stosw and stosb instructions without
12901 rep are shorter, fall back into the next if. */
12903 if ((!optimize || optimize_size)
12906 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12908 emit_insn (gen_cld ());
12910 countreg = ix86_zero_extend_to_Pmode (count_exp);
12911 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12912 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12913 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12915 else if (count != 0
12917 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12918 || optimize_size || count < (unsigned int) 64))
12920 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12921 unsigned HOST_WIDE_INT offset = 0;
12923 emit_insn (gen_cld ());
12925 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12926 if (count & ~(size - 1))
12928 unsigned HOST_WIDE_INT repcount;
12929 unsigned int max_nonrep;
12931 repcount = count >> (size == 4 ? 2 : 3);
12933 repcount &= 0x3fffffff;
12935 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12936 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12937 bytes. In both cases the latter seems to be faster for small
12939 max_nonrep = size == 4 ? 7 : 4;
12940 if (!optimize_size)
12943 case PROCESSOR_PENTIUM4:
12944 case PROCESSOR_NOCONA:
12951 if (repcount <= max_nonrep)
12952 while (repcount-- > 0)
12954 rtx mem = adjust_automodify_address_nv (dst,
12955 GET_MODE (zeroreg),
12957 emit_insn (gen_strset (destreg, mem, zeroreg));
12962 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12963 countreg = ix86_zero_extend_to_Pmode (countreg);
12964 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12965 GEN_INT (size == 4 ? 2 : 3));
12966 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12967 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12969 offset = count & ~(size - 1);
12972 if (size == 8 && (count & 0x04))
12974 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12976 emit_insn (gen_strset (destreg, mem,
12977 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12982 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12984 emit_insn (gen_strset (destreg, mem,
12985 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12990 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12992 emit_insn (gen_strset (destreg, mem,
12993 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13000 /* Compute desired alignment of the string operation. */
13001 int desired_alignment = (TARGET_PENTIUMPRO
13002 && (count == 0 || count >= (unsigned int) 260)
13003 ? 8 : UNITS_PER_WORD);
13005 /* In case we don't know anything about the alignment, default to
13006 library version, since it is usually equally fast and result in
13009 Also emit call when we know that the count is large and call overhead
13010 will not be important. */
13011 if (!TARGET_INLINE_ALL_STRINGOPS
13012 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13015 if (TARGET_SINGLE_STRINGOP)
13016 emit_insn (gen_cld ());
13018 countreg2 = gen_reg_rtx (Pmode);
13019 countreg = copy_to_mode_reg (counter_mode, count_exp);
13020 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13021 /* Get rid of MEM_OFFSET, it won't be accurate. */
13022 dst = change_address (dst, BLKmode, destreg);
13024 if (count == 0 && align < desired_alignment)
13026 label = gen_label_rtx ();
13027 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13028 LEU, 0, counter_mode, 1, label);
13032 rtx label = ix86_expand_aligntest (destreg, 1);
13033 emit_insn (gen_strset (destreg, dst,
13034 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13035 ix86_adjust_counter (countreg, 1);
13036 emit_label (label);
13037 LABEL_NUSES (label) = 1;
13041 rtx label = ix86_expand_aligntest (destreg, 2);
13042 emit_insn (gen_strset (destreg, dst,
13043 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13044 ix86_adjust_counter (countreg, 2);
13045 emit_label (label);
13046 LABEL_NUSES (label) = 1;
13048 if (align <= 4 && desired_alignment > 4)
13050 rtx label = ix86_expand_aligntest (destreg, 4);
13051 emit_insn (gen_strset (destreg, dst,
13053 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13055 ix86_adjust_counter (countreg, 4);
13056 emit_label (label);
13057 LABEL_NUSES (label) = 1;
13060 if (label && desired_alignment > 4 && !TARGET_64BIT)
13062 emit_label (label);
13063 LABEL_NUSES (label) = 1;
13067 if (!TARGET_SINGLE_STRINGOP)
13068 emit_insn (gen_cld ());
13071 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13073 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13077 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13078 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13080 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13081 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13085 emit_label (label);
13086 LABEL_NUSES (label) = 1;
13089 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13090 emit_insn (gen_strset (destreg, dst,
13091 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13092 if (TARGET_64BIT && (align <= 4 || count == 0))
13094 rtx label = ix86_expand_aligntest (countreg, 4);
13095 emit_insn (gen_strset (destreg, dst,
13096 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13097 emit_label (label);
13098 LABEL_NUSES (label) = 1;
13100 if (align > 2 && count != 0 && (count & 2))
13101 emit_insn (gen_strset (destreg, dst,
13102 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13103 if (align <= 2 || count == 0)
13105 rtx label = ix86_expand_aligntest (countreg, 2);
13106 emit_insn (gen_strset (destreg, dst,
13107 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13108 emit_label (label);
13109 LABEL_NUSES (label) = 1;
13111 if (align > 1 && count != 0 && (count & 1))
13112 emit_insn (gen_strset (destreg, dst,
13113 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13114 if (align <= 1 || count == 0)
13116 rtx label = ix86_expand_aligntest (countreg, 1);
13117 emit_insn (gen_strset (destreg, dst,
13118 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13119 emit_label (label);
13120 LABEL_NUSES (label) = 1;
13126 /* Expand strlen. */
13128 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13130 rtx addr, scratch1, scratch2, scratch3, scratch4;
13132 /* The generic case of strlen expander is long. Avoid it's
13133 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13135 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13136 && !TARGET_INLINE_ALL_STRINGOPS
13138 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13141 addr = force_reg (Pmode, XEXP (src, 0));
13142 scratch1 = gen_reg_rtx (Pmode);
13144 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13147 /* Well it seems that some optimizer does not combine a call like
13148 foo(strlen(bar), strlen(bar));
13149 when the move and the subtraction is done here. It does calculate
13150 the length just once when these instructions are done inside of
13151 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13152 often used and I use one fewer register for the lifetime of
13153 output_strlen_unroll() this is better. */
13155 emit_move_insn (out, addr);
13157 ix86_expand_strlensi_unroll_1 (out, src, align);
13159 /* strlensi_unroll_1 returns the address of the zero at the end of
13160 the string, like memchr(), so compute the length by subtracting
13161 the start address. */
13163 emit_insn (gen_subdi3 (out, out, addr));
13165 emit_insn (gen_subsi3 (out, out, addr));
13170 scratch2 = gen_reg_rtx (Pmode);
13171 scratch3 = gen_reg_rtx (Pmode);
13172 scratch4 = force_reg (Pmode, constm1_rtx);
13174 emit_move_insn (scratch3, addr);
13175 eoschar = force_reg (QImode, eoschar);
13177 emit_insn (gen_cld ());
13178 src = replace_equiv_address_nv (src, scratch3);
13180 /* If .md starts supporting :P, this can be done in .md. */
13181 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13182 scratch4), UNSPEC_SCAS);
13183 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13186 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13187 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13191 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13192 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13198 /* Expand the appropriate insns for doing strlen if not just doing
13201 out = result, initialized with the start address
13202 align_rtx = alignment of the address.
13203 scratch = scratch register, initialized with the startaddress when
13204 not aligned, otherwise undefined
13206 This is just the body. It needs the initializations mentioned above and
13207 some address computing at the end. These things are done in i386.md. */
13210 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13214 rtx align_2_label = NULL_RTX;
13215 rtx align_3_label = NULL_RTX;
13216 rtx align_4_label = gen_label_rtx ();
13217 rtx end_0_label = gen_label_rtx ();
13219 rtx tmpreg = gen_reg_rtx (SImode);
13220 rtx scratch = gen_reg_rtx (SImode);
13224 if (GET_CODE (align_rtx) == CONST_INT)
13225 align = INTVAL (align_rtx);
13227 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13229 /* Is there a known alignment and is it less than 4? */
13232 rtx scratch1 = gen_reg_rtx (Pmode);
13233 emit_move_insn (scratch1, out);
13234 /* Is there a known alignment and is it not 2? */
13237 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13238 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13240 /* Leave just the 3 lower bits. */
13241 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13242 NULL_RTX, 0, OPTAB_WIDEN);
13244 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13245 Pmode, 1, align_4_label);
13246 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13247 Pmode, 1, align_2_label);
13248 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13249 Pmode, 1, align_3_label);
13253 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13254 check if is aligned to 4 - byte. */
13256 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13257 NULL_RTX, 0, OPTAB_WIDEN);
13259 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13260 Pmode, 1, align_4_label);
13263 mem = change_address (src, QImode, out);
13265 /* Now compare the bytes. */
13267 /* Compare the first n unaligned byte on a byte per byte basis. */
13268 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13269 QImode, 1, end_0_label);
13271 /* Increment the address. */
13273 emit_insn (gen_adddi3 (out, out, const1_rtx));
13275 emit_insn (gen_addsi3 (out, out, const1_rtx));
13277 /* Not needed with an alignment of 2 */
13280 emit_label (align_2_label);
13282 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13286 emit_insn (gen_adddi3 (out, out, const1_rtx));
13288 emit_insn (gen_addsi3 (out, out, const1_rtx));
13290 emit_label (align_3_label);
13293 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13297 emit_insn (gen_adddi3 (out, out, const1_rtx));
13299 emit_insn (gen_addsi3 (out, out, const1_rtx));
13302 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13303 align this loop. It gives only huge programs, but does not help to
13305 emit_label (align_4_label);
13307 mem = change_address (src, SImode, out);
13308 emit_move_insn (scratch, mem);
13310 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13312 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13314 /* This formula yields a nonzero result iff one of the bytes is zero.
13315 This saves three branches inside loop and many cycles. */
13317 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13318 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13319 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13320 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13321 gen_int_mode (0x80808080, SImode)));
13322 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13327 rtx reg = gen_reg_rtx (SImode);
13328 rtx reg2 = gen_reg_rtx (Pmode);
13329 emit_move_insn (reg, tmpreg);
13330 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13332 /* If zero is not in the first two bytes, move two bytes forward. */
13333 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13334 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13335 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13336 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13337 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13340 /* Emit lea manually to avoid clobbering of flags. */
13341 emit_insn (gen_rtx_SET (SImode, reg2,
13342 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13344 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13345 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13346 emit_insn (gen_rtx_SET (VOIDmode, out,
13347 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13354 rtx end_2_label = gen_label_rtx ();
13355 /* Is zero in the first two bytes? */
13357 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13358 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13359 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13360 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13361 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13363 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13364 JUMP_LABEL (tmp) = end_2_label;
13366 /* Not in the first two. Move two bytes forward. */
13367 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13369 emit_insn (gen_adddi3 (out, out, const2_rtx));
13371 emit_insn (gen_addsi3 (out, out, const2_rtx));
13373 emit_label (end_2_label);
13377 /* Avoid branch in fixing the byte. */
13378 tmpreg = gen_lowpart (QImode, tmpreg);
13379 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13380 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13382 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13384 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13386 emit_label (end_0_label);
13390 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13391 rtx callarg2 ATTRIBUTE_UNUSED,
13392 rtx pop, int sibcall)
13394 rtx use = NULL, call;
13396 if (pop == const0_rtx)
13398 gcc_assert (!TARGET_64BIT || !pop);
13400 if (TARGET_MACHO && !TARGET_64BIT)
13403 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13404 fnaddr = machopic_indirect_call_target (fnaddr);
13409 /* Static functions and indirect calls don't need the pic register. */
13410 if (! TARGET_64BIT && flag_pic
13411 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13412 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13413 use_reg (&use, pic_offset_table_rtx);
13416 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13418 rtx al = gen_rtx_REG (QImode, 0);
13419 emit_move_insn (al, callarg2);
13420 use_reg (&use, al);
13423 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13425 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13426 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13428 if (sibcall && TARGET_64BIT
13429 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13432 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13433 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13434 emit_move_insn (fnaddr, addr);
13435 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13438 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13440 call = gen_rtx_SET (VOIDmode, retval, call);
13443 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13444 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13445 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13448 call = emit_call_insn (call);
13450 CALL_INSN_FUNCTION_USAGE (call) = use;
13454 /* Clear stack slot assignments remembered from previous functions.
13455 This is called from INIT_EXPANDERS once before RTL is emitted for each
13458 static struct machine_function *
13459 ix86_init_machine_status (void)
13461 struct machine_function *f;
13463 f = ggc_alloc_cleared (sizeof (struct machine_function));
13464 f->use_fast_prologue_epilogue_nregs = -1;
13465 f->tls_descriptor_call_expanded_p = 0;
13470 /* Return a MEM corresponding to a stack slot with mode MODE.
13471 Allocate a new slot if necessary.
13473 The RTL for a function can have several slots available: N is
13474 which slot to use. */
13477 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13479 struct stack_local_entry *s;
13481 gcc_assert (n < MAX_386_STACK_LOCALS);
13483 /* Virtual slot is valid only before vregs are instantiated. */
13484 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13486 for (s = ix86_stack_locals; s; s = s->next)
13487 if (s->mode == mode && s->n == n)
13490 s = (struct stack_local_entry *)
13491 ggc_alloc (sizeof (struct stack_local_entry));
13494 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13496 s->next = ix86_stack_locals;
13497 ix86_stack_locals = s;
13501 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13503 static GTY(()) rtx ix86_tls_symbol;
13505 ix86_tls_get_addr (void)
13508 if (!ix86_tls_symbol)
13510 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13511 (TARGET_ANY_GNU_TLS
13513 ? "___tls_get_addr"
13514 : "__tls_get_addr");
13517 return ix86_tls_symbol;
13520 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13522 static GTY(()) rtx ix86_tls_module_base_symbol;
13524 ix86_tls_module_base (void)
13527 if (!ix86_tls_module_base_symbol)
13529 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13530 "_TLS_MODULE_BASE_");
13531 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13532 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13535 return ix86_tls_module_base_symbol;
13538 /* Calculate the length of the memory address in the instruction
13539 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13542 memory_address_length (rtx addr)
13544 struct ix86_address parts;
13545 rtx base, index, disp;
13549 if (GET_CODE (addr) == PRE_DEC
13550 || GET_CODE (addr) == POST_INC
13551 || GET_CODE (addr) == PRE_MODIFY
13552 || GET_CODE (addr) == POST_MODIFY)
13555 ok = ix86_decompose_address (addr, &parts);
13558 if (parts.base && GET_CODE (parts.base) == SUBREG)
13559 parts.base = SUBREG_REG (parts.base);
13560 if (parts.index && GET_CODE (parts.index) == SUBREG)
13561 parts.index = SUBREG_REG (parts.index);
13564 index = parts.index;
13569 - esp as the base always wants an index,
13570 - ebp as the base always wants a displacement. */
13572 /* Register Indirect. */
13573 if (base && !index && !disp)
13575 /* esp (for its index) and ebp (for its displacement) need
13576 the two-byte modrm form. */
13577 if (addr == stack_pointer_rtx
13578 || addr == arg_pointer_rtx
13579 || addr == frame_pointer_rtx
13580 || addr == hard_frame_pointer_rtx)
13584 /* Direct Addressing. */
13585 else if (disp && !base && !index)
13590 /* Find the length of the displacement constant. */
13593 if (base && satisfies_constraint_K (disp))
13598 /* ebp always wants a displacement. */
13599 else if (base == hard_frame_pointer_rtx)
13602 /* An index requires the two-byte modrm form.... */
13604 /* ...like esp, which always wants an index. */
13605 || base == stack_pointer_rtx
13606 || base == arg_pointer_rtx
13607 || base == frame_pointer_rtx)
13614 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13615 is set, expect that insn have 8bit immediate alternative. */
13617 ix86_attr_length_immediate_default (rtx insn, int shortform)
13621 extract_insn_cached (insn);
13622 for (i = recog_data.n_operands - 1; i >= 0; --i)
13623 if (CONSTANT_P (recog_data.operand[i]))
13626 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13630 switch (get_attr_mode (insn))
13641 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13646 fatal_insn ("unknown insn mode", insn);
13652 /* Compute default value for "length_address" attribute. */
13654 ix86_attr_length_address_default (rtx insn)
13658 if (get_attr_type (insn) == TYPE_LEA)
13660 rtx set = PATTERN (insn);
13662 if (GET_CODE (set) == PARALLEL)
13663 set = XVECEXP (set, 0, 0);
13665 gcc_assert (GET_CODE (set) == SET);
13667 return memory_address_length (SET_SRC (set));
13670 extract_insn_cached (insn);
13671 for (i = recog_data.n_operands - 1; i >= 0; --i)
13672 if (GET_CODE (recog_data.operand[i]) == MEM)
13674 return memory_address_length (XEXP (recog_data.operand[i], 0));
13680 /* Return the maximum number of instructions a cpu can issue. */
13683 ix86_issue_rate (void)
13687 case PROCESSOR_PENTIUM:
13691 case PROCESSOR_PENTIUMPRO:
13692 case PROCESSOR_PENTIUM4:
13693 case PROCESSOR_ATHLON:
13695 case PROCESSOR_NOCONA:
13696 case PROCESSOR_GENERIC32:
13697 case PROCESSOR_GENERIC64:
13705 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13706 by DEP_INSN and nothing set by DEP_INSN. */
13709 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13713 /* Simplify the test for uninteresting insns. */
13714 if (insn_type != TYPE_SETCC
13715 && insn_type != TYPE_ICMOV
13716 && insn_type != TYPE_FCMOV
13717 && insn_type != TYPE_IBR)
13720 if ((set = single_set (dep_insn)) != 0)
13722 set = SET_DEST (set);
13725 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13726 && XVECLEN (PATTERN (dep_insn), 0) == 2
13727 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13728 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13730 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13731 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13736 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13739 /* This test is true if the dependent insn reads the flags but
13740 not any other potentially set register. */
13741 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13744 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13750 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13751 address with operands set by DEP_INSN. */
13754 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13758 if (insn_type == TYPE_LEA
13761 addr = PATTERN (insn);
13763 if (GET_CODE (addr) == PARALLEL)
13764 addr = XVECEXP (addr, 0, 0);
13766 gcc_assert (GET_CODE (addr) == SET);
13768 addr = SET_SRC (addr);
13773 extract_insn_cached (insn);
13774 for (i = recog_data.n_operands - 1; i >= 0; --i)
13775 if (GET_CODE (recog_data.operand[i]) == MEM)
13777 addr = XEXP (recog_data.operand[i], 0);
13784 return modified_in_p (addr, dep_insn);
13788 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13790 enum attr_type insn_type, dep_insn_type;
13791 enum attr_memory memory;
13793 int dep_insn_code_number;
13795 /* Anti and output dependencies have zero cost on all CPUs. */
13796 if (REG_NOTE_KIND (link) != 0)
13799 dep_insn_code_number = recog_memoized (dep_insn);
13801 /* If we can't recognize the insns, we can't really do anything. */
13802 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13805 insn_type = get_attr_type (insn);
13806 dep_insn_type = get_attr_type (dep_insn);
13810 case PROCESSOR_PENTIUM:
13811 /* Address Generation Interlock adds a cycle of latency. */
13812 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13815 /* ??? Compares pair with jump/setcc. */
13816 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13819 /* Floating point stores require value to be ready one cycle earlier. */
13820 if (insn_type == TYPE_FMOV
13821 && get_attr_memory (insn) == MEMORY_STORE
13822 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13826 case PROCESSOR_PENTIUMPRO:
13827 memory = get_attr_memory (insn);
13829 /* INT->FP conversion is expensive. */
13830 if (get_attr_fp_int_src (dep_insn))
13833 /* There is one cycle extra latency between an FP op and a store. */
13834 if (insn_type == TYPE_FMOV
13835 && (set = single_set (dep_insn)) != NULL_RTX
13836 && (set2 = single_set (insn)) != NULL_RTX
13837 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13838 && GET_CODE (SET_DEST (set2)) == MEM)
13841 /* Show ability of reorder buffer to hide latency of load by executing
13842 in parallel with previous instruction in case
13843 previous instruction is not needed to compute the address. */
13844 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13845 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13847 /* Claim moves to take one cycle, as core can issue one load
13848 at time and the next load can start cycle later. */
13849 if (dep_insn_type == TYPE_IMOV
13850 || dep_insn_type == TYPE_FMOV)
13858 memory = get_attr_memory (insn);
13860 /* The esp dependency is resolved before the instruction is really
13862 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13863 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13866 /* INT->FP conversion is expensive. */
13867 if (get_attr_fp_int_src (dep_insn))
13870 /* Show ability of reorder buffer to hide latency of load by executing
13871 in parallel with previous instruction in case
13872 previous instruction is not needed to compute the address. */
13873 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13874 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13876 /* Claim moves to take one cycle, as core can issue one load
13877 at time and the next load can start cycle later. */
13878 if (dep_insn_type == TYPE_IMOV
13879 || dep_insn_type == TYPE_FMOV)
13888 case PROCESSOR_ATHLON:
13890 case PROCESSOR_GENERIC32:
13891 case PROCESSOR_GENERIC64:
13892 memory = get_attr_memory (insn);
13894 /* Show ability of reorder buffer to hide latency of load by executing
13895 in parallel with previous instruction in case
13896 previous instruction is not needed to compute the address. */
13897 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13898 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13900 enum attr_unit unit = get_attr_unit (insn);
13903 /* Because of the difference between the length of integer and
13904 floating unit pipeline preparation stages, the memory operands
13905 for floating point are cheaper.
13907 ??? For Athlon it the difference is most probably 2. */
13908 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13911 loadcost = TARGET_ATHLON ? 2 : 0;
13913 if (cost >= loadcost)
13926 /* How many alternative schedules to try. This should be as wide as the
13927 scheduling freedom in the DFA, but no wider. Making this value too
13928 large results extra work for the scheduler. */
13931 ia32_multipass_dfa_lookahead (void)
13933 if (ix86_tune == PROCESSOR_PENTIUM)
13936 if (ix86_tune == PROCESSOR_PENTIUMPRO
13937 || ix86_tune == PROCESSOR_K6)
13945 /* Compute the alignment given to a constant that is being placed in memory.
13946 EXP is the constant and ALIGN is the alignment that the object would
13948 The value of this function is used instead of that alignment to align
13952 ix86_constant_alignment (tree exp, int align)
13954 if (TREE_CODE (exp) == REAL_CST)
13956 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13958 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13961 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13962 && !TARGET_NO_ALIGN_LONG_STRINGS
13963 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13964 return BITS_PER_WORD;
13969 /* Compute the alignment for a static variable.
13970 TYPE is the data type, and ALIGN is the alignment that
13971 the object would ordinarily have. The value of this function is used
13972 instead of that alignment to align the object. */
13975 ix86_data_alignment (tree type, int align)
13977 int max_align = optimize_size ? BITS_PER_WORD : 256;
13979 if (AGGREGATE_TYPE_P (type)
13980 && TYPE_SIZE (type)
13981 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13982 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13983 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13984 && align < max_align)
13987 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13988 to 16byte boundary. */
13991 if (AGGREGATE_TYPE_P (type)
13992 && TYPE_SIZE (type)
13993 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13994 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13995 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13999 if (TREE_CODE (type) == ARRAY_TYPE)
14001 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14003 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14006 else if (TREE_CODE (type) == COMPLEX_TYPE)
14009 if (TYPE_MODE (type) == DCmode && align < 64)
14011 if (TYPE_MODE (type) == XCmode && align < 128)
14014 else if ((TREE_CODE (type) == RECORD_TYPE
14015 || TREE_CODE (type) == UNION_TYPE
14016 || TREE_CODE (type) == QUAL_UNION_TYPE)
14017 && TYPE_FIELDS (type))
14019 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14021 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14024 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14025 || TREE_CODE (type) == INTEGER_TYPE)
14027 if (TYPE_MODE (type) == DFmode && align < 64)
14029 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14036 /* Compute the alignment for a local variable.
14037 TYPE is the data type, and ALIGN is the alignment that
14038 the object would ordinarily have. The value of this macro is used
14039 instead of that alignment to align the object. */
14042 ix86_local_alignment (tree type, int align)
14044 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14045 to 16byte boundary. */
14048 if (AGGREGATE_TYPE_P (type)
14049 && TYPE_SIZE (type)
14050 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14051 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14052 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14055 if (TREE_CODE (type) == ARRAY_TYPE)
14057 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14059 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14062 else if (TREE_CODE (type) == COMPLEX_TYPE)
14064 if (TYPE_MODE (type) == DCmode && align < 64)
14066 if (TYPE_MODE (type) == XCmode && align < 128)
14069 else if ((TREE_CODE (type) == RECORD_TYPE
14070 || TREE_CODE (type) == UNION_TYPE
14071 || TREE_CODE (type) == QUAL_UNION_TYPE)
14072 && TYPE_FIELDS (type))
14074 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14076 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14079 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14080 || TREE_CODE (type) == INTEGER_TYPE)
14083 if (TYPE_MODE (type) == DFmode && align < 64)
14085 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14091 /* Emit RTL insns to initialize the variable parts of a trampoline.
14092 FNADDR is an RTX for the address of the function's pure code.
14093 CXT is an RTX for the static chain value for the function. */
14095 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14099 /* Compute offset from the end of the jmp to the target function. */
14100 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14101 plus_constant (tramp, 10),
14102 NULL_RTX, 1, OPTAB_DIRECT);
14103 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14104 gen_int_mode (0xb9, QImode));
14105 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14106 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14107 gen_int_mode (0xe9, QImode));
14108 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14113 /* Try to load address using shorter movl instead of movabs.
14114 We may want to support movq for kernel mode, but kernel does not use
14115 trampolines at the moment. */
14116 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14118 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14119 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14120 gen_int_mode (0xbb41, HImode));
14121 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14122 gen_lowpart (SImode, fnaddr));
14127 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14128 gen_int_mode (0xbb49, HImode));
14129 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14133 /* Load static chain using movabs to r10. */
14134 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14135 gen_int_mode (0xba49, HImode));
14136 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14139 /* Jump to the r11 */
14140 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14141 gen_int_mode (0xff49, HImode));
14142 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14143 gen_int_mode (0xe3, QImode));
14145 gcc_assert (offset <= TRAMPOLINE_SIZE);
14148 #ifdef ENABLE_EXECUTE_STACK
14149 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14150 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14154 /* Codes for all the SSE/MMX builtins. */
14157 IX86_BUILTIN_ADDPS,
14158 IX86_BUILTIN_ADDSS,
14159 IX86_BUILTIN_DIVPS,
14160 IX86_BUILTIN_DIVSS,
14161 IX86_BUILTIN_MULPS,
14162 IX86_BUILTIN_MULSS,
14163 IX86_BUILTIN_SUBPS,
14164 IX86_BUILTIN_SUBSS,
14166 IX86_BUILTIN_CMPEQPS,
14167 IX86_BUILTIN_CMPLTPS,
14168 IX86_BUILTIN_CMPLEPS,
14169 IX86_BUILTIN_CMPGTPS,
14170 IX86_BUILTIN_CMPGEPS,
14171 IX86_BUILTIN_CMPNEQPS,
14172 IX86_BUILTIN_CMPNLTPS,
14173 IX86_BUILTIN_CMPNLEPS,
14174 IX86_BUILTIN_CMPNGTPS,
14175 IX86_BUILTIN_CMPNGEPS,
14176 IX86_BUILTIN_CMPORDPS,
14177 IX86_BUILTIN_CMPUNORDPS,
14178 IX86_BUILTIN_CMPEQSS,
14179 IX86_BUILTIN_CMPLTSS,
14180 IX86_BUILTIN_CMPLESS,
14181 IX86_BUILTIN_CMPNEQSS,
14182 IX86_BUILTIN_CMPNLTSS,
14183 IX86_BUILTIN_CMPNLESS,
14184 IX86_BUILTIN_CMPNGTSS,
14185 IX86_BUILTIN_CMPNGESS,
14186 IX86_BUILTIN_CMPORDSS,
14187 IX86_BUILTIN_CMPUNORDSS,
14189 IX86_BUILTIN_COMIEQSS,
14190 IX86_BUILTIN_COMILTSS,
14191 IX86_BUILTIN_COMILESS,
14192 IX86_BUILTIN_COMIGTSS,
14193 IX86_BUILTIN_COMIGESS,
14194 IX86_BUILTIN_COMINEQSS,
14195 IX86_BUILTIN_UCOMIEQSS,
14196 IX86_BUILTIN_UCOMILTSS,
14197 IX86_BUILTIN_UCOMILESS,
14198 IX86_BUILTIN_UCOMIGTSS,
14199 IX86_BUILTIN_UCOMIGESS,
14200 IX86_BUILTIN_UCOMINEQSS,
14202 IX86_BUILTIN_CVTPI2PS,
14203 IX86_BUILTIN_CVTPS2PI,
14204 IX86_BUILTIN_CVTSI2SS,
14205 IX86_BUILTIN_CVTSI642SS,
14206 IX86_BUILTIN_CVTSS2SI,
14207 IX86_BUILTIN_CVTSS2SI64,
14208 IX86_BUILTIN_CVTTPS2PI,
14209 IX86_BUILTIN_CVTTSS2SI,
14210 IX86_BUILTIN_CVTTSS2SI64,
14212 IX86_BUILTIN_MAXPS,
14213 IX86_BUILTIN_MAXSS,
14214 IX86_BUILTIN_MINPS,
14215 IX86_BUILTIN_MINSS,
14217 IX86_BUILTIN_LOADUPS,
14218 IX86_BUILTIN_STOREUPS,
14219 IX86_BUILTIN_MOVSS,
14221 IX86_BUILTIN_MOVHLPS,
14222 IX86_BUILTIN_MOVLHPS,
14223 IX86_BUILTIN_LOADHPS,
14224 IX86_BUILTIN_LOADLPS,
14225 IX86_BUILTIN_STOREHPS,
14226 IX86_BUILTIN_STORELPS,
14228 IX86_BUILTIN_MASKMOVQ,
14229 IX86_BUILTIN_MOVMSKPS,
14230 IX86_BUILTIN_PMOVMSKB,
14232 IX86_BUILTIN_MOVNTPS,
14233 IX86_BUILTIN_MOVNTQ,
14235 IX86_BUILTIN_LOADDQU,
14236 IX86_BUILTIN_STOREDQU,
14238 IX86_BUILTIN_PACKSSWB,
14239 IX86_BUILTIN_PACKSSDW,
14240 IX86_BUILTIN_PACKUSWB,
14242 IX86_BUILTIN_PADDB,
14243 IX86_BUILTIN_PADDW,
14244 IX86_BUILTIN_PADDD,
14245 IX86_BUILTIN_PADDQ,
14246 IX86_BUILTIN_PADDSB,
14247 IX86_BUILTIN_PADDSW,
14248 IX86_BUILTIN_PADDUSB,
14249 IX86_BUILTIN_PADDUSW,
14250 IX86_BUILTIN_PSUBB,
14251 IX86_BUILTIN_PSUBW,
14252 IX86_BUILTIN_PSUBD,
14253 IX86_BUILTIN_PSUBQ,
14254 IX86_BUILTIN_PSUBSB,
14255 IX86_BUILTIN_PSUBSW,
14256 IX86_BUILTIN_PSUBUSB,
14257 IX86_BUILTIN_PSUBUSW,
14260 IX86_BUILTIN_PANDN,
14264 IX86_BUILTIN_PAVGB,
14265 IX86_BUILTIN_PAVGW,
14267 IX86_BUILTIN_PCMPEQB,
14268 IX86_BUILTIN_PCMPEQW,
14269 IX86_BUILTIN_PCMPEQD,
14270 IX86_BUILTIN_PCMPGTB,
14271 IX86_BUILTIN_PCMPGTW,
14272 IX86_BUILTIN_PCMPGTD,
14274 IX86_BUILTIN_PMADDWD,
14276 IX86_BUILTIN_PMAXSW,
14277 IX86_BUILTIN_PMAXUB,
14278 IX86_BUILTIN_PMINSW,
14279 IX86_BUILTIN_PMINUB,
14281 IX86_BUILTIN_PMULHUW,
14282 IX86_BUILTIN_PMULHW,
14283 IX86_BUILTIN_PMULLW,
14285 IX86_BUILTIN_PSADBW,
14286 IX86_BUILTIN_PSHUFW,
14288 IX86_BUILTIN_PSLLW,
14289 IX86_BUILTIN_PSLLD,
14290 IX86_BUILTIN_PSLLQ,
14291 IX86_BUILTIN_PSRAW,
14292 IX86_BUILTIN_PSRAD,
14293 IX86_BUILTIN_PSRLW,
14294 IX86_BUILTIN_PSRLD,
14295 IX86_BUILTIN_PSRLQ,
14296 IX86_BUILTIN_PSLLWI,
14297 IX86_BUILTIN_PSLLDI,
14298 IX86_BUILTIN_PSLLQI,
14299 IX86_BUILTIN_PSRAWI,
14300 IX86_BUILTIN_PSRADI,
14301 IX86_BUILTIN_PSRLWI,
14302 IX86_BUILTIN_PSRLDI,
14303 IX86_BUILTIN_PSRLQI,
14305 IX86_BUILTIN_PUNPCKHBW,
14306 IX86_BUILTIN_PUNPCKHWD,
14307 IX86_BUILTIN_PUNPCKHDQ,
14308 IX86_BUILTIN_PUNPCKLBW,
14309 IX86_BUILTIN_PUNPCKLWD,
14310 IX86_BUILTIN_PUNPCKLDQ,
14312 IX86_BUILTIN_SHUFPS,
14314 IX86_BUILTIN_RCPPS,
14315 IX86_BUILTIN_RCPSS,
14316 IX86_BUILTIN_RSQRTPS,
14317 IX86_BUILTIN_RSQRTSS,
14318 IX86_BUILTIN_SQRTPS,
14319 IX86_BUILTIN_SQRTSS,
14321 IX86_BUILTIN_UNPCKHPS,
14322 IX86_BUILTIN_UNPCKLPS,
14324 IX86_BUILTIN_ANDPS,
14325 IX86_BUILTIN_ANDNPS,
14327 IX86_BUILTIN_XORPS,
14330 IX86_BUILTIN_LDMXCSR,
14331 IX86_BUILTIN_STMXCSR,
14332 IX86_BUILTIN_SFENCE,
14334 /* 3DNow! Original */
14335 IX86_BUILTIN_FEMMS,
14336 IX86_BUILTIN_PAVGUSB,
14337 IX86_BUILTIN_PF2ID,
14338 IX86_BUILTIN_PFACC,
14339 IX86_BUILTIN_PFADD,
14340 IX86_BUILTIN_PFCMPEQ,
14341 IX86_BUILTIN_PFCMPGE,
14342 IX86_BUILTIN_PFCMPGT,
14343 IX86_BUILTIN_PFMAX,
14344 IX86_BUILTIN_PFMIN,
14345 IX86_BUILTIN_PFMUL,
14346 IX86_BUILTIN_PFRCP,
14347 IX86_BUILTIN_PFRCPIT1,
14348 IX86_BUILTIN_PFRCPIT2,
14349 IX86_BUILTIN_PFRSQIT1,
14350 IX86_BUILTIN_PFRSQRT,
14351 IX86_BUILTIN_PFSUB,
14352 IX86_BUILTIN_PFSUBR,
14353 IX86_BUILTIN_PI2FD,
14354 IX86_BUILTIN_PMULHRW,
14356 /* 3DNow! Athlon Extensions */
14357 IX86_BUILTIN_PF2IW,
14358 IX86_BUILTIN_PFNACC,
14359 IX86_BUILTIN_PFPNACC,
14360 IX86_BUILTIN_PI2FW,
14361 IX86_BUILTIN_PSWAPDSI,
14362 IX86_BUILTIN_PSWAPDSF,
14365 IX86_BUILTIN_ADDPD,
14366 IX86_BUILTIN_ADDSD,
14367 IX86_BUILTIN_DIVPD,
14368 IX86_BUILTIN_DIVSD,
14369 IX86_BUILTIN_MULPD,
14370 IX86_BUILTIN_MULSD,
14371 IX86_BUILTIN_SUBPD,
14372 IX86_BUILTIN_SUBSD,
14374 IX86_BUILTIN_CMPEQPD,
14375 IX86_BUILTIN_CMPLTPD,
14376 IX86_BUILTIN_CMPLEPD,
14377 IX86_BUILTIN_CMPGTPD,
14378 IX86_BUILTIN_CMPGEPD,
14379 IX86_BUILTIN_CMPNEQPD,
14380 IX86_BUILTIN_CMPNLTPD,
14381 IX86_BUILTIN_CMPNLEPD,
14382 IX86_BUILTIN_CMPNGTPD,
14383 IX86_BUILTIN_CMPNGEPD,
14384 IX86_BUILTIN_CMPORDPD,
14385 IX86_BUILTIN_CMPUNORDPD,
14386 IX86_BUILTIN_CMPNEPD,
14387 IX86_BUILTIN_CMPEQSD,
14388 IX86_BUILTIN_CMPLTSD,
14389 IX86_BUILTIN_CMPLESD,
14390 IX86_BUILTIN_CMPNEQSD,
14391 IX86_BUILTIN_CMPNLTSD,
14392 IX86_BUILTIN_CMPNLESD,
14393 IX86_BUILTIN_CMPORDSD,
14394 IX86_BUILTIN_CMPUNORDSD,
14395 IX86_BUILTIN_CMPNESD,
14397 IX86_BUILTIN_COMIEQSD,
14398 IX86_BUILTIN_COMILTSD,
14399 IX86_BUILTIN_COMILESD,
14400 IX86_BUILTIN_COMIGTSD,
14401 IX86_BUILTIN_COMIGESD,
14402 IX86_BUILTIN_COMINEQSD,
14403 IX86_BUILTIN_UCOMIEQSD,
14404 IX86_BUILTIN_UCOMILTSD,
14405 IX86_BUILTIN_UCOMILESD,
14406 IX86_BUILTIN_UCOMIGTSD,
14407 IX86_BUILTIN_UCOMIGESD,
14408 IX86_BUILTIN_UCOMINEQSD,
14410 IX86_BUILTIN_MAXPD,
14411 IX86_BUILTIN_MAXSD,
14412 IX86_BUILTIN_MINPD,
14413 IX86_BUILTIN_MINSD,
14415 IX86_BUILTIN_ANDPD,
14416 IX86_BUILTIN_ANDNPD,
14418 IX86_BUILTIN_XORPD,
14420 IX86_BUILTIN_SQRTPD,
14421 IX86_BUILTIN_SQRTSD,
14423 IX86_BUILTIN_UNPCKHPD,
14424 IX86_BUILTIN_UNPCKLPD,
14426 IX86_BUILTIN_SHUFPD,
14428 IX86_BUILTIN_LOADUPD,
14429 IX86_BUILTIN_STOREUPD,
14430 IX86_BUILTIN_MOVSD,
14432 IX86_BUILTIN_LOADHPD,
14433 IX86_BUILTIN_LOADLPD,
14435 IX86_BUILTIN_CVTDQ2PD,
14436 IX86_BUILTIN_CVTDQ2PS,
14438 IX86_BUILTIN_CVTPD2DQ,
14439 IX86_BUILTIN_CVTPD2PI,
14440 IX86_BUILTIN_CVTPD2PS,
14441 IX86_BUILTIN_CVTTPD2DQ,
14442 IX86_BUILTIN_CVTTPD2PI,
14444 IX86_BUILTIN_CVTPI2PD,
14445 IX86_BUILTIN_CVTSI2SD,
14446 IX86_BUILTIN_CVTSI642SD,
14448 IX86_BUILTIN_CVTSD2SI,
14449 IX86_BUILTIN_CVTSD2SI64,
14450 IX86_BUILTIN_CVTSD2SS,
14451 IX86_BUILTIN_CVTSS2SD,
14452 IX86_BUILTIN_CVTTSD2SI,
14453 IX86_BUILTIN_CVTTSD2SI64,
14455 IX86_BUILTIN_CVTPS2DQ,
14456 IX86_BUILTIN_CVTPS2PD,
14457 IX86_BUILTIN_CVTTPS2DQ,
14459 IX86_BUILTIN_MOVNTI,
14460 IX86_BUILTIN_MOVNTPD,
14461 IX86_BUILTIN_MOVNTDQ,
14464 IX86_BUILTIN_MASKMOVDQU,
14465 IX86_BUILTIN_MOVMSKPD,
14466 IX86_BUILTIN_PMOVMSKB128,
14468 IX86_BUILTIN_PACKSSWB128,
14469 IX86_BUILTIN_PACKSSDW128,
14470 IX86_BUILTIN_PACKUSWB128,
14472 IX86_BUILTIN_PADDB128,
14473 IX86_BUILTIN_PADDW128,
14474 IX86_BUILTIN_PADDD128,
14475 IX86_BUILTIN_PADDQ128,
14476 IX86_BUILTIN_PADDSB128,
14477 IX86_BUILTIN_PADDSW128,
14478 IX86_BUILTIN_PADDUSB128,
14479 IX86_BUILTIN_PADDUSW128,
14480 IX86_BUILTIN_PSUBB128,
14481 IX86_BUILTIN_PSUBW128,
14482 IX86_BUILTIN_PSUBD128,
14483 IX86_BUILTIN_PSUBQ128,
14484 IX86_BUILTIN_PSUBSB128,
14485 IX86_BUILTIN_PSUBSW128,
14486 IX86_BUILTIN_PSUBUSB128,
14487 IX86_BUILTIN_PSUBUSW128,
14489 IX86_BUILTIN_PAND128,
14490 IX86_BUILTIN_PANDN128,
14491 IX86_BUILTIN_POR128,
14492 IX86_BUILTIN_PXOR128,
14494 IX86_BUILTIN_PAVGB128,
14495 IX86_BUILTIN_PAVGW128,
14497 IX86_BUILTIN_PCMPEQB128,
14498 IX86_BUILTIN_PCMPEQW128,
14499 IX86_BUILTIN_PCMPEQD128,
14500 IX86_BUILTIN_PCMPGTB128,
14501 IX86_BUILTIN_PCMPGTW128,
14502 IX86_BUILTIN_PCMPGTD128,
14504 IX86_BUILTIN_PMADDWD128,
14506 IX86_BUILTIN_PMAXSW128,
14507 IX86_BUILTIN_PMAXUB128,
14508 IX86_BUILTIN_PMINSW128,
14509 IX86_BUILTIN_PMINUB128,
14511 IX86_BUILTIN_PMULUDQ,
14512 IX86_BUILTIN_PMULUDQ128,
14513 IX86_BUILTIN_PMULHUW128,
14514 IX86_BUILTIN_PMULHW128,
14515 IX86_BUILTIN_PMULLW128,
14517 IX86_BUILTIN_PSADBW128,
14518 IX86_BUILTIN_PSHUFHW,
14519 IX86_BUILTIN_PSHUFLW,
14520 IX86_BUILTIN_PSHUFD,
14522 IX86_BUILTIN_PSLLW128,
14523 IX86_BUILTIN_PSLLD128,
14524 IX86_BUILTIN_PSLLQ128,
14525 IX86_BUILTIN_PSRAW128,
14526 IX86_BUILTIN_PSRAD128,
14527 IX86_BUILTIN_PSRLW128,
14528 IX86_BUILTIN_PSRLD128,
14529 IX86_BUILTIN_PSRLQ128,
14530 IX86_BUILTIN_PSLLDQI128,
14531 IX86_BUILTIN_PSLLWI128,
14532 IX86_BUILTIN_PSLLDI128,
14533 IX86_BUILTIN_PSLLQI128,
14534 IX86_BUILTIN_PSRAWI128,
14535 IX86_BUILTIN_PSRADI128,
14536 IX86_BUILTIN_PSRLDQI128,
14537 IX86_BUILTIN_PSRLWI128,
14538 IX86_BUILTIN_PSRLDI128,
14539 IX86_BUILTIN_PSRLQI128,
14541 IX86_BUILTIN_PUNPCKHBW128,
14542 IX86_BUILTIN_PUNPCKHWD128,
14543 IX86_BUILTIN_PUNPCKHDQ128,
14544 IX86_BUILTIN_PUNPCKHQDQ128,
14545 IX86_BUILTIN_PUNPCKLBW128,
14546 IX86_BUILTIN_PUNPCKLWD128,
14547 IX86_BUILTIN_PUNPCKLDQ128,
14548 IX86_BUILTIN_PUNPCKLQDQ128,
14550 IX86_BUILTIN_CLFLUSH,
14551 IX86_BUILTIN_MFENCE,
14552 IX86_BUILTIN_LFENCE,
14554 /* Prescott New Instructions. */
14555 IX86_BUILTIN_ADDSUBPS,
14556 IX86_BUILTIN_HADDPS,
14557 IX86_BUILTIN_HSUBPS,
14558 IX86_BUILTIN_MOVSHDUP,
14559 IX86_BUILTIN_MOVSLDUP,
14560 IX86_BUILTIN_ADDSUBPD,
14561 IX86_BUILTIN_HADDPD,
14562 IX86_BUILTIN_HSUBPD,
14563 IX86_BUILTIN_LDDQU,
14565 IX86_BUILTIN_MONITOR,
14566 IX86_BUILTIN_MWAIT,
14568 IX86_BUILTIN_VEC_INIT_V2SI,
14569 IX86_BUILTIN_VEC_INIT_V4HI,
14570 IX86_BUILTIN_VEC_INIT_V8QI,
14571 IX86_BUILTIN_VEC_EXT_V2DF,
14572 IX86_BUILTIN_VEC_EXT_V2DI,
14573 IX86_BUILTIN_VEC_EXT_V4SF,
14574 IX86_BUILTIN_VEC_EXT_V4SI,
14575 IX86_BUILTIN_VEC_EXT_V8HI,
14576 IX86_BUILTIN_VEC_EXT_V16QI,
14577 IX86_BUILTIN_VEC_EXT_V2SI,
14578 IX86_BUILTIN_VEC_EXT_V4HI,
14579 IX86_BUILTIN_VEC_SET_V8HI,
14580 IX86_BUILTIN_VEC_SET_V4HI,
14585 #define def_builtin(MASK, NAME, TYPE, CODE) \
14587 if ((MASK) & target_flags \
14588 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14589 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14590 NULL, NULL_TREE); \
14593 /* Bits for builtin_description.flag. */
14595 /* Set when we don't support the comparison natively, and should
14596 swap_comparison in order to support it. */
14597 #define BUILTIN_DESC_SWAP_OPERANDS 1
14599 struct builtin_description
14601 const unsigned int mask;
14602 const enum insn_code icode;
14603 const char *const name;
14604 const enum ix86_builtins code;
14605 const enum rtx_code comparison;
14606 const unsigned int flag;
14609 static const struct builtin_description bdesc_comi[] =
14611 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14612 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14613 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14614 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14615 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14616 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14617 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14618 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14619 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14620 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14621 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14622 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14623 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14624 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14625 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14626 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14627 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14628 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14629 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14630 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14631 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14632 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14633 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14634 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14637 static const struct builtin_description bdesc_2arg[] =
14640 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14641 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14642 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14643 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14644 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14645 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14646 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14647 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14649 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14650 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14651 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14652 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14653 BUILTIN_DESC_SWAP_OPERANDS },
14654 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14655 BUILTIN_DESC_SWAP_OPERANDS },
14656 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14657 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14658 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14659 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14660 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14661 BUILTIN_DESC_SWAP_OPERANDS },
14662 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14663 BUILTIN_DESC_SWAP_OPERANDS },
14664 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14665 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14666 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14667 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14668 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14669 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14670 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14671 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14672 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14673 BUILTIN_DESC_SWAP_OPERANDS },
14674 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14675 BUILTIN_DESC_SWAP_OPERANDS },
14676 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14678 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14679 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14680 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14681 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14683 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14684 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14685 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14686 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14688 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14689 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14690 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14691 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14692 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14695 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14696 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14697 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14698 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14699 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14700 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14701 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14702 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14704 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14705 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14706 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14707 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14708 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14709 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14710 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14711 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14713 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14714 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14715 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14717 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14718 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14719 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14720 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14722 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14723 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14725 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14726 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14727 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14728 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14729 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14730 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14732 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14733 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14737 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14738 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14739 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14740 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14741 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14742 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14745 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14746 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14747 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14749 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14750 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14751 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14753 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14754 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14755 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14756 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14757 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14758 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14760 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14761 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14762 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14763 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14764 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14765 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14767 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14768 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14769 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14770 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14772 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14773 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14776 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14777 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14779 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14780 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14781 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14782 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14783 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14785 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14786 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14787 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14788 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14789 BUILTIN_DESC_SWAP_OPERANDS },
14790 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14791 BUILTIN_DESC_SWAP_OPERANDS },
14792 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14793 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14794 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14795 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14796 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14797 BUILTIN_DESC_SWAP_OPERANDS },
14798 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14799 BUILTIN_DESC_SWAP_OPERANDS },
14800 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14801 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14806 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14807 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14808 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14810 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14811 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14812 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14813 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14815 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14817 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14818 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14820 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14822 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14825 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14826 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14827 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14828 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14829 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14830 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14831 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14832 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14834 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14835 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14836 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14837 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14838 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14839 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14840 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14841 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14843 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14844 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14846 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14847 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14848 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14849 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14851 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14852 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14854 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14855 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14856 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14857 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14858 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14859 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14861 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14862 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14863 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14864 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14866 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14867 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14868 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14869 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14870 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14871 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14872 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14873 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14875 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14879 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14880 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14882 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14883 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14885 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14886 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14887 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14889 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14890 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14891 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14893 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14894 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14896 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14898 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14899 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14900 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14901 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14904 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14905 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14906 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14907 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14908 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14909 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14912 static const struct builtin_description bdesc_1arg[] =
14914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14915 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14917 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14918 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14919 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14921 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14922 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14923 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14924 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14925 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14926 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14928 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14929 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14931 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14933 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14934 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14936 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14937 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14938 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14939 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14940 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14942 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14944 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14945 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14946 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14947 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14949 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14950 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14951 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14954 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14955 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14959 ix86_init_builtins (void)
14962 ix86_init_mmx_sse_builtins ();
14965 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14966 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14969 ix86_init_mmx_sse_builtins (void)
14971 const struct builtin_description * d;
14974 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14975 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14976 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14977 tree V2DI_type_node
14978 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14979 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14980 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14981 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14982 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14983 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14984 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14986 tree pchar_type_node = build_pointer_type (char_type_node);
14987 tree pcchar_type_node = build_pointer_type (
14988 build_type_variant (char_type_node, 1, 0));
14989 tree pfloat_type_node = build_pointer_type (float_type_node);
14990 tree pcfloat_type_node = build_pointer_type (
14991 build_type_variant (float_type_node, 1, 0));
14992 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14993 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14994 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14997 tree int_ftype_v4sf_v4sf
14998 = build_function_type_list (integer_type_node,
14999 V4SF_type_node, V4SF_type_node, NULL_TREE);
15000 tree v4si_ftype_v4sf_v4sf
15001 = build_function_type_list (V4SI_type_node,
15002 V4SF_type_node, V4SF_type_node, NULL_TREE);
15003 /* MMX/SSE/integer conversions. */
15004 tree int_ftype_v4sf
15005 = build_function_type_list (integer_type_node,
15006 V4SF_type_node, NULL_TREE);
15007 tree int64_ftype_v4sf
15008 = build_function_type_list (long_long_integer_type_node,
15009 V4SF_type_node, NULL_TREE);
15010 tree int_ftype_v8qi
15011 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15012 tree v4sf_ftype_v4sf_int
15013 = build_function_type_list (V4SF_type_node,
15014 V4SF_type_node, integer_type_node, NULL_TREE);
15015 tree v4sf_ftype_v4sf_int64
15016 = build_function_type_list (V4SF_type_node,
15017 V4SF_type_node, long_long_integer_type_node,
15019 tree v4sf_ftype_v4sf_v2si
15020 = build_function_type_list (V4SF_type_node,
15021 V4SF_type_node, V2SI_type_node, NULL_TREE);
15023 /* Miscellaneous. */
15024 tree v8qi_ftype_v4hi_v4hi
15025 = build_function_type_list (V8QI_type_node,
15026 V4HI_type_node, V4HI_type_node, NULL_TREE);
15027 tree v4hi_ftype_v2si_v2si
15028 = build_function_type_list (V4HI_type_node,
15029 V2SI_type_node, V2SI_type_node, NULL_TREE);
15030 tree v4sf_ftype_v4sf_v4sf_int
15031 = build_function_type_list (V4SF_type_node,
15032 V4SF_type_node, V4SF_type_node,
15033 integer_type_node, NULL_TREE);
15034 tree v2si_ftype_v4hi_v4hi
15035 = build_function_type_list (V2SI_type_node,
15036 V4HI_type_node, V4HI_type_node, NULL_TREE);
15037 tree v4hi_ftype_v4hi_int
15038 = build_function_type_list (V4HI_type_node,
15039 V4HI_type_node, integer_type_node, NULL_TREE);
15040 tree v4hi_ftype_v4hi_di
15041 = build_function_type_list (V4HI_type_node,
15042 V4HI_type_node, long_long_unsigned_type_node,
15044 tree v2si_ftype_v2si_di
15045 = build_function_type_list (V2SI_type_node,
15046 V2SI_type_node, long_long_unsigned_type_node,
15048 tree void_ftype_void
15049 = build_function_type (void_type_node, void_list_node);
15050 tree void_ftype_unsigned
15051 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15052 tree void_ftype_unsigned_unsigned
15053 = build_function_type_list (void_type_node, unsigned_type_node,
15054 unsigned_type_node, NULL_TREE);
15055 tree void_ftype_pcvoid_unsigned_unsigned
15056 = build_function_type_list (void_type_node, const_ptr_type_node,
15057 unsigned_type_node, unsigned_type_node,
15059 tree unsigned_ftype_void
15060 = build_function_type (unsigned_type_node, void_list_node);
15061 tree v2si_ftype_v4sf
15062 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15063 /* Loads/stores. */
15064 tree void_ftype_v8qi_v8qi_pchar
15065 = build_function_type_list (void_type_node,
15066 V8QI_type_node, V8QI_type_node,
15067 pchar_type_node, NULL_TREE);
15068 tree v4sf_ftype_pcfloat
15069 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15070 /* @@@ the type is bogus */
15071 tree v4sf_ftype_v4sf_pv2si
15072 = build_function_type_list (V4SF_type_node,
15073 V4SF_type_node, pv2si_type_node, NULL_TREE);
15074 tree void_ftype_pv2si_v4sf
15075 = build_function_type_list (void_type_node,
15076 pv2si_type_node, V4SF_type_node, NULL_TREE);
15077 tree void_ftype_pfloat_v4sf
15078 = build_function_type_list (void_type_node,
15079 pfloat_type_node, V4SF_type_node, NULL_TREE);
15080 tree void_ftype_pdi_di
15081 = build_function_type_list (void_type_node,
15082 pdi_type_node, long_long_unsigned_type_node,
15084 tree void_ftype_pv2di_v2di
15085 = build_function_type_list (void_type_node,
15086 pv2di_type_node, V2DI_type_node, NULL_TREE);
15087 /* Normal vector unops. */
15088 tree v4sf_ftype_v4sf
15089 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15091 /* Normal vector binops. */
15092 tree v4sf_ftype_v4sf_v4sf
15093 = build_function_type_list (V4SF_type_node,
15094 V4SF_type_node, V4SF_type_node, NULL_TREE);
15095 tree v8qi_ftype_v8qi_v8qi
15096 = build_function_type_list (V8QI_type_node,
15097 V8QI_type_node, V8QI_type_node, NULL_TREE);
15098 tree v4hi_ftype_v4hi_v4hi
15099 = build_function_type_list (V4HI_type_node,
15100 V4HI_type_node, V4HI_type_node, NULL_TREE);
15101 tree v2si_ftype_v2si_v2si
15102 = build_function_type_list (V2SI_type_node,
15103 V2SI_type_node, V2SI_type_node, NULL_TREE);
15104 tree di_ftype_di_di
15105 = build_function_type_list (long_long_unsigned_type_node,
15106 long_long_unsigned_type_node,
15107 long_long_unsigned_type_node, NULL_TREE);
15109 tree v2si_ftype_v2sf
15110 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15111 tree v2sf_ftype_v2si
15112 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15113 tree v2si_ftype_v2si
15114 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15115 tree v2sf_ftype_v2sf
15116 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15117 tree v2sf_ftype_v2sf_v2sf
15118 = build_function_type_list (V2SF_type_node,
15119 V2SF_type_node, V2SF_type_node, NULL_TREE);
15120 tree v2si_ftype_v2sf_v2sf
15121 = build_function_type_list (V2SI_type_node,
15122 V2SF_type_node, V2SF_type_node, NULL_TREE);
15123 tree pint_type_node = build_pointer_type (integer_type_node);
15124 tree pdouble_type_node = build_pointer_type (double_type_node);
15125 tree pcdouble_type_node = build_pointer_type (
15126 build_type_variant (double_type_node, 1, 0));
15127 tree int_ftype_v2df_v2df
15128 = build_function_type_list (integer_type_node,
15129 V2DF_type_node, V2DF_type_node, NULL_TREE);
15131 tree void_ftype_pcvoid
15132 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15133 tree v4sf_ftype_v4si
15134 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15135 tree v4si_ftype_v4sf
15136 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15137 tree v2df_ftype_v4si
15138 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15139 tree v4si_ftype_v2df
15140 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15141 tree v2si_ftype_v2df
15142 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15143 tree v4sf_ftype_v2df
15144 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15145 tree v2df_ftype_v2si
15146 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15147 tree v2df_ftype_v4sf
15148 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15149 tree int_ftype_v2df
15150 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15151 tree int64_ftype_v2df
15152 = build_function_type_list (long_long_integer_type_node,
15153 V2DF_type_node, NULL_TREE);
15154 tree v2df_ftype_v2df_int
15155 = build_function_type_list (V2DF_type_node,
15156 V2DF_type_node, integer_type_node, NULL_TREE);
15157 tree v2df_ftype_v2df_int64
15158 = build_function_type_list (V2DF_type_node,
15159 V2DF_type_node, long_long_integer_type_node,
15161 tree v4sf_ftype_v4sf_v2df
15162 = build_function_type_list (V4SF_type_node,
15163 V4SF_type_node, V2DF_type_node, NULL_TREE);
15164 tree v2df_ftype_v2df_v4sf
15165 = build_function_type_list (V2DF_type_node,
15166 V2DF_type_node, V4SF_type_node, NULL_TREE);
15167 tree v2df_ftype_v2df_v2df_int
15168 = build_function_type_list (V2DF_type_node,
15169 V2DF_type_node, V2DF_type_node,
15172 tree v2df_ftype_v2df_pcdouble
15173 = build_function_type_list (V2DF_type_node,
15174 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15175 tree void_ftype_pdouble_v2df
15176 = build_function_type_list (void_type_node,
15177 pdouble_type_node, V2DF_type_node, NULL_TREE);
15178 tree void_ftype_pint_int
15179 = build_function_type_list (void_type_node,
15180 pint_type_node, integer_type_node, NULL_TREE);
15181 tree void_ftype_v16qi_v16qi_pchar
15182 = build_function_type_list (void_type_node,
15183 V16QI_type_node, V16QI_type_node,
15184 pchar_type_node, NULL_TREE);
15185 tree v2df_ftype_pcdouble
15186 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15187 tree v2df_ftype_v2df_v2df
15188 = build_function_type_list (V2DF_type_node,
15189 V2DF_type_node, V2DF_type_node, NULL_TREE);
15190 tree v16qi_ftype_v16qi_v16qi
15191 = build_function_type_list (V16QI_type_node,
15192 V16QI_type_node, V16QI_type_node, NULL_TREE);
15193 tree v8hi_ftype_v8hi_v8hi
15194 = build_function_type_list (V8HI_type_node,
15195 V8HI_type_node, V8HI_type_node, NULL_TREE);
15196 tree v4si_ftype_v4si_v4si
15197 = build_function_type_list (V4SI_type_node,
15198 V4SI_type_node, V4SI_type_node, NULL_TREE);
15199 tree v2di_ftype_v2di_v2di
15200 = build_function_type_list (V2DI_type_node,
15201 V2DI_type_node, V2DI_type_node, NULL_TREE);
15202 tree v2di_ftype_v2df_v2df
15203 = build_function_type_list (V2DI_type_node,
15204 V2DF_type_node, V2DF_type_node, NULL_TREE);
15205 tree v2df_ftype_v2df
15206 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15207 tree v2di_ftype_v2di_int
15208 = build_function_type_list (V2DI_type_node,
15209 V2DI_type_node, integer_type_node, NULL_TREE);
15210 tree v4si_ftype_v4si_int
15211 = build_function_type_list (V4SI_type_node,
15212 V4SI_type_node, integer_type_node, NULL_TREE);
15213 tree v8hi_ftype_v8hi_int
15214 = build_function_type_list (V8HI_type_node,
15215 V8HI_type_node, integer_type_node, NULL_TREE);
15216 tree v4si_ftype_v8hi_v8hi
15217 = build_function_type_list (V4SI_type_node,
15218 V8HI_type_node, V8HI_type_node, NULL_TREE);
15219 tree di_ftype_v8qi_v8qi
15220 = build_function_type_list (long_long_unsigned_type_node,
15221 V8QI_type_node, V8QI_type_node, NULL_TREE);
15222 tree di_ftype_v2si_v2si
15223 = build_function_type_list (long_long_unsigned_type_node,
15224 V2SI_type_node, V2SI_type_node, NULL_TREE);
15225 tree v2di_ftype_v16qi_v16qi
15226 = build_function_type_list (V2DI_type_node,
15227 V16QI_type_node, V16QI_type_node, NULL_TREE);
15228 tree v2di_ftype_v4si_v4si
15229 = build_function_type_list (V2DI_type_node,
15230 V4SI_type_node, V4SI_type_node, NULL_TREE);
15231 tree int_ftype_v16qi
15232 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15233 tree v16qi_ftype_pcchar
15234 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15235 tree void_ftype_pchar_v16qi
15236 = build_function_type_list (void_type_node,
15237 pchar_type_node, V16QI_type_node, NULL_TREE);
15240 tree float128_type;
15243 /* The __float80 type. */
15244 if (TYPE_MODE (long_double_type_node) == XFmode)
15245 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15249 /* The __float80 type. */
15250 float80_type = make_node (REAL_TYPE);
15251 TYPE_PRECISION (float80_type) = 80;
15252 layout_type (float80_type);
15253 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15258 float128_type = make_node (REAL_TYPE);
15259 TYPE_PRECISION (float128_type) = 128;
15260 layout_type (float128_type);
15261 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15264 /* Add all builtins that are more or less simple operations on two
15266 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15268 /* Use one of the operands; the target can have a different mode for
15269 mask-generating compares. */
15270 enum machine_mode mode;
15275 mode = insn_data[d->icode].operand[1].mode;
15280 type = v16qi_ftype_v16qi_v16qi;
15283 type = v8hi_ftype_v8hi_v8hi;
15286 type = v4si_ftype_v4si_v4si;
15289 type = v2di_ftype_v2di_v2di;
15292 type = v2df_ftype_v2df_v2df;
15295 type = v4sf_ftype_v4sf_v4sf;
15298 type = v8qi_ftype_v8qi_v8qi;
15301 type = v4hi_ftype_v4hi_v4hi;
15304 type = v2si_ftype_v2si_v2si;
15307 type = di_ftype_di_di;
15311 gcc_unreachable ();
15314 /* Override for comparisons. */
15315 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15316 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15317 type = v4si_ftype_v4sf_v4sf;
15319 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15320 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15321 type = v2di_ftype_v2df_v2df;
15323 def_builtin (d->mask, d->name, type, d->code);
15326 /* Add the remaining MMX insns with somewhat more complicated types. */
15327 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15328 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15329 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15330 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15332 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15333 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15334 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15336 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15337 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15339 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15340 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15342 /* comi/ucomi insns. */
15343 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15344 if (d->mask == MASK_SSE2)
15345 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15347 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15349 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15350 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15351 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15353 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15354 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15355 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15356 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15357 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15358 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15359 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15360 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15361 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15362 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15363 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15365 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15367 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15368 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15370 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15371 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15372 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15373 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15375 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15376 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15377 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15378 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15380 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15382 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15384 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15385 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15386 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15387 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15388 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15389 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15391 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15393 /* Original 3DNow! */
15394 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15395 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15396 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15397 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15398 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15399 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15400 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15401 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15402 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15403 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15404 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15405 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15406 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15407 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15408 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15409 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15410 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15411 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15412 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15413 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15415 /* 3DNow! extension as used in the Athlon CPU. */
15416 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15417 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15418 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15419 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15420 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15421 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15424 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15426 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15427 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15429 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15430 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15432 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15433 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15434 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15435 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15436 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15438 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15439 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15440 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15441 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15443 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15444 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15446 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15448 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15449 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15451 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15452 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15453 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15454 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15455 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15457 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15459 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15460 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15461 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15462 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15464 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15465 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15466 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15468 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15469 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15470 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15471 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15473 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15474 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15475 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15477 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15478 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15480 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15481 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15483 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15484 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15485 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15487 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15488 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15489 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15491 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15492 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15494 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15495 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15496 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15497 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15499 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15500 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15501 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15502 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15504 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15505 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15507 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15509 /* Prescott New Instructions. */
15510 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15511 void_ftype_pcvoid_unsigned_unsigned,
15512 IX86_BUILTIN_MONITOR);
15513 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15514 void_ftype_unsigned_unsigned,
15515 IX86_BUILTIN_MWAIT);
15516 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15518 IX86_BUILTIN_MOVSHDUP);
15519 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15521 IX86_BUILTIN_MOVSLDUP);
15522 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15523 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15525 /* Access to the vec_init patterns. */
15526 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15527 integer_type_node, NULL_TREE);
15528 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15529 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15531 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15532 short_integer_type_node,
15533 short_integer_type_node,
15534 short_integer_type_node, NULL_TREE);
15535 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15536 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15538 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15539 char_type_node, char_type_node,
15540 char_type_node, char_type_node,
15541 char_type_node, char_type_node,
15542 char_type_node, NULL_TREE);
15543 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15544 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15546 /* Access to the vec_extract patterns. */
15547 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15548 integer_type_node, NULL_TREE);
15549 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15550 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15552 ftype = build_function_type_list (long_long_integer_type_node,
15553 V2DI_type_node, integer_type_node,
15555 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15556 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15558 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15559 integer_type_node, NULL_TREE);
15560 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15561 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15563 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15564 integer_type_node, NULL_TREE);
15565 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15566 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15568 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15569 integer_type_node, NULL_TREE);
15570 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15571 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15573 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15574 integer_type_node, NULL_TREE);
15575 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15576 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15578 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15579 integer_type_node, NULL_TREE);
15580 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15581 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15583 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15584 integer_type_node, NULL_TREE);
15585 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15587 /* Access to the vec_set patterns. */
15588 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15590 integer_type_node, NULL_TREE);
15591 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15592 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15594 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15596 integer_type_node, NULL_TREE);
15597 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15598 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15601 /* Errors in the source file can cause expand_expr to return const0_rtx
15602 where we expect a vector. To avoid crashing, use one of the vector
15603 clear instructions. */
15605 safe_vector_operand (rtx x, enum machine_mode mode)
15607 if (x == const0_rtx)
15608 x = CONST0_RTX (mode);
15612 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15615 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15618 tree arg0 = TREE_VALUE (arglist);
15619 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15620 rtx op0 = expand_normal (arg0);
15621 rtx op1 = expand_normal (arg1);
15622 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15623 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15624 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15626 if (VECTOR_MODE_P (mode0))
15627 op0 = safe_vector_operand (op0, mode0);
15628 if (VECTOR_MODE_P (mode1))
15629 op1 = safe_vector_operand (op1, mode1);
15631 if (optimize || !target
15632 || GET_MODE (target) != tmode
15633 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15634 target = gen_reg_rtx (tmode);
15636 if (GET_MODE (op1) == SImode && mode1 == TImode)
15638 rtx x = gen_reg_rtx (V4SImode);
15639 emit_insn (gen_sse2_loadd (x, op1));
15640 op1 = gen_lowpart (TImode, x);
15643 /* The insn must want input operands in the same modes as the
15645 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15646 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15648 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15649 op0 = copy_to_mode_reg (mode0, op0);
15650 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15651 op1 = copy_to_mode_reg (mode1, op1);
15653 /* ??? Using ix86_fixup_binary_operands is problematic when
15654 we've got mismatched modes. Fake it. */
15660 if (tmode == mode0 && tmode == mode1)
15662 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15666 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15668 op0 = force_reg (mode0, op0);
15669 op1 = force_reg (mode1, op1);
15670 target = gen_reg_rtx (tmode);
15673 pat = GEN_FCN (icode) (target, op0, op1);
15680 /* Subroutine of ix86_expand_builtin to take care of stores. */
15683 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15686 tree arg0 = TREE_VALUE (arglist);
15687 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15688 rtx op0 = expand_normal (arg0);
15689 rtx op1 = expand_normal (arg1);
15690 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15691 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15693 if (VECTOR_MODE_P (mode1))
15694 op1 = safe_vector_operand (op1, mode1);
15696 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15697 op1 = copy_to_mode_reg (mode1, op1);
15699 pat = GEN_FCN (icode) (op0, op1);
15705 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15708 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15709 rtx target, int do_load)
15712 tree arg0 = TREE_VALUE (arglist);
15713 rtx op0 = expand_normal (arg0);
15714 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15715 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15717 if (optimize || !target
15718 || GET_MODE (target) != tmode
15719 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15720 target = gen_reg_rtx (tmode);
15722 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15725 if (VECTOR_MODE_P (mode0))
15726 op0 = safe_vector_operand (op0, mode0);
15728 if ((optimize && !register_operand (op0, mode0))
15729 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15730 op0 = copy_to_mode_reg (mode0, op0);
15733 pat = GEN_FCN (icode) (target, op0);
15740 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15741 sqrtss, rsqrtss, rcpss. */
15744 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15747 tree arg0 = TREE_VALUE (arglist);
15748 rtx op1, op0 = expand_normal (arg0);
15749 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15750 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15752 if (optimize || !target
15753 || GET_MODE (target) != tmode
15754 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15755 target = gen_reg_rtx (tmode);
15757 if (VECTOR_MODE_P (mode0))
15758 op0 = safe_vector_operand (op0, mode0);
15760 if ((optimize && !register_operand (op0, mode0))
15761 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15762 op0 = copy_to_mode_reg (mode0, op0);
15765 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15766 op1 = copy_to_mode_reg (mode0, op1);
15768 pat = GEN_FCN (icode) (target, op0, op1);
15775 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15778 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15782 tree arg0 = TREE_VALUE (arglist);
15783 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15784 rtx op0 = expand_normal (arg0);
15785 rtx op1 = expand_normal (arg1);
15787 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15788 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15789 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15790 enum rtx_code comparison = d->comparison;
15792 if (VECTOR_MODE_P (mode0))
15793 op0 = safe_vector_operand (op0, mode0);
15794 if (VECTOR_MODE_P (mode1))
15795 op1 = safe_vector_operand (op1, mode1);
15797 /* Swap operands if we have a comparison that isn't available in
15799 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15801 rtx tmp = gen_reg_rtx (mode1);
15802 emit_move_insn (tmp, op1);
15807 if (optimize || !target
15808 || GET_MODE (target) != tmode
15809 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15810 target = gen_reg_rtx (tmode);
15812 if ((optimize && !register_operand (op0, mode0))
15813 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15814 op0 = copy_to_mode_reg (mode0, op0);
15815 if ((optimize && !register_operand (op1, mode1))
15816 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15817 op1 = copy_to_mode_reg (mode1, op1);
15819 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15820 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15827 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15830 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15834 tree arg0 = TREE_VALUE (arglist);
15835 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15836 rtx op0 = expand_normal (arg0);
15837 rtx op1 = expand_normal (arg1);
15839 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15840 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15841 enum rtx_code comparison = d->comparison;
15843 if (VECTOR_MODE_P (mode0))
15844 op0 = safe_vector_operand (op0, mode0);
15845 if (VECTOR_MODE_P (mode1))
15846 op1 = safe_vector_operand (op1, mode1);
15848 /* Swap operands if we have a comparison that isn't available in
15850 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15857 target = gen_reg_rtx (SImode);
15858 emit_move_insn (target, const0_rtx);
15859 target = gen_rtx_SUBREG (QImode, target, 0);
15861 if ((optimize && !register_operand (op0, mode0))
15862 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15863 op0 = copy_to_mode_reg (mode0, op0);
15864 if ((optimize && !register_operand (op1, mode1))
15865 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15866 op1 = copy_to_mode_reg (mode1, op1);
15868 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15869 pat = GEN_FCN (d->icode) (op0, op1);
15873 emit_insn (gen_rtx_SET (VOIDmode,
15874 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15875 gen_rtx_fmt_ee (comparison, QImode,
15879 return SUBREG_REG (target);
15882 /* Return the integer constant in ARG. Constrain it to be in the range
15883 of the subparts of VEC_TYPE; issue an error if not. */
15886 get_element_number (tree vec_type, tree arg)
15888 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15890 if (!host_integerp (arg, 1)
15891 || (elt = tree_low_cst (arg, 1), elt > max))
15893 error ("selector must be an integer constant in the range 0..%wi", max);
15900 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15901 ix86_expand_vector_init. We DO have language-level syntax for this, in
15902 the form of (type){ init-list }. Except that since we can't place emms
15903 instructions from inside the compiler, we can't allow the use of MMX
15904 registers unless the user explicitly asks for it. So we do *not* define
15905 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15906 we have builtins invoked by mmintrin.h that gives us license to emit
15907 these sorts of instructions. */
15910 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15912 enum machine_mode tmode = TYPE_MODE (type);
15913 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15914 int i, n_elt = GET_MODE_NUNITS (tmode);
15915 rtvec v = rtvec_alloc (n_elt);
15917 gcc_assert (VECTOR_MODE_P (tmode));
15919 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15921 rtx x = expand_normal (TREE_VALUE (arglist));
15922 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15925 gcc_assert (arglist == NULL);
15927 if (!target || !register_operand (target, tmode))
15928 target = gen_reg_rtx (tmode);
15930 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15934 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15935 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15936 had a language-level syntax for referencing vector elements. */
15939 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15941 enum machine_mode tmode, mode0;
15946 arg0 = TREE_VALUE (arglist);
15947 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15949 op0 = expand_normal (arg0);
15950 elt = get_element_number (TREE_TYPE (arg0), arg1);
15952 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15953 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15954 gcc_assert (VECTOR_MODE_P (mode0));
15956 op0 = force_reg (mode0, op0);
15958 if (optimize || !target || !register_operand (target, tmode))
15959 target = gen_reg_rtx (tmode);
15961 ix86_expand_vector_extract (true, target, op0, elt);
15966 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15967 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15968 a language-level syntax for referencing vector elements. */
15971 ix86_expand_vec_set_builtin (tree arglist)
15973 enum machine_mode tmode, mode1;
15974 tree arg0, arg1, arg2;
15976 rtx op0, op1, target;
15978 arg0 = TREE_VALUE (arglist);
15979 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15980 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15982 tmode = TYPE_MODE (TREE_TYPE (arg0));
15983 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15984 gcc_assert (VECTOR_MODE_P (tmode));
15986 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15987 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15988 elt = get_element_number (TREE_TYPE (arg0), arg2);
15990 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15991 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15993 op0 = force_reg (tmode, op0);
15994 op1 = force_reg (mode1, op1);
15996 /* OP0 is the source of these builtin functions and shouldn't be
15997 modified. Create a copy, use it and return it as target. */
15998 target = gen_reg_rtx (tmode);
15999 emit_move_insn (target, op0);
16000 ix86_expand_vector_set (true, target, op1, elt);
16005 /* Expand an expression EXP that calls a built-in function,
16006 with result going to TARGET if that's convenient
16007 (and in mode MODE if that's convenient).
16008 SUBTARGET may be used as the target for computing one of EXP's operands.
16009 IGNORE is nonzero if the value is to be ignored. */
16012 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16013 enum machine_mode mode ATTRIBUTE_UNUSED,
16014 int ignore ATTRIBUTE_UNUSED)
16016 const struct builtin_description *d;
16018 enum insn_code icode;
16019 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16020 tree arglist = TREE_OPERAND (exp, 1);
16021 tree arg0, arg1, arg2;
16022 rtx op0, op1, op2, pat;
16023 enum machine_mode tmode, mode0, mode1, mode2;
16024 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16028 case IX86_BUILTIN_EMMS:
16029 emit_insn (gen_mmx_emms ());
16032 case IX86_BUILTIN_SFENCE:
16033 emit_insn (gen_sse_sfence ());
16036 case IX86_BUILTIN_MASKMOVQ:
16037 case IX86_BUILTIN_MASKMOVDQU:
16038 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16039 ? CODE_FOR_mmx_maskmovq
16040 : CODE_FOR_sse2_maskmovdqu);
16041 /* Note the arg order is different from the operand order. */
16042 arg1 = TREE_VALUE (arglist);
16043 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16044 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16045 op0 = expand_normal (arg0);
16046 op1 = expand_normal (arg1);
16047 op2 = expand_normal (arg2);
16048 mode0 = insn_data[icode].operand[0].mode;
16049 mode1 = insn_data[icode].operand[1].mode;
16050 mode2 = insn_data[icode].operand[2].mode;
16052 op0 = force_reg (Pmode, op0);
16053 op0 = gen_rtx_MEM (mode1, op0);
16055 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16056 op0 = copy_to_mode_reg (mode0, op0);
16057 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16058 op1 = copy_to_mode_reg (mode1, op1);
16059 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16060 op2 = copy_to_mode_reg (mode2, op2);
16061 pat = GEN_FCN (icode) (op0, op1, op2);
16067 case IX86_BUILTIN_SQRTSS:
16068 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16069 case IX86_BUILTIN_RSQRTSS:
16070 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16071 case IX86_BUILTIN_RCPSS:
16072 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16074 case IX86_BUILTIN_LOADUPS:
16075 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16077 case IX86_BUILTIN_STOREUPS:
16078 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16080 case IX86_BUILTIN_LOADHPS:
16081 case IX86_BUILTIN_LOADLPS:
16082 case IX86_BUILTIN_LOADHPD:
16083 case IX86_BUILTIN_LOADLPD:
16084 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16085 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16086 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16087 : CODE_FOR_sse2_loadlpd);
16088 arg0 = TREE_VALUE (arglist);
16089 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16090 op0 = expand_normal (arg0);
16091 op1 = expand_normal (arg1);
16092 tmode = insn_data[icode].operand[0].mode;
16093 mode0 = insn_data[icode].operand[1].mode;
16094 mode1 = insn_data[icode].operand[2].mode;
16096 op0 = force_reg (mode0, op0);
16097 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16098 if (optimize || target == 0
16099 || GET_MODE (target) != tmode
16100 || !register_operand (target, tmode))
16101 target = gen_reg_rtx (tmode);
16102 pat = GEN_FCN (icode) (target, op0, op1);
16108 case IX86_BUILTIN_STOREHPS:
16109 case IX86_BUILTIN_STORELPS:
16110 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16111 : CODE_FOR_sse_storelps);
16112 arg0 = TREE_VALUE (arglist);
16113 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16114 op0 = expand_normal (arg0);
16115 op1 = expand_normal (arg1);
16116 mode0 = insn_data[icode].operand[0].mode;
16117 mode1 = insn_data[icode].operand[1].mode;
16119 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16120 op1 = force_reg (mode1, op1);
16122 pat = GEN_FCN (icode) (op0, op1);
16128 case IX86_BUILTIN_MOVNTPS:
16129 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16130 case IX86_BUILTIN_MOVNTQ:
16131 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16133 case IX86_BUILTIN_LDMXCSR:
16134 op0 = expand_normal (TREE_VALUE (arglist));
16135 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16136 emit_move_insn (target, op0);
16137 emit_insn (gen_sse_ldmxcsr (target));
16140 case IX86_BUILTIN_STMXCSR:
16141 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16142 emit_insn (gen_sse_stmxcsr (target));
16143 return copy_to_mode_reg (SImode, target);
16145 case IX86_BUILTIN_SHUFPS:
16146 case IX86_BUILTIN_SHUFPD:
16147 icode = (fcode == IX86_BUILTIN_SHUFPS
16148 ? CODE_FOR_sse_shufps
16149 : CODE_FOR_sse2_shufpd);
16150 arg0 = TREE_VALUE (arglist);
16151 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16152 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16153 op0 = expand_normal (arg0);
16154 op1 = expand_normal (arg1);
16155 op2 = expand_normal (arg2);
16156 tmode = insn_data[icode].operand[0].mode;
16157 mode0 = insn_data[icode].operand[1].mode;
16158 mode1 = insn_data[icode].operand[2].mode;
16159 mode2 = insn_data[icode].operand[3].mode;
16161 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16162 op0 = copy_to_mode_reg (mode0, op0);
16163 if ((optimize && !register_operand (op1, mode1))
16164 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16165 op1 = copy_to_mode_reg (mode1, op1);
16166 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16168 /* @@@ better error message */
16169 error ("mask must be an immediate");
16170 return gen_reg_rtx (tmode);
16172 if (optimize || target == 0
16173 || GET_MODE (target) != tmode
16174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16175 target = gen_reg_rtx (tmode);
16176 pat = GEN_FCN (icode) (target, op0, op1, op2);
16182 case IX86_BUILTIN_PSHUFW:
16183 case IX86_BUILTIN_PSHUFD:
16184 case IX86_BUILTIN_PSHUFHW:
16185 case IX86_BUILTIN_PSHUFLW:
16186 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16187 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16188 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16189 : CODE_FOR_mmx_pshufw);
16190 arg0 = TREE_VALUE (arglist);
16191 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16192 op0 = expand_normal (arg0);
16193 op1 = expand_normal (arg1);
16194 tmode = insn_data[icode].operand[0].mode;
16195 mode1 = insn_data[icode].operand[1].mode;
16196 mode2 = insn_data[icode].operand[2].mode;
16198 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16199 op0 = copy_to_mode_reg (mode1, op0);
16200 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16202 /* @@@ better error message */
16203 error ("mask must be an immediate");
16207 || GET_MODE (target) != tmode
16208 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16209 target = gen_reg_rtx (tmode);
16210 pat = GEN_FCN (icode) (target, op0, op1);
16216 case IX86_BUILTIN_PSLLWI128:
16217 icode = CODE_FOR_ashlv8hi3;
16219 case IX86_BUILTIN_PSLLDI128:
16220 icode = CODE_FOR_ashlv4si3;
16222 case IX86_BUILTIN_PSLLQI128:
16223 icode = CODE_FOR_ashlv2di3;
16225 case IX86_BUILTIN_PSRAWI128:
16226 icode = CODE_FOR_ashrv8hi3;
16228 case IX86_BUILTIN_PSRADI128:
16229 icode = CODE_FOR_ashrv4si3;
16231 case IX86_BUILTIN_PSRLWI128:
16232 icode = CODE_FOR_lshrv8hi3;
16234 case IX86_BUILTIN_PSRLDI128:
16235 icode = CODE_FOR_lshrv4si3;
16237 case IX86_BUILTIN_PSRLQI128:
16238 icode = CODE_FOR_lshrv2di3;
16241 arg0 = TREE_VALUE (arglist);
16242 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16243 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16244 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16246 if (GET_CODE (op1) != CONST_INT)
16248 error ("shift must be an immediate");
16251 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16252 op1 = GEN_INT (255);
16254 tmode = insn_data[icode].operand[0].mode;
16255 mode1 = insn_data[icode].operand[1].mode;
16256 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16257 op0 = copy_to_reg (op0);
16259 target = gen_reg_rtx (tmode);
16260 pat = GEN_FCN (icode) (target, op0, op1);
16266 case IX86_BUILTIN_PSLLW128:
16267 icode = CODE_FOR_ashlv8hi3;
16269 case IX86_BUILTIN_PSLLD128:
16270 icode = CODE_FOR_ashlv4si3;
16272 case IX86_BUILTIN_PSLLQ128:
16273 icode = CODE_FOR_ashlv2di3;
16275 case IX86_BUILTIN_PSRAW128:
16276 icode = CODE_FOR_ashrv8hi3;
16278 case IX86_BUILTIN_PSRAD128:
16279 icode = CODE_FOR_ashrv4si3;
16281 case IX86_BUILTIN_PSRLW128:
16282 icode = CODE_FOR_lshrv8hi3;
16284 case IX86_BUILTIN_PSRLD128:
16285 icode = CODE_FOR_lshrv4si3;
16287 case IX86_BUILTIN_PSRLQ128:
16288 icode = CODE_FOR_lshrv2di3;
16291 arg0 = TREE_VALUE (arglist);
16292 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16293 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16294 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16296 tmode = insn_data[icode].operand[0].mode;
16297 mode1 = insn_data[icode].operand[1].mode;
16299 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16300 op0 = copy_to_reg (op0);
16302 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16303 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16304 op1 = copy_to_reg (op1);
16306 target = gen_reg_rtx (tmode);
16307 pat = GEN_FCN (icode) (target, op0, op1);
16313 case IX86_BUILTIN_PSLLDQI128:
16314 case IX86_BUILTIN_PSRLDQI128:
16315 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16316 : CODE_FOR_sse2_lshrti3);
16317 arg0 = TREE_VALUE (arglist);
16318 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16319 op0 = expand_normal (arg0);
16320 op1 = expand_normal (arg1);
16321 tmode = insn_data[icode].operand[0].mode;
16322 mode1 = insn_data[icode].operand[1].mode;
16323 mode2 = insn_data[icode].operand[2].mode;
16325 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16327 op0 = copy_to_reg (op0);
16328 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16330 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16332 error ("shift must be an immediate");
16335 target = gen_reg_rtx (V2DImode);
16336 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16343 case IX86_BUILTIN_FEMMS:
16344 emit_insn (gen_mmx_femms ());
16347 case IX86_BUILTIN_PAVGUSB:
16348 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16350 case IX86_BUILTIN_PF2ID:
16351 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16353 case IX86_BUILTIN_PFACC:
16354 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16356 case IX86_BUILTIN_PFADD:
16357 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16359 case IX86_BUILTIN_PFCMPEQ:
16360 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16362 case IX86_BUILTIN_PFCMPGE:
16363 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16365 case IX86_BUILTIN_PFCMPGT:
16366 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16368 case IX86_BUILTIN_PFMAX:
16369 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16371 case IX86_BUILTIN_PFMIN:
16372 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16374 case IX86_BUILTIN_PFMUL:
16375 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16377 case IX86_BUILTIN_PFRCP:
16378 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16380 case IX86_BUILTIN_PFRCPIT1:
16381 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16383 case IX86_BUILTIN_PFRCPIT2:
16384 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16386 case IX86_BUILTIN_PFRSQIT1:
16387 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16389 case IX86_BUILTIN_PFRSQRT:
16390 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16392 case IX86_BUILTIN_PFSUB:
16393 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16395 case IX86_BUILTIN_PFSUBR:
16396 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16398 case IX86_BUILTIN_PI2FD:
16399 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16401 case IX86_BUILTIN_PMULHRW:
16402 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16404 case IX86_BUILTIN_PF2IW:
16405 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16407 case IX86_BUILTIN_PFNACC:
16408 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16410 case IX86_BUILTIN_PFPNACC:
16411 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16413 case IX86_BUILTIN_PI2FW:
16414 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16416 case IX86_BUILTIN_PSWAPDSI:
16417 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16419 case IX86_BUILTIN_PSWAPDSF:
16420 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16422 case IX86_BUILTIN_SQRTSD:
16423 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16424 case IX86_BUILTIN_LOADUPD:
16425 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16426 case IX86_BUILTIN_STOREUPD:
16427 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16429 case IX86_BUILTIN_MFENCE:
16430 emit_insn (gen_sse2_mfence ());
16432 case IX86_BUILTIN_LFENCE:
16433 emit_insn (gen_sse2_lfence ());
16436 case IX86_BUILTIN_CLFLUSH:
16437 arg0 = TREE_VALUE (arglist);
16438 op0 = expand_normal (arg0);
16439 icode = CODE_FOR_sse2_clflush;
16440 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16441 op0 = copy_to_mode_reg (Pmode, op0);
16443 emit_insn (gen_sse2_clflush (op0));
16446 case IX86_BUILTIN_MOVNTPD:
16447 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16448 case IX86_BUILTIN_MOVNTDQ:
16449 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16450 case IX86_BUILTIN_MOVNTI:
16451 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16453 case IX86_BUILTIN_LOADDQU:
16454 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16455 case IX86_BUILTIN_STOREDQU:
16456 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16458 case IX86_BUILTIN_MONITOR:
16459 arg0 = TREE_VALUE (arglist);
16460 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16461 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16462 op0 = expand_normal (arg0);
16463 op1 = expand_normal (arg1);
16464 op2 = expand_normal (arg2);
16466 op0 = copy_to_mode_reg (Pmode, op0);
16468 op1 = copy_to_mode_reg (SImode, op1);
16470 op2 = copy_to_mode_reg (SImode, op2);
16472 emit_insn (gen_sse3_monitor (op0, op1, op2));
16474 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16477 case IX86_BUILTIN_MWAIT:
16478 arg0 = TREE_VALUE (arglist);
16479 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16480 op0 = expand_normal (arg0);
16481 op1 = expand_normal (arg1);
16483 op0 = copy_to_mode_reg (SImode, op0);
16485 op1 = copy_to_mode_reg (SImode, op1);
16486 emit_insn (gen_sse3_mwait (op0, op1));
16489 case IX86_BUILTIN_LDDQU:
16490 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16493 case IX86_BUILTIN_VEC_INIT_V2SI:
16494 case IX86_BUILTIN_VEC_INIT_V4HI:
16495 case IX86_BUILTIN_VEC_INIT_V8QI:
16496 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16498 case IX86_BUILTIN_VEC_EXT_V2DF:
16499 case IX86_BUILTIN_VEC_EXT_V2DI:
16500 case IX86_BUILTIN_VEC_EXT_V4SF:
16501 case IX86_BUILTIN_VEC_EXT_V4SI:
16502 case IX86_BUILTIN_VEC_EXT_V8HI:
16503 case IX86_BUILTIN_VEC_EXT_V16QI:
16504 case IX86_BUILTIN_VEC_EXT_V2SI:
16505 case IX86_BUILTIN_VEC_EXT_V4HI:
16506 return ix86_expand_vec_ext_builtin (arglist, target);
16508 case IX86_BUILTIN_VEC_SET_V8HI:
16509 case IX86_BUILTIN_VEC_SET_V4HI:
16510 return ix86_expand_vec_set_builtin (arglist);
16516 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16517 if (d->code == fcode)
16519 /* Compares are treated specially. */
16520 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16521 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16522 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16523 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16524 return ix86_expand_sse_compare (d, arglist, target);
16526 return ix86_expand_binop_builtin (d->icode, arglist, target);
16529 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16530 if (d->code == fcode)
16531 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16533 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16534 if (d->code == fcode)
16535 return ix86_expand_sse_comi (d, arglist, target);
16537 gcc_unreachable ();
16540 /* Store OPERAND to the memory after reload is completed. This means
16541 that we can't easily use assign_stack_local. */
16543 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16547 gcc_assert (reload_completed);
16548 if (TARGET_RED_ZONE)
16550 result = gen_rtx_MEM (mode,
16551 gen_rtx_PLUS (Pmode,
16553 GEN_INT (-RED_ZONE_SIZE)));
16554 emit_move_insn (result, operand);
16556 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16562 operand = gen_lowpart (DImode, operand);
16566 gen_rtx_SET (VOIDmode,
16567 gen_rtx_MEM (DImode,
16568 gen_rtx_PRE_DEC (DImode,
16569 stack_pointer_rtx)),
16573 gcc_unreachable ();
16575 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16584 split_di (&operand, 1, operands, operands + 1);
16586 gen_rtx_SET (VOIDmode,
16587 gen_rtx_MEM (SImode,
16588 gen_rtx_PRE_DEC (Pmode,
16589 stack_pointer_rtx)),
16592 gen_rtx_SET (VOIDmode,
16593 gen_rtx_MEM (SImode,
16594 gen_rtx_PRE_DEC (Pmode,
16595 stack_pointer_rtx)),
16600 /* Store HImodes as SImodes. */
16601 operand = gen_lowpart (SImode, operand);
16605 gen_rtx_SET (VOIDmode,
16606 gen_rtx_MEM (GET_MODE (operand),
16607 gen_rtx_PRE_DEC (SImode,
16608 stack_pointer_rtx)),
16612 gcc_unreachable ();
16614 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16619 /* Free operand from the memory. */
16621 ix86_free_from_memory (enum machine_mode mode)
16623 if (!TARGET_RED_ZONE)
16627 if (mode == DImode || TARGET_64BIT)
16631 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16632 to pop or add instruction if registers are available. */
16633 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16634 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16639 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16640 QImode must go into class Q_REGS.
16641 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16642 movdf to do mem-to-mem moves through integer regs. */
16644 ix86_preferred_reload_class (rtx x, enum reg_class class)
16646 enum machine_mode mode = GET_MODE (x);
16648 /* We're only allowed to return a subclass of CLASS. Many of the
16649 following checks fail for NO_REGS, so eliminate that early. */
16650 if (class == NO_REGS)
16653 /* All classes can load zeros. */
16654 if (x == CONST0_RTX (mode))
16657 /* Force constants into memory if we are loading a (nonzero) constant into
16658 an MMX or SSE register. This is because there are no MMX/SSE instructions
16659 to load from a constant. */
16661 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16664 /* Prefer SSE regs only, if we can use them for math. */
16665 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16666 return SSE_CLASS_P (class) ? class : NO_REGS;
16668 /* Floating-point constants need more complex checks. */
16669 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16671 /* General regs can load everything. */
16672 if (reg_class_subset_p (class, GENERAL_REGS))
16675 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16676 zero above. We only want to wind up preferring 80387 registers if
16677 we plan on doing computation with them. */
16679 && standard_80387_constant_p (x))
16681 /* Limit class to non-sse. */
16682 if (class == FLOAT_SSE_REGS)
16684 if (class == FP_TOP_SSE_REGS)
16686 if (class == FP_SECOND_SSE_REGS)
16687 return FP_SECOND_REG;
16688 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16695 /* Generally when we see PLUS here, it's the function invariant
16696 (plus soft-fp const_int). Which can only be computed into general
16698 if (GET_CODE (x) == PLUS)
16699 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16701 /* QImode constants are easy to load, but non-constant QImode data
16702 must go into Q_REGS. */
16703 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16705 if (reg_class_subset_p (class, Q_REGS))
16707 if (reg_class_subset_p (Q_REGS, class))
16715 /* Discourage putting floating-point values in SSE registers unless
16716 SSE math is being used, and likewise for the 387 registers. */
16718 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16720 enum machine_mode mode = GET_MODE (x);
16722 /* Restrict the output reload class to the register bank that we are doing
16723 math on. If we would like not to return a subset of CLASS, reject this
16724 alternative: if reload cannot do this, it will still use its choice. */
16725 mode = GET_MODE (x);
16726 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16727 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16729 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16731 if (class == FP_TOP_SSE_REGS)
16733 else if (class == FP_SECOND_SSE_REGS)
16734 return FP_SECOND_REG;
16736 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16742 /* If we are copying between general and FP registers, we need a memory
16743 location. The same is true for SSE and MMX registers.
16745 The macro can't work reliably when one of the CLASSES is class containing
16746 registers from multiple units (SSE, MMX, integer). We avoid this by never
16747 combining those units in single alternative in the machine description.
16748 Ensure that this constraint holds to avoid unexpected surprises.
16750 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16751 enforce these sanity checks. */
16754 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16755 enum machine_mode mode, int strict)
16757 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16758 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16759 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16760 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16761 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16762 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16764 gcc_assert (!strict);
16768 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16771 /* ??? This is a lie. We do have moves between mmx/general, and for
16772 mmx/sse2. But by saying we need secondary memory we discourage the
16773 register allocator from using the mmx registers unless needed. */
16774 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16777 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16779 /* SSE1 doesn't have any direct moves from other classes. */
16783 /* If the target says that inter-unit moves are more expensive
16784 than moving through memory, then don't generate them. */
16785 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16788 /* Between SSE and general, we have moves no larger than word size. */
16789 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16792 /* ??? For the cost of one register reformat penalty, we could use
16793 the same instructions to move SFmode and DFmode data, but the
16794 relevant move patterns don't support those alternatives. */
16795 if (mode == SFmode || mode == DFmode)
16802 /* Return true if the registers in CLASS cannot represent the change from
16803 modes FROM to TO. */
16806 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16807 enum reg_class class)
16812 /* x87 registers can't do subreg at all, as all values are reformatted
16813 to extended precision. */
16814 if (MAYBE_FLOAT_CLASS_P (class))
16817 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16819 /* Vector registers do not support QI or HImode loads. If we don't
16820 disallow a change to these modes, reload will assume it's ok to
16821 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16822 the vec_dupv4hi pattern. */
16823 if (GET_MODE_SIZE (from) < 4)
16826 /* Vector registers do not support subreg with nonzero offsets, which
16827 are otherwise valid for integer registers. Since we can't see
16828 whether we have a nonzero offset from here, prohibit all
16829 nonparadoxical subregs changing size. */
16830 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16837 /* Return the cost of moving data from a register in class CLASS1 to
16838 one in class CLASS2.
16840 It is not required that the cost always equal 2 when FROM is the same as TO;
16841 on some machines it is expensive to move between registers if they are not
16842 general registers. */
16845 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16846 enum reg_class class2)
16848 /* In case we require secondary memory, compute cost of the store followed
16849 by load. In order to avoid bad register allocation choices, we need
16850 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16852 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16856 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16857 MEMORY_MOVE_COST (mode, class1, 1));
16858 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16859 MEMORY_MOVE_COST (mode, class2, 1));
16861 /* In case of copying from general_purpose_register we may emit multiple
16862 stores followed by single load causing memory size mismatch stall.
16863 Count this as arbitrarily high cost of 20. */
16864 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16867 /* In the case of FP/MMX moves, the registers actually overlap, and we
16868 have to switch modes in order to treat them differently. */
16869 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16870 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16876 /* Moves between SSE/MMX and integer unit are expensive. */
16877 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16878 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16879 return ix86_cost->mmxsse_to_integer;
16880 if (MAYBE_FLOAT_CLASS_P (class1))
16881 return ix86_cost->fp_move;
16882 if (MAYBE_SSE_CLASS_P (class1))
16883 return ix86_cost->sse_move;
16884 if (MAYBE_MMX_CLASS_P (class1))
16885 return ix86_cost->mmx_move;
16889 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16892 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16894 /* Flags and only flags can only hold CCmode values. */
16895 if (CC_REGNO_P (regno))
16896 return GET_MODE_CLASS (mode) == MODE_CC;
16897 if (GET_MODE_CLASS (mode) == MODE_CC
16898 || GET_MODE_CLASS (mode) == MODE_RANDOM
16899 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16901 if (FP_REGNO_P (regno))
16902 return VALID_FP_MODE_P (mode);
16903 if (SSE_REGNO_P (regno))
16905 /* We implement the move patterns for all vector modes into and
16906 out of SSE registers, even when no operation instructions
16908 return (VALID_SSE_REG_MODE (mode)
16909 || VALID_SSE2_REG_MODE (mode)
16910 || VALID_MMX_REG_MODE (mode)
16911 || VALID_MMX_REG_MODE_3DNOW (mode));
16913 if (MMX_REGNO_P (regno))
16915 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16916 so if the register is available at all, then we can move data of
16917 the given mode into or out of it. */
16918 return (VALID_MMX_REG_MODE (mode)
16919 || VALID_MMX_REG_MODE_3DNOW (mode));
16922 if (mode == QImode)
16924 /* Take care for QImode values - they can be in non-QI regs,
16925 but then they do cause partial register stalls. */
16926 if (regno < 4 || TARGET_64BIT)
16928 if (!TARGET_PARTIAL_REG_STALL)
16930 return reload_in_progress || reload_completed;
16932 /* We handle both integer and floats in the general purpose registers. */
16933 else if (VALID_INT_MODE_P (mode))
16935 else if (VALID_FP_MODE_P (mode))
16937 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16938 on to use that value in smaller contexts, this can easily force a
16939 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16940 supporting DImode, allow it. */
16941 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16947 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16948 tieable integer mode. */
16951 ix86_tieable_integer_mode_p (enum machine_mode mode)
16960 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16963 return TARGET_64BIT;
16970 /* Return true if MODE1 is accessible in a register that can hold MODE2
16971 without copying. That is, all register classes that can hold MODE2
16972 can also hold MODE1. */
16975 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16977 if (mode1 == mode2)
16980 if (ix86_tieable_integer_mode_p (mode1)
16981 && ix86_tieable_integer_mode_p (mode2))
16984 /* MODE2 being XFmode implies fp stack or general regs, which means we
16985 can tie any smaller floating point modes to it. Note that we do not
16986 tie this with TFmode. */
16987 if (mode2 == XFmode)
16988 return mode1 == SFmode || mode1 == DFmode;
16990 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16991 that we can tie it with SFmode. */
16992 if (mode2 == DFmode)
16993 return mode1 == SFmode;
16995 /* If MODE2 is only appropriate for an SSE register, then tie with
16996 any other mode acceptable to SSE registers. */
16997 if (GET_MODE_SIZE (mode2) >= 8
16998 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16999 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17001 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17002 with any other mode acceptable to MMX registers. */
17003 if (GET_MODE_SIZE (mode2) == 8
17004 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17005 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17010 /* Return the cost of moving data of mode M between a
17011 register and memory. A value of 2 is the default; this cost is
17012 relative to those in `REGISTER_MOVE_COST'.
17014 If moving between registers and memory is more expensive than
17015 between two registers, you should define this macro to express the
17018 Model also increased moving costs of QImode registers in non
17022 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17024 if (FLOAT_CLASS_P (class))
17041 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17043 if (SSE_CLASS_P (class))
17046 switch (GET_MODE_SIZE (mode))
17060 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17062 if (MMX_CLASS_P (class))
17065 switch (GET_MODE_SIZE (mode))
17076 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17078 switch (GET_MODE_SIZE (mode))
17082 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17083 : ix86_cost->movzbl_load);
17085 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17086 : ix86_cost->int_store[0] + 4);
17089 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17091 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17092 if (mode == TFmode)
17094 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17095 * (((int) GET_MODE_SIZE (mode)
17096 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17100 /* Compute a (partial) cost for rtx X. Return true if the complete
17101 cost has been computed, and false if subexpressions should be
17102 scanned. In either case, *TOTAL contains the cost result. */
17105 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17107 enum machine_mode mode = GET_MODE (x);
17115 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17117 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17119 else if (flag_pic && SYMBOLIC_CONST (x)
17121 || (!GET_CODE (x) != LABEL_REF
17122 && (GET_CODE (x) != SYMBOL_REF
17123 || !SYMBOL_REF_LOCAL_P (x)))))
17130 if (mode == VOIDmode)
17133 switch (standard_80387_constant_p (x))
17138 default: /* Other constants */
17143 /* Start with (MEM (SYMBOL_REF)), since that's where
17144 it'll probably end up. Add a penalty for size. */
17145 *total = (COSTS_N_INSNS (1)
17146 + (flag_pic != 0 && !TARGET_64BIT)
17147 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17153 /* The zero extensions is often completely free on x86_64, so make
17154 it as cheap as possible. */
17155 if (TARGET_64BIT && mode == DImode
17156 && GET_MODE (XEXP (x, 0)) == SImode)
17158 else if (TARGET_ZERO_EXTEND_WITH_AND)
17159 *total = ix86_cost->add;
17161 *total = ix86_cost->movzx;
17165 *total = ix86_cost->movsx;
17169 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17170 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17172 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17175 *total = ix86_cost->add;
17178 if ((value == 2 || value == 3)
17179 && ix86_cost->lea <= ix86_cost->shift_const)
17181 *total = ix86_cost->lea;
17191 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17193 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17195 if (INTVAL (XEXP (x, 1)) > 32)
17196 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17198 *total = ix86_cost->shift_const * 2;
17202 if (GET_CODE (XEXP (x, 1)) == AND)
17203 *total = ix86_cost->shift_var * 2;
17205 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17210 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17211 *total = ix86_cost->shift_const;
17213 *total = ix86_cost->shift_var;
17218 if (FLOAT_MODE_P (mode))
17220 *total = ix86_cost->fmul;
17225 rtx op0 = XEXP (x, 0);
17226 rtx op1 = XEXP (x, 1);
17228 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17230 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17231 for (nbits = 0; value != 0; value &= value - 1)
17235 /* This is arbitrary. */
17238 /* Compute costs correctly for widening multiplication. */
17239 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17240 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17241 == GET_MODE_SIZE (mode))
17243 int is_mulwiden = 0;
17244 enum machine_mode inner_mode = GET_MODE (op0);
17246 if (GET_CODE (op0) == GET_CODE (op1))
17247 is_mulwiden = 1, op1 = XEXP (op1, 0);
17248 else if (GET_CODE (op1) == CONST_INT)
17250 if (GET_CODE (op0) == SIGN_EXTEND)
17251 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17254 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17258 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17261 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17262 + nbits * ix86_cost->mult_bit
17263 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17272 if (FLOAT_MODE_P (mode))
17273 *total = ix86_cost->fdiv;
17275 *total = ix86_cost->divide[MODE_INDEX (mode)];
17279 if (FLOAT_MODE_P (mode))
17280 *total = ix86_cost->fadd;
17281 else if (GET_MODE_CLASS (mode) == MODE_INT
17282 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17284 if (GET_CODE (XEXP (x, 0)) == PLUS
17285 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17286 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17287 && CONSTANT_P (XEXP (x, 1)))
17289 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17290 if (val == 2 || val == 4 || val == 8)
17292 *total = ix86_cost->lea;
17293 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17294 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17296 *total += rtx_cost (XEXP (x, 1), outer_code);
17300 else if (GET_CODE (XEXP (x, 0)) == MULT
17301 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17303 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17304 if (val == 2 || val == 4 || val == 8)
17306 *total = ix86_cost->lea;
17307 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17308 *total += rtx_cost (XEXP (x, 1), outer_code);
17312 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17314 *total = ix86_cost->lea;
17315 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17316 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17317 *total += rtx_cost (XEXP (x, 1), outer_code);
17324 if (FLOAT_MODE_P (mode))
17326 *total = ix86_cost->fadd;
17334 if (!TARGET_64BIT && mode == DImode)
17336 *total = (ix86_cost->add * 2
17337 + (rtx_cost (XEXP (x, 0), outer_code)
17338 << (GET_MODE (XEXP (x, 0)) != DImode))
17339 + (rtx_cost (XEXP (x, 1), outer_code)
17340 << (GET_MODE (XEXP (x, 1)) != DImode)));
17346 if (FLOAT_MODE_P (mode))
17348 *total = ix86_cost->fchs;
17354 if (!TARGET_64BIT && mode == DImode)
17355 *total = ix86_cost->add * 2;
17357 *total = ix86_cost->add;
17361 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17362 && XEXP (XEXP (x, 0), 1) == const1_rtx
17363 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17364 && XEXP (x, 1) == const0_rtx)
17366 /* This kind of construct is implemented using test[bwl].
17367 Treat it as if we had an AND. */
17368 *total = (ix86_cost->add
17369 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17370 + rtx_cost (const1_rtx, outer_code));
17376 if (!TARGET_SSE_MATH
17378 || (mode == DFmode && !TARGET_SSE2))
17379 /* For standard 80387 constants, raise the cost to prevent
17380 compress_float_constant() to generate load from memory. */
17381 switch (standard_80387_constant_p (XEXP (x, 0)))
17391 *total = (x86_ext_80387_constants & TUNEMASK
17398 if (FLOAT_MODE_P (mode))
17399 *total = ix86_cost->fabs;
17403 if (FLOAT_MODE_P (mode))
17404 *total = ix86_cost->fsqrt;
17408 if (XINT (x, 1) == UNSPEC_TP)
17419 static int current_machopic_label_num;
17421 /* Given a symbol name and its associated stub, write out the
17422 definition of the stub. */
17425 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17427 unsigned int length;
17428 char *binder_name, *symbol_name, lazy_ptr_name[32];
17429 int label = ++current_machopic_label_num;
17431 /* For 64-bit we shouldn't get here. */
17432 gcc_assert (!TARGET_64BIT);
17434 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17435 symb = (*targetm.strip_name_encoding) (symb);
17437 length = strlen (stub);
17438 binder_name = alloca (length + 32);
17439 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17441 length = strlen (symb);
17442 symbol_name = alloca (length + 32);
17443 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17445 sprintf (lazy_ptr_name, "L%d$lz", label);
17448 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17450 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17452 fprintf (file, "%s:\n", stub);
17453 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17457 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17458 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17459 fprintf (file, "\tjmp\t*%%edx\n");
17462 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17464 fprintf (file, "%s:\n", binder_name);
17468 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17469 fprintf (file, "\tpushl\t%%eax\n");
17472 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17474 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17476 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17477 fprintf (file, "%s:\n", lazy_ptr_name);
17478 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17479 fprintf (file, "\t.long %s\n", binder_name);
17483 darwin_x86_file_end (void)
17485 darwin_file_end ();
17488 #endif /* TARGET_MACHO */
17490 /* Order the registers for register allocator. */
17493 x86_order_regs_for_local_alloc (void)
17498 /* First allocate the local general purpose registers. */
17499 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17500 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17501 reg_alloc_order [pos++] = i;
17503 /* Global general purpose registers. */
17504 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17505 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17506 reg_alloc_order [pos++] = i;
17508 /* x87 registers come first in case we are doing FP math
17510 if (!TARGET_SSE_MATH)
17511 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17512 reg_alloc_order [pos++] = i;
17514 /* SSE registers. */
17515 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17516 reg_alloc_order [pos++] = i;
17517 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17518 reg_alloc_order [pos++] = i;
17520 /* x87 registers. */
17521 if (TARGET_SSE_MATH)
17522 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17523 reg_alloc_order [pos++] = i;
17525 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17526 reg_alloc_order [pos++] = i;
17528 /* Initialize the rest of array as we do not allocate some registers
17530 while (pos < FIRST_PSEUDO_REGISTER)
17531 reg_alloc_order [pos++] = 0;
17534 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17535 struct attribute_spec.handler. */
17537 ix86_handle_struct_attribute (tree *node, tree name,
17538 tree args ATTRIBUTE_UNUSED,
17539 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17542 if (DECL_P (*node))
17544 if (TREE_CODE (*node) == TYPE_DECL)
17545 type = &TREE_TYPE (*node);
17550 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17551 || TREE_CODE (*type) == UNION_TYPE)))
17553 warning (OPT_Wattributes, "%qs attribute ignored",
17554 IDENTIFIER_POINTER (name));
17555 *no_add_attrs = true;
17558 else if ((is_attribute_p ("ms_struct", name)
17559 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17560 || ((is_attribute_p ("gcc_struct", name)
17561 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17563 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17564 IDENTIFIER_POINTER (name));
17565 *no_add_attrs = true;
17572 ix86_ms_bitfield_layout_p (tree record_type)
17574 return (TARGET_MS_BITFIELD_LAYOUT &&
17575 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17576 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17579 /* Returns an expression indicating where the this parameter is
17580 located on entry to the FUNCTION. */
17583 x86_this_parameter (tree function)
17585 tree type = TREE_TYPE (function);
17589 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17590 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17593 if (ix86_function_regparm (type, function) > 0)
17597 parm = TYPE_ARG_TYPES (type);
17598 /* Figure out whether or not the function has a variable number of
17600 for (; parm; parm = TREE_CHAIN (parm))
17601 if (TREE_VALUE (parm) == void_type_node)
17603 /* If not, the this parameter is in the first argument. */
17607 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17609 return gen_rtx_REG (SImode, regno);
17613 if (aggregate_value_p (TREE_TYPE (type), type))
17614 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17616 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17619 /* Determine whether x86_output_mi_thunk can succeed. */
17622 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17623 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17624 HOST_WIDE_INT vcall_offset, tree function)
17626 /* 64-bit can handle anything. */
17630 /* For 32-bit, everything's fine if we have one free register. */
17631 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17634 /* Need a free register for vcall_offset. */
17638 /* Need a free register for GOT references. */
17639 if (flag_pic && !(*targetm.binds_local_p) (function))
17642 /* Otherwise ok. */
17646 /* Output the assembler code for a thunk function. THUNK_DECL is the
17647 declaration for the thunk function itself, FUNCTION is the decl for
17648 the target function. DELTA is an immediate constant offset to be
17649 added to THIS. If VCALL_OFFSET is nonzero, the word at
17650 *(*this + vcall_offset) should be added to THIS. */
17653 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17654 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17655 HOST_WIDE_INT vcall_offset, tree function)
17658 rtx this = x86_this_parameter (function);
17661 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17662 pull it in now and let DELTA benefit. */
17665 else if (vcall_offset)
17667 /* Put the this parameter into %eax. */
17669 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17670 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17673 this_reg = NULL_RTX;
17675 /* Adjust the this parameter by a fixed constant. */
17678 xops[0] = GEN_INT (delta);
17679 xops[1] = this_reg ? this_reg : this;
17682 if (!x86_64_general_operand (xops[0], DImode))
17684 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17686 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17690 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17693 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17696 /* Adjust the this parameter by a value stored in the vtable. */
17700 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17703 int tmp_regno = 2 /* ECX */;
17704 if (lookup_attribute ("fastcall",
17705 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17706 tmp_regno = 0 /* EAX */;
17707 tmp = gen_rtx_REG (SImode, tmp_regno);
17710 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17713 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17715 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17717 /* Adjust the this parameter. */
17718 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17719 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17721 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17722 xops[0] = GEN_INT (vcall_offset);
17724 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17725 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17727 xops[1] = this_reg;
17729 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17731 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17734 /* If necessary, drop THIS back to its stack slot. */
17735 if (this_reg && this_reg != this)
17737 xops[0] = this_reg;
17739 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17742 xops[0] = XEXP (DECL_RTL (function), 0);
17745 if (!flag_pic || (*targetm.binds_local_p) (function))
17746 output_asm_insn ("jmp\t%P0", xops);
17749 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17750 tmp = gen_rtx_CONST (Pmode, tmp);
17751 tmp = gen_rtx_MEM (QImode, tmp);
17753 output_asm_insn ("jmp\t%A0", xops);
17758 if (!flag_pic || (*targetm.binds_local_p) (function))
17759 output_asm_insn ("jmp\t%P0", xops);
17764 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17765 tmp = (gen_rtx_SYMBOL_REF
17767 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17768 tmp = gen_rtx_MEM (QImode, tmp);
17770 output_asm_insn ("jmp\t%0", xops);
17773 #endif /* TARGET_MACHO */
17775 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17776 output_set_got (tmp, NULL_RTX);
17779 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17780 output_asm_insn ("jmp\t{*}%1", xops);
17786 x86_file_start (void)
17788 default_file_start ();
17790 darwin_file_start ();
17792 if (X86_FILE_START_VERSION_DIRECTIVE)
17793 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17794 if (X86_FILE_START_FLTUSED)
17795 fputs ("\t.global\t__fltused\n", asm_out_file);
17796 if (ix86_asm_dialect == ASM_INTEL)
17797 fputs ("\t.intel_syntax\n", asm_out_file);
17801 x86_field_alignment (tree field, int computed)
17803 enum machine_mode mode;
17804 tree type = TREE_TYPE (field);
17806 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17808 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17809 ? get_inner_array_type (type) : type);
17810 if (mode == DFmode || mode == DCmode
17811 || GET_MODE_CLASS (mode) == MODE_INT
17812 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17813 return MIN (32, computed);
17817 /* Output assembler code to FILE to increment profiler label # LABELNO
17818 for profiling a function entry. */
17820 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17825 #ifndef NO_PROFILE_COUNTERS
17826 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17828 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17832 #ifndef NO_PROFILE_COUNTERS
17833 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17835 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17839 #ifndef NO_PROFILE_COUNTERS
17840 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17841 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17843 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17847 #ifndef NO_PROFILE_COUNTERS
17848 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17849 PROFILE_COUNT_REGISTER);
17851 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17855 /* We don't have exact information about the insn sizes, but we may assume
17856 quite safely that we are informed about all 1 byte insns and memory
17857 address sizes. This is enough to eliminate unnecessary padding in
17861 min_insn_size (rtx insn)
17865 if (!INSN_P (insn) || !active_insn_p (insn))
17868 /* Discard alignments we've emit and jump instructions. */
17869 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17870 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17872 if (GET_CODE (insn) == JUMP_INSN
17873 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17874 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17877 /* Important case - calls are always 5 bytes.
17878 It is common to have many calls in the row. */
17879 if (GET_CODE (insn) == CALL_INSN
17880 && symbolic_reference_mentioned_p (PATTERN (insn))
17881 && !SIBLING_CALL_P (insn))
17883 if (get_attr_length (insn) <= 1)
17886 /* For normal instructions we may rely on the sizes of addresses
17887 and the presence of symbol to require 4 bytes of encoding.
17888 This is not the case for jumps where references are PC relative. */
17889 if (GET_CODE (insn) != JUMP_INSN)
17891 l = get_attr_length_address (insn);
17892 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17901 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17905 ix86_avoid_jump_misspredicts (void)
17907 rtx insn, start = get_insns ();
17908 int nbytes = 0, njumps = 0;
17911 /* Look for all minimal intervals of instructions containing 4 jumps.
17912 The intervals are bounded by START and INSN. NBYTES is the total
17913 size of instructions in the interval including INSN and not including
17914 START. When the NBYTES is smaller than 16 bytes, it is possible
17915 that the end of START and INSN ends up in the same 16byte page.
17917 The smallest offset in the page INSN can start is the case where START
17918 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17919 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17921 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17924 nbytes += min_insn_size (insn);
17926 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17927 INSN_UID (insn), min_insn_size (insn));
17928 if ((GET_CODE (insn) == JUMP_INSN
17929 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17930 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17931 || GET_CODE (insn) == CALL_INSN)
17938 start = NEXT_INSN (start);
17939 if ((GET_CODE (start) == JUMP_INSN
17940 && GET_CODE (PATTERN (start)) != ADDR_VEC
17941 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17942 || GET_CODE (start) == CALL_INSN)
17943 njumps--, isjump = 1;
17946 nbytes -= min_insn_size (start);
17948 gcc_assert (njumps >= 0);
17950 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17951 INSN_UID (start), INSN_UID (insn), nbytes);
17953 if (njumps == 3 && isjump && nbytes < 16)
17955 int padsize = 15 - nbytes + min_insn_size (insn);
17958 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17959 INSN_UID (insn), padsize);
17960 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17965 /* AMD Athlon works faster
17966 when RET is not destination of conditional jump or directly preceded
17967 by other jump instruction. We avoid the penalty by inserting NOP just
17968 before the RET instructions in such cases. */
17970 ix86_pad_returns (void)
17975 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17977 basic_block bb = e->src;
17978 rtx ret = BB_END (bb);
17980 bool replace = false;
17982 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17983 || !maybe_hot_bb_p (bb))
17985 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17986 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17988 if (prev && GET_CODE (prev) == CODE_LABEL)
17993 FOR_EACH_EDGE (e, ei, bb->preds)
17994 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17995 && !(e->flags & EDGE_FALLTHRU))
18000 prev = prev_active_insn (ret);
18002 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18003 || GET_CODE (prev) == CALL_INSN))
18005 /* Empty functions get branch mispredict even when the jump destination
18006 is not visible to us. */
18007 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18012 emit_insn_before (gen_return_internal_long (), ret);
18018 /* Implement machine specific optimizations. We implement padding of returns
18019 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18023 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18024 ix86_pad_returns ();
18025 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18026 ix86_avoid_jump_misspredicts ();
18029 /* Return nonzero when QImode register that must be represented via REX prefix
18032 x86_extended_QIreg_mentioned_p (rtx insn)
18035 extract_insn_cached (insn);
18036 for (i = 0; i < recog_data.n_operands; i++)
18037 if (REG_P (recog_data.operand[i])
18038 && REGNO (recog_data.operand[i]) >= 4)
18043 /* Return nonzero when P points to register encoded via REX prefix.
18044 Called via for_each_rtx. */
18046 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18048 unsigned int regno;
18051 regno = REGNO (*p);
18052 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18055 /* Return true when INSN mentions register that must be encoded using REX
18058 x86_extended_reg_mentioned_p (rtx insn)
18060 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18063 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18064 optabs would emit if we didn't have TFmode patterns. */
18067 x86_emit_floatuns (rtx operands[2])
18069 rtx neglab, donelab, i0, i1, f0, in, out;
18070 enum machine_mode mode, inmode;
18072 inmode = GET_MODE (operands[1]);
18073 gcc_assert (inmode == SImode || inmode == DImode);
18076 in = force_reg (inmode, operands[1]);
18077 mode = GET_MODE (out);
18078 neglab = gen_label_rtx ();
18079 donelab = gen_label_rtx ();
18080 i1 = gen_reg_rtx (Pmode);
18081 f0 = gen_reg_rtx (mode);
18083 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18085 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18086 emit_jump_insn (gen_jump (donelab));
18089 emit_label (neglab);
18091 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18092 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18093 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18094 expand_float (f0, i0, 0);
18095 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18097 emit_label (donelab);
18100 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18101 with all elements equal to VAR. Return true if successful. */
18104 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18105 rtx target, rtx val)
18107 enum machine_mode smode, wsmode, wvmode;
18122 val = force_reg (GET_MODE_INNER (mode), val);
18123 x = gen_rtx_VEC_DUPLICATE (mode, val);
18124 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18130 if (TARGET_SSE || TARGET_3DNOW_A)
18132 val = gen_lowpart (SImode, val);
18133 x = gen_rtx_TRUNCATE (HImode, val);
18134 x = gen_rtx_VEC_DUPLICATE (mode, x);
18135 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18157 /* Extend HImode to SImode using a paradoxical SUBREG. */
18158 tmp1 = gen_reg_rtx (SImode);
18159 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18160 /* Insert the SImode value as low element of V4SImode vector. */
18161 tmp2 = gen_reg_rtx (V4SImode);
18162 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18163 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18164 CONST0_RTX (V4SImode),
18166 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18167 /* Cast the V4SImode vector back to a V8HImode vector. */
18168 tmp1 = gen_reg_rtx (V8HImode);
18169 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18170 /* Duplicate the low short through the whole low SImode word. */
18171 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18172 /* Cast the V8HImode vector back to a V4SImode vector. */
18173 tmp2 = gen_reg_rtx (V4SImode);
18174 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18175 /* Replicate the low element of the V4SImode vector. */
18176 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18177 /* Cast the V2SImode back to V8HImode, and store in target. */
18178 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18189 /* Extend QImode to SImode using a paradoxical SUBREG. */
18190 tmp1 = gen_reg_rtx (SImode);
18191 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18192 /* Insert the SImode value as low element of V4SImode vector. */
18193 tmp2 = gen_reg_rtx (V4SImode);
18194 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18195 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18196 CONST0_RTX (V4SImode),
18198 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18199 /* Cast the V4SImode vector back to a V16QImode vector. */
18200 tmp1 = gen_reg_rtx (V16QImode);
18201 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18202 /* Duplicate the low byte through the whole low SImode word. */
18203 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18204 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18205 /* Cast the V16QImode vector back to a V4SImode vector. */
18206 tmp2 = gen_reg_rtx (V4SImode);
18207 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18208 /* Replicate the low element of the V4SImode vector. */
18209 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18210 /* Cast the V2SImode back to V16QImode, and store in target. */
18211 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18219 /* Replicate the value once into the next wider mode and recurse. */
18220 val = convert_modes (wsmode, smode, val, true);
18221 x = expand_simple_binop (wsmode, ASHIFT, val,
18222 GEN_INT (GET_MODE_BITSIZE (smode)),
18223 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18224 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18226 x = gen_reg_rtx (wvmode);
18227 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18228 gcc_unreachable ();
18229 emit_move_insn (target, gen_lowpart (mode, x));
18237 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18238 whose ONE_VAR element is VAR, and other elements are zero. Return true
18242 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18243 rtx target, rtx var, int one_var)
18245 enum machine_mode vsimode;
18261 var = force_reg (GET_MODE_INNER (mode), var);
18262 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18263 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18268 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18269 new_target = gen_reg_rtx (mode);
18271 new_target = target;
18272 var = force_reg (GET_MODE_INNER (mode), var);
18273 x = gen_rtx_VEC_DUPLICATE (mode, var);
18274 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18275 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18278 /* We need to shuffle the value to the correct position, so
18279 create a new pseudo to store the intermediate result. */
18281 /* With SSE2, we can use the integer shuffle insns. */
18282 if (mode != V4SFmode && TARGET_SSE2)
18284 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18286 GEN_INT (one_var == 1 ? 0 : 1),
18287 GEN_INT (one_var == 2 ? 0 : 1),
18288 GEN_INT (one_var == 3 ? 0 : 1)));
18289 if (target != new_target)
18290 emit_move_insn (target, new_target);
18294 /* Otherwise convert the intermediate result to V4SFmode and
18295 use the SSE1 shuffle instructions. */
18296 if (mode != V4SFmode)
18298 tmp = gen_reg_rtx (V4SFmode);
18299 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18304 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18306 GEN_INT (one_var == 1 ? 0 : 1),
18307 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18308 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18310 if (mode != V4SFmode)
18311 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18312 else if (tmp != target)
18313 emit_move_insn (target, tmp);
18315 else if (target != new_target)
18316 emit_move_insn (target, new_target);
18321 vsimode = V4SImode;
18327 vsimode = V2SImode;
18333 /* Zero extend the variable element to SImode and recurse. */
18334 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18336 x = gen_reg_rtx (vsimode);
18337 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18339 gcc_unreachable ();
18341 emit_move_insn (target, gen_lowpart (mode, x));
18349 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18350 consisting of the values in VALS. It is known that all elements
18351 except ONE_VAR are constants. Return true if successful. */
18354 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18355 rtx target, rtx vals, int one_var)
18357 rtx var = XVECEXP (vals, 0, one_var);
18358 enum machine_mode wmode;
18361 const_vec = copy_rtx (vals);
18362 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18363 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18371 /* For the two element vectors, it's just as easy to use
18372 the general case. */
18388 /* There's no way to set one QImode entry easily. Combine
18389 the variable value with its adjacent constant value, and
18390 promote to an HImode set. */
18391 x = XVECEXP (vals, 0, one_var ^ 1);
18394 var = convert_modes (HImode, QImode, var, true);
18395 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18396 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18397 x = GEN_INT (INTVAL (x) & 0xff);
18401 var = convert_modes (HImode, QImode, var, true);
18402 x = gen_int_mode (INTVAL (x) << 8, HImode);
18404 if (x != const0_rtx)
18405 var = expand_simple_binop (HImode, IOR, var, x, var,
18406 1, OPTAB_LIB_WIDEN);
18408 x = gen_reg_rtx (wmode);
18409 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18410 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18412 emit_move_insn (target, gen_lowpart (mode, x));
18419 emit_move_insn (target, const_vec);
18420 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18424 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18425 all values variable, and none identical. */
18428 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18429 rtx target, rtx vals)
18431 enum machine_mode half_mode = GET_MODE_INNER (mode);
18432 rtx op0 = NULL, op1 = NULL;
18433 bool use_vec_concat = false;
18439 if (!mmx_ok && !TARGET_SSE)
18445 /* For the two element vectors, we always implement VEC_CONCAT. */
18446 op0 = XVECEXP (vals, 0, 0);
18447 op1 = XVECEXP (vals, 0, 1);
18448 use_vec_concat = true;
18452 half_mode = V2SFmode;
18455 half_mode = V2SImode;
18461 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18462 Recurse to load the two halves. */
18464 op0 = gen_reg_rtx (half_mode);
18465 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18466 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18468 op1 = gen_reg_rtx (half_mode);
18469 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18470 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18472 use_vec_concat = true;
18483 gcc_unreachable ();
18486 if (use_vec_concat)
18488 if (!register_operand (op0, half_mode))
18489 op0 = force_reg (half_mode, op0);
18490 if (!register_operand (op1, half_mode))
18491 op1 = force_reg (half_mode, op1);
18493 emit_insn (gen_rtx_SET (VOIDmode, target,
18494 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18498 int i, j, n_elts, n_words, n_elt_per_word;
18499 enum machine_mode inner_mode;
18500 rtx words[4], shift;
18502 inner_mode = GET_MODE_INNER (mode);
18503 n_elts = GET_MODE_NUNITS (mode);
18504 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18505 n_elt_per_word = n_elts / n_words;
18506 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18508 for (i = 0; i < n_words; ++i)
18510 rtx word = NULL_RTX;
18512 for (j = 0; j < n_elt_per_word; ++j)
18514 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18515 elt = convert_modes (word_mode, inner_mode, elt, true);
18521 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18522 word, 1, OPTAB_LIB_WIDEN);
18523 word = expand_simple_binop (word_mode, IOR, word, elt,
18524 word, 1, OPTAB_LIB_WIDEN);
18532 emit_move_insn (target, gen_lowpart (mode, words[0]));
18533 else if (n_words == 2)
18535 rtx tmp = gen_reg_rtx (mode);
18536 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18537 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18538 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18539 emit_move_insn (target, tmp);
18541 else if (n_words == 4)
18543 rtx tmp = gen_reg_rtx (V4SImode);
18544 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18545 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18546 emit_move_insn (target, gen_lowpart (mode, tmp));
18549 gcc_unreachable ();
18553 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18554 instructions unless MMX_OK is true. */
18557 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18559 enum machine_mode mode = GET_MODE (target);
18560 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18561 int n_elts = GET_MODE_NUNITS (mode);
18562 int n_var = 0, one_var = -1;
18563 bool all_same = true, all_const_zero = true;
18567 for (i = 0; i < n_elts; ++i)
18569 x = XVECEXP (vals, 0, i);
18570 if (!CONSTANT_P (x))
18571 n_var++, one_var = i;
18572 else if (x != CONST0_RTX (inner_mode))
18573 all_const_zero = false;
18574 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18578 /* Constants are best loaded from the constant pool. */
18581 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18585 /* If all values are identical, broadcast the value. */
18587 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18588 XVECEXP (vals, 0, 0)))
18591 /* Values where only one field is non-constant are best loaded from
18592 the pool and overwritten via move later. */
18596 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18597 XVECEXP (vals, 0, one_var),
18601 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18605 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18609 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18611 enum machine_mode mode = GET_MODE (target);
18612 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18613 bool use_vec_merge = false;
18622 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18623 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18625 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18627 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18628 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18638 /* For the two element vectors, we implement a VEC_CONCAT with
18639 the extraction of the other element. */
18641 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18642 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18645 op0 = val, op1 = tmp;
18647 op0 = tmp, op1 = val;
18649 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18650 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18658 use_vec_merge = true;
18662 /* tmp = target = A B C D */
18663 tmp = copy_to_reg (target);
18664 /* target = A A B B */
18665 emit_insn (gen_sse_unpcklps (target, target, target));
18666 /* target = X A B B */
18667 ix86_expand_vector_set (false, target, val, 0);
18668 /* target = A X C D */
18669 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18670 GEN_INT (1), GEN_INT (0),
18671 GEN_INT (2+4), GEN_INT (3+4)));
18675 /* tmp = target = A B C D */
18676 tmp = copy_to_reg (target);
18677 /* tmp = X B C D */
18678 ix86_expand_vector_set (false, tmp, val, 0);
18679 /* target = A B X D */
18680 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18681 GEN_INT (0), GEN_INT (1),
18682 GEN_INT (0+4), GEN_INT (3+4)));
18686 /* tmp = target = A B C D */
18687 tmp = copy_to_reg (target);
18688 /* tmp = X B C D */
18689 ix86_expand_vector_set (false, tmp, val, 0);
18690 /* target = A B X D */
18691 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18692 GEN_INT (0), GEN_INT (1),
18693 GEN_INT (2+4), GEN_INT (0+4)));
18697 gcc_unreachable ();
18702 /* Element 0 handled by vec_merge below. */
18705 use_vec_merge = true;
18711 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18712 store into element 0, then shuffle them back. */
18716 order[0] = GEN_INT (elt);
18717 order[1] = const1_rtx;
18718 order[2] = const2_rtx;
18719 order[3] = GEN_INT (3);
18720 order[elt] = const0_rtx;
18722 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18723 order[1], order[2], order[3]));
18725 ix86_expand_vector_set (false, target, val, 0);
18727 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18728 order[1], order[2], order[3]));
18732 /* For SSE1, we have to reuse the V4SF code. */
18733 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18734 gen_lowpart (SFmode, val), elt);
18739 use_vec_merge = TARGET_SSE2;
18742 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18753 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18754 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18755 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18759 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18761 emit_move_insn (mem, target);
18763 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18764 emit_move_insn (tmp, val);
18766 emit_move_insn (target, mem);
18771 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18773 enum machine_mode mode = GET_MODE (vec);
18774 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18775 bool use_vec_extr = false;
18788 use_vec_extr = true;
18800 tmp = gen_reg_rtx (mode);
18801 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18802 GEN_INT (elt), GEN_INT (elt),
18803 GEN_INT (elt+4), GEN_INT (elt+4)));
18807 tmp = gen_reg_rtx (mode);
18808 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18812 gcc_unreachable ();
18815 use_vec_extr = true;
18830 tmp = gen_reg_rtx (mode);
18831 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18832 GEN_INT (elt), GEN_INT (elt),
18833 GEN_INT (elt), GEN_INT (elt)));
18837 tmp = gen_reg_rtx (mode);
18838 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18842 gcc_unreachable ();
18845 use_vec_extr = true;
18850 /* For SSE1, we have to reuse the V4SF code. */
18851 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18852 gen_lowpart (V4SFmode, vec), elt);
18858 use_vec_extr = TARGET_SSE2;
18861 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18866 /* ??? Could extract the appropriate HImode element and shift. */
18873 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18874 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18876 /* Let the rtl optimizers know about the zero extension performed. */
18877 if (inner_mode == HImode)
18879 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18880 target = gen_lowpart (SImode, target);
18883 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18887 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18889 emit_move_insn (mem, vec);
18891 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18892 emit_move_insn (target, tmp);
18896 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18897 pattern to reduce; DEST is the destination; IN is the input vector. */
18900 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18902 rtx tmp1, tmp2, tmp3;
18904 tmp1 = gen_reg_rtx (V4SFmode);
18905 tmp2 = gen_reg_rtx (V4SFmode);
18906 tmp3 = gen_reg_rtx (V4SFmode);
18908 emit_insn (gen_sse_movhlps (tmp1, in, in));
18909 emit_insn (fn (tmp2, tmp1, in));
18911 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18912 GEN_INT (1), GEN_INT (1),
18913 GEN_INT (1+4), GEN_INT (1+4)));
18914 emit_insn (fn (dest, tmp2, tmp3));
18917 /* Target hook for scalar_mode_supported_p. */
18919 ix86_scalar_mode_supported_p (enum machine_mode mode)
18921 if (DECIMAL_FLOAT_MODE_P (mode))
18924 return default_scalar_mode_supported_p (mode);
18927 /* Implements target hook vector_mode_supported_p. */
18929 ix86_vector_mode_supported_p (enum machine_mode mode)
18931 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18933 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18935 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18937 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18942 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18944 We do this in the new i386 backend to maintain source compatibility
18945 with the old cc0-based compiler. */
18948 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18949 tree inputs ATTRIBUTE_UNUSED,
18952 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18954 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18956 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18961 /* Return true if this goes in small data/bss. */
18964 ix86_in_large_data_p (tree exp)
18966 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18969 /* Functions are never large data. */
18970 if (TREE_CODE (exp) == FUNCTION_DECL)
18973 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18975 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18976 if (strcmp (section, ".ldata") == 0
18977 || strcmp (section, ".lbss") == 0)
18983 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18985 /* If this is an incomplete type with size 0, then we can't put it
18986 in data because it might be too big when completed. */
18987 if (!size || size > ix86_section_threshold)
18994 ix86_encode_section_info (tree decl, rtx rtl, int first)
18996 default_encode_section_info (decl, rtl, first);
18998 if (TREE_CODE (decl) == VAR_DECL
18999 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19000 && ix86_in_large_data_p (decl))
19001 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19004 /* Worker function for REVERSE_CONDITION. */
19007 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19009 return (mode != CCFPmode && mode != CCFPUmode
19010 ? reverse_condition (code)
19011 : reverse_condition_maybe_unordered (code));
19014 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19018 output_387_reg_move (rtx insn, rtx *operands)
19020 if (REG_P (operands[1])
19021 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19023 if (REGNO (operands[0]) == FIRST_STACK_REG)
19024 return output_387_ffreep (operands, 0);
19025 return "fstp\t%y0";
19027 if (STACK_TOP_P (operands[0]))
19028 return "fld%z1\t%y1";
19032 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19033 FP status register is set. */
19036 ix86_emit_fp_unordered_jump (rtx label)
19038 rtx reg = gen_reg_rtx (HImode);
19041 emit_insn (gen_x86_fnstsw_1 (reg));
19043 if (TARGET_USE_SAHF)
19045 emit_insn (gen_x86_sahf_1 (reg));
19047 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19048 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19052 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19054 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19055 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19058 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19059 gen_rtx_LABEL_REF (VOIDmode, label),
19061 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19062 emit_jump_insn (temp);
19065 /* Output code to perform a log1p XFmode calculation. */
19067 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19069 rtx label1 = gen_label_rtx ();
19070 rtx label2 = gen_label_rtx ();
19072 rtx tmp = gen_reg_rtx (XFmode);
19073 rtx tmp2 = gen_reg_rtx (XFmode);
19075 emit_insn (gen_absxf2 (tmp, op1));
19076 emit_insn (gen_cmpxf (tmp,
19077 CONST_DOUBLE_FROM_REAL_VALUE (
19078 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19080 emit_jump_insn (gen_bge (label1));
19082 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19083 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19084 emit_jump (label2);
19086 emit_label (label1);
19087 emit_move_insn (tmp, CONST1_RTX (XFmode));
19088 emit_insn (gen_addxf3 (tmp, op1, tmp));
19089 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19090 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19092 emit_label (label2);
19095 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19098 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19101 /* With Binutils 2.15, the "@unwind" marker must be specified on
19102 every occurrence of the ".eh_frame" section, not just the first
19105 && strcmp (name, ".eh_frame") == 0)
19107 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19108 flags & SECTION_WRITE ? "aw" : "a");
19111 default_elf_asm_named_section (name, flags, decl);
19114 /* Return the mangling of TYPE if it is an extended fundamental type. */
19116 static const char *
19117 ix86_mangle_fundamental_type (tree type)
19119 switch (TYPE_MODE (type))
19122 /* __float128 is "g". */
19125 /* "long double" or __float80 is "e". */
19132 /* For 32-bit code we can save PIC register setup by using
19133 __stack_chk_fail_local hidden function instead of calling
19134 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19135 register, so it is better to call __stack_chk_fail directly. */
19138 ix86_stack_protect_fail (void)
19140 return TARGET_64BIT
19141 ? default_external_stack_protect_fail ()
19142 : default_hidden_stack_protect_fail ();
19145 /* Select a format to encode pointers in exception handling data. CODE
19146 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19147 true if the symbol may be affected by dynamic relocations.
19149 ??? All x86 object file formats are capable of representing this.
19150 After all, the relocation needed is the same as for the call insn.
19151 Whether or not a particular assembler allows us to enter such, I
19152 guess we'll have to see. */
19154 asm_preferred_eh_data_format (int code, int global)
19158 int type = DW_EH_PE_sdata8;
19160 || ix86_cmodel == CM_SMALL_PIC
19161 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19162 type = DW_EH_PE_sdata4;
19163 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19165 if (ix86_cmodel == CM_SMALL
19166 || (ix86_cmodel == CM_MEDIUM && code))
19167 return DW_EH_PE_udata4;
19168 return DW_EH_PE_absptr;
19171 #include "gt-i386.h"