1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "coretypes.h"
34 #include "hard-reg-set.h"
36 #include "insn-config.h"
37 #include "conditions.h"
39 #include "insn-attr.h"
47 #include "basic-block.h"
50 #include "target-def.h"
51 #include "langhooks.h"
/* Fallback stack-probe limit; -1 presumably means "unlimited" unless the
   target configuration overrides it — TODO confirm against target headers.  */
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
/* NOTE(review): the cost arrays indexed by this macro have 5 entries, so a
   final arm yielding index 4 (and the closing paren) appears to be elided
   from this listing — confirm against the full source.  */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
66 /* Processor costs (relative to an add) */
68 struct processor_costs size_cost = { /* costs for tuning for size */
69 2, /* cost of an add instruction */
70 3, /* cost of a lea instruction */
71 2, /* variable shift costs */
72 3, /* constant shift costs */
73 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
74 0, /* cost of multiply per each bit set */
75 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
76 3, /* cost of movsx */
77 3, /* cost of movzx */
80 2, /* cost for loading QImode using movzbl */
81 {2, 2, 2}, /* cost of loading integer registers
82 in QImode, HImode and SImode.
83 Relative to reg-reg move (2). */
84 {2, 2, 2}, /* cost of storing integer registers */
85 2, /* cost of reg,reg fld/fst */
86 {2, 2, 2}, /* cost of loading fp registers
87 in SFmode, DFmode and XFmode */
88 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
89 3, /* cost of moving MMX register */
90 {3, 3}, /* cost of loading MMX registers
91 in SImode and DImode */
92 {3, 3}, /* cost of storing MMX registers
93 in SImode and DImode */
94 3, /* cost of moving SSE register */
95 {3, 3, 3}, /* cost of loading SSE registers
96 in SImode, DImode and TImode */
97 {3, 3, 3}, /* cost of storing SSE registers
98 in SImode, DImode and TImode */
99 3, /* MMX or SSE register to integer */
100 0, /* size of prefetch block */
101 0, /* number of parallel prefetches */
103 2, /* cost of FADD and FSUB insns. */
104 2, /* cost of FMUL instruction. */
105 2, /* cost of FDIV instruction. */
106 2, /* cost of FABS instruction. */
107 2, /* cost of FCHS instruction. */
108 2, /* cost of FSQRT instruction. */
111 /* Processor costs (relative to an add) */
113 struct processor_costs i386_cost = { /* 386 specific costs */
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 3, /* variable shift costs */
117 2, /* constant shift costs */
118 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
119 1, /* cost of multiply per each bit set */
120 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
121 3, /* cost of movsx */
122 2, /* cost of movzx */
123 15, /* "large" insn */
125 4, /* cost for loading QImode using movzbl */
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
128 Relative to reg-reg move (2). */
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {8, 8, 8}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
133 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
134 2, /* cost of moving MMX register */
135 {4, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {4, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3, /* MMX or SSE register to integer */
145 0, /* size of prefetch block */
146 0, /* number of parallel prefetches */
148 23, /* cost of FADD and FSUB insns. */
149 27, /* cost of FMUL instruction. */
150 88, /* cost of FDIV instruction. */
151 22, /* cost of FABS instruction. */
152 24, /* cost of FCHS instruction. */
153 122, /* cost of FSQRT instruction. */
157 struct processor_costs i486_cost = { /* 486 specific costs */
158 1, /* cost of an add instruction */
159 1, /* cost of a lea instruction */
160 3, /* variable shift costs */
161 2, /* constant shift costs */
162 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
163 1, /* cost of multiply per each bit set */
164 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
165 3, /* cost of movsx */
166 2, /* cost of movzx */
167 15, /* "large" insn */
169 4, /* cost for loading QImode using movzbl */
170 {2, 4, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 4, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {8, 8, 8}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
178 2, /* cost of moving MMX register */
179 {4, 8}, /* cost of loading MMX registers
180 in SImode and DImode */
181 {4, 8}, /* cost of storing MMX registers
182 in SImode and DImode */
183 2, /* cost of moving SSE register */
184 {4, 8, 16}, /* cost of loading SSE registers
185 in SImode, DImode and TImode */
186 {4, 8, 16}, /* cost of storing SSE registers
187 in SImode, DImode and TImode */
188 3, /* MMX or SSE register to integer */
189 0, /* size of prefetch block */
190 0, /* number of parallel prefetches */
192 8, /* cost of FADD and FSUB insns. */
193 16, /* cost of FMUL instruction. */
194 73, /* cost of FDIV instruction. */
195 3, /* cost of FABS instruction. */
196 3, /* cost of FCHS instruction. */
197 83, /* cost of FSQRT instruction. */
201 struct processor_costs pentium_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 4, /* variable shift costs */
205 1, /* constant shift costs */
206 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
209 3, /* cost of movsx */
210 2, /* cost of movzx */
211 8, /* "large" insn */
213 6, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
222 8, /* cost of moving MMX register */
223 {8, 8}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {8, 8}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {4, 8, 16}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {4, 8, 16}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 0, /* size of prefetch block */
234 0, /* number of parallel prefetches */
236 3, /* cost of FADD and FSUB insns. */
237 3, /* cost of FMUL instruction. */
238 39, /* cost of FDIV instruction. */
239 1, /* cost of FABS instruction. */
240 1, /* cost of FCHS instruction. */
241 70, /* cost of FSQRT instruction. */
245 struct processor_costs pentiumpro_cost = {
246 1, /* cost of an add instruction */
247 1, /* cost of a lea instruction */
248 1, /* variable shift costs */
249 1, /* constant shift costs */
250 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
251 0, /* cost of multiply per each bit set */
252 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
253 1, /* cost of movsx */
254 1, /* cost of movzx */
255 8, /* "large" insn */
257 2, /* cost for loading QImode using movzbl */
258 {4, 4, 4}, /* cost of loading integer registers
259 in QImode, HImode and SImode.
260 Relative to reg-reg move (2). */
261 {2, 2, 2}, /* cost of storing integer registers */
262 2, /* cost of reg,reg fld/fst */
263 {2, 2, 6}, /* cost of loading fp registers
264 in SFmode, DFmode and XFmode */
265 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
266 2, /* cost of moving MMX register */
267 {2, 2}, /* cost of loading MMX registers
268 in SImode and DImode */
269 {2, 2}, /* cost of storing MMX registers
270 in SImode and DImode */
271 2, /* cost of moving SSE register */
272 {2, 2, 8}, /* cost of loading SSE registers
273 in SImode, DImode and TImode */
274 {2, 2, 8}, /* cost of storing SSE registers
275 in SImode, DImode and TImode */
276 3, /* MMX or SSE register to integer */
277 32, /* size of prefetch block */
278 6, /* number of parallel prefetches */
280 3, /* cost of FADD and FSUB insns. */
281 5, /* cost of FMUL instruction. */
282 56, /* cost of FDIV instruction. */
283 2, /* cost of FABS instruction. */
284 2, /* cost of FCHS instruction. */
285 56, /* cost of FSQRT instruction. */
289 struct processor_costs k6_cost = {
290 1, /* cost of an add instruction */
291 2, /* cost of a lea instruction */
292 1, /* variable shift costs */
293 1, /* constant shift costs */
294 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
295 0, /* cost of multiply per each bit set */
296 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
297 2, /* cost of movsx */
298 2, /* cost of movzx */
299 8, /* "large" insn */
301 3, /* cost for loading QImode using movzbl */
302 {4, 5, 4}, /* cost of loading integer registers
303 in QImode, HImode and SImode.
304 Relative to reg-reg move (2). */
305 {2, 3, 2}, /* cost of storing integer registers */
306 4, /* cost of reg,reg fld/fst */
307 {6, 6, 6}, /* cost of loading fp registers
308 in SFmode, DFmode and XFmode */
309 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
310 2, /* cost of moving MMX register */
311 {2, 2}, /* cost of loading MMX registers
312 in SImode and DImode */
313 {2, 2}, /* cost of storing MMX registers
314 in SImode and DImode */
315 2, /* cost of moving SSE register */
316 {2, 2, 8}, /* cost of loading SSE registers
317 in SImode, DImode and TImode */
318 {2, 2, 8}, /* cost of storing SSE registers
319 in SImode, DImode and TImode */
320 6, /* MMX or SSE register to integer */
321 32, /* size of prefetch block */
322 1, /* number of parallel prefetches */
324 2, /* cost of FADD and FSUB insns. */
325 2, /* cost of FMUL instruction. */
326 56, /* cost of FDIV instruction. */
327 2, /* cost of FABS instruction. */
328 2, /* cost of FCHS instruction. */
329 56, /* cost of FSQRT instruction. */
333 struct processor_costs athlon_cost = {
334 1, /* cost of an add instruction */
335 2, /* cost of a lea instruction */
336 1, /* variable shift costs */
337 1, /* constant shift costs */
338 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
339 0, /* cost of multiply per each bit set */
340 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
341 1, /* cost of movsx */
342 1, /* cost of movzx */
343 8, /* "large" insn */
345 4, /* cost for loading QImode using movzbl */
346 {3, 4, 3}, /* cost of loading integer registers
347 in QImode, HImode and SImode.
348 Relative to reg-reg move (2). */
349 {3, 4, 3}, /* cost of storing integer registers */
350 4, /* cost of reg,reg fld/fst */
351 {4, 4, 12}, /* cost of loading fp registers
352 in SFmode, DFmode and XFmode */
353 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
354 2, /* cost of moving MMX register */
355 {4, 4}, /* cost of loading MMX registers
356 in SImode and DImode */
357 {4, 4}, /* cost of storing MMX registers
358 in SImode and DImode */
359 2, /* cost of moving SSE register */
360 {4, 4, 6}, /* cost of loading SSE registers
361 in SImode, DImode and TImode */
362 {4, 4, 5}, /* cost of storing SSE registers
363 in SImode, DImode and TImode */
364 5, /* MMX or SSE register to integer */
365 64, /* size of prefetch block */
366 6, /* number of parallel prefetches */
368 4, /* cost of FADD and FSUB insns. */
369 4, /* cost of FMUL instruction. */
370 24, /* cost of FDIV instruction. */
371 2, /* cost of FABS instruction. */
372 2, /* cost of FCHS instruction. */
373 35, /* cost of FSQRT instruction. */
377 struct processor_costs k8_cost = {
378 1, /* cost of an add instruction */
379 2, /* cost of a lea instruction */
380 1, /* variable shift costs */
381 1, /* constant shift costs */
382 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
383 0, /* cost of multiply per each bit set */
384 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
385 1, /* cost of movsx */
386 1, /* cost of movzx */
387 8, /* "large" insn */
389 4, /* cost for loading QImode using movzbl */
390 {3, 4, 3}, /* cost of loading integer registers
391 in QImode, HImode and SImode.
392 Relative to reg-reg move (2). */
393 {3, 4, 3}, /* cost of storing integer registers */
394 4, /* cost of reg,reg fld/fst */
395 {4, 4, 12}, /* cost of loading fp registers
396 in SFmode, DFmode and XFmode */
397 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
398 2, /* cost of moving MMX register */
399 {3, 3}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {4, 4}, /* cost of storing MMX registers
402 in SImode and DImode */
403 2, /* cost of moving SSE register */
404 {4, 3, 6}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {4, 4, 5}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 5, /* MMX or SSE register to integer */
409 64, /* size of prefetch block */
410 6, /* number of parallel prefetches */
412 4, /* cost of FADD and FSUB insns. */
413 4, /* cost of FMUL instruction. */
414 19, /* cost of FDIV instruction. */
415 2, /* cost of FABS instruction. */
416 2, /* cost of FCHS instruction. */
417 35, /* cost of FSQRT instruction. */
421 struct processor_costs pentium4_cost = {
422 1, /* cost of an add instruction */
423 1, /* cost of a lea instruction */
424 4, /* variable shift costs */
425 4, /* constant shift costs */
426 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
427 0, /* cost of multiply per each bit set */
428 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
429 1, /* cost of movsx */
430 1, /* cost of movzx */
431 16, /* "large" insn */
433 2, /* cost for loading QImode using movzbl */
434 {4, 5, 4}, /* cost of loading integer registers
435 in QImode, HImode and SImode.
436 Relative to reg-reg move (2). */
437 {2, 3, 2}, /* cost of storing integer registers */
438 2, /* cost of reg,reg fld/fst */
439 {2, 2, 6}, /* cost of loading fp registers
440 in SFmode, DFmode and XFmode */
441 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
442 2, /* cost of moving MMX register */
443 {2, 2}, /* cost of loading MMX registers
444 in SImode and DImode */
445 {2, 2}, /* cost of storing MMX registers
446 in SImode and DImode */
447 12, /* cost of moving SSE register */
448 {12, 12, 12}, /* cost of loading SSE registers
449 in SImode, DImode and TImode */
450 {2, 2, 8}, /* cost of storing SSE registers
451 in SImode, DImode and TImode */
452 10, /* MMX or SSE register to integer */
453 64, /* size of prefetch block */
454 6, /* number of parallel prefetches */
456 5, /* cost of FADD and FSUB insns. */
457 7, /* cost of FMUL instruction. */
458 43, /* cost of FDIV instruction. */
459 2, /* cost of FABS instruction. */
460 2, /* cost of FCHS instruction. */
461 43, /* cost of FSQRT instruction. */
/* Active cost table; presumably reselected from the -mtune option during
   option processing — confirm in override_options.  */
464 const struct processor_costs *ix86_cost = &pentium_cost;
466 /* Processor feature/optimization bitmasks. */
467 #define m_386 (1<<PROCESSOR_I386)
468 #define m_486 (1<<PROCESSOR_I486)
469 #define m_PENT (1<<PROCESSOR_PENTIUM)
470 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
471 #define m_K6 (1<<PROCESSOR_K6)
472 #define m_ATHLON (1<<PROCESSOR_ATHLON)
473 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
474 #define m_K8 (1<<PROCESSOR_K8)
475 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
/* Each x86_* mask below enables the named tuning/transformation for the
   processors whose m_* bits are set; ~ masks enable it for all but those.  */
477 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
478 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
479 const int x86_zero_extend_with_and = m_486 | m_PENT;
480 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
481 const int x86_double_with_add = ~m_386;
482 const int x86_use_bit_test = m_386;
483 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
484 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
485 const int x86_3dnow_a = m_ATHLON_K8;
486 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
487 const int x86_branch_hints = m_PENT4;
488 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
489 const int x86_partial_reg_stall = m_PPRO;
490 const int x86_use_loop = m_K6;
491 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
492 const int x86_use_mov0 = m_K6;
493 const int x86_use_cltd = ~(m_PENT | m_K6);
494 const int x86_read_modify_write = ~m_PENT;
495 const int x86_read_modify = ~(m_PENT | m_PPRO);
496 const int x86_split_long_moves = m_PPRO;
497 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
498 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
499 const int x86_single_stringop = m_386 | m_PENT4;
500 const int x86_qimode_math = ~(0);
501 const int x86_promote_qi_regs = 0;
502 const int x86_himode_math = ~(m_PPRO);
503 const int x86_promote_hi_regs = m_PPRO;
504 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
505 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
506 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
507 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
508 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
509 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
510 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
511 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
512 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
513 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
514 const int x86_decompose_lea = m_PENT4;
515 const int x86_shift1 = ~m_486;
516 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
517 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
518 /* Set for machines where the type and dependencies are resolved on SSE register
519 parts instead of whole registers, so we may maintain just lower part of
520 scalar values in proper format leaving the upper part undefined. */
521 const int x86_sse_partial_regs = m_ATHLON_K8;
522 /* Athlon optimizes partial-register FPS special case, thus avoiding the
523 need for extra instructions beforehand */
524 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
525 const int x86_sse_typeless_stores = m_ATHLON_K8;
526 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
527 const int x86_use_ffreep = m_ATHLON_K8;
528 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
530 /* ??? HACK! The following is a lie. SSE can hold e.g. SImode, and
531 indeed *must* be able to hold SImode so that SSE2 shifts are able
532 to work right. But this can result in some mighty surprising
533 register allocation when building kernels. Turning this off should
534 make us less likely to all-of-the-sudden select an SSE register. */
535 const int x86_inter_unit_moves = 0; /* ~(m_ATHLON_K8) */
537 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
539 /* In case the average insn count for single function invocation is
540 lower than this constant, emit fast (but longer) prologue and epilogue. */
542 #define FAST_PROLOGUE_INSN_COUNT 20
544 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
545 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
546 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
547 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
549 /* Array of the smallest class containing reg number REGNO, indexed by
550 REGNO. Used by REGNO_REG_CLASS in i386.h. */
552 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
/* ax, dx, cx, bx */
555 AREG, DREG, CREG, BREG,
/* si, di, then presumably bp, sp (NON_Q_REGS) — mirrors the general-reg
   rows of the dbx maps below; confirm against the full source. */
557 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers */
559 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
560 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
563 /* flags, fpsr, dirflag, frame */
564 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers */
565 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers */
567 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* extended integer registers (REX) */
569 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
570 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
/* extended SSE registers */
571 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
575 /* The "default" register map used in 32bit mode. */
/* Maps gcc register numbers to debug-format (dbx/DWARF) register numbers;
   -1 marks registers with no corresponding debug number in this mode.  */
577 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
579 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
580 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
581 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
582 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
583 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
584 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
585 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Integer registers used for argument passing, in gcc regno terms,
   in the x86-64 ABI order RDI, RSI, RDX, RCX, R8, R9.  */
588 static int const x86_64_int_parameter_registers[6] =
590 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
591 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Integer registers used for returning values (gcc regnos).  */
594 static int const x86_64_int_return_registers[4] =
596 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
599 /* The "default" register map used in 64bit mode. */
/* gcc regno -> debug-format regno for 64-bit targets; -1 = unmapped.  */
600 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
602 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
603 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
604 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
605 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
606 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
607 8,9,10,11,12,13,14,15, /* extended integer registers */
608 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
611 /* Define the register numbers to be used in Dwarf debugging information.
612 The SVR4 reference port C compiler uses the following register numbers
613 in its Dwarf output code:
614 0 for %eax (gcc regno = 0)
615 1 for %ecx (gcc regno = 2)
616 2 for %edx (gcc regno = 1)
617 3 for %ebx (gcc regno = 3)
618 4 for %esp (gcc regno = 7)
619 5 for %ebp (gcc regno = 6)
620 6 for %esi (gcc regno = 4)
621 7 for %edi (gcc regno = 5)
622 The following three DWARF register numbers are never generated by
623 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
624 believes these numbers have these meanings.
625 8 for %eip (no gcc equivalent)
626 9 for %eflags (gcc regno = 17)
627 10 for %trapno (no gcc equivalent)
628 It is not at all clear how we should number the FP stack registers
629 for the x86 architecture. If the version of SDB on x86/svr4 were
630 a bit less brain dead with respect to floating-point then we would
631 have a precedent to follow with respect to DWARF register numbers
632 for x86 FP registers, but the SDB on x86/svr4 is so completely
633 broken with respect to FP registers that it is hardly worth thinking
634 of it as something to strive for compatibility with.
635 The version of x86/svr4 SDB I have at the moment does (partially)
636 seem to believe that DWARF register number 11 is associated with
637 the x86 register %st(0), but that's about all. Higher DWARF
638 register numbers don't seem to be associated with anything in
639 particular, and even for DWARF regno 11, SDB only seems to under-
640 stand that it should say that a variable lives in %st(0) (when
641 asked via an `=' command) if we said it was in DWARF regno 11,
642 but SDB still prints garbage when asked for the value of the
643 variable in question (via a `/' command).
644 (Also note that the labels SDB prints for various FP stack regs
645 when doing an `x' command are all wrong.)
646 Note that these problems generally don't affect the native SVR4
647 C compiler because it doesn't allow the use of -O with -g and
648 because when it is *not* optimizing, it allocates a memory
649 location for each floating-point variable, and the memory
650 location is what gets described in the DWARF AT_location
651 attribute for the variable in question.
652 Regardless of the severe mental illness of the x86/svr4 SDB, we
653 do something sensible here and we use the following DWARF
654 register numbers. Note that these are all stack-top-relative
656 11 for %st(0) (gcc regno = 8)
657 12 for %st(1) (gcc regno = 9)
658 13 for %st(2) (gcc regno = 10)
659 14 for %st(3) (gcc regno = 11)
660 15 for %st(4) (gcc regno = 12)
661 16 for %st(5) (gcc regno = 13)
662 17 for %st(6) (gcc regno = 14)
663 18 for %st(7) (gcc regno = 15)
/* SVR4 DWARF numbering (see the long comment above): differs from
   dbx_register_map in swapping ebp/esp (4 = %esp, 5 = %ebp here) and in
   mapping %eflags (9) and the FP stack regs (11..18).  */
665 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
667 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
668 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
669 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
670 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
671 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
672 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
673 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
676 /* Test and compare insns in i386.md store the information needed to
677 generate branch and scc insns here. */
679 rtx ix86_compare_op0 = NULL_RTX;
680 rtx ix86_compare_op1 = NULL_RTX;
/* Maximum number of cached stack-local temporary slots — presumably the
   bound used by assign_386_stack_local; confirm in the full source.  */
682 #define MAX_386_STACK_LOCALS 3
683 /* Size of the register save area. */
684 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
686 /* Define the structure for the machine field in struct function. */
688 struct stack_local_entry GTY(())
/* NOTE(review): other members of this struct (mode/regno/rtl, per the
   surrounding comments) appear to be elided from this listing.  */
693 struct stack_local_entry *next; /* next entry in the chained list */
696 /* Structure describing stack frame layout.
697 Stack grows downward:
703 saved frame pointer if frame_pointer_needed
704 <- HARD_FRAME_POINTER
710 > to_allocate <- FRAME_POINTER
722 int outgoing_arguments_size;
/* Bytes the prologue must allocate — the "to_allocate" span in the
   layout diagram above.  */
725 HOST_WIDE_INT to_allocate;
726 /* The offsets relative to ARG_POINTER. */
727 HOST_WIDE_INT frame_pointer_offset;
728 HOST_WIDE_INT hard_frame_pointer_offset;
729 HOST_WIDE_INT stack_pointer_offset;
731 /* When save_regs_using_mov is set, emit prologue using
732 move instead of push instructions. */
733 bool save_regs_using_mov;
736 /* Used to enable/disable debugging features. */
737 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
738 /* Code model option as passed by user. */
739 const char *ix86_cmodel_string;
/* Parsed form of ix86_cmodel_string. */
741 enum cmodel ix86_cmodel;
/* -masm= option string; parsed result is ix86_asm_dialect below. */
743 const char *ix86_asm_string;
744 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* -mtls-dialect= option string; parsed result follows. */
746 const char *ix86_tls_dialect_string;
747 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
749 /* Which unit we are generating floating point math for. */
750 enum fpmath_unit ix86_fpmath;
752 /* Which cpu are we scheduling for. */
753 enum processor_type ix86_tune;
754 /* Which instruction set architecture to use. */
755 enum processor_type ix86_arch;
757 /* Strings to hold which cpu and instruction set architecture to use. */
758 const char *ix86_tune_string; /* for -mtune=<xxx> */
759 const char *ix86_arch_string; /* for -march=<xxx> */
760 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
762 /* # of registers to use to pass arguments. */
763 const char *ix86_regparm_string;
765 /* true if sse prefetch instruction is not NOOP. */
766 int x86_prefetch_sse;
768 /* ix86_regparm_string as a number */
771 /* Alignment to use for loops and jumps: */
773 /* Power of two alignment for loops. */
774 const char *ix86_align_loops_string;
776 /* Power of two alignment for non-loop jumps. */
777 const char *ix86_align_jumps_string;
779 /* Power of two alignment for stack boundary in bytes. */
780 const char *ix86_preferred_stack_boundary_string;
782 /* Preferred alignment for stack boundary in bits. */
783 int ix86_preferred_stack_boundary;
785 /* Values 1-5: see jump.c */
786 int ix86_branch_cost;
787 const char *ix86_branch_cost_string;
789 /* Power of two alignment for functions. */
790 const char *ix86_align_funcs_string;
792 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
793 static char internal_label_prefix[16];
794 static int internal_label_prefix_len;
/* Forward declarations of static helpers defined later in this file.  */
796 static int local_symbolic_operand (rtx, enum machine_mode);
797 static int tls_symbolic_operand_1 (rtx, enum tls_model);
798 static void output_pic_addr_const (FILE *, rtx, int);
799 static void put_condition_code (enum rtx_code, enum machine_mode,
801 static const char *get_some_local_dynamic_name (void);
802 static int get_some_local_dynamic_name_1 (rtx *, void *);
803 static rtx maybe_get_pool_constant (rtx);
804 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
805 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
807 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
808 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
810 static rtx get_thread_pointer (int);
811 static rtx legitimize_tls_address (rtx, enum tls_model, int);
812 static void get_pc_thunk_name (char [32], unsigned int);
813 static rtx gen_push (rtx);
814 static int memory_address_length (rtx addr);
815 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
816 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
817 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
818 static void ix86_dump_ppro_packet (FILE *);
819 static void ix86_reorder_insn (rtx *, rtx *);
820 static struct machine_function * ix86_init_machine_status (void);
821 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
822 static int ix86_nsaved_regs (void);
823 static void ix86_emit_save_regs (void);
824 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
825 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
826 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
827 static void ix86_sched_reorder_ppro (rtx *, rtx *);
828 static HOST_WIDE_INT ix86_GOT_alias_set (void);
829 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
830 static rtx ix86_expand_aligntest (rtx, int);
831 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
832 static int ix86_issue_rate (void);
833 static int ix86_adjust_cost (rtx, rtx, rtx, int);
834 static void ix86_sched_init (FILE *, int, int);
835 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
836 static int ix86_variable_issue (FILE *, int, rtx, int);
837 static int ia32_use_dfa_pipeline_interface (void);
838 static int ia32_multipass_dfa_lookahead (void);
839 static void ix86_init_mmx_sse_builtins (void);
840 static rtx x86_this_parameter (tree);
841 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
842 HOST_WIDE_INT, tree);
843 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
844 static void x86_file_start (void);
845 static void ix86_reorg (void);
846 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
847 static tree ix86_build_builtin_va_list (void);
/* NOTE(review): the two field lines below appear to belong to struct
   ix86_address, whose declaration header is elided from this listing.  */
851 rtx base, index, disp;
853 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
856 static int ix86_decompose_address (rtx, struct ix86_address *);
857 static int ix86_address_cost (rtx);
858 static bool ix86_cannot_force_const_mem (rtx);
859 static rtx ix86_delegitimize_address (rtx);
861 struct builtin_description;
862 static rtx ix86_expand_sse_comi (const struct builtin_description *,
864 static rtx ix86_expand_sse_compare (const struct builtin_description *,
866 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
867 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
868 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
869 static rtx ix86_expand_store_builtin (enum insn_code, tree);
870 static rtx safe_vector_operand (rtx, enum machine_mode);
871 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
872 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
873 enum rtx_code *, enum rtx_code *);
874 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
875 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
876 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
877 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
878 static int ix86_fp_comparison_cost (enum rtx_code code);
879 static unsigned int ix86_select_alt_pic_regnum (void);
880 static int ix86_save_reg (unsigned int, int);
881 static void ix86_compute_frame_layout (struct ix86_frame *);
882 static int ix86_comp_type_attributes (tree, tree);
883 static int ix86_function_regparm (tree, tree);
884 const struct attribute_spec ix86_attribute_table[];
885 static bool ix86_function_ok_for_sibcall (tree, tree);
886 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
887 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
888 static int ix86_value_regno (enum machine_mode);
889 static bool contains_128bit_aligned_vector_p (tree);
890 static bool ix86_ms_bitfield_layout_p (tree);
891 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
892 static int extended_reg_mentioned_1 (rtx *, void *);
893 static bool ix86_rtx_costs (rtx, int, int, int *);
894 static int min_insn_size (rtx);
895 static void k8_avoid_jump_misspredicts (void);
897 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
898 static void ix86_svr3_asm_out_constructor (rtx, int);
901 /* Register class used for passing given 64bit part of the argument.
902 These represent classes as documented by the PS ABI, with the exception
903 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
904 use SF or DFmode move instead of DImode to avoid reformatting penalties.
906 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
907 whenever possible (upper half does contain padding).
909 enum x86_64_reg_class
912 X86_64_INTEGER_CLASS,
913 X86_64_INTEGERSI_CLASS,
/* Printable names for the enum x86_64_reg_class values above, in order. */
922 static const char * const x86_64_reg_class_name[] =
923 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
/* Upper bound on the number of register classes a single argument can
   occupy -- the array bound of classify_argument's output parameter. */
925 #define MAX_CLASSES 4
926 static int classify_argument (enum machine_mode, tree,
927 enum x86_64_reg_class [MAX_CLASSES], int);
928 static int examine_argument (enum machine_mode, tree, int, int *, int *);
929 static rtx construct_container (enum machine_mode, tree, int, int, int,
931 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
932 enum x86_64_reg_class);
934 /* Table of constants used by fldpi, fldln2, etc.... */
935 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once ext_80387_constants_table has been filled in -- presumably by
   init_ext_80387_constants below; the initializer itself is not visible
   in this chunk. */
936 static bool ext_80387_constants_init = 0;
937 static void init_ext_80387_constants (void);
939 /* Initialize the GCC target structure. */
940 #undef TARGET_ATTRIBUTE_TABLE
941 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
942 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
943 # undef TARGET_MERGE_DECL_ATTRIBUTES
944 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
947 #undef TARGET_COMP_TYPE_ATTRIBUTES
948 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
950 #undef TARGET_INIT_BUILTINS
951 #define TARGET_INIT_BUILTINS ix86_init_builtins
953 #undef TARGET_EXPAND_BUILTIN
954 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
956 #undef TARGET_ASM_FUNCTION_EPILOGUE
957 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
959 #undef TARGET_ASM_OPEN_PAREN
960 #define TARGET_ASM_OPEN_PAREN ""
961 #undef TARGET_ASM_CLOSE_PAREN
962 #define TARGET_ASM_CLOSE_PAREN ""
964 #undef TARGET_ASM_ALIGNED_HI_OP
965 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
966 #undef TARGET_ASM_ALIGNED_SI_OP
967 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
969 #undef TARGET_ASM_ALIGNED_DI_OP
970 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
973 #undef TARGET_ASM_UNALIGNED_HI_OP
974 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
975 #undef TARGET_ASM_UNALIGNED_SI_OP
976 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
977 #undef TARGET_ASM_UNALIGNED_DI_OP
978 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
980 #undef TARGET_SCHED_ADJUST_COST
981 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
982 #undef TARGET_SCHED_ISSUE_RATE
983 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
984 #undef TARGET_SCHED_VARIABLE_ISSUE
985 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
986 #undef TARGET_SCHED_INIT
987 #define TARGET_SCHED_INIT ix86_sched_init
988 #undef TARGET_SCHED_REORDER
989 #define TARGET_SCHED_REORDER ix86_sched_reorder
990 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
991 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
992 ia32_use_dfa_pipeline_interface
993 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
994 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
995 ia32_multipass_dfa_lookahead
997 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
998 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1001 #undef TARGET_HAVE_TLS
1002 #define TARGET_HAVE_TLS true
1004 #undef TARGET_CANNOT_FORCE_CONST_MEM
1005 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1007 #undef TARGET_DELEGITIMIZE_ADDRESS
1008 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1010 #undef TARGET_MS_BITFIELD_LAYOUT_P
1011 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1013 #undef TARGET_ASM_OUTPUT_MI_THUNK
1014 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1015 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1016 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1018 #undef TARGET_ASM_FILE_START
1019 #define TARGET_ASM_FILE_START x86_file_start
1021 #undef TARGET_RTX_COSTS
1022 #define TARGET_RTX_COSTS ix86_rtx_costs
1023 #undef TARGET_ADDRESS_COST
1024 #define TARGET_ADDRESS_COST ix86_address_cost
1026 #undef TARGET_FIXED_CONDITION_CODE_REGS
1027 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1028 #undef TARGET_CC_MODES_COMPATIBLE
1029 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1031 #undef TARGET_MACHINE_DEPENDENT_REORG
1032 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1034 #undef TARGET_BUILD_BUILTIN_VA_LIST
1035 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1037 struct gcc_target targetm = TARGET_INITIALIZER;
1039 /* The svr4 ABI for the i386 says that records and unions are returned
1041 #ifndef DEFAULT_PCC_STRUCT_RETURN
1042 #define DEFAULT_PCC_STRUCT_RETURN 1
1045 /* Sometimes certain combinations of command options do not make
1046 sense on a particular target machine. You can define a macro
1047 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1048 defined, is executed once just after all the command options have
1051 Don't use this macro to turn on various extra optimizations for
1052 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1055 override_options (void)
1058 /* Comes from final.c -- no real reason to change it. */
1059 #define MAX_CODE_ALIGN 16
1063 const struct processor_costs *cost; /* Processor costs */
1064 const int target_enable; /* Target flags to enable. */
1065 const int target_disable; /* Target flags to disable. */
1066 const int align_loop; /* Default alignments. */
1067 const int align_loop_max_skip;
1068 const int align_jump;
1069 const int align_jump_max_skip;
1070 const int align_func;
1072 const processor_target_table[PROCESSOR_max] =
1074 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1075 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1076 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1077 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1078 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1079 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1080 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1081 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1084 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1087 const char *const name; /* processor name or nickname. */
1088 const enum processor_type processor;
1089 const enum pta_flags
1095 PTA_PREFETCH_SSE = 16,
1101 const processor_alias_table[] =
1103 {"i386", PROCESSOR_I386, 0},
1104 {"i486", PROCESSOR_I486, 0},
1105 {"i586", PROCESSOR_PENTIUM, 0},
1106 {"pentium", PROCESSOR_PENTIUM, 0},
1107 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1108 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1109 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1110 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1111 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1112 {"i686", PROCESSOR_PENTIUMPRO, 0},
1113 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1114 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1115 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1116 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1117 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1118 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1119 | PTA_MMX | PTA_PREFETCH_SSE},
1120 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1121 | PTA_MMX | PTA_PREFETCH_SSE},
1122 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1123 | PTA_MMX | PTA_PREFETCH_SSE},
1124 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1125 | PTA_MMX | PTA_PREFETCH_SSE},
1126 {"k6", PROCESSOR_K6, PTA_MMX},
1127 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1128 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1129 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1131 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1132 | PTA_3DNOW | PTA_3DNOW_A},
1133 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1134 | PTA_3DNOW_A | PTA_SSE},
1135 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1136 | PTA_3DNOW_A | PTA_SSE},
1137 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1138 | PTA_3DNOW_A | PTA_SSE},
1139 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1140 | PTA_SSE | PTA_SSE2 },
1141 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1142 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1143 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1144 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1145 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1146 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1147 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1148 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1151 int const pta_size = ARRAY_SIZE (processor_alias_table);
1153 /* Set the default values for switches whose default depends on TARGET_64BIT
1154 in case they weren't overwritten by command line options. */
1157 if (flag_omit_frame_pointer == 2)
1158 flag_omit_frame_pointer = 1;
1159 if (flag_asynchronous_unwind_tables == 2)
1160 flag_asynchronous_unwind_tables = 1;
1161 if (flag_pcc_struct_return == 2)
1162 flag_pcc_struct_return = 0;
1166 if (flag_omit_frame_pointer == 2)
1167 flag_omit_frame_pointer = 0;
1168 if (flag_asynchronous_unwind_tables == 2)
1169 flag_asynchronous_unwind_tables = 0;
1170 if (flag_pcc_struct_return == 2)
1171 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1174 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1175 SUBTARGET_OVERRIDE_OPTIONS;
1178 if (!ix86_tune_string && ix86_arch_string)
1179 ix86_tune_string = ix86_arch_string;
1180 if (!ix86_tune_string)
1181 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1182 if (!ix86_arch_string)
1183 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1185 if (ix86_cmodel_string != 0)
1187 if (!strcmp (ix86_cmodel_string, "small"))
1188 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1190 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1191 else if (!strcmp (ix86_cmodel_string, "32"))
1192 ix86_cmodel = CM_32;
1193 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1194 ix86_cmodel = CM_KERNEL;
1195 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1196 ix86_cmodel = CM_MEDIUM;
1197 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1198 ix86_cmodel = CM_LARGE;
1200 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1204 ix86_cmodel = CM_32;
1206 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1208 if (ix86_asm_string != 0)
1210 if (!strcmp (ix86_asm_string, "intel"))
1211 ix86_asm_dialect = ASM_INTEL;
1212 else if (!strcmp (ix86_asm_string, "att"))
1213 ix86_asm_dialect = ASM_ATT;
1215 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1217 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1218 error ("code model `%s' not supported in the %s bit mode",
1219 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1220 if (ix86_cmodel == CM_LARGE)
1221 sorry ("code model `large' not supported yet");
1222 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1223 sorry ("%i-bit mode not compiled in",
1224 (target_flags & MASK_64BIT) ? 64 : 32);
1226 for (i = 0; i < pta_size; i++)
1227 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1229 ix86_arch = processor_alias_table[i].processor;
1230 /* Default cpu tuning to the architecture. */
1231 ix86_tune = ix86_arch;
1232 if (processor_alias_table[i].flags & PTA_MMX
1233 && !(target_flags_explicit & MASK_MMX))
1234 target_flags |= MASK_MMX;
1235 if (processor_alias_table[i].flags & PTA_3DNOW
1236 && !(target_flags_explicit & MASK_3DNOW))
1237 target_flags |= MASK_3DNOW;
1238 if (processor_alias_table[i].flags & PTA_3DNOW_A
1239 && !(target_flags_explicit & MASK_3DNOW_A))
1240 target_flags |= MASK_3DNOW_A;
1241 if (processor_alias_table[i].flags & PTA_SSE
1242 && !(target_flags_explicit & MASK_SSE))
1243 target_flags |= MASK_SSE;
1244 if (processor_alias_table[i].flags & PTA_SSE2
1245 && !(target_flags_explicit & MASK_SSE2))
1246 target_flags |= MASK_SSE2;
1247 if (processor_alias_table[i].flags & PTA_SSE3
1248 && !(target_flags_explicit & MASK_SSE3))
1249 target_flags |= MASK_SSE3;
1250 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1251 x86_prefetch_sse = true;
1252 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1253 error ("CPU you selected does not support x86-64 instruction set");
1258 error ("bad value (%s) for -march= switch", ix86_arch_string);
1260 for (i = 0; i < pta_size; i++)
1261 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1263 ix86_tune = processor_alias_table[i].processor;
1264 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1265 error ("CPU you selected does not support x86-64 instruction set");
1267 /* Intel CPUs have always interpreted SSE prefetch instructions as
1268 NOPs; so, we can enable SSE prefetch instructions even when
1269 -mtune (rather than -march) points us to a processor that has them.
1270 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1271 higher processors. */
1272 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1273 x86_prefetch_sse = true;
1277 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1280 ix86_cost = &size_cost;
1282 ix86_cost = processor_target_table[ix86_tune].cost;
1283 target_flags |= processor_target_table[ix86_tune].target_enable;
1284 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1286 /* Arrange to set up i386_stack_locals for all functions. */
1287 init_machine_status = ix86_init_machine_status;
1289 /* Validate -mregparm= value. */
1290 if (ix86_regparm_string)
1292 i = atoi (ix86_regparm_string);
1293 if (i < 0 || i > REGPARM_MAX)
1294 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1300 ix86_regparm = REGPARM_MAX;
1302 /* If the user has provided any of the -malign-* options,
1303 warn and use that value only if -falign-* is not set.
1304 Remove this code in GCC 3.2 or later. */
1305 if (ix86_align_loops_string)
1307 warning ("-malign-loops is obsolete, use -falign-loops");
1308 if (align_loops == 0)
1310 i = atoi (ix86_align_loops_string);
1311 if (i < 0 || i > MAX_CODE_ALIGN)
1312 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1314 align_loops = 1 << i;
1318 if (ix86_align_jumps_string)
1320 warning ("-malign-jumps is obsolete, use -falign-jumps");
1321 if (align_jumps == 0)
1323 i = atoi (ix86_align_jumps_string);
1324 if (i < 0 || i > MAX_CODE_ALIGN)
1325 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1327 align_jumps = 1 << i;
1331 if (ix86_align_funcs_string)
1333 warning ("-malign-functions is obsolete, use -falign-functions");
1334 if (align_functions == 0)
1336 i = atoi (ix86_align_funcs_string);
1337 if (i < 0 || i > MAX_CODE_ALIGN)
1338 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1340 align_functions = 1 << i;
1344 /* Default align_* from the processor table. */
1345 if (align_loops == 0)
1347 align_loops = processor_target_table[ix86_tune].align_loop;
1348 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1350 if (align_jumps == 0)
1352 align_jumps = processor_target_table[ix86_tune].align_jump;
1353 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1355 if (align_functions == 0)
1357 align_functions = processor_target_table[ix86_tune].align_func;
1360 /* Validate -mpreferred-stack-boundary= value, or provide default.
1361 The default of 128 bits is for Pentium III's SSE __m128, but we
1362 don't want additional code to keep the stack aligned when
1363 optimizing for code size. */
1364 ix86_preferred_stack_boundary = (optimize_size
1365 ? TARGET_64BIT ? 128 : 32
1367 if (ix86_preferred_stack_boundary_string)
1369 i = atoi (ix86_preferred_stack_boundary_string);
1370 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1371 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1372 TARGET_64BIT ? 4 : 2);
1374 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1377 /* Validate -mbranch-cost= value, or provide default. */
1378 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1379 if (ix86_branch_cost_string)
1381 i = atoi (ix86_branch_cost_string);
1383 error ("-mbranch-cost=%d is not between 0 and 5", i);
1385 ix86_branch_cost = i;
1388 if (ix86_tls_dialect_string)
1390 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1391 ix86_tls_dialect = TLS_DIALECT_GNU;
1392 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1393 ix86_tls_dialect = TLS_DIALECT_SUN;
1395 error ("bad value (%s) for -mtls-dialect= switch",
1396 ix86_tls_dialect_string);
1399 /* Keep nonleaf frame pointers. */
1400 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1401 flag_omit_frame_pointer = 1;
1403 /* If we're doing fast math, we don't care about comparison order
1404 wrt NaNs. This lets us use a shorter comparison sequence. */
1405 if (flag_unsafe_math_optimizations)
1406 target_flags &= ~MASK_IEEE_FP;
1408 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1409 since the insns won't need emulation. */
1410 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1411 target_flags &= ~MASK_NO_FANCY_MATH_387;
1413 /* Turn on SSE2 builtins for -msse3. */
1415 target_flags |= MASK_SSE2;
1417 /* Turn on SSE builtins for -msse2. */
1419 target_flags |= MASK_SSE;
1423 if (TARGET_ALIGN_DOUBLE)
1424 error ("-malign-double makes no sense in the 64bit mode");
1426 error ("-mrtd calling convention not supported in the 64bit mode");
1427 /* Enable by default the SSE and MMX builtins. */
1428 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1429 ix86_fpmath = FPMATH_SSE;
1433 ix86_fpmath = FPMATH_387;
1434 /* i386 ABI does not specify red zone. It still makes sense to use it
1435 when programmer takes care to stack from being destroyed. */
1436 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1437 target_flags |= MASK_NO_RED_ZONE;
1440 if (ix86_fpmath_string != 0)
1442 if (! strcmp (ix86_fpmath_string, "387"))
1443 ix86_fpmath = FPMATH_387;
1444 else if (! strcmp (ix86_fpmath_string, "sse"))
1448 warning ("SSE instruction set disabled, using 387 arithmetics");
1449 ix86_fpmath = FPMATH_387;
1452 ix86_fpmath = FPMATH_SSE;
1454 else if (! strcmp (ix86_fpmath_string, "387,sse")
1455 || ! strcmp (ix86_fpmath_string, "sse,387"))
1459 warning ("SSE instruction set disabled, using 387 arithmetics");
1460 ix86_fpmath = FPMATH_387;
1462 else if (!TARGET_80387)
1464 warning ("387 instruction set disabled, using SSE arithmetics");
1465 ix86_fpmath = FPMATH_SSE;
1468 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1471 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1474 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1478 target_flags |= MASK_MMX;
1479 x86_prefetch_sse = true;
1482 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1485 target_flags |= MASK_MMX;
1486 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1487 extensions it adds. */
1488 if (x86_3dnow_a & (1 << ix86_arch))
1489 target_flags |= MASK_3DNOW_A;
1491 if ((x86_accumulate_outgoing_args & TUNEMASK)
1492 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1494 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1496 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1499 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1500 p = strchr (internal_label_prefix, 'X');
1501 internal_label_prefix_len = p - internal_label_prefix;
/* Adjust x86-specific defaults for optimization LEVEL.  SIZE (the -Os
   flag) is currently unused here (marked ATTRIBUTE_UNUSED). */
1507 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1509 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1510 make the problem with not enough registers even worse. */
1511 #ifdef INSN_SCHEDULING
1513 flag_schedule_insns = 0;
1516 /* The default values of these switches depend on TARGET_64BIT, which
1517 is not known at this moment. Mark these values with the sentinel 2 and
1518 let the user override them. In case there is no command line option
1519 specifying them, we will set the real defaults in override_options. */
1521 flag_omit_frame_pointer = 2;
1522 flag_pcc_struct_return = 2;
1523 flag_asynchronous_unwind_tables = 2;
1526 /* Table of valid machine attributes. */
1527 const struct attribute_spec ix86_attribute_table[] =
1529 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1530 /* Stdcall attribute says callee is responsible for popping arguments
1531 if they are not variable. */
1532 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1533 /* Fastcall attribute says callee is responsible for popping arguments
1534 if they are not variable. */
1535 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1536 /* Cdecl attribute says the callee is a normal C declaration */
1537 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1538 /* Regparm attribute specifies how many integer arguments are to be
1539 passed in registers. */
1540 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1541 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1542 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1543 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1544 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select Microsoft-compatible vs. native struct
   layout (see ix86_handle_struct_attribute and ix86_ms_bitfield_layout_p). */
1546 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1547 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Sentinel entry terminating the table. */
1548 { NULL, 0, 0, false, false, false, NULL }
1551 /* Decide whether we can make a sibling call to a function. DECL is the
1552 declaration of the function being targeted by the call and EXP is the
1553 CALL_EXPR representing the call. */
/* Implements the TARGET_FUNCTION_OK_FOR_SIBCALL hook (installed above). */
1556 ix86_function_ok_for_sibcall (tree decl, tree exp)
1558 /* If we are generating position-independent code, we cannot sibcall
1559 optimize any indirect call, or a direct call to a global function,
1560 as the PLT requires %ebx be live. */
1561 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1564 /* If we are returning floats on the 80387 register stack, we cannot
1565 make a sibcall from a function that doesn't return a float to a
1566 function that does or, conversely, from a function that does return
1567 a float to a function that doesn't; the necessary stack adjustment
1568 would not be executed. */
1569 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1570 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1573 /* If this call is indirect, we'll need to be able to use a call-clobbered
1574 register for the address of the target function. Make sure that all
1575 such registers are not used for passing parameters. */
1576 if (!decl && !TARGET_64BIT)
1580 /* We're looking at the CALL_EXPR, we need the type of the function. */
1581 type = TREE_OPERAND (exp, 0); /* pointer expression */
1582 type = TREE_TYPE (type); /* pointer type */
1583 type = TREE_TYPE (type); /* function type */
/* regparm >= 3 would consume all call-clobbered integer registers,
   leaving none to hold the target address for an indirect sibcall. */
1585 if (ix86_function_regparm (type, NULL) >= 3)
1587 /* ??? Need to count the actual number of registers to be used,
1588 not the possible number of registers. Fix later. */
1593 /* Otherwise okay. That also includes certain types of indirect calls. */
1597 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1598 arguments as in struct attribute_spec.handler. */
1600 ix86_handle_cdecl_attribute (tree *node, tree name,
1601 tree args ATTRIBUTE_UNUSED,
1602 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* These calling-convention attributes only make sense on function
   types (or decls carrying one); otherwise warn and drop the attribute. */
1604 if (TREE_CODE (*node) != FUNCTION_TYPE
1605 && TREE_CODE (*node) != METHOD_TYPE
1606 && TREE_CODE (*node) != FIELD_DECL
1607 && TREE_CODE (*node) != TYPE_DECL)
1609 warning ("`%s' attribute only applies to functions",
1610 IDENTIFIER_POINTER (name));
1611 *no_add_attrs = true;
/* fastcall, stdcall and regparm each dictate how arguments are
   passed, so combining them on one type is rejected as an error. */
1615 if (is_attribute_p ("fastcall", name))
1617 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1619 error ("fastcall and stdcall attributes are not compatible");
1621 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1623 error ("fastcall and regparm attributes are not compatible");
1626 else if (is_attribute_p ("stdcall", name))
1628 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1630 error ("fastcall and stdcall attributes are not compatible");
1637 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1638 *no_add_attrs = true;
1644 /* Handle a "regparm" attribute;
1645 arguments as in struct attribute_spec.handler. */
1647 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1648 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* regparm only makes sense on function types (or decls carrying one). */
1650 if (TREE_CODE (*node) != FUNCTION_TYPE
1651 && TREE_CODE (*node) != METHOD_TYPE
1652 && TREE_CODE (*node) != FIELD_DECL
1653 && TREE_CODE (*node) != TYPE_DECL)
1655 warning ("`%s' attribute only applies to functions",
1656 IDENTIFIER_POINTER (name));
1657 *no_add_attrs = true;
/* Validate the single attribute argument: an integer constant
   no larger than REGPARM_MAX. */
1663 cst = TREE_VALUE (args);
1664 if (TREE_CODE (cst) != INTEGER_CST)
1666 warning ("`%s' attribute requires an integer constant argument",
1667 IDENTIFIER_POINTER (name));
1668 *no_add_attrs = true;
1670 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1672 warning ("argument to `%s' attribute larger than %d",
1673 IDENTIFIER_POINTER (name), REGPARM_MAX);
1674 *no_add_attrs = true;
/* regparm conflicts with fastcall, which fixes its own registers. */
1677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1679 error ("fastcall and regparm attributes are not compatible");
1686 /* Return 0 if the attributes for two types are incompatible, 1 if they
1687 are compatible, and 2 if they are nearly compatible (which causes a
1688 warning to be generated). */
1691 ix86_comp_type_attributes (tree type1, tree type2)
1693 /* Check for mismatch of non-default calling convention. */
1694 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function types carry calling-convention attributes. */
1696 if (TREE_CODE (type1) != FUNCTION_TYPE)
1699 /* Check for mismatched fastcall types */
1700 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1701 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1704 /* Check for mismatched return types (cdecl vs stdcall). */
1705 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1706 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
/* Both types must also agree on how many arguments go in registers. */
1708 if (ix86_function_regparm (type1, NULL)
1709 != ix86_function_regparm (type2, NULL))
1714 /* Return the regparm value for a function with the indicated TYPE and DECL.
1715 DECL may be NULL when calling function indirectly
1716 or considering a libcall. */
1719 ix86_function_regparm (tree type, tree decl)
/* Start from the global -mregparm setting; an explicit attribute or a
   local-function optimization below may override it. */
1722 int regparm = ix86_regparm;
1723 bool user_convention = false;
1727 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1730 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1731 user_convention = true;
/* fastcall also fixes the convention (it passes args in ecx/edx --
   see init_cumulative_args). */
1734 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1737 user_convention = true;
1740 /* Use register calling convention for local functions when possible. */
1741 if (!TARGET_64BIT && !user_convention && decl
1742 && flag_unit_at_a_time && !profile_flag)
1744 struct cgraph_local_info *i = cgraph_local_info (decl);
1747 /* We can't use regparm(3) for nested functions as these use
1748 static chain pointer in third argument. */
1749 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1759 /* Return true if EAX is live at the start of the function. Used by
1760 ix86_expand_prologue to determine if we need special help before
1761 calling allocate_stack_worker. */
1764 ix86_eax_live_at_start_p (void)
1766 /* Cheat. Don't bother working forward from ix86_function_regparm
1767 to the function type to whether an actual argument is located in
1768 eax. Instead just look at cfg info, which is still close enough
1769 to correct at this point. This gives false positives for broken
1770 functions that might use uninitialized data that happens to be
1771 allocated in eax, but who cares? */
/* Hard register 0 is %eax in this backend's numbering. */
1772 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1775 /* Value is the number of bytes of arguments automatically
1776 popped when returning from a subroutine call.
1777 FUNDECL is the declaration node of the function (as a tree),
1778 FUNTYPE is the data type of the function (as a tree),
1779 or for a library call it is an identifier node for the subroutine name.
1780 SIZE is the number of bytes of arguments passed on the stack.
1782 On the 80386, the RTD insn may be used to pop them if the number
1783 of args is fixed, but if the number is variable then the caller
1784 must pop them all. RTD can't be used for library calls now
1785 because the library is compiled with the Unix compiler.
1786 Use of RTD is a selectable option, since it is incompatible with
1787 standard Unix calling sequences. If the option is not selected,
1788 the caller must always pop the args.
1790 The attribute stdcall is equivalent to RTD on a per module basis. */
1793 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies to real functions only, never to library-call
   identifier nodes. */
1795 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1797 /* Cdecl functions override -mrtd, and never pop the stack. */
1798 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1800 /* Stdcall and fastcall functions will pop the stack if not
1802 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1803 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1807 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1808 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1809 == void_type_node)))
1813 /* Lose any fake structure return argument if it is passed on the stack. */
1814 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1817 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops the hidden struct-return pointer: one Pmode word. */
1820 return GET_MODE_SIZE (Pmode);
1826 /* Argument support functions. */
1828 /* Return true when register may be used to pass function parameters. */
1830 ix86_function_arg_regno_p (int regno)
/* First path: the low REGPARM_MAX integer regs, plus SSE regs when
   SSE is enabled.  (Presumably guarded by !TARGET_64BIT on an elided
   line -- confirm against the full source.) */
1834 return (regno < REGPARM_MAX
1835 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1836 if (SSE_REGNO_P (regno) && TARGET_SSE)
1838 /* RAX is used as hidden argument to va_arg functions. */
/* 64-bit path: scan the x86-64 integer parameter register list. */
1841 for (i = 0; i < REGPARM_MAX; i++)
1842 if (regno == x86_64_int_parameter_registers[i])
1847 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1848 for a call to a function whose data type is FNTYPE.
1849 For a library call, FNTYPE is 0. */
/* NOTE(review): non-contiguous excerpt; some statements (e.g. the
   "*cum = zero_cum;" copy and the fastcall register setup) are elided.  */
1852 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1853 tree fntype, /* tree ptr for function decl */
1854 rtx libname, /* SYMBOL_REF of library name or 0 */
1857 static CUMULATIVE_ARGS zero_cum;
1858 tree param, next_param;
1860 if (TARGET_DEBUG_ARG)
1862 fprintf (stderr, "\ninit_cumulative_args (");
1864 fprintf (stderr, "fntype code = %s, ret code = %s",
1865 tree_code_name[(int) TREE_CODE (fntype)],
1866 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1868 fprintf (stderr, "no fntype");
1871 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1876 /* Set up the number of registers to use for passing arguments. */
1878 cum->nregs = ix86_function_regparm (fntype, fndecl);
1880 cum->nregs = ix86_regparm;
1881 cum->sse_nregs = SSE_REGPARM_MAX;
1882 cum->mmx_nregs = MMX_REGPARM_MAX;
1883 cum->warn_sse = true;
1884 cum->warn_mmx = true;
1885 cum->maybe_vaarg = false;
1887 /* Use ecx and edx registers if function has fastcall attribute */
1888 if (fntype && !TARGET_64BIT)
1890 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1898 /* Determine if this function has variable arguments. This is
1899 indicated by the last argument being 'void_type_mode' if there
1900 are no variable arguments. If there are variable arguments, then
1901 we won't pass anything in registers */
1903 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
/* Walk the declared argument types; a list not terminated by
   void_type_node marks the function as variadic.  */
1905 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1906 param != 0; param = next_param)
1908 next_param = TREE_CHAIN (param);
1909 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1920 cum->maybe_vaarg = true;
/* Unprototyped calls (no type info, or a type with no argument list)
   must also be treated as possibly variadic.  */
1924 if ((!fntype && !libname)
1925 || (fntype && !TYPE_ARG_TYPES (fntype)))
1926 cum->maybe_vaarg = 1;
1928 if (TARGET_DEBUG_ARG)
1929 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1934 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1935 of this code is to classify each 8bytes of incoming argument by the register
1936 class and assign registers accordingly. */
1938 /* Return the union class of CLASS1 and CLASS2.
1939 See the x86-64 PS ABI for details. */
/* NOTE(review): non-contiguous excerpt; the return statements paired with
   rules #1 and #2 are elided here.  Code kept byte-identical.  */
1941 static enum x86_64_reg_class
1942 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1944 /* Rule #1: If both classes are equal, this is the resulting class. */
1945 if (class1 == class2)
1948 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1950 if (class1 == X86_64_NO_CLASS)
1952 if (class2 == X86_64_NO_CLASS)
1955 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1956 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1957 return X86_64_MEMORY_CLASS;
1959 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF keeps the narrow (32-bit) integer class.  */
1960 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1961 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1962 return X86_64_INTEGERSI_CLASS;
1963 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1964 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1965 return X86_64_INTEGER_CLASS;
1967 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1968 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1969 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1970 return X86_64_MEMORY_CLASS;
1972 /* Rule #6: Otherwise class SSE is used. */
1973 return X86_64_SSE_CLASS;
1976 /* Classify the argument of type TYPE and mode MODE.
1977 CLASSES will be filled by the register class used to pass each word
1978 of the operand. The number of words is returned. In case the parameter
1979 should be passed in memory, 0 is returned. As a special case for zero
1980 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1982 BIT_OFFSET is used internally for handling records and specifies offset
1983 of the offset in bits modulo 256 to avoid overflow cases.
1985 See the x86-64 PS ABI for details.
/* NOTE(review): this excerpt is heavily non-contiguous -- return type,
   braces, several returns, the switch over modes, and assignment targets
   are elided.  Visible code is kept byte-identical.  */
1989 classify_argument (enum machine_mode mode, tree type,
1990 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1992 HOST_WIDE_INT bytes =
1993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Word count accounts for the sub-word bit offset within the first word.  */
1994 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1996 /* Variable sized entities are always passed/returned in memory. */
2000 if (mode != VOIDmode
2001 && MUST_PASS_IN_STACK (mode, type))
2004 if (type && AGGREGATE_TYPE_P (type))
2008 enum x86_64_reg_class subclasses[MAX_CLASSES];
2010 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2014 for (i = 0; i < words; i++)
2015 classes[i] = X86_64_NO_CLASS;
2017 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2018 signalize memory class, so handle it as special case. */
2021 classes[0] = X86_64_NO_CLASS;
2025 /* Classify each field of record and merge classes. */
2026 if (TREE_CODE (type) == RECORD_TYPE)
2028 /* For classes first merge in the field of the subclasses. */
2029 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2031 tree bases = TYPE_BINFO_BASETYPES (type);
2032 int n_bases = TREE_VEC_LENGTH (bases);
/* Recursively classify each C++ base class at its bit offset, then
   merge the result into CLASSES word by word.  */
2035 for (i = 0; i < n_bases; ++i)
2037 tree binfo = TREE_VEC_ELT (bases, i);
2039 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2040 tree type = BINFO_TYPE (binfo);
2042 num = classify_argument (TYPE_MODE (type),
2044 (offset + bit_offset) % 256);
2047 for (i = 0; i < num; i++)
2049 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2051 merge_classes (subclasses[i], classes[i + pos]);
2055 /* And now merge the fields of structure. */
2056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2058 if (TREE_CODE (field) == FIELD_DECL)
2062 /* Bitfields are always classified as integer. Handle them
2063 early, since later code would consider them to be
2064 misaligned integers. */
2065 if (DECL_BIT_FIELD (field))
2067 for (i = int_bit_position (field) / 8 / 8;
2068 i < (int_bit_position (field)
2069 + tree_low_cst (DECL_SIZE (field), 0)
2072 merge_classes (X86_64_INTEGER_CLASS,
2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078 TREE_TYPE (field), subclasses,
2079 (int_bit_position (field)
2080 + bit_offset) % 256);
2083 for (i = 0; i < num; i++)
2086 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2088 merge_classes (subclasses[i], classes[i + pos]);
2094 /* Arrays are handled as small records. */
2095 else if (TREE_CODE (type) == ARRAY_TYPE)
/* Classify the element type once, then replicate it across the words.  */
2098 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2099 TREE_TYPE (type), subclasses, bit_offset);
2103 /* The partial classes are now full classes. */
2104 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2105 subclasses[0] = X86_64_SSE_CLASS;
2106 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2107 subclasses[0] = X86_64_INTEGER_CLASS;
2109 for (i = 0; i < words; i++)
2110 classes[i] = subclasses[i % num];
2112 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2113 else if (TREE_CODE (type) == UNION_TYPE
2114 || TREE_CODE (type) == QUAL_UNION_TYPE)
2116 /* For classes first merge in the field of the subclasses. */
2117 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2119 tree bases = TYPE_BINFO_BASETYPES (type);
2120 int n_bases = TREE_VEC_LENGTH (bases);
2123 for (i = 0; i < n_bases; ++i)
2125 tree binfo = TREE_VEC_ELT (bases, i);
2127 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2128 tree type = BINFO_TYPE (binfo);
2130 num = classify_argument (TYPE_MODE (type),
2132 (offset + (bit_offset % 64)) % 256);
2135 for (i = 0; i < num; i++)
2137 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2139 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all start at offset 0, so merge without a POS shift.  */
2143 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2145 if (TREE_CODE (field) == FIELD_DECL)
2148 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2149 TREE_TYPE (field), subclasses,
2153 for (i = 0; i < num; i++)
2154 classes[i] = merge_classes (subclasses[i], classes[i]);
2158 else if (TREE_CODE (type) == SET_TYPE)
2162 classes[0] = X86_64_INTEGERSI_CLASS;
2165 else if (bytes <= 8)
2167 classes[0] = X86_64_INTEGER_CLASS;
2170 else if (bytes <= 12)
2172 classes[0] = X86_64_INTEGER_CLASS;
2173 classes[1] = X86_64_INTEGERSI_CLASS;
2178 classes[0] = X86_64_INTEGER_CLASS;
2179 classes[1] = X86_64_INTEGER_CLASS;
2186 /* Final merger cleanup. */
2187 for (i = 0; i < words; i++)
2189 /* If one class is MEMORY, everything should be passed in
2191 if (classes[i] == X86_64_MEMORY_CLASS)
2194 /* The X86_64_SSEUP_CLASS should be always preceded by
2195 X86_64_SSE_CLASS. */
2196 if (classes[i] == X86_64_SSEUP_CLASS
2197 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2198 classes[i] = X86_64_SSE_CLASS;
2200 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2201 if (classes[i] == X86_64_X87UP_CLASS
2202 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2203 classes[i] = X86_64_SSE_CLASS;
2208 /* Compute alignment needed. We align all types to natural boundaries with
2209 exception of XFmode that is aligned to 64bits. */
2210 if (mode != VOIDmode && mode != BLKmode)
2212 int mode_alignment = GET_MODE_BITSIZE (mode);
2215 mode_alignment = 128;
2216 else if (mode == XCmode)
2217 mode_alignment = 256;
2218 if (COMPLEX_MODE_P (mode))
2219 mode_alignment /= 2;
2220 /* Misaligned fields are always returned in memory. */
2221 if (bit_offset % mode_alignment)
2225 /* Classification of atomic types. */
/* NOTE(review): the switch statement over MODE and most case labels are
   elided from this excerpt; only the classification bodies remain.  */
2235 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2236 classes[0] = X86_64_INTEGERSI_CLASS;
2238 classes[0] = X86_64_INTEGER_CLASS;
2242 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2245 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2246 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2249 if (!(bit_offset % 64))
2250 classes[0] = X86_64_SSESF_CLASS;
2252 classes[0] = X86_64_SSE_CLASS;
2255 classes[0] = X86_64_SSEDF_CLASS;
2258 classes[0] = X86_64_X87_CLASS;
2259 classes[1] = X86_64_X87UP_CLASS;
2265 classes[0] = X86_64_X87_CLASS;
2266 classes[1] = X86_64_X87UP_CLASS;
2267 classes[2] = X86_64_X87_CLASS;
2268 classes[3] = X86_64_X87UP_CLASS;
2271 classes[0] = X86_64_SSEDF_CLASS;
2272 classes[1] = X86_64_SSEDF_CLASS;
2275 classes[0] = X86_64_SSE_CLASS;
2283 classes[0] = X86_64_SSE_CLASS;
2284 classes[1] = X86_64_SSEUP_CLASS;
2299 /* Examine the argument and return set number of register required in each
2300 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): non-contiguous excerpt; the switch header, the counter
   increments ((*int_nregs)++ / (*sse_nregs)++ presumably), break and
   return statements are elided.  Code kept byte-identical.  */
2302 examine_argument (enum machine_mode mode, tree type, int in_return,
2303 int *int_nregs, int *sse_nregs)
2305 enum x86_64_reg_class class[MAX_CLASSES];
2306 int n = classify_argument (mode, type, class, 0);
/* Walk the per-word classes backwards, tallying register needs.  */
2312 for (n--; n >= 0; n--)
2315 case X86_64_INTEGER_CLASS:
2316 case X86_64_INTEGERSI_CLASS:
2319 case X86_64_SSE_CLASS:
2320 case X86_64_SSESF_CLASS:
2321 case X86_64_SSEDF_CLASS:
2324 case X86_64_NO_CLASS:
2325 case X86_64_SSEUP_CLASS:
2327 case X86_64_X87_CLASS:
2328 case X86_64_X87UP_CLASS:
2332 case X86_64_MEMORY_CLASS:
2337 /* Construct container for the argument used by GCC interface. See
2338 FUNCTION_ARG for the detailed description. */
/* NOTE(review): non-contiguous excerpt; return type, braces, several
   switch headers, "return NULL" paths, and increments of sse_regno /
   intreg are elided.  Code kept byte-identical.  */
2340 construct_container (enum machine_mode mode, tree type, int in_return,
2341 int nintregs, int nsseregs, const int * intreg,
2344 enum machine_mode tmpmode;
2346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2347 enum x86_64_reg_class class[MAX_CLASSES];
2351 int needed_sseregs, needed_intregs;
2352 rtx exp[MAX_CLASSES];
2355 n = classify_argument (mode, type, class, 0);
2356 if (TARGET_DEBUG_ARG)
2359 fprintf (stderr, "Memory class\n");
2362 fprintf (stderr, "Classes:");
2363 for (i = 0; i < n; i++)
2365 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2367 fprintf (stderr, "\n");
/* Bail out (pass in memory) when classification says memory, or when
   more registers are needed than remain available.  */
2372 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2374 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2377 /* First construct simple cases. Avoid SCmode, since we want to use
2378 single register to pass this type. */
2379 if (n == 1 && mode != SCmode)
2382 case X86_64_INTEGER_CLASS:
2383 case X86_64_INTEGERSI_CLASS:
2384 return gen_rtx_REG (mode, intreg[0]);
2385 case X86_64_SSE_CLASS:
2386 case X86_64_SSESF_CLASS:
2387 case X86_64_SSEDF_CLASS:
2388 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2389 case X86_64_X87_CLASS:
2390 return gen_rtx_REG (mode, FIRST_STACK_REG);
2391 case X86_64_NO_CLASS:
2392 /* Zero sized array, struct or class. */
/* Two-word special cases: whole value fits one SSE reg, the x87 stack
   top, an aligned integer register pair, or the x87 complex pair.  */
2397 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2399 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2401 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2402 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2403 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2404 && class[1] == X86_64_INTEGER_CLASS
2405 && (mode == CDImode || mode == TImode || mode == TFmode)
2406 && intreg[0] + 1 == intreg[1])
2407 return gen_rtx_REG (mode, intreg[0]);
2409 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2410 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2412 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2414 /* Otherwise figure out the entries of the PARALLEL. */
2415 for (i = 0; i < n; i++)
2419 case X86_64_NO_CLASS:
2421 case X86_64_INTEGER_CLASS:
2422 case X86_64_INTEGERSI_CLASS:
2423 /* Merge TImodes on aligned occasions here too. */
2424 if (i * 8 + 8 > bytes)
2425 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2426 else if (class[i] == X86_64_INTEGERSI_CLASS)
2430 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2431 if (tmpmode == BLKmode)
2433 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2434 gen_rtx_REG (tmpmode, *intreg),
2438 case X86_64_SSESF_CLASS:
2439 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2440 gen_rtx_REG (SFmode,
2441 SSE_REGNO (sse_regno)),
2445 case X86_64_SSEDF_CLASS:
2446 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2447 gen_rtx_REG (DFmode,
2448 SSE_REGNO (sse_regno)),
2452 case X86_64_SSE_CLASS:
/* An SSEUP word following means this SSE word carries 16 bytes.  */
2453 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2457 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2458 gen_rtx_REG (tmpmode,
2459 SSE_REGNO (sse_regno)),
2461 if (tmpmode == TImode)
/* Assemble the collected EXPR_LISTs into one PARALLEL.  */
2469 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2470 for (i = 0; i < nexps; i++)
2471 XVECEXP (ret, 0, i) = exp [i];
2475 /* Update the data in CUM to advance over an argument
2476 of mode MODE and data type TYPE.
2477 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): non-contiguous excerpt; the TARGET_64BIT branch header
   and several closing braces/resets are elided.  Code kept byte-identical.  */
2480 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2481 enum machine_mode mode, /* current arg mode */
2482 tree type, /* type of the argument or 0 if lib support */
2483 int named) /* whether or not the argument was named */
2486 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2487 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2489 if (TARGET_DEBUG_ARG)
2491 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2492 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2495 int int_nregs, sse_nregs;
/* Memory-class arguments consume stack words; register-class arguments
   consume integer/SSE registers when enough of each remain.  */
2496 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2497 cum->words += words;
2498 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2500 cum->nregs -= int_nregs;
2501 cum->sse_nregs -= sse_nregs;
2502 cum->regno += int_nregs;
2503 cum->sse_regno += sse_nregs;
2506 cum->words += words;
2510 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2511 && (!type || !AGGREGATE_TYPE_P (type)))
2513 cum->sse_words += words;
2514 cum->sse_nregs -= 1;
2515 cum->sse_regno += 1;
2516 if (cum->sse_nregs <= 0)
2522 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2523 && (!type || !AGGREGATE_TYPE_P (type)))
2525 cum->mmx_words += words;
2526 cum->mmx_nregs -= 1;
2527 cum->mmx_regno += 1;
2528 if (cum->mmx_nregs <= 0)
2536 cum->words += words;
2537 cum->nregs -= words;
2538 cum->regno += words;
2540 if (cum->nregs <= 0)
2550 /* A subroutine of function_arg. We want to pass a parameter whose nominal
2551 type is MODE in REGNO. We try to minimize ABI variation, so MODE may not
2552 actually be valid for REGNO with the current ISA. In this case, ALT_MODE
2553 is used instead. It must be the same size as MODE, and must be known to
2554 be valid for REGNO. Finally, ORIG_MODE is the original mode of the
2555 parameter, as seen by the type system. This may be different from MODE
2556 when we're mucking with things minimizing ABI variations.
2558 Returns a REG or a PARALLEL as appropriate. */
/* NOTE(review): non-contiguous excerpt; return type, braces, the "else"
   keyword and the final return are elided.  Code kept byte-identical.  */
2561 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode,
2562 enum machine_mode orig_mode, unsigned int regno)
2566 if (HARD_REGNO_MODE_OK (regno, mode))
2567 tmp = gen_rtx_REG (mode, regno);
/* Fallback: wrap an ALT_MODE register in a one-entry PARALLEL carrying
   ORIG_MODE, so callers see the type-system mode.  */
2570 tmp = gen_rtx_REG (alt_mode, regno);
2571 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2572 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2578 /* Define where to put the arguments to a function.
2579 Value is zero to push the argument on the stack,
2580 or a hard register in which to store the argument.
2582 MODE is the argument's machine mode.
2583 TYPE is the data type of the argument (as a tree).
2584 This is null for libcalls where that information may
2586 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2587 the preceding args and about the function being called.
2588 NAMED is nonzero if this argument is a named parameter
2589 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): non-contiguous excerpt; return type, the TARGET_64BIT /
   mode-switch structure, and several branches are elided.  Code kept
   byte-identical.  */
2592 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2593 tree type, int named)
2595 enum machine_mode mode = orig_mode;
2598 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2599 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2600 static bool warnedsse, warnedmmx;
2602 /* Handle a hidden AL argument containing number of registers for varargs
2603 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2605 if (mode == VOIDmode)
2608 return GEN_INT (cum->maybe_vaarg
2609 ? (cum->sse_nregs < 0
2617 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2618 &x86_64_int_parameter_registers [cum->regno],
2623 /* For now, pass fp/complex values on the stack. */
2635 if (words <= cum->nregs)
2637 int regno = cum->regno;
2639 /* Fastcall allocates the first two DWORD (SImode) or
2640 smaller arguments to ECX and EDX. */
2643 if (mode == BLKmode || mode == DImode)
2646 /* ECX not EAX is the first allocated register. */
2650 ret = gen_rtx_REG (mode, regno);
/* SSE vector modes: warn once if SSE is disabled, else hand out the
   next SSE register (TImode fallback via gen_reg_or_parallel).  */
2660 if (!type || !AGGREGATE_TYPE_P (type))
2662 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
2665 warning ("SSE vector argument without SSE enabled "
2669 ret = gen_reg_or_parallel (mode, TImode, orig_mode,
2670 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector modes: analogous one-shot warning and MMX register.  */
2677 if (!type || !AGGREGATE_TYPE_P (type))
2679 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2682 warning ("MMX vector argument without MMX enabled "
2686 ret = gen_reg_or_parallel (mode, DImode, orig_mode,
2687 cum->mmx_regno + FIRST_MMX_REG);
2692 if (TARGET_DEBUG_ARG)
2695 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2696 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2699 print_simple_rtl (stderr, ret);
2701 fprintf (stderr, ", stack");
2703 fprintf (stderr, " )\n");
2709 /* A C expression that indicates when an argument must be passed by
2710 reference. If nonzero for an argument, a copy of that argument is
2711 made in memory and a pointer to the argument is passed instead of
2712 the argument itself. The pointer is passed in whatever way is
2713 appropriate for passing a pointer to that type. */
/* NOTE(review): non-contiguous excerpt; return type, braces and return
   statements are elided.  Code kept byte-identical.  */
2716 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2717 enum machine_mode mode ATTRIBUTE_UNUSED,
2718 tree type, int named ATTRIBUTE_UNUSED)
/* int_size_in_bytes == -1 marks a variable-sized type.  */
2723 if (type && int_size_in_bytes (type) == -1)
2725 if (TARGET_DEBUG_ARG)
2726 fprintf (stderr, "function_arg_pass_by_reference\n");
2733 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): non-contiguous excerpt; return type, braces and several
   return true/false statements are elided.  Code kept byte-identical.  */
2736 contains_128bit_aligned_vector_p (tree type)
2738 enum machine_mode mode = TYPE_MODE (type);
2739 if (SSE_REG_MODE_P (mode)
2740 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2742 if (TYPE_ALIGN (type) < 128)
2745 if (AGGREGATE_TYPE_P (type))
2747 /* Walk the aggregates recursively. */
2748 if (TREE_CODE (type) == RECORD_TYPE
2749 || TREE_CODE (type) == UNION_TYPE
2750 || TREE_CODE (type) == QUAL_UNION_TYPE)
2754 if (TYPE_BINFO (type) != NULL
2755 && TYPE_BINFO_BASETYPES (type) != NULL)
2757 tree bases = TYPE_BINFO_BASETYPES (type);
2758 int n_bases = TREE_VEC_LENGTH (bases);
/* Recurse into each C++ base class.  */
2761 for (i = 0; i < n_bases; ++i)
2763 tree binfo = TREE_VEC_ELT (bases, i);
2764 tree type = BINFO_TYPE (binfo);
2766 if (contains_128bit_aligned_vector_p (type))
2770 /* And now merge the fields of structure. */
2771 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2773 if (TREE_CODE (field) == FIELD_DECL
2774 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2778 /* Just for use if some languages passes arrays by value. */
2779 else if (TREE_CODE (type) == ARRAY_TYPE)
2781 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2790 /* Gives the alignment boundary, in bits, of an argument with the
2791 specified mode and type. */
/* NOTE(review): non-contiguous excerpt; return type, braces, the
   TARGET_64BIT check and final return are elided.  Code kept
   byte-identical.  */
2794 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2798 align = TYPE_ALIGN (type);
2800 align = GET_MODE_ALIGNMENT (mode);
2801 if (align < PARM_BOUNDARY)
2802 align = PARM_BOUNDARY;
2805 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2806 make an exception for SSE modes since these require 128bit
2809 The handling here differs from field_alignment. ICC aligns MMX
2810 arguments to 4 byte boundaries, while structure fields are aligned
2811 to 8 byte boundaries. */
2814 if (!SSE_REG_MODE_P (mode))
2815 align = PARM_BOUNDARY;
/* Aggregates keep extra alignment only when they contain a 128-bit
   aligned vector somewhere inside.  */
2819 if (!contains_128bit_aligned_vector_p (type))
2820 align = PARM_BOUNDARY;
2828 /* Return true if N is a possible register number of function value. */
/* NOTE(review): non-contiguous excerpt; return type and the 64-bit/32-bit
   branch structure are elided.  Code kept byte-identical.  */
2830 ix86_function_value_regno_p (int regno)
2834 return ((regno) == 0
2835 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2836 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2838 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2839 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2840 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2843 /* Define how to find the value returned by a function.
2844 VALTYPE is the data type of the value (as a tree).
2845 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2846 otherwise, FUNC is 0. */
/* NOTE(review): non-contiguous excerpt; return type, the TARGET_64BIT
   branch and braces are elided.  Code kept byte-identical.  */
2848 ix86_function_value (tree valtype)
/* 64-bit: build the return-value PARALLEL via the ABI classifier.  */
2852 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2853 REGPARM_MAX, SSE_REGPARM_MAX,
2854 x86_64_int_return_registers, 0);
2855 /* For zero sized structures, construct_container return NULL, but we need
2856 to keep rest of compiler happy by returning meaningful value. */
2858 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* 32-bit: the register is chosen by ix86_value_regno.  */
2862 return gen_rtx_REG (TYPE_MODE (valtype),
2863 ix86_value_regno (TYPE_MODE (valtype)));
2866 /* Return false iff type is returned in memory. */
/* NOTE(review): non-contiguous excerpt; return type, braces, size checks
   and most return statements are elided.  Code kept byte-identical.  */
2868 ix86_return_in_memory (tree type)
2870 int needed_intregs, needed_sseregs, size;
2871 enum machine_mode mode = TYPE_MODE (type);
/* 64-bit: memory return iff the classifier rejects register passing.  */
2874 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2876 if (mode == BLKmode)
2879 size = int_size_in_bytes (type);
2881 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2884 if (VECTOR_MODE_P (mode) || mode == TImode)
2886 /* User-created vectors small enough to fit in EAX. */
2890 /* MMX/3dNow values are returned on the stack, since we've
2891 got to EMMS/FEMMS before returning. */
2895 /* SSE values are returned in XMM0. */
2896 /* ??? Except when it doesn't exist? We have a choice of
2897 either (1) being abi incompatible with a -march switch,
2898 or (2) generating an error here. Given no good solution,
2899 I think the safest thing is one warning. The user won't
2900 be able to use -Werror, but.... */
2911 warning ("SSE vector return without SSE enabled "
2926 /* Define how to find the value returned by a library function
2927 assuming the value has mode MODE. */
/* NOTE(review): non-contiguous excerpt; return type, the TARGET_64BIT
   check and the mode-switch structure are elided.  Code kept
   byte-identical.  */
2929 ix86_libcall_value (enum machine_mode mode)
2939 return gen_rtx_REG (mode, FIRST_SSE_REG)
2942 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2947 return gen_rtx_REG (mode, 0);
/* 32-bit path delegates register choice to ix86_value_regno.  */
2951 return gen_rtx_REG (mode, ix86_value_regno (mode));
2954 /* Given a mode, return the register to use for a return value. */
/* NOTE(review): non-contiguous excerpt; return type and the final
   "return 0;" are elided.  Code kept byte-identical.  */
2957 ix86_value_regno (enum machine_mode mode)
2959 /* Floating point return values in %st(0). */
2960 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2961 return FIRST_FLOAT_REG;
2962 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2963 we prevent this case when sse is not available. */
2964 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2965 return FIRST_SSE_REG;
2966 /* Everything else in %eax. */
2970 /* Create the va_list data type. */
/* NOTE(review): non-contiguous excerpt; return type, braces, the
   TARGET_64BIT check, and the f_ovf/f_sav field types are elided.
   Code kept byte-identical.  */
2973 ix86_build_builtin_va_list (void)
2975 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2977 /* For i386 we use plain pointer to argument area. */
2979 return build_pointer_type (char_type_node);
/* 64-bit: build the four-field __va_list_tag record required by the
   x86-64 psABI (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
2981 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2982 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2984 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2985 unsigned_type_node);
2986 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2987 unsigned_type_node);
2988 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2990 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2993 DECL_FIELD_CONTEXT (f_gpr) = record;
2994 DECL_FIELD_CONTEXT (f_fpr) = record;
2995 DECL_FIELD_CONTEXT (f_ovf) = record;
2996 DECL_FIELD_CONTEXT (f_sav) = record;
2998 TREE_CHAIN (record) = type_decl;
2999 TYPE_NAME (record) = type_decl;
3000 TYPE_FIELDS (record) = f_gpr;
3001 TREE_CHAIN (f_gpr) = f_fpr;
3002 TREE_CHAIN (f_fpr) = f_ovf;
3003 TREE_CHAIN (f_ovf) = f_sav;
3005 layout_type (record);
3007 /* The correct type is an array type of one element. */
3008 return build_array_type (record, build_index_type (size_zero_node));
3011 /* Perform any needed actions needed for a function that is receiving a
3012 variable number of arguments.
3016 MODE and TYPE are the mode and type of the current parameter.
3018 PRETEND_SIZE is a variable that should be set to the amount of stack
3019 that must be pushed by the prolog to pretend that our caller pushed
3022 Normally, this macro will push all remaining incoming registers on the
3023 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* NOTE(review): non-contiguous excerpt; several locals, the TARGET_64BIT
   guard, the next_cum copy and some braces are elided.  Code kept
   byte-identical.  */
3026 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3027 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3030 CUMULATIVE_ARGS next_cum;
3031 rtx save_area = NULL_RTX, mem;
3044 /* Indicate to allocate space on the stack for varargs save area. */
3045 ix86_save_varrargs_registers = 1;
/* Register save area holds SSE registers, so needs 128-bit alignment.  */
3047 cfun->stack_alignment_needed = 128;
3049 fntype = TREE_TYPE (current_function_decl);
3050 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3051 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3052 != void_type_node));
3054 /* For varargs, we do not want to skip the dummy va_dcl argument.
3055 For stdargs, we do want to skip the last named argument. */
3058 function_arg_advance (&next_cum, mode, type, 1);
3061 save_area = frame_pointer_rtx;
3063 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer parameter registers to the save
   area, one word each.  */
3065 for (i = next_cum.regno; i < ix86_regparm; i++)
3067 mem = gen_rtx_MEM (Pmode,
3068 plus_constant (save_area, i * UNITS_PER_WORD));
3069 set_mem_alias_set (mem, set);
3070 emit_move_insn (mem, gen_rtx_REG (Pmode,
3071 x86_64_int_parameter_registers[i]));
3074 if (next_cum.sse_nregs)
3076 /* Now emit code to save SSE registers. The AX parameter contains number
3077 of SSE parameter registers used to call this function. We use
3078 sse_prologue_save insn template that produces computed jump across
3079 SSE saves. We need some preparation work to get this working. */
3081 label = gen_label_rtx ();
3082 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3084 /* Compute address to jump to :
3085 label - 5*eax + nnamed_sse_arguments*5 */
3086 tmp_reg = gen_reg_rtx (Pmode);
3087 nsse_reg = gen_reg_rtx (Pmode);
/* AL carries the count of SSE registers actually used by the caller.  */
3088 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3089 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3090 gen_rtx_MULT (Pmode, nsse_reg,
3092 if (next_cum.sse_regno)
3095 gen_rtx_CONST (DImode,
3096 gen_rtx_PLUS (DImode,
3098 GEN_INT (next_cum.sse_regno * 4))));
3100 emit_move_insn (nsse_reg, label_ref);
3101 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3103 /* Compute address of memory block we save into. We always use pointer
3104 pointing 127 bytes after first byte to store - this is needed to keep
3105 instruction size limited by 4 bytes. */
3106 tmp_reg = gen_reg_rtx (Pmode);
3107 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3108 plus_constant (save_area,
3109 8 * REGPARM_MAX + 127)));
3110 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3111 set_mem_alias_set (mem, set);
3112 set_mem_align (mem, BITS_PER_WORD);
3114 /* And finally do the dirty job! */
3115 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3116 GEN_INT (next_cum.sse_regno), label));
3121 /* Implement va_start. */
/* NOTE(review): non-contiguous excerpt; return type, braces, the
   TARGET_64BIT guard and an early return are elided.  Code kept
   byte-identical.  */
3124 ix86_va_start (tree valist, rtx nextarg)
3126 HOST_WIDE_INT words, n_gpr, n_fpr;
3127 tree f_gpr, f_fpr, f_ovf, f_sav;
3128 tree gpr, fpr, ovf, sav, t;
3130 /* Only 64bit target needs something special. */
3133 std_expand_builtin_va_start (valist, nextarg);
/* Pick apart the four __va_list_tag fields (see
   ix86_build_builtin_va_list) and build COMPONENT_REFs to them.  */
3137 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3138 f_fpr = TREE_CHAIN (f_gpr);
3139 f_ovf = TREE_CHAIN (f_fpr);
3140 f_sav = TREE_CHAIN (f_ovf);
3142 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3143 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3144 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3145 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3146 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3148 /* Count number of gp and fp argument registers used. */
3149 words = current_function_args_info.words;
3150 n_gpr = current_function_args_info.regno;
3151 n_fpr = current_function_args_info.sse_regno;
3153 if (TARGET_DEBUG_ARG)
3154 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3155 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes of integer registers already consumed (8 each).  */
3157 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3158 build_int_2 (n_gpr * 8, 0));
3159 TREE_SIDE_EFFECTS (t) = 1;
3160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past all integer slots; SSE slots are 16 bytes.  */
3162 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3163 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3164 TREE_SIDE_EFFECTS (t) = 1;
3165 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3167 /* Find the overflow area. */
3168 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3170 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3171 build_int_2 (words * UNITS_PER_WORD, 0));
3172 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3173 TREE_SIDE_EFFECTS (t) = 1;
3174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3176 /* Find the register save area.
3177 Prologue of the function save it right above stack frame. */
3178 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3179 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3180 TREE_SIDE_EFFECTS (t) = 1;
3181 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3184 /* Implement va_arg. */
/* NOTE(review): this listing is missing lines (return type, braces and
   several statements between the numbered lines); comments below describe
   only what is visible.  Expands the va_arg machinery for the x86-64 ABI:
   arguments are fetched either from the register save area (gpr/fpr
   cursors into SAV) or from the stack overflow area (OVF).  */
3186 ix86_va_arg (tree valist, tree type)
3188 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3189 tree f_gpr, f_fpr, f_ovf, f_sav;
3190 tree gpr, fpr, ovf, sav, t;
3192 rtx lab_false, lab_over = NULL_RTX;
3197 /* Only 64bit target needs something special. */
3200 return std_expand_builtin_va_arg (valist, type);
/* Pick apart the four fields of the x86-64 va_list struct.  */
3203 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3204 f_fpr = TREE_CHAIN (f_gpr);
3205 f_ovf = TREE_CHAIN (f_fpr);
3206 f_sav = TREE_CHAIN (f_ovf);
3208 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3209 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3210 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3211 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3212 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3214 size = int_size_in_bytes (type);
3217 /* Passed by reference: fetch a pointer to the value instead.  */
3219 type = build_pointer_type (type);
3220 size = int_size_in_bytes (type);
/* rsize: argument size rounded up to whole words.  */
3222 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3224 container = construct_container (TYPE_MODE (type), type, 0,
3225 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3227 * Pull the value out of the saved registers ...
3230 addr_rtx = gen_reg_rtx (Pmode);
3234 rtx int_addr_rtx, sse_addr_rtx;
3235 int needed_intregs, needed_sseregs;
3238 lab_over = gen_label_rtx ();
3239 lab_false = gen_label_rtx ();
3241 examine_argument (TYPE_MODE (type), type, 0,
3242 &needed_intregs, &needed_sseregs);
/* Over-aligned arguments cannot be read straight from the save area.  */
3245 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3246 || TYPE_ALIGN (type) > 128);
3248 /* If we are passing a structure, verify that it is a consecutive block
3249 in the register save area. If not we need to do moves. */
3250 if (!need_temp && !REG_P (container))
3252 /* Verify that all registers are strictly consecutive */
3253 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE regs are saved in 16-byte slots ...  */
3257 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3259 rtx slot = XVECEXP (container, 0, i);
3260 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3261 || INTVAL (XEXP (slot, 1)) != i * 16)
/* ... integer regs in 8-byte slots.  */
3269 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3271 rtx slot = XVECEXP (container, 0, i);
3272 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3273 || INTVAL (XEXP (slot, 1)) != i * 8)
3280 int_addr_rtx = addr_rtx;
3281 sse_addr_rtx = addr_rtx;
3285 int_addr_rtx = gen_reg_rtx (Pmode);
3286 sse_addr_rtx = gen_reg_rtx (Pmode);
3288 /* First ensure that we fit completely in registers;
   otherwise jump to lab_false and read from the overflow area.  */
3291 emit_cmp_and_jump_insns (expand_expr
3292 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3293 GEN_INT ((REGPARM_MAX - needed_intregs +
3294 1) * 8), GE, const1_rtx, SImode,
3299 emit_cmp_and_jump_insns (expand_expr
3300 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3301 GEN_INT ((SSE_REGPARM_MAX -
3302 needed_sseregs + 1) * 16 +
3303 REGPARM_MAX * 8), GE, const1_rtx,
3304 SImode, 1, lab_false);
3307 /* Compute index to start of area used for integer regs. */
3310 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3311 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3312 if (r != int_addr_rtx)
3313 emit_move_insn (int_addr_rtx, r);
/* Likewise for the SSE register area.  */
3317 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3318 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3319 if (r != sse_addr_rtx)
3320 emit_move_insn (sse_addr_rtx, r);
3328 /* Never use the memory itself, as it has the alias set. */
3329 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3330 mem = gen_rtx_MEM (BLKmode, x);
3331 force_operand (x, addr_rtx);
3332 set_mem_alias_set (mem, get_varargs_alias_set ());
3333 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each piece of the argument from its save-area slot into the
   temporary, at the offset recorded in the container.  */
3335 for (i = 0; i < XVECLEN (container, 0); i++)
3337 rtx slot = XVECEXP (container, 0, i);
3338 rtx reg = XEXP (slot, 0);
3339 enum machine_mode mode = GET_MODE (reg);
3345 if (SSE_REGNO_P (REGNO (reg)))
3347 src_addr = sse_addr_rtx;
3348 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3352 src_addr = int_addr_rtx;
3353 src_offset = REGNO (reg) * 8;
3355 src_mem = gen_rtx_MEM (mode, src_addr);
3356 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3357 src_mem = adjust_address (src_mem, mode, src_offset);
3358 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3359 emit_move_insn (dest_mem, src_mem);
/* Advance the gpr/fpr cursors past the registers we consumed.  */
3366 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3367 build_int_2 (needed_intregs * 8, 0));
3368 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3369 TREE_SIDE_EFFECTS (t) = 1;
3370 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3375 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3376 build_int_2 (needed_sseregs * 16, 0));
3377 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3378 TREE_SIDE_EFFECTS (t) = 1;
3379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3382 emit_jump_insn (gen_jump (lab_over));
3384 emit_label (lab_false);
3387 /* ... otherwise out of the overflow area. */
3389 /* Care for on-stack alignment if needed. */
3390 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3394 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
/* Round OVF up to the required alignment: (ovf + align-1) & -align.  */
3395 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3396 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3400 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3402 emit_move_insn (addr_rtx, r);
/* Bump OVF past the argument just consumed.  */
3405 build (PLUS_EXPR, TREE_TYPE (t), t,
3406 build_int_2 (rsize * UNITS_PER_WORD, 0));
3407 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3408 TREE_SIDE_EFFECTS (t) = 1;
3409 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3412 emit_label (lab_over);
/* Pass-by-reference case: dereference the pointer we computed.  */
3416 r = gen_rtx_MEM (Pmode, addr_rtx);
3417 set_mem_alias_set (r, get_varargs_alias_set ());
3418 emit_move_insn (addr_rtx, r);
3424 /* Return nonzero if OP is either a i387 or SSE fp register. */
/* match_operand predicate; MODE is deliberately not consulted.  */
3426 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3428 return ANY_FP_REG_P (op);
3431 /* Return nonzero if OP is an i387 fp register. */
/* Unlike any_fp_register_operand, SSE registers are rejected here.  */
3433 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3435 return FP_REG_P (op);
3438 /* Return nonzero if OP is a non-fp register_operand. */
/* Accepts anything register_operand does, minus i387 and SSE regs.  */
3440 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3442 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3445 /* Return nonzero if OP is a register operand other than an
3446 i387 fp register. */
/* SSE registers still pass; only the x87 stack regs are excluded.  */
3448 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3450 return register_operand (op, mode) && !FP_REG_P (op);
3453 /* Return nonzero if OP is general operand representable on x86_64. */
/* On 32-bit targets this degrades to plain general_operand; on 64-bit,
   immediates must fit in a sign-extended 32-bit field.  */
3456 x86_64_general_operand (rtx op, enum machine_mode mode)
3459 return general_operand (op, mode);
3460 if (nonimmediate_operand (op, mode))
3462 return x86_64_sign_extended_value (op);
3465 /* Return nonzero if OP is general operand representable on x86_64
3466 as either sign extended or zero extended constant. */
3469 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3472 return general_operand (op, mode);
3473 if (nonimmediate_operand (op, mode))
/* Constants are fine if they fit either the signed or unsigned
   32-bit immediate encoding.  */
3475 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3478 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3481 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3484 return nonmemory_operand (op, mode);
3485 if (register_operand (op, mode))
/* Remaining case: a constant that fits the sign-extended imm32 field.  */
3487 return x86_64_sign_extended_value (op);
3490 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
/* movabs can take a full 64-bit immediate, but under PIC we must not
   embed symbolic addresses.  */
3493 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3495 if (!TARGET_64BIT || !flag_pic)
3496 return nonmemory_operand (op, mode);
3497 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3499 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3504 /* Return nonzero if OPNUM's MEM should be matched
3505 in movabs* patterns. */
3508 ix86_check_movabs (rtx insn, int opnum)
/* Dig the SET out of INSN (possibly wrapped in a PARALLEL), then strip
   SUBREGs off the requested operand.  */
3512 set = PATTERN (insn);
3513 if (GET_CODE (set) == PARALLEL)
3514 set = XVECEXP (set, 0, 0);
3515 if (GET_CODE (set) != SET)
3517 mem = XEXP (set, opnum);
3518 while (GET_CODE (mem) == SUBREG)
3519 mem = SUBREG_REG (mem);
3520 if (GET_CODE (mem) != MEM)
/* Volatile memory is only OK when the caller allows it globally.  */
3522 return (volatile_ok || !MEM_VOLATILE_P (mem));
3525 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
/* Like x86_64_nonmemory_operand, but zero-extended constants also pass.  */
3528 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3531 return nonmemory_operand (op, mode);
3532 if (register_operand (op, mode))
3534 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3537 /* Return nonzero if OP is immediate operand representable on x86_64. */
/* 32-bit targets accept any immediate; 64-bit requires imm32 fit.  */
3540 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3543 return immediate_operand (op, mode);
3544 return x86_64_sign_extended_value (op);
3547 /* Return nonzero if OP is immediate operand representable on x86_64. */
/* Zero-extended (unsigned 32-bit) variant of the predicate above.  */
3550 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3552 return x86_64_zero_extended_value (op);
3555 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3556 for shift & compare patterns, as shifting by 0 does not change flags),
3557 else return zero. */
3560 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3562 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3565 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3566 reference and a constant. */
/* NOTE(review): switch-case labels and returns between the numbered lines
   are missing from this listing.  */
3569 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3571 switch (GET_CODE (op))
/* CONST case: accept a bare symbol/label or a PIC-related UNSPEC ...  */
3579 if (GET_CODE (op) == SYMBOL_REF
3580 || GET_CODE (op) == LABEL_REF
3581 || (GET_CODE (op) == UNSPEC
3582 && (XINT (op, 1) == UNSPEC_GOT
3583 || XINT (op, 1) == UNSPEC_GOTOFF
3584 || XINT (op, 1) == UNSPEC_GOTPCREL)))
/* ... otherwise require (plus symbolic const_int).  */
3586 if (GET_CODE (op) != PLUS
3587 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3591 if (GET_CODE (op) == SYMBOL_REF
3592 || GET_CODE (op) == LABEL_REF)
3594 /* Only @GOTOFF gets offsets. */
3595 if (GET_CODE (op) != UNSPEC
3596 || XINT (op, 1) != UNSPEC_GOTOFF)
/* Look inside the UNSPEC at the wrapped symbol.  */
3599 op = XVECEXP (op, 0, 0);
3600 if (GET_CODE (op) == SYMBOL_REF
3601 || GET_CODE (op) == LABEL_REF)
3610 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
/* NOTE(review): some branches/returns are missing from this listing;
   only a CONST wrapper can carry the GOT unspecs.  */
3613 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3615 if (GET_CODE (op) != CONST)
/* 64-bit branch: look for GOTPCREL, bare or offset by a PLUS.  */
3620 if (GET_CODE (op) == UNSPEC
3621 && XINT (op, 1) == UNSPEC_GOTPCREL)
3623 if (GET_CODE (op) == PLUS
3624 && GET_CODE (XEXP (op, 0)) == UNSPEC
3625 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
/* 32-bit branch: any UNSPEC, or (plus unspec const_int).  */
3630 if (GET_CODE (op) == UNSPEC)
3632 if (GET_CODE (op) != PLUS
3633 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3636 if (GET_CODE (op) == UNSPEC)
3642 /* Return true if OP is a symbolic operand that resolves locally. */
3645 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
/* Strip a (const (plus sym const_int)) wrapper down to the symbol.  */
3647 if (GET_CODE (op) == CONST
3648 && GET_CODE (XEXP (op, 0)) == PLUS
3649 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3650 op = XEXP (XEXP (op, 0), 0);
3652 if (GET_CODE (op) == LABEL_REF)
3655 if (GET_CODE (op) != SYMBOL_REF)
3658 if (SYMBOL_REF_LOCAL_P (op))
3661 /* There is, however, a not insubstantial body of code in the rest of
3662 the compiler that assumes it can just stick the results of
3663 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3664 /* ??? This is a hack. Should update the body of the compiler to
3665 always create a DECL an invoke targetm.encode_section_info. */
3666 if (strncmp (XSTR (op, 0), internal_label_prefix,
3667 internal_label_prefix_len) == 0)
3673 /* Test for various thread-local symbols. */
/* Returns the TLS model (nonzero) of OP, or 0 for non-TLS symbols.  */
3676 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3678 if (GET_CODE (op) != SYMBOL_REF)
3680 return SYMBOL_REF_TLS_MODEL (op);
/* Helper: true iff OP is a SYMBOL_REF with TLS model exactly KIND.  */
3684 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3686 if (GET_CODE (op) != SYMBOL_REF)
3688 return SYMBOL_REF_TLS_MODEL (op) == kind;
/* Predicate: OP is a global-dynamic TLS symbol.  */
3692 global_dynamic_symbolic_operand (rtx op,
3693 enum machine_mode mode ATTRIBUTE_UNUSED)
3695 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
/* Predicate: OP is a local-dynamic TLS symbol.  */
3699 local_dynamic_symbolic_operand (rtx op,
3700 enum machine_mode mode ATTRIBUTE_UNUSED)
3702 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
/* Predicate: OP is an initial-exec TLS symbol.  */
3706 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3708 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
/* Predicate: OP is a local-exec TLS symbol.  */
3712 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3714 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3717 /* Test for a valid operand for a call instruction. Don't allow the
3718 arg pointer register or virtual regs since they may decay into
3719 reg + const, which the patterns can't handle. */
3722 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3724 /* Disallow indirect through a virtual register. This leads to
3725 compiler aborts when trying to eliminate them. */
3726 if (GET_CODE (op) == REG
3727 && (op == arg_pointer_rtx
3728 || op == frame_pointer_rtx
3729 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3730 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3733 /* Disallow `call 1234'. Due to varying assembler lameness this
3734 gets either rejected or translated to `call .+1234'. */
3735 if (GET_CODE (op) == CONST_INT)
3738 /* Explicitly allow SYMBOL_REF even if pic. */
3739 if (GET_CODE (op) == SYMBOL_REF)
3742 /* Otherwise we can allow any general_operand in the address. */
3743 return general_operand (op, Pmode);
3746 /* Test for a valid operand for a call instruction. Don't allow the
3747 arg pointer register or virtual regs since they may decay into
3748 reg + const, which the patterns can't handle. */
/* Sibcall variant of call_insn_operand: stricter — memory operands are
   not allowed, only registers and symbols.  */
3751 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3753 /* Disallow indirect through a virtual register. This leads to
3754 compiler aborts when trying to eliminate them. */
3755 if (GET_CODE (op) == REG
3756 && (op == arg_pointer_rtx
3757 || op == frame_pointer_rtx
3758 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3759 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3762 /* Explicitly allow SYMBOL_REF even if pic. */
3763 if (GET_CODE (op) == SYMBOL_REF)
3766 /* Otherwise we can only allow register operands. */
3767 return register_operand (op, Pmode);
/* True if OP is a constant call target: a SYMBOL_REF, possibly wrapped
   in (const (plus sym const_int)).  */
3771 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3773 if (GET_CODE (op) == CONST
3774 && GET_CODE (XEXP (op, 0)) == PLUS
3775 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3776 op = XEXP (XEXP (op, 0), 0);
3777 return GET_CODE (op) == SYMBOL_REF;
3780 /* Match exactly zero and one. */
/* MODE matters here: the comparison is against the mode's zero rtx.  */
3783 const0_operand (rtx op, enum machine_mode mode)
3785 return op == CONST0_RTX (mode);
/* Match the integer constant 1 (shared const1_rtx only).  */
3789 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3791 return op == const1_rtx;
3794 /* Match 2, 4, or 8. Used for leal multiplicands. */
/* These are the only scale factors the x86 addressing mode encodes.  */
3797 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3799 return (GET_CODE (op) == CONST_INT
3800 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
/* Match a CONST_INT in [0, 3].  */
3804 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3806 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
/* Match a CONST_INT in [0, 7].  */
3810 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3812 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
/* Match a CONST_INT in [0, 15].  */
3816 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3818 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
/* Match a CONST_INT in [0, 255] (an unsigned byte immediate).  */
3822 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3824 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3828 /* True if this is a constant appropriate for an increment or decrement. */
3831 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3833 /* On Pentium4, the inc and dec operations cause an extra dependency on
3834 the flags register, since the carry flag is not set. */
3835 if (TARGET_PENTIUM4 && !optimize_size)
3837 return op == const1_rtx || op == constm1_rtx;
3840 /* Return nonzero if OP is acceptable as operand of DImode shift
/* NOTE(review): the branch condition between these two returns is
   missing from this listing (presumably a TARGET_64BIT test).  */
3844 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3847 return nonimmediate_operand (op, mode);
3849 return register_operand (op, mode);
3852 /* Return false if this is the stack pointer, or any other fake
3853 register eliminable to the stack pointer. Otherwise, this is
3856 This is used to prevent esp from being used as an index reg.
3857 Which would only happen in pathological cases. */
3860 reg_no_sp_operand (rtx op, enum machine_mode mode)
/* Strip a SUBREG before testing the underlying hard/virtual register.  */
3863 if (GET_CODE (t) == SUBREG)
3865 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3868 return register_operand (op, mode);
/* True if OP is an MMX register; MODE is ignored.  */
3872 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3874 return MMX_REG_P (op);
3877 /* Return false if this is any eliminable register. Otherwise
3881 general_no_elim_operand (rtx op, enum machine_mode mode)
/* Look through SUBREG, then reject frame/arg pointers and the virtual
   registers that eliminate to them.  */
3884 if (GET_CODE (t) == SUBREG)
3886 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3887 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3888 || t == virtual_stack_dynamic_rtx)
3891 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3892 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3895 return general_operand (op, mode);
3898 /* Return false if this is any eliminable register. Otherwise
3899 register_operand or const_int. */
3902 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
/* Same eliminable-register filter as general_no_elim_operand, but the
   fallback accepts only registers and integer constants.  */
3905 if (GET_CODE (t) == SUBREG)
3907 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3908 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3909 || t == virtual_stack_dynamic_rtx)
3912 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3915 /* Return false if this is any eliminable register or stack register,
3916 otherwise work like register_operand. */
3919 index_register_operand (rtx op, enum machine_mode mode)
3922 if (GET_CODE (t) == SUBREG)
/* %esp cannot be an index register, hence the extra STACK_POINTER test
   compared to the other no-elim predicates.  */
3926 if (t == arg_pointer_rtx
3927 || t == frame_pointer_rtx
3928 || t == virtual_incoming_args_rtx
3929 || t == virtual_stack_vars_rtx
3930 || t == virtual_stack_dynamic_rtx
3931 || REGNO (t) == STACK_POINTER_REGNUM)
3934 return general_operand (op, mode);
3937 /* Return true if op is a Q_REGS class register. */
/* Q_REGS are the registers with addressable low byte (a/b/c/d).  */
3940 q_regs_operand (rtx op, enum machine_mode mode)
3942 if (mode != VOIDmode && GET_MODE (op) != mode)
3944 if (GET_CODE (op) == SUBREG)
3945 op = SUBREG_REG (op);
3946 return ANY_QI_REG_P (op);
3949 /* Return true if op is an flags register. */
/* Requires a concrete mode on the register (not VOIDmode).  */
3952 flags_reg_operand (rtx op, enum machine_mode mode)
3954 if (mode != VOIDmode && GET_MODE (op) != mode)
3956 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3959 /* Return true if op is a NON_Q_REGS class register. */
/* Complement of q_regs_operand: registers without a low-byte part.  */
3962 non_q_regs_operand (rtx op, enum machine_mode mode)
3964 if (mode != VOIDmode && GET_MODE (op) != mode)
3966 if (GET_CODE (op) == SUBREG)
3967 op = SUBREG_REG (op);
3968 return NON_QI_REG_P (op);
/* True if OP is a constant-pool load whose vector value has all elements
   beyond the first equal to zero (i.e. a zero-extended scalar).  */
3972 zero_extended_scalar_load_operand (rtx op,
3973 enum machine_mode mode ATTRIBUTE_UNUSED)
3976 if (GET_CODE (op) != MEM)
3978 op = maybe_get_pool_constant (op);
3981 if (GET_CODE (op) != CONST_VECTOR)
/* n_elts: number of vector elements, from mode sizes.  */
3984 (GET_MODE_SIZE (GET_MODE (op)) /
3985 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
/* Check every element except element 0 is the zero constant.  */
3986 for (n_elts--; n_elts > 0; n_elts--)
3988 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3989 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3995 /* Return 1 when OP is operand acceptable for standard SSE move. */
/* Either a register/memory operand, or the all-zeros vector constant.  */
3997 vector_move_operand (rtx op, enum machine_mode mode)
3999 if (nonimmediate_operand (op, mode))
4001 if (GET_MODE (op) != mode && mode != VOIDmode)
4003 return (op == CONST0_RTX (GET_MODE (op)));
4006 /* Return true if op if a valid address, and does not contain
4007 a segment override. */
4010 no_seg_address_operand (rtx op, enum machine_mode mode)
4012 struct ix86_address parts;
4014 if (! address_operand (op, mode))
4017 if (! ix86_decompose_address (op, &parts))
/* SEG_DEFAULT means no explicit %fs/%gs override was present.  */
4020 return parts.seg == SEG_DEFAULT;
4023 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
/* NOTE(review): the switch-case labels are missing from this listing;
   only the structure and the IEEE fallback are visible.  */
4026 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4028 enum rtx_code code = GET_CODE (op);
4031 /* Operations supported directly. */
4041 /* These are equivalent to ones above in non-IEEE comparisons. */
4048 return !TARGET_IEEE_FP;
4053 /* Return 1 if OP is a valid comparison operator in valid mode. */
4055 ix86_comparison_operator (rtx op, enum machine_mode mode)
4057 enum machine_mode inmode;
4058 enum rtx_code code = GET_CODE (op);
4059 if (mode != VOIDmode && GET_MODE (op) != mode)
4061 if (GET_RTX_CLASS (code) != '<')
4063 inmode = GET_MODE (XEXP (op, 0));
/* FP compares are valid only if they need no fixup codes.  */
4065 if (inmode == CCFPmode || inmode == CCFPUmode)
4067 enum rtx_code second_code, bypass_code;
4068 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4069 return (bypass_code == NIL && second_code == NIL);
4076 if (inmode == CCmode || inmode == CCGCmode
4077 || inmode == CCGOCmode || inmode == CCNOmode)
/* Unsigned/ordered compares require the full CCmode flags.  */
4080 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4081 if (inmode == CCmode)
4085 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4093 /* Return 1 if OP is a valid comparison operator testing carry flag
/* The comparison must be of the flags register against zero; use the
   symbolic FLAGS_REG (hard register 17) rather than the bare magic
   number, for consistency with flags_reg_operand above.  */
4096 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4098 enum machine_mode inmode;
4099 enum rtx_code code = GET_CODE (op);
/* Reject operands whose mode disagrees with the requested MODE, and
   anything that is not a comparison rtx.  */
4101 if (mode != VOIDmode && GET_MODE (op) != mode)
4103 if (GET_RTX_CLASS (code) != '<')
4105 inmode = GET_MODE (XEXP (op, 0));
4106 if (GET_CODE (XEXP (op, 0)) != REG
4107 || REGNO (XEXP (op, 0)) != FLAGS_REG
4108 || XEXP (op, 1) != const0_rtx)
/* FP compares must map to a single integer condition code.  */
4111 if (inmode == CCFPmode || inmode == CCFPUmode)
4113 enum rtx_code second_code, bypass_code;
4115 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4116 if (bypass_code != NIL || second_code != NIL)
4118 code = ix86_fp_compare_code_to_integer (code);
4120 else if (inmode != CCmode)
4125 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4128 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4130 enum machine_mode inmode;
4131 enum rtx_code code = GET_CODE (op);
4133 if (mode != VOIDmode && GET_MODE (op) != mode)
4135 if (GET_RTX_CLASS (code) != '<')
4137 inmode = GET_MODE (XEXP (op, 0));
4138 if (inmode == CCFPmode || inmode == CCFPUmode)
4140 enum rtx_code second_code, bypass_code;
4142 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4143 if (bypass_code != NIL || second_code != NIL)
4145 code = ix86_fp_compare_code_to_integer (code);
4147 /* i387 supports just limited amount of conditional codes. */
4150 case LTU: case GTU: case LEU: case GEU:
4151 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4154 case ORDERED: case UNORDERED:
4162 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
/* NOTE(review): the switch-case labels between the numbered lines are
   missing from this listing.  */
4165 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4167 switch (GET_CODE (op))
4170 /* Modern CPUs have same latency for HImode and SImode multiply,
4171 but 386 and 486 do HImode multiply faster. */
4172 return ix86_tune > PROCESSOR_I486;
4184 /* Nearly general operand, but accept any const_double, since we wish
4185 to be able to drop them into memory rather than have them get pulled
4189 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4191 if (mode != VOIDmode && mode != GET_MODE (op))
4193 if (GET_CODE (op) == CONST_DOUBLE)
4195 return general_operand (op, mode);
4198 /* Match an SI or HImode register for a zero_extract. */
4201 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
/* On 64-bit targets DImode is also acceptable.  */
4204 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4205 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4208 if (!register_operand (op, VOIDmode))
4211 /* Be careful to accept only registers having upper parts. */
4212 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
/* Hard regs 0-3 (a/b/c/d) have %ah-style high parts; pseudos may too.  */
4213 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4216 /* Return 1 if this is a valid binary floating-point operation.
4217 OP is the expression matched, and MODE is its mode. */
/* NOTE(review): the switch-case labels are missing from this listing.  */
4220 binary_fp_operator (rtx op, enum machine_mode mode)
4222 if (mode != VOIDmode && mode != GET_MODE (op))
4225 switch (GET_CODE (op))
4231 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* True iff OP is a MULT rtx; MODE is ignored.  */
4239 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4241 return GET_CODE (op) == MULT;
/* True iff OP is a DIV rtx; MODE is ignored.  */
4245 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4247 return GET_CODE (op) == DIV;
/* True if OP is a commutative ('c') or other two-operand ('2') arithmetic
   or logical operator in the requested mode.  */
4251 arith_or_logical_operator (rtx op, enum machine_mode mode)
4253 return ((mode == VOIDmode || GET_MODE (op) == mode)
4254 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4255 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4258 /* Returns 1 if OP is memory operand with a displacement. */
4261 memory_displacement_operand (rtx op, enum machine_mode mode)
4263 struct ix86_address parts;
4265 if (! memory_operand (op, mode))
4268 if (! ix86_decompose_address (XEXP (op, 0), &parts))
/* Nonzero iff the decomposed address carried a displacement term.  */
4271 return parts.disp != NULL_RTX;
4274 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4275 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4277 ??? It seems likely that this will only work because cmpsi is an
4278 expander, and no actual insns use this. */
4281 cmpsi_operand (rtx op, enum machine_mode mode)
4283 if (nonimmediate_operand (op, mode))
/* Also accept the (and (zero_extract ...) const) form produced for
   testing the %ah-style high byte of a register.  */
4286 if (GET_CODE (op) == AND
4287 && GET_MODE (op) == SImode
4288 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4289 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4290 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4291 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4292 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4293 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4299 /* Returns 1 if OP is memory operand that can not be represented by the
4303 long_memory_operand (rtx op, enum machine_mode mode)
4305 if (! memory_operand (op, mode))
/* Nonzero address length means extra encoding bytes are needed.  */
4308 return memory_address_length (op) != 0;
4311 /* Return nonzero if the rtx is known aligned. */
4314 aligned_operand (rtx op, enum machine_mode mode)
4316 struct ix86_address parts;
4318 if (!general_operand (op, mode))
4321 /* Registers and immediate operands are always "aligned". */
4322 if (GET_CODE (op) != MEM)
4325 /* Don't even try to do any aligned optimizations with volatiles. */
4326 if (MEM_VOLATILE_P (op))
4331 /* Pushes and pops are only valid on the stack pointer. */
4332 if (GET_CODE (op) == PRE_DEC
4333 || GET_CODE (op) == POST_INC)
4336 /* Decode the address. */
4337 if (! ix86_decompose_address (op, &parts))
4340 /* Look for some component that isn't known to be aligned. */
/* Index and base registers must carry at least 32-bit (4-byte)
   pointer alignment, and any displacement must be a multiple of 4.  */
4344 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4349 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4354 if (GET_CODE (parts.disp) != CONST_INT
4355 || (INTVAL (parts.disp) & 3) != 0)
4359 /* Didn't find one -- this must be an aligned address. */
/* True iff OP is a COMPARE rtx; MODE is ignored.  */
4364 compare_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4366 return GET_CODE (op) == COMPARE;
4369 /* Initialize the table of extra 80387 mathematical constants. */
/* Fills ext_80387_constants_table with the values loadable by the
   dedicated i387 opcodes fldlg2/fldln2/fldl2e/fldl2t/fldpi, and sets
   ext_80387_constants_init so this runs only once.  */
4372 init_ext_80387_constants (void)
4374 static const char * cst[5] =
4376 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4377 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4378 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4379 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4380 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4384 for (i = 0; i < 5; i++)
4386 real_from_string (&ext_80387_constants_table[i], cst[i]);
4387 /* Ensure each constant is rounded to XFmode precision. */
4388 real_convert (&ext_80387_constants_table[i],
4389 XFmode, &ext_80387_constants_table[i]);
4392 ext_80387_constants_init = 1;
4395 /* Return true if the constant is something that can be loaded with
4396 a special instruction. */
/* Returns a small index identifying the opcode (see
   standard_80387_constant_opcode), or a non-match value otherwise.  */
4399 standard_80387_constant_p (rtx x)
4401 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4404 if (x == CONST0_RTX (GET_MODE (x)))
4406 if (x == CONST1_RTX (GET_MODE (x)))
4409 /* For XFmode constants, try to find a special 80387 instruction on
4410 those CPUs that benefit from them. */
4411 if (GET_MODE (x) == XFmode
4412 && x86_ext_80387_constants & TUNEMASK)
4417 if (! ext_80387_constants_init)
4418 init_ext_80387_constants ();
4420 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4421 for (i = 0; i < 5; i++)
4422 if (real_identical (&r, &ext_80387_constants_table[i]))
4429 /* Return the opcode of the special instruction to be used to load
/* Maps the index from standard_80387_constant_p to an opcode string;
   the switch body is not visible in this listing.  */
4433 standard_80387_constant_opcode (rtx x)
4435 switch (standard_80387_constant_p (x))
4455 /* Return the CONST_DOUBLE representing the 80387 constant that is
4456 loaded by the specified special instruction. The argument IDX
4457 matches the return value from standard_80387_constant_p. */
4460 standard_80387_constant_rtx (int idx)
4464 if (! ext_80387_constants_init)
4465 init_ext_80387_constants ();
/* i is derived from IDX by code not visible in this listing.  */
4481 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4485 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Only the all-zeros constant qualifies (loadable via xorps/pxor).  */
4488 standard_sse_constant_p (rtx x)
4490 if (x == const0_rtx)
4492 return (x == CONST0_RTX (GET_MODE (x)));
4495 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the rtx: any SYMBOL_REF or LABEL_REF anywhere in
   the expression (including inside rtvecs) makes this true.  */
4498 symbolic_reference_mentioned_p (rtx op)
4503 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4506 fmt = GET_RTX_FORMAT (GET_CODE (op));
4507 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4513 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4514 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4518 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4525 /* Return 1 if it is appropriate to emit `ret' instructions in the
4526 body of a function. Do this only if the epilogue is simple, needing a
4527 couple of insns. Prior to reloading, we can't tell how many registers
4528 must be saved, so return 0 then. Return 0 if there is no frame
4529 marker to de-allocate.
4531 If NON_SAVING_SETJMP is defined and true, then it is not possible
4532 for the epilogue to be simple, so return 0. This is a special case
4533 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4534 until final, but jump_optimize may need to know sooner if a
4538 ix86_can_use_return_insn_p (void)
4540 struct ix86_frame frame;
4542 #ifdef NON_SAVING_SETJMP
4543 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4547 if (! reload_completed || frame_pointer_needed)
4550 /* Don't allow more than 32 pop, since that's all we can do
4551 with one instruction. */
4552 if (current_function_pops_args
4553 && current_function_args_size >= 32768)
/* A bare `ret' is only possible with no stack to release and no
   saved registers to restore.  */
4556 ix86_compute_frame_layout (&frame);
4557 return frame.to_allocate == 0 && frame.nregs == 0;
4560 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
/* NOTE(review): switch-case labels and several returns between the
   numbered lines are missing from this listing.  */
4562 x86_64_sign_extended_value (rtx value)
4564 switch (GET_CODE (value))
4566 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4567 to be at least 32 and thus all acceptable constants are
4568 represented as CONST_INT. */
4570 if (HOST_BITS_PER_WIDE_INT == 32)
/* CONST_INT: accept iff the value survives a DImode -> SImode
   round-trip, i.e. it fits in a signed 32-bit immediate.  */
4574 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4575 return trunc_int_for_mode (val, SImode) == val;
4579 /* For certain code models, the symbolic references are known to fit;
4580 in CM_SMALL_PIC model we know it fits if it is local to the shared
4581 library. Don't count TLS SYMBOL_REFs here, since they should fit
4582 only if inside of UNSPEC handled below. */
4584 /* TLS symbols are not constant. */
4585 if (tls_symbolic_operand (value, Pmode))
4587 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4589 /* For certain code models, the code is near as well. */
4591 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4592 || ix86_cmodel == CM_KERNEL);
4594 /* We also may accept the offsetted memory references in certain special
4597 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4598 switch (XINT (XEXP (value, 0), 1))
4600 case UNSPEC_GOTPCREL:
4602 case UNSPEC_GOTNTPOFF:
4608 if (GET_CODE (XEXP (value, 0)) == PLUS)
4610 rtx op1 = XEXP (XEXP (value, 0), 0);
4611 rtx op2 = XEXP (XEXP (value, 0), 1);
4612 HOST_WIDE_INT offset;
4614 if (ix86_cmodel == CM_LARGE)
4616 if (GET_CODE (op2) != CONST_INT)
4618 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4619 switch (GET_CODE (op1))
4622 /* For CM_SMALL assume that latest object is 16MB before
4623 end of 31bits boundary. We may also accept pretty
4624 large negative constants knowing that all objects are
4625 in the positive half of address space. */
4626 if (ix86_cmodel == CM_SMALL
4627 && offset < 16*1024*1024
4628 && trunc_int_for_mode (offset, SImode) == offset)
4630 /* For CM_KERNEL we know that all objects reside in the
4631 negative half of 32bits address space. We may not
4632 accept negative offsets, since they may be just off
4633 and we may accept pretty large positive ones. */
4634 if (ix86_cmodel == CM_KERNEL
4636 && trunc_int_for_mode (offset, SImode) == offset)
4640 /* These conditions are similar to SYMBOL_REF ones, just the
4641 constraints for code models differ. */
4642 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4643 && offset < 16*1024*1024
4644 && trunc_int_for_mode (offset, SImode) == offset)
4646 if (ix86_cmodel == CM_KERNEL
4648 && trunc_int_for_mode (offset, SImode) == offset)
4652 switch (XINT (op1, 1))
4657 && trunc_int_for_mode (offset, SImode) == offset)
4671 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
/* NOTE(review): x86-64 predicate for a zero-extended 32-bit immediate.
   Interior lines (case labels, braces) are elided in this listing;
   comments cover only what is visible.  */
4673 x86_64_zero_extended_value (rtx value)
4675 switch (GET_CODE (value))
/* On 32-bit hosts a CONST_DOUBLE holds the upper half; it fits only
   when VOIDmode (integer) and the high word is zero.  */
4678 if (HOST_BITS_PER_WIDE_INT == 32)
4679 return (GET_MODE (value) == VOIDmode
4680 && !CONST_DOUBLE_HIGH (value));
4684 if (HOST_BITS_PER_WIDE_INT == 32)
4685 return INTVAL (value) >= 0;
/* On 64-bit hosts: all bits above the low 32 must be clear.  */
4687 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4690 /* For certain code models, the symbolic references are known to fit. */
4692 /* TLS symbols are not constant. */
4693 if (tls_symbolic_operand (value, Pmode))
4695 return ix86_cmodel == CM_SMALL;
4697 /* For certain code models, the code is near as well. */
4699 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4701 /* We also may accept the offsetted memory references in certain special
4704 if (GET_CODE (XEXP (value, 0)) == PLUS)
4706 rtx op1 = XEXP (XEXP (value, 0), 0);
4707 rtx op2 = XEXP (XEXP (value, 0), 1);
4709 if (ix86_cmodel == CM_LARGE)
4711 switch (GET_CODE (op1))
4715 /* For small code model we may accept pretty large positive
4716 offsets, since one bit is available for free. Negative
4717 offsets are limited by the size of NULL pointer area
4718 specified by the ABI. */
4719 if (ix86_cmodel == CM_SMALL
4720 && GET_CODE (op2) == CONST_INT
4721 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4722 && (trunc_int_for_mode (INTVAL (op2), SImode)
4725 /* ??? For the kernel, we may accept adjustment of
4726 -0x10000000, since we know that it will just convert
4727 negative address space to positive, but perhaps this
4728 is not worthwhile. */
4731 /* These conditions are similar to SYMBOL_REF ones, just the
4732 constraints for code models differ. */
4733 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4734 && GET_CODE (op2) == CONST_INT
4735 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4736 && (trunc_int_for_mode (INTVAL (op2), SImode)
4750 /* Value should be nonzero if functions must have frame pointers.
4751 Zero means the frame pointer need not be set up (and parms may
4752 be accessed via the stack pointer) in functions that seem suitable. */
/* NOTE(review): each visible condition forces a frame pointer; the
   elided lines presumably return true/false accordingly — confirm
   against the full source.  */
4755 ix86_frame_pointer_required (void)
4757 /* If we accessed previous frames, then the generated code expects
4758 to be able to access the saved ebp value in our frame. */
4759 if (cfun->machine->accesses_prev_frame)
4762 /* Several x86 os'es need a frame pointer for other reasons,
4763 usually pertaining to setjmp. */
4764 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4767 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4768 the frame pointer by default. Turn it back on now if we've not
4769 got a leaf function. */
4770 if (TARGET_OMIT_LEAF_FRAME_POINTER
4771 && (!current_function_is_leaf))
/* Profiled functions also keep a frame pointer.  */
4774 if (current_function_profile)
4780 /* Record that the current function accesses previous call frames. */
/* Sets the per-function flag consulted by ix86_frame_pointer_required
   above, forcing a frame pointer so the saved %ebp chain is walkable.  */
4783 ix86_setup_frame_addresses (void)
4785 cfun->machine->accesses_prev_frame = 1;
/* Use a hidden, link-once (COMDAT) pc-thunk when the assembler and
   object format support it; otherwise fall back to local labels.  */
4788 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4789 # define USE_HIDDEN_LINKONCE 1
4791 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc-thunk label has been emitted;
   bit N corresponds to hard register N (see ix86_file_end).  */
4794 static int pic_labels_used;
4796 /* Fills in the label name that should be used for a pc thunk for
4797 the given register. */
/* NAME must have room for 32 characters (see callers).  The hidden
   link-once flavor encodes the register name; the fallback uses an
   internal "LPR" label numbered by REGNO.  */
4800 get_pc_thunk_name (char name[32], unsigned int regno)
4802 if (USE_HIDDEN_LINKONCE)
4803 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4805 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4809 /* This function generates code for -fpic that loads %ebx with
4810 the return address of the caller and then returns. */
/* NOTE(review): end-of-file hook — emits one pc-thunk per register
   recorded in pic_labels_used.  Several declarations and braces are
   elided in this listing.  */
4813 ix86_file_end (void)
4818 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
4822 if (! ((pic_labels_used >> regno) & 1))
4825 get_pc_thunk_name (name, regno);
4827 if (USE_HIDDEN_LINKONCE)
/* Build a public, one-only FUNCTION_DECL so duplicate thunks
   across translation units are merged by the linker.  */
4831 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4833 TREE_PUBLIC (decl) = 1;
4834 TREE_STATIC (decl) = 1;
4835 DECL_ONE_ONLY (decl) = 1;
4837 (*targetm.asm_out.unique_section) (decl, 0);
4838 named_section (decl, NULL, 0);
4840 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4841 fputs ("\t.hidden\t", asm_out_file);
4842 assemble_name (asm_out_file, name);
4843 fputc ('\n', asm_out_file);
4844 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4849 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (top of stack) into the
   target register, then return.  */
4852 xops[0] = gen_rtx_REG (SImode, regno);
4853 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4854 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4855 output_asm_insn ("ret", xops);
4858 if (NEED_INDICATE_EXEC_STACK)
4859 file_end_indicate_exec_stack ();
4862 /* Emit code for the SET_GOT patterns. */
/* Emits assembly that loads DEST with the address of the GOT.
   Two strategies: an inline call/pop sequence, or a call to the
   per-register pc thunk (preferred with deep branch prediction,
   since a matched call/ret pair keeps the return-stack predictor
   balanced).  */
4865 output_set_got (rtx dest)
4870 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4872 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4874 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ())
4877 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
/* call to the next instruction pushes the pc, which is then popped
   into DEST below.  */
4879 output_asm_insn ("call\t%a2", xops);
4882 /* Output the "canonical" label name ("Lxx$pb") here too. This
4883 is what will be referred to by the Mach-O PIC subsystem. */
4884 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4886 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4887 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4890 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk path: record that a thunk for DEST's register is needed so
   ix86_file_end emits it, then call it.  */
4895 get_pc_thunk_name (name, REGNO (dest));
4896 pic_labels_used |= 1 << REGNO (dest);
4898 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4899 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4900 output_asm_insn ("call\t%X2", xops);
/* Finally add _GLOBAL_OFFSET_TABLE_ (pc-relative where needed).  */
4903 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4904 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4905 else if (!TARGET_MACHO)
4906 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4911 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): the signature line is elided in this listing.  Builds
   a SET storing ARG through a pre-decremented stack pointer, i.e. a
   push.  */
4916 return gen_rtx_SET (VOIDmode,
4918 gen_rtx_PRE_DEC (Pmode,
4919 stack_pointer_rtx)),
4923 /* Return >= 0 if there is an unused call-clobbered register available
4924 for the entire function. */
/* Only safe in leaf, non-profiled functions; scans %eax/%edx/%ecx
   (regs 2..0) for one never used.  Returns INVALID_REGNUM on failure.  */
4927 ix86_select_alt_pic_regnum (void)
4929 if (current_function_is_leaf && !current_function_profile)
4932 for (i = 2; i >= 0; --i)
4933 if (!regs_ever_live[i])
4937 return INVALID_REGNUM;
4940 /* Return 1 if we need to save REGNO. */
/* MAYBE_EH_RETURN additionally forces saving of the EH return data
   registers.  Interior control flow is partially elided here.  */
4942 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved when it is actually used — unless an
   alternate (unused call-clobbered) register can carry the PIC base.  */
4944 if (pic_offset_table_rtx
4945 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4946 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4947 || current_function_profile
4948 || current_function_calls_eh_return
4949 || current_function_uses_const_pool))
4951 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4956 if (current_function_calls_eh_return && maybe_eh_return)
4961 unsigned test = EH_RETURN_DATA_REGNO (i);
4962 if (test == INVALID_REGNUM)
/* Default rule: live, call-saved, not fixed, and not the hard frame
   pointer when a frame pointer is in use (saved separately).  */
4969 return (regs_ever_live[regno]
4970 && !call_used_regs[regno]
4971 && !fixed_regs[regno]
4972 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4975 /* Return number of registers to be saved on the stack. */
/* Counts hard registers for which ix86_save_reg (regno, true) holds.  */
4978 ix86_nsaved_regs (void)
4983 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4984 if (ix86_save_reg (regno, true))
4989 /* Return the offset between two registers, one to be eliminated, and the other
4990 its replacement, at the start of a routine. */
/* FROM/TO are register numbers from the ELIMINABLE_REGS pairs; the
   offsets come from the frame layout computed below.  */
4993 ix86_initial_elimination_offset (int from, int to)
4995 struct ix86_frame frame;
4996 ix86_compute_frame_layout (&frame);
4998 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4999 return frame.hard_frame_pointer_offset;
5000 else if (from == FRAME_POINTER_REGNUM
5001 && to == HARD_FRAME_POINTER_REGNUM)
5002 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
5005 if (to != STACK_POINTER_REGNUM)
5007 else if (from == ARG_POINTER_REGNUM)
5008 return frame.stack_pointer_offset;
5009 else if (from != FRAME_POINTER_REGNUM)
5012 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5016 /* Fill structure ix86_frame about frame of currently computed function. */
/* NOTE(review): central frame-layout routine.  Computes register save
   area, vararg area, paddings, outgoing-args area and the red zone,
   filling *FRAME.  Several braces/else arms are elided in this listing.  */
5019 ix86_compute_frame_layout (struct ix86_frame *frame)
5021 HOST_WIDE_INT total_size;
5022 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5023 HOST_WIDE_INT offset;
5024 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5025 HOST_WIDE_INT size = get_frame_size ();
5027 frame->nregs = ix86_nsaved_regs ();
5030 /* During reload iteration the amount of registers saved can change.
5031 Recompute the value as needed. Do not recompute when amount of registers
5032 didn't change as reload does mutiple calls to the function and does not
5033 expect the decision to change within single iteration. */
5035 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5037 int count = frame->nregs;
5039 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5040 /* The fast prologue uses move instead of push to save registers. This
5041 is significantly longer, but also executes faster as modern hardware
5042 can execute the moves in parallel, but can't do that for push/pop.
5044 Be careful about choosing what prologue to emit: When function takes
5045 many instructions to execute we may use slow version as well as in
5046 case function is known to be outside hot spot (this is known with
5047 feedback only). Weight the size of function by number of registers
5048 to save as it is cheap to use one or two push instructions but very
5049 slow to use many of them. */
5051 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5052 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5053 || (flag_branch_probabilities
5054 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5055 cfun->machine->use_fast_prologue_epilogue = false;
5057 cfun->machine->use_fast_prologue_epilogue
5058 = !expensive_function_p (count);
5060 if (TARGET_PROLOGUE_USING_MOVE
5061 && cfun->machine->use_fast_prologue_epilogue)
5062 frame->save_regs_using_mov = true;
5064 frame->save_regs_using_mov = false;
5067 /* Skip return address and saved base pointer. */
5068 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5070 frame->hard_frame_pointer_offset = offset;
5072 /* Do some sanity checking of stack_alignment_needed and
5073 preferred_alignment, since i386 port is the only using those features
5074 that may break easily. */
5076 if (size && !stack_alignment_needed)
5078 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5080 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5082 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5085 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5086 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5088 /* Register save area */
5089 offset += frame->nregs * UNITS_PER_WORD;
/* Varargs register save area (x86-64 only in practice).  */
5092 if (ix86_save_varrargs_registers)
5094 offset += X86_64_VARARGS_SIZE;
5095 frame->va_arg_size = X86_64_VARARGS_SIZE;
5098 frame->va_arg_size = 0;
5100 /* Align start of frame for local function. */
5101 frame->padding1 = ((offset + stack_alignment_needed - 1)
5102 & -stack_alignment_needed) - offset;
5104 offset += frame->padding1;
5106 /* Frame pointer points here. */
5107 frame->frame_pointer_offset = offset;
5111 /* Add outgoing arguments area. Can be skipped if we eliminated
5112 all the function calls as dead code.
5113 Skipping is however impossible when function calls alloca. Alloca
5114 expander assumes that last current_function_outgoing_args_size
5115 of stack frame are unused. */
5116 if (ACCUMULATE_OUTGOING_ARGS
5117 && (!current_function_is_leaf || current_function_calls_alloca))
5119 offset += current_function_outgoing_args_size;
5120 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5123 frame->outgoing_arguments_size = 0;
5125 /* Align stack boundary. Only needed if we're calling another function
5127 if (!current_function_is_leaf || current_function_calls_alloca)
5128 frame->padding2 = ((offset + preferred_alignment - 1)
5129 & -preferred_alignment) - offset;
5131 frame->padding2 = 0;
5133 offset += frame->padding2;
5135 /* We've reached end of stack frame. */
5136 frame->stack_pointer_offset = offset;
5138 /* Size prologue needs to allocate. */
5139 frame->to_allocate =
5140 (size + frame->padding1 + frame->padding2
5141 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Disable mov-based saves when pointless (tiny frame) or when the
   allocation would not fit a 32-bit displacement on x86-64.  */
5143 if ((!frame->to_allocate && frame->nregs <= 1)
5144 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5145 frame->save_regs_using_mov = false;
/* Red zone: leaf functions with unchanging sp may use the area below
   the stack pointer instead of allocating it.  */
5147 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5148 && current_function_is_leaf)
5150 frame->red_zone_size = frame->to_allocate;
5151 if (frame->save_regs_using_mov)
5152 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5153 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5154 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5157 frame->red_zone_size = 0;
5158 frame->to_allocate -= frame->red_zone_size;
5159 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under a debug flag
   elided from this listing — confirm against the full source).  */
5161 fprintf (stderr, "nregs: %i\n", frame->nregs);
5162 fprintf (stderr, "size: %i\n", size);
5163 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5164 fprintf (stderr, "padding1: %i\n", frame->padding1);
5165 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5166 fprintf (stderr, "padding2: %i\n", frame->padding2);
5167 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5168 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5169 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5170 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5171 frame->hard_frame_pointer_offset);
5172 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5176 /* Emit code to save registers in the prologue. */
/* Pushes every register selected by ix86_save_reg, highest regno
   first; each push is marked frame-related for unwind info.  */
5179 ix86_emit_save_regs (void)
5184 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5185 if (ix86_save_reg (regno, true))
5187 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5188 RTX_FRAME_RELATED_P (insn) = 1;
5192 /* Emit code to save registers using MOV insns. First register
5193 is restored from POINTER + OFFSET. */
/* Mov-based alternative to push-based saving (see frame layout
   heuristics); stores ascending regnos at consecutive word slots.  */
5195 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5200 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5201 if (ix86_save_reg (regno, true))
5203 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5205 gen_rtx_REG (Pmode, regno));
5206 RTX_FRAME_RELATED_P (insn) = 1;
5207 offset += UNITS_PER_WORD;
5211 /* Expand prologue or epilogue stack adjustment.
5212 The pattern exist to put a dependency on all ebp-based memory accesses.
5213 STYLE should be negative if instructions should be marked as frame related,
5214 zero if %r11 register is live and cannot be freely used and positive
/* (comment continues in elided lines).  */
5218 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5223 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5224 else if (x86_64_immediate_operand (offset, DImode))
5225 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5229 /* r11 is used by indirect sibcall return as well, set before the
5230 epilogue and used after the epilogue. ATM indirect sibcall
5231 shouldn't be used together with huge frame sizes in one
5232 function because of the frame_size check in sibcall.c. */
/* Offset too large for an immediate: materialize it in %r11 first.  */
5235 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5236 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5238 RTX_FRAME_RELATED_P (insn) = 1;
5239 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5243 RTX_FRAME_RELATED_P (insn) = 1;
5246 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): sequence is: optional ebp setup, register saves
   (push- or mov-based), stack allocation (direct or via probe
   helper), then PIC register setup.  Some branches/braces are
   elided in this listing.  */
5249 ix86_expand_prologue (void)
5253 struct ix86_frame frame;
5254 HOST_WIDE_INT allocate;
5256 ix86_compute_frame_layout (&frame);
5258 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5259 slower on all targets. Also sdb doesn't like it. */
5261 if (frame_pointer_needed)
5263 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5264 RTX_FRAME_RELATED_P (insn) = 1;
5266 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5267 RTX_FRAME_RELATED_P (insn) = 1;
5270 allocate = frame.to_allocate;
5272 if (!frame.save_regs_using_mov)
5273 ix86_emit_save_regs ();
5275 allocate += frame.nregs * UNITS_PER_WORD;
5277 /* When using red zone we may start register saving before allocating
5278 the stack frame saving one cycle of the prologue. */
5279 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5280 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5281 : stack_pointer_rtx,
5282 -frame.nregs * UNITS_PER_WORD);
/* Small allocations (or no stack probing) adjust %esp directly.  */
5286 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5287 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5288 GEN_INT (-allocate), -1);
5291 /* Only valid for Win32. */
/* Large allocation with stack probing: call the allocation worker
   with the size in %eax, saving/restoring %eax if it is live.  */
5292 rtx eax = gen_rtx_REG (SImode, 0);
5293 bool eax_live = ix86_eax_live_at_start_p ();
5300 emit_insn (gen_push (eax));
5304 insn = emit_move_insn (eax, GEN_INT (allocate));
5305 RTX_FRAME_RELATED_P (insn) = 1;
5307 insn = emit_insn (gen_allocate_stack_worker (eax));
5308 RTX_FRAME_RELATED_P (insn) = 1;
5312 rtx t = plus_constant (stack_pointer_rtx, allocate);
5313 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5317 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5319 if (!frame_pointer_needed || !frame.to_allocate)
5320 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5322 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5323 -frame.nregs * UNITS_PER_WORD);
/* Decide whether the PIC register must be initialized here.  */
5326 pic_reg_used = false;
5327 if (pic_offset_table_rtx
5328 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5329 || current_function_profile))
5331 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5333 if (alt_pic_reg_used != INVALID_REGNUM)
5334 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5336 pic_reg_used = true;
5341 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5343 /* Even with accurate pre-reload life analysis, we can wind up
5344 deleting all references to the pic register after reload.
5345 Consider if cross-jumping unifies two sides of a branch
5346 controlled by a comparison vs the only read from a global.
5347 In which case, allow the set_got to be deleted, though we're
5348 too late to do anything about the ebx save in the prologue. */
5349 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5352 /* Prevent function calls from be scheduled before the call to mcount.
5353 In the pic_reg_used case, make sure that the got load isn't deleted. */
5354 if (current_function_profile)
5355 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5358 /* Emit code to restore saved registers using MOV insns. First register
5359 is restored from POINTER + OFFSET. */
/* Mirror of ix86_emit_save_regs_using_mov; MAYBE_EH_RETURN selects
   whether EH return data registers are included.  */
5361 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5362 int maybe_eh_return)
5365 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5368 if (ix86_save_reg (regno, maybe_eh_return))
5370 /* Ensure that adjust_address won't be forced to produce pointer
5371 out of range allowed by x86-64 instruction set. */
5372 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit a 32-bit displacement: form the address in
   %r11 and address relative to it instead.  */
5376 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5377 emit_move_insn (r11, GEN_INT (offset));
5378 emit_insn (gen_adddi3 (r11, r11, pointer));
5379 base_address = gen_rtx_MEM (Pmode, r11);
5382 emit_move_insn (gen_rtx_REG (Pmode, regno),
5383 adjust_address (base_address, Pmode, offset));
5384 offset += UNITS_PER_WORD;
5388 /* Restore function stack, frame, and registers. */
/* NOTE(review): STYLE distinguishes normal return, sibcall and
   eh_return epilogues (style == 2 appears to be the eh_return path —
   confirm against callers).  Chooses between mov-based and pop-based
   register restore.  Several braces/else arms are elided here.  */
5391 ix86_expand_epilogue (int style)
5394 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5395 struct ix86_frame frame;
5396 HOST_WIDE_INT offset;
5398 ix86_compute_frame_layout (&frame);
5400 /* Calculate start of saved registers relative to ebp. Special care
5401 must be taken for the normal return case of a function using
5402 eh_return: the eax and edx registers are marked as saved, but not
5403 restored along this path. */
5404 offset = frame.nregs;
5405 if (current_function_calls_eh_return && style != 2)
5407 offset *= -UNITS_PER_WORD;
5409 /* If we're only restoring one register and sp is not valid then
5410 using a move instruction to restore the register since it's
5411 less work than reloading sp and popping the register.
5413 The default code result in stack adjustment using add/lea instruction,
5414 while this code results in LEAVE instruction (or discrete equivalent),
5415 so it is profitable in some other cases as well. Especially when there
5416 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5417 and there is exactly one register to pop. This heuristic may need some
5418 tuning in future. */
5419 if ((!sp_valid && frame.nregs <= 1)
5420 || (TARGET_EPILOGUE_USING_MOVE
5421 && cfun->machine->use_fast_prologue_epilogue
5422 && (frame.nregs > 1 || frame.to_allocate))
5423 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5424 || (frame_pointer_needed && TARGET_USE_LEAVE
5425 && cfun->machine->use_fast_prologue_epilogue
5426 && frame.nregs == 1)
5427 || current_function_calls_eh_return)
5429 /* Restore registers. We can use ebp or esp to address the memory
5430 locations. If both are available, default to ebp, since offsets
5431 are known to be small. Only exception is esp pointing directly to the
5432 end of block of saved registers, where we may simplify addressing
5435 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5436 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5437 frame.to_allocate, style == 2)
5439 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5440 offset, style == 2);
5442 /* eh_return epilogues need %ecx added to the stack pointer. */
5445 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5447 if (frame_pointer_needed)
5449 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5450 tmp = plus_constant (tmp, UNITS_PER_WORD);
5451 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5453 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5454 emit_move_insn (hard_frame_pointer_rtx, tmp);
5456 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5461 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5462 tmp = plus_constant (tmp, (frame.to_allocate
5463 + frame.nregs * UNITS_PER_WORD));
5464 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5467 else if (!frame_pointer_needed)
5468 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5469 GEN_INT (frame.to_allocate
5470 + frame.nregs * UNITS_PER_WORD),
5472 /* If not an i386, mov & pop is faster than "leave". */
5473 else if (TARGET_USE_LEAVE || optimize_size
5474 || !cfun->machine->use_fast_prologue_epilogue)
5475 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5478 pro_epilogue_adjust_stack (stack_pointer_rtx,
5479 hard_frame_pointer_rtx,
5482 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5484 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Pop-based path: deallocate first, then pop saved registers.  */
5489 /* First step is to deallocate the stack frame so that we can
5490 pop the registers. */
5493 if (!frame_pointer_needed)
5495 pro_epilogue_adjust_stack (stack_pointer_rtx,
5496 hard_frame_pointer_rtx,
5497 GEN_INT (offset), style);
5499 else if (frame.to_allocate)
5500 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5501 GEN_INT (frame.to_allocate), style);
5503 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5504 if (ix86_save_reg (regno, false))
5507 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5509 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5511 if (frame_pointer_needed)
5513 /* Leave results in shorter dependency chains on CPUs that are
5514 able to grok it fast. */
5515 if (TARGET_USE_LEAVE)
5516 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5517 else if (TARGET_64BIT)
5518 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5520 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5524 /* Sibcall epilogues don't want a return instruction. */
/* stdcall/pascal-style callee-pop return handling.  */
5528 if (current_function_pops_args && current_function_args_size)
5530 rtx popc = GEN_INT (current_function_pops_args);
5532 /* i386 can only pop 64K bytes. If asked to pop more, pop
5533 return address, do explicit add, and jump indirectly to the
5536 if (current_function_pops_args >= 65536)
5538 rtx ecx = gen_rtx_REG (SImode, 2);
5540 /* There is no "pascal" calling convention in 64bit ABI. */
5544 emit_insn (gen_popsi1 (ecx));
5545 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5546 emit_jump_insn (gen_return_indirect_internal (ecx));
5549 emit_jump_insn (gen_return_pop_internal (popc));
5552 emit_jump_insn (gen_return_internal ());
5555 /* Reset from the function's potential modifications. */
/* The prologue may have renamed the PIC register to an alternate
   regnum (see ix86_expand_prologue); restore the canonical one so the
   next function starts clean.  */
5558 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5559 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5561 if (pic_offset_table_rtx)
5562 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5565 /* Extract the parts of an RTL expression that is a valid memory address
5566 for an instruction. Return 0 if the structure of the address is
5567 grossly off. Return -1 if the address contains ASHIFT, so it is not
5568 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): splits ADDR into base + index*scale + disp (+ segment),
   then canonicalizes encodable forms.  Many case labels and error
   returns are elided in this listing.  */
5571 ix86_decompose_address (rtx addr, struct ix86_address *out)
5573 rtx base = NULL_RTX;
5574 rtx index = NULL_RTX;
5575 rtx disp = NULL_RTX;
5576 HOST_WIDE_INT scale = 1;
5577 rtx scale_rtx = NULL_RTX;
5579 enum ix86_address_seg seg = SEG_DEFAULT;
5581 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5583 else if (GET_CODE (addr) == PLUS)
/* Flatten a (possibly nested) PLUS chain into the addends array.  */
5593 addends[n++] = XEXP (op, 1);
5596 while (GET_CODE (op) == PLUS);
5601 for (i = n; i >= 0; --i)
5604 switch (GET_CODE (op))
5609 index = XEXP (op, 0);
5610 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP marks a thread-pointer reference: map it onto the
   %fs (64-bit) or %gs (32-bit) segment override.  */
5614 if (XINT (op, 1) == UNSPEC_TP
5615 && TARGET_TLS_DIRECT_SEG_REFS
5616 && seg == SEG_DEFAULT)
5617 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5646 else if (GET_CODE (addr) == MULT)
5648 index = XEXP (addr, 0); /* index*scale */
5649 scale_rtx = XEXP (addr, 1);
5651 else if (GET_CODE (addr) == ASHIFT)
5655 /* We're called for lea too, which implements ashift on occasion. */
5656 index = XEXP (addr, 0);
5657 tmp = XEXP (addr, 1);
5658 if (GET_CODE (tmp) != CONST_INT)
5660 scale = INTVAL (tmp);
5661 if ((unsigned HOST_WIDE_INT) scale > 3)
5667 disp = addr; /* displacement */
5669 /* Extract the integral value of scale. */
5672 if (GET_CODE (scale_rtx) != CONST_INT)
5674 scale = INTVAL (scale_rtx);
5677 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp cannot be an index in the SIB byte; swap base and index when
   scale is 1 so the address stays encodable.  */
5678 if (base && index && scale == 1
5679 && (index == arg_pointer_rtx
5680 || index == frame_pointer_rtx
5681 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5688 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5689 if ((base == hard_frame_pointer_rtx
5690 || base == frame_pointer_rtx
5691 || base == arg_pointer_rtx) && !disp)
5694 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5695 Avoid this by transforming to [%esi+0]. */
5696 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5697 && base && !index && !disp
5699 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5702 /* Special case: encode reg+reg instead of reg*2. */
5703 if (!base && index && scale && scale == 2)
5704 base = index, scale = 1;
5706 /* Special case: scaling cannot be encoded without base or displacement. */
5707 if (!base && !disp && index && scale != 1)
5719 /* Return cost of the memory address x.
5720 For i386, it is better to use a complex address than let gcc copy
5721 the address into a reg and make a new pseudo. But not if the address
5722 requires to two regs - that would mean more pseudos with longer
/* (comment continues in elided lines).  */
5725 ix86_address_cost (rtx x)
5727 struct ix86_address parts;
5730 if (!ix86_decompose_address (x, &parts))
5733 /* More complex memory references are better. */
5734 if (parts.disp && parts.disp != const0_rtx)
5736 if (parts.seg != SEG_DEFAULT)
5739 /* Attempt to minimize number of registers in the address. */
/* Hard registers are free; pseudos (regno >= FIRST_PSEUDO_REGISTER)
   add to the cost.  */
5741 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5743 && (!REG_P (parts.index)
5744 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5748 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5750 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5751 && parts.base != parts.index)
5754 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5755 since it's predecode logic can't detect the length of instructions
5756 and it degenerates to vector decoded. Increase cost of such
5757 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5758 to split such addresses or even refuse such addresses at all.
5760 Following addressing modes are affected:
5765 The first and last case may be avoidable by explicitly coding the zero in
5766 memory address, but I don't have AMD-K6 machine handy to check this
/* (K6 penalty cases; the guard on ix86_tune is elided here).  */
5770 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5771 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5772 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5778 /* If X is a machine specific address (i.e. a symbol or label being
5779 referenced as a displacement from the GOT implemented using an
5780 UNSPEC), then return the base term. Otherwise return X. */
/* Used by alias analysis to see through GOTPCREL wrappers.  */
5783 ix86_find_base_term (rtx x)
/* 64-bit path: unwrap (const (plus (unspec GOTPCREL) const_int)).  */
5789 if (GET_CODE (x) != CONST)
5792 if (GET_CODE (term) == PLUS
5793 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5794 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5795 term = XEXP (term, 0);
5796 if (GET_CODE (term) != UNSPEC
5797 || XINT (term, 1) != UNSPEC_GOTPCREL)
5800 term = XVECEXP (term, 0, 0);
5802 if (GET_CODE (term) != SYMBOL_REF
5803 && GET_CODE (term) != LABEL_REF)
/* 32-bit path: delegate to delegitimization.  */
5809 term = ix86_delegitimize_address (x);
5811 if (GET_CODE (term) != SYMBOL_REF
5812 && GET_CODE (term) != LABEL_REF)
5818 /* Determine if a given RTX is a valid constant. We already know this
5819 satisfies CONSTANT_P. */
/* NOTE(review): case labels are elided in this listing; the visible
   checks handle CONST wrappers, UNSPECs and symbols.  */
5822 legitimate_constant_p (rtx x)
5824 switch (GET_CODE (x))
5829 if (GET_CODE (x) == PLUS)
5831 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5836 /* Only some unspecs are valid as "constants". */
5837 if (GET_CODE (x) == UNSPEC)
5838 switch (XINT (x, 1))
5842 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5844 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5849 /* We must have drilled down to a symbol. */
5850 if (!symbolic_operand (x, Pmode))
5855 /* TLS symbols are never valid. */
5856 if (tls_symbolic_operand (x, Pmode))
5864 /* Otherwise we handle everything else in the move patterns. */
5868 /* Determine if it's legal to put X into the constant pool. This
5869 is not possible for the address of thread-local symbols, which
5870 is checked above. */
/* Simply the negation of legitimate_constant_p.  */
5873 ix86_cannot_force_const_mem (rtx x)
5875 return !legitimate_constant_p (x);
5878 /* Determine if a given RTX is a valid constant address. */
/* A constant that is also a legitimate (strict) Pmode address.  */
5881 constant_address_p (rtx x)
5883 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5886 /* Nonzero if the constant value X is a legitimate general operand
5887 when generating PIC code. It is given that flag_pic is on and
5888 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): case labels and default returns are elided in this
   listing.  */
5891 legitimate_pic_operand_p (rtx x)
5895 switch (GET_CODE (x))
5898 inner = XEXP (x, 0);
5900 /* Only some unspecs are valid as "constants". */
5901 if (GET_CODE (inner) == UNSPEC)
5902 switch (XINT (inner, 1))
5905 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* Symbols/labels fall back to the displacement check.  */
5913 return legitimate_pic_address_disp_p (x);
5920 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): the rest of this function comment, the return type,
   and many interior lines are missing from this extraction (gaps in
   the embedded line numbers).  Comments below cover visible code only.  */
5924 legitimate_pic_address_disp_p (rtx disp)
5928 /* In 64bit mode we can allow direct addresses of symbols and labels
5929 when they are not dynamic symbols. */
5932 /* TLS references should always be enclosed in UNSPEC. */
5933 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5935 if (GET_CODE (disp) == SYMBOL_REF
5936 && ix86_cmodel == CM_SMALL_PIC
5937 && SYMBOL_REF_LOCAL_P (disp))
5939 if (GET_CODE (disp) == LABEL_REF)
/* symbol+offset: accepted only when the offset fits in the signed
   +/-16MB window checked below (small PIC code model).  */
5941 if (GET_CODE (disp) == CONST
5942 && GET_CODE (XEXP (disp, 0)) == PLUS)
5944 rtx op0 = XEXP (XEXP (disp, 0), 0);
5945 rtx op1 = XEXP (XEXP (disp, 0), 1);
5947 /* TLS references should always be enclosed in UNSPEC. */
5948 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5950 if (((GET_CODE (op0) == SYMBOL_REF
5951 && ix86_cmodel == CM_SMALL_PIC
5952 && SYMBOL_REF_LOCAL_P (op0))
5953 || GET_CODE (op0) == LABEL_REF)
5954 && GET_CODE (op1) == CONST_INT
5955 && INTVAL (op1) < 16*1024*1024
5956 && INTVAL (op1) >= -16*1024*1024)
5960 if (GET_CODE (disp) != CONST)
5962 disp = XEXP (disp, 0);
5966 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5967 of GOT tables. We should not need these anyway. */
5968 if (GET_CODE (disp) != UNSPEC
5969 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5972 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5973 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* Presumably the 32-bit path starts near here -- TODO confirm against
   the full source; an outer CONST wrapper appears to have been
   stripped in missing lines before 5979.  */
5979 if (GET_CODE (disp) == PLUS)
5981 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5983 disp = XEXP (disp, 0);
5987 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5988 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5990 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5991 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5992 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5994 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5995 if (! strcmp (sym_name, "<pic base>"))
6000 if (GET_CODE (disp) != UNSPEC)
/* Classify the UNSPEC kind; case labels for GOT/GOTOFF appear to have
   been dropped before lines 6008/6010 -- TODO confirm.  */
6003 switch (XINT (disp, 1))
6008 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6010 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6011 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6012 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6014 case UNSPEC_GOTTPOFF:
6015 case UNSPEC_GOTNTPOFF:
6016 case UNSPEC_INDNTPOFF:
6019 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6021 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6023 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6029 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6030 memory address for an instruction. The MODE argument is the machine mode
6031 for the MEM expression that wants to use this address.
6033 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6034 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): interior lines are missing throughout (line-number
   gaps); in particular the success/error labels and several `goto`
   statements that the `reason`/`reason_rtx` diagnostics feed are not
   visible here.  */
6038 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6040 struct ix86_address parts;
6041 rtx base, index, disp;
6042 HOST_WIDE_INT scale;
6043 const char *reason = NULL;
6044 rtx reason_rtx = NULL_RTX;
6046 if (TARGET_DEBUG_ADDR)
6049 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6050 GET_MODE_NAME (mode), strict);
/* Split ADDR into base/index/disp/scale; failure is itself a reject.  */
6054 if (ix86_decompose_address (addr, &parts) <= 0)
6056 reason = "decomposition failed";
6061 index = parts.index;
6063 scale = parts.scale;
6065 /* Validate base register.
6067 Don't allow SUBREG's here, it can lead to spill failures when the base
6068 is one word out of a two word structure, which is represented internally
6075 if (GET_CODE (base) != REG)
6077 reason = "base is not a register";
6081 if (GET_MODE (base) != Pmode)
6083 reason = "base is not in Pmode";
6087 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6088 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6090 reason = "base is not valid";
6095 /* Validate index register.
6097 Don't allow SUBREG's here, it can lead to spill failures when the index
6098 is one word out of a two word structure, which is represented internally
6105 if (GET_CODE (index) != REG)
6107 reason = "index is not a register";
6111 if (GET_MODE (index) != Pmode)
6113 reason = "index is not in Pmode";
6117 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6118 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6120 reason = "index is not valid";
6125 /* Validate scale factor. */
6128 reason_rtx = GEN_INT (scale);
6131 reason = "scale without index";
/* Hardware allows scales 1, 2, 4, 8 only; a scale != 1 guard is
   presumably in the dropped lines before 6135 -- TODO confirm.  */
6135 if (scale != 2 && scale != 4 && scale != 8)
6137 reason = "scale is not a valid multiplier";
6142 /* Validate displacement. */
6147 if (GET_CODE (disp) == CONST
6148 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6149 switch (XINT (XEXP (disp, 0), 1))
6153 case UNSPEC_GOTPCREL:
6156 goto is_legitimate_pic;
6158 case UNSPEC_GOTTPOFF:
6159 case UNSPEC_GOTNTPOFF:
6160 case UNSPEC_INDNTPOFF:
6166 reason = "invalid address unspec";
6170 else if (flag_pic && (SYMBOLIC_CONST (disp)
6172 && !machopic_operand_p (disp)
6177 if (TARGET_64BIT && (index || base))
6179 /* foo@dtpoff(%rX) is ok. */
6180 if (GET_CODE (disp) != CONST
6181 || GET_CODE (XEXP (disp, 0)) != PLUS
6182 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6183 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6184 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6185 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6187 reason = "non-constant pic memory reference";
6191 else if (! legitimate_pic_address_disp_p (disp))
6193 reason = "displacement is an invalid pic construct";
6197 /* This code used to verify that a symbolic pic displacement
6198 includes the pic_offset_table_rtx register.
6200 While this is good idea, unfortunately these constructs may
6201 be created by "adds using lea" optimization for incorrect
6210 This code is nonsensical, but results in addressing
6211 GOT table with pic_offset_table_rtx base. We can't
6212 just refuse it easily, since it gets matched by
6213 "addsi3" pattern, that later gets split to lea in the
6214 case output register differs from input. While this
6215 can be handled by separate addsi pattern for this case
6216 that never results in lea, this seems to be easier and
6217 correct fix for crash to disable this test. */
6219 else if (GET_CODE (disp) != LABEL_REF
6220 && GET_CODE (disp) != CONST_INT
6221 && (GET_CODE (disp) != CONST
6222 || !legitimate_constant_p (disp))
6223 && (GET_CODE (disp) != SYMBOL_REF
6224 || !legitimate_constant_p (disp)))
6226 reason = "displacement is not constant";
6229 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6231 reason = "displacement is out of range";
6236 /* Everything looks valid. */
6237 if (TARGET_DEBUG_ADDR)
6238 fprintf (stderr, "Success.\n");
6242 if (TARGET_DEBUG_ADDR)
6244 fprintf (stderr, "Error: %s\n", reason);
6245 debug_rtx (reason_rtx);
6250 /* Return an unique alias set for the GOT. */
/* Lazily allocates the set on first call (the -1 sentinel and the
   guard testing it are split by a missing line before 6257).  */
6252 static HOST_WIDE_INT
6253 ix86_GOT_alias_set (void)
6255 static HOST_WIDE_INT set = -1;
6257 set = new_alias_set ();
6261 /* Return a legitimate reference for ORIG (an address) using the
6262 register REG. If REG is 0, a new pseudo is generated.
6264 There are two types of references that must be handled:
6266 1. Global data references must load the address from the GOT, via
6267 the PIC reg. An insn is emitted to do this load, and the reg is
6270 2. Static data references, constant pool addresses, and code labels
6271 compute the address as an offset from the GOT, whose base is in
6272 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6273 differentiate them from global data objects. The returned
6274 address is the PIC reg + an unspec constant.
6276 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6277 reg also appears in the address. */
/* NOTE(review): many interior lines are missing (declarations of
   `addr'/`new', several condition lines and returns); comments below
   annotate only the visible fragments.  */
6280 legitimize_pic_address (rtx orig, rtx reg)
6288 reg = gen_reg_rtx (Pmode);
6289 /* Use the generic Mach-O PIC machinery. */
6290 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6293 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6295 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6297 /* This symbol may be referenced via a displacement from the PIC
6298 base address (@GOTOFF). */
6300 if (reload_in_progress)
6301 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6302 if (GET_CODE (addr) == CONST)
6303 addr = XEXP (addr, 0);
6304 if (GET_CODE (addr) == PLUS)
6306 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6307 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6310 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6311 new = gen_rtx_CONST (Pmode, new);
6312 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6316 emit_move_insn (reg, new);
/* 64-bit @GOTPCREL load: RIP-relative MEM marked unchanging and given
   the GOT alias set.  */
6320 else if (GET_CODE (addr) == SYMBOL_REF)
6324 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6325 new = gen_rtx_CONST (Pmode, new);
6326 new = gen_rtx_MEM (Pmode, new);
6327 RTX_UNCHANGING_P (new) = 1;
6328 set_mem_alias_set (new, ix86_GOT_alias_set ());
6331 reg = gen_reg_rtx (Pmode);
6332 /* Use directly gen_movsi, otherwise the address is loaded
6333 into register for CSE. We don't want to CSE this addresses,
6334 instead we CSE addresses from the GOT table, so skip this. */
6335 emit_insn (gen_movsi (reg, new));
6340 /* This symbol must be referenced via a load from the
6341 Global Offset Table (@GOT). */
6343 if (reload_in_progress)
6344 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6345 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6346 new = gen_rtx_CONST (Pmode, new);
6347 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6348 new = gen_rtx_MEM (Pmode, new);
6349 RTX_UNCHANGING_P (new) = 1;
6350 set_mem_alias_set (new, ix86_GOT_alias_set ());
6353 reg = gen_reg_rtx (Pmode);
6354 emit_move_insn (reg, new);
6360 if (GET_CODE (addr) == CONST)
6362 addr = XEXP (addr, 0);
6364 /* We must match stuff we generate before. Assume the only
6365 unspecs that can get here are ours. Not that we could do
6366 anything with them anyway.... */
6367 if (GET_CODE (addr) == UNSPEC
6368 || (GET_CODE (addr) == PLUS
6369 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6371 if (GET_CODE (addr) != PLUS)
6374 if (GET_CODE (addr) == PLUS)
6376 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6378 /* Check first to see if this is a constant offset from a @GOTOFF
6379 symbol reference. */
6380 if (local_symbolic_operand (op0, Pmode)
6381 && GET_CODE (op1) == CONST_INT)
6385 if (reload_in_progress)
6386 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6387 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6389 new = gen_rtx_PLUS (Pmode, new, op1);
6390 new = gen_rtx_CONST (Pmode, new);
6391 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6395 emit_move_insn (reg, new);
/* Offsets outside the signed +/-16MB window must live in a register
   (matches the range accepted by legitimate_pic_address_disp_p).  */
6401 if (INTVAL (op1) < -16*1024*1024
6402 || INTVAL (op1) >= 16*1024*1024)
6403 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* General PLUS: legitimize each side, then re-associate constants.  */
6408 base = legitimize_pic_address (XEXP (addr, 0), reg);
6409 new = legitimize_pic_address (XEXP (addr, 1),
6410 base == reg ? NULL_RTX : reg);
6412 if (GET_CODE (new) == CONST_INT)
6413 new = plus_constant (base, INTVAL (new));
6416 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6418 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6419 new = XEXP (new, 1);
6421 new = gen_rtx_PLUS (Pmode, base, new);
6429 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Builds an UNSPEC_TP rtx; when forced to a register, emits the SET
   explicitly (the to_reg test and return are in missing lines).  */
6432 get_thread_pointer (int to_reg)
6436 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6440 reg = gen_reg_rtx (Pmode);
6441 insn = gen_rtx_SET (VOIDmode, reg, tp);
6442 insn = emit_insn (insn);
6447 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6448 false if we expect this to be used for a memory address and true if
6449 we expect to load the address into a register. */
/* NOTE(review): interior lines (switch head, TARGET_64BIT guards,
   start_sequence/end_sequence calls, some returns) are missing from
   this extraction; one case per TLS model is visible below.  */
6452 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6454 rtx dest, base, off, pic;
6459 case TLS_MODEL_GLOBAL_DYNAMIC:
6460 dest = gen_reg_rtx (Pmode);
/* 64-bit: call __tls_get_addr with the result in %rax (reg 0).  */
6463 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6466 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6467 insns = get_insns ();
6470 emit_libcall_block (insns, dest, rax, x);
6473 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6476 case TLS_MODEL_LOCAL_DYNAMIC:
6477 base = gen_reg_rtx (Pmode);
6480 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6483 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6484 insns = get_insns ();
6487 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6488 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6489 emit_libcall_block (insns, base, rax, note);
6492 emit_insn (gen_tls_local_dynamic_base_32 (base));
/* Local dynamic result = module base + @DTPOFF offset.  */
6494 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6495 off = gen_rtx_CONST (Pmode, off);
6497 return gen_rtx_PLUS (Pmode, base, off);
6499 case TLS_MODEL_INITIAL_EXEC:
6503 type = UNSPEC_GOTNTPOFF;
6507 if (reload_in_progress)
6508 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6509 pic = pic_offset_table_rtx;
6510 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6512 else if (!TARGET_GNU_TLS)
6514 pic = gen_reg_rtx (Pmode);
6515 emit_insn (gen_set_got (pic));
6516 type = UNSPEC_GOTTPOFF;
6521 type = UNSPEC_INDNTPOFF;
6524 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6525 off = gen_rtx_CONST (Pmode, off);
6527 off = gen_rtx_PLUS (Pmode, pic, off);
6528 off = gen_rtx_MEM (Pmode, off);
6529 RTX_UNCHANGING_P (off) = 1;
6530 set_mem_alias_set (off, ix86_GOT_alias_set ());
6532 if (TARGET_64BIT || TARGET_GNU_TLS)
6534 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6535 off = force_reg (Pmode, off);
6536 return gen_rtx_PLUS (Pmode, base, off);
/* Sun TLS variant: subtract the offset from the thread pointer.  */
6540 base = get_thread_pointer (true);
6541 dest = gen_reg_rtx (Pmode);
6542 emit_insn (gen_subsi3 (dest, base, off));
6546 case TLS_MODEL_LOCAL_EXEC:
6547 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6548 (TARGET_64BIT || TARGET_GNU_TLS)
6549 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6550 off = gen_rtx_CONST (Pmode, off);
6552 if (TARGET_64BIT || TARGET_GNU_TLS)
6554 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6555 return gen_rtx_PLUS (Pmode, base, off);
6559 base = get_thread_pointer (true);
6560 dest = gen_reg_rtx (Pmode);
6561 emit_insn (gen_subsi3 (dest, base, off));
6572 /* Try machine-dependent ways of modifying an illegitimate address
6573 to be legitimate. If we find one, return the new, valid address.
6574 This macro is used in only one place: `memory_address' in explow.c.
6576 OLDX is the address as it was before break_out_memory_refs was called.
6577 In some cases it is useful to look at this to decide what needs to be done.
6579 MODE and WIN are passed so that this macro can use
6580 GO_IF_LEGITIMATE_ADDRESS.
6582 It is always safe for this macro to do nothing. It exists to recognize
6583 opportunities to optimize the output.
6585 For the 80386, we handle X+REG by loading X into a register R and
6586 using R+REG. R will go in a general reg and indexing will be used.
6587 However, if REG is a broken-out memory address or multiplication,
6588 nothing needs to be done because REG can certainly go in a general reg.
6590 When -fpic is used, special handling is needed for symbolic references.
6591 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): `changed'/`log' declarations, several `changed = 1'
   assignments, final returns and some condition lines are missing
   from this extraction; comments annotate visible fragments only.  */
6594 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6599 if (TARGET_DEBUG_ADDR)
6601 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6602 GET_MODE_NAME (mode));
/* TLS symbols get their own legitimization path.  */
6606 log = tls_symbolic_operand (x, mode);
6608 return legitimize_tls_address (x, log, false);
6610 if (flag_pic && SYMBOLIC_CONST (x))
6611 return legitimize_pic_address (x, 0);
6613 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6614 if (GET_CODE (x) == ASHIFT
6615 && GET_CODE (XEXP (x, 1)) == CONST_INT
6616 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6619 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6620 GEN_INT (1 << log));
6623 if (GET_CODE (x) == PLUS)
6625 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6627 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6628 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6629 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6632 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6633 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6634 GEN_INT (1 << log));
6637 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6638 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6639 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6642 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6643 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6644 GEN_INT (1 << log));
6647 /* Put multiply first if it isn't already. */
6648 if (GET_CODE (XEXP (x, 1)) == MULT)
6650 rtx tmp = XEXP (x, 0);
6651 XEXP (x, 0) = XEXP (x, 1);
6656 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6657 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6658 created by virtual register instantiation, register elimination, and
6659 similar optimizations. */
6660 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6663 x = gen_rtx_PLUS (Pmode,
6664 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6665 XEXP (XEXP (x, 1), 0)),
6666 XEXP (XEXP (x, 1), 1));
6670 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6671 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6672 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6673 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6674 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6675 && CONSTANT_P (XEXP (x, 1)))
6678 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT; the other
   operand is folded via plus_constant below.  */
6680 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6682 constant = XEXP (x, 1);
6683 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6685 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6687 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6688 other = XEXP (x, 1);
6696 x = gen_rtx_PLUS (Pmode,
6697 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6698 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6699 plus_constant (other, INTVAL (constant)));
6703 if (changed && legitimate_address_p (mode, x, FALSE))
6706 if (GET_CODE (XEXP (x, 0)) == MULT)
6709 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6712 if (GET_CODE (XEXP (x, 1)) == MULT)
6715 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6719 && GET_CODE (XEXP (x, 1)) == REG
6720 && GET_CODE (XEXP (x, 0)) == REG)
6723 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6726 x = legitimize_pic_address (x, 0);
6729 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one operand into a fresh register.  */
6732 if (GET_CODE (XEXP (x, 0)) == REG)
6734 rtx temp = gen_reg_rtx (Pmode);
6735 rtx val = force_operand (XEXP (x, 1), temp);
6737 emit_move_insn (temp, val);
6743 else if (GET_CODE (XEXP (x, 1)) == REG)
6745 rtx temp = gen_reg_rtx (Pmode);
6746 rtx val = force_operand (XEXP (x, 0), temp);
6748 emit_move_insn (temp, val);
6758 /* Print an integer constant expression in assembler syntax. Addition
6759 and subtraction are the only arithmetic that may appear in these
6760 expressions. FILE is the stdio stream to write to, X is the rtx, and
6761 CODE is the operand print code from the output string. */
/* NOTE(review): case labels, `break' statements and some `putc' calls
   between the visible lines are missing from this extraction.  */
6764 output_pic_addr_const (FILE *file, rtx x, int code)
6768 switch (GET_CODE (x))
6778 assemble_name (file, XSTR (x, 0));
/* Non-local symbols get a @PLT suffix under the 'P' operand code.  */
6779 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6780 fputs ("@PLT", file);
6787 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6788 assemble_name (asm_out_file, buf);
6792 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6796 /* This used to output parentheses around the expression,
6797 but that does not work on the 386 (either ATT or BSD assembler). */
6798 output_pic_addr_const (file, XEXP (x, 0), code);
6802 if (GET_MODE (x) == VOIDmode)
6804 /* We can use %d if the number is <32 bits and positive. */
6805 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6806 fprintf (file, "0x%lx%08lx",
6807 (unsigned long) CONST_DOUBLE_HIGH (x),
6808 (unsigned long) CONST_DOUBLE_LOW (x));
6810 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6813 /* We can't handle floating point constants;
6814 PRINT_OPERAND must handle them. */
6815 output_operand_lossage ("floating constant misused");
6819 /* Some assemblers need integer constants to appear first. */
6820 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6822 output_pic_addr_const (file, XEXP (x, 0), code);
6824 output_pic_addr_const (file, XEXP (x, 1), code);
6826 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6828 output_pic_addr_const (file, XEXP (x, 1), code);
6830 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: Intel dialect brackets with (), AT&T with [].  */
6838 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6839 output_pic_addr_const (file, XEXP (x, 0), code);
6841 output_pic_addr_const (file, XEXP (x, 1), code);
6843 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6847 if (XVECLEN (x, 0) != 1)
6849 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
/* Map each UNSPEC kind to its relocation suffix.  */
6850 switch (XINT (x, 1))
6853 fputs ("@GOT", file);
6856 fputs ("@GOTOFF", file);
6858 case UNSPEC_GOTPCREL:
6859 fputs ("@GOTPCREL(%rip)", file);
6861 case UNSPEC_GOTTPOFF:
6862 /* FIXME: This might be @TPOFF in Sun ld too. */
6863 fputs ("@GOTTPOFF", file);
6866 fputs ("@TPOFF", file);
6870 fputs ("@TPOFF", file);
6872 fputs ("@NTPOFF", file);
6875 fputs ("@DTPOFF", file);
6877 case UNSPEC_GOTNTPOFF:
6879 fputs ("@GOTTPOFF(%rip)", file);
6881 fputs ("@GOTNTPOFF", file);
6883 case UNSPEC_INDNTPOFF:
6884 fputs ("@INDNTPOFF", file);
6887 output_operand_lossage ("invalid UNSPEC as operand");
6893 output_operand_lossage ("invalid expression as operand");
6897 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6898 We need to handle our special PIC relocations. */
/* Emits ASM_QUAD/ASM_LONG then the constant; PIC constants go through
   output_pic_addr_const, others through output_addr_const.  */
6901 i386_dwarf_output_addr_const (FILE *file, rtx x)
6904 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6908 fprintf (file, "%s", ASM_LONG);
6911 output_pic_addr_const (file, x, '\0');
6913 output_addr_const (file, x);
6917 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6918 We need to emit DTP-relative relocations. */
/* NOTE(review): the switch on SIZE is in missing lines; only the
   common emission and the ", 0" padding case are visible.  */
6921 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6923 fputs (ASM_LONG, file);
6924 output_addr_const (file, x);
6925 fputs ("@DTPOFF", file);
6931 fputs (", 0", file);
6938 /* In the name of slightly smaller debug output, and to cater to
6939 general assembler losage, recognize PIC+GOTOFF and turn it back
6940 into a direct symbol reference. */
/* NOTE(review): the declaration/initialization of `x' and `y' and
   several returns of `orig_x' are missing from this extraction.  */
6943 ix86_delegitimize_address (rtx orig_x)
6947 if (GET_CODE (x) == MEM)
/* 64-bit: only (mem (const (unspec @GOTPCREL))) is recognized.  */
6952 if (GET_CODE (x) != CONST
6953 || GET_CODE (XEXP (x, 0)) != UNSPEC
6954 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6955 || GET_CODE (orig_x) != MEM)
6957 return XVECEXP (XEXP (x, 0), 0, 0);
6960 if (GET_CODE (x) != PLUS
6961 || GET_CODE (XEXP (x, 1)) != CONST)
6964 if (GET_CODE (XEXP (x, 0)) == REG
6965 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6966 /* %ebx + GOT/GOTOFF */
6968 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6970 /* %ebx + %reg * scale + GOT/GOTOFF */
6972 if (GET_CODE (XEXP (y, 0)) == REG
6973 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6975 else if (GET_CODE (XEXP (y, 1)) == REG
6976 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6980 if (GET_CODE (y) != REG
6981 && GET_CODE (y) != MULT
6982 && GET_CODE (y) != ASHIFT)
6988 x = XEXP (XEXP (x, 1), 0);
/* @GOT references must sit inside a MEM; @GOTOFF must not.  */
6989 if (GET_CODE (x) == UNSPEC
6990 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6991 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6994 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6995 return XVECEXP (x, 0, 0);
6998 if (GET_CODE (x) == PLUS
6999 && GET_CODE (XEXP (x, 0)) == UNSPEC
7000 && GET_CODE (XEXP (x, 1)) == CONST_INT
7001 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7002 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7003 && GET_CODE (orig_x) != MEM)))
7005 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7007 return gen_rtx_PLUS (Pmode, y, x);
/* Emit the condition-code suffix (e.g. "e", "a", "np") for CODE in
   mode MODE to FILE.  NOTE(review): the function comment, the `fp'
   parameter declaration, the switch head, most case labels and the
   suffix assignments for several branches are in missing lines.  */
7015 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first mapped to integer condition codes; a compare
   needing a bypass or second code cannot be expressed as one suffix.  */
7020 if (mode == CCFPmode || mode == CCFPUmode)
7022 enum rtx_code second_code, bypass_code;
7023 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7024 if (bypass_code != NIL || second_code != NIL)
7026 code = ix86_fp_compare_code_to_integer (code);
7030 code = reverse_condition (code);
7041 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7046 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7047 Those same assemblers have the same but opposite losage on cmov. */
7050 suffix = fp ? "nbe" : "a";
7053 if (mode == CCNOmode || mode == CCGOCmode)
7055 else if (mode == CCmode || mode == CCGCmode)
7066 if (mode == CCNOmode || mode == CCGOCmode)
7068 else if (mode == CCmode || mode == CCGCmode)
7077 suffix = fp ? "nb" : "ae";
7080 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7090 suffix = fp ? "u" : "p";
7093 suffix = fp ? "nu" : "np";
7098 fputs (suffix, file);
7101 /* Print the name of register X to FILE based on its machine mode and number.
7102 If CODE is 'w', pretend the mode is HImode.
7103 If CODE is 'b', pretend the mode is QImode.
7104 If CODE is 'k', pretend the mode is SImode.
7105 If CODE is 'q', pretend the mode is DImode.
7106 If CODE is 'h', pretend the reg is the `high' byte register.
7107 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
/* NOTE(review): the assignments mapping each CODE letter to a size,
   the switch over size, and several break/return lines are missing
   from this extraction.  */
7110 print_reg (rtx x, int code, FILE *file)
7112 if (REGNO (x) == ARG_POINTER_REGNUM
7113 || REGNO (x) == FRAME_POINTER_REGNUM
7114 || REGNO (x) == FLAGS_REG
7115 || REGNO (x) == FPSR_REG)
7118 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7121 if (code == 'w' || MMX_REG_P (x))
7123 else if (code == 'b')
7125 else if (code == 'k')
7127 else if (code == 'q')
7129 else if (code == 'y')
7131 else if (code == 'h')
/* No explicit CODE letter: derive the size from X's own mode.  */
7134 code = GET_MODE_SIZE (GET_MODE (x));
7136 /* Irritatingly, AMD extended registers use different naming convention
7137 from the normal registers. */
7138 if (REX_INT_REG_P (x))
7145 error ("extended registers have no high halves");
7148 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7151 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7154 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7157 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7160 error ("unsupported operand size for extended register");
7168 if (STACK_TOP_P (x))
7170 fputs ("st(0)", file);
/* 32/64-bit integer names: prefix 'e' or 'r' before the 16-bit name.  */
7177 if (! ANY_FP_REG_P (x))
7178 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7183 fputs (hi_reg_name[REGNO (x)], file);
7186 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7188 fputs (qi_reg_name[REGNO (x)], file);
7191 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7193 fputs (qi_high_reg_name[REGNO (x)], file);
7200 /* Locate some local-dynamic symbol still in use by this function
7201 so that we can print its name in some tls_local_dynamic_base
/* Caches the found name in cfun->machine->some_ld_name; scans all
   insn patterns with for_each_rtx and the _1 callback below.  */
7205 get_some_local_dynamic_name (void)
7209 if (cfun->machine->some_ld_name)
7210 return cfun->machine->some_ld_name;
7212 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7214 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7215 return cfun->machine->some_ld_name;
/* for_each_rtx callback for get_some_local_dynamic_name: record the
   first local-dynamic SYMBOL_REF found.  NOTE(review): the `x'
   declaration and the return statements are in missing lines.  */
7221 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7225 if (GET_CODE (x) == SYMBOL_REF
7226 && local_dynamic_symbolic_operand (x, Pmode))
7228 cfun->machine->some_ld_name = XSTR (x, 0);
7236 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7237 C -- print opcode suffix for set/cmov insn.
7238 c -- like C, but print reversed condition
7239 F,f -- likewise, but for floating-point.
7240 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7242 R -- print the prefix for register names.
7243 z -- print the opcode suffix for the size of the current operand.
7244 * -- print a star (in certain assembler syntax)
7245 A -- print an absolute memory reference.
7246 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7247 s -- print a shift double count, followed by the assemblers argument
7249 b -- print the QImode name of the register for the indicated operand.
7250 %b0 would print %al if operands[0] is reg 0.
7251 w -- likewise, print the HImode name of the register.
7252 k -- likewise, print the SImode name of the register.
7253 q -- likewise, print the DImode name of the register.
7254 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7255 y -- print "st(0)" instead of "st" as a register.
7256 D -- print condition for SSE cmp instruction.
7257 P -- if PIC, print an @PLT suffix.
7258 X -- don't print any sort of PIC '@' suffix for a symbol.
7259 & -- print some in-use local-dynamic symbol name.
7263 print_operand (FILE *file, rtx x, int code)
7270 if (ASSEMBLER_DIALECT == ASM_ATT)
7275 assemble_name (file, get_some_local_dynamic_name ());
7279 if (ASSEMBLER_DIALECT == ASM_ATT)
7281 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7283 /* Intel syntax. For absolute addresses, registers should not
7284 be surrounded by braces. */
7285 if (GET_CODE (x) != REG)
7288 PRINT_OPERAND (file, x, 0);
7296 PRINT_OPERAND (file, x, 0);
7301 if (ASSEMBLER_DIALECT == ASM_ATT)
7306 if (ASSEMBLER_DIALECT == ASM_ATT)
7311 if (ASSEMBLER_DIALECT == ASM_ATT)
7316 if (ASSEMBLER_DIALECT == ASM_ATT)
7321 if (ASSEMBLER_DIALECT == ASM_ATT)
7326 if (ASSEMBLER_DIALECT == ASM_ATT)
7331 /* 387 opcodes don't get size suffixes if the operands are
7333 if (STACK_REG_P (x))
7336 /* Likewise if using Intel opcodes. */
7337 if (ASSEMBLER_DIALECT == ASM_INTEL)
7340 /* This is the size of op from size of operand. */
7341 switch (GET_MODE_SIZE (GET_MODE (x)))
7344 #ifdef HAVE_GAS_FILDS_FISTS
7350 if (GET_MODE (x) == SFmode)
7365 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7367 #ifdef GAS_MNEMONICS
7393 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7395 PRINT_OPERAND (file, x, 0);
7401 /* Little bit of braindamage here. The SSE compare instructions
7402 does use completely different names for the comparisons that the
7403 fp conditional moves. */
7404 switch (GET_CODE (x))
7419 fputs ("unord", file);
7423 fputs ("neq", file);
7427 fputs ("nlt", file);
7431 fputs ("nle", file);
7434 fputs ("ord", file);
7442 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7443 if (ASSEMBLER_DIALECT == ASM_ATT)
7445 switch (GET_MODE (x))
7447 case HImode: putc ('w', file); break;
7449 case SFmode: putc ('l', file); break;
7451 case DFmode: putc ('q', file); break;
7459 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7462 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7463 if (ASSEMBLER_DIALECT == ASM_ATT)
7466 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7469 /* Like above, but reverse condition */
7471 /* Check to see if argument to %c is really a constant
7472 and not a condition code which needs to be reversed. */
7473 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7475 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7478 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7481 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7482 if (ASSEMBLER_DIALECT == ASM_ATT)
7485 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7491 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7494 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7497 int pred_val = INTVAL (XEXP (x, 0));
7499 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7500 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7502 int taken = pred_val > REG_BR_PROB_BASE / 2;
7503 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7505 /* Emit hints only in the case default branch prediction
7506 heuristics would fail. */
7507 if (taken != cputaken)
7509 /* We use 3e (DS) prefix for taken branches and
7510 2e (CS) prefix for not taken branches. */
7512 fputs ("ds ; ", file);
7514 fputs ("cs ; ", file);
7521 output_operand_lossage ("invalid operand code `%c'", code);
7525 if (GET_CODE (x) == REG)
7526 print_reg (x, code, file);
7528 else if (GET_CODE (x) == MEM)
7530 /* No `byte ptr' prefix for call instructions. */
7531 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7534 switch (GET_MODE_SIZE (GET_MODE (x)))
7536 case 1: size = "BYTE"; break;
7537 case 2: size = "WORD"; break;
7538 case 4: size = "DWORD"; break;
7539 case 8: size = "QWORD"; break;
7540 case 12: size = "XWORD"; break;
7541 case 16: size = "XMMWORD"; break;
7546 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7549 else if (code == 'w')
7551 else if (code == 'k')
7555 fputs (" PTR ", file);
7559 /* Avoid (%rip) for call operands. */
7560 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7561 && GET_CODE (x) != CONST_INT)
7562 output_addr_const (file, x);
7563 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7564 output_operand_lossage ("invalid constraints for operand");
7569 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7574 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7575 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7577 if (ASSEMBLER_DIALECT == ASM_ATT)
7579 fprintf (file, "0x%08lx", l);
7582 /* These float cases don't actually occur as immediate operands. */
7583 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7587 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7588 fprintf (file, "%s", dstr);
7591 else if (GET_CODE (x) == CONST_DOUBLE
7592 && GET_MODE (x) == XFmode)
7596 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7597 fprintf (file, "%s", dstr);
7604 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7606 if (ASSEMBLER_DIALECT == ASM_ATT)
7609 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7610 || GET_CODE (x) == LABEL_REF)
7612 if (ASSEMBLER_DIALECT == ASM_ATT)
7615 fputs ("OFFSET FLAT:", file);
7618 if (GET_CODE (x) == CONST_INT)
7619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7621 output_pic_addr_const (file, x, code);
7623 output_addr_const (file, x);
7627 /* Print a memory operand whose address is ADDR. */
7630 print_operand_address (FILE *file, rtx addr)
7632 struct ix86_address parts;
7633 rtx base, index, disp;
7636 if (! ix86_decompose_address (addr, &parts))
7640 index = parts.index;
7642 scale = parts.scale;
7650 if (USER_LABEL_PREFIX[0] == 0)
7652 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7658 if (!base && !index)
7660 /* Displacement only requires special attention. */
7662 if (GET_CODE (disp) == CONST_INT)
7664 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7666 if (USER_LABEL_PREFIX[0] == 0)
7668 fputs ("ds:", file);
7670 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7673 output_pic_addr_const (file, disp, 0);
7675 output_addr_const (file, disp);
7677 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7679 && ((GET_CODE (disp) == SYMBOL_REF
7680 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7681 || GET_CODE (disp) == LABEL_REF
7682 || (GET_CODE (disp) == CONST
7683 && GET_CODE (XEXP (disp, 0)) == PLUS
7684 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7685 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7686 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7687 fputs ("(%rip)", file);
7691 if (ASSEMBLER_DIALECT == ASM_ATT)
7696 output_pic_addr_const (file, disp, 0);
7697 else if (GET_CODE (disp) == LABEL_REF)
7698 output_asm_label (disp);
7700 output_addr_const (file, disp);
7705 print_reg (base, 0, file);
7709 print_reg (index, 0, file);
7711 fprintf (file, ",%d", scale);
7717 rtx offset = NULL_RTX;
7721 /* Pull out the offset of a symbol; print any symbol itself. */
7722 if (GET_CODE (disp) == CONST
7723 && GET_CODE (XEXP (disp, 0)) == PLUS
7724 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7726 offset = XEXP (XEXP (disp, 0), 1);
7727 disp = gen_rtx_CONST (VOIDmode,
7728 XEXP (XEXP (disp, 0), 0));
7732 output_pic_addr_const (file, disp, 0);
7733 else if (GET_CODE (disp) == LABEL_REF)
7734 output_asm_label (disp);
7735 else if (GET_CODE (disp) == CONST_INT)
7738 output_addr_const (file, disp);
7744 print_reg (base, 0, file);
7747 if (INTVAL (offset) >= 0)
7749 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7753 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7760 print_reg (index, 0, file);
7762 fprintf (file, "*%d", scale);
/* NOTE(review): elided listing -- kept verbatim, comments only.
   output_addr_const_extra: print target-specific UNSPEC address constants
   (TLS relocations such as @GOTTPOFF/@TPOFF/@NTPOFF/@DTPOFF/@GOTNTPOFF/
   @INDNTPOFF) to FILE.  Non-UNSPEC rtx are rejected (branch elided).  */
7770 output_addr_const_extra (FILE *file, rtx x)
7774 if (GET_CODE (x) != UNSPEC)
7777 op = XVECEXP (x, 0, 0);
7778 switch (XINT (x, 1))
7780 case UNSPEC_GOTTPOFF:
7781 output_addr_const (file, op);
7782 /* FIXME: This might be @TPOFF in Sun ld. */
7783 fputs ("@GOTTPOFF", file);
/* The case labels between the entries below are elided in this listing
   (presumably UNSPEC_TPOFF / UNSPEC_NTPOFF / UNSPEC_DTPOFF) -- TODO
   confirm against full source.  */
7786 output_addr_const (file, op);
7787 fputs ("@TPOFF", file);
7790 output_addr_const (file, op);
7792 fputs ("@TPOFF", file);
7794 fputs ("@NTPOFF", file);
7797 output_addr_const (file, op);
7798 fputs ("@DTPOFF", file);
7800 case UNSPEC_GOTNTPOFF:
7801 output_addr_const (file, op);
/* 64-bit uses the RIP-relative @GOTTPOFF form; 32-bit uses @GOTNTPOFF
   (the selecting condition is elided).  */
7803 fputs ("@GOTTPOFF(%rip)", file);
7805 fputs ("@GOTNTPOFF", file);
7807 case UNSPEC_INDNTPOFF:
7808 output_addr_const (file, op);
7809 fputs ("@INDNTPOFF", file);
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
7819 /* Split one or more DImode RTL references into pairs of SImode
7820 references. The RTL can be REG, offsettable MEM, integer constant, or
7821 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7822 split and "num" is its length. lo_half and hi_half are output arrays
7823 that parallel "operands". */
7826 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* The loop over "num" is elided in this listing; the visible body handles
   one operand.  */
7830 rtx op = operands[num];
7832 /* simplify_subreg refuse to split volatile memory addresses,
7833 but we still have to handle it. */
7834 if (GET_CODE (op) == MEM)
/* MEM: low word at offset 0, high word at byte offset 4.  */
7836 lo_half[num] = adjust_address (op, SImode, 0);
7837 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: split via subregs; VOIDmode constants are treated as DImode.  */
7841 lo_half[num] = simplify_gen_subreg (SImode, op,
7842 GET_MODE (op) == VOIDmode
7843 ? DImode : GET_MODE (op), 0);
7844 hi_half[num] = simplify_gen_subreg (SImode, op,
7845 GET_MODE (op) == VOIDmode
7846 ? DImode : GET_MODE (op), 4);
/* NOTE(review): elided listing -- kept verbatim, comments only.
   TImode analogue of split_di: each 128-bit operand is split into two
   DImode halves at byte offsets 0 and 8.  */
7850 /* Split one or more TImode RTL references into pairs of SImode
7851 references. The RTL can be REG, offsettable MEM, integer constant, or
7852 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7853 split and "num" is its length. lo_half and hi_half are output arrays
7854 that parallel "operands". */
7857 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7861 rtx op = operands[num];
7863 /* simplify_subreg refuse to split volatile memory addresses, but we
7864 still have to handle it. */
7865 if (GET_CODE (op) == MEM)
7867 lo_half[num] = adjust_address (op, DImode, 0);
7868 hi_half[num] = adjust_address (op, DImode, 8);
7872 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7873 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7878 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7879 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7880 is the expression of the binary operation. The output may either be
7881 emitted here, or returned to the caller, like all output_* functions.
7883 There is no guarantee that the operands are the same mode, as they
7884 might be within FLOAT or FLOAT_EXTEND expressions. */
7886 #ifndef SYSV386_COMPAT
7887 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7888 wants to fix the assemblers because that causes incompatibility
7889 with gcc. No-one wants to fix gcc because that causes
7890 incompatibility with assemblers... You can use the option of
7891 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7892 #define SYSV386_COMPAT 1
/* NOTE(review): elided listing -- kept verbatim, comments only.
   output_387_binary_op: return the assembler template for a 387 (or SSE)
   binary FP operation (PLUS/MINUS/MULT/DIV in operands[3]).  The template
   is assembled in static "buf" (mnemonic chosen in elided code) with a
   suffix string "p" chosen by the operand/stack configuration below.  */
7896 output_387_binary_op (rtx insn, rtx *operands)
7898 static char buf[30];
7901 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7903 #ifdef ENABLE_CHECKING
7904 /* Even if we do not want to check the inputs, this documents input
7905 constraints. Which helps in understanding the following code. */
7906 if (STACK_REG_P (operands[0])
7907 && ((REG_P (operands[1])
7908 && REGNO (operands[0]) == REGNO (operands[1])
7909 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7910 || (REG_P (operands[2])
7911 && REGNO (operands[0]) == REGNO (operands[2])
7912 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7913 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: pick base mnemonic; integer-mode operand selects the
   "fi" (integer-operand) forms.  Mnemonic strings are elided here.  */
7919 switch (GET_CODE (operands[3]))
7922 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7923 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7931 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7932 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7940 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7941 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7949 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7950 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: scalar single/double suffix.  */
7964 if (GET_MODE (operands[0]) == SFmode)
7965 strcat (buf, "ss\t{%2, %0|%0, %2}");
7967 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* 387 path: choose operand/pop suffix per operation and stack layout.  */
7972 switch (GET_CODE (operands[3]))
/* Commutative ops (MULT/PLUS): canonicalize so operands[0]==operands[1].  */
7976 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7978 rtx temp = operands[2];
7979 operands[2] = operands[1];
7983 /* know operands[0] == operands[1]. */
7985 if (GET_CODE (operands[2]) == MEM)
7991 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7993 if (STACK_TOP_P (operands[0]))
7994 /* How is it that we are storing to a dead operand[2]?
7995 Well, presumably operands[1] is dead too. We can't
7996 store the result to st(0) as st(0) gets popped on this
7997 instruction. Instead store to operands[2] (which I
7998 think has to be st(1)). st(1) will be popped later.
7999 gcc <= 2.8.1 didn't have this check and generated
8000 assembly code that the Unixware assembler rejected. */
8001 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8003 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8007 if (STACK_TOP_P (operands[0]))
8008 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8010 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV).  */
8015 if (GET_CODE (operands[1]) == MEM)
8021 if (GET_CODE (operands[2]) == MEM)
8027 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8030 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8031 derived assemblers, confusingly reverse the direction of
8032 the operation for fsub{r} and fdiv{r} when the
8033 destination register is not st(0). The Intel assembler
8034 doesn't have this brain damage. Read !SYSV386_COMPAT to
8035 figure out what the hardware really does. */
8036 if (STACK_TOP_P (operands[0]))
8037 p = "{p\t%0, %2|rp\t%2, %0}";
8039 p = "{rp\t%2, %0|p\t%0, %2}";
8041 if (STACK_TOP_P (operands[0]))
8042 /* As above for fmul/fadd, we can't store to st(0). */
8043 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8045 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8050 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8053 if (STACK_TOP_P (operands[0]))
8054 p = "{rp\t%0, %1|p\t%1, %0}";
8056 p = "{p\t%1, %0|rp\t%0, %1}";
8058 if (STACK_TOP_P (operands[0]))
8059 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8061 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8066 if (STACK_TOP_P (operands[0]))
8068 if (STACK_TOP_P (operands[1]))
8069 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8071 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8074 else if (STACK_TOP_P (operands[1]))
8077 p = "{\t%1, %0|r\t%0, %1}";
8079 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8085 p = "{r\t%2, %0|\t%0, %2}";
8087 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8100 /* Output code to initialize control word copies used by
8101 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8102 is set to control word rounding downwards. */
8104 emit_i387_cw_initialization (rtx normal, rtx round_down)
8106 rtx reg = gen_reg_rtx (HImode);
/* Store the current FPU control word into NORMAL, then derive ROUND_DOWN
   by setting the rounding-control bits (0xc00 = round toward zero).  */
8108 emit_insn (gen_x86_fnstcw_1 (normal));
8109 emit_move_insn (reg, normal);
8110 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
/* Fast path: insert the two RC bits with movsi_insv_1 instead of a
   full HImode OR (additional condition elided).  */
8112 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8114 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8115 emit_move_insn (round_down, reg);
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8118 /* Output code for INSN to convert a float to a signed int. OPERANDS
8119 are the insn operands. The output may be [HSD]Imode and the input
8120 operand may be [SDX]Fmode. */
8123 output_fix_trunc (rtx insn, rtx *operands)
8125 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8126 int dimode_p = GET_MODE (operands[0]) == DImode;
8128 /* Jump through a hoop or two for DImode, since the hardware has no
8129 non-popping instruction. We used to do this a different way, but
8130 that was somewhat fragile and broke with post-reload splitters. */
8131 if (dimode_p && !stack_top_dies)
8132 output_asm_insn ("fld\t%y1", operands);
/* Input must already be at the top of the 387 stack and the output must
   be a MEM (failure branches elided -- presumably abort).  */
8134 if (!STACK_TOP_P (operands[1]))
8137 if (GET_CODE (operands[0]) != MEM)
/* Switch to the truncating control word (%3), store, then restore the
   caller's control word (%2).  */
8140 output_asm_insn ("fldcw\t%3", operands);
8141 if (stack_top_dies || dimode_p)
8142 output_asm_insn ("fistp%z0\t%0", operands);
8144 output_asm_insn ("fist%z0\t%0", operands);
8145 output_asm_insn ("fldcw\t%2", operands);
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8150 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8151 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8152 when fucom should be used. */
8155 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8158 rtx cmp_op0 = operands[0];
8159 rtx cmp_op1 = operands[1];
8160 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
/* When eflags_p == 2 the real comparison operands are shifted by one
   (condition elided).  */
8165 cmp_op1 = operands[2];
/* SSE compares: [u]comiss / [u]comisd set EFLAGS directly.  */
8169 if (GET_MODE (operands[0]) == SFmode)
8171 return "ucomiss\t{%1, %0|%0, %1}";
8173 return "comiss\t{%1, %0|%0, %1}";
8176 return "ucomisd\t{%1, %0|%0, %1}";
8178 return "comisd\t{%1, %0|%0, %1}";
/* 387 compares require op0 at the stack top.  */
8181 if (! STACK_TOP_P (cmp_op0))
8184 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8186 if (STACK_REG_P (cmp_op1)
8188 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8189 && REGNO (cmp_op1) != FIRST_STACK_REG)
8191 /* If both the top of the 387 stack dies, and the other operand
8192 is also a stack register that dies, then this must be a
8193 `fcompp' float compare */
8197 /* There is no double popping fcomi variant. Fortunately,
8198 eflags is immune from the fstp's cc clobbering. */
8200 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8202 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8210 return "fucompp\n\tfnstsw\t%0";
8212 return "fcompp\n\tfnstsw\t%0";
8225 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8227 static const char * const alt[24] =
/* Table entries for several mask values are elided in this listing.  */
8239 "fcomi\t{%y1, %0|%0, %y1}",
8240 "fcomip\t{%y1, %0|%0, %y1}",
8241 "fucomi\t{%y1, %0|%0, %y1}",
8242 "fucomip\t{%y1, %0|%0, %y1}",
8249 "fcom%z2\t%y2\n\tfnstsw\t%0",
8250 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8251 "fucom%z2\t%y2\n\tfnstsw\t%0",
8252 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8254 "ficom%z2\t%y2\n\tfnstsw\t%0",
8255 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Select the template by a 4-bit key built from the flags.  */
8263 mask = eflags_p << 3;
8264 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8265 mask |= unordered_p << 1;
8266 mask |= stack_top_dies;
/* NOTE(review): elided listing -- kept verbatim, comments only.
   Emit one element of a jump-table (addr_vec): ".long L<value>", or
   ".quad" under the elided 64-bit condition.  */
8279 ix86_output_addr_vec_elt (FILE *file, int value)
8281 const char *directive = ASM_LONG;
8286 directive = ASM_QUAD;
8292 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
<doc_update>
/* NOTE(review): elided listing -- kept verbatim, comments only.
   Emit one element of a PIC-relative jump table as a difference of two
   labels, or a @GOTOFF / Mach-O / GOT-relative form depending on the
   target (selecting conditions elided).  */
8296 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8299 fprintf (file, "%s%s%d-%s%d\n",
8300 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8301 else if (HAVE_AS_GOTOFF_IN_DATA)
8302 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8304 else if (TARGET_MACHO)
8306 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8307 machopic_output_function_base_name (file);
8308 fprintf(file, "\n");
8312 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8313 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8316 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8320 ix86_expand_clear (rtx dest)
8324 /* We play register width games, which are only valid after reload. */
8325 if (!reload_completed)
8328 /* Avoid HImode and its attendant prefix byte. */
8329 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8330 dest = gen_rtx_REG (SImode, REGNO (dest));
8332 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8334 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8335 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The xor form clobbers the flags register (hard reg 17), so wrap the
   SET in a PARALLEL with an explicit CLOBBER.  */
8337 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8338 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8344 /* X is an unchanging MEM. If it is a constant pool reference, return
8345 the constant pool rtx, else NULL. */
8348 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address before testing it.  */
8350 x = ix86_delegitimize_address (XEXP (x, 0));
8352 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8353 return get_pool_constant (x);
/* NOTE(review): elided listing -- kept verbatim, comments only.
   ix86_expand_move: expand a move in MODE, legitimizing TLS and PIC
   symbolic operands, avoiding mem-to-mem moves, and spilling FP constants
   to the constant pool.  Emits the final SET at the end.  */
8359 ix86_expand_move (enum machine_mode mode, rtx operands[])
8361 int strict = (reload_in_progress || reload_completed);
8363 enum tls_model model;
/* TLS symbols are rewritten via legitimize_tls_address.  */
8368 model = tls_symbolic_operand (op1, Pmode);
8371 op1 = legitimize_tls_address (op1, model, true);
8372 op1 = force_operand (op1, op0);
/* PIC: symbolic sources need legitimizing (Mach-O and ELF paths).  */
8377 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8382 rtx temp = ((reload_in_progress
8383 || ((op0 && GET_CODE (op0) == REG)
8385 ? op0 : gen_reg_rtx (Pmode));
8386 op1 = machopic_indirect_data_reference (op1, temp);
8387 op1 = machopic_legitimize_pic_address (op1, mode,
8388 temp == op1 ? 0 : temp);
8390 else if (MACHOPIC_INDIRECT)
8391 op1 = machopic_indirect_data_reference (op1, 0);
8395 if (GET_CODE (op0) == MEM)
8396 op1 = force_reg (Pmode, op1);
8400 if (GET_CODE (temp) != REG)
8401 temp = gen_reg_rtx (Pmode);
8402 temp = legitimize_pic_address (op1, temp);
8407 #endif /* TARGET_MACHO */
/* No mem-to-mem moves (except push); force the source into a register.  */
8411 if (GET_CODE (op0) == MEM
8412 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8413 || !push_operand (op0, mode))
8414 && GET_CODE (op1) == MEM)
8415 op1 = force_reg (mode, op1);
8417 if (push_operand (op0, mode)
8418 && ! general_no_elim_operand (op1, mode))
8419 op1 = copy_to_mode_reg (mode, op1);
8421 /* Force large constants in 64bit compilation into register
8422 to get them CSEed. */
8423 if (TARGET_64BIT && mode == DImode
8424 && immediate_operand (op1, mode)
8425 && !x86_64_zero_extended_value (op1)
8426 && !register_operand (op0, mode)
8427 && optimize && !reload_completed && !reload_in_progress)
8428 op1 = copy_to_mode_reg (mode, op1);
8430 if (FLOAT_MODE_P (mode))
8432 /* If we are loading a floating point constant to a register,
8433 force the value to memory now, since we'll get better code
8434 out the back end. */
8438 else if (GET_CODE (op1) == CONST_DOUBLE)
8440 op1 = validize_mem (force_const_mem (mode, op1));
8441 if (!register_operand (op0, mode))
8443 rtx temp = gen_reg_rtx (mode);
8444 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8445 emit_move_insn (op0, temp);
8452 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* NOTE(review): elided listing -- kept verbatim, comments only.
   ix86_expand_vector_move: expand a vector-mode move; non-zero constants
   go through the constant pool, and mem-to-mem moves are broken up via a
   temporary register.  */
8456 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8458 /* Force constants other than zero into memory. We do not know how
8459 the instructions used to build constants modify the upper 64 bits
8460 of the register, once we have that information we may be able
8461 to handle some of them more efficiently. */
8462 if ((reload_in_progress | reload_completed) == 0
8463 && register_operand (operands[0], mode)
8464 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8465 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8467 /* Make operand1 a register if it isn't already. */
/* (leading condition of this if is elided in the listing)  */
8469 && !register_operand (operands[0], mode)
8470 && !register_operand (operands[1], mode))
8472 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8473 emit_move_insn (operands[0], temp);
8477 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8480 /* Attempt to expand a binary operator. Make the expansion closer to the
8481 actual machine, then just general_operand, which will allow 3 separate
8482 memory references (one output, two input) in a single insn. */
8485 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8488 int matching_memory;
8489 rtx src1, src2, dst, op, clob;
8495 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
/* GET_RTX_CLASS 'c' == commutative binary in this GCC version.  */
8496 if (GET_RTX_CLASS (code) == 'c'
8497 && (rtx_equal_p (dst, src2)
8498 || immediate_operand (src1, mode)))
8505 /* If the destination is memory, and we do not have matching source
8506 operands, do things in registers. */
8507 matching_memory = 0;
8508 if (GET_CODE (dst) == MEM)
8510 if (rtx_equal_p (dst, src1))
8511 matching_memory = 1;
8512 else if (GET_RTX_CLASS (code) == 'c'
8513 && rtx_equal_p (dst, src2))
8514 matching_memory = 2;
8516 dst = gen_reg_rtx (mode);
8519 /* Both source operands cannot be in memory. */
8520 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8522 if (matching_memory != 2)
8523 src2 = force_reg (mode, src2);
8525 src1 = force_reg (mode, src1);
8528 /* If the operation is not commutable, source 1 cannot be a constant
8529 or non-matching memory. */
8530 if ((CONSTANT_P (src1)
8531 || (!matching_memory && GET_CODE (src1) == MEM))
8532 && GET_RTX_CLASS (code) != 'c')
8533 src1 = force_reg (mode, src1);
8535 /* If optimizing, copy to regs to improve CSE */
8536 if (optimize && ! no_new_pseudos)
8538 if (GET_CODE (dst) == MEM)
8539 dst = gen_reg_rtx (mode);
8540 if (GET_CODE (src1) == MEM)
8541 src1 = force_reg (mode, src1);
8542 if (GET_CODE (src2) == MEM)
8543 src2 = force_reg (mode, src2);
8546 /* Emit the instruction. */
8548 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8549 if (reload_in_progress)
8551 /* Reload doesn't know about the flags register, and doesn't know that
8552 it doesn't want to clobber it. We can only do this with PLUS. */
/* Otherwise attach an explicit flags-register CLOBBER.  */
8559 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8560 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8563 /* Fix up the destination if needed. */
8564 if (dst != operands[0])
8565 emit_move_insn (operands[0], dst);
/* NOTE(review): elided listing -- kept verbatim, comments only.
   Predicate mirroring the operand legitimization rules enforced by
   ix86_expand_binary_operator above.  */
8568 /* Return TRUE or FALSE depending on whether the binary operator meets the
8569 appropriate constraints. */
8572 ix86_binary_operator_ok (enum rtx_code code,
8573 enum machine_mode mode ATTRIBUTE_UNUSED,
8576 /* Both source operands cannot be in memory. */
8577 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8579 /* If the operation is not commutable, source 1 cannot be a constant. */
8580 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8582 /* If the destination is memory, we must have a matching source operand. */
8583 if (GET_CODE (operands[0]) == MEM
8584 && ! (rtx_equal_p (operands[0], operands[1])
8585 || (GET_RTX_CLASS (code) == 'c'
8586 && rtx_equal_p (operands[0], operands[2]))))
8588 /* If the operation is not commutable and the source 1 is memory, we must
8589 have a matching destination. */
8590 if (GET_CODE (operands[1]) == MEM
8591 && GET_RTX_CLASS (code) != 'c'
8592 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): elided listing -- kept verbatim, comments only.
   Unary analogue of ix86_expand_binary_operator.  */
8597 /* Attempt to expand a unary operator. Make the expansion closer to the
8598 actual machine, then just general_operand, which will allow 2 separate
8599 memory references (one output, one input) in a single insn. */
8602 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8605 int matching_memory;
8606 rtx src, dst, op, clob;
8611 /* If the destination is memory, and we do not have matching source
8612 operands, do things in registers. */
8613 matching_memory = 0;
8614 if (GET_CODE (dst) == MEM)
8616 if (rtx_equal_p (dst, src))
8617 matching_memory = 1;
8619 dst = gen_reg_rtx (mode);
8622 /* When source operand is memory, destination must match. */
8623 if (!matching_memory && GET_CODE (src) == MEM)
8624 src = force_reg (mode, src);
8626 /* If optimizing, copy to regs to improve CSE */
8627 if (optimize && ! no_new_pseudos)
8629 if (GET_CODE (dst) == MEM)
8630 dst = gen_reg_rtx (mode);
8631 if (GET_CODE (src) == MEM)
8632 src = force_reg (mode, src);
8635 /* Emit the instruction. */
8637 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags, so no PARALLEL is needed for it.  */
8638 if (reload_in_progress || code == NOT)
8640 /* Reload doesn't know about the flags register, and doesn't know that
8641 it doesn't want to clobber it. */
8648 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8649 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8652 /* Fix up the destination if needed. */
8653 if (dst != operands[0])
8654 emit_move_insn (operands[0], dst);
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8657 /* Return TRUE or FALSE depending on whether the unary operator meets the
8658 appropriate constraints. */
8661 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8662 enum machine_mode mode ATTRIBUTE_UNUSED,
8663 rtx operands[2] ATTRIBUTE_UNUSED)
8665 /* If one of operands is memory, source and destination must match. */
8666 if ((GET_CODE (operands[0]) == MEM
8667 || GET_CODE (operands[1]) == MEM)
8668 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8673 /* Return TRUE or FALSE depending on whether the first SET in INSN
8674 has source and destination with matching CC modes, and that the
8675 CC mode is at least as constrained as REQ_MODE. */
8678 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8681 enum machine_mode set_mode;
8683 set = PATTERN (insn);
8684 if (GET_CODE (set) == PARALLEL)
8685 set = XVECEXP (set, 0, 0);
8686 if (GET_CODE (set) != SET)
8688 if (GET_CODE (SET_SRC (set)) != COMPARE)
8691 set_mode = GET_MODE (SET_DEST (set));
/* The switch over set_mode is elided; each case checks that REQ_MODE is
   no stricter than the mode the insn actually sets.  */
8695 if (req_mode != CCNOmode
8696 && (req_mode != CCmode
8697 || XEXP (SET_SRC (set), 1) != const0_rtx))
8701 if (req_mode == CCGCmode)
8705 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8709 if (req_mode == CCZmode)
8719 return (GET_MODE (SET_SRC (set)) == set_mode);
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8722 /* Generate insn patterns to do an integer compare of OPERANDS. */
8725 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8727 enum machine_mode cmpmode;
8730 cmpmode = SELECT_CC_MODE (code, op0, op1);
8731 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8733 /* This is very simple, but making the interface the same as in the
8734 FP case makes the rest of the code easier. */
8735 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8736 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8738 /* Return the test that should be put into the flags user, i.e.
8739 the bcc, scc, or cmov instruction. */
8740 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8743 /* Figure out whether to use ordered or unordered fp comparisons.
8744 Return the appropriate mode to use. */
8747 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8749 /* ??? In order to make all comparisons reversible, we do all comparisons
8750 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8751 all forms trapping and nontrapping comparisons, we can make inequality
8752 comparisons trapping again, since it results in better code when using
8753 FCOM based compares. */
8754 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* NOTE(review): elided listing -- kept verbatim, comments only.
   ix86_cc_mode: pick the condition-code mode needed for comparison CODE of
   OP0/OP1 -- FP modes for float operands, otherwise a CC mode determined
   by which EFLAGS bits the condition reads (the switch structure and
   return statements are largely elided in this listing).  */
8758 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8760 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8761 return ix86_fp_compare_mode (code);
8764 /* Only zero flag is needed. */
8766 case NE: /* ZF!=0 */
8768 /* Codes needing carry flag. */
8769 case GEU: /* CF=0 */
8770 case GTU: /* CF=0 & ZF=0 */
8771 case LTU: /* CF=1 */
8772 case LEU: /* CF=1 | ZF=1 */
8774 /* Codes possibly doable only with sign flag when
8775 comparing against zero. */
8776 case GE: /* SF=OF or SF=0 */
8777 case LT: /* SF<>OF or SF=1 */
8778 if (op1 == const0_rtx)
8781 /* For other cases Carry flag is not required. */
8783 /* Codes doable only with sign flag when comparing
8784 against zero, but we miss jump instruction for it
8785 so we need to use relational tests against overflow
8786 that thus needs to be zero. */
8787 case GT: /* ZF=0 & SF=OF */
8788 case LE: /* ZF=1 | SF<>OF */
8789 if (op1 == const0_rtx)
8793 /* strcmp pattern do (use flags) and combine may ask us for proper
/* NOTE(review): body fully elided in this listing; only the signature
   survives.  Reports the fixed hard registers used for condition codes
   via *P1/*P2.  */
8802 /* Return the fixed registers used for condition codes. */
8805 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8812 /* If two condition code modes are compatible, return a condition code
8813 mode which is compatible with both. Otherwise, return
8816 static enum machine_mode
8817 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Equal-mode fast path is elided; non-CC modes are incompatible.  */
8822 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC are mutually compatible (result elided here).  */
8825 if ((m1 == CCGCmode && m2 == CCGOCmode)
8826 || (m1 == CCGOCmode && m2 == CCGCmode))
8854 /* These are only compatible with themselves, which we already
/* NOTE(review): elided listing -- kept verbatim, comments only.
   Use FCOMI when it is (tied for) the cheapest strategy for either the
   given comparison or its swapped form.  */
8860 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8863 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8865 enum rtx_code swapped_code = swap_condition (code);
8866 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8867 || (ix86_fp_comparison_cost (swapped_code)
8868 == ix86_fp_comparison_fcomi_cost (swapped_code)));
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8871 /* Swap, force into registers, or otherwise massage the two operands
8872 to a fp comparison. The operands are updated in place; the new
8873 comparison code is returned. */
8875 static enum rtx_code
8876 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8878 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8879 rtx op0 = *pop0, op1 = *pop1;
8880 enum machine_mode op_mode = GET_MODE (op0);
8881 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8883 /* All of the unordered compare instructions only work on registers.
8884 The same is true of the XFmode compare instructions. The same is
8885 true of the fcomi compare instructions. */
8888 && (fpcmp_mode == CCFPUmode
8889 || op_mode == XFmode
8890 || ix86_use_fcomi_compare (code)))
8892 op0 = force_reg (op_mode, op0);
8893 op1 = force_reg (op_mode, op1);
8897 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8898 things around if they appear profitable, otherwise force op0
8901 if (standard_80387_constant_p (op0) == 0
8902 || (GET_CODE (op0) == MEM
8903 && ! (standard_80387_constant_p (op1) == 0
8904 || GET_CODE (op1) == MEM)))
8907 tmp = op0, op0 = op1, op1 = tmp;
8908 code = swap_condition (code);
8911 if (GET_CODE (op0) != REG)
8912 op0 = force_reg (op_mode, op0);
8914 if (CONSTANT_P (op1))
/* Loadable 387 constants (0.0/1.0) stay as immediates-in-register;
   anything else is spilled to the constant pool.  */
8916 if (standard_80387_constant_p (op1))
8917 op1 = force_reg (op_mode, op1);
8919 op1 = validize_mem (force_const_mem (op_mode, op1));
8923 /* Try to rearrange the comparison to make it cheaper. */
8924 if (ix86_fp_comparison_cost (code)
8925 > ix86_fp_comparison_cost (swap_condition (code))
8926 && (GET_CODE (op1) == REG || !no_new_pseudos))
8929 tmp = op0, op0 = op1, op1 = tmp;
8930 code = swap_condition (code);
8931 if (GET_CODE (op0) != REG)
8932 op0 = force_reg (op_mode, op0);
/* NOTE(review): body fully elided in this listing; only the comment and
   signature survive.  */
8940 /* Convert comparison codes we use to represent FP comparison to integer
8941 code that will result in proper branch. Return UNKNOWN if no such code
8943 static enum rtx_code
8944 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): elided listing -- kept verbatim, comments only.  */
8973 /* Split comparison code CODE into comparisons we can do using branch
8974 instructions. BYPASS_CODE is comparison code for branch that will
8975 branch around FIRST_CODE and SECOND_CODE. If some of branches
8976 is not required, set value to NIL.
8977 We never require more than two branches. */
8979 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8980 enum rtx_code *first_code,
8981 enum rtx_code *second_code)
8987 /* The fcomi comparison sets flags as follows:
/* (flag table lines elided)  */
8997 case GT: /* GTU - CF=0 & ZF=0 */
8998 case GE: /* GEU - CF=0 */
8999 case ORDERED: /* PF=0 */
9000 case UNORDERED: /* PF=1 */
9001 case UNEQ: /* EQ - ZF=1 */
9002 case UNLT: /* LTU - CF=1 */
9003 case UNLE: /* LEU - CF=1 | ZF=1 */
9004 case LTGT: /* EQ - ZF=0 */
9006 case LT: /* LTU - CF=1 - fails on unordered */
9008 *bypass_code = UNORDERED;
9010 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9012 *bypass_code = UNORDERED;
9014 case EQ: /* EQ - ZF=1 - fails on unordered */
9016 *bypass_code = UNORDERED;
9018 case NE: /* NE - ZF=0 - fails on unordered */
9020 *second_code = UNORDERED;
9022 case UNGE: /* GEU - CF=0 - fails on unordered */
9024 *second_code = UNORDERED;
9026 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9028 *second_code = UNORDERED;
/* Non-IEEE mode drops the bypass/second branches (reset elided).  */
9033 if (!TARGET_IEEE_FP)
9040 /* Return cost of comparison done using fcom + arithmetic operations on AX.
9041 All following functions use the number of instructions as the cost metric.
9042 In future this should be tweaked to compute bytes for optimize_size and
9043 take into account performance of various instructions on various CPUs. */
9045 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Non-IEEE mode needs no NaN handling, so the fnstsw-based sequence is
   shorter; per-code costs below this point are elided in this excerpt.  */
9047 if (!TARGET_IEEE_FP)
9049 /* The cost of code output by ix86_expand_fp_compare. */
9077 /* Return cost of comparison done using fcomi operation.
9078 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9080 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9082 enum rtx_code bypass_code, first_code, second_code;
9083 /* Return arbitrarily high cost when instruction is not supported - this
9084 prevents gcc from using it. */
/* Base cost 2 (fcomi + jcc); one extra when the code needs a bypass or
   second branch.  */
9087 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9088 return (bypass_code != NIL || second_code != NIL) + 2;
9091 /* Return cost of comparison done using sahf operation.
9092 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9094 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9096 enum rtx_code bypass_code, first_code, second_code;
9097 /* Return arbitrarily high cost when instruction is not preferred - this
9098 keeps gcc from using it. */
9099 if (!TARGET_USE_SAHF && !optimize_size)
/* Base cost 3 (fnstsw + sahf + jcc); one extra for a bypass/second branch.  */
9101 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9102 return (bypass_code != NIL || second_code != NIL) + 3;
9105 /* Compute cost of the comparison done using any method.
9106 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9108 ix86_fp_comparison_cost (enum rtx_code code)
9110 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9113 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9114 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum over the three strategies (the update/return lines are
   elided in this excerpt).  */
9116 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9117 if (min > sahf_cost)
9119 if (min > fcomi_cost)
9124 /* Generate insn patterns to do a floating point compare of OPERANDS.
   Returns the flags-register test RTX (to feed a bcc/scc/cmov).  When the
   chosen code needs extra branches, *SECOND_TEST / *BYPASS_TEST are set
   (callers passing NULL for these forbid the multi-branch strategies). */
9127 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9128 rtx *second_test, rtx *bypass_test)
9130 enum machine_mode fpcmp_mode, intcmp_mode;
9132 int cost = ix86_fp_comparison_cost (code);
9133 enum rtx_code bypass_code, first_code, second_code;
9135 fpcmp_mode = ix86_fp_compare_mode (code);
9136 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9139 *second_test = NULL_RTX;
9141 *bypass_test = NULL_RTX;
9143 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9145 /* Do fcomi/sahf based test when profitable. */
9146 if ((bypass_code == NIL || bypass_test)
9147 && (second_code == NIL || second_test)
9148 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi variant: compare straight into the flags register.  */
9152 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9153 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf variant: fnstsw into a scratch, then sahf copies AH into EFLAGS.  */
9159 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9160 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9162 scratch = gen_reg_rtx (HImode);
9163 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9164 emit_insn (gen_x86_sahf_1 (scratch));
9167 /* The FP codes work out to act like unsigned. */
9168 intcmp_mode = fpcmp_mode;
9170 if (bypass_code != NIL)
9171 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9172 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9174 if (second_code != NIL)
9175 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9176 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetic fallback: fetch the FPU status word and test bits of AH.
   The masks below appear to select x87 condition bits in AH:
   0x01 = C0, 0x04 = C2, 0x40 = C3, 0x45 = C0|C2|C3 -- TODO confirm against
   the x87 status-word layout.  */
9181 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9182 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9183 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9185 scratch = gen_reg_rtx (HImode);
9186 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9188 /* In the unordered case, we have to check C2 for NaN's, which
9189 doesn't happen to work out to anything nice combination-wise.
9190 So do some bit twiddling on the value we've got in AH to come
9191 up with an appropriate set of condition codes. */
9193 intcmp_mode = CCNOmode;
9198 if (code == GT || !TARGET_IEEE_FP)
9200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9206 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9207 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9208 intcmp_mode = CCmode;
9214 if (code == LT && TARGET_IEEE_FP)
9216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9218 intcmp_mode = CCmode;
9223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9229 if (code == GE || !TARGET_IEEE_FP)
9231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9236 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9237 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9244 if (code == LE && TARGET_IEEE_FP)
9246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9249 intcmp_mode = CCmode;
9254 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9260 if (code == EQ && TARGET_IEEE_FP)
9262 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9263 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9264 intcmp_mode = CCmode;
9269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9276 if (code == NE && TARGET_IEEE_FP)
9278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9279 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9291 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9304 /* Return the test that should be put into the flags user, i.e.
9305 the bcc, scc, or cmov instruction. */
9306 return gen_rtx_fmt_ee (code, VOIDmode,
9307 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of ix86_compare_op0/op1 under CODE and return the
   flags test RTX.  FP operands dispatch to ix86_expand_fp_compare (which may
   set *SECOND_TEST / *BYPASS_TEST); integers go to ix86_expand_int_compare.  */
9312 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9315 op0 = ix86_compare_op0;
9316 op1 = ix86_compare_op1;
9319 *second_test = NULL_RTX;
9321 *bypass_test = NULL_RTX;
9323 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9324 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9325 second_test, bypass_test);
9327 ret = ix86_expand_int_compare (code, op0, op1);
9332 /* Return true if the CODE will result in nontrivial jump sequence. */
9334 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9336 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial means a bypass or second branch is required for CODE.  */
9339 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9340 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional branch on CODE over ix86_compare_op0/op1 to LABEL.
   Dispatches on the operand mode (switch skeleton elided in this excerpt).  */
9344 ix86_expand_branch (enum rtx_code code, rtx label)
9348 switch (GET_MODE (ix86_compare_op0))
/* Integer modes: single compare + conditional jump.  */
9354 tmp = ix86_expand_compare (code, NULL, NULL);
9355 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9356 gen_rtx_LABEL_REF (VOIDmode, label),
9358 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating point modes.  */
9367 enum rtx_code bypass_code, first_code, second_code;
9369 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9372 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9374 /* Check whether we will use the natural sequence with one jump. If
9375 so, we can expand jump early. Otherwise delay expansion by
9376 creating compound insn to not confuse optimizers. */
9377 if (bypass_code == NIL && second_code == NIL
9380 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9381 gen_rtx_LABEL_REF (VOIDmode, label),
/* Multi-branch case: wrap everything in one PARALLEL with the needed
   clobbers (FP status regs 17/18, plus a scratch unless fcomi is used)
   and let a later splitter expand it.  */
9386 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9387 ix86_compare_op0, ix86_compare_op1);
9388 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9389 gen_rtx_LABEL_REF (VOIDmode, label),
9391 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9393 use_fcomi = ix86_use_fcomi_compare (code);
9394 vec = rtvec_alloc (3 + !use_fcomi);
9395 RTVEC_ELT (vec, 0) = tmp;
9397 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9399 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9402 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9404 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9412 /* Expand DImode branch into multiple compare+branch. */
9414 rtx lo[2], hi[2], label2;
9415 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant as the second operand.  */
9417 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9419 tmp = ix86_compare_op0;
9420 ix86_compare_op0 = ix86_compare_op1;
9421 ix86_compare_op1 = tmp;
9422 code = swap_condition (code);
9424 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9425 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9427 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9428 avoid two branches. This costs one extra insn, so disable when
9429 optimizing for size. */
9431 if ((code == EQ || code == NE)
9433 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9438 if (hi[1] != const0_rtx)
9439 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9440 NULL_RTX, 0, OPTAB_WIDEN);
9443 if (lo[1] != const0_rtx)
9444 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9445 NULL_RTX, 0, OPTAB_WIDEN);
9447 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9448 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the OR-ed difference compared against zero.  */
9450 ix86_compare_op0 = tmp;
9451 ix86_compare_op1 = const0_rtx;
9452 ix86_expand_branch (code, label);
9456 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9457 op1 is a constant and the low word is zero, then we can just
9458 examine the high word. */
9460 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9463 case LT: case LTU: case GE: case GEU:
9464 ix86_compare_op0 = hi[0];
9465 ix86_compare_op1 = hi[1];
9466 ix86_expand_branch (code, label);
9472 /* Otherwise, we need two or three jumps. */
9474 label2 = gen_label_rtx ();
9477 code2 = swap_condition (code);
9478 code3 = unsigned_condition (code);
9482 case LT: case GT: case LTU: case GTU:
9485 case LE: code1 = LT; code2 = GT; break;
9486 case GE: code1 = GT; code2 = LT; break;
9487 case LEU: code1 = LTU; code2 = GTU; break;
9488 case GEU: code1 = GTU; code2 = LTU; break;
9490 case EQ: code1 = NIL; code2 = NE; break;
9491 case NE: code2 = NIL; break;
/* Emit the high-word branches, then decide on the low word:  */
9499 * if (hi(a) < hi(b)) goto true;
9500 * if (hi(a) > hi(b)) goto false;
9501 * if (lo(a) < lo(b)) goto true;
9505 ix86_compare_op0 = hi[0];
9506 ix86_compare_op1 = hi[1];
9509 ix86_expand_branch (code1, label);
9511 ix86_expand_branch (code2, label2);
9513 ix86_compare_op0 = lo[0];
9514 ix86_compare_op1 = lo[1];
9515 ix86_expand_branch (code3, label);
9518 emit_label (label2);
9527 /* Split branch based on floating point condition.
   Emits up to three jumps: an optional bypass jump (around the main test,
   used for the unordered case), the main conditional jump between TARGET1
   and TARGET2, and an optional second jump.  Branch probabilities from
   split_branch_probability are attached via REG_BR_PROB notes.  */
9529 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9530 rtx target1, rtx target2, rtx tmp)
9533 rtx label = NULL_RTX;
9535 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so that target2 is the fallthrough (pc_rtx).  */
9538 if (target2 != pc_rtx)
9541 code = reverse_condition_maybe_unordered (code);
9546 condition = ix86_expand_fp_compare (code, op1, op2,
9547 tmp, &second, &bypass);
9549 if (split_branch_probability >= 0)
9551 /* Distribute the probabilities across the jumps.
9552 Assume the BYPASS and SECOND to be always test
9554 probability = split_branch_probability;
9556 /* Value of 1 is low enough to make no need for probability
9557 to be updated. Later we may run some experiments and see
9558 if unordered values are more frequent in practice. */
9560 bypass_probability = 1;
9562 second_probability = 1;
9564 if (bypass != NULL_RTX)
9566 label = gen_label_rtx ();
9567 i = emit_jump_insn (gen_rtx_SET
9569 gen_rtx_IF_THEN_ELSE (VOIDmode,
9571 gen_rtx_LABEL_REF (VOIDmode,
9574 if (bypass_probability >= 0)
9576 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9577 GEN_INT (bypass_probability),
/* The main conditional jump.  */
9580 i = emit_jump_insn (gen_rtx_SET
9582 gen_rtx_IF_THEN_ELSE (VOIDmode,
9583 condition, target1, target2)));
9584 if (probability >= 0)
9586 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9587 GEN_INT (probability),
9589 if (second != NULL_RTX)
9591 i = emit_jump_insn (gen_rtx_SET
9593 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9595 if (second_probability >= 0)
9597 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9598 GEN_INT (second_probability),
9601 if (label != NULL_RTX)
/* Expand a setcc of CODE over ix86_compare_op0/op1 into QImode DEST.
   Returns 1 on success, 0 on FAIL (caller falls back to another strategy).
   When the FP compare needs a second/bypass test, the two QImode setcc
   results are combined with and/or.  */
9606 ix86_expand_setcc (enum rtx_code code, rtx dest)
9608 rtx ret, tmp, tmpreg, equiv;
9609 rtx second_test, bypass_test;
/* DImode comparisons go through the multi-branch path; no setcc.  */
9611 if (GET_MODE (ix86_compare_op0) == DImode
9613 return 0; /* FAIL */
9615 if (GET_MODE (dest) != QImode)
9618 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9619 PUT_MODE (ret, QImode);
9624 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9625 if (bypass_test || second_test)
9627 rtx test = second_test;
9629 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is combined inverted (branch-around semantics).  */
9636 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9638 PUT_MODE (test, QImode);
9639 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9642 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9644 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9647 /* Attach a REG_EQUAL note describing the comparison result. */
9648 equiv = simplify_gen_relational (code, QImode,
9649 GET_MODE (ix86_compare_op0),
9650 ix86_compare_op0, ix86_compare_op1);
9651 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9653 return 1; /* DONE */
9656 /* Expand comparison setting or clearing carry flag. Return true when
9657 successful and set pop for the operation. */
9659 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9661 enum machine_mode mode =
9662 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9664 /* Do not handle DImode compares that go through the special path. Also we can't
9665 deal with FP compares yet. This is possible to add. */
9666 if ((mode == DImode && !TARGET_64BIT))
9668 if (FLOAT_MODE_P (mode))
9670 rtx second_test = NULL, bypass_test = NULL;
9671 rtx compare_op, compare_seq;
9673 /* Shortcut: following common codes never translate into carry flag compares. */
9674 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9675 || code == ORDERED || code == UNORDERED)
9678 /* These comparisons require zero flag; swap operands so they won't. */
9679 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9685 code = swap_condition (code);
9688 /* Try to expand the comparison and verify that we end up with a carry flag
9689 based comparison. This fails to be true only when we decide to expand
9690 comparison using arithmetic that is not too common a scenario. */
9692 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9693 &second_test, &bypass_test);
9694 compare_seq = get_insns ();
9697 if (second_test || bypass_test)
9699 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9700 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9701 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9703 code = GET_CODE (compare_op);
/* Only LTU/GEU are pure carry-flag tests; anything else fails.  */
9704 if (code != LTU && code != GEU)
9706 emit_insn (compare_seq);
9710 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite signed/equality codes into LTU/GEU by adjusting
   the constant operand.  */
9718 /* Convert a==0 into (unsigned)a<1. */
9721 if (op1 != const0_rtx)
9724 code = (code == EQ ? LTU : GEU);
9727 /* Convert a>b into b<a or a>=b-1. */
9730 if (GET_CODE (op1) == CONST_INT)
9732 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9733 /* Bail out on overflow. We still can swap operands but that
9734 would force loading of the constant into register. */
9735 if (op1 == const0_rtx
9736 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9738 code = (code == GTU ? GEU : LTU);
9745 code = (code == GTU ? LTU : GEU);
9749 /* Convert a>=0 into (unsigned)a<0x80000000. */
9752 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): "1 << (bits-1)" shifts a plain int; for a 32-bit mode this
   hits the sign bit of int (implementation-defined/UB).  Safer spelling is
   (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1) -- verify.  */
9754 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9755 code = (code == LT ? GEU : LTU);
9759 if (mode == DImode || op1 != constm1_rtx)
9761 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9762 code = (code == LE ? GEU : LTU);
9768 /* Swapping operands may cause constant to appear as first operand. */
9769 if (!nonimmediate_operand (op0, VOIDmode))
9773 op0 = force_reg (mode, op0);
9775 ix86_compare_op0 = op0;
9776 ix86_compare_op1 = op1;
9777 *pop = ix86_expand_compare (code, NULL, NULL);
9778 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move: operands[0] = operands[1](cmp) ?
   operands[2] : operands[3].  Returns 1 on DONE, 0 on FAIL.  Tries several
   branch-free strategies (sbb/setcc arithmetic, lea, masking) before
   falling back to cmov or failing to the generic expander.  */
9784 ix86_expand_int_movcc (rtx operands[])
9786 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9787 rtx compare_seq, compare_op;
9788 rtx second_test, bypass_test;
9789 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below.  */
9790 bool sign_bit_compare_p = false;;
9793 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9794 compare_seq = get_insns ();
9797 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (or against -1) can be done by shifting out the sign bit.  */
9799 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9800 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9801 sign_bit_compare_p = true;
9803 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9804 HImode insns, we'd be swallowed in word prefix ops. */
9806 if ((mode != HImode || TARGET_FAST_PREFIX)
9807 && (mode != DImode || TARGET_64BIT)
9808 && GET_CODE (operands[2]) == CONST_INT
9809 && GET_CODE (operands[3]) == CONST_INT)
9811 rtx out = operands[0];
9812 HOST_WIDE_INT ct = INTVAL (operands[2]);
9813 HOST_WIDE_INT cf = INTVAL (operands[3]);
9817 /* Sign bit compares are better done using shifts than we do by using
9819 if (sign_bit_compare_p
9820 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9821 ix86_compare_op1, &compare_op))
9823 /* Detect overlap between destination and compare sources. */
9826 if (!sign_bit_compare_p)
9830 compare_code = GET_CODE (compare_op);
9832 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9833 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9836 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9839 /* To simplify rest of code, restrict to the GEU case. */
9840 if (compare_code == LTU)
9842 HOST_WIDE_INT tmp = ct;
9845 compare_code = reverse_condition (compare_code);
9846 code = reverse_condition (code);
9851 PUT_CODE (compare_op,
9852 reverse_condition_maybe_unordered
9853 (GET_CODE (compare_op)));
9855 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9859 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9860 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9861 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg).  */
9864 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9866 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9870 if (code == GT || code == GE)
9871 code = reverse_condition (code);
9874 HOST_WIDE_INT tmp = ct;
9879 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9880 ix86_compare_op1, VOIDmode, 0, -1);
/* Turn the 0/-1 mask into ct/cf with add, or, not, and/add as needed.  */
9893 tmp = expand_simple_binop (mode, PLUS,
9895 copy_rtx (tmp), 1, OPTAB_DIRECT);
9906 tmp = expand_simple_binop (mode, IOR,
9908 copy_rtx (tmp), 1, OPTAB_DIRECT);
9910 else if (diff == -1 && ct)
9920 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9922 tmp = expand_simple_binop (mode, PLUS,
9923 copy_rtx (tmp), GEN_INT (cf),
9924 copy_rtx (tmp), 1, OPTAB_DIRECT);
9932 * andl cf - ct, dest
9942 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9945 tmp = expand_simple_binop (mode, AND,
9947 gen_int_mode (cf - ct, mode),
9948 copy_rtx (tmp), 1, OPTAB_DIRECT);
9950 tmp = expand_simple_binop (mode, PLUS,
9951 copy_rtx (tmp), GEN_INT (ct),
9952 copy_rtx (tmp), 1, OPTAB_DIRECT);
9955 if (!rtx_equal_p (tmp, out))
9956 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9958 return 1; /* DONE */
/* diff < 0: swap the constants and reverse the condition.  */
9964 tmp = ct, ct = cf, cf = tmp;
9966 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9968 /* We may be reversing unordered compare to normal compare, that
9969 is not valid in general (we may convert non-trapping condition
9970 to trapping one), however on i386 we currently emit all
9971 comparisons unordered. */
9972 compare_code = reverse_condition_maybe_unordered (compare_code);
9973 code = reverse_condition_maybe_unordered (code);
9977 compare_code = reverse_condition (compare_code);
9978 code = reverse_condition (code);
9983 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9984 && GET_CODE (ix86_compare_op1) == CONST_INT)
9986 if (ix86_compare_op1 == const0_rtx
9987 && (code == LT || code == GE))
9988 compare_code = code;
9989 else if (ix86_compare_op1 == constm1_rtx)
9993 else if (code == GT)
9998 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9999 if (compare_code != NIL
10000 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10001 && (cf == -1 || ct == -1))
10003 /* If lea code below could be used, only optimize
10004 if it results in a 2 insn sequence. */
10006 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10007 || diff == 3 || diff == 5 || diff == 9)
10008 || (compare_code == LT && ct == -1)
10009 || (compare_code == GE && cf == -1))
10012 * notl op1 (if necessary)
10020 code = reverse_condition (code);
10023 out = emit_store_flag (out, code, ix86_compare_op0,
10024 ix86_compare_op1, VOIDmode, 0, -1);
10026 out = expand_simple_binop (mode, IOR,
10028 out, 1, OPTAB_DIRECT);
10029 if (out != operands[0])
10030 emit_move_insn (operands[0], out);
10032 return 1; /* DONE */
/* lea strategy: setcc gives 0/1, then scale+offset via lea addressing.  */
10037 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10038 || diff == 3 || diff == 5 || diff == 9)
10039 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10040 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10046 * lea cf(dest*(ct-cf)),dest
10050 * This also catches the degenerate setcc-only case.
10056 out = emit_store_flag (out, code, ix86_compare_op0,
10057 ix86_compare_op1, VOIDmode, 0, 1);
10060 /* On x86_64 the lea instruction operates on Pmode, so we need
10061 to get arithmetics done in proper mode to match. */
10063 tmp = copy_rtx (out);
10067 out1 = copy_rtx (out);
10068 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10072 tmp = gen_rtx_PLUS (mode, tmp, out1);
10078 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10081 if (!rtx_equal_p (tmp, out))
10084 out = force_operand (tmp, copy_rtx (out));
10086 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10088 if (!rtx_equal_p (out, operands[0]))
10089 emit_move_insn (operands[0], copy_rtx (out));
10091 return 1; /* DONE */
10095 * General case: Jumpful:
10096 * xorl dest,dest cmpl op1, op2
10097 * cmpl op1, op2 movl ct, dest
10098 * setcc dest jcc 1f
10099 * decl dest movl cf, dest
10100 * andl (cf-ct),dest 1:
10103 * Size 20. Size 14.
10105 * This is reasonably steep, but branch mispredict costs are
10106 * high on modern cpus, so consider failing only if optimizing
10110 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10111 && BRANCH_COST >= 2)
10117 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10118 /* We may be reversing unordered compare to normal compare,
10119 that is not valid in general (we may convert non-trapping
10120 condition to trapping one), however on i386 we currently
10121 emit all comparisons unordered. */
10122 code = reverse_condition_maybe_unordered (code);
10125 code = reverse_condition (code);
10126 if (compare_code != NIL)
10127 compare_code = reverse_condition (compare_code);
10131 if (compare_code != NIL)
10133 /* notl op1 (if needed)
10138 For x < 0 (resp. x <= -1) there will be no notl,
10139 so if possible swap the constants to get rid of the
10141 True/false will be -1/0 while code below (store flag
10142 followed by decrement) is 0/-1, so the constants need
10143 to be exchanged once more. */
10145 if (compare_code == GE || !cf)
10147 code = reverse_condition (code);
10152 HOST_WIDE_INT tmp = cf;
10157 out = emit_store_flag (out, code, ix86_compare_op0,
10158 ix86_compare_op1, VOIDmode, 0, -1);
10162 out = emit_store_flag (out, code, ix86_compare_op0,
10163 ix86_compare_op1, VOIDmode, 0, 1);
10165 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10166 copy_rtx (out), 1, OPTAB_DIRECT);
10169 out = expand_simple_binop (mode, AND, copy_rtx (out),
10170 gen_int_mode (cf - ct, mode),
10171 copy_rtx (out), 1, OPTAB_DIRECT);
10173 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10174 copy_rtx (out), 1, OPTAB_DIRECT);
10175 if (!rtx_equal_p (out, operands[0]))
10176 emit_move_insn (operands[0], copy_rtx (out));
10178 return 1; /* DONE */
10182 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10184 /* Try a few things more with specific constants and a variable. */
10187 rtx var, orig_out, out, tmp;
10189 if (BRANCH_COST <= 2)
10190 return 0; /* FAIL */
10192 /* If one of the two operands is an interesting constant, load a
10193 constant with the above and mask it in with a logical operation. */
10195 if (GET_CODE (operands[2]) == CONST_INT)
10198 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10199 operands[3] = constm1_rtx, op = and_optab;
10200 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10201 operands[3] = const0_rtx, op = ior_optab;
10203 return 0; /* FAIL */
10205 else if (GET_CODE (operands[3]) == CONST_INT)
10208 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10209 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): guard below tests operands[3] != const0_rtx but assigns
   operands[2]; the parallel branch above tests operands[2].  Looks like it
   should be operands[2] != const0_rtx -- verify against upstream history.  */
10210 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10211 operands[2] = const0_rtx, op = ior_optab;
10213 return 0; /* FAIL */
10216 return 0; /* FAIL */
10218 orig_out = operands[0];
10219 tmp = gen_reg_rtx (mode);
10222 /* Recurse to get the constant loaded. */
10223 if (ix86_expand_int_movcc (operands) == 0)
10224 return 0; /* FAIL */
10226 /* Mask in the interesting variable. */
10227 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10229 if (!rtx_equal_p (out, orig_out))
10230 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10232 return 1; /* DONE */
10236 * For comparison with above,
/* cmov fallback: force operands to registers and emit one or more
   IF_THEN_ELSE sets (extra ones for second/bypass FP tests).  */
10246 if (! nonimmediate_operand (operands[2], mode))
10247 operands[2] = force_reg (mode, operands[2]);
10248 if (! nonimmediate_operand (operands[3], mode))
10249 operands[3] = force_reg (mode, operands[3]);
10251 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10253 rtx tmp = gen_reg_rtx (mode);
10254 emit_move_insn (tmp, operands[3]);
10257 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10259 rtx tmp = gen_reg_rtx (mode);
10260 emit_move_insn (tmp, operands[2]);
10264 if (! register_operand (operands[2], VOIDmode)
10266 || ! register_operand (operands[3], VOIDmode)))
10267 operands[2] = force_reg (mode, operands[2]);
10270 && ! register_operand (operands[3], VOIDmode))
10271 operands[3] = force_reg (mode, operands[3]);
10273 emit_insn (compare_seq);
10274 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10275 gen_rtx_IF_THEN_ELSE (mode,
10276 compare_op, operands[2],
10279 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10280 gen_rtx_IF_THEN_ELSE (mode,
10282 copy_rtx (operands[3]),
10283 copy_rtx (operands[0]))));
10285 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10286 gen_rtx_IF_THEN_ELSE (mode,
10288 copy_rtx (operands[2]),
10289 copy_rtx (operands[0]))));
10291 return 1; /* DONE */
/* Expand a floating-point conditional move: operands[0] = operands[1](cmp) ?
   operands[2] : operands[3].  Tries SSE min/max and SSE cmov patterns first,
   then falls back to x87 fcmov.  Returns 1 on DONE (return lines are elided
   in this excerpt).  */
10295 ix86_expand_fp_movcc (rtx operands[])
10297 enum rtx_code code;
10299 rtx compare_op, second_test, bypass_test;
10301 /* For SF/DFmode conditional moves based on comparisons
10302 in same mode, we may want to use SSE min/max instructions. */
10303 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10304 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10305 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10306 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10307 && (!TARGET_IEEE_FP
10308 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10309 /* We may be called from the post-reload splitter. */
10310 && (!REG_P (operands[0])
10311 || SSE_REG_P (operands[0])
10312 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10314 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10315 code = GET_CODE (operands[1]);
10317 /* See if we have (cross) match between comparison operands and
10318 conditional move operands. */
10319 if (rtx_equal_p (operands[2], op1))
10324 code = reverse_condition_maybe_unordered (code);
10326 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10328 /* Check for min operation. */
10329 if (code == LT || code == UNLE)
10337 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10338 if (memory_operand (op0, VOIDmode))
10339 op0 = force_reg (GET_MODE (operands[0]), op0);
10340 if (GET_MODE (operands[0]) == SFmode)
10341 emit_insn (gen_minsf3 (operands[0], op0, op1));
10343 emit_insn (gen_mindf3 (operands[0], op0, op1));
10346 /* Check for max operation. */
10347 if (code == GT || code == UNGE)
10355 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10356 if (memory_operand (op0, VOIDmode))
10357 op0 = force_reg (GET_MODE (operands[0]), op0);
10358 if (GET_MODE (operands[0]) == SFmode)
10359 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10361 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10365 /* Manage condition to be sse_comparison_operator. In case we are
10366 in non-ieee mode, try to canonicalize the destination operand
10367 to be first in the comparison - this helps reload to avoid extra
10369 if (!sse_comparison_operator (operands[1], VOIDmode)
10370 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10372 rtx tmp = ix86_compare_op0;
10373 ix86_compare_op0 = ix86_compare_op1;
10374 ix86_compare_op1 = tmp;
10375 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10376 VOIDmode, ix86_compare_op0,
10379 /* Similarly try to manage result to be first operand of conditional
10380 move. We also don't support the NE comparison on SSE, so try to
10382 if ((rtx_equal_p (operands[0], operands[3])
10383 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10384 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10386 rtx tmp = operands[2];
10387 operands[2] = operands[3];
10389 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10390 (GET_CODE (operands[1])),
10391 VOIDmode, ix86_compare_op0,
10394 if (GET_MODE (operands[0]) == SFmode)
10395 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10396 operands[2], operands[3],
10397 ix86_compare_op0, ix86_compare_op1));
10399 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10400 operands[2], operands[3],
10401 ix86_compare_op0, ix86_compare_op1));
10405 /* The floating point conditional move instructions don't directly
10406 support conditions resulting from a signed integer comparison. */
10408 code = GET_CODE (operands[1]);
10409 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10411 /* The floating point conditional move instructions don't directly
10412 support signed integer comparisons. */
/* Reduce an unsupported condition to a setcc byte compared against zero.  */
10414 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10416 if (second_test != NULL || bypass_test != NULL)
10418 tmp = gen_reg_rtx (QImode);
10419 ix86_expand_setcc (code, tmp);
10421 ix86_compare_op0 = tmp;
10422 ix86_compare_op1 = const0_rtx;
10423 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10425 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10427 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10428 emit_move_insn (tmp, operands[3]);
10431 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10433 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10434 emit_move_insn (tmp, operands[2]);
/* Main fcmov, plus extra conditional moves for bypass/second tests.  */
10438 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10439 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10444 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10445 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10450 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10451 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10459 /* Expand conditional increment or decrement using adc/sbb instructions.
10460    The default case using setcc followed by the conditional move can be
10461    done by generic code.  */
/* NOTE(review): this excerpt elides some original lines (return type,
   braces, switch case labels, else-arms); comments describe only the
   visible code.  Returns nonzero ("DONE") when the insn was emitted.  */
10463 ix86_expand_int_addcc (rtx operands[])
10465   enum rtx_code code = GET_CODE (operands[1]);
10467   rtx val = const0_rtx;
10468   bool fpcmp = false;
10469   enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be expressed as add/sub-with-carry;
   everything else is left to the generic expander.  */
10471   if (operands[3] != const1_rtx
10472       && operands[3] != constm1_rtx)
/* The comparison must be representable through the carry flag alone.  */
10474   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10475 				       ix86_compare_op1, &compare_op))
10477   code = GET_CODE (compare_op);
/* FP comparisons use CCFP(U)mode flags; map the condition code to its
   integer-flag equivalent before reversing it below.  */
10479   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10480       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10483       code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition in place.  The unordered-aware reversal is
   presumably taken on the FP-compare path (the guard, likely testing
   `fpcmp`, is elided in this excerpt) -- TODO confirm.  */
10490     PUT_CODE (compare_op,
10491 	      reverse_condition_maybe_unordered
10492 	        (GET_CODE (compare_op)));
10494     PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10496   PUT_MODE (compare_op, mode);
10498   /* Construct either adc or sbb insn.  */
/* sbb (subtract-with-borrow) when the carry sense matches a decrement;
   the per-mode switch cases (QI/HI/SI/DImode) are elided here.  */
10499   if ((code == LTU) == (operands[3] == constm1_rtx))
10501       switch (GET_MODE (operands[0]))
10504 	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10507 	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10510 	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10513 	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
/* Otherwise use adc (add-with-carry).  */
10521       switch (GET_MODE (operands[0]))
10524 	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10527 	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10530 	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10533 	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10539   return 1; /* DONE */
10543 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10544    works for floating point parameters and non-offsettable memories.
10545    For pushes, it returns just stack offsets; the values will be saved
10546    in the right order.  Maximally three parts are generated.  */
/* NOTE(review): excerpt elides lines (return type, braces, some guards);
   comments below describe only the visible code.  Presumably returns the
   number of parts produced -- TODO confirm against full source.  */
10549 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: 32-bit target counts SImode words (XFmode forced to 3),
   64-bit target counts DImode words.  */
10554     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10556     size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be split into word parts.  */
10558   if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10560   if (size < 2 || size > 3)
10563   /* Optimize constant pool reference to immediates.  This is used by fp
10564      moves, that force all constants to memory to allow combining.  */
10565   if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10567       rtx tmp = maybe_get_pool_constant (operand);
10572   if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10574       /* The only non-offsetable memories we handle are pushes.  */
10575       if (! push_operand (operand, VOIDmode))
/* For a push, hand back the (Pmode-retyped) push rtx for every part;
   the caller emits them in the right order.  */
10578       operand = copy_rtx (operand);
10579       PUT_MODE (operand, Pmode);
10580       parts[0] = parts[1] = parts[2] = operand;
10582   else if (!TARGET_64BIT)
10584       if (mode == DImode)
10585 	split_di (&operand, 1, &parts[0], &parts[1]);
/* Non-DImode (FP modes) on 32-bit: split registers into consecutive
   SImode hard regs, memories into offsetted SImode pieces, constants
   into their target word images.  */
10588 	  if (REG_P (operand))
10590 	      if (!reload_completed)
10592 	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10593 	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10595 		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10597 	  else if (offsettable_memref_p (operand))
10599 	      operand = adjust_address (operand, SImode, 0);
10600 	      parts[0] = operand;
10601 	      parts[1] = adjust_address (operand, SImode, 4);
10603 		parts[2] = adjust_address (operand, SImode, 8);
10605 	  else if (GET_CODE (operand) == CONST_DOUBLE)
10610 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10614 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10615 		  parts[2] = gen_int_mode (l[2], SImode);
10618 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10623 	      parts[1] = gen_int_mode (l[1], SImode);
10624 	      parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: TImode splits into two DImode halves; XFmode/TFmode
   into a DImode low part plus an SImode/DImode upper part.  */
10632       if (mode == TImode)
10633 	split_ti (&operand, 1, &parts[0], &parts[1]);
10634       if (mode == XFmode || mode == TFmode)
10636 	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10637 	  if (REG_P (operand))
10639 	      if (!reload_completed)
10641 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10642 	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10644 	  else if (offsettable_memref_p (operand))
10646 	      operand = adjust_address (operand, DImode, 0);
10647 	      parts[0] = operand;
10648 	      parts[1] = adjust_address (operand, upper_mode, 8);
10650 	  else if (GET_CODE (operand) == CONST_DOUBLE)
10655 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10656 	      real_to_target (l, &r, mode);
10658 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
10659 	      if (HOST_BITS_PER_WIDE_INT >= 64)
/* Reassemble two 32-bit target words into one HOST_WIDE_INT; the
   (2 << 31) - 1 mask and double shift avoid undefined 32-bit shifts.  */
10662 		  ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10663 		   + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10666 		parts[0] = immed_double_const (l[0], l[1], DImode);
10668 	      if (upper_mode == SImode)
10669 		parts[1] = gen_int_mode (l[2], SImode);
10670 	      else if (HOST_BITS_PER_WIDE_INT >= 64)
10673 		  ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10674 		   + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10677 		parts[1] = immed_double_const (l[2], l[3], DImode);
10687 /* Emit insns to perform a move or push of DI, DF, and XF values.
10688    Return false when normal moves are needed; true when all required
10689    insns have been emitted.  Operands 2-4 contain the input values
10690    in the correct order; operands 5-7 contain the output values.  */
/* NOTE(review): excerpt elides lines (return type, braces, some decls
   such as `part`, `push`, `nparts`); comments describe visible code.  */
10693 ix86_split_long_move (rtx operands[])
10698   int collisions = 0;
10699   enum machine_mode mode = GET_MODE (operands[0]);
10701   /* The DFmode expanders may ask us to move double.
10702      For 64bit target this is single move.  By hiding the fact
10703      here we simplify i386.md splitters.  */
10704   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10706       /* Optimize constant pool reference to immediates.  This is used by
10707 	 fp moves, that force all constants to memory to allow combining.  */
10709       if (GET_CODE (operands[1]) == MEM
10710 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10711 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10712 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
10713       if (push_operand (operands[0], VOIDmode))
10715 	  operands[0] = copy_rtx (operands[0]);
10716 	  PUT_MODE (operands[0], Pmode);
/* 8-byte move on 64-bit: do it as one DImode move and return.  */
10719 	operands[0] = gen_lowpart (DImode, operands[0]);
10720       operands[1] = gen_lowpart (DImode, operands[1]);
10721       emit_move_insn (operands[0], operands[1]);
10725   /* The only non-offsettable memory we handle is push.  */
10726   if (push_operand (operands[0], VOIDmode))
10728   else if (GET_CODE (operands[0]) == MEM
10729 	   && ! offsettable_memref_p (operands[0]))
10732   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10733   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10735   /* When emitting push, take care for source operands on the stack.  */
10736   if (push && GET_CODE (operands[1]) == MEM
10737       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Each push moves SP, so re-address the lower source parts relative to
   the address of the part pushed just before them.  */
10740       part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10741 				   XEXP (part[1][2], 0));
10742       part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10743 				   XEXP (part[1][1], 0));
10746   /* We need to do copy in the right order in case an address register
10747      of the source overlaps the destination.  */
10748   if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
/* Count how many destination parts clobber registers used in the
   source address (the `collisions++` lines are elided here).  */
10750       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10752       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10755 	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10758       /* Collision in the middle part can be handled by reordering.  */
10759       if (collisions == 1 && nparts == 3
10760 	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10763 	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10764 	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10767       /* If there are more collisions, we can't handle it by reordering.
10768 	 Do an lea to the last part and use only one colliding move.  */
10769       else if (collisions > 1)
10775 	  base = part[0][nparts - 1];
10777 	  /* Handle the case when the last part isn't valid for lea.
10778 	     Happens in 64-bit mode storing the 12-byte XFmode.  */
10779 	  if (GET_MODE (base) != Pmode)
10780 	    base = gen_rtx_REG (Pmode, REGNO (base));
10782 	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10783 	  part[1][0] = replace_equiv_address (part[1][0], base);
10784 	  part[1][1] = replace_equiv_address (part[1][1],
10785 					      plus_constant (base, UNITS_PER_WORD));
10787 	    part[1][2] = replace_equiv_address (part[1][2],
10788 						plus_constant (base, 8));
/* Push path: XFmode on 32-bit needs 4 bytes of padding so the 12-byte
   value occupies a 16-byte slot.  */
10798 	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10799 		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10800 	      emit_move_insn (part[0][2], part[1][2]);
10805 	  /* In 64bit mode we don't have 32bit push available.  In case this is
10806 	     register, it is OK - we will just use larger counterpart.  We also
10807 	     retype memory - these comes from attempt to avoid REX prefix on
10808 	     moving of second half of TFmode value.  */
10809 	  if (GET_MODE (part[1][1]) == SImode)
10811 	      if (GET_CODE (part[1][1]) == MEM)
10812 		part[1][1] = adjust_address (part[1][1], DImode, 0);
10813 	      else if (REG_P (part[1][1]))
10814 		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10817 	      if (GET_MODE (part[1][0]) == SImode)
10818 		part[1][0] = part[1][1];
10821       emit_move_insn (part[0][1], part[1][1]);
10822       emit_move_insn (part[0][0], part[1][0]);
10826   /* Choose correct order to not overwrite the source before it is copied.  */
10827   if ((REG_P (part[0][0])
10828        && REG_P (part[1][1])
10829        && (REGNO (part[0][0]) == REGNO (part[1][1])
10831 	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
10833 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Descending order (high part first) when the low destination register
   overlaps a higher source part.  */
10837       operands[2] = part[0][2];
10838       operands[3] = part[0][1];
10839       operands[4] = part[0][0];
10840       operands[5] = part[1][2];
10841       operands[6] = part[1][1];
10842       operands[7] = part[1][0];
10846       operands[2] = part[0][1];
10847       operands[3] = part[0][0];
10848       operands[5] = part[1][1];
10849       operands[6] = part[1][0];
/* Ascending order (low part first) otherwise.  */
10856       operands[2] = part[0][0];
10857       operands[3] = part[0][1];
10858       operands[4] = part[0][2];
10859       operands[5] = part[1][0];
10860       operands[6] = part[1][1];
10861       operands[7] = part[1][2];
10865       operands[2] = part[0][0];
10866       operands[3] = part[0][1];
10867       operands[5] = part[1][0];
10868       operands[6] = part[1][1];
10871   emit_move_insn (operands[2], operands[5]);
10872   emit_move_insn (operands[3], operands[6]);
10874     emit_move_insn (operands[4], operands[7]);
/* Split a 64-bit left shift into SImode operations on the low/high
   halves.  SCRATCH may be NULL; when non-null (or when new pseudos are
   allowed) a cmove-based adjustment avoids a branch for counts >= 32.
   NOTE(review): excerpt elides lines (return type, braces, else-arms).  */
10880 ix86_split_ashldi (rtx *operands, rtx scratch)
10882   rtx low[2], high[2];
10885   if (GET_CODE (operands[2]) == CONST_INT)
10887       split_di (operands, 2, low, high);
10888       count = INTVAL (operands[2]) & 63;
/* count >= 32 (guard elided): high = low << (count-32), low = 0.  */
10892 	  emit_move_insn (high[0], low[1]);
10893 	  emit_move_insn (low[0], const0_rtx);
10896 	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* count < 32: shld feeds low bits into high, then shift low.  */
10900 	  if (!rtx_equal_p (operands[0], operands[1]))
10901 	    emit_move_insn (operands[0], operands[1]);
10902 	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10903 	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: emit shld/shl, then fix up the >= 32 case.  */
10908       if (!rtx_equal_p (operands[0], operands[1]))
10909 	emit_move_insn (operands[0], operands[1]);
10911       split_di (operands, 1, low, high);
10913       emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10914       emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10916       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10918 	  if (! no_new_pseudos)
10919 	    scratch = force_reg (SImode, const0_rtx);
10921 	    emit_move_insn (scratch, const0_rtx);
10923 	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
/* No cmove available: branchy adjustment pattern.  */
10927 	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a 64-bit arithmetic right shift into SImode operations.
   The high half must be filled with copies of the sign bit.
   NOTE(review): excerpt elides lines (return type, braces, else-arms).  */
10932 ix86_split_ashrdi (rtx *operands, rtx scratch)
10934   rtx low[2], high[2];
10937   if (GET_CODE (operands[2]) == CONST_INT)
10939       split_di (operands, 2, low, high);
10940       count = INTVAL (operands[2]) & 63;
/* count >= 32 (guard elided): low = high >> (count-32),
   high = sign extension of the old high word.  */
10944 	  emit_move_insn (low[0], high[1]);
10946 	  if (! reload_completed)
10947 	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
/* After reload we cannot use a fresh pseudo, so shift in place.  */
10950 	      emit_move_insn (high[0], low[0]);
10951 	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10955 	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd feeds high bits into low, then sar the high word.  */
10959 	  if (!rtx_equal_p (operands[0], operands[1]))
10960 	    emit_move_insn (operands[0], operands[1]);
10961 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10962 	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: emit shrd/sar, then fix up the >= 32 case.  */
10967       if (!rtx_equal_p (operands[0], operands[1]))
10968 	emit_move_insn (operands[0], operands[1]);
10970       split_di (operands, 1, low, high);
10972       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10973       emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10975       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10977 	  if (! no_new_pseudos)
10978 	    scratch = gen_reg_rtx (SImode);
/* scratch holds the sign word (high >> 31) for the cmove fixup.  */
10979 	  emit_move_insn (scratch, high[0]);
10980 	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10981 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10985 	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a 64-bit logical right shift into SImode operations; the high
   half is zero-filled.
   NOTE(review): excerpt elides lines (return type, braces, else-arms).  */
10990 ix86_split_lshrdi (rtx *operands, rtx scratch)
10992   rtx low[2], high[2];
10995   if (GET_CODE (operands[2]) == CONST_INT)
10997       split_di (operands, 2, low, high);
10998       count = INTVAL (operands[2]) & 63;
/* count >= 32 (guard elided): low = high >> (count-32), high = 0.  */
11002 	  emit_move_insn (low[0], high[1]);
11003 	  emit_move_insn (high[0], const0_rtx);
11006 	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* count < 32: shrd feeds high bits into low, then shr the high word.  */
11010 	  if (!rtx_equal_p (operands[0], operands[1]))
11011 	    emit_move_insn (operands[0], operands[1]);
11012 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11013 	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: emit shrd/shr, then fix up the >= 32 case.  */
11018       if (!rtx_equal_p (operands[0], operands[1]))
11019 	emit_move_insn (operands[0], operands[1]);
11021       split_di (operands, 1, low, high);
11023       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11024       emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11026       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
11027       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11029 	  if (! no_new_pseudos)
11030 	    scratch = force_reg (SImode, const0_rtx);
11032 	    emit_move_insn (scratch, const0_rtx);
11034 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11038 	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11042 /* Helper function for the string operations below.  Test whether
11043    VARIABLE is aligned to VALUE bytes.  If so, jump to the returned
11044    label (i.e. the label is taken when (VARIABLE & VALUE) == 0).
   NOTE(review): the return type line is elided in this excerpt;
   presumably returns rtx (the label) -- confirm against full source.  */
11045 ix86_expand_aligntest (rtx variable, int value)
11047   rtx label = gen_label_rtx ();
11048   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11049   if (GET_MODE (variable) == DImode)
11050     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
/* SImode fallback (the `else` line is elided in this excerpt).  */
11052     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11053   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11058 /* Decrement COUNTREG by VALUE, in whichever of DImode/SImode the
   counter register uses.  */
11060 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11062   if (GET_MODE (countreg) == DImode)
11063     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
/* SImode fallback (the `else` line is elided in this excerpt).  */
11065     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11068 /* Zero extend possibly SImode EXP to a Pmode register and return it.
   VOIDmode constants are simply forced into a Pmode register; values
   already in Pmode are copied to a fresh register.  */
11070 ix86_zero_extend_to_Pmode (rtx exp)
11073   if (GET_MODE (exp) == VOIDmode)
11074     return force_reg (Pmode, exp);
11075   if (GET_MODE (exp) == Pmode)
11076     return copy_to_mode_reg (Pmode, exp);
/* Remaining case: SImode value on a 64-bit target -- zero extend.  */
11077   r = gen_reg_rtx (Pmode);
11078   emit_insn (gen_zero_extendsidi2 (r, exp));
11082 /* Expand string move (memcpy) operation.  Use i386 string operations when
11083    profitable.  expand_clrstr contains similar code.
   NOTE(review): this excerpt elides many lines (return type, braces,
   returns, some guards); comments describe only the visible code.
   Returns nonzero on success, zero to fall back to a library call --
   presumably, per the "default to library version" comments below.  */
11085 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11087   rtx srcreg, destreg, countreg, srcexp, destexp;
11088   enum machine_mode counter_mode;
11089   HOST_WIDE_INT align = 0;
11090   unsigned HOST_WIDE_INT count = 0;
11092   if (GET_CODE (align_exp) == CONST_INT)
11093     align = INTVAL (align_exp);
11095   /* Can't use any of this if the user has appropriated esi or edi.  */
11096   if (global_regs[4] || global_regs[5])
11099   /* This simple hack avoids all inlining code and simplifies code below.  */
11100   if (!TARGET_ALIGN_STRINGOPS)
11103   if (GET_CODE (count_exp) == CONST_INT)
11105       count = INTVAL (count_exp);
11106       if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11110   /* Figure out proper mode for counter.  For 32bits it is always SImode,
11111      for 64bits use SImode when possible, otherwise DImode.
11112      Set count to number of bytes copied when known at compile time.  */
11113   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11114       || x86_64_zero_extended_value (count_exp))
11115     counter_mode = SImode;
11117     counter_mode = DImode;
11119   if (counter_mode != SImode && counter_mode != DImode)
/* Force both addresses into registers (esi/edi for the string insns).  */
11122   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11123   if (destreg != XEXP (dst, 0))
11124     dst = replace_equiv_address_nv (dst, destreg);
11125   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11126   if (srcreg != XEXP (src, 0))
11127     src = replace_equiv_address_nv (src, srcreg);
11129   /* When optimizing for size emit simple rep ; movsb instruction for
11130      counts not divisible by 4.  */
11132   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11134       emit_insn (gen_cld ());
11135       countreg = ix86_zero_extend_to_Pmode (count_exp);
11136       destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11137       srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11138       emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11142   /* For constant aligned (or small unaligned) copies use rep movsl
11143      followed by code copying the rest.  For PentiumPro ensure 8 byte
11144      alignment to allow rep movsl acceleration.  */
11146   else if (count != 0
11148 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11149 	       || optimize_size || count < (unsigned int) 64))
11151       unsigned HOST_WIDE_INT offset = 0;
/* Word size of the bulk copy: 8 bytes on 64-bit unless sized for space.  */
11152       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11153       rtx srcmem, dstmem;
11155       emit_insn (gen_cld ());
11156       if (count & ~(size - 1))
11158 	  countreg = copy_to_mode_reg (counter_mode,
11159 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11160 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11161 	  countreg = ix86_zero_extend_to_Pmode (countreg);
11163 	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
11164 				    GEN_INT (size == 4 ? 2 : 3));
11165 	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11166 	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11168 	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11169 				  countreg, destexp, srcexp));
11170 	  offset = count & ~(size - 1);
/* Copy the remaining tail (guards for the 2- and 1-byte cases are
   elided in this excerpt): 4, then 2, then 1 byte.  */
11172       if (size == 8 && (count & 0x04))
11174 	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11176 	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11178 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11183 	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11185 	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11187 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11192 	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11194 	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11196 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11199   /* The generic code based on the glibc implementation:
11200      - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11201      allowing accelerated copying there)
11202      - copy the data using rep movsl
11203      - copy the rest.  */
11208       rtx srcmem, dstmem;
11209       int desired_alignment = (TARGET_PENTIUMPRO
11210 			       && (count == 0 || count >= (unsigned int) 260)
11211 			       ? 8 : UNITS_PER_WORD);
11212       /* Get rid of MEM_OFFSETs, they won't be accurate.  */
11213       dst = change_address (dst, BLKmode, destreg);
11214       src = change_address (src, BLKmode, srcreg);
11216       /* In case we don't know anything about the alignment, default to
11217 	 library version, since it is usually equally fast and result in
11220 	 Also emit call when we know that the count is large and call overhead
11221 	 will not be important.  */
11222       if (!TARGET_INLINE_ALL_STRINGOPS
11223 	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11226       if (TARGET_SINGLE_STRINGOP)
11227 	emit_insn (gen_cld ());
11229       countreg2 = gen_reg_rtx (Pmode);
11230       countreg = copy_to_mode_reg (counter_mode, count_exp);
11232       /* We don't use loops to align destination and to copy parts smaller
11233 	 than 4 bytes, because gcc is able to optimize such code better (in
11234 	 the case the destination or the count really is aligned, gcc is often
11235 	 able to predict the branches) and also it is friendlier to the
11236 	 hardware branch prediction.
11238 	 Using loops is beneficial for generic case, because we can
11239 	 handle small counts using the loops.  Many CPUs (such as Athlon)
11240 	 have large REP prefix setup costs.
11242 	 This is quite costly.  Maybe we can revisit this decision later or
11243 	 add some customizability to this code.  */
/* Skip the alignment prologue entirely for counts smaller than the
   desired alignment.  */
11245       if (count == 0 && align < desired_alignment)
11247 	  label = gen_label_rtx ();
11248 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11249 				   LEU, 0, counter_mode, 1, label);
/* Alignment prologue: copy 1, 2, then 4 bytes as needed so destreg
   becomes desired_alignment-aligned (the `if (align <= N)` guards for
   the first two steps are elided in this excerpt).  */
11253 	  rtx label = ix86_expand_aligntest (destreg, 1);
11254 	  srcmem = change_address (src, QImode, srcreg);
11255 	  dstmem = change_address (dst, QImode, destreg);
11256 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11257 	  ix86_adjust_counter (countreg, 1);
11258 	  emit_label (label);
11259 	  LABEL_NUSES (label) = 1;
11263 	  rtx label = ix86_expand_aligntest (destreg, 2);
11264 	  srcmem = change_address (src, HImode, srcreg);
11265 	  dstmem = change_address (dst, HImode, destreg);
11266 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11267 	  ix86_adjust_counter (countreg, 2);
11268 	  emit_label (label);
11269 	  LABEL_NUSES (label) = 1;
11271       if (align <= 4 && desired_alignment > 4)
11273 	  rtx label = ix86_expand_aligntest (destreg, 4);
11274 	  srcmem = change_address (src, SImode, srcreg);
11275 	  dstmem = change_address (dst, SImode, destreg);
11276 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11277 	  ix86_adjust_counter (countreg, 4);
11278 	  emit_label (label);
11279 	  LABEL_NUSES (label) = 1;
11282       if (label && desired_alignment > 4 && !TARGET_64BIT)
11284 	  emit_label (label);
11285 	  LABEL_NUSES (label) = 1;
/* Reuse the small-count label here; a fresh one is presumably created
   afterwards (elided) for the epilogue -- TODO confirm.  */
11288       if (!TARGET_SINGLE_STRINGOP)
11289 	emit_insn (gen_cld ());
/* Bulk copy: countreg2 = bytes / word-size, then rep mov.  */
11292 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11294 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11298 	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11299 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11301       srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11302       destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11303       emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11304 			      countreg2, destexp, srcexp));
11308 	  emit_label (label);
11309 	  LABEL_NUSES (label) = 1;
/* Epilogue: copy the trailing 4/2/1 bytes, testing countreg when the
   count is not known at compile time.  */
11311       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11313 	  srcmem = change_address (src, SImode, srcreg);
11314 	  dstmem = change_address (dst, SImode, destreg);
11315 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11317       if ((align <= 4 || count == 0) && TARGET_64BIT)
11319 	  rtx label = ix86_expand_aligntest (countreg, 4);
11320 	  srcmem = change_address (src, SImode, srcreg);
11321 	  dstmem = change_address (dst, SImode, destreg);
11322 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11323 	  emit_label (label);
11324 	  LABEL_NUSES (label) = 1;
11326       if (align > 2 && count != 0 && (count & 2))
11328 	  srcmem = change_address (src, HImode, srcreg);
11329 	  dstmem = change_address (dst, HImode, destreg);
11330 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11332       if (align <= 2 || count == 0)
11334 	  rtx label = ix86_expand_aligntest (countreg, 2);
11335 	  srcmem = change_address (src, HImode, srcreg);
11336 	  dstmem = change_address (dst, HImode, destreg);
11337 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11338 	  emit_label (label);
11339 	  LABEL_NUSES (label) = 1;
11341       if (align > 1 && count != 0 && (count & 1))
11343 	  srcmem = change_address (src, QImode, srcreg);
11344 	  dstmem = change_address (dst, QImode, destreg);
11345 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11347       if (align <= 1 || count == 0)
11349 	  rtx label = ix86_expand_aligntest (countreg, 1);
11350 	  srcmem = change_address (src, QImode, srcreg);
11351 	  dstmem = change_address (dst, QImode, destreg);
11352 	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11353 	  emit_label (label);
11354 	  LABEL_NUSES (label) = 1;
11361 /* Expand string clear operation (bzero).  Use i386 string operations when
11362    profitable.  expand_movstr contains similar code.
   NOTE(review): excerpt elides many lines (return type, braces, returns,
   some guards); comments describe only the visible code.  */
11364 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11366   rtx destreg, zeroreg, countreg, destexp;
11367   enum machine_mode counter_mode;
11368   HOST_WIDE_INT align = 0;
11369   unsigned HOST_WIDE_INT count = 0;
11371   if (GET_CODE (align_exp) == CONST_INT)
11372     align = INTVAL (align_exp);
11374   /* Can't use any of this if the user has appropriated esi.  */
11375   if (global_regs[4])
11378   /* This simple hack avoids all inlining code and simplifies code below.  */
11379   if (!TARGET_ALIGN_STRINGOPS)
11382   if (GET_CODE (count_exp) == CONST_INT)
11384       count = INTVAL (count_exp);
11385       if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11388   /* Figure out proper mode for counter.  For 32bits it is always SImode,
11389      for 64bits use SImode when possible, otherwise DImode.
11390      Set count to number of bytes copied when known at compile time.  */
11391   if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11392       || x86_64_zero_extended_value (count_exp))
11393     counter_mode = SImode;
11395     counter_mode = DImode;
11397   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11398   if (destreg != XEXP (dst, 0))
11399     dst = replace_equiv_address_nv (dst, destreg);
11401   emit_insn (gen_cld ());
11403   /* When optimizing for size emit simple rep ; stosb instruction for
11404      counts not divisible by 4.  */
11406   if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11408       countreg = ix86_zero_extend_to_Pmode (count_exp);
11409       zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11410       destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11411       emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
/* Constant aligned (or small unaligned) clear: rep stosl/stosq plus
   tail stores, as in ix86_expand_movstr.  */
11413   else if (count != 0
11415 	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11416 	       || optimize_size || count < (unsigned int) 64))
11418       int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11419       unsigned HOST_WIDE_INT offset = 0;
11421       zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11422       if (count & ~(size - 1))
11424 	  countreg = copy_to_mode_reg (counter_mode,
11425 				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11426 						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11427 	  countreg = ix86_zero_extend_to_Pmode (countreg);
11428 	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11429 	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11430 	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11431 	  offset = count & ~(size - 1);
/* Store the remaining tail (guards for the 2- and 1-byte cases are
   elided in this excerpt): 4, then 2, then 1 byte of zeros.  */
11433       if (size == 8 && (count & 0x04))
11435 	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11437 	  emit_insn (gen_strset (destreg, mem,
11438 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11443 	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11445 	  emit_insn (gen_strset (destreg, mem,
11446 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11451 	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11453 	  emit_insn (gen_strset (destreg, mem,
11454 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
/* Generic path: align destination, bulk rep stos, then epilogue.  */
11461       /* Compute desired alignment of the string operation.  */
11462       int desired_alignment = (TARGET_PENTIUMPRO
11463 			       && (count == 0 || count >= (unsigned int) 260)
11464 			       ? 8 : UNITS_PER_WORD);
11466       /* In case we don't know anything about the alignment, default to
11467 	 library version, since it is usually equally fast and result in
11470 	 Also emit call when we know that the count is large and call overhead
11471 	 will not be important.  */
11472       if (!TARGET_INLINE_ALL_STRINGOPS
11473 	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11476       if (TARGET_SINGLE_STRINGOP)
11477 	emit_insn (gen_cld ());
11479       countreg2 = gen_reg_rtx (Pmode);
11480       countreg = copy_to_mode_reg (counter_mode, count_exp);
11481       zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11482       /* Get rid of MEM_OFFSET, it won't be accurate.  */
11483       dst = change_address (dst, BLKmode, destreg);
11485       if (count == 0 && align < desired_alignment)
11487 	  label = gen_label_rtx ();
11488 	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11489 				   LEU, 0, counter_mode, 1, label);
/* Alignment prologue: store 1, 2, then 4 bytes of zeros as needed (the
   `if (align <= N)` guards for the first two steps are elided).  */
11493 	  rtx label = ix86_expand_aligntest (destreg, 1);
11494 	  emit_insn (gen_strset (destreg, dst,
11495 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11496 	  ix86_adjust_counter (countreg, 1);
11497 	  emit_label (label);
11498 	  LABEL_NUSES (label) = 1;
11502 	  rtx label = ix86_expand_aligntest (destreg, 2);
11503 	  emit_insn (gen_strset (destreg, dst,
11504 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11505 	  ix86_adjust_counter (countreg, 2);
11506 	  emit_label (label);
11507 	  LABEL_NUSES (label) = 1;
11509       if (align <= 4 && desired_alignment > 4)
11511 	  rtx label = ix86_expand_aligntest (destreg, 4);
11512 	  emit_insn (gen_strset (destreg, dst,
11514 				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11516 	  ix86_adjust_counter (countreg, 4);
11517 	  emit_label (label);
11518 	  LABEL_NUSES (label) = 1;
11521       if (label && desired_alignment > 4 && !TARGET_64BIT)
11523 	  emit_label (label);
11524 	  LABEL_NUSES (label) = 1;
11528       if (!TARGET_SINGLE_STRINGOP)
11529 	emit_insn (gen_cld ());
/* Bulk clear: countreg2 = bytes / word-size, then rep stos.  */
11532 	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11534 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11538 	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11539 	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11541       destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11542       emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11546 	  emit_label (label);
11547 	  LABEL_NUSES (label) = 1;
/* Epilogue: store the trailing 4/2/1 zero bytes, testing countreg when
   the count is not known at compile time.  */
11550       if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11551 	emit_insn (gen_strset (destreg, dst,
11552 			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
11553       if (TARGET_64BIT && (align <= 4 || count == 0))
11555 	  rtx label = ix86_expand_aligntest (countreg, 4);
11556 	  emit_insn (gen_strset (destreg, dst,
11557 				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11558 	  emit_label (label);
11559 	  LABEL_NUSES (label) = 1;
11561       if (align > 2 && count != 0 && (count & 2))
11562 	emit_insn (gen_strset (destreg, dst,
11563 			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
11564       if (align <= 2 || count == 0)
11566 	  rtx label = ix86_expand_aligntest (countreg, 2);
11567 	  emit_insn (gen_strset (destreg, dst,
11568 				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11569 	  emit_label (label);
11570 	  LABEL_NUSES (label) = 1;
11572       if (align > 1 && count != 0 && (count & 1))
11573 	emit_insn (gen_strset (destreg, dst,
11574 			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
11575       if (align <= 1 || count == 0)
11577 	  rtx label = ix86_expand_aligntest (countreg, 1);
11578 	  emit_insn (gen_strset (destreg, dst,
11579 				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11580 	  emit_label (label);
11581 	  LABEL_NUSES (label) = 1;
11587 /* Expand strlen.  OUT receives the length; SRC is the string; EOSCHAR
   is the terminator (const0_rtx for real strlen); ALIGN is the known
   source alignment.
   NOTE(review): excerpt elides lines (return type, braces, returns);
   presumably returns nonzero when expansion succeeded -- TODO confirm.  */
11589 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11591   rtx addr, scratch1, scratch2, scratch3, scratch4;
11593   /* The generic case of strlen expander is long.  Avoid expanding it
11594      unless TARGET_INLINE_ALL_STRINGOPS.  */
11596   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11597       && !TARGET_INLINE_ALL_STRINGOPS
11599       && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11602   addr = force_reg (Pmode, XEXP (src, 0));
11603   scratch1 = gen_reg_rtx (Pmode);
11605   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11608       /* Well it seems that some optimizer does not combine a call like
11609 	 foo(strlen(bar), strlen(bar));
11610 	 when the move and the subtraction is done here.  It does calculate
11611 	 the length just once when these instructions are done inside of
11612 	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
11613 	 often used and I use one fewer register for the lifetime of
11614 	 output_strlen_unroll() this is better.  */
11616       emit_move_insn (out, addr);
11618       ix86_expand_strlensi_unroll_1 (out, src, align);
11620       /* strlensi_unroll_1 returns the address of the zero at the end of
11621 	 the string, like memchr(), so compute the length by subtracting
11622 	 the start address.  */
11624 	emit_insn (gen_subdi3 (out, out, addr));
11626 	emit_insn (gen_subsi3 (out, out, addr));
/* Fallback: repnz scasb.  scratch4 = -1 is the ecx/rcx count.  */
11631       scratch2 = gen_reg_rtx (Pmode);
11632       scratch3 = gen_reg_rtx (Pmode);
11633       scratch4 = force_reg (Pmode, constm1_rtx);
11635       emit_move_insn (scratch3, addr);
11636       eoschar = force_reg (QImode, eoschar);
11638       emit_insn (gen_cld ());
11639       src = replace_equiv_address_nv (src, scratch3);
11641       /* If .md starts supporting :P, this can be done in .md.  */
11642       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11643 						 scratch4), UNSPEC_SCAS);
11644       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves -(len+2) in the counter: length = ~counter - 1.  */
11647 	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11648 	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11652 	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11653 	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11659 /* Expand the appropriate insns for doing strlen if not just doing
11662 out = result, initialized with the start address
11663 align_rtx = alignment of the address.
11664 scratch = scratch register, initialized with the startaddress when
11665 not aligned, otherwise undefined
11667 This is just the body. It needs the initializations mentioned above and
11668 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): this extract is missing intermediate source lines (gaps in
   the embedded numbering); visible code lines are kept verbatim.  On
   return OUT holds the address of the terminating zero byte (see the
   caller, which subtracts the start address).  */
11671 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11675 rtx align_2_label = NULL_RTX;
11676 rtx align_3_label = NULL_RTX;
11677 rtx align_4_label = gen_label_rtx ();
11678 rtx end_0_label = gen_label_rtx ();
11680 rtx tmpreg = gen_reg_rtx (SImode);
11681 rtx scratch = gen_reg_rtx (SImode);
11685 if (GET_CODE (align_rtx) == CONST_INT)
11686 align = INTVAL (align_rtx);
11688 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11690 /* Is there a known alignment and is it less than 4? */
11693 rtx scratch1 = gen_reg_rtx (Pmode);
11694 emit_move_insn (scratch1, out);
11695 /* Is there a known alignment and is it not 2? */
11698 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11699 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11701 /* Leave just the 3 lower bits. */
11702 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11703 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already 4-aligned, 2 -> 2-aligned,
   >2 -> 3-aligned; falling through handles the 1-aligned case.  */
11705 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11706 Pmode, 1, align_4_label);
11707 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11708 Pmode, 1, align_2_label);
11709 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11710 Pmode, 1, align_3_label);
11714 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11715 check if is aligned to 4 - byte. */
11717 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11718 NULL_RTX, 0, OPTAB_WIDEN);
11720 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11721 Pmode, 1, align_4_label);
11724 mem = change_address (src, QImode, out);
11726 /* Now compare the bytes. */
11728 /* Compare the first n unaligned byte on a byte per byte basis. */
11729 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11730 QImode, 1, end_0_label);
11732 /* Increment the address. */
11734 emit_insn (gen_adddi3 (out, out, const1_rtx));
11736 emit_insn (gen_addsi3 (out, out, const1_rtx));
11738 /* Not needed with an alignment of 2 */
11741 emit_label (align_2_label);
11743 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11747 emit_insn (gen_adddi3 (out, out, const1_rtx));
11749 emit_insn (gen_addsi3 (out, out, const1_rtx));
11751 emit_label (align_3_label);
11754 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11758 emit_insn (gen_adddi3 (out, out, const1_rtx));
11760 emit_insn (gen_addsi3 (out, out, const1_rtx));
11763 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11764 align this loop. It gives only huge programs, but does not help to
11766 emit_label (align_4_label);
11768 mem = change_address (src, SImode, out);
11769 emit_move_insn (scratch, mem);
11771 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11773 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11775 /* This formula yields a nonzero result iff one of the bytes is zero.
11776 This saves three branches inside loop and many cycles. */
/* Classic zero-byte trick: (x - 0x01010101) & ~x & 0x80808080 != 0
   iff some byte of x is zero.  */
11778 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11779 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11780 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11781 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11782 gen_int_mode (0x80808080, SImode)));
11783 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free variant (conditional moves): select which half-word holds
   the zero byte without a jump.  */
11788 rtx reg = gen_reg_rtx (SImode);
11789 rtx reg2 = gen_reg_rtx (Pmode);
11790 emit_move_insn (reg, tmpreg);
11791 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11793 /* If zero is not in the first two bytes, move two bytes forward. */
11794 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11795 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11796 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11797 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11798 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11801 /* Emit lea manually to avoid clobbering of flags. */
11802 emit_insn (gen_rtx_SET (SImode, reg2,
11803 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11805 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11806 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11807 emit_insn (gen_rtx_SET (VOIDmode, out,
11808 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching variant: explicit test-and-jump over the two-byte advance.  */
11815 rtx end_2_label = gen_label_rtx ();
11816 /* Is zero in the first two bytes? */
11818 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11819 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11820 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11821 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11822 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11824 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11825 JUMP_LABEL (tmp) = end_2_label;
11827 /* Not in the first two. Move two bytes forward. */
11828 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11830 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11832 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11834 emit_label (end_2_label);
11838 /* Avoid branch in fixing the byte. */
/* Use the carry from adding the low byte to itself to subtract either 3
   or 2 from OUT, compensating for the 4-byte over-advance.  Hard-coded
   register 17 here looks like the flags register -- TODO confirm it
   matches FLAGS_REG.  */
11839 tmpreg = gen_lowpart (QImode, tmpreg);
11840 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11841 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11843 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11845 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11847 emit_label (end_0_label);
/* Emit RTL for a call.  RETVAL is the value destination (or elided for
   void calls), FNADDR the MEM holding the callee address, CALLARG1 the
   argument-bytes operand, POP the bytes-to-pop rtx and SIBCALL non-zero
   for a sibling call.  NOTE(review): intermediate lines of this function
   are elided in this extract; code lines are kept verbatim.  */
11851 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11852 rtx callarg2 ATTRIBUTE_UNUSED,
11853 rtx pop, int sibcall)
11855 rtx use = NULL, call;
11857 if (pop == const0_rtx)
11859 if (TARGET_64BIT && pop)
/* Darwin-only: route through the Mach-O indirection stub (the #if for
   TARGET_MACHO is elided; see the #endif below).  */
11863 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11864 fnaddr = machopic_indirect_call_target (fnaddr);
11866 /* Static functions and indirect calls don't need the pic register. */
11867 if (! TARGET_64BIT && flag_pic
11868 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11869 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11870 use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs ABI: AL carries the number of vector registers used.  */
11872 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11874 rtx al = gen_rtx_REG (QImode, 0);
11875 emit_move_insn (al, callarg2);
11876 use_reg (&use, al);
11878 #endif /* TARGET_MACHO */
11880 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11882 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11883 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to non-constant addresses must go through R11: it is
   call-clobbered but not used for argument passing.  */
11885 if (sibcall && TARGET_64BIT
11886 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11889 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11890 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11891 emit_move_insn (fnaddr, addr);
11892 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11895 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11897 call = gen_rtx_SET (VOIDmode, retval, call);
/* Fold the callee-pop stack adjustment into the call PARALLEL.  */
11900 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11901 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11902 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11905 call = emit_call_insn (call);
11907 CALL_INSN_FUNCTION_USAGE (call) = use;
11911 /* Clear stack slot assignments remembered from previous functions.
11912 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a zeroed per-function machine_function record; the sentinel
   -1 marks use_fast_prologue_epilogue_nregs as not-yet-computed.
   NOTE(review): the trailing return statement is elided in this extract;
   visible lines kept verbatim.  */
11915 static struct machine_function *
11916 ix86_init_machine_status (void)
11918 struct machine_function *f;
11920 f = ggc_alloc_cleared (sizeof (struct machine_function));
11921 f->use_fast_prologue_epilogue_nregs = -1;
11926 /* Return a MEM corresponding to a stack slot with mode MODE.
11927 Allocate a new slot if necessary.
11929 The RTL for a function can have several slots available: N is
11930 which slot to use. */
/* Slots are cached per (mode, n) in the ix86_stack_locals list so repeated
   requests reuse the same stack location.  NOTE(review): some lines (the
   out-of-range abort and the returns) are elided in this extract.  */
11933 assign_386_stack_local (enum machine_mode mode, int n)
11935 struct stack_local_entry *s;
/* Reject slot indices outside [0, MAX_386_STACK_LOCALS).  */
11937 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Cache hit: return the previously assigned slot.  */
11940 for (s = ix86_stack_locals; s; s = s->next)
11941 if (s->mode == mode && s->n == n)
/* Cache miss: allocate a GC'd entry and a fresh stack slot, then push it
   onto the head of the list.  */
11944 s = (struct stack_local_entry *)
11945 ggc_alloc (sizeof (struct stack_local_entry));
11948 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11950 s->next = ix86_stack_locals;
11951 ix86_stack_locals = s;
11955 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11957 static GTY(()) rtx ix86_tls_symbol;
/* Lazily builds and caches the SYMBOL_REF.  The GNU TLS dialect on 32-bit
   uses the triple-underscore ___tls_get_addr entry point; everything else
   uses the standard __tls_get_addr.  */
11959 ix86_tls_get_addr (void)
11962 if (!ix86_tls_symbol)
11964 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11965 (TARGET_GNU_TLS && !TARGET_64BIT)
11966 ? "___tls_get_addr"
11967 : "__tls_get_addr");
11970 return ix86_tls_symbol;
11973 /* Calculate the length of the memory address in the instruction
11974 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the number of extra bytes (SIB + displacement) the address
   requires.  NOTE(review): several lines (return statements, the abort
   for auto-modify addresses) are elided in this extract; visible lines
   kept verbatim.  */
11977 memory_address_length (rtx addr)
11979 struct ix86_address parts;
11980 rtx base, index, disp;
/* Auto-increment/decrement addressing is not valid on x86.  */
11983 if (GET_CODE (addr) == PRE_DEC
11984 || GET_CODE (addr) == POST_INC
11985 || GET_CODE (addr) == PRE_MODIFY
11986 || GET_CODE (addr) == POST_MODIFY)
11989 if (! ix86_decompose_address (addr, &parts))
11993 index = parts.index;
/* Special encoding rules (ModRM/SIB):
11998 - esp as the base always wants an index,
11999 - ebp as the base always wants a displacement. */
12001 /* Register Indirect. */
12002 if (base && !index && !disp)
12004 /* esp (for its index) and ebp (for its displacement) need
12005 the two-byte modrm form. */
12006 if (addr == stack_pointer_rtx
12007 || addr == arg_pointer_rtx
12008 || addr == frame_pointer_rtx
12009 || addr == hard_frame_pointer_rtx)
12013 /* Direct Addressing. */
12014 else if (disp && !base && !index)
12019 /* Find the length of the displacement constant. */
/* 'K' constraint: value fits in a signed 8-bit displacement.  */
12022 if (GET_CODE (disp) == CONST_INT
12023 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12029 /* ebp always wants a displacement. */
12030 else if (base == hard_frame_pointer_rtx)
12033 /* An index requires the two-byte modrm form.... */
12035 /* ...like esp, which always wants an index. */
12036 || base == stack_pointer_rtx
12037 || base == arg_pointer_rtx
12038 || base == frame_pointer_rtx)
12045 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12046 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's operands for a constant; the returned byte count depends on
   the insn's mode attribute.  NOTE(review): the switch cases and returns
   are largely elided in this extract; visible lines kept verbatim.  */
12048 ix86_attr_length_immediate_default (rtx insn, int shortform)
12052 extract_insn_cached (insn);
12053 for (i = recog_data.n_operands - 1; i >= 0; --i)
12054 if (CONSTANT_P (recog_data.operand[i]))
/* With SHORTFORM, an immediate satisfying 'K' (signed 8-bit) encodes in
   one byte.  */
12059 && GET_CODE (recog_data.operand[i]) == CONST_INT
12060 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12064 switch (get_attr_mode (insn))
12075 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12080 fatal_insn ("unknown insn mode", insn);
12086 /* Compute default value for "length_address" attribute. */
/* For LEA the address is the SET_SRC of the pattern; otherwise the first
   MEM operand's address is measured via memory_address_length.
   NOTE(review): some lines are elided in this extract.  */
12088 ix86_attr_length_address_default (rtx insn)
12092 if (get_attr_type (insn) == TYPE_LEA)
12094 rtx set = PATTERN (insn);
12095 if (GET_CODE (set) == SET)
12097 else if (GET_CODE (set) == PARALLEL
12098 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12099 set = XVECEXP (set, 0, 0);
12102 #ifdef ENABLE_CHECKING
12108 return memory_address_length (SET_SRC (set));
/* Non-LEA: find the first MEM operand, if any.  */
12111 extract_insn_cached (insn);
12112 for (i = recog_data.n_operands - 1; i >= 0; --i)
12113 if (GET_CODE (recog_data.operand[i]) == MEM)
12115 return memory_address_length (XEXP (recog_data.operand[i], 0));
12121 /* Return the maximum number of instructions a cpu can issue. */
/* Switches on ix86_tune; the per-processor return values are elided in
   this extract.  Visible lines kept verbatim.  */
12124 ix86_issue_rate (void)
12128 case PROCESSOR_PENTIUM:
12132 case PROCESSOR_PENTIUMPRO:
12133 case PROCESSOR_PENTIUM4:
12134 case PROCESSOR_ATHLON:
12143 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12144 by DEP_INSN and nothing set by DEP_INSN. */
12147 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12151 /* Simplify the test for uninteresting insns. */
12152 if (insn_type != TYPE_SETCC
12153 && insn_type != TYPE_ICMOV
12154 && insn_type != TYPE_FCMOV
12155 && insn_type != TYPE_IBR)
12158 if ((set = single_set (dep_insn)) != 0)
12160 set = SET_DEST (set);
12163 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12164 && XVECLEN (PATTERN (dep_insn), 0) == 2
12165 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12166 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12168 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12169 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12174 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12177 /* This test is true if the dependent insn reads the flags but
12178 not any other potentially set register. */
12179 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12182 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12188 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12189 address with operands set by DEP_INSN. */
/* For LEA the "address" is the SET_SRC of the pattern; otherwise the first
   MEM operand's address is extracted.  The final modified_in_p answers
   whether DEP_INSN writes anything that address reads.  NOTE(review):
   several lines are elided in this extract; visible lines kept
   verbatim.  */
12192 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12196 if (insn_type == TYPE_LEA
12199 addr = PATTERN (insn);
12200 if (GET_CODE (addr) == SET)
12202 else if (GET_CODE (addr) == PARALLEL
12203 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12204 addr = XVECEXP (addr, 0, 0);
12207 addr = SET_SRC (addr);
/* Non-LEA: look for a MEM operand and take its address.  */
12212 extract_insn_cached (insn);
12213 for (i = recog_data.n_operands - 1; i >= 0; --i)
12214 if (GET_CODE (recog_data.operand[i]) == MEM)
12216 addr = XEXP (recog_data.operand[i], 0);
12223 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer), per ix86_tune processor.
   NOTE(review): this extract elided many lines (returns, braces, some
   case labels); visible lines are kept verbatim.  */
12227 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12229 enum attr_type insn_type, dep_insn_type;
12230 enum attr_memory memory, dep_memory;
12232 int dep_insn_code_number;
12234 /* Anti and output dependencies have zero cost on all CPUs. */
12235 if (REG_NOTE_KIND (link) != 0)
12238 dep_insn_code_number = recog_memoized (dep_insn);
12240 /* If we can't recognize the insns, we can't really do anything. */
12241 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12244 insn_type = get_attr_type (insn);
12245 dep_insn_type = get_attr_type (dep_insn);
12249 case PROCESSOR_PENTIUM:
12250 /* Address Generation Interlock adds a cycle of latency. */
12251 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12254 /* ??? Compares pair with jump/setcc. */
12255 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12258 /* Floating point stores require value to be ready one cycle earlier. */
12259 if (insn_type == TYPE_FMOV
12260 && get_attr_memory (insn) == MEMORY_STORE
12261 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12265 case PROCESSOR_PENTIUMPRO:
12266 memory = get_attr_memory (insn);
12267 dep_memory = get_attr_memory (dep_insn);
12269 /* Since we can't represent delayed latencies of load+operation,
12270 increase the cost here for non-imov insns. */
12271 if (dep_insn_type != TYPE_IMOV
12272 && dep_insn_type != TYPE_FMOV
12273 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12276 /* INT->FP conversion is expensive. */
12277 if (get_attr_fp_int_src (dep_insn))
12280 /* There is one cycle extra latency between an FP op and a store. */
12281 if (insn_type == TYPE_FMOV
12282 && (set = single_set (dep_insn)) != NULL_RTX
12283 && (set2 = single_set (insn)) != NULL_RTX
12284 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12285 && GET_CODE (SET_DEST (set2)) == MEM)
12288 /* Show ability of reorder buffer to hide latency of load by executing
12289 in parallel with previous instruction in case
12290 previous instruction is not needed to compute the address. */
12291 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12292 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12294 /* Claim moves to take one cycle, as core can issue one load
12295 at time and the next load can start cycle later. */
12296 if (dep_insn_type == TYPE_IMOV
12297 || dep_insn_type == TYPE_FMOV)
/* The case label for this next cluster (presumably PROCESSOR_K6) is
   elided in this extract.  */
12305 memory = get_attr_memory (insn);
12306 dep_memory = get_attr_memory (dep_insn);
12307 /* The esp dependency is resolved before the instruction is really
12309 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12310 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12313 /* Since we can't represent delayed latencies of load+operation,
12314 increase the cost here for non-imov insns. */
12315 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12316 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12318 /* INT->FP conversion is expensive. */
12319 if (get_attr_fp_int_src (dep_insn))
12322 /* Show ability of reorder buffer to hide latency of load by executing
12323 in parallel with previous instruction in case
12324 previous instruction is not needed to compute the address. */
12325 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12326 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12328 /* Claim moves to take one cycle, as core can issue one load
12329 at time and the next load can start cycle later. */
12330 if (dep_insn_type == TYPE_IMOV
12331 || dep_insn_type == TYPE_FMOV)
12340 case PROCESSOR_ATHLON:
12342 memory = get_attr_memory (insn);
12343 dep_memory = get_attr_memory (dep_insn);
12345 /* Show ability of reorder buffer to hide latency of load by executing
12346 in parallel with previous instruction in case
12347 previous instruction is not needed to compute the address. */
12348 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12349 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12351 enum attr_unit unit = get_attr_unit (insn);
12354 /* Because of the difference between the length of integer and
12355 floating unit pipeline preparation stages, the memory operands
12356 for floating point are cheaper.
12358 ??? For Athlon it the difference is most probably 2. */
12359 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12362 loadcost = TARGET_ATHLON ? 2 : 0;
12364 if (cost >= loadcost)
/* Per-cycle PPro decoder bookkeeping (the decode[] member is elided in
   this extract).  */
12379 struct ppro_sched_data
12382 int issued_this_cycle;
/* Return INSN's ppro_uops attribute, or PPRO_UOPS_MANY for insns the
   recognizer cannot classify (conservative: treat as complex).  */
12386 static enum attr_ppro_uops
12387 ix86_safe_ppro_uops (rtx insn)
12389 if (recog_memoized (insn) >= 0)
12390 return get_attr_ppro_uops (insn);
12392 return PPRO_UOPS_MANY;
/* Debug helper: print the UIDs of the insns currently occupying the three
   PPro decoder slots to DUMP, if slot 0 is occupied.  */
12396 ix86_dump_ppro_packet (FILE *dump)
12398 if (ix86_sched_data.ppro.decode[0])
12400 fprintf (dump, "PPRO packet: %d",
12401 INSN_UID (ix86_sched_data.ppro.decode[0]));
12402 if (ix86_sched_data.ppro.decode[1])
12403 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12404 if (ix86_sched_data.ppro.decode[2])
12405 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12406 fputc ('\n', dump);
12410 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler hook: reset all per-block scheduling state to zero.  */
12413 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12414 int sched_verbose ATTRIBUTE_UNUSED,
12415 int veclen ATTRIBUTE_UNUSED)
12417 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12420 /* Shift INSN to SLOT, and shift everything else down. */
/* Rotates the ready-queue entries between INSNP and SLOT by one position
   so the insn at *INSNP ends up at *SLOT.  NOTE(review): the lines saving
   the insn and storing it into *SLOT are elided in this extract.  */
12423 ix86_reorder_insn (rtx *insnp, rtx *slot)
12429 insnp[0] = insnp[1];
12430 while (++insnp != slot);
/* Reorder the ready queue (READY..E_READY, highest priority last) to suit
   the PPro 4-1-1 decoder template: one complex insn plus up to two
   single-uop insns per cycle.  Records how many insns were issued in
   ix86_sched_data.ppro.issued_this_cycle.  NOTE(review): several lines
   (loop heads, braces) are elided in this extract; visible lines kept
   verbatim.  */
12436 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12439 enum attr_ppro_uops cur_uops;
12440 int issued_this_cycle;
12444 /* At this point .ppro.decode contains the state of the three
12445 decoders from last "cycle". That is, those insns that were
12446 actually independent. But here we're scheduling for the
12447 decoder, and we may find things that are decodable in the
12450 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12451 issued_this_cycle = 0;
12454 cur_uops = ix86_safe_ppro_uops (*insnp);
12456 /* If the decoders are empty, and we've a complex insn at the
12457 head of the priority queue, let it issue without complaint. */
12458 if (decode[0] == NULL)
12460 if (cur_uops == PPRO_UOPS_MANY)
12462 decode[0] = *insnp;
12466 /* Otherwise, search for a 2-4 uop unsn to issue. */
12467 while (cur_uops != PPRO_UOPS_FEW)
12469 if (insnp == ready)
12471 cur_uops = ix86_safe_ppro_uops (*--insnp);
12474 /* If so, move it to the head of the line. */
12475 if (cur_uops == PPRO_UOPS_FEW)
12476 ix86_reorder_insn (insnp, e_ready);
12478 /* Issue the head of the queue. */
12479 issued_this_cycle = 1;
12480 decode[0] = *e_ready--;
12483 /* Look for simple insns to fill in the other two slots. */
12484 for (i = 1; i < 3; ++i)
12485 if (decode[i] == NULL)
12487 if (ready > e_ready)
12491 cur_uops = ix86_safe_ppro_uops (*insnp);
12492 while (cur_uops != PPRO_UOPS_ONE)
12494 if (insnp == ready)
12496 cur_uops = ix86_safe_ppro_uops (*--insnp);
12499 /* Found one. Move it to the head of the queue and issue it. */
12500 if (cur_uops == PPRO_UOPS_ONE)
12502 ix86_reorder_insn (insnp, e_ready);
12503 decode[i] = *e_ready--;
12504 issued_this_cycle++;
12508 /* ??? Didn't find one. Ideally, here we would do a lazy split
12509 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issued insn so the variable-issue hook's
   countdown terminates.  */
12513 if (issued_this_cycle == 0)
12514 issued_this_cycle = 1;
12515 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12518 /* We are about to being issuing insns for this clock cycle.
12519 Override the default sort algorithm to better slot instructions. */
/* Scheduler hook: dispatch to the processor-specific reorder routine
   (only PPro has one here); returns the issue rate.  NOTE(review): the
   early-out for an empty/singleton queue and the switch head are elided
   in this extract.  */
12521 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12522 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12523 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12525 int n_ready = *n_readyp;
12526 rtx *e_ready = ready + n_ready - 1;
12528 /* Make sure to go ahead and initialize key items in
12529 ix86_sched_data if we are not going to bother trying to
12530 reorder the ready queue. */
12533 ix86_sched_data.ppro.issued_this_cycle = 1;
12542 case PROCESSOR_PENTIUMPRO:
12543 ix86_sched_reorder_ppro (ready, e_ready);
12548 return ix86_issue_rate ();
12551 /* We are about to issue INSN. Return the number of insns left on the
12552 ready queue that can be issued this cycle. */
/* For PPro this also maintains the simulated three-slot decoder packet:
   a complex (MANY) insn flushes the packet and occupies it alone, a FEW
   insn starts a new packet, and a ONE insn fills the first free slot
   (flushing when all three are full).  NOTE(review): lines are elided in
   this extract; visible lines kept verbatim.  */
12555 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12556 int can_issue_more)
12562 return can_issue_more - 1;
12564 case PROCESSOR_PENTIUMPRO:
12566 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12568 if (uops == PPRO_UOPS_MANY)
12571 ix86_dump_ppro_packet (dump);
12572 ix86_sched_data.ppro.decode[0] = insn;
12573 ix86_sched_data.ppro.decode[1] = NULL;
12574 ix86_sched_data.ppro.decode[2] = NULL;
12576 ix86_dump_ppro_packet (dump);
12577 ix86_sched_data.ppro.decode[0] = NULL;
12579 else if (uops == PPRO_UOPS_FEW)
12582 ix86_dump_ppro_packet (dump);
12583 ix86_sched_data.ppro.decode[0] = insn;
12584 ix86_sched_data.ppro.decode[1] = NULL;
12585 ix86_sched_data.ppro.decode[2] = NULL;
/* PPRO_UOPS_ONE: place into the first empty decoder slot.  */
12589 for (i = 0; i < 3; ++i)
12590 if (ix86_sched_data.ppro.decode[i] == NULL)
12592 ix86_sched_data.ppro.decode[i] = insn;
/* Packet full: dump it (if requested) and reset all three slots.  */
12600 ix86_dump_ppro_packet (dump);
12601 ix86_sched_data.ppro.decode[0] = NULL;
12602 ix86_sched_data.ppro.decode[1] = NULL;
12603 ix86_sched_data.ppro.decode[2] = NULL;
12607 return --ix86_sched_data.ppro.issued_this_cycle;
/* Scheduler hook: use the DFA pipeline description only for Pentium and
   Athlon/K8 tunings (returns are elided in this extract).  */
12612 ia32_use_dfa_pipeline_interface (void)
12614 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12619 /* How many alternative schedules to try. This should be as wide as the
12620 scheduling freedom in the DFA, but no wider. Making this value too
12621 large results extra work for the scheduler. */
/* Returns a nonzero lookahead only for Pentium tuning (the return values
   themselves are elided in this extract).  */
12624 ia32_multipass_dfa_lookahead (void)
12626 if (ix86_tune == PROCESSOR_PENTIUM)
12633 /* Compute the alignment given to a constant that is being placed in memory.
12634 EXP is the constant and ALIGN is the alignment that the object would
12636 The value of this function is used instead of that alignment to align
/* Bumps doubles to 64-bit and 128-bit-mode constants to 128-bit
   alignment; long string constants get word alignment unless size
   optimization or -mno-align-long-strings forbids it.  */
12640 ix86_constant_alignment (tree exp, int align)
12642 if (TREE_CODE (exp) == REAL_CST)
12644 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12646 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12649 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12650 && !TARGET_NO_ALIGN_LONG_STRINGS
12651 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12652 return BITS_PER_WORD;
12657 /* Compute the alignment for a static variable.
12658 TYPE is the data type, and ALIGN is the alignment that
12659 the object would ordinarily have. The value of this function is used
12660 instead of that alignment to align the object. */
/* NOTE(review): return statements are elided in this extract; visible
   lines kept verbatim.  Large aggregates are raised to 256-bit alignment;
   mode-based bumps to 64/128 bits follow for arrays, complex types,
   records and scalars.  */
12663 ix86_data_alignment (tree type, int align)
12665 if (AGGREGATE_TYPE_P (type)
12666 && TYPE_SIZE (type)
12667 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12668 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12669 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12672 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12673 to 16byte boundary. */
12676 if (AGGREGATE_TYPE_P (type)
12677 && TYPE_SIZE (type)
12678 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12679 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12680 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12684 if (TREE_CODE (type) == ARRAY_TYPE)
12686 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12688 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12691 else if (TREE_CODE (type) == COMPLEX_TYPE)
12694 if (TYPE_MODE (type) == DCmode && align < 64)
12696 if (TYPE_MODE (type) == XCmode && align < 128)
12699 else if ((TREE_CODE (type) == RECORD_TYPE
12700 || TREE_CODE (type) == UNION_TYPE
12701 || TREE_CODE (type) == QUAL_UNION_TYPE)
12702 && TYPE_FIELDS (type))
12704 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12706 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12709 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12710 || TREE_CODE (type) == INTEGER_TYPE)
12712 if (TYPE_MODE (type) == DFmode && align < 64)
12714 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12721 /* Compute the alignment for a local variable.
12722 TYPE is the data type, and ALIGN is the alignment that
12723 the object would ordinarily have. The value of this macro is used
12724 instead of that alignment to align the object. */
/* Mirrors ix86_data_alignment but with the smaller 16-byte threshold for
   stack aggregates.  NOTE(review): return statements are elided in this
   extract; visible lines kept verbatim.  */
12727 ix86_local_alignment (tree type, int align)
12729 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12730 to 16byte boundary. */
12733 if (AGGREGATE_TYPE_P (type)
12734 && TYPE_SIZE (type)
12735 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12736 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12737 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12740 if (TREE_CODE (type) == ARRAY_TYPE)
12742 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12744 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12747 else if (TREE_CODE (type) == COMPLEX_TYPE)
12749 if (TYPE_MODE (type) == DCmode && align < 64)
12751 if (TYPE_MODE (type) == XCmode && align < 128)
12754 else if ((TREE_CODE (type) == RECORD_TYPE
12755 || TREE_CODE (type) == UNION_TYPE
12756 || TREE_CODE (type) == QUAL_UNION_TYPE)
12757 && TYPE_FIELDS (type))
12759 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12761 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12764 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12765 || TREE_CODE (type) == INTEGER_TYPE)
12768 if (TYPE_MODE (type) == DFmode && align < 64)
12770 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12776 /* Emit RTL insns to initialize the variable parts of a trampoline.
12777 FNADDR is an RTX for the address of the function's pure code.
12778 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): the TARGET_64BIT branch selector and some lines are
   elided in this extract; visible lines kept verbatim.  32-bit layout:
   mov ecx,CXT (0xb9 imm32); jmp rel32 (0xe9 disp).  64-bit layout: load
   FNADDR into r11, CXT into r10, then jmp *r11.  */
12780 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12784 /* Compute offset from the end of the jmp to the target function. */
12785 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12786 plus_constant (tramp, 10),
12787 NULL_RTX, 1, OPTAB_DIRECT);
12788 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12789 gen_int_mode (0xb9, QImode));
12790 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12791 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12792 gen_int_mode (0xe9, QImode));
12793 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12798 /* Try to load address using shorter movl instead of movabs.
12799 We may want to support movq for kernel mode, but kernel does not use
12800 trampolines at the moment. */
12801 if (x86_64_zero_extended_value (fnaddr))
12803 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0x41 0xbb = REX.B + mov r11d, imm32.  */
12804 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12805 gen_int_mode (0xbb41, HImode));
12806 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12807 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = REX.WB + movabs r11, imm64.  */
12812 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12813 gen_int_mode (0xbb49, HImode));
12814 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12818 /* Load static chain using movabs to r10. */
12819 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12820 gen_int_mode (0xba49, HImode));
12821 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12824 /* Jump to the r11 */
12825 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12826 gen_int_mode (0xff49, HImode));
12827 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12828 gen_int_mode (0xe3, QImode));
/* Sanity-check that the emitted bytes fit in the trampoline slot.  */
12830 if (offset > TRAMPOLINE_SIZE)
12834 #ifdef ENABLE_EXECUTE_STACK
12835 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12836 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register a target builtin NAME with type TYPE and code CODE, but only
   when the ISA bits in MASK are enabled (and, for 64-bit-only builtins,
   when compiling for 64-bit).  */
12840 #define def_builtin(MASK, NAME, TYPE, CODE) \
12842 if ((MASK) & target_flags \
12843 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12844 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12845 NULL, NULL_TREE); \
/* Table-entry descriptor used by the bdesc_* arrays below: ISA mask,
   matching insn pattern, builtin name/enum, and for comparison builtins
   the rtx comparison code plus an extra flag word.  */
12848 struct builtin_description
12850 const unsigned int mask;
12851 const enum insn_code icode;
12852 const char *const name;
12853 const enum ix86_builtins code;
12854 const enum rtx_code comparison;
12855 const unsigned int flag;
/* comiss/ucomiss (SSE) and comisd/ucomisd (SSE2) scalar-compare builtins.
   The eq/lt/le/neq entries use the unordered comparison codes (UNEQ,
   UNLT, UNLE, LTGT) to match the flags these instructions set.
   NOTE(review): the closing brace of this initializer is elided in this
   extract.  */
12858 static const struct builtin_description bdesc_comi[] =
12860 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12861 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12862 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12863 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12864 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12865 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12866 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12867 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12868 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12869 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12870 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12871 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12873 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12874 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* Two-operand builtins, expanded uniformly by ix86_expand_binop_builtin
   (presumably -- the expander is outside this view).  For the mask-compare
   rows, `comparison' selects the condition and flag==1 means the operands
   are swapped so GT/GE can be emitted via the LT/LE patterns.
   NOTE(review): the opening '{' of the initializer is elided in this view.  */
12886 static const struct builtin_description bdesc_2arg[] =
/* SSE: packed and scalar single-precision arithmetic.  */
12889 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12890 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12891 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12892 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12893 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12894 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12895 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12896 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
/* SSE comparisons; cmpgt/cmpge use the lt/le patterns with swapped operands.  */
12898 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12899 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12900 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12901 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12902 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12903 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12904 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12905 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12906 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12907 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12908 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12909 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12910 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12911 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12912 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12913 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12914 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12915 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12916 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12917 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12919 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12920 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12921 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12922 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12924 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12925 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12926 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12927 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12929 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12930 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12931 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12932 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12933 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
/* MMX: 64-bit packed integer operations (MASK_3DNOW_A entries are the
   Athlon MMX extensions, also enabled by SSE).  */
12936 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12937 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12938 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12940 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12941 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12942 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12943 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12945 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12947 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12948 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12949 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12950 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12951 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12952 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12954 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12955 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12956 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12958 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12959 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12960 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12961 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12963 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12966 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12967 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12968 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12969 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12970 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12971 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12973 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12974 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12975 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12976 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12978 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12979 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12980 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12981 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12982 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12983 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
/* Entries with name==0 are registered by hand in ix86_init_mmx_sse_builtins
   (their prototypes differ from the uniform binop signature).  */
12986 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12987 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12988 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12990 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12991 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12992 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12994 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12995 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12996 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12997 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12998 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12999 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13001 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13002 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13003 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13004 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13005 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13006 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13008 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13009 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13010 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13011 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13013 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13014 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
/* SSE2: packed and scalar double-precision arithmetic.  */
13017 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13027 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13028 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13029 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
13030 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
13031 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13032 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
13033 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
13034 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
13035 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
13036 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
13037 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
13038 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13039 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13040 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13041 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13042 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
13043 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
13044 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
13045 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
13047 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
/* SSE2 MMX: 128-bit packed integer operations.  */
13062 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* 128-bit saturating add/subtract (SSE2).  These were gated on MASK_MMX,
   which would make the SSE2-only paddsb128..psubusw128 builtins available
   under -mmmx without -msse2; gate them on MASK_SSE2 like every other
   128-bit entry in this table.  */
13071 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13072 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13073 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13074 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13075 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13076 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13077 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13078 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
/* SSE2 128-bit integer multiply, logical, compare, min/max, unpack,
   pack, shift and conversion entries (continuation of bdesc_2arg).  */
13080 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13082 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13085 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13090 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13091 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13093 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13094 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13100 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13115 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
/* Shifts: the *_ti patterns take the count in a V2DI register, the plain
   patterns take an immediate count (name==0: registered by hand).  */
13121 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13126 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13128 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13129 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13131 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13132 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13135 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13136 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13137 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13138 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13140 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13142 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13143 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13144 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13145 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
/* SSE3 (Prescott New Instructions).  */
13148 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13149 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13150 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13151 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13152 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13153 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
/* One-operand builtins, expanded uniformly by ix86_expand_unop_builtin
   (presumably -- the expander is outside this view): move-mask, square
   root, reciprocal and conversion operations.  Entries with name==0 are
   registered by hand because their prototypes are not uniform.
   NOTE(review): the initializer's braces are elided in this view.  */
13156 static const struct builtin_description bdesc_1arg[] =
13158 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13159 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13161 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13162 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13163 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13165 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13166 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13167 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13168 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13169 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13170 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13172 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13173 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13174 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13175 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13177 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13179 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13180 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13182 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13183 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13184 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13185 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13186 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13188 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13190 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13191 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13192 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13193 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13195 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13196 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13197 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13199 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
/* SSE3.  */
13202 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13203 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13204 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13208 ix86_init_builtins (void)
13211 ix86_init_mmx_sse_builtins ();
13214 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13215 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13218 ix86_init_mmx_sse_builtins (void)
13220 const struct builtin_description * d;
13223 tree pchar_type_node = build_pointer_type (char_type_node);
13224 tree pcchar_type_node = build_pointer_type (
13225 build_type_variant (char_type_node, 1, 0));
13226 tree pfloat_type_node = build_pointer_type (float_type_node);
13227 tree pcfloat_type_node = build_pointer_type (
13228 build_type_variant (float_type_node, 1, 0));
13229 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13230 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13231 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13234 tree int_ftype_v4sf_v4sf
13235 = build_function_type_list (integer_type_node,
13236 V4SF_type_node, V4SF_type_node, NULL_TREE);
13237 tree v4si_ftype_v4sf_v4sf
13238 = build_function_type_list (V4SI_type_node,
13239 V4SF_type_node, V4SF_type_node, NULL_TREE);
13240 /* MMX/SSE/integer conversions. */
13241 tree int_ftype_v4sf
13242 = build_function_type_list (integer_type_node,
13243 V4SF_type_node, NULL_TREE);
13244 tree int64_ftype_v4sf
13245 = build_function_type_list (long_long_integer_type_node,
13246 V4SF_type_node, NULL_TREE);
13247 tree int_ftype_v8qi
13248 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13249 tree v4sf_ftype_v4sf_int
13250 = build_function_type_list (V4SF_type_node,
13251 V4SF_type_node, integer_type_node, NULL_TREE);
13252 tree v4sf_ftype_v4sf_int64
13253 = build_function_type_list (V4SF_type_node,
13254 V4SF_type_node, long_long_integer_type_node,
13256 tree v4sf_ftype_v4sf_v2si
13257 = build_function_type_list (V4SF_type_node,
13258 V4SF_type_node, V2SI_type_node, NULL_TREE);
13259 tree int_ftype_v4hi_int
13260 = build_function_type_list (integer_type_node,
13261 V4HI_type_node, integer_type_node, NULL_TREE);
13262 tree v4hi_ftype_v4hi_int_int
13263 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13264 integer_type_node, integer_type_node,
13266 /* Miscellaneous. */
13267 tree v8qi_ftype_v4hi_v4hi
13268 = build_function_type_list (V8QI_type_node,
13269 V4HI_type_node, V4HI_type_node, NULL_TREE);
13270 tree v4hi_ftype_v2si_v2si
13271 = build_function_type_list (V4HI_type_node,
13272 V2SI_type_node, V2SI_type_node, NULL_TREE);
13273 tree v4sf_ftype_v4sf_v4sf_int
13274 = build_function_type_list (V4SF_type_node,
13275 V4SF_type_node, V4SF_type_node,
13276 integer_type_node, NULL_TREE);
13277 tree v2si_ftype_v4hi_v4hi
13278 = build_function_type_list (V2SI_type_node,
13279 V4HI_type_node, V4HI_type_node, NULL_TREE);
13280 tree v4hi_ftype_v4hi_int
13281 = build_function_type_list (V4HI_type_node,
13282 V4HI_type_node, integer_type_node, NULL_TREE);
13283 tree v4hi_ftype_v4hi_di
13284 = build_function_type_list (V4HI_type_node,
13285 V4HI_type_node, long_long_unsigned_type_node,
13287 tree v2si_ftype_v2si_di
13288 = build_function_type_list (V2SI_type_node,
13289 V2SI_type_node, long_long_unsigned_type_node,
13291 tree void_ftype_void
13292 = build_function_type (void_type_node, void_list_node);
13293 tree void_ftype_unsigned
13294 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13295 tree void_ftype_unsigned_unsigned
13296 = build_function_type_list (void_type_node, unsigned_type_node,
13297 unsigned_type_node, NULL_TREE);
13298 tree void_ftype_pcvoid_unsigned_unsigned
13299 = build_function_type_list (void_type_node, const_ptr_type_node,
13300 unsigned_type_node, unsigned_type_node,
13302 tree unsigned_ftype_void
13303 = build_function_type (unsigned_type_node, void_list_node);
13305 = build_function_type (long_long_unsigned_type_node, void_list_node);
13306 tree v4sf_ftype_void
13307 = build_function_type (V4SF_type_node, void_list_node);
13308 tree v2si_ftype_v4sf
13309 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13310 /* Loads/stores. */
13311 tree void_ftype_v8qi_v8qi_pchar
13312 = build_function_type_list (void_type_node,
13313 V8QI_type_node, V8QI_type_node,
13314 pchar_type_node, NULL_TREE);
13315 tree v4sf_ftype_pcfloat
13316 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13317 /* @@@ the type is bogus */
13318 tree v4sf_ftype_v4sf_pv2si
13319 = build_function_type_list (V4SF_type_node,
13320 V4SF_type_node, pv2si_type_node, NULL_TREE);
13321 tree void_ftype_pv2si_v4sf
13322 = build_function_type_list (void_type_node,
13323 pv2si_type_node, V4SF_type_node, NULL_TREE);
13324 tree void_ftype_pfloat_v4sf
13325 = build_function_type_list (void_type_node,
13326 pfloat_type_node, V4SF_type_node, NULL_TREE);
13327 tree void_ftype_pdi_di
13328 = build_function_type_list (void_type_node,
13329 pdi_type_node, long_long_unsigned_type_node,
13331 tree void_ftype_pv2di_v2di
13332 = build_function_type_list (void_type_node,
13333 pv2di_type_node, V2DI_type_node, NULL_TREE);
13334 /* Normal vector unops. */
13335 tree v4sf_ftype_v4sf
13336 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13338 /* Normal vector binops. */
13339 tree v4sf_ftype_v4sf_v4sf
13340 = build_function_type_list (V4SF_type_node,
13341 V4SF_type_node, V4SF_type_node, NULL_TREE);
13342 tree v8qi_ftype_v8qi_v8qi
13343 = build_function_type_list (V8QI_type_node,
13344 V8QI_type_node, V8QI_type_node, NULL_TREE);
13345 tree v4hi_ftype_v4hi_v4hi
13346 = build_function_type_list (V4HI_type_node,
13347 V4HI_type_node, V4HI_type_node, NULL_TREE);
13348 tree v2si_ftype_v2si_v2si
13349 = build_function_type_list (V2SI_type_node,
13350 V2SI_type_node, V2SI_type_node, NULL_TREE);
13351 tree di_ftype_di_di
13352 = build_function_type_list (long_long_unsigned_type_node,
13353 long_long_unsigned_type_node,
13354 long_long_unsigned_type_node, NULL_TREE);
13356 tree v2si_ftype_v2sf
13357 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13358 tree v2sf_ftype_v2si
13359 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13360 tree v2si_ftype_v2si
13361 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13362 tree v2sf_ftype_v2sf
13363 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13364 tree v2sf_ftype_v2sf_v2sf
13365 = build_function_type_list (V2SF_type_node,
13366 V2SF_type_node, V2SF_type_node, NULL_TREE);
13367 tree v2si_ftype_v2sf_v2sf
13368 = build_function_type_list (V2SI_type_node,
13369 V2SF_type_node, V2SF_type_node, NULL_TREE);
13370 tree pint_type_node = build_pointer_type (integer_type_node);
13371 tree pcint_type_node = build_pointer_type (
13372 build_type_variant (integer_type_node, 1, 0));
13373 tree pdouble_type_node = build_pointer_type (double_type_node);
13374 tree pcdouble_type_node = build_pointer_type (
13375 build_type_variant (double_type_node, 1, 0));
13376 tree int_ftype_v2df_v2df
13377 = build_function_type_list (integer_type_node,
13378 V2DF_type_node, V2DF_type_node, NULL_TREE);
13381 = build_function_type (intTI_type_node, void_list_node);
13382 tree v2di_ftype_void
13383 = build_function_type (V2DI_type_node, void_list_node);
13384 tree ti_ftype_ti_ti
13385 = build_function_type_list (intTI_type_node,
13386 intTI_type_node, intTI_type_node, NULL_TREE);
13387 tree void_ftype_pcvoid
13388 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13390 = build_function_type_list (V2DI_type_node,
13391 long_long_unsigned_type_node, NULL_TREE);
13393 = build_function_type_list (long_long_unsigned_type_node,
13394 V2DI_type_node, NULL_TREE);
13395 tree v4sf_ftype_v4si
13396 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13397 tree v4si_ftype_v4sf
13398 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13399 tree v2df_ftype_v4si
13400 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13401 tree v4si_ftype_v2df
13402 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13403 tree v2si_ftype_v2df
13404 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13405 tree v4sf_ftype_v2df
13406 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13407 tree v2df_ftype_v2si
13408 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13409 tree v2df_ftype_v4sf
13410 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13411 tree int_ftype_v2df
13412 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13413 tree int64_ftype_v2df
13414 = build_function_type_list (long_long_integer_type_node,
13415 V2DF_type_node, NULL_TREE);
13416 tree v2df_ftype_v2df_int
13417 = build_function_type_list (V2DF_type_node,
13418 V2DF_type_node, integer_type_node, NULL_TREE);
13419 tree v2df_ftype_v2df_int64
13420 = build_function_type_list (V2DF_type_node,
13421 V2DF_type_node, long_long_integer_type_node,
13423 tree v4sf_ftype_v4sf_v2df
13424 = build_function_type_list (V4SF_type_node,
13425 V4SF_type_node, V2DF_type_node, NULL_TREE);
13426 tree v2df_ftype_v2df_v4sf
13427 = build_function_type_list (V2DF_type_node,
13428 V2DF_type_node, V4SF_type_node, NULL_TREE);
13429 tree v2df_ftype_v2df_v2df_int
13430 = build_function_type_list (V2DF_type_node,
13431 V2DF_type_node, V2DF_type_node,
13434 tree v2df_ftype_v2df_pv2si
13435 = build_function_type_list (V2DF_type_node,
13436 V2DF_type_node, pv2si_type_node, NULL_TREE);
13437 tree void_ftype_pv2si_v2df
13438 = build_function_type_list (void_type_node,
13439 pv2si_type_node, V2DF_type_node, NULL_TREE);
13440 tree void_ftype_pdouble_v2df
13441 = build_function_type_list (void_type_node,
13442 pdouble_type_node, V2DF_type_node, NULL_TREE);
13443 tree void_ftype_pint_int
13444 = build_function_type_list (void_type_node,
13445 pint_type_node, integer_type_node, NULL_TREE);
13446 tree void_ftype_v16qi_v16qi_pchar
13447 = build_function_type_list (void_type_node,
13448 V16QI_type_node, V16QI_type_node,
13449 pchar_type_node, NULL_TREE);
13450 tree v2df_ftype_pcdouble
13451 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13452 tree v2df_ftype_v2df_v2df
13453 = build_function_type_list (V2DF_type_node,
13454 V2DF_type_node, V2DF_type_node, NULL_TREE);
13455 tree v16qi_ftype_v16qi_v16qi
13456 = build_function_type_list (V16QI_type_node,
13457 V16QI_type_node, V16QI_type_node, NULL_TREE);
13458 tree v8hi_ftype_v8hi_v8hi
13459 = build_function_type_list (V8HI_type_node,
13460 V8HI_type_node, V8HI_type_node, NULL_TREE);
13461 tree v4si_ftype_v4si_v4si
13462 = build_function_type_list (V4SI_type_node,
13463 V4SI_type_node, V4SI_type_node, NULL_TREE);
13464 tree v2di_ftype_v2di_v2di
13465 = build_function_type_list (V2DI_type_node,
13466 V2DI_type_node, V2DI_type_node, NULL_TREE);
13467 tree v2di_ftype_v2df_v2df
13468 = build_function_type_list (V2DI_type_node,
13469 V2DF_type_node, V2DF_type_node, NULL_TREE);
13470 tree v2df_ftype_v2df
13471 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13472 tree v2df_ftype_double
13473 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13474 tree v2df_ftype_double_double
13475 = build_function_type_list (V2DF_type_node,
13476 double_type_node, double_type_node, NULL_TREE);
13477 tree int_ftype_v8hi_int
13478 = build_function_type_list (integer_type_node,
13479 V8HI_type_node, integer_type_node, NULL_TREE);
13480 tree v8hi_ftype_v8hi_int_int
13481 = build_function_type_list (V8HI_type_node,
13482 V8HI_type_node, integer_type_node,
13483 integer_type_node, NULL_TREE);
13484 tree v2di_ftype_v2di_int
13485 = build_function_type_list (V2DI_type_node,
13486 V2DI_type_node, integer_type_node, NULL_TREE);
13487 tree v4si_ftype_v4si_int
13488 = build_function_type_list (V4SI_type_node,
13489 V4SI_type_node, integer_type_node, NULL_TREE);
13490 tree v8hi_ftype_v8hi_int
13491 = build_function_type_list (V8HI_type_node,
13492 V8HI_type_node, integer_type_node, NULL_TREE);
13493 tree v8hi_ftype_v8hi_v2di
13494 = build_function_type_list (V8HI_type_node,
13495 V8HI_type_node, V2DI_type_node, NULL_TREE);
13496 tree v4si_ftype_v4si_v2di
13497 = build_function_type_list (V4SI_type_node,
13498 V4SI_type_node, V2DI_type_node, NULL_TREE);
13499 tree v4si_ftype_v8hi_v8hi
13500 = build_function_type_list (V4SI_type_node,
13501 V8HI_type_node, V8HI_type_node, NULL_TREE);
13502 tree di_ftype_v8qi_v8qi
13503 = build_function_type_list (long_long_unsigned_type_node,
13504 V8QI_type_node, V8QI_type_node, NULL_TREE);
13505 tree v2di_ftype_v16qi_v16qi
13506 = build_function_type_list (V2DI_type_node,
13507 V16QI_type_node, V16QI_type_node, NULL_TREE);
13508 tree int_ftype_v16qi
13509 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13510 tree v16qi_ftype_pcchar
13511 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13512 tree void_ftype_pchar_v16qi
13513 = build_function_type_list (void_type_node,
13514 pchar_type_node, V16QI_type_node, NULL_TREE);
13515 tree v4si_ftype_pcint
13516 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13517 tree void_ftype_pcint_v4si
13518 = build_function_type_list (void_type_node,
13519 pcint_type_node, V4SI_type_node, NULL_TREE);
13520 tree v2di_ftype_v2di
13521 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13524 tree float128_type;
13526 /* The __float80 type. */
13527 if (TYPE_MODE (long_double_type_node) == XFmode)
13528 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13532 /* The __float80 type. */
13533 float80_type = make_node (REAL_TYPE);
13534 TYPE_PRECISION (float80_type) = 96;
13535 layout_type (float80_type);
13536 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13539 float128_type = make_node (REAL_TYPE);
13540 TYPE_PRECISION (float128_type) = 128;
13541 layout_type (float128_type);
13542 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13544 /* Add all builtins that are more or less simple operations on two
13546 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13548 /* Use one of the operands; the target can have a different mode for
13549 mask-generating compares. */
13550 enum machine_mode mode;
13555 mode = insn_data[d->icode].operand[1].mode;
13560 type = v16qi_ftype_v16qi_v16qi;
13563 type = v8hi_ftype_v8hi_v8hi;
13566 type = v4si_ftype_v4si_v4si;
13569 type = v2di_ftype_v2di_v2di;
13572 type = v2df_ftype_v2df_v2df;
13575 type = ti_ftype_ti_ti;
13578 type = v4sf_ftype_v4sf_v4sf;
13581 type = v8qi_ftype_v8qi_v8qi;
13584 type = v4hi_ftype_v4hi_v4hi;
13587 type = v2si_ftype_v2si_v2si;
13590 type = di_ftype_di_di;
13597 /* Override for comparisons. */
13598 if (d->icode == CODE_FOR_maskcmpv4sf3
13599 || d->icode == CODE_FOR_maskncmpv4sf3
13600 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13601 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13602 type = v4si_ftype_v4sf_v4sf;
13604 if (d->icode == CODE_FOR_maskcmpv2df3
13605 || d->icode == CODE_FOR_maskncmpv2df3
13606 || d->icode == CODE_FOR_vmmaskcmpv2df3
13607 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13608 type = v2di_ftype_v2df_v2df;
13610 def_builtin (d->mask, d->name, type, d->code);
13613 /* Add the remaining MMX insns with somewhat more complicated types. */
13614 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13615 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13616 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13617 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13618 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13620 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13621 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13622 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13624 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13625 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13627 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13628 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13630 /* comi/ucomi insns. */
13631 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13632 if (d->mask == MASK_SSE2)
13633 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13635 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13637 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13638 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13639 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13641 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13642 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13643 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13644 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13645 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13646 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13647 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13648 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13649 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13650 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13651 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13653 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13654 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13656 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13658 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13659 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13660 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13661 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13662 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13663 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13665 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13666 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13667 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13668 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13670 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13671 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13672 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13673 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13675 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13677 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13679 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13680 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13681 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13682 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13683 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13684 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13686 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13688 /* Original 3DNow! */
13689 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13690 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13691 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13692 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13693 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13694 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13695 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13696 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13697 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13698 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13699 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13700 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13701 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13702 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13703 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13704 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13705 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13706 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13707 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13708 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13710 /* 3DNow! extension as used in the Athlon CPU. */
13711 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13712 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13713 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13714 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13715 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13716 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13718 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13728 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13740 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13751 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13754 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13756 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13757 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13759 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13761 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13762 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13763 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13765 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13767 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13768 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13769 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13770 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13772 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13773 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13774 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13776 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13777 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13778 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13779 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13781 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13782 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13783 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13784 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13785 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13786 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13787 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13789 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13790 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13791 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13793 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13794 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13795 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13796 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13797 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13798 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13799 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13801 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13803 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13804 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13805 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13807 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13808 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13809 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13811 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13812 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13814 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13815 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13816 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13817 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13819 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13820 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13821 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13822 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13824 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13825 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13827 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13829 /* Prescott New Instructions. */
13830 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13831 void_ftype_pcvoid_unsigned_unsigned,
13832 IX86_BUILTIN_MONITOR);
13833 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13834 void_ftype_unsigned_unsigned,
13835 IX86_BUILTIN_MWAIT);
13836 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13838 IX86_BUILTIN_MOVSHDUP);
13839 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13841 IX86_BUILTIN_MOVSLDUP);
13842 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13843 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13844 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13845 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13846 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13847 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13850 /* Errors in the source file can cause expand_expr to return const0_rtx
13851 where we expect a vector. To avoid crashing, use one of the vector
13852 clear instructions. */
/* safe_vector_operand (X, MODE): if X came back as const0_rtx, substitute a
   fresh MODE pseudo that is explicitly zeroed with the matching MMX or SSE
   clear pattern, so later vector-operand handling never sees a bare const0.
   NOTE(review): this extract elides several original lines (the "static rtx"
   return type, the braces, the early "return x;" on the non-const0 path, the
   "else" before the SSE branch, and the trailing "return x;") -- confirm
   against the full file before relying on control flow here.  */
13854 safe_vector_operand (rtx x, enum machine_mode mode)
/* Anything other than const0_rtx needs no fixup (early return elided).  */
13856 if (x != const0_rtx)
13858 x = gen_reg_rtx (mode);
/* MMX-sized modes: clear through a DImode view with mmx_clrdi ...  */
13860 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13861 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13862 : gen_rtx_SUBREG (DImode, x, 0)));
/* ... otherwise clear through a V4SFmode view with sse_clrv4sf.  */
13864 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13865 : gen_rtx_SUBREG (V4SFmode, x, 0),
13866 CONST0_RTX (V4SFmode)));
13870 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-argument builtin: evaluate both arguments from ARGLIST,
   coerce them into the operand modes insn pattern ICODE expects, and emit
   the pattern with TARGET (or a fresh pseudo) as the destination.
   NOTE(review): the extract elides the "static rtx" return type, the braces,
   the "rtx pat;" declaration, the opening "if (! target" of the target test,
   the abort on a mode mismatch, and the final pat-check/emit/return --
   verify against the full source.  */
13873 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13876 tree arg0 = TREE_VALUE (arglist);
13877 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13878 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13879 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13880 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13881 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13882 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx from erroneous source (see safe_vector_operand). */
13884 if (VECTOR_MODE_P (mode0))
13885 op0 = safe_vector_operand (op0, mode0);
13886 if (VECTOR_MODE_P (mode1))
13887 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only if it exists, has the right mode, and satisfies the
   destination predicate (the leading "if (! target" line is elided).  */
13890 || GET_MODE (target) != tmode
13891 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13892 target = gen_reg_rtx (tmode);
/* An SImode shift count destined for a TImode operand is widened by
   loading it into a V4SImode pseudo and viewing that as TImode.  */
13894 if (GET_MODE (op1) == SImode && mode1 == TImode)
13896 rtx x = gen_reg_rtx (V4SImode);
13897 emit_insn (gen_sse2_loadd (x, op1));
13898 op1 = gen_lowpart (TImode, x);
13901 /* In case the insn wants input operands in modes different from
13902 the result, abort. */
13903 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13904 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force each operand into a register when the pattern's predicate
   rejects it as-is.  */
13907 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13908 op0 = copy_to_mode_reg (mode0, op0);
13909 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13910 op1 = copy_to_mode_reg (mode1, op1);
13912 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13913 yet one of the two must not be a memory. This is normally enforced
13914 by expanders, but we didn't bother to create one here. */
13915 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13916 op0 = copy_to_mode_reg (mode0, op0);
13918 pat = GEN_FCN (icode) (target, op0, op1);
13925 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin: arg0 is a pointer (dereferenced as a MODE0
   memory), arg1 is the value, forced into a MODE1 register; then the
   pattern is emitted as (mem, reg).
   NOTE(review): the "static rtx" return type, braces, "rtx pat;"
   declaration, and the final pat-check/emit/return are elided in this
   extract.  */
13928 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13931 tree arg0 = TREE_VALUE (arglist);
13932 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13933 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13934 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13935 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13936 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* The stored value may be a vector; guard against const0_rtx.  */
13938 if (VECTOR_MODE_P (mode1))
13939 op1 = safe_vector_operand (op1, mode1);
/* Build the destination MEM from the pointer argument (address forced
   into a Pmode register), and force the value into a register.  */
13941 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13942 op1 = copy_to_mode_reg (mode1, op1);
13944 pat = GEN_FCN (icode) (op0, op1);
13950 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-argument builtin.  When DO_LOAD is nonzero the argument is
   a pointer and is dereferenced as a MODE0 memory; otherwise it is used
   directly (guarded by safe_vector_operand for vector modes).
   NOTE(review): the "static rtx" return type, braces, "rtx pat;"
   declaration, the leading "if (! target" line, the "if (do_load)"/"else"
   around the MEM construction, and the final pat-check/emit/return are
   elided in this extract.  */
13953 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13954 rtx target, int do_load)
13957 tree arg0 = TREE_VALUE (arglist);
13958 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13959 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13960 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if usable as the destination (leading line elided).  */
13963 || GET_MODE (target) != tmode
13964 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13965 target = gen_reg_rtx (tmode);
/* do_load path: treat op0 as an address and load through it.  */
13967 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13970 if (VECTOR_MODE_P (mode0))
13971 op0 = safe_vector_operand (op0, mode0);
13973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13974 op0 = copy_to_mode_reg (mode0, op0);
13977 pat = GEN_FCN (icode) (target, op0);
13984 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13985 sqrtss, rsqrtss, rcpss. */
/* These scalar SSE patterns take two input operands even though the
   builtin is logically unary; the second operand supplies the pass-through
   upper elements.
   NOTE(review): the "static rtx" return type, braces, the line that
   initializes op1 (presumably from op0 -- confirm), and the final
   pat-check/emit/return are elided in this extract.  */
13988 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13991 tree arg0 = TREE_VALUE (arglist);
13992 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13993 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13994 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Reuse TARGET only if usable as the destination (leading line elided).  */
13997 || GET_MODE (target) != tmode
13998 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13999 target = gen_reg_rtx (tmode);
14001 if (VECTOR_MODE_P (mode0))
14002 op0 = safe_vector_operand (op0, mode0);
14004 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14005 op0 = copy_to_mode_reg (mode0, op0);
/* Note: op1 is checked against operand[2]'s predicate but in mode0; its
   assignment (original line 14007) is not visible here.  */
14008 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14009 op1 = copy_to_mode_reg (mode0, op1);
14011 pat = GEN_FCN (icode) (target, op0, op1);
14018 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand a mask-producing SSE compare described by D: evaluate both vector
   arguments, optionally swap them (for comparisons the hardware only has
   in one direction), and emit the pattern with the comparison RTX as an
   extra operand.
   NOTE(review): the "static rtx" return type, braces, local declarations,
   the condition guarding the operand swap and its tail (op1 = op0;
   op0 = tmp;), the leading "if (! target" line, and the final
   pat-check/emit/return are elided in this extract.  */
14021 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14025 tree arg0 = TREE_VALUE (arglist);
14026 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14027 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14028 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14030 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14031 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14032 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14033 enum rtx_code comparison = d->comparison;
14035 if (VECTOR_MODE_P (mode0))
14036 op0 = safe_vector_operand (op0, mode0);
14037 if (VECTOR_MODE_P (mode1))
14038 op1 = safe_vector_operand (op1, mode1);
14040 /* Swap operands if we have a comparison that isn't available in
/* (rest of the comment and the guarding condition are elided here)  */
14044 rtx tmp = gen_reg_rtx (mode1);
14045 emit_move_insn (tmp, op1);
/* Reuse TARGET only if usable as the destination (leading line elided).  */
14051 || GET_MODE (target) != tmode
14052 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14053 target = gen_reg_rtx (tmode);
14055 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14056 op0 = copy_to_mode_reg (mode0, op0);
14057 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14058 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed to the pattern as a fourth operand.  */
14060 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14061 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14068 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin: emit the flag-setting compare pattern on
   the two vector operands, then materialize COMPARISON's truth value into
   the low byte of a zeroed SImode pseudo via STRICT_LOW_PART, and return
   the SImode register.
   NOTE(review): this extract elides the "static rtx" return type, braces,
   local declarations, the operand-swap block after the 14089 comment, the
   pat-check before the emit, and the second operand of the comparison RTX
   at 14114 (presumably the flags register -- confirm).  */
14071 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14075 tree arg0 = TREE_VALUE (arglist);
14076 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14077 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14078 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14080 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14081 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14082 enum rtx_code comparison = d->comparison;
14084 if (VECTOR_MODE_P (mode0))
14085 op0 = safe_vector_operand (op0, mode0);
14086 if (VECTOR_MODE_P (mode1))
14087 op1 = safe_vector_operand (op1, mode1);
14089 /* Swap operands if we have a comparison that isn't available in
/* (swap logic elided between here and the target setup)  */
/* Result register: SImode pseudo zeroed first so only the low byte is
   written by the STRICT_LOW_PART store below.  */
14098 target = gen_reg_rtx (SImode);
14099 emit_move_insn (target, const0_rtx);
14100 target = gen_rtx_SUBREG (QImode, target, 0);
14102 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14103 op0 = copy_to_mode_reg (mode0, op0);
14104 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14105 op1 = copy_to_mode_reg (mode1, op1);
/* op2 is built here but the visible code passes only (op0, op1) to the
   pattern; the comparison is applied in the SET emitted below.  */
14107 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1)
14108 pat = GEN_FCN (d->icode) (op0, op1);
14112 emit_insn (gen_rtx_SET (VOIDmode,
14113 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14114 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register underlying the QImode SUBREG.  */
14118 return SUBREG_REG (target);
14121 /* Expand an expression EXP that calls a built-in function,
14122 with result going to TARGET if that's convenient
14123 (and in mode MODE if that's convenient).
14124 SUBTARGET may be used as the target for computing one of EXP's operands.
14125 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this listing is missing many lines (the embedded numbering
   jumps): the return type, the `switch (fcode)` header, most `break`s,
   `return` statements, braces and several conditions are not visible.
   The code below is documented as-is; do not assume the gaps.  */
14128 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14129 enum machine_mode mode ATTRIBUTE_UNUSED,
14130 int ignore ATTRIBUTE_UNUSED)
/* Scratch state shared by all the cases below.  */
14132 const struct builtin_description *d;
14134 enum insn_code icode;
14135 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14136 tree arglist = TREE_OPERAND (exp, 1);
14137 tree arg0, arg1, arg2;
14138 rtx op0, op1, op2, pat;
14139 enum machine_mode tmode, mode0, mode1, mode2;
14140 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* No-operand builtins: just emit the corresponding insn.  */
14144 case IX86_BUILTIN_EMMS:
14145 emit_insn (gen_emms ());
14148 case IX86_BUILTIN_SFENCE:
14149 emit_insn (gen_sfence ());
/* PEXTRW: extract a 16-bit element; the selector (operand 2) must be an
   immediate accepted by the pattern's predicate.  */
14152 case IX86_BUILTIN_PEXTRW:
14153 case IX86_BUILTIN_PEXTRW128:
14154 icode = (fcode == IX86_BUILTIN_PEXTRW
14155 ? CODE_FOR_mmx_pextrw
14156 : CODE_FOR_sse2_pextrw);
14157 arg0 = TREE_VALUE (arglist);
14158 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14159 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14160 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14161 tmode = insn_data[icode].operand[0].mode;
14162 mode0 = insn_data[icode].operand[1].mode;
14163 mode1 = insn_data[icode].operand[2].mode;
14165 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14166 op0 = copy_to_mode_reg (mode0, op0);
14167 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
/* Non-immediate selector: diagnose and return a dummy register so
   expansion can continue.  */
14169 error ("selector must be an integer constant in the range 0..%i",
14170 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14171 return gen_reg_rtx (tmode);
14174 || GET_MODE (target) != tmode
14175 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14176 target = gen_reg_rtx (tmode);
14177 pat = GEN_FCN (icode) (target, op0, op1);
/* PINSRW: insert a 16-bit element at an immediate position.  */
14183 case IX86_BUILTIN_PINSRW:
14184 case IX86_BUILTIN_PINSRW128:
14185 icode = (fcode == IX86_BUILTIN_PINSRW
14186 ? CODE_FOR_mmx_pinsrw
14187 : CODE_FOR_sse2_pinsrw);
14188 arg0 = TREE_VALUE (arglist);
14189 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14190 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14191 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14192 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14193 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14194 tmode = insn_data[icode].operand[0].mode;
14195 mode0 = insn_data[icode].operand[1].mode;
14196 mode1 = insn_data[icode].operand[2].mode;
14197 mode2 = insn_data[icode].operand[3].mode;
14199 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14200 op0 = copy_to_mode_reg (mode0, op0);
14201 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14202 op1 = copy_to_mode_reg (mode1, op1);
14203 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14205 error ("selector must be an integer constant in the range 0..%i",
14206 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14210 || GET_MODE (target) != tmode
14211 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14212 target = gen_reg_rtx (tmode);
14213 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* MASKMOVQ/MASKMOVDQU: byte-masked store; pattern choice depends on
   64-bit mode.  */
14219 case IX86_BUILTIN_MASKMOVQ:
14220 case IX86_BUILTIN_MASKMOVDQU:
14221 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14222 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14223 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14224 : CODE_FOR_sse2_maskmovdqu));
14225 /* Note the arg order is different from the operand order. */
14226 arg1 = TREE_VALUE (arglist);
14227 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14228 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14229 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14230 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14231 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14232 mode0 = insn_data[icode].operand[0].mode;
14233 mode1 = insn_data[icode].operand[1].mode;
14234 mode2 = insn_data[icode].operand[2].mode;
14236 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14237 op0 = copy_to_mode_reg (mode0, op0);
14238 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14239 op1 = copy_to_mode_reg (mode1, op1);
14240 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14241 op2 = copy_to_mode_reg (mode2, op2);
14242 pat = GEN_FCN (icode) (op0, op1, op2);
/* Simple SSE scalar/vector ops delegated to the generic expanders.  */
14248 case IX86_BUILTIN_SQRTSS:
14249 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14250 case IX86_BUILTIN_RSQRTSS:
14251 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14252 case IX86_BUILTIN_RCPSS:
14253 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14255 case IX86_BUILTIN_LOADAPS:
14256 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14258 case IX86_BUILTIN_LOADUPS:
14259 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14261 case IX86_BUILTIN_STOREAPS:
14262 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14264 case IX86_BUILTIN_STOREUPS:
14265 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14267 case IX86_BUILTIN_LOADSS:
14268 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14270 case IX86_BUILTIN_STORESS:
14271 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
/* LOADH/LOADL: combine a vector register with a half loaded from memory;
   the memory operand is wrapped in a MEM of the pattern's operand-2 mode.  */
14273 case IX86_BUILTIN_LOADHPS:
14274 case IX86_BUILTIN_LOADLPS:
14275 case IX86_BUILTIN_LOADHPD:
14276 case IX86_BUILTIN_LOADLPD:
14277 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14278 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14279 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14280 : CODE_FOR_sse2_movsd);
14281 arg0 = TREE_VALUE (arglist);
14282 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14283 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14284 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14285 tmode = insn_data[icode].operand[0].mode;
14286 mode0 = insn_data[icode].operand[1].mode;
14287 mode1 = insn_data[icode].operand[2].mode;
14289 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14290 op0 = copy_to_mode_reg (mode0, op0);
14291 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14293 || GET_MODE (target) != tmode
14294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14295 target = gen_reg_rtx (tmode);
14296 pat = GEN_FCN (icode) (target, op0, op1);
/* STOREH/STOREL: the mirror of the above; destination is memory.  */
14302 case IX86_BUILTIN_STOREHPS:
14303 case IX86_BUILTIN_STORELPS:
14304 case IX86_BUILTIN_STOREHPD:
14305 case IX86_BUILTIN_STORELPD:
14306 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14307 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14308 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14309 : CODE_FOR_sse2_movsd);
14310 arg0 = TREE_VALUE (arglist);
14311 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14312 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14313 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14314 mode0 = insn_data[icode].operand[1].mode;
14315 mode1 = insn_data[icode].operand[2].mode;
14317 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14318 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14319 op1 = copy_to_mode_reg (mode1, op1);
/* op0 is used both as destination and as the pass-through source.  */
14321 pat = GEN_FCN (icode) (op0, op0, op1);
14327 case IX86_BUILTIN_MOVNTPS:
14328 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14329 case IX86_BUILTIN_MOVNTQ:
14330 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* LDMXCSR/STMXCSR go through a stack temporary since the insns only
   take memory operands.  */
14332 case IX86_BUILTIN_LDMXCSR:
14333 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14334 target = assign_386_stack_local (SImode, 0);
14335 emit_move_insn (target, op0);
14336 emit_insn (gen_ldmxcsr (target));
14339 case IX86_BUILTIN_STMXCSR:
14340 target = assign_386_stack_local (SImode, 0);
14341 emit_insn (gen_stmxcsr (target));
14342 return copy_to_mode_reg (SImode, target);
/* SHUFPS/SHUFPD: the mask (operand 3) must be an immediate.  */
14344 case IX86_BUILTIN_SHUFPS:
14345 case IX86_BUILTIN_SHUFPD:
14346 icode = (fcode == IX86_BUILTIN_SHUFPS
14347 ? CODE_FOR_sse_shufps
14348 : CODE_FOR_sse2_shufpd);
14349 arg0 = TREE_VALUE (arglist);
14350 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14351 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14352 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14353 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14354 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14355 tmode = insn_data[icode].operand[0].mode;
14356 mode0 = insn_data[icode].operand[1].mode;
14357 mode1 = insn_data[icode].operand[2].mode;
14358 mode2 = insn_data[icode].operand[3].mode;
14360 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14361 op0 = copy_to_mode_reg (mode0, op0);
14362 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14363 op1 = copy_to_mode_reg (mode1, op1);
14364 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14366 /* @@@ better error message */
14367 error ("mask must be an immediate");
14368 return gen_reg_rtx (tmode);
14371 || GET_MODE (target) != tmode
14372 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14373 target = gen_reg_rtx (tmode);
14374 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* PSHUF*: one vector input plus an immediate shuffle mask.  */
14380 case IX86_BUILTIN_PSHUFW:
14381 case IX86_BUILTIN_PSHUFD:
14382 case IX86_BUILTIN_PSHUFHW:
14383 case IX86_BUILTIN_PSHUFLW:
14384 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14385 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14386 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14387 : CODE_FOR_mmx_pshufw);
14388 arg0 = TREE_VALUE (arglist);
14389 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14390 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14391 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14392 tmode = insn_data[icode].operand[0].mode;
14393 mode1 = insn_data[icode].operand[1].mode;
14394 mode2 = insn_data[icode].operand[2].mode;
14396 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14397 op0 = copy_to_mode_reg (mode1, op0);
14398 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14400 /* @@@ better error message */
14401 error ("mask must be an immediate")��
14405 || GET_MODE (target) != tmode
14406 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14407 target = gen_reg_rtx (tmode);
14408 pat = GEN_FCN (icode) (target, op0, op1);
/* PSLLDQ/PSRLDQ: full-register byte shifts, done in TImode via subregs
   of a V2DImode pseudo.  */
14414 case IX86_BUILTIN_PSLLDQI128:
14415 case IX86_BUILTIN_PSRLDQI128:
14416 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14417 : CODE_FOR_sse2_lshrti3);
14418 arg0 = TREE_VALUE (arglist);
14419 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14420 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14421 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14422 tmode = insn_data[icode].operand[0].mode;
14423 mode1 = insn_data[icode].operand[1].mode;
14424 mode2 = insn_data[icode].operand[2].mode;
14426 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14428 op0 = copy_to_reg (op0);
14429 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14431 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14433 error ("shift must be an immediate");
14436 target = gen_reg_rtx (V2DImode);
14437 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
/* 3DNow! builtins, all delegated to the generic unop/binop expanders.  */
14443 case IX86_BUILTIN_FEMMS:
14444 emit_insn (gen_femms ());
14447 case IX86_BUILTIN_PAVGUSB:
14448 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14450 case IX86_BUILTIN_PF2ID:
14451 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14453 case IX86_BUILTIN_PFACC:
14454 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14456 case IX86_BUILTIN_PFADD:
14457 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14459 case IX86_BUILTIN_PFCMPEQ:
14460 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14462 case IX86_BUILTIN_PFCMPGE:
14463 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14465 case IX86_BUILTIN_PFCMPGT:
14466 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14468 case IX86_BUILTIN_PFMAX:
14469 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14471 case IX86_BUILTIN_PFMIN:
14472 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14474 case IX86_BUILTIN_PFMUL:
14475 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14477 case IX86_BUILTIN_PFRCP:
14478 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14480 case IX86_BUILTIN_PFRCPIT1:
14481 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14483 case IX86_BUILTIN_PFRCPIT2:
14484 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14486 case IX86_BUILTIN_PFRSQIT1:
14487 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14489 case IX86_BUILTIN_PFRSQRT:
14490 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14492 case IX86_BUILTIN_PFSUB:
14493 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14495 case IX86_BUILTIN_PFSUBR:
14496 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14498 case IX86_BUILTIN_PI2FD:
14499 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14501 case IX86_BUILTIN_PMULHRW:
14502 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14504 case IX86_BUILTIN_PF2IW:
14505 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14507 case IX86_BUILTIN_PFNACC:
14508 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14510 case IX86_BUILTIN_PFPNACC:
14511 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14513 case IX86_BUILTIN_PI2FW:
14514 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14516 case IX86_BUILTIN_PSWAPDSI:
14517 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14519 case IX86_BUILTIN_PSWAPDSF:
14520 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
/* Zero-register builtins.  */
14522 case IX86_BUILTIN_SSE_ZERO:
14523 target = gen_reg_rtx (V4SFmode);
14524 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14527 case IX86_BUILTIN_MMX_ZERO:
14528 target = gen_reg_rtx (DImode);
14529 emit_insn (gen_mmx_clrdi (target));
14532 case IX86_BUILTIN_CLRTI:
14533 target = gen_reg_rtx (V2DImode);
14534 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
/* SSE2 load/store builtins.  */
14538 case IX86_BUILTIN_SQRTSD:
14539 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14540 case IX86_BUILTIN_LOADAPD:
14541 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14542 case IX86_BUILTIN_LOADUPD:
14543 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14545 case IX86_BUILTIN_STOREAPD:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14547 case IX86_BUILTIN_STOREUPD:
14548 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14550 case IX86_BUILTIN_LOADSD:
14551 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14553 case IX86_BUILTIN_STORESD:
14554 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
/* SETPD1: splat one double via a stack slot, then a shufpd with mask 0.  */
14556 case IX86_BUILTIN_SETPD1:
14557 target = assign_386_stack_local (DFmode, 0);
14558 arg0 = TREE_VALUE (arglist);
14559 emit_move_insn (adjust_address (target, DFmode, 0),
14560 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14561 op0 = gen_reg_rtx (V2DFmode);
14562 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14563 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
/* SETPD: build a V2DF from two doubles through a stack temporary.  */
14566 case IX86_BUILTIN_SETPD:
14567 target = assign_386_stack_local (V2DFmode, 0);
14568 arg0 = TREE_VALUE (arglist);
14569 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14570 emit_move_insn (adjust_address (target, DFmode, 0),
14571 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14572 emit_move_insn (adjust_address (target, DFmode, 8),
14573 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14574 op0 = gen_reg_rtx (V2DFmode);
14575 emit_insn (gen_sse2_movapd (op0, target));
/* LOADRPD: aligned load then swap the halves (shufpd mask 1).  */
14578 case IX86_BUILTIN_LOADRPD:
14579 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14580 gen_reg_rtx (V2DFmode), 1);
14581 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
/* LOADPD1: scalar load then splat (shufpd mask 0).  */
14584 case IX86_BUILTIN_LOADPD1:
14585 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14586 gen_reg_rtx (V2DFmode), 1);
14587 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14590 case IX86_BUILTIN_STOREPD1:
14591 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14592 case IX86_BUILTIN_STORERPD:
14593 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14595 case IX86_BUILTIN_CLRPD:
14596 target = gen_reg_rtx (V2DFmode);
14597 emit_insn (gen_sse_clrv2df (target));
/* Memory fences and cache-line flush.  */
14600 case IX86_BUILTIN_MFENCE:
14601 emit_insn (gen_sse2_mfence ());
14603 case IX86_BUILTIN_LFENCE:
14604 emit_insn (gen_sse2_lfence ());
14607 case IX86_BUILTIN_CLFLUSH:
14608 arg0 = TREE_VALUE (arglist);
14609 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14610 icode = CODE_FOR_sse2_clflush;
14611 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14612 op0 = copy_to_mode_reg (Pmode, op0);
14614 emit_insn (gen_sse2_clflush (op0));
/* Non-temporal stores.  */
14617 case IX86_BUILTIN_MOVNTPD:
14618 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14619 case IX86_BUILTIN_MOVNTDQ:
14620 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14621 case IX86_BUILTIN_MOVNTI:
14622 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14624 case IX86_BUILTIN_LOADDQA:
14625 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14626 case IX86_BUILTIN_LOADDQU:
14627 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14628 case IX86_BUILTIN_LOADD:
14629 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14631 case IX86_BUILTIN_STOREDQA:
14632 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14633 case IX86_BUILTIN_STOREDQU:
14634 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14635 case IX86_BUILTIN_STORED:
14636 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
/* MONITOR/MWAIT (SSE3): operands go into fixed SImode registers; the
   conditions guarding these copies are not visible in this listing.  */
14638 case IX86_BUILTIN_MONITOR:
14639 arg0 = TREE_VALUE (arglist);
14640 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14641 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14642 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14643 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14644 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14646 op0 = copy_to_mode_reg (SImode, op0);
14648 op1 = copy_to_mode_reg (SImode, op1);
14650 op2 = copy_to_mode_reg (SImode, op2);
14651 emit_insn (gen_monitor (op0, op1, op2));
14654 case IX86_BUILTIN_MWAIT:
14655 arg0 = TREE_VALUE (arglist);
14656 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14657 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14658 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14660 op0 = copy_to_mode_reg (SImode, op0);
14662 op1 = copy_to_mode_reg (SImode, op1);
14663 emit_insn (gen_mwait (op0, op1));
14666 case IX86_BUILTIN_LOADDDUP:
14667 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14669 case IX86_BUILTIN_LDDQU:
14670 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
/* Fallback: look the builtin up in the description tables.  Comparison
   patterns are routed through ix86_expand_sse_compare.  */
14677 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14678 if (d->code == fcode)
14680 /* Compares are treated specially. */
14681 if (d->icode == CODE_FOR_maskcmpv4sf3
14682 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14683 || d->icode == CODE_FOR_maskncmpv4sf3
14684 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14685 || d->icode == CODE_FOR_maskcmpv2df3
14686 || d->icode == CODE_FOR_vmmaskcmpv2df3
14687 || d->icode == CODE_FOR_maskncmpv2df3
14688 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14689 return ix86_expand_sse_compare (d, arglist, target);
14691 return ix86_expand_binop_builtin (d->icode, arglist, target);
14694 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14695 if (d->code == fcode)
14696 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14698 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14699 if (d->code == fcode)
14700 return ix86_expand_sse_comi (d, arglist, target);
14702 /* @@@ Should really do something sensible here. */
14706 /* Store OPERAND to the memory after reload is completed. This means
14707 that we can't easily use assign_stack_local. */
/* NOTE(review): lines are missing from this listing (numbering jumps);
   return type, braces, several case labels and statements are not visible.
   The visible code either reuses the red zone below the stack pointer or
   pushes the operand with PRE_DEC stores.  */
14709 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* Only valid after reload (assign_stack_local is no longer usable).  */
14712 if (!reload_completed)
14714 if (TARGET_RED_ZONE)
/* Red zone available: store just below the stack pointer without
   adjusting it.  */
14716 result = gen_rtx_MEM (mode,
14717 gen_rtx_PLUS (Pmode,
14719 GEN_INT (-RED_ZONE_SIZE)));
14720 emit_move_insn (result, operand);
14722 else if (!TARGET_RED_ZONE && TARGET_64BIT)
/* 64-bit without red zone: push as a DImode lowpart.  */
14728 operand = gen_lowpart (DImode, operand);
14732 gen_rtx_SET (VOIDmode,
14733 gen_rtx_MEM (DImode,
14734 gen_rtx_PRE_DEC (DImode,
14735 stack_pointer_rtx)),
14741 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode words and push each half.  */
14750 split_di (&operand, 1, operands, operands + 1);
14752 gen_rtx_SET (VOIDmode,
14753 gen_rtx_MEM (SImode,
14754 gen_rtx_PRE_DEC (Pmode,
14755 stack_pointer_rtx)),
14758 gen_rtx_SET (VOIDmode,
14759 gen_rtx_MEM (SImode,
14760 gen_rtx_PRE_DEC (Pmode,
14761 stack_pointer_rtx)),
14766 /* It is better to store HImodes as SImodes. */
14767 if (!TARGET_PARTIAL_REG_STALL)
14768 operand = gen_lowpart (SImode, operand);
14772 gen_rtx_SET (VOIDmode,
14773 gen_rtx_MEM (GET_MODE (operand),
14774 gen_rtx_PRE_DEC (SImode,
14775 stack_pointer_rtx)),
/* The pushed value now lives at the new top of stack.  */
14781 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14786 /* Free operand from the memory. */
/* NOTE(review): return type, braces and the size computation assignments
   are missing from this listing; only the control flow skeleton is
   visible.  Pops the slot pushed by ix86_force_to_memory — a no-op when
   the red zone was used (nothing was pushed).  */
14788 ix86_free_from_memory (enum machine_mode mode)
14790 if (!TARGET_RED_ZONE)
/* Slot size: 8 bytes for DImode or any mode in 64-bit, HImode may have
   been widened to SImode unless partial-reg stalls forbade it.  */
14794 if (mode == DImode || TARGET_64BIT)
14796 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14800 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14801 to pop or add instruction if registers are available. */
14802 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14803 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14808 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14809 QImode must go into class Q_REGS.
14810 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14811 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): the return type and the returned class on several branches
   are missing from this listing; only the tests themselves are visible.  */
14813 ix86_preferred_reload_class (rtx x, enum reg_class class)
/* Non-zero vector constants cannot be materialized in registers.  */
14815 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14817 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14819 /* SSE can't load any constant directly yet. */
14820 if (SSE_CLASS_P (class))
14822 /* Floats can load 0 and 1. */
14823 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14825 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14826 if (MAYBE_SSE_CLASS_P (class))
14827 return (reg_class_subset_p (class, GENERAL_REGS)
14828 ? GENERAL_REGS : FLOAT_REGS);
14832 /* General regs can load everything. */
14833 if (reg_class_subset_p (class, GENERAL_REGS))
14834 return GENERAL_REGS;
14835 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14836 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX cannot hold constants; QImode values must go into Q_REGS.  */
14839 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14841 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14846 /* If we are copying between general and FP registers, we need a memory
14847 location. The same is true for SSE and MMX registers.
14849 The macro can't work reliably when one of the CLASSES is class containing
14850 registers from multiple units (SSE, MMX, integer). We avoid this by never
14851 combining those units in single alternative in the machine description.
14852 Ensure that this constraint holds to avoid unexpected surprises.
14854 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14855 enforce these sanity checks. */
/* NOTE(review): the return type and the sanity-check body (lines between
   the mixed-class test and the final return) are not visible here.  */
14857 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14858 enum machine_mode mode, int strict)
/* A "maybe" class that isn't a pure class mixes register units — the
   documented invariant forbids this.  */
14860 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14861 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14862 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14863 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14864 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14865 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed for FP<->non-FP moves always, and for SSE/MMX unit
   crossings except for direct SImode/DImode moves when inter-unit moves
   are enabled.  */
14872 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14873 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14874 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14875 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14876 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14878 /* Return the cost of moving data from a register in class CLASS1 to
14879 one in class CLASS2.
14881 It is not required that the cost always equal 2 when FROM is the same as TO;
14882 on some machines it is expensive to move between registers if they are not
14883 general registers. */
/* NOTE(review): the return type, the `cost` declaration/initialization,
   some returns and the final default return are not visible in this
   listing (numbering jumps).  */
14885 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14886 enum reg_class class2)
14888 /* In case we require secondary memory, compute cost of the store followed
14889 by load. In order to avoid bad register allocation choices, we need
14890 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14892 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Cost of a round trip through memory: worst-case store + load for
   each side of the move.  */
14896 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14897 MEMORY_MOVE_COST (mode, class1, 1));
14898 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14899 MEMORY_MOVE_COST (mode, class2, 1));
14901 /* In case of copying from general_purpose_register we may emit multiple
14902 stores followed by single load causing memory size mismatch stall.
14903 Count this as arbitrarily high cost of 20. */
14904 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14907 /* In the case of FP/MMX moves, the registers actually overlap, and we
14908 have to switch modes in order to treat them differently. */
14909 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14910 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14916 /* Moves between SSE/MMX and integer unit are expensive. */
14917 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14918 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14919 return ix86_cost->mmxsse_to_integer;
/* Moves within one unit use that unit's per-processor move cost.  */
14920 if (MAYBE_FLOAT_CLASS_P (class1))
14921 return ix86_cost->fp_move;
14922 if (MAYBE_SSE_CLASS_P (class1))
14923 return ix86_cost->sse_move;
14924 if (MAYBE_MMX_CLASS_P (class1))
14925 return ix86_cost->mmx_move;
14929 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): the return type, braces and the returns on a few branches
   are missing from this listing.  */
14931 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14933 /* Flags and only flags can only hold CCmode values. */
14934 if (CC_REGNO_P (regno))
14935 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC/RANDOM/PARTIAL_INT modes are rejected for all other registers
   (the return value of this branch is not visible here).  */
14936 if (GET_MODE_CLASS (mode) == MODE_CC
14937 || GET_MODE_CLASS (mode) == MODE_RANDOM
14938 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14940 if (FP_REGNO_P (regno))
14941 return VALID_FP_MODE_P (mode);
14942 if (SSE_REGNO_P (regno))
14944 /* HACK! We didn't change all of the constraints for SSE1 for the
14945 scalar modes on the branch. Fortunately, they're not required
14946 for ABI compatibility. */
14947 if (!TARGET_SSE2 && !VECTOR_MODE_P (mode))
14948 return VALID_SSE_REG_MODE (mode);
14950 /* We implement the move patterns for all vector modes into and
14951 out of SSE registers, even when no operation instructions
14953 return (VALID_SSE_REG_MODE (mode)
14954 || VALID_SSE2_REG_MODE (mode)
14955 || VALID_MMX_REG_MODE (mode)
14956 || VALID_MMX_REG_MODE_3DNOW (mode));
14958 if (MMX_REGNO_P (regno))
14960 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14961 so if the register is available at all, then we can move data of
14962 the given mode into or out of it. */
14963 return (VALID_MMX_REG_MODE (mode)
14964 || VALID_MMX_REG_MODE_3DNOW (mode));
14966 /* We handle both integer and floats in the general purpose registers.
14967 In future we should be able to handle vector modes as well. */
14968 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14970 /* Take care for QImode values - they can be in non-QI regs, but then
14971 they do cause partial register stalls. */
14972 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Non-QI regs in QImode: allow only when stalls are acceptable or we
   are past the point of choosing (during/after reload).  */
14974 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14977 /* Return the cost of moving data of mode M between a
14978 register and memory. A value of 2 is the default; this cost is
14979 relative to those in `REGISTER_MOVE_COST'.
14981 If moving between registers and memory is more expensive than
14982 between two registers, you should define this macro to express the
14985 Model also increased moving costs of QImode registers in non
/* NOTE(review): the return type, the `index` declarations/assignments and
   most of the switch bodies are missing from this listing; only the
   per-class dispatch skeleton is visible.  */
14989 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* x87 classes: cost table indexed by mode size (index computed in the
   elided lines).  */
14991 if (FLOAT_CLASS_P (class))
15008 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15010 if (SSE_CLASS_P (class))
15013 switch (GET_MODE_SIZE (mode))
15027 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15029 if (MMX_CLASS_P (class))
15032 switch (GET_MODE_SIZE (mode))
15043 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: size 1 distinguishes Q_REGS (plain byte move) from
   non-Q regs (movzbl load / penalized store).  */
15045 switch (GET_MODE_SIZE (mode))
15049 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15050 : ix86_cost->movzbl_load);
15052 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15053 : ix86_cost->int_store[0] + 4);
15056 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15058 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
15059 if (mode == TFmode)
15061 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15062 * (((int) GET_MODE_SIZE (mode)
15063 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15067 /* Compute a (partial) cost for rtx X. Return true if the complete
15068 cost has been computed, and false if subexpressions should be
15069 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this excerpt is fragmentary -- the switch framing, the
   case labels and several braces are not visible here, so the notes
   below annotate only the visible arms; confirm against the full file. */
15072 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15074 enum machine_mode mode = GET_MODE (x);
/* Constant operands: 64-bit immediates that do not fit sign/zero
   extended forms, and PIC symbolic references, are costed here
   (the assignments between the conditions are elided in this view). */
15082 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15084 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15086 else if (flag_pic && SYMBOLIC_CONST (x)
15088 || (!GET_CODE (x) != LABEL_REF
15089 && (GET_CODE (x) != SYMBOL_REF
15090 || !SYMBOL_REF_LOCAL_P (x)))))
15097 if (mode == VOIDmode)
/* FP constants loadable by the 80387 (fldz/fld1/...) are special-cased. */
15100 switch (standard_80387_constant_p (x))
15105 default: /* Other constants */
15110 /* Start with (MEM (SYMBOL_REF)), since that's where
15111 it'll probably end up. Add a penalty for size. */
15112 *total = (COSTS_N_INSNS (1)
15113 + (flag_pic != 0 && !TARGET_64BIT)
15114 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15120 /* The zero extensions is often completely free on x86_64, so make
15121 it as cheap as possible. */
15122 if (TARGET_64BIT && mode == DImode
15123 && GET_MODE (XEXP (x, 0)) == SImode)
15125 else if (TARGET_ZERO_EXTEND_WITH_AND)
15126 *total = COSTS_N_INSNS (ix86_cost->add)
15128 *total = COSTS_N_INSNS (ix86_cost->movzx);
15132 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Presumably the shift cases follow (case labels elided); small
   constant shifts may be cheaper as LEA when the target allows it. */
15136 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15137 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15139 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15142 *total = COSTS_N_INSNS (ix86_cost->add);
15145 if ((value == 2 || value == 3)
15146 && !TARGET_DECOMPOSE_LEA
15147 && ix86_cost->lea <= ix86_cost->shift_const)
15149 *total = COSTS_N_INSNS (ix86_cost->lea);
/* 32-bit DImode shifts are synthesized from 32-bit operations, hence
   the doubled/augmented shift costs below. */
15159 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15161 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15163 if (INTVAL (XEXP (x, 1)) > 32)
15164 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15166 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15170 if (GET_CODE (XEXP (x, 1)) == AND)
15171 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15173 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15178 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15179 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15181 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* MULT: cost by-constant multiplies per set bit; unknown multipliers
   get an arbitrary 7-bit estimate. */
15186 if (FLOAT_MODE_P (mode))
15187 *total = COSTS_N_INSNS (ix86_cost->fmul);
15188 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15190 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15193 for (nbits = 0; value != 0; value >>= 1)
15196 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15197 + nbits * ix86_cost->mult_bit);
15201 /* This is arbitrary */
15202 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15203 + 7 * ix86_cost->mult_bit);
15211 if (FLOAT_MODE_P (mode))
15212 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15214 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: recognize addressing-mode shapes (base + index*scale + disp)
   that a single LEA can compute, and cost them as LEA plus operands. */
15218 if (FLOAT_MODE_P (mode))
15219 *total = COSTS_N_INSNS (ix86_cost->fadd);
15220 else if (!TARGET_DECOMPOSE_LEA
15221 && GET_MODE_CLASS (mode) == MODE_INT
15222 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15224 if (GET_CODE (XEXP (x, 0)) == PLUS
15225 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15226 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15227 && CONSTANT_P (XEXP (x, 1)))
15229 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15230 if (val == 2 || val == 4 || val == 8)
15232 *total = COSTS_N_INSNS (ix86_cost->lea);
15233 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15234 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15236 *total += rtx_cost (XEXP (x, 1), outer_code);
15240 else if (GET_CODE (XEXP (x, 0)) == MULT
15241 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15243 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15244 if (val == 2 || val == 4 || val == 8)
15246 *total = COSTS_N_INSNS (ix86_cost->lea);
15247 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15248 *total += rtx_cost (XEXP (x, 1), outer_code);
15252 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15254 *total = COSTS_N_INSNS (ix86_cost->lea);
15255 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15256 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15257 *total += rtx_cost (XEXP (x, 1), outer_code);
15264 if (FLOAT_MODE_P (mode))
15266 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode arithmetic needs two 32-bit insns; operands that are
   not already DImode are costed double (the << by a 0/1 comparison). */
15274 if (!TARGET_64BIT && mode == DImode)
15276 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15277 + (rtx_cost (XEXP (x, 0), outer_code)
15278 << (GET_MODE (XEXP (x, 0)) != DImode))
15279 + (rtx_cost (XEXP (x, 1), outer_code)
15280 << (GET_MODE (XEXP (x, 1)) != DImode)));
15286 if (FLOAT_MODE_P (mode))
15288 *total = COSTS_N_INSNS (ix86_cost->fchs);
15294 if (!TARGET_64BIT && mode == DImode)
15295 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15297 *total = COSTS_N_INSNS (ix86_cost->add);
15301 if (!TARGET_SSE_MATH
15303 || (mode == DFmode && !TARGET_SSE2))
15308 if (FLOAT_MODE_P (mode))
15309 *total = COSTS_N_INSNS (ix86_cost->fabs);
15313 if (FLOAT_MODE_P (mode))
15314 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
/* UNSPEC: thread-pointer reads have their own cost (body elided). */
15318 if (XINT (x, 1) == UNSPEC_TP)
15327 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit an SVR3-style constructor record: push the constructor's
   address in the init section so the startup code can invoke it.
   NOTE(review): the return type, opening brace and closing brace of
   this function are elided in this excerpt. */
15329 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15332 fputs ("\tpushl $", asm_out_file);
15333 assemble_name (asm_out_file, XSTR (symbol, 0));
15334 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   Mach-O symbol stubs. */
15340 static int current_machopic_label_num;
15342 /* Given a symbol name and its associated stub, write out the
15343 definition of the stub. */
/* NOTE(review): fragmentary -- the PIC/non-PIC branch conditions and
   several braces are elided; the stub emits (1) the stub entry that
   jumps through a lazy pointer, (2) a binder that pushes the lazy
   pointer and jumps to dyld_stub_binding_helper, and (3) the lazy
   pointer itself, initialized to the binder. */
15346 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15348 unsigned int length;
15349 char *binder_name, *symbol_name, lazy_ptr_name[32];
15350 int label = ++current_machopic_label_num;
15352 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15353 symb = (*targetm.strip_name_encoding) (symb);
15355 length = strlen (stub);
15356 binder_name = alloca (length + 32);
15357 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15359 length = strlen (symb);
15360 symbol_name = alloca (length + 32);
15361 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15363 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Section selection: pic stub section vs plain stub section
   (the guarding condition is elided in this excerpt). */
15366 machopic_picsymbol_stub_section ();
15368 machopic_symbol_stub_section ();
15370 fprintf (file, "%s:\n", stub);
15371 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize PC in %eax, load the lazy pointer relative
   to it, and jump through %edx. */
15375 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15376 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15377 fprintf (file, "\tjmp %%edx\n");
15380 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15382 fprintf (file, "%s:\n", binder_name);
15386 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15387 fprintf (file, "\tpushl %%eax\n");
15390 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15392 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* The lazy pointer initially resolves to the binder; dyld rewrites it
   to the real symbol on first use. */
15394 machopic_lazy_symbol_ptr_section ();
15395 fprintf (file, "%s:\n", lazy_ptr_name);
15396 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15397 fprintf (file, "\t.long %s\n", binder_name);
15399 #endif /* TARGET_MACHO */
15401 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then x87/SSE/MMX groups whose relative order depends on
   TARGET_SSE_MATH.  NOTE(review): the declarations of `i' and `pos'
   and the function braces are elided in this excerpt. */
15404 x86_order_regs_for_local_alloc (void)
15409 /* First allocate the local general purpose registers. */
15410 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15411 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15412 reg_alloc_order [pos++] = i;
15414 /* Global general purpose registers. */
15415 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15416 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15417 reg_alloc_order [pos++] = i;
15419 /* x87 registers come first in case we are doing FP math
15421 if (!TARGET_SSE_MATH)
15422 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15423 reg_alloc_order [pos++] = i;
15425 /* SSE registers. */
15426 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15427 reg_alloc_order [pos++] = i;
15428 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15429 reg_alloc_order [pos++] = i;
15431 /* x87 registers. */
15432 if (TARGET_SSE_MATH)
15433 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15434 reg_alloc_order [pos++] = i;
15436 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15437 reg_alloc_order [pos++] = i;
15439 /* Initialize the rest of array as we do not allocate some registers
15441 while (pos < FIRST_PSEUDO_REGISTER)
15442 reg_alloc_order [pos++] = 0;
15445 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15446 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15449 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15450 struct attribute_spec.handler. */
/* Rejects the attribute (setting *no_add_attrs) when the target is not
   a struct/union type, or when the opposite attribute is already
   present.  NOTE(review): the return type, `type' declaration, and
   some braces are elided in this excerpt. */
15452 ix86_handle_struct_attribute (tree *node, tree name,
15453 tree args ATTRIBUTE_UNUSED,
15454 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15457 if (DECL_P (*node))
15459 if (TREE_CODE (*node) == TYPE_DECL)
15460 type = &TREE_TYPE (*node);
15465 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15466 || TREE_CODE (*type) == UNION_TYPE)))
15468 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15469 *no_add_attrs = true;
/* "ms_struct" and "gcc_struct" are mutually exclusive on one type. */
15472 else if ((is_attribute_p ("ms_struct", name)
15473 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15474 || ((is_attribute_p ("gcc_struct", name)
15475 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15477 warning ("`%s' incompatible attribute ignored",
15478 IDENTIFIER_POINTER (name));
15479 *no_add_attrs = true;
/* Return true when RECORD_TYPE should use MS bitfield layout: either
   the target default is MS layout and "gcc_struct" does not override
   it, or "ms_struct" is explicitly requested. */
15486 ix86_ms_bitfield_layout_p (tree record_type)
15488 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15489 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15490 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15493 /* Returns an expression indicating where the this parameter is
15494 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first or second integer argument register,
   depending on whether a hidden aggregate-return pointer precedes it.
   32-bit: in a register for regparm/fastcall functions, otherwise on
   the stack at sp+4 (or sp+8 past the aggregate-return slot).
   NOTE(review): braces, `regno' handling and some returns are elided
   in this excerpt. */
15497 x86_this_parameter (tree function)
15499 tree type = TREE_TYPE (function);
15503 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15504 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15507 if (ix86_function_regparm (type, function) > 0)
15511 parm = TYPE_ARG_TYPES (type);
15512 /* Figure out whether or not the function has a variable number of
15514 for (; parm; parm = TREE_CHAIN (parm))
15515 if (TREE_VALUE (parm) == void_type_node)
15517 /* If not, the this parameter is in the first argument. */
15521 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15523 return gen_rtx_REG (SImode, regno);
15527 if (aggregate_value_p (TREE_TYPE (type), type))
15528 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15530 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15533 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): the early `return' statements for each check are
   elided in this excerpt; only the conditions are visible. */
15536 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15537 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15538 HOST_WIDE_INT vcall_offset, tree function)
15540 /* 64-bit can handle anything. */
15544 /* For 32-bit, everything's fine if we have one free register. */
15545 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15548 /* Need a free register for vcall_offset. */
15552 /* Need a free register for GOT references. */
15553 if (flag_pic && !(*targetm.binds_local_p) (function))
15556 /* Otherwise ok. */
15560 /* Output the assembler code for a thunk function. THUNK_DECL is the
15561 declaration for the thunk function itself, FUNCTION is the decl for
15562 the target function. DELTA is an immediate constant offset to be
15563 added to THIS. If VCALL_OFFSET is nonzero, the word at
15564 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): fragmentary -- xops[] declaration, several braces and
   the 64-bit/32-bit branch conditions are elided in this excerpt. */
15567 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15568 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15569 HOST_WIDE_INT vcall_offset, tree function)
15572 rtx this = x86_this_parameter (function);
15575 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15576 pull it in now and let DELTA benefit. */
15579 else if (vcall_offset)
15581 /* Put the this parameter into %eax. */
15583 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15584 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15587 this_reg = NULL_RTX;
15589 /* Adjust the this parameter by a fixed constant. */
15592 xops[0] = GEN_INT (delta);
15593 xops[1] = this_reg ? this_reg : this;
/* 64-bit: deltas not representable as immediates go through R10. */
15596 if (!x86_64_general_operand (xops[0], DImode))
15598 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15600 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15604 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15607 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15610 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register: R10 on 64-bit; ECX on 32-bit, except EAX for
   fastcall functions (where ECX carries an argument). */
15614 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15617 int tmp_regno = 2 /* ECX */;
15618 if (lookup_attribute ("fastcall",
15619 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15620 tmp_regno = 0 /* EAX */;
15621 tmp = gen_rtx_REG (SImode, tmp_regno);
15624 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15627 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15629 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15631 /* Adjust the this parameter. */
15632 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15633 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15635 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15636 xops[0] = GEN_INT (vcall_offset);
15638 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15639 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15641 xops[1] = this_reg;
15643 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15645 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15648 /* If necessary, drop THIS back to its stack slot. */
15649 if (this_reg && this_reg != this)
15651 xops[0] = this_reg;
15653 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function: direct jmp when it binds locally,
   otherwise through the GOT (64-bit), a Mach-O stub, or a GOT load
   after materializing the PIC register in ECX (32-bit PIC). */
15656 xops[0] = XEXP (DECL_RTL (function), 0);
15659 if (!flag_pic || (*targetm.binds_local_p) (function))
15660 output_asm_insn ("jmp\t%P0", xops);
15663 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15664 tmp = gen_rtx_CONST (Pmode, tmp);
15665 tmp = gen_rtx_MEM (QImode, tmp);
15667 output_asm_insn ("jmp\t%A0", xops);
15672 if (!flag_pic || (*targetm.binds_local_p) (function))
15673 output_asm_insn ("jmp\t%P0", xops);
15678 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15679 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15680 tmp = gen_rtx_MEM (QImode, tmp);
15682 output_asm_insn ("jmp\t%0", xops);
15685 #endif /* TARGET_MACHO */
15687 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15688 output_set_got (tmp);
15691 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15692 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit the default prologue plus
   target-conditional directives (.version, __fltused, .intel_syntax).
   NOTE(review): return type and braces elided in this excerpt. */
15698 x86_file_start (void)
15700 default_file_start ();
15701 if (X86_FILE_START_VERSION_DIRECTIVE)
15702 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15703 if (X86_FILE_START_FLTUSED)
15704 fputs ("\t.global\t__fltused\n", asm_out_file);
15705 if (ix86_asm_dialect == ASM_INTEL)
15706 fputs ("\t.intel_syntax\n", asm_out_file);
/* Cap the alignment of DFmode/DCmode/integer fields at 32 bits on
   32-bit targets without -malign-double (the ABI-compatible default).
   NOTE(review): the return type, braces and the fall-through return of
   COMPUTED are elided in this excerpt. */
15710 x86_field_alignment (tree field, int computed)
15712 enum machine_mode mode;
15713 tree type = TREE_TYPE (field);
15715 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type determines the alignment cap. */
15717 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15718 ? get_inner_array_type (type) : type);
15719 if (mode == DFmode || mode == DCmode
15720 || GET_MODE_CLASS (mode) == MODE_INT
15721 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15722 return MIN (32, computed);
15726 /* Output assembler code to FILE to increment profiler label # LABELNO
15727 for profiling a function entry. */
/* Emits the mcount call in one of four flavors: 64-bit PIC/non-PIC
   and 32-bit PIC/non-PIC, each with an optional counter-address setup
   guarded by NO_PROFILE_COUNTERS.  NOTE(review): the TARGET_64BIT /
   flag_pic branch structure and #endif lines are elided here. */
15729 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15734 #ifndef NO_PROFILE_COUNTERS
15735 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15737 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15741 #ifndef NO_PROFILE_COUNTERS
15742 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15744 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15748 #ifndef NO_PROFILE_COUNTERS
15749 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15750 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15752 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15756 #ifndef NO_PROFILE_COUNTERS
15757 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15758 PROFILE_COUNT_REGISTER);
15760 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15764 /* We don't have exact information about the insn sizes, but we may assume
15765 quite safely that we are informed about all 1 byte insns and memory
15766 address sizes. This is enough to eliminate unnecessary padding in
/* Conservative lower bound on the byte size of INSN, used by the K8
   jump-padding pass below.  NOTE(review): the return statements for
   each early-out and the final size computation are elided in this
   excerpt. */
15770 min_insn_size (rtx insn)
15774 if (!INSN_P (insn) || !active_insn_p (insn))
15777 /* Discard alignments we've emit and jump instructions. */
15778 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15779 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15781 if (GET_CODE (insn) == JUMP_INSN
15782 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15783 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15786 /* Important case - calls are always 5 bytes.
15787 It is common to have many calls in the row. */
15788 if (GET_CODE (insn) == CALL_INSN
15789 && symbolic_reference_mentioned_p (PATTERN (insn))
15790 && !SIBLING_CALL_P (insn))
15792 if (get_attr_length (insn) <= 1)
15795 /* For normal instructions we may rely on the sizes of addresses
15796 and the presence of symbol to require 4 bytes of encoding.
15797 This is not the case for jumps where references are PC relative. */
15798 if (GET_CODE (insn) != JUMP_INSN)
15800 l = get_attr_length_address (insn);
15801 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15810 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: track [START, INSN] intervals containing up to
   four jump/call insns; if four jumps could share one 16-byte fetch
   window, emit a p2align before INSN to push it into the next window.
   NOTE(review): return type, `isjump' declaration and some braces are
   elided in this excerpt. */
15814 k8_avoid_jump_misspredicts (void)
15816 rtx insn, start = get_insns ();
15817 int nbytes = 0, njumps = 0;
15820 /* Look for all minimal intervals of instructions containing 4 jumps.
15821 The intervals are bounded by START and INSN. NBYTES is the total
15822 size of instructions in the interval including INSN and not including
15823 START. When the NBYTES is smaller than 16 bytes, it is possible
15824 that the end of START and INSN ends up in the same 16byte page.
15826 The smallest offset in the page INSN can start is the case where START
15827 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15828 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15830 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15833 nbytes += min_insn_size (insn);
15835 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15836 INSN_UID (insn), min_insn_size (insn));
15837 if ((GET_CODE (insn) == JUMP_INSN
15838 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15839 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15840 || GET_CODE (insn) == CALL_INSN)
/* Shrink the window from the front until at most 3 jumps remain. */
15847 start = NEXT_INSN (start);
15848 if ((GET_CODE (start) == JUMP_INSN
15849 && GET_CODE (PATTERN (start)) != ADDR_VEC
15850 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15851 || GET_CODE (start) == CALL_INSN)
15852 njumps--, isjump = 1;
15855 nbytes -= min_insn_size (start);
15860 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15861 INSN_UID (start), INSN_UID (insn), nbytes);
15863 if (njumps == 3 && isjump && nbytes < 16)
15865 int padsize = 15 - nbytes + min_insn_size (insn);
15868 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15869 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15874 /* Implement machine specific optimizations.
15875 At the moment we implement single transformation: AMD Athlon works faster
15876 when RET is not destination of conditional jump or directly preceded
15877 by other jump instruction. We avoid the penalty by inserting NOP just
15878 before the RET instructions in such cases. */
/* NOTE(review): the function signature is elided in this excerpt
   (presumably the machine-dependent-reorg hook, e.g. ix86_reorg --
   confirm against the full file), as are the declarations of `e' and
   the `replace' handling. */
15884 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
/* Examine every predecessor edge of the exit block: each one that ends
   in a RETURN in a hot block is a candidate. */
15886 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15888 basic_block bb = e->src;
15889 rtx ret = BB_END (bb);
15891 bool replace = false;
15893 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15894 || !maybe_hot_bb_p (bb))
15896 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15897 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15899 if (prev && GET_CODE (prev) == CODE_LABEL)
/* A labelled RET reached by a non-fallthru edge is a jump target. */
15902 for (e = bb->pred; e; e = e->pred_next)
15903 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15904 && !(e->flags & EDGE_FALLTHRU))
15909 prev = prev_active_insn (ret);
15911 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15912 || GET_CODE (prev) == CALL_INSN)
15914 /* Empty functions get branch mispredict even when the jump destination
15915 is not visible to us. */
15916 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the plain RET with the longer return_internal_long form. */
15921 emit_insn_before (gen_return_internal_long (), ret);
15925 k8_avoid_jump_misspredicts ();
15928 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans INSN's extracted operands for a QImode hard register numbered
   >= 4 (SPL/BPL/SIL/DIL and above need REX encoding in 64-bit mode).
   NOTE(review): the return statements and QImode check on the operand
   are elided in this excerpt. */
15931 x86_extended_QIreg_mentioned_p (rtx insn)
15934 extract_insn_cached (insn);
15935 for (i = 0; i < recog_data.n_operands; i++)
15936 if (REG_P (recog_data.operand[i])
15937 && REGNO (recog_data.operand[i]) >= 4)
15942 /* Return nonzero when P points to register encoded via REX prefix.
15943 Called via for_each_rtx. */
/* NOTE(review): the REG_P guard before reading REGNO is elided in
   this excerpt. */
15945 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15947 unsigned int regno;
15950 regno = REGNO (*p);
15951 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15954 /* Return true when INSN mentions register that must be encoded using REX
/* Walks INSN's pattern with for_each_rtx using the predicate above. */
15957 x86_extended_reg_mentioned_p (rtx insn)
15959 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15962 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15963 optabs would emit if we didn't have TFmode patterns. */
/* Strategy: if the input is non-negative, a plain signed FLOAT works.
   Otherwise halve the value ((in >> 1) | (in & 1), preserving the
   rounding bit), convert, and double the result.
   NOTE(review): return type, `out' initialization and some braces are
   elided in this excerpt. */
15966 x86_emit_floatuns (rtx operands[2])
15968 rtx neglab, donelab, i0, i1, f0, in, out;
15969 enum machine_mode mode, inmode;
15971 inmode = GET_MODE (operands[1]);
15972 if (inmode != SImode
15973 && inmode != DImode)
15977 in = force_reg (inmode, operands[1]);
15978 mode = GET_MODE (out);
15979 neglab = gen_label_rtx ();
15980 donelab = gen_label_rtx ();
15981 i1 = gen_reg_rtx (Pmode);
15982 f0 = gen_reg_rtx (mode);
/* Fast path: value fits in the signed range -> direct FLOAT. */
15984 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15986 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15987 emit_jump_insn (gen_jump (donelab));
15990 emit_label (neglab);
15992 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15993 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15994 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15995 expand_float (f0, i0, 0);
/* out = f0 + f0 compensates for the halving above. */
15996 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15998 emit_label (donelab);
16001 /* Return if we do not know how to pass TYPE solely in registers. */
/* Adds one target-specific case on top of the generic rule: 32-bit
   targets cannot pass a TImode-typed value in registers. */
16003 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
16005 if (default_must_pass_in_stack (mode, type))
16007 return (!TARGET_64BIT && type && mode == TImode);
16010 /* Initialize vector TARGET via VALS. */
/* Three strategies, tried in order: (1) all-constant vectors are
   loaded from the constant pool; (2) vectors whose only non-constant
   element is element 0 are loaded from the pool and patched with
   movss/movsd; (3) general V2DF/V4SF values are built with unpack
   shuffles.  NOTE(review): the `i' declaration, the condition guards
   between strategies, `break's and closing braces are elided in this
   excerpt. */
16012 ix86_expand_vector_init (rtx target, rtx vals)
16014 enum machine_mode mode = GET_MODE (target);
16015 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16016 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
/* Find the highest-index non-constant element (loop result used by
   the strategy guards elided below). */
16019 for (i = n_elts - 1; i >= 0; i--)
16020 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
16021 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
16024 /* Few special cases first...
16025 ... constants are best loaded from constant pool. */
16028 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16032 /* ... values where only first field is non-constant are best loaded
16033 from the pool and overwriten via move later. */
16036 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
16037 GET_MODE_INNER (mode), 0);
16039 op = force_reg (mode, op);
16040 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
16041 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16042 switch (GET_MODE (target))
16045 emit_insn (gen_sse2_movsd (target, target, op));
16048 emit_insn (gen_sse_movss (target, target, op));
16056 /* And the busy sequence doing rotations. */
16057 switch (GET_MODE (target))
/* V2DF: widen both scalars to V2DF and interleave low halves. */
16062 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
16064 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
16066 vecop0 = force_reg (V2DFmode, vecop0);
16067 vecop1 = force_reg (V2DFmode, vecop1);
16068 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
/* V4SF: two unpcklps passes merge the four scalars into place. */
16074 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16076 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16078 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16080 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16081 rtx tmp1 = gen_reg_rtx (V4SFmode);
16082 rtx tmp2 = gen_reg_rtx (V4SFmode);
16084 vecop0 = force_reg (V4SFmode, vecop0);
16085 vecop1 = force_reg (V4SFmode, vecop1);
16086 vecop2 = force_reg (V4SFmode, vecop2);
16087 vecop3 = force_reg (V4SFmode, vecop3);
16088 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16089 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16090 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16098 #include "gt-i386.h"