1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
32 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
46 #include "basic-block.h"
49 #include "target-def.h"
50 #include "langhooks.h"
52 #include "tree-gimple.h"
54 #include "tm-constrs.h"
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
60 /* Return index of given mode in mult and division cost tables. */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
73 struct processor_costs size_cost = { /* costs for tuning for size */
74 COSTS_N_BYTES (2), /* cost of an add instruction */
75 COSTS_N_BYTES (3), /* cost of a lea instruction */
76 COSTS_N_BYTES (2), /* variable shift costs */
77 COSTS_N_BYTES (3), /* constant shift costs */
78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
79 COSTS_N_BYTES (3), /* HI */
80 COSTS_N_BYTES (3), /* SI */
81 COSTS_N_BYTES (3), /* DI */
82 COSTS_N_BYTES (5)}, /* other */
83 0, /* cost of multiply per each bit set */
84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 COSTS_N_BYTES (3), /* cost of movsx */
90 COSTS_N_BYTES (3), /* cost of movzx */
93 2, /* cost for loading QImode using movzbl */
94 {2, 2, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 2, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {2, 2, 2}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {2, 2, 2}, /* cost of storing fp registers
102 in SFmode, DFmode and XFmode */
103 3, /* cost of moving MMX register */
104 {3, 3}, /* cost of loading MMX registers
105 in SImode and DImode */
106 {3, 3}, /* cost of storing MMX registers
107 in SImode and DImode */
108 3, /* cost of moving SSE register */
109 {3, 3, 3}, /* cost of loading SSE registers
110 in SImode, DImode and TImode */
111 {3, 3, 3}, /* cost of storing SSE registers
112 in SImode, DImode and TImode */
113 3, /* MMX or SSE register to integer */
114 0, /* size of prefetch block */
115 0, /* number of parallel prefetches */
117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
120 COSTS_N_BYTES (2), /* cost of FABS instruction. */
121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 /* Processor costs (relative to an add) */
127 struct processor_costs i386_cost = { /* 386 specific costs */
128 COSTS_N_INSNS (1), /* cost of an add instruction */
129 COSTS_N_INSNS (1), /* cost of a lea instruction */
130 COSTS_N_INSNS (3), /* variable shift costs */
131 COSTS_N_INSNS (2), /* constant shift costs */
132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
133 COSTS_N_INSNS (6), /* HI */
134 COSTS_N_INSNS (6), /* SI */
135 COSTS_N_INSNS (6), /* DI */
136 COSTS_N_INSNS (6)}, /* other */
137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
139 COSTS_N_INSNS (23), /* HI */
140 COSTS_N_INSNS (23), /* SI */
141 COSTS_N_INSNS (23), /* DI */
142 COSTS_N_INSNS (23)}, /* other */
143 COSTS_N_INSNS (3), /* cost of movsx */
144 COSTS_N_INSNS (2), /* cost of movzx */
145 15, /* "large" insn */
147 4, /* cost for loading QImode using movzbl */
148 {2, 4, 2}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 4, 2}, /* cost of storing integer registers */
152 2, /* cost of reg,reg fld/fst */
153 {8, 8, 8}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {8, 8, 8}, /* cost of storing fp registers
156 in SFmode, DFmode and XFmode */
157 2, /* cost of moving MMX register */
158 {4, 8}, /* cost of loading MMX registers
159 in SImode and DImode */
160 {4, 8}, /* cost of storing MMX registers
161 in SImode and DImode */
162 2, /* cost of moving SSE register */
163 {4, 8, 16}, /* cost of loading SSE registers
164 in SImode, DImode and TImode */
165 {4, 8, 16}, /* cost of storing SSE registers
166 in SImode, DImode and TImode */
167 3, /* MMX or SSE register to integer */
168 0, /* size of prefetch block */
169 0, /* number of parallel prefetches */
171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
174 COSTS_N_INSNS (22), /* cost of FABS instruction. */
175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
180 struct processor_costs i486_cost = { /* 486 specific costs */
181 COSTS_N_INSNS (1), /* cost of an add instruction */
182 COSTS_N_INSNS (1), /* cost of a lea instruction */
183 COSTS_N_INSNS (3), /* variable shift costs */
184 COSTS_N_INSNS (2), /* constant shift costs */
185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
186 COSTS_N_INSNS (12), /* HI */
187 COSTS_N_INSNS (12), /* SI */
188 COSTS_N_INSNS (12), /* DI */
189 COSTS_N_INSNS (12)}, /* other */
190 1, /* cost of multiply per each bit set */
191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
192 COSTS_N_INSNS (40), /* HI */
193 COSTS_N_INSNS (40), /* SI */
194 COSTS_N_INSNS (40), /* DI */
195 COSTS_N_INSNS (40)}, /* other */
196 COSTS_N_INSNS (3), /* cost of movsx */
197 COSTS_N_INSNS (2), /* cost of movzx */
198 15, /* "large" insn */
200 4, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {8, 8, 8}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {8, 8, 8}, /* cost of storing fp registers
209 in SFmode, DFmode and XFmode */
210 2, /* cost of moving MMX register */
211 {4, 8}, /* cost of loading MMX registers
212 in SImode and DImode */
213 {4, 8}, /* cost of storing MMX registers
214 in SImode and DImode */
215 2, /* cost of moving SSE register */
216 {4, 8, 16}, /* cost of loading SSE registers
217 in SImode, DImode and TImode */
218 {4, 8, 16}, /* cost of storing SSE registers
219 in SImode, DImode and TImode */
220 3, /* MMX or SSE register to integer */
221 0, /* size of prefetch block */
222 0, /* number of parallel prefetches */
224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
227 COSTS_N_INSNS (3), /* cost of FABS instruction. */
228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
233 struct processor_costs pentium_cost = {
234 COSTS_N_INSNS (1), /* cost of an add instruction */
235 COSTS_N_INSNS (1), /* cost of a lea instruction */
236 COSTS_N_INSNS (4), /* variable shift costs */
237 COSTS_N_INSNS (1), /* constant shift costs */
238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
239 COSTS_N_INSNS (11), /* HI */
240 COSTS_N_INSNS (11), /* SI */
241 COSTS_N_INSNS (11), /* DI */
242 COSTS_N_INSNS (11)}, /* other */
243 0, /* cost of multiply per each bit set */
244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
245 COSTS_N_INSNS (25), /* HI */
246 COSTS_N_INSNS (25), /* SI */
247 COSTS_N_INSNS (25), /* DI */
248 COSTS_N_INSNS (25)}, /* other */
249 COSTS_N_INSNS (3), /* cost of movsx */
250 COSTS_N_INSNS (2), /* cost of movzx */
251 8, /* "large" insn */
253 6, /* cost for loading QImode using movzbl */
254 {2, 4, 2}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 4, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
262 in SFmode, DFmode and XFmode */
263 8, /* cost of moving MMX register */
264 {8, 8}, /* cost of loading MMX registers
265 in SImode and DImode */
266 {8, 8}, /* cost of storing MMX registers
267 in SImode and DImode */
268 2, /* cost of moving SSE register */
269 {4, 8, 16}, /* cost of loading SSE registers
270 in SImode, DImode and TImode */
271 {4, 8, 16}, /* cost of storing SSE registers
272 in SImode, DImode and TImode */
273 3, /* MMX or SSE register to integer */
274 0, /* size of prefetch block */
275 0, /* number of parallel prefetches */
277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
278 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
280 COSTS_N_INSNS (1), /* cost of FABS instruction. */
281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
286 struct processor_costs pentiumpro_cost = {
287 COSTS_N_INSNS (1), /* cost of an add instruction */
288 COSTS_N_INSNS (1), /* cost of a lea instruction */
289 COSTS_N_INSNS (1), /* variable shift costs */
290 COSTS_N_INSNS (1), /* constant shift costs */
291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
292 COSTS_N_INSNS (4), /* HI */
293 COSTS_N_INSNS (4), /* SI */
294 COSTS_N_INSNS (4), /* DI */
295 COSTS_N_INSNS (4)}, /* other */
296 0, /* cost of multiply per each bit set */
297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
298 COSTS_N_INSNS (17), /* HI */
299 COSTS_N_INSNS (17), /* SI */
300 COSTS_N_INSNS (17), /* DI */
301 COSTS_N_INSNS (17)}, /* other */
302 COSTS_N_INSNS (1), /* cost of movsx */
303 COSTS_N_INSNS (1), /* cost of movzx */
304 8, /* "large" insn */
306 2, /* cost for loading QImode using movzbl */
307 {4, 4, 4}, /* cost of loading integer registers
308 in QImode, HImode and SImode.
309 Relative to reg-reg move (2). */
310 {2, 2, 2}, /* cost of storing integer registers */
311 2, /* cost of reg,reg fld/fst */
312 {2, 2, 6}, /* cost of loading fp registers
313 in SFmode, DFmode and XFmode */
314 {4, 4, 6}, /* cost of storing fp registers
315 in SFmode, DFmode and XFmode */
316 2, /* cost of moving MMX register */
317 {2, 2}, /* cost of loading MMX registers
318 in SImode and DImode */
319 {2, 2}, /* cost of storing MMX registers
320 in SImode and DImode */
321 2, /* cost of moving SSE register */
322 {2, 2, 8}, /* cost of loading SSE registers
323 in SImode, DImode and TImode */
324 {2, 2, 8}, /* cost of storing SSE registers
325 in SImode, DImode and TImode */
326 3, /* MMX or SSE register to integer */
327 32, /* size of prefetch block */
328 6, /* number of parallel prefetches */
330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
333 COSTS_N_INSNS (2), /* cost of FABS instruction. */
334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
339 struct processor_costs geode_cost = {
340 COSTS_N_INSNS (1), /* cost of an add instruction */
341 COSTS_N_INSNS (1), /* cost of a lea instruction */
342 COSTS_N_INSNS (2), /* variable shift costs */
343 COSTS_N_INSNS (1), /* constant shift costs */
344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
345 COSTS_N_INSNS (4), /* HI */
346 COSTS_N_INSNS (7), /* SI */
347 COSTS_N_INSNS (7), /* DI */
348 COSTS_N_INSNS (7)}, /* other */
349 0, /* cost of multiply per each bit set */
350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
351 COSTS_N_INSNS (23), /* HI */
352 COSTS_N_INSNS (39), /* SI */
353 COSTS_N_INSNS (39), /* DI */
354 COSTS_N_INSNS (39)}, /* other */
355 COSTS_N_INSNS (1), /* cost of movsx */
356 COSTS_N_INSNS (1), /* cost of movzx */
357 8, /* "large" insn */
359 1, /* cost for loading QImode using movzbl */
360 {1, 1, 1}, /* cost of loading integer registers
361 in QImode, HImode and SImode.
362 Relative to reg-reg move (2). */
363 {1, 1, 1}, /* cost of storing integer registers */
364 1, /* cost of reg,reg fld/fst */
365 {1, 1, 1}, /* cost of loading fp registers
366 in SFmode, DFmode and XFmode */
367 {4, 6, 6}, /* cost of storing fp registers
368 in SFmode, DFmode and XFmode */
370 1, /* cost of moving MMX register */
371 {1, 1}, /* cost of loading MMX registers
372 in SImode and DImode */
373 {1, 1}, /* cost of storing MMX registers
374 in SImode and DImode */
375 1, /* cost of moving SSE register */
376 {1, 1, 1}, /* cost of loading SSE registers
377 in SImode, DImode and TImode */
378 {1, 1, 1}, /* cost of storing SSE registers
379 in SImode, DImode and TImode */
380 1, /* MMX or SSE register to integer */
381 32, /* size of prefetch block */
382 1, /* number of parallel prefetches */
384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
385 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
387 COSTS_N_INSNS (1), /* cost of FABS instruction. */
388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
393 struct processor_costs k6_cost = {
394 COSTS_N_INSNS (1), /* cost of an add instruction */
395 COSTS_N_INSNS (2), /* cost of a lea instruction */
396 COSTS_N_INSNS (1), /* variable shift costs */
397 COSTS_N_INSNS (1), /* constant shift costs */
398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
399 COSTS_N_INSNS (3), /* HI */
400 COSTS_N_INSNS (3), /* SI */
401 COSTS_N_INSNS (3), /* DI */
402 COSTS_N_INSNS (3)}, /* other */
403 0, /* cost of multiply per each bit set */
404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
405 COSTS_N_INSNS (18), /* HI */
406 COSTS_N_INSNS (18), /* SI */
407 COSTS_N_INSNS (18), /* DI */
408 COSTS_N_INSNS (18)}, /* other */
409 COSTS_N_INSNS (2), /* cost of movsx */
410 COSTS_N_INSNS (2), /* cost of movzx */
411 8, /* "large" insn */
413 3, /* cost for loading QImode using movzbl */
414 {4, 5, 4}, /* cost of loading integer registers
415 in QImode, HImode and SImode.
416 Relative to reg-reg move (2). */
417 {2, 3, 2}, /* cost of storing integer registers */
418 4, /* cost of reg,reg fld/fst */
419 {6, 6, 6}, /* cost of loading fp registers
420 in SFmode, DFmode and XFmode */
421 {4, 4, 4}, /* cost of storing fp registers
422 in SFmode, DFmode and XFmode */
423 2, /* cost of moving MMX register */
424 {2, 2}, /* cost of loading MMX registers
425 in SImode and DImode */
426 {2, 2}, /* cost of storing MMX registers
427 in SImode and DImode */
428 2, /* cost of moving SSE register */
429 {2, 2, 8}, /* cost of loading SSE registers
430 in SImode, DImode and TImode */
431 {2, 2, 8}, /* cost of storing SSE registers
432 in SImode, DImode and TImode */
433 6, /* MMX or SSE register to integer */
434 32, /* size of prefetch block */
435 1, /* number of parallel prefetches */
437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
440 COSTS_N_INSNS (2), /* cost of FABS instruction. */
441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
446 struct processor_costs athlon_cost = {
447 COSTS_N_INSNS (1), /* cost of an add instruction */
448 COSTS_N_INSNS (2), /* cost of a lea instruction */
449 COSTS_N_INSNS (1), /* variable shift costs */
450 COSTS_N_INSNS (1), /* constant shift costs */
451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
452 COSTS_N_INSNS (5), /* HI */
453 COSTS_N_INSNS (5), /* SI */
454 COSTS_N_INSNS (5), /* DI */
455 COSTS_N_INSNS (5)}, /* other */
456 0, /* cost of multiply per each bit set */
457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
458 COSTS_N_INSNS (26), /* HI */
459 COSTS_N_INSNS (42), /* SI */
460 COSTS_N_INSNS (74), /* DI */
461 COSTS_N_INSNS (74)}, /* other */
462 COSTS_N_INSNS (1), /* cost of movsx */
463 COSTS_N_INSNS (1), /* cost of movzx */
464 8, /* "large" insn */
466 4, /* cost for loading QImode using movzbl */
467 {3, 4, 3}, /* cost of loading integer registers
468 in QImode, HImode and SImode.
469 Relative to reg-reg move (2). */
470 {3, 4, 3}, /* cost of storing integer registers */
471 4, /* cost of reg,reg fld/fst */
472 {4, 4, 12}, /* cost of loading fp registers
473 in SFmode, DFmode and XFmode */
474 {6, 6, 8}, /* cost of storing fp registers
475 in SFmode, DFmode and XFmode */
476 2, /* cost of moving MMX register */
477 {4, 4}, /* cost of loading MMX registers
478 in SImode and DImode */
479 {4, 4}, /* cost of storing MMX registers
480 in SImode and DImode */
481 2, /* cost of moving SSE register */
482 {4, 4, 6}, /* cost of loading SSE registers
483 in SImode, DImode and TImode */
484 {4, 4, 5}, /* cost of storing SSE registers
485 in SImode, DImode and TImode */
486 5, /* MMX or SSE register to integer */
487 64, /* size of prefetch block */
488 6, /* number of parallel prefetches */
490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
492 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
499 struct processor_costs k8_cost = {
500 COSTS_N_INSNS (1), /* cost of an add instruction */
501 COSTS_N_INSNS (2), /* cost of a lea instruction */
502 COSTS_N_INSNS (1), /* variable shift costs */
503 COSTS_N_INSNS (1), /* constant shift costs */
504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
505 COSTS_N_INSNS (4), /* HI */
506 COSTS_N_INSNS (3), /* SI */
507 COSTS_N_INSNS (4), /* DI */
508 COSTS_N_INSNS (5)}, /* other */
509 0, /* cost of multiply per each bit set */
510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
511 COSTS_N_INSNS (26), /* HI */
512 COSTS_N_INSNS (42), /* SI */
513 COSTS_N_INSNS (74), /* DI */
514 COSTS_N_INSNS (74)}, /* other */
515 COSTS_N_INSNS (1), /* cost of movsx */
516 COSTS_N_INSNS (1), /* cost of movzx */
517 8, /* "large" insn */
519 4, /* cost for loading QImode using movzbl */
520 {3, 4, 3}, /* cost of loading integer registers
521 in QImode, HImode and SImode.
522 Relative to reg-reg move (2). */
523 {3, 4, 3}, /* cost of storing integer registers */
524 4, /* cost of reg,reg fld/fst */
525 {4, 4, 12}, /* cost of loading fp registers
526 in SFmode, DFmode and XFmode */
527 {6, 6, 8}, /* cost of storing fp registers
528 in SFmode, DFmode and XFmode */
529 2, /* cost of moving MMX register */
530 {3, 3}, /* cost of loading MMX registers
531 in SImode and DImode */
532 {4, 4}, /* cost of storing MMX registers
533 in SImode and DImode */
534 2, /* cost of moving SSE register */
535 {4, 3, 6}, /* cost of loading SSE registers
536 in SImode, DImode and TImode */
537 {4, 4, 5}, /* cost of storing SSE registers
538 in SImode, DImode and TImode */
539 5, /* MMX or SSE register to integer */
540 64, /* size of prefetch block */
541 6, /* number of parallel prefetches */
543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
546 COSTS_N_INSNS (2), /* cost of FABS instruction. */
547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
551 struct processor_costs amdfam10_cost = {
552 COSTS_N_INSNS (1), /* cost of an add instruction */
553 COSTS_N_INSNS (2), /* cost of a lea instruction */
554 COSTS_N_INSNS (1), /* variable shift costs */
555 COSTS_N_INSNS (1), /* constant shift costs */
556 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
557 COSTS_N_INSNS (4), /* HI */
558 COSTS_N_INSNS (3), /* SI */
559 COSTS_N_INSNS (4), /* DI */
560 COSTS_N_INSNS (5)}, /* other */
561 0, /* cost of multiply per each bit set */
562 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
563 COSTS_N_INSNS (35), /* HI */
564 COSTS_N_INSNS (51), /* SI */
565 COSTS_N_INSNS (83), /* DI */
566 COSTS_N_INSNS (83)}, /* other */
567 COSTS_N_INSNS (1), /* cost of movsx */
568 COSTS_N_INSNS (1), /* cost of movzx */
569 8, /* "large" insn */
571 4, /* cost for loading QImode using movzbl */
572 {3, 4, 3}, /* cost of loading integer registers
573 in QImode, HImode and SImode.
574 Relative to reg-reg move (2). */
575 {3, 4, 3}, /* cost of storing integer registers */
576 4, /* cost of reg,reg fld/fst */
577 {4, 4, 12}, /* cost of loading fp registers
578 in SFmode, DFmode and XFmode */
579 {6, 6, 8}, /* cost of storing fp registers
580 in SFmode, DFmode and XFmode */
581 2, /* cost of moving MMX register */
582 {3, 3}, /* cost of loading MMX registers
583 in SImode and DImode */
584 {4, 4}, /* cost of storing MMX registers
585 in SImode and DImode */
586 2, /* cost of moving SSE register */
587 {4, 4, 3}, /* cost of loading SSE registers
588 in SImode, DImode and TImode */
589 {4, 4, 5}, /* cost of storing SSE registers
590 in SImode, DImode and TImode */
591 3, /* MMX or SSE register to integer */
593 MOVD reg64, xmmreg Double FSTORE 4
594 MOVD reg32, xmmreg Double FSTORE 4
596 MOVD reg64, xmmreg Double FADD 3
598 MOVD reg32, xmmreg Double FADD 3
600 64, /* size of prefetch block */
601 /* New AMD processors never drop prefetches; if they cannot be performed
602 immediately, they are queued. We set number of simultaneous prefetches
603 to a large constant to reflect this (it probably is not a good idea not
604 to limit number of prefetches at all, as their execution also takes some
606 100, /* number of parallel prefetches */
608 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
609 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
610 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
611 COSTS_N_INSNS (2), /* cost of FABS instruction. */
612 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
613 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
617 struct processor_costs pentium4_cost = {
618 COSTS_N_INSNS (1), /* cost of an add instruction */
619 COSTS_N_INSNS (3), /* cost of a lea instruction */
620 COSTS_N_INSNS (4), /* variable shift costs */
621 COSTS_N_INSNS (4), /* constant shift costs */
622 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
623 COSTS_N_INSNS (15), /* HI */
624 COSTS_N_INSNS (15), /* SI */
625 COSTS_N_INSNS (15), /* DI */
626 COSTS_N_INSNS (15)}, /* other */
627 0, /* cost of multiply per each bit set */
628 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
629 COSTS_N_INSNS (56), /* HI */
630 COSTS_N_INSNS (56), /* SI */
631 COSTS_N_INSNS (56), /* DI */
632 COSTS_N_INSNS (56)}, /* other */
633 COSTS_N_INSNS (1), /* cost of movsx */
634 COSTS_N_INSNS (1), /* cost of movzx */
635 16, /* "large" insn */
637 2, /* cost for loading QImode using movzbl */
638 {4, 5, 4}, /* cost of loading integer registers
639 in QImode, HImode and SImode.
640 Relative to reg-reg move (2). */
641 {2, 3, 2}, /* cost of storing integer registers */
642 2, /* cost of reg,reg fld/fst */
643 {2, 2, 6}, /* cost of loading fp registers
644 in SFmode, DFmode and XFmode */
645 {4, 4, 6}, /* cost of storing fp registers
646 in SFmode, DFmode and XFmode */
647 2, /* cost of moving MMX register */
648 {2, 2}, /* cost of loading MMX registers
649 in SImode and DImode */
650 {2, 2}, /* cost of storing MMX registers
651 in SImode and DImode */
652 12, /* cost of moving SSE register */
653 {12, 12, 12}, /* cost of loading SSE registers
654 in SImode, DImode and TImode */
655 {2, 2, 8}, /* cost of storing SSE registers
656 in SImode, DImode and TImode */
657 10, /* MMX or SSE register to integer */
658 64, /* size of prefetch block */
659 6, /* number of parallel prefetches */
661 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
662 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
663 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
664 COSTS_N_INSNS (2), /* cost of FABS instruction. */
665 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
666 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
670 struct processor_costs nocona_cost = {
671 COSTS_N_INSNS (1), /* cost of an add instruction */
672 COSTS_N_INSNS (1), /* cost of a lea instruction */
673 COSTS_N_INSNS (1), /* variable shift costs */
674 COSTS_N_INSNS (1), /* constant shift costs */
675 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
676 COSTS_N_INSNS (10), /* HI */
677 COSTS_N_INSNS (10), /* SI */
678 COSTS_N_INSNS (10), /* DI */
679 COSTS_N_INSNS (10)}, /* other */
680 0, /* cost of multiply per each bit set */
681 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
682 COSTS_N_INSNS (66), /* HI */
683 COSTS_N_INSNS (66), /* SI */
684 COSTS_N_INSNS (66), /* DI */
685 COSTS_N_INSNS (66)}, /* other */
686 COSTS_N_INSNS (1), /* cost of movsx */
687 COSTS_N_INSNS (1), /* cost of movzx */
688 16, /* "large" insn */
690 4, /* cost for loading QImode using movzbl */
691 {4, 4, 4}, /* cost of loading integer registers
692 in QImode, HImode and SImode.
693 Relative to reg-reg move (2). */
694 {4, 4, 4}, /* cost of storing integer registers */
695 3, /* cost of reg,reg fld/fst */
696 {12, 12, 12}, /* cost of loading fp registers
697 in SFmode, DFmode and XFmode */
698 {4, 4, 4}, /* cost of storing fp registers
699 in SFmode, DFmode and XFmode */
700 6, /* cost of moving MMX register */
701 {12, 12}, /* cost of loading MMX registers
702 in SImode and DImode */
703 {12, 12}, /* cost of storing MMX registers
704 in SImode and DImode */
705 6, /* cost of moving SSE register */
706 {12, 12, 12}, /* cost of loading SSE registers
707 in SImode, DImode and TImode */
708 {12, 12, 12}, /* cost of storing SSE registers
709 in SImode, DImode and TImode */
710 8, /* MMX or SSE register to integer */
711 128, /* size of prefetch block */
712 8, /* number of parallel prefetches */
714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
715 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
716 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
717 COSTS_N_INSNS (3), /* cost of FABS instruction. */
718 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
719 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
723 struct processor_costs core2_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (3), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (3), /* DI */
732 COSTS_N_INSNS (3)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (22), /* HI */
736 COSTS_N_INSNS (22), /* SI */
737 COSTS_N_INSNS (22), /* DI */
738 COSTS_N_INSNS (22)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
743 2, /* cost for loading QImode using movzbl */
744 {6, 6, 6}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 2, /* cost of reg,reg fld/fst */
749 {6, 6, 6}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {4, 4, 4}, /* cost of loading integer registers */
752 2, /* cost of moving MMX register */
753 {6, 6}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {6, 6, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 4}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 2, /* MMX or SSE register to integer */
763 128, /* size of prefetch block */
764 8, /* number of parallel prefetches */
766 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
767 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
768 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
769 COSTS_N_INSNS (1), /* cost of FABS instruction. */
770 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
771 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
774 /* Generic64 should produce code tuned for Nocona and K8. */
776 struct processor_costs generic64_cost = {
777 COSTS_N_INSNS (1), /* cost of an add instruction */
778 /* On all chips taken into consideration lea is 2 cycles and more. With
779 this cost however our current implementation of synth_mult results in
780 use of unnecessary temporary registers causing regression on several
781 SPECfp benchmarks. */
782 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
783 COSTS_N_INSNS (1), /* variable shift costs */
784 COSTS_N_INSNS (1), /* constant shift costs */
785 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
786 COSTS_N_INSNS (4), /* HI */
787 COSTS_N_INSNS (3), /* SI */
788 COSTS_N_INSNS (4), /* DI */
789 COSTS_N_INSNS (2)}, /* other */
790 0, /* cost of multiply per each bit set */
791 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
792 COSTS_N_INSNS (26), /* HI */
793 COSTS_N_INSNS (42), /* SI */
794 COSTS_N_INSNS (74), /* DI */
795 COSTS_N_INSNS (74)}, /* other */
796 COSTS_N_INSNS (1), /* cost of movsx */
797 COSTS_N_INSNS (1), /* cost of movzx */
798 8, /* "large" insn */
800 4, /* cost for loading QImode using movzbl */
801 {4, 4, 4}, /* cost of loading integer registers
802 in QImode, HImode and SImode.
803 Relative to reg-reg move (2). */
804 {4, 4, 4}, /* cost of storing integer registers */
805 4, /* cost of reg,reg fld/fst */
806 {12, 12, 12}, /* cost of loading fp registers
807 in SFmode, DFmode and XFmode */
808 {6, 6, 8}, /* cost of storing fp registers
809 in SFmode, DFmode and XFmode */
810 2, /* cost of moving MMX register */
811 {8, 8}, /* cost of loading MMX registers
812 in SImode and DImode */
813 {8, 8}, /* cost of storing MMX registers
814 in SImode and DImode */
815 2, /* cost of moving SSE register */
816 {8, 8, 8}, /* cost of loading SSE registers
817 in SImode, DImode and TImode */
818 {8, 8, 8}, /* cost of storing SSE registers
819 in SImode, DImode and TImode */
820 5, /* MMX or SSE register to integer */
821 64, /* size of prefetch block */
822 6, /* number of parallel prefetches */
823 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
824 is increased to perhaps more appropriate value of 5. */
826 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
827 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
828 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
829 COSTS_N_INSNS (8), /* cost of FABS instruction. */
830 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
831 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
834 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
836 struct processor_costs generic32_cost = {
837 COSTS_N_INSNS (1), /* cost of an add instruction */
838 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
839 COSTS_N_INSNS (1), /* variable shift costs */
840 COSTS_N_INSNS (1), /* constant shift costs */
841 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
842 COSTS_N_INSNS (4), /* HI */
843 COSTS_N_INSNS (3), /* SI */
844 COSTS_N_INSNS (4), /* DI */
845 COSTS_N_INSNS (2)}, /* other */
846 0, /* cost of multiply per each bit set */
847 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
848 COSTS_N_INSNS (26), /* HI */
849 COSTS_N_INSNS (42), /* SI */
850 COSTS_N_INSNS (74), /* DI */
851 COSTS_N_INSNS (74)}, /* other */
852 COSTS_N_INSNS (1), /* cost of movsx */
853 COSTS_N_INSNS (1), /* cost of movzx */
854 8, /* "large" insn */
856 4, /* cost for loading QImode using movzbl */
857 {4, 4, 4}, /* cost of loading integer registers
858 in QImode, HImode and SImode.
859 Relative to reg-reg move (2). */
860 {4, 4, 4}, /* cost of storing integer registers */
861 4, /* cost of reg,reg fld/fst */
862 {12, 12, 12}, /* cost of loading fp registers
863 in SFmode, DFmode and XFmode */
864 {6, 6, 8}, /* cost of storing fp registers
865 in SFmode, DFmode and XFmode */
866 2, /* cost of moving MMX register */
867 {8, 8}, /* cost of loading MMX registers
868 in SImode and DImode */
869 {8, 8}, /* cost of storing MMX registers
870 in SImode and DImode */
871 2, /* cost of moving SSE register */
872 {8, 8, 8}, /* cost of loading SSE registers
873 in SImode, DImode and TImode */
874 {8, 8, 8}, /* cost of storing SSE registers
875 in SImode, DImode and TImode */
876 5, /* MMX or SSE register to integer */
877 64, /* size of prefetch block */
878 6, /* number of parallel prefetches */
880 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
881 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
882 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
883 COSTS_N_INSNS (8), /* cost of FABS instruction. */
884 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
885 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
888 const struct processor_costs *ix86_cost = &pentium_cost;
890 /* Processor feature/optimization bitmasks. */
891 #define m_386 (1<<PROCESSOR_I386)
892 #define m_486 (1<<PROCESSOR_I486)
893 #define m_PENT (1<<PROCESSOR_PENTIUM)
894 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
895 #define m_GEODE (1<<PROCESSOR_GEODE)
896 #define m_K6_GEODE (m_K6 | m_GEODE)
897 #define m_K6 (1<<PROCESSOR_K6)
898 #define m_ATHLON (1<<PROCESSOR_ATHLON)
899 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
900 #define m_K8 (1<<PROCESSOR_K8)
901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
903 #define m_NOCONA (1<<PROCESSOR_NOCONA)
904 #define m_CORE2 (1<<PROCESSOR_CORE2)
905 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
906 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
907 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
908 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
910 /* Generic instruction choice should be common subset of supported CPUs
911 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
913 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
914 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
915 generic because it is not working well with PPro base chips. */
916 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
918 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
919 | m_NOCONA | m_CORE2 | m_GENERIC;
920 const int x86_zero_extend_with_and = m_486 | m_PENT;
921 /* Enable to zero extend integer registers to avoid partial dependencies */
922 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
923 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
924 const int x86_double_with_add = ~m_386;
925 const int x86_use_bit_test = m_386;
926 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
927 | m_K6 | m_CORE2 | m_GENERIC;
928 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
930 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
931 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
932 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
933 /* Branch hints were put in P4 based on simulation result. But
934 after P4 was made, no performance benefit was observed with
935 branch hints. It also increases the code size. As the result,
936 icc never generates branch hints. */
937 const int x86_branch_hints = 0;
938 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
939 /*m_GENERIC | m_ATHLON_K8 ? */
940 /* We probably ought to watch for partial register stalls on Generic32
941 compilation setting as well. However in current implementation the
942 partial register stalls are not eliminated very well - they can
943 be introduced via subregs synthesized by combine and can happen
944 in caller/callee saving sequences.
945 Because this option pays back little on PPro based chips and is in conflict
946 with partial reg. dependencies used by Athlon/P4 based chips, it is better
947 to leave it off for generic32 for now. */
948 const int x86_partial_reg_stall = m_PPRO;
949 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
950 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
951 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
952 | m_CORE2 | m_GENERIC);
953 const int x86_use_mov0 = m_K6;
954 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
955 const int x86_read_modify_write = ~m_PENT;
956 const int x86_read_modify = ~(m_PENT | m_PPRO);
957 const int x86_split_long_moves = m_PPRO;
958 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
959 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
961 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
962 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
963 const int x86_qimode_math = ~(0);
964 const int x86_promote_qi_regs = 0;
965 /* On PPro this flag is meant to avoid partial register stalls. Just like
966 the x86_partial_reg_stall this option might be considered for Generic32
967 if our scheme for avoiding partial stalls was more effective. */
968 const int x86_himode_math = ~(m_PPRO);
969 const int x86_promote_hi_regs = m_PPRO;
970 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
971 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
972 | m_CORE2 | m_GENERIC;
973 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
974 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
976 | m_CORE2 | m_GENERIC;
977 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
978 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979 /* Enable if integer moves are preferred for DFmode copies */
980 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
981 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
982 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
983 | m_CORE2 | m_GENERIC;
984 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
985 | m_CORE2 | m_GENERIC;
986 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
987 for outgoing arguments will be computed and placed into the variable
988 `current_function_outgoing_args_size'. No space will be pushed onto the stack
989 for each call; instead, the function prologue should increase the stack frame
990 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
992 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
993 | m_NOCONA | m_PPRO | m_CORE2
995 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
996 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
997 const int x86_shift1 = ~m_486;
998 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
999 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1000 | m_NOCONA | m_CORE2 | m_GENERIC;
1001 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
1002 that thread 128bit SSE registers as single units versus K8 based chips that
1003 divide SSE registers to two 64bit halves.
1004 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
1005 to allow register renaming on 128bit SSE units, but usually results in one
1006 extra microop on 64bit SSE units. Experimental results shows that disabling
1007 this option on P4 brings over 20% SPECfp regression, while enabling it on
1008 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
1010 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1011 | m_GENERIC | m_AMDFAM10;
1012 /* Set for machines where the type and dependencies are resolved on SSE
1013 register parts instead of whole registers, so we may maintain just
1014 lower part of scalar values in proper format leaving the upper part
1016 const int x86_sse_split_regs = m_ATHLON_K8;
1017 /* Code generation for scalar reg-reg moves of single and double precision data:
1018 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1022 if (x86_sse_partial_reg_dependency == true)
1027 Code generation for scalar loads of double precision data:
1028 if (x86_sse_split_regs == true)
1029 movlpd mem, reg (gas syntax)
1033 Code generation for unaligned packed loads of single precision data
1034 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1035 if (x86_sse_unaligned_move_optimal)
1038 if (x86_sse_partial_reg_dependency == true)
1050 Code generation for unaligned packed loads of double precision data
1051 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1052 if (x86_sse_unaligned_move_optimal)
1055 if (x86_sse_split_regs == true)
1066 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1067 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1068 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1069 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1070 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
1071 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1073 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1074 integer data in xmm registers. Which results in pretty abysmal code. */
1075 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1077 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1078 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1079 /* Some CPU cores are not able to predict more than 4 branch instructions in
1080 the 16 byte window. */
1081 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1082 | m_NOCONA | m_CORE2 | m_GENERIC;
1083 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1084 | m_CORE2 | m_GENERIC;
1085 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1086 /* Compare and exchange was added for 80486. */
1087 const int x86_cmpxchg = ~m_386;
1088 /* Compare and exchange 8 bytes was added for pentium. */
1089 const int x86_cmpxchg8b = ~(m_386 | m_486);
1090 /* Exchange and add was added for 80486. */
1091 const int x86_xadd = ~m_386;
1092 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1094 /* In case the average insn count for single function invocation is
1095 lower than this constant, emit fast (but longer) prologue and
1097 #define FAST_PROLOGUE_INSN_COUNT 20
1099 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1100 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1101 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1102 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1104 /* Array of the smallest class containing reg number REGNO, indexed by
1105 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1107 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1109 /* ax, dx, cx, bx */
1110 AREG, DREG, CREG, BREG,
1111 /* si, di, bp, sp */
1112 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1114 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1115 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1118 /* flags, fpsr, dirflag, frame */
1119 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1120 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1122 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1124 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1125 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1126 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1130 /* The "default" register map used in 32bit mode. */
1132 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1134 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1135 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1136 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1137 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1138 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1139 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1140 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1143 static int const x86_64_int_parameter_registers[6] =
1145 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1146 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1149 static int const x86_64_int_return_registers[4] =
1151 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1154 /* The "default" register map used in 64bit mode. */
1155 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1157 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1158 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1159 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1160 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1161 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1162 8,9,10,11,12,13,14,15, /* extended integer registers */
1163 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1166 /* Define the register numbers to be used in Dwarf debugging information.
1167 The SVR4 reference port C compiler uses the following register numbers
1168 in its Dwarf output code:
1169 0 for %eax (gcc regno = 0)
1170 1 for %ecx (gcc regno = 2)
1171 2 for %edx (gcc regno = 1)
1172 3 for %ebx (gcc regno = 3)
1173 4 for %esp (gcc regno = 7)
1174 5 for %ebp (gcc regno = 6)
1175 6 for %esi (gcc regno = 4)
1176 7 for %edi (gcc regno = 5)
1177 The following three DWARF register numbers are never generated by
1178 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1179 believes these numbers have these meanings.
1180 8 for %eip (no gcc equivalent)
1181 9 for %eflags (gcc regno = 17)
1182 10 for %trapno (no gcc equivalent)
1183 It is not at all clear how we should number the FP stack registers
1184 for the x86 architecture. If the version of SDB on x86/svr4 were
1185 a bit less brain dead with respect to floating-point then we would
1186 have a precedent to follow with respect to DWARF register numbers
1187 for x86 FP registers, but the SDB on x86/svr4 is so completely
1188 broken with respect to FP registers that it is hardly worth thinking
1189 of it as something to strive for compatibility with.
1190 The version of x86/svr4 SDB I have at the moment does (partially)
1191 seem to believe that DWARF register number 11 is associated with
1192 the x86 register %st(0), but that's about all. Higher DWARF
1193 register numbers don't seem to be associated with anything in
1194 particular, and even for DWARF regno 11, SDB only seems to under-
1195 stand that it should say that a variable lives in %st(0) (when
1196 asked via an `=' command) if we said it was in DWARF regno 11,
1197 but SDB still prints garbage when asked for the value of the
1198 variable in question (via a `/' command).
1199 (Also note that the labels SDB prints for various FP stack regs
1200 when doing an `x' command are all wrong.)
1201 Note that these problems generally don't affect the native SVR4
1202 C compiler because it doesn't allow the use of -O with -g and
1203 because when it is *not* optimizing, it allocates a memory
1204 location for each floating-point variable, and the memory
1205 location is what gets described in the DWARF AT_location
1206 attribute for the variable in question.
1207 Regardless of the severe mental illness of the x86/svr4 SDB, we
1208 do something sensible here and we use the following DWARF
1209 register numbers. Note that these are all stack-top-relative
1211 11 for %st(0) (gcc regno = 8)
1212 12 for %st(1) (gcc regno = 9)
1213 13 for %st(2) (gcc regno = 10)
1214 14 for %st(3) (gcc regno = 11)
1215 15 for %st(4) (gcc regno = 12)
1216 16 for %st(5) (gcc regno = 13)
1217 17 for %st(6) (gcc regno = 14)
1218 18 for %st(7) (gcc regno = 15)
1220 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1222 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1223 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1224 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1225 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1226 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1227 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1231 /* Test and compare insns in i386.md store the information needed to
1232 generate branch and scc insns here. */
1234 rtx ix86_compare_op0 = NULL_RTX;
1235 rtx ix86_compare_op1 = NULL_RTX;
1236 rtx ix86_compare_emitted = NULL_RTX;
1238 /* Size of the register save area. */
1239 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1241 /* Define the structure for the machine field in struct function. */
1243 struct stack_local_entry GTY(())
1245 unsigned short mode;
1248 struct stack_local_entry *next;
1251 /* Structure describing stack frame layout.
1252 Stack grows downward:
1258 saved frame pointer if frame_pointer_needed
1259 <- HARD_FRAME_POINTER
1264 [va_arg registers] (
1265 > to_allocate <- FRAME_POINTER
1275 HOST_WIDE_INT frame;
1277 int outgoing_arguments_size;
1280 HOST_WIDE_INT to_allocate;
1281 /* The offsets relative to ARG_POINTER. */
1282 HOST_WIDE_INT frame_pointer_offset;
1283 HOST_WIDE_INT hard_frame_pointer_offset;
1284 HOST_WIDE_INT stack_pointer_offset;
1286 /* When save_regs_using_mov is set, emit prologue using
1287 move instead of push instructions. */
1288 bool save_regs_using_mov;
1291 /* Code model option. */
1292 enum cmodel ix86_cmodel;
1294 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1296 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1298 /* Which unit we are generating floating point math for. */
1299 enum fpmath_unit ix86_fpmath;
1301 /* Which cpu are we scheduling for. */
1302 enum processor_type ix86_tune;
1303 /* Which instruction set architecture to use. */
1304 enum processor_type ix86_arch;
1306 /* true if sse prefetch instruction is not NOOP. */
1307 int x86_prefetch_sse;
1309 /* true if cmpxchg16b is supported. */
1312 /* ix86_regparm_string as a number */
1313 static int ix86_regparm;
1315 /* -mstackrealign option */
1316 extern int ix86_force_align_arg_pointer;
1317 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1319 /* Preferred alignment for stack boundary in bits. */
1320 unsigned int ix86_preferred_stack_boundary;
1322 /* Values 1-5: see jump.c */
1323 int ix86_branch_cost;
1325 /* Variables which are this size or smaller are put in the data/bss
1326 or ldata/lbss sections. */
1328 int ix86_section_threshold = 65536;
1330 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1331 char internal_label_prefix[16];
1332 int internal_label_prefix_len;
1334 static bool ix86_handle_option (size_t, const char *, int);
1335 static void output_pic_addr_const (FILE *, rtx, int);
1336 static void put_condition_code (enum rtx_code, enum machine_mode,
1338 static const char *get_some_local_dynamic_name (void);
1339 static int get_some_local_dynamic_name_1 (rtx *, void *);
1340 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1341 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1343 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1344 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1346 static rtx get_thread_pointer (int);
1347 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1348 static void get_pc_thunk_name (char [32], unsigned int);
1349 static rtx gen_push (rtx);
1350 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1351 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1352 static struct machine_function * ix86_init_machine_status (void);
1353 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1354 static int ix86_nsaved_regs (void);
1355 static void ix86_emit_save_regs (void);
1356 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1357 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1358 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1359 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1360 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1361 static rtx ix86_expand_aligntest (rtx, int);
1362 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1363 static int ix86_issue_rate (void);
1364 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1365 static int ia32_multipass_dfa_lookahead (void);
1366 static void ix86_init_mmx_sse_builtins (void);
1367 static rtx x86_this_parameter (tree);
1368 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1369 HOST_WIDE_INT, tree);
1370 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1371 static void x86_file_start (void);
1372 static void ix86_reorg (void);
1373 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1374 static tree ix86_build_builtin_va_list (void);
1375 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1377 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1378 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1379 static bool ix86_vector_mode_supported_p (enum machine_mode);
1381 static int ix86_address_cost (rtx);
1382 static bool ix86_cannot_force_const_mem (rtx);
1383 static rtx ix86_delegitimize_address (rtx);
1385 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1387 struct builtin_description;
1388 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1390 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1392 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1393 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1394 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1395 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1396 static rtx safe_vector_operand (rtx, enum machine_mode);
1397 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1398 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1399 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1400 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1401 static int ix86_fp_comparison_cost (enum rtx_code code);
1402 static unsigned int ix86_select_alt_pic_regnum (void);
1403 static int ix86_save_reg (unsigned int, int);
1404 static void ix86_compute_frame_layout (struct ix86_frame *);
1405 static int ix86_comp_type_attributes (tree, tree);
1406 static int ix86_function_regparm (tree, tree);
1407 const struct attribute_spec ix86_attribute_table[];
1408 static bool ix86_function_ok_for_sibcall (tree, tree);
1409 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1410 static int ix86_value_regno (enum machine_mode, tree, tree);
1411 static bool contains_128bit_aligned_vector_p (tree);
1412 static rtx ix86_struct_value_rtx (tree, int);
1413 static bool ix86_ms_bitfield_layout_p (tree);
1414 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1415 static int extended_reg_mentioned_1 (rtx *, void *);
1416 static bool ix86_rtx_costs (rtx, int, int, int *);
1417 static int min_insn_size (rtx);
1418 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1419 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1420 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1422 static void ix86_init_builtins (void);
1423 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1424 static const char *ix86_mangle_fundamental_type (tree);
1425 static tree ix86_stack_protect_fail (void);
1426 static rtx ix86_internal_arg_pointer (void);
1427 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1429 /* This function is only used on Solaris. */
1430 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1433 /* Register class used for passing given 64bit part of the argument.
1434 These represent classes as documented by the PS ABI, with the exception
1435 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1436 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1438 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1439 whenever possible (upper half does contain padding).
1441 enum x86_64_reg_class
1444 X86_64_INTEGER_CLASS,
1445 X86_64_INTEGERSI_CLASS,
1452 X86_64_COMPLEX_X87_CLASS,
1455 static const char * const x86_64_reg_class_name[] = {
1456 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1457 "sseup", "x87", "x87up", "cplx87", "no"
1460 #define MAX_CLASSES 4
1462 /* Table of constants used by fldpi, fldln2, etc.... */
1463 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1464 static bool ext_80387_constants_init = 0;
1465 static void init_ext_80387_constants (void);
1466 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1467 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1468 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1469 static section *x86_64_elf_select_section (tree decl, int reloc,
1470 unsigned HOST_WIDE_INT align)
1473 /* Initialize the GCC target structure. */
1474 #undef TARGET_ATTRIBUTE_TABLE
1475 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1476 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1477 # undef TARGET_MERGE_DECL_ATTRIBUTES
1478 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1481 #undef TARGET_COMP_TYPE_ATTRIBUTES
1482 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1484 #undef TARGET_INIT_BUILTINS
1485 #define TARGET_INIT_BUILTINS ix86_init_builtins
1486 #undef TARGET_EXPAND_BUILTIN
1487 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1489 #undef TARGET_ASM_FUNCTION_EPILOGUE
1490 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1492 #undef TARGET_ENCODE_SECTION_INFO
1493 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1494 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1496 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1499 #undef TARGET_ASM_OPEN_PAREN
1500 #define TARGET_ASM_OPEN_PAREN ""
1501 #undef TARGET_ASM_CLOSE_PAREN
1502 #define TARGET_ASM_CLOSE_PAREN ""
1504 #undef TARGET_ASM_ALIGNED_HI_OP
1505 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1506 #undef TARGET_ASM_ALIGNED_SI_OP
1507 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1509 #undef TARGET_ASM_ALIGNED_DI_OP
1510 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1513 #undef TARGET_ASM_UNALIGNED_HI_OP
1514 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1515 #undef TARGET_ASM_UNALIGNED_SI_OP
1516 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1517 #undef TARGET_ASM_UNALIGNED_DI_OP
1518 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1520 #undef TARGET_SCHED_ADJUST_COST
1521 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1522 #undef TARGET_SCHED_ISSUE_RATE
1523 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1524 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1525 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1526 ia32_multipass_dfa_lookahead
1528 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1529 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1532 #undef TARGET_HAVE_TLS
1533 #define TARGET_HAVE_TLS true
1535 #undef TARGET_CANNOT_FORCE_CONST_MEM
1536 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1537 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1538 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1540 #undef TARGET_DELEGITIMIZE_ADDRESS
1541 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1543 #undef TARGET_MS_BITFIELD_LAYOUT_P
1544 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1547 #undef TARGET_BINDS_LOCAL_P
1548 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1551 #undef TARGET_ASM_OUTPUT_MI_THUNK
1552 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1553 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1554 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1556 #undef TARGET_ASM_FILE_START
1557 #define TARGET_ASM_FILE_START x86_file_start
1559 #undef TARGET_DEFAULT_TARGET_FLAGS
1560 #define TARGET_DEFAULT_TARGET_FLAGS \
1562 | TARGET_64BIT_DEFAULT \
1563 | TARGET_SUBTARGET_DEFAULT \
1564 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1566 #undef TARGET_HANDLE_OPTION
1567 #define TARGET_HANDLE_OPTION ix86_handle_option
1569 #undef TARGET_RTX_COSTS
1570 #define TARGET_RTX_COSTS ix86_rtx_costs
1571 #undef TARGET_ADDRESS_COST
1572 #define TARGET_ADDRESS_COST ix86_address_cost
1574 #undef TARGET_FIXED_CONDITION_CODE_REGS
1575 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1576 #undef TARGET_CC_MODES_COMPATIBLE
1577 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1579 #undef TARGET_MACHINE_DEPENDENT_REORG
1580 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1582 #undef TARGET_BUILD_BUILTIN_VA_LIST
1583 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1585 #undef TARGET_MD_ASM_CLOBBERS
1586 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1588 #undef TARGET_PROMOTE_PROTOTYPES
1589 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1590 #undef TARGET_STRUCT_VALUE_RTX
1591 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1592 #undef TARGET_SETUP_INCOMING_VARARGS
1593 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1594 #undef TARGET_MUST_PASS_IN_STACK
1595 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1596 #undef TARGET_PASS_BY_REFERENCE
1597 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1598 #undef TARGET_INTERNAL_ARG_POINTER
1599 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1600 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1601 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1603 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1604 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1606 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1607 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1609 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1610 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1613 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1614 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1617 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1618 #undef TARGET_INSERT_ATTRIBUTES
1619 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1622 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1623 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1625 #undef TARGET_STACK_PROTECT_FAIL
1626 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1628 #undef TARGET_FUNCTION_VALUE
1629 #define TARGET_FUNCTION_VALUE ix86_function_value
1631 struct gcc_target targetm = TARGET_INITIALIZER;
1634 /* The svr4 ABI for the i386 says that records and unions are returned
1636 #ifndef DEFAULT_PCC_STRUCT_RETURN
1637 #define DEFAULT_PCC_STRUCT_RETURN 1
1640 /* Implement TARGET_HANDLE_OPTION. */
1643 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1650 target_flags &= ~MASK_3DNOW_A;
1651 target_flags_explicit |= MASK_3DNOW_A;
1658 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1659 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1666 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1667 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1674 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1675 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1682 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1683 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1690 target_flags &= ~MASK_AES;
1691 target_flags_explicit |= MASK_AES;
1700 /* Sometimes certain combinations of command options do not make
1701 sense on a particular target machine. You can define a macro
1702 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1703 defined, is executed once just after all the command options have
1706 Don't use this macro to turn on various extra optimizations for
1707 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1710 override_options (void)
1713 int ix86_tune_defaulted = 0;
1715 /* Comes from final.c -- no real reason to change it. */
1716 #define MAX_CODE_ALIGN 16
1720 const struct processor_costs *cost; /* Processor costs */
1721 const int target_enable; /* Target flags to enable. */
1722 const int target_disable; /* Target flags to disable. */
1723 const int align_loop; /* Default alignments. */
1724 const int align_loop_max_skip;
1725 const int align_jump;
1726 const int align_jump_max_skip;
1727 const int align_func;
1729 const processor_target_table[PROCESSOR_max] =
1731 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1732 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1733 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1734 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1735 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1736 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1737 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1738 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1739 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1740 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1741 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1742 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1743 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1744 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1747 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1750 const char *const name; /* processor name or nickname. */
1751 const enum processor_type processor;
1752 const enum pta_flags
1758 PTA_PREFETCH_SSE = 16,
1769 const processor_alias_table[] =
1771 {"i386", PROCESSOR_I386, 0},
1772 {"i486", PROCESSOR_I486, 0},
1773 {"i586", PROCESSOR_PENTIUM, 0},
1774 {"pentium", PROCESSOR_PENTIUM, 0},
1775 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1776 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1777 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1778 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1779 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1780 {"i686", PROCESSOR_PENTIUMPRO, 0},
1781 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1782 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1783 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1784 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1785 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1786 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1787 | PTA_MMX | PTA_PREFETCH_SSE},
1788 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1789 | PTA_MMX | PTA_PREFETCH_SSE},
1790 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1791 | PTA_MMX | PTA_PREFETCH_SSE},
1792 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1793 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1794 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1795 | PTA_64BIT | PTA_MMX
1796 | PTA_PREFETCH_SSE | PTA_CX16},
1797 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1799 {"k6", PROCESSOR_K6, PTA_MMX},
1800 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1801 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1802 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1804 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1805 | PTA_3DNOW | PTA_3DNOW_A},
1806 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1807 | PTA_3DNOW_A | PTA_SSE},
1808 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1809 | PTA_3DNOW_A | PTA_SSE},
1810 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1811 | PTA_3DNOW_A | PTA_SSE},
1812 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1813 | PTA_SSE | PTA_SSE2 },
1814 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1815 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1816 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1819 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1820 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1821 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1824 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1825 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1826 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1827 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1829 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1830 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1831 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1832 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1833 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1834 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1835 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1836 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1837 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1838 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1839 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1840 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1843 int const pta_size = ARRAY_SIZE (processor_alias_table);
1845 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1846 SUBTARGET_OVERRIDE_OPTIONS;
1849 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1850 SUBSUBTARGET_OVERRIDE_OPTIONS;
1853 /* -fPIC is the default for x86_64. */
1854 if (TARGET_MACHO && TARGET_64BIT)
1857 /* Set the default values for switches whose default depends on TARGET_64BIT
1858 in case they weren't overwritten by command line options. */
1861 /* Mach-O doesn't support omitting the frame pointer for now. */
1862 if (flag_omit_frame_pointer == 2)
1863 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1864 if (flag_asynchronous_unwind_tables == 2)
1865 flag_asynchronous_unwind_tables = 1;
1866 if (flag_pcc_struct_return == 2)
1867 flag_pcc_struct_return = 0;
1871 if (flag_omit_frame_pointer == 2)
1872 flag_omit_frame_pointer = 0;
1873 if (flag_asynchronous_unwind_tables == 2)
1874 flag_asynchronous_unwind_tables = 0;
1875 if (flag_pcc_struct_return == 2)
1876 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1879 /* Need to check -mtune=generic first. */
1880 if (ix86_tune_string)
1882 if (!strcmp (ix86_tune_string, "generic")
1883 || !strcmp (ix86_tune_string, "i686")
1884 /* As special support for cross compilers we read -mtune=native
1885 as -mtune=generic. With native compilers we won't see the
1886 -mtune=native, as it was changed by the driver. */
1887 || !strcmp (ix86_tune_string, "native"))
1890 ix86_tune_string = "generic64";
1892 ix86_tune_string = "generic32";
1894 else if (!strncmp (ix86_tune_string, "generic", 7))
1895 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1899 if (ix86_arch_string)
1900 ix86_tune_string = ix86_arch_string;
1901 if (!ix86_tune_string)
1903 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1904 ix86_tune_defaulted = 1;
1907 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1908 need to use a sensible tune option. */
1909 if (!strcmp (ix86_tune_string, "generic")
1910 || !strcmp (ix86_tune_string, "x86-64")
1911 || !strcmp (ix86_tune_string, "i686"))
1914 ix86_tune_string = "generic64";
1916 ix86_tune_string = "generic32";
1919 if (!strcmp (ix86_tune_string, "x86-64"))
1920 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1921 "-mtune=generic instead as appropriate.");
1923 if (!ix86_arch_string)
1924 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1925 if (!strcmp (ix86_arch_string, "generic"))
1926 error ("generic CPU can be used only for -mtune= switch");
1927 if (!strncmp (ix86_arch_string, "generic", 7))
1928 error ("bad value (%s) for -march= switch", ix86_arch_string);
1930 if (ix86_cmodel_string != 0)
1932 if (!strcmp (ix86_cmodel_string, "small"))
1933 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1934 else if (!strcmp (ix86_cmodel_string, "medium"))
1935 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1937 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1938 else if (!strcmp (ix86_cmodel_string, "32"))
1939 ix86_cmodel = CM_32;
1940 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1941 ix86_cmodel = CM_KERNEL;
1942 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1943 ix86_cmodel = CM_LARGE;
1945 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1949 ix86_cmodel = CM_32;
1951 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1953 if (ix86_asm_string != 0)
1956 && !strcmp (ix86_asm_string, "intel"))
1957 ix86_asm_dialect = ASM_INTEL;
1958 else if (!strcmp (ix86_asm_string, "att"))
1959 ix86_asm_dialect = ASM_ATT;
1961 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1963 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1964 error ("code model %qs not supported in the %s bit mode",
1965 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1966 if (ix86_cmodel == CM_LARGE)
1967 sorry ("code model %<large%> not supported yet");
1968 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1969 sorry ("%i-bit mode not compiled in",
1970 (target_flags & MASK_64BIT) ? 64 : 32);
1972 for (i = 0; i < pta_size; i++)
1973 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1975 ix86_arch = processor_alias_table[i].processor;
1976 /* Default cpu tuning to the architecture. */
1977 ix86_tune = ix86_arch;
1978 if (processor_alias_table[i].flags & PTA_MMX
1979 && !(target_flags_explicit & MASK_MMX))
1980 target_flags |= MASK_MMX;
1981 if (processor_alias_table[i].flags & PTA_3DNOW
1982 && !(target_flags_explicit & MASK_3DNOW))
1983 target_flags |= MASK_3DNOW;
1984 if (processor_alias_table[i].flags & PTA_3DNOW_A
1985 && !(target_flags_explicit & MASK_3DNOW_A))
1986 target_flags |= MASK_3DNOW_A;
1987 if (processor_alias_table[i].flags & PTA_SSE
1988 && !(target_flags_explicit & MASK_SSE))
1989 target_flags |= MASK_SSE;
1990 if (processor_alias_table[i].flags & PTA_SSE2
1991 && !(target_flags_explicit & MASK_SSE2))
1992 target_flags |= MASK_SSE2;
1993 if (processor_alias_table[i].flags & PTA_SSE3
1994 && !(target_flags_explicit & MASK_SSE3))
1995 target_flags |= MASK_SSE3;
1996 if (processor_alias_table[i].flags & PTA_SSSE3
1997 && !(target_flags_explicit & MASK_SSSE3))
1998 target_flags |= MASK_SSSE3;
1999 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2000 x86_prefetch_sse = true;
2001 if (processor_alias_table[i].flags & PTA_CX16)
2002 x86_cmpxchg16b = true;
2003 if (processor_alias_table[i].flags & PTA_POPCNT
2004 && !(target_flags_explicit & MASK_POPCNT))
2005 target_flags |= MASK_POPCNT;
2006 if (processor_alias_table[i].flags & PTA_ABM
2007 && !(target_flags_explicit & MASK_ABM))
2008 target_flags |= MASK_ABM;
2009 if (processor_alias_table[i].flags & PTA_SSE4A
2010 && !(target_flags_explicit & MASK_SSE4A))
2011 target_flags |= MASK_SSE4A;
2012 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2013 error ("CPU you selected does not support x86-64 "
2019 error ("bad value (%s) for -march= switch", ix86_arch_string);
2021 for (i = 0; i < pta_size; i++)
2022 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2024 ix86_tune = processor_alias_table[i].processor;
2025 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2027 if (ix86_tune_defaulted)
2029 ix86_tune_string = "x86-64";
2030 for (i = 0; i < pta_size; i++)
2031 if (! strcmp (ix86_tune_string,
2032 processor_alias_table[i].name))
2034 ix86_tune = processor_alias_table[i].processor;
2037 error ("CPU you selected does not support x86-64 "
2040 /* Intel CPUs have always interpreted SSE prefetch instructions as
2041 NOPs; so, we can enable SSE prefetch instructions even when
2042 -mtune (rather than -march) points us to a processor that has them.
2043 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2044 higher processors. */
2045 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2046 x86_prefetch_sse = true;
2050 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2053 ix86_cost = &size_cost;
2055 ix86_cost = processor_target_table[ix86_tune].cost;
2056 target_flags |= processor_target_table[ix86_tune].target_enable;
2057 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2059 /* Arrange to set up i386_stack_locals for all functions. */
2060 init_machine_status = ix86_init_machine_status;
2062 /* Validate -mregparm= value. */
2063 if (ix86_regparm_string)
2065 i = atoi (ix86_regparm_string);
2066 if (i < 0 || i > REGPARM_MAX)
2067 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2073 ix86_regparm = REGPARM_MAX;
2075 /* If the user has provided any of the -malign-* options,
2076 warn and use that value only if -falign-* is not set.
2077 Remove this code in GCC 3.2 or later. */
2078 if (ix86_align_loops_string)
2080 warning (0, "-malign-loops is obsolete, use -falign-loops");
2081 if (align_loops == 0)
2083 i = atoi (ix86_align_loops_string);
2084 if (i < 0 || i > MAX_CODE_ALIGN)
2085 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2087 align_loops = 1 << i;
2091 if (ix86_align_jumps_string)
2093 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2094 if (align_jumps == 0)
2096 i = atoi (ix86_align_jumps_string);
2097 if (i < 0 || i > MAX_CODE_ALIGN)
2098 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2100 align_jumps = 1 << i;
2104 if (ix86_align_funcs_string)
2106 warning (0, "-malign-functions is obsolete, use -falign-functions");
2107 if (align_functions == 0)
2109 i = atoi (ix86_align_funcs_string);
2110 if (i < 0 || i > MAX_CODE_ALIGN)
2111 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2113 align_functions = 1 << i;
2117 /* Default align_* from the processor table. */
2118 if (align_loops == 0)
2120 align_loops = processor_target_table[ix86_tune].align_loop;
2121 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2123 if (align_jumps == 0)
2125 align_jumps = processor_target_table[ix86_tune].align_jump;
2126 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2128 if (align_functions == 0)
2130 align_functions = processor_target_table[ix86_tune].align_func;
2133 /* Validate -mbranch-cost= value, or provide default. */
2134 ix86_branch_cost = ix86_cost->branch_cost;
2135 if (ix86_branch_cost_string)
2137 i = atoi (ix86_branch_cost_string);
2139 error ("-mbranch-cost=%d is not between 0 and 5", i);
2141 ix86_branch_cost = i;
2143 if (ix86_section_threshold_string)
2145 i = atoi (ix86_section_threshold_string);
2147 error ("-mlarge-data-threshold=%d is negative", i);
2149 ix86_section_threshold = i;
2152 if (ix86_tls_dialect_string)
2154 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2155 ix86_tls_dialect = TLS_DIALECT_GNU;
2156 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2157 ix86_tls_dialect = TLS_DIALECT_GNU2;
2158 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2159 ix86_tls_dialect = TLS_DIALECT_SUN;
2161 error ("bad value (%s) for -mtls-dialect= switch",
2162 ix86_tls_dialect_string);
2165 /* Keep nonleaf frame pointers. */
2166 if (flag_omit_frame_pointer)
2167 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2168 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2169 flag_omit_frame_pointer = 1;
2171 /* If we're doing fast math, we don't care about comparison order
2172 wrt NaNs. This lets us use a shorter comparison sequence. */
2173 if (flag_finite_math_only)
2174 target_flags &= ~MASK_IEEE_FP;
2176 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2177 since the insns won't need emulation. */
2178 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2179 target_flags &= ~MASK_NO_FANCY_MATH_387;
2181 /* Likewise, if the target doesn't have a 387, or we've specified
2182 software floating point, don't use 387 inline intrinsics. */
2184 target_flags |= MASK_NO_FANCY_MATH_387;
2186 /* Turn on SSE3 builtins for -mssse3. */
2188 target_flags |= MASK_SSE3;
2190 /* Turn on SSE3 builtins for -msse4a. */
2192 target_flags |= MASK_SSE3;
2194 /* Turn on SSE2 builtins for -msse3. */
2196 target_flags |= MASK_SSE2;
2198 /* Turn on SSE2 builtins for -maes. */
2200 target_flags |= MASK_SSE2;
2202 /* Turn on SSE builtins for -msse2. */
2204 target_flags |= MASK_SSE;
2206 /* Turn on MMX builtins for -msse. */
2209 target_flags |= MASK_MMX & ~target_flags_explicit;
2210 x86_prefetch_sse = true;
2213 /* Turn on MMX builtins for 3Dnow. */
2215 target_flags |= MASK_MMX;
2217 /* Turn on POPCNT builtins for -mabm. */
2219 target_flags |= MASK_POPCNT;
2223 if (TARGET_ALIGN_DOUBLE)
2224 error ("-malign-double makes no sense in the 64bit mode");
2226 error ("-mrtd calling convention not supported in the 64bit mode");
2228 /* Enable by default the SSE and MMX builtins. Do allow the user to
2229 explicitly disable any of these. In particular, disabling SSE and
2230 MMX for kernel code is extremely useful. */
2232 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2233 & ~target_flags_explicit);
2237 /* i386 ABI does not specify red zone. It still makes sense to use it
2238 when programmer takes care to stack from being destroyed. */
2239 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2240 target_flags |= MASK_NO_RED_ZONE;
2243 /* Validate -mpreferred-stack-boundary= value, or provide default.
2244 The default of 128 bits is for Pentium III's SSE __m128. We can't
2245 change it because of optimize_size. Otherwise, we can't mix object
2246 files compiled with -Os and -On. */
2247 ix86_preferred_stack_boundary = 128;
2248 if (ix86_preferred_stack_boundary_string)
2250 i = atoi (ix86_preferred_stack_boundary_string);
2251 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2252 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2253 TARGET_64BIT ? 4 : 2);
2255 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2258 /* Accept -msseregparm only if at least SSE support is enabled. */
2259 if (TARGET_SSEREGPARM
2261 error ("-msseregparm used without SSE enabled");
2263 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2265 if (ix86_fpmath_string != 0)
2267 if (! strcmp (ix86_fpmath_string, "387"))
2268 ix86_fpmath = FPMATH_387;
2269 else if (! strcmp (ix86_fpmath_string, "sse"))
2273 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2274 ix86_fpmath = FPMATH_387;
2277 ix86_fpmath = FPMATH_SSE;
2279 else if (! strcmp (ix86_fpmath_string, "387,sse")
2280 || ! strcmp (ix86_fpmath_string, "sse,387"))
2284 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2285 ix86_fpmath = FPMATH_387;
2287 else if (!TARGET_80387)
2289 warning (0, "387 instruction set disabled, using SSE arithmetics");
2290 ix86_fpmath = FPMATH_SSE;
2293 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2296 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2299 /* If the i387 is disabled, then do not return values in it. */
2301 target_flags &= ~MASK_FLOAT_RETURNS;
2303 if ((x86_accumulate_outgoing_args & TUNEMASK)
2304 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2306 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2308 /* ??? Unwind info is not correct around the CFG unless either a frame
2309 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2310 unwind info generation to be aware of the CFG and propagating states
2312 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2313 || flag_exceptions || flag_non_call_exceptions)
2314 && flag_omit_frame_pointer
2315 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2317 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2318 warning (0, "unwind tables currently require either a frame pointer "
2319 "or -maccumulate-outgoing-args for correctness");
2320 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2323 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2326 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2327 p = strchr (internal_label_prefix, 'X');
2328 internal_label_prefix_len = p - internal_label_prefix;
2332 /* When scheduling description is not available, disable scheduler pass
2333 so it won't slow down the compilation and make x87 code slower. */
2334 if (!TARGET_SCHEDULE)
2335 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2338 /* switch to the appropriate section for output of DECL.
2339 DECL is either a `VAR_DECL' node or a constant of some sort.
2340 RELOC indicates whether forming the initial value of DECL requires
2341 link-time relocations. */
2344 x86_64_elf_select_section (tree decl, int reloc,
2345 unsigned HOST_WIDE_INT align)
2347 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2348 && ix86_in_large_data_p (decl))
2350 const char *sname = NULL;
2351 unsigned int flags = SECTION_WRITE;
2352 switch (categorize_decl_for_section (decl, reloc))
2357 case SECCAT_DATA_REL:
2358 sname = ".ldata.rel";
2360 case SECCAT_DATA_REL_LOCAL:
2361 sname = ".ldata.rel.local";
2363 case SECCAT_DATA_REL_RO:
2364 sname = ".ldata.rel.ro";
2366 case SECCAT_DATA_REL_RO_LOCAL:
2367 sname = ".ldata.rel.ro.local";
2371 flags |= SECTION_BSS;
2374 case SECCAT_RODATA_MERGE_STR:
2375 case SECCAT_RODATA_MERGE_STR_INIT:
2376 case SECCAT_RODATA_MERGE_CONST:
2380 case SECCAT_SRODATA:
2387 /* We don't split these for medium model. Place them into
2388 default sections and hope for best. */
2393 /* We might get called with string constants, but get_named_section
2394 doesn't like them as they are not DECLs. Also, we need to set
2395 flags in that case. */
2397 return get_section (sname, flags, NULL);
2398 return get_named_section (decl, sname, reloc);
2401 return default_elf_select_section (decl, reloc, align);
2404 /* Build up a unique section name, expressed as a
2405 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2406 RELOC indicates whether the initial value of EXP requires
2407 link-time relocations. */
2410 x86_64_elf_unique_section (tree decl, int reloc)
2412 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2413 && ix86_in_large_data_p (decl))
2415 const char *prefix = NULL;
2416 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2417 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2419 switch (categorize_decl_for_section (decl, reloc))
2422 case SECCAT_DATA_REL:
2423 case SECCAT_DATA_REL_LOCAL:
2424 case SECCAT_DATA_REL_RO:
2425 case SECCAT_DATA_REL_RO_LOCAL:
2426 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2429 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2432 case SECCAT_RODATA_MERGE_STR:
2433 case SECCAT_RODATA_MERGE_STR_INIT:
2434 case SECCAT_RODATA_MERGE_CONST:
2435 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2437 case SECCAT_SRODATA:
2444 /* We don't split these for medium model. Place them into
2445 default sections and hope for best. */
2453 plen = strlen (prefix);
2455 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2456 name = targetm.strip_name_encoding (name);
2457 nlen = strlen (name);
2459 string = alloca (nlen + plen + 1);
2460 memcpy (string, prefix, plen);
2461 memcpy (string + plen, name, nlen + 1);
2463 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2467 default_unique_section (decl, reloc);
2470 #ifdef COMMON_ASM_OP
2471 /* This says how to output assembler code to declare an
2472 uninitialized external linkage data object.
2474 For medium model x86-64 we need to use .largecomm opcode for
2477 x86_elf_aligned_common (FILE *file,
2478 const char *name, unsigned HOST_WIDE_INT size,
2481 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2482 && size > (unsigned int)ix86_section_threshold)
2483 fprintf (file, ".largecomm\t");
2485 fprintf (file, "%s", COMMON_ASM_OP);
2486 assemble_name (file, name);
2487 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2488 size, align / BITS_PER_UNIT);
2491 /* Utility function for targets to use in implementing
2492 ASM_OUTPUT_ALIGNED_BSS. */
2495 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2496 const char *name, unsigned HOST_WIDE_INT size,
2499 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2500 && size > (unsigned int)ix86_section_threshold)
2501 switch_to_section (get_named_section (decl, ".lbss", 0));
2503 switch_to_section (bss_section);
2504 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2505 #ifdef ASM_DECLARE_OBJECT_NAME
2506 last_assemble_variable_decl = decl;
2507 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2509 /* Standard thing is just output label for the object. */
2510 ASM_OUTPUT_LABEL (file, name);
2511 #endif /* ASM_DECLARE_OBJECT_NAME */
2512 ASM_OUTPUT_SKIP (file, size ? size : 1);
2517 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2519 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2520 make the problem with not enough registers even worse. */
2521 #ifdef INSN_SCHEDULING
2523 flag_schedule_insns = 0;
2527 /* The Darwin libraries never set errno, so we might as well
2528 avoid calling them when that's the only reason we would. */
2529 flag_errno_math = 0;
2531 /* The default values of these switches depend on the TARGET_64BIT
2532 that is not known at this moment. Mark these values with 2 and
2533 let user the to override these. In case there is no command line option
2534 specifying them, we will set the defaults in override_options. */
2536 flag_omit_frame_pointer = 2;
2537 flag_pcc_struct_return = 2;
2538 flag_asynchronous_unwind_tables = 2;
2539 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2540 SUBTARGET_OPTIMIZATION_OPTIONS;
2544 /* Table of valid machine attributes. */
2545 const struct attribute_spec ix86_attribute_table[] =
2547 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2548 /* Stdcall attribute says callee is responsible for popping arguments
2549 if they are not variable. */
2550 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2551 /* Fastcall attribute says callee is responsible for popping arguments
2552 if they are not variable. */
2553 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2554 /* Cdecl attribute says the callee is a normal C declaration */
2555 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2556 /* Regparm attribute specifies how many integer arguments are to be
2557 passed in registers. */
2558 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2559 /* Sseregparm attribute says we are using x86_64 calling conventions
2560 for FP arguments. */
2561 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2562 /* force_align_arg_pointer says this function realigns the stack at entry. */
2563 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2564 false, true, true, ix86_handle_cconv_attribute },
2565 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2566 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2567 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2568 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2570 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2571 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2572 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2573 SUBTARGET_ATTRIBUTE_TABLE,
2575 { NULL, 0, 0, false, false, false, NULL }
2578 /* Decide whether we can make a sibling call to a function. DECL is the
2579 declaration of the function being targeted by the call and EXP is the
2580 CALL_EXPR representing the call. */
2583 ix86_function_ok_for_sibcall (tree decl, tree exp)
2588 /* If we are generating position-independent code, we cannot sibcall
2589 optimize any indirect call, or a direct call to a global function,
2590 as the PLT requires %ebx be live. */
2591 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2598 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2599 if (POINTER_TYPE_P (func))
2600 func = TREE_TYPE (func);
2603 /* Check that the return value locations are the same. Like
2604 if we are returning floats on the 80387 register stack, we cannot
2605 make a sibcall from a function that doesn't return a float to a
2606 function that does or, conversely, from a function that does return
2607 a float to a function that doesn't; the necessary stack adjustment
2608 would not be executed. This is also the place we notice
2609 differences in the return value ABI. Note that it is ok for one
2610 of the functions to have void return type as long as the return
2611 value of the other is passed in a register. */
2612 a = ix86_function_value (TREE_TYPE (exp), func, false);
2613 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2615 if (STACK_REG_P (a) || STACK_REG_P (b))
2617 if (!rtx_equal_p (a, b))
2620 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2622 else if (!rtx_equal_p (a, b))
2625 /* If this call is indirect, we'll need to be able to use a call-clobbered
2626 register for the address of the target function. Make sure that all
2627 such registers are not used for passing parameters. */
2628 if (!decl && !TARGET_64BIT)
2632 /* We're looking at the CALL_EXPR, we need the type of the function. */
2633 type = TREE_OPERAND (exp, 0); /* pointer expression */
2634 type = TREE_TYPE (type); /* pointer type */
2635 type = TREE_TYPE (type); /* function type */
2637 if (ix86_function_regparm (type, NULL) >= 3)
2639 /* ??? Need to count the actual number of registers to be used,
2640 not the possible number of registers. Fix later. */
2645 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2646 /* Dllimport'd functions are also called indirectly. */
2647 if (decl && DECL_DLLIMPORT_P (decl)
2648 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2652 /* If we forced aligned the stack, then sibcalling would unalign the
2653 stack, which may break the called function. */
2654 if (cfun->machine->force_align_arg_pointer)
2657 /* Otherwise okay. That also includes certain types of indirect calls. */
2661 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2662 calling convention attributes;
2663 arguments as in struct attribute_spec.handler. */
2666 ix86_handle_cconv_attribute (tree *node, tree name,
2668 int flags ATTRIBUTE_UNUSED,
2671 if (TREE_CODE (*node) != FUNCTION_TYPE
2672 && TREE_CODE (*node) != METHOD_TYPE
2673 && TREE_CODE (*node) != FIELD_DECL
2674 && TREE_CODE (*node) != TYPE_DECL)
2676 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2677 IDENTIFIER_POINTER (name));
2678 *no_add_attrs = true;
2682 /* Can combine regparm with all attributes but fastcall. */
2683 if (is_attribute_p ("regparm", name))
2687 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2689 error ("fastcall and regparm attributes are not compatible");
2692 cst = TREE_VALUE (args);
2693 if (TREE_CODE (cst) != INTEGER_CST)
2695 warning (OPT_Wattributes,
2696 "%qs attribute requires an integer constant argument",
2697 IDENTIFIER_POINTER (name));
2698 *no_add_attrs = true;
2700 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2702 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2703 IDENTIFIER_POINTER (name), REGPARM_MAX);
2704 *no_add_attrs = true;
2708 && lookup_attribute (ix86_force_align_arg_pointer_string,
2709 TYPE_ATTRIBUTES (*node))
2710 && compare_tree_int (cst, REGPARM_MAX-1))
2712 error ("%s functions limited to %d register parameters",
2713 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2721 warning (OPT_Wattributes, "%qs attribute ignored",
2722 IDENTIFIER_POINTER (name));
2723 *no_add_attrs = true;
2727 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2728 if (is_attribute_p ("fastcall", name))
2730 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2732 error ("fastcall and cdecl attributes are not compatible");
2734 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2736 error ("fastcall and stdcall attributes are not compatible");
2738 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2740 error ("fastcall and regparm attributes are not compatible");
2744 /* Can combine stdcall with fastcall (redundant), regparm and
2746 else if (is_attribute_p ("stdcall", name))
2748 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2750 error ("stdcall and cdecl attributes are not compatible");
2752 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2754 error ("stdcall and fastcall attributes are not compatible");
2758 /* Can combine cdecl with regparm and sseregparm. */
2759 else if (is_attribute_p ("cdecl", name))
2761 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2763 error ("stdcall and cdecl attributes are not compatible");
2765 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2767 error ("fastcall and cdecl attributes are not compatible");
2771 /* Can combine sseregparm with all attributes. */
2776 /* Return 0 if the attributes for two types are incompatible, 1 if they
2777 are compatible, and 2 if they are nearly compatible (which causes a
2778 warning to be generated). */
2781 ix86_comp_type_attributes (tree type1, tree type2)
2783 /* Check for mismatch of non-default calling convention. */
2784 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2786 if (TREE_CODE (type1) != FUNCTION_TYPE)
2789 /* Check for mismatched fastcall/regparm types. */
2790 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2791 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2792 || (ix86_function_regparm (type1, NULL)
2793 != ix86_function_regparm (type2, NULL)))
2796 /* Check for mismatched sseregparm types. */
2797 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2798 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2801 /* Check for mismatched return types (cdecl vs stdcall). */
2802 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2803 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2809 /* Return the regparm value for a function with the indicated TYPE and DECL.
2810 DECL may be NULL when calling function indirectly
2811 or considering a libcall. */
2814 ix86_function_regparm (tree type, tree decl)
2817 int regparm = ix86_regparm;
2818 bool user_convention = false;
2822 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2825 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2826 user_convention = true;
2829 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2832 user_convention = true;
2835 /* Use register calling convention for local functions when possible. */
2836 if (!TARGET_64BIT && !user_convention && decl
2837 && flag_unit_at_a_time && !profile_flag)
2839 struct cgraph_local_info *i = cgraph_local_info (decl);
2842 int local_regparm, globals = 0, regno;
2844 /* Make sure no regparm register is taken by a global register
2846 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2847 if (global_regs[local_regparm])
2849 /* We can't use regparm(3) for nested functions as these use
2850 static chain pointer in third argument. */
2851 if (local_regparm == 3
2852 && decl_function_context (decl)
2853 && !DECL_NO_STATIC_CHAIN (decl))
2855 /* If the function realigns its stackpointer, the
2856 prologue will clobber %ecx. If we've already
2857 generated code for the callee, the callee
2858 DECL_STRUCT_FUNCTION is gone, so we fall back to
2859 scanning the attributes for the self-realigning
2861 if ((DECL_STRUCT_FUNCTION (decl)
2862 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2863 || (!DECL_STRUCT_FUNCTION (decl)
2864 && lookup_attribute (ix86_force_align_arg_pointer_string,
2865 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2867 /* Each global register variable increases register preassure,
2868 so the more global reg vars there are, the smaller regparm
2869 optimization use, unless requested by the user explicitly. */
2870 for (regno = 0; regno < 6; regno++)
2871 if (global_regs[regno])
2874 = globals < local_regparm ? local_regparm - globals : 0;
2876 if (local_regparm > regparm)
2877 regparm = local_regparm;
2884 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2885 DFmode (2) arguments in SSE registers for a function with the
2886 indicated TYPE and DECL. DECL may be NULL when calling function
2887 indirectly or considering a libcall. Otherwise return 0. */
2890 ix86_function_sseregparm (tree type, tree decl)
2892 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2893 by the sseregparm attribute. */
2894 if (TARGET_SSEREGPARM
2896 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2901 error ("Calling %qD with attribute sseregparm without "
2902 "SSE/SSE2 enabled", decl);
2904 error ("Calling %qT with attribute sseregparm without "
2905 "SSE/SSE2 enabled", type);
2912 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2913 (and DFmode for SSE2) arguments in SSE registers,
2914 even for 32-bit targets. */
2915 if (!TARGET_64BIT && decl
2916 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2918 struct cgraph_local_info *i = cgraph_local_info (decl);
2920 return TARGET_SSE2 ? 2 : 1;
2926 /* Return true if EAX is live at the start of the function. Used by
2927 ix86_expand_prologue to determine if we need special help before
2928 calling allocate_stack_worker. */
2931 ix86_eax_live_at_start_p (void)
2933 /* Cheat. Don't bother working forward from ix86_function_regparm
2934 to the function type to whether an actual argument is located in
2935 eax. Instead just look at cfg info, which is still close enough
2936 to correct at this point. This gives false positives for broken
2937 functions that might use uninitialized data that happens to be
2938 allocated in eax, but who cares? */
2939 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2942 /* Value is the number of bytes of arguments automatically
2943 popped when returning from a subroutine call.
2944 FUNDECL is the declaration node of the function (as a tree),
2945 FUNTYPE is the data type of the function (as a tree),
2946 or for a library call it is an identifier node for the subroutine name.
2947 SIZE is the number of bytes of arguments passed on the stack.
2949 On the 80386, the RTD insn may be used to pop them if the number
2950 of args is fixed, but if the number is variable then the caller
2951 must pop them all. RTD can't be used for library calls now
2952 because the library is compiled with the Unix compiler.
2953 Use of RTD is a selectable option, since it is incompatible with
2954 standard Unix calling sequences. If the option is not selected,
2955 the caller must always pop the args.
2957 The attribute stdcall is equivalent to RTD on a per module basis. */
2960 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2962 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2964 /* Cdecl functions override -mrtd, and never pop the stack. */
2965 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2967 /* Stdcall and fastcall functions will pop the stack if not
2969 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2970 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2974 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2975 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2976 == void_type_node)))
2980 /* Lose any fake structure return argument if it is passed on the stack. */
2981 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2983 && !KEEP_AGGREGATE_RETURN_POINTER)
2985 int nregs = ix86_function_regparm (funtype, fundecl);
2988 return GET_MODE_SIZE (Pmode);
2994 /* Argument support functions. */
2996 /* Return true when register may be used to pass function parameters. */
2998 ix86_function_arg_regno_p (int regno)
3004 return (regno < REGPARM_MAX
3005 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3007 return (regno < REGPARM_MAX
3008 || (TARGET_MMX && MMX_REGNO_P (regno)
3009 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3010 || (TARGET_SSE && SSE_REGNO_P (regno)
3011 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3016 if (SSE_REGNO_P (regno) && TARGET_SSE)
3021 if (TARGET_SSE && SSE_REGNO_P (regno)
3022 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3025 /* RAX is used as hidden argument to va_arg functions. */
3028 for (i = 0; i < REGPARM_MAX; i++)
3029 if (regno == x86_64_int_parameter_registers[i])
3034 /* Return if we do not know how to pass TYPE solely in registers. */
3037 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3039 if (must_pass_in_stack_var_size_or_pad (mode, type))
3042 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3043 The layout_type routine is crafty and tries to trick us into passing
3044 currently unsupported vector types on the stack by using TImode. */
3045 return (!TARGET_64BIT && mode == TImode
3046 && type && TREE_CODE (type) != VECTOR_TYPE);
3049 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3050 for a call to a function whose data type is FNTYPE.
3051 For a library call, FNTYPE is 0. */
3054 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3055 tree fntype, /* tree ptr for function decl */
3056 rtx libname, /* SYMBOL_REF of library name or 0 */
3059 static CUMULATIVE_ARGS zero_cum;
3060 tree param, next_param;
3062 if (TARGET_DEBUG_ARG)
3064 fprintf (stderr, "\ninit_cumulative_args (");
3066 fprintf (stderr, "fntype code = %s, ret code = %s",
3067 tree_code_name[(int) TREE_CODE (fntype)],
3068 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3070 fprintf (stderr, "no fntype");
3073 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3078 /* Set up the number of registers to use for passing arguments. */
3079 cum->nregs = ix86_regparm;
3081 cum->sse_nregs = SSE_REGPARM_MAX;
3083 cum->mmx_nregs = MMX_REGPARM_MAX;
3084 cum->warn_sse = true;
3085 cum->warn_mmx = true;
3086 cum->maybe_vaarg = false;
3088 /* Use ecx and edx registers if function has fastcall attribute,
3089 else look for regparm information. */
3090 if (fntype && !TARGET_64BIT)
3092 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3098 cum->nregs = ix86_function_regparm (fntype, fndecl);
3101 /* Set up the number of SSE registers used for passing SFmode
3102 and DFmode arguments. Warn for mismatching ABI. */
3103 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3105 /* Determine if this function has variable arguments. This is
3106 indicated by the last argument being 'void_type_mode' if there
3107 are no variable arguments. If there are variable arguments, then
3108 we won't pass anything in registers in 32-bit mode. */
3110 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3112 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3113 param != 0; param = next_param)
3115 next_param = TREE_CHAIN (param);
3116 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3126 cum->float_in_sse = 0;
3128 cum->maybe_vaarg = true;
3132 if ((!fntype && !libname)
3133 || (fntype && !TYPE_ARG_TYPES (fntype)))
3134 cum->maybe_vaarg = true;
3136 if (TARGET_DEBUG_ARG)
3137 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3142 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3143 But in the case of vector types, it is some vector mode.
3145 When we have only some of our vector isa extensions enabled, then there
3146 are some modes for which vector_mode_supported_p is false. For these
3147 modes, the generic vector support in gcc will choose some non-vector mode
3148 in order to implement the type. By computing the natural mode, we'll
3149 select the proper ABI location for the operand and not depend on whatever
3150 the middle-end decides to do with these vector types. */
3152 static enum machine_mode
3153 type_natural_mode (tree type)
3155 enum machine_mode mode = TYPE_MODE (type);
3157 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3159 HOST_WIDE_INT size = int_size_in_bytes (type);
3160 if ((size == 8 || size == 16)
3161 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3162 && TYPE_VECTOR_SUBPARTS (type) > 1)
3164 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3166 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3167 mode = MIN_MODE_VECTOR_FLOAT;
3169 mode = MIN_MODE_VECTOR_INT;
3171 /* Get the mode which has this inner mode and number of units. */
3172 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3173 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3174 && GET_MODE_INNER (mode) == innermode)
3184 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3185 this may not agree with the mode that the type system has chosen for the
3186 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3187 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3190 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3195 if (orig_mode != BLKmode)
3196 tmp = gen_rtx_REG (orig_mode, regno);
3199 tmp = gen_rtx_REG (mode, regno);
3200 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3201 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3207 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3208 of this code is to classify each 8bytes of incoming argument by the register
3209 class and assign registers accordingly. */
3211 /* Return the union class of CLASS1 and CLASS2.
3212 See the x86-64 PS ABI for details. */
3214 static enum x86_64_reg_class
3215 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3217 /* Rule #1: If both classes are equal, this is the resulting class. */
3218 if (class1 == class2)
3221 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3223 if (class1 == X86_64_NO_CLASS)
3225 if (class2 == X86_64_NO_CLASS)
3228 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3229 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3230 return X86_64_MEMORY_CLASS;
3232 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3233 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3234 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3235 return X86_64_INTEGERSI_CLASS;
3236 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3237 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3238 return X86_64_INTEGER_CLASS;
3240 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3242 if (class1 == X86_64_X87_CLASS
3243 || class1 == X86_64_X87UP_CLASS
3244 || class1 == X86_64_COMPLEX_X87_CLASS
3245 || class2 == X86_64_X87_CLASS
3246 || class2 == X86_64_X87UP_CLASS
3247 || class2 == X86_64_COMPLEX_X87_CLASS)
3248 return X86_64_MEMORY_CLASS;
3250 /* Rule #6: Otherwise class SSE is used. */
3251 return X86_64_SSE_CLASS;
3254 /* Classify the argument of type TYPE and mode MODE.
3255 CLASSES will be filled by the register class used to pass each word
3256 of the operand. The number of words is returned. In case the parameter
3257 should be passed in memory, 0 is returned. As a special case for zero
3258 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3260 BIT_OFFSET is used internally for handling records and specifies offset
3261 of the offset in bits modulo 256 to avoid overflow cases.
3263 See the x86-64 PS ABI for details.
3267 classify_argument (enum machine_mode mode, tree type,
3268 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3270 HOST_WIDE_INT bytes =
3271 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3272 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3274 /* Variable sized entities are always passed/returned in memory. */
3278 if (mode != VOIDmode
3279 && targetm.calls.must_pass_in_stack (mode, type))
3282 if (type && AGGREGATE_TYPE_P (type))
3286 enum x86_64_reg_class subclasses[MAX_CLASSES];
3288 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3292 for (i = 0; i < words; i++)
3293 classes[i] = X86_64_NO_CLASS;
3295 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3296 signalize memory class, so handle it as special case. */
3299 classes[0] = X86_64_NO_CLASS;
3303 /* Classify each field of record and merge classes. */
3304 switch (TREE_CODE (type))
3307 /* For classes first merge in the field of the subclasses. */
3308 if (TYPE_BINFO (type))
3310 tree binfo, base_binfo;
3313 for (binfo = TYPE_BINFO (type), basenum = 0;
3314 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3317 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3318 tree type = BINFO_TYPE (base_binfo);
3320 num = classify_argument (TYPE_MODE (type),
3322 (offset + bit_offset) % 256);
3325 for (i = 0; i < num; i++)
3327 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3329 merge_classes (subclasses[i], classes[i + pos]);
3333 /* And now merge the fields of structure. */
3334 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3336 if (TREE_CODE (field) == FIELD_DECL)
3340 if (TREE_TYPE (field) == error_mark_node)
3343 /* Bitfields are always classified as integer. Handle them
3344 early, since later code would consider them to be
3345 misaligned integers. */
3346 if (DECL_BIT_FIELD (field))
3348 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3349 i < ((int_bit_position (field) + (bit_offset % 64))
3350 + tree_low_cst (DECL_SIZE (field), 0)
3353 merge_classes (X86_64_INTEGER_CLASS,
3358 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3359 TREE_TYPE (field), subclasses,
3360 (int_bit_position (field)
3361 + bit_offset) % 256);
3364 for (i = 0; i < num; i++)
3367 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3369 merge_classes (subclasses[i], classes[i + pos]);
3377 /* Arrays are handled as small records. */
3380 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3381 TREE_TYPE (type), subclasses, bit_offset);
3385 /* The partial classes are now full classes. */
3386 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3387 subclasses[0] = X86_64_SSE_CLASS;
3388 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3389 subclasses[0] = X86_64_INTEGER_CLASS;
3391 for (i = 0; i < words; i++)
3392 classes[i] = subclasses[i % num];
3397 case QUAL_UNION_TYPE:
3398 /* Unions are similar to RECORD_TYPE but offset is always 0.
3401 /* Unions are not derived. */
3402 gcc_assert (!TYPE_BINFO (type)
3403 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3404 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3406 if (TREE_CODE (field) == FIELD_DECL)
3410 if (TREE_TYPE (field) == error_mark_node)
3413 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3414 TREE_TYPE (field), subclasses,
3418 for (i = 0; i < num; i++)
3419 classes[i] = merge_classes (subclasses[i], classes[i]);
3428 /* Final merger cleanup. */
3429 for (i = 0; i < words; i++)
3431 /* If one class is MEMORY, everything should be passed in
3433 if (classes[i] == X86_64_MEMORY_CLASS)
3436 /* The X86_64_SSEUP_CLASS should be always preceded by
3437 X86_64_SSE_CLASS. */
3438 if (classes[i] == X86_64_SSEUP_CLASS
3439 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3440 classes[i] = X86_64_SSE_CLASS;
3442 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3443 if (classes[i] == X86_64_X87UP_CLASS
3444 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3445 classes[i] = X86_64_SSE_CLASS;
3450 /* Compute alignment needed. We align all types to natural boundaries with
3451 exception of XFmode that is aligned to 64bits. */
3452 if (mode != VOIDmode && mode != BLKmode)
3454 int mode_alignment = GET_MODE_BITSIZE (mode);
3457 mode_alignment = 128;
3458 else if (mode == XCmode)
3459 mode_alignment = 256;
3460 if (COMPLEX_MODE_P (mode))
3461 mode_alignment /= 2;
3462 /* Misaligned fields are always returned in memory. */
3463 if (bit_offset % mode_alignment)
3467 /* for V1xx modes, just use the base mode */
3468 if (VECTOR_MODE_P (mode)
3469 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3470 mode = GET_MODE_INNER (mode);
3472 /* Classification of atomic types. */
3477 classes[0] = X86_64_SSE_CLASS;
3480 classes[0] = X86_64_SSE_CLASS;
3481 classes[1] = X86_64_SSEUP_CLASS;
3490 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3491 classes[0] = X86_64_INTEGERSI_CLASS;
3493 classes[0] = X86_64_INTEGER_CLASS;
3497 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3502 if (!(bit_offset % 64))
3503 classes[0] = X86_64_SSESF_CLASS;
3505 classes[0] = X86_64_SSE_CLASS;
3508 classes[0] = X86_64_SSEDF_CLASS;
3511 classes[0] = X86_64_X87_CLASS;
3512 classes[1] = X86_64_X87UP_CLASS;
3515 classes[0] = X86_64_SSE_CLASS;
3516 classes[1] = X86_64_SSEUP_CLASS;
3519 classes[0] = X86_64_SSE_CLASS;
3522 classes[0] = X86_64_SSEDF_CLASS;
3523 classes[1] = X86_64_SSEDF_CLASS;
3526 classes[0] = X86_64_COMPLEX_X87_CLASS;
3529 /* This modes is larger than 16 bytes. */
3537 classes[0] = X86_64_SSE_CLASS;
3538 classes[1] = X86_64_SSEUP_CLASS;
3544 classes[0] = X86_64_SSE_CLASS;
3550 gcc_assert (VECTOR_MODE_P (mode));
3555 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3557 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3558 classes[0] = X86_64_INTEGERSI_CLASS;
3560 classes[0] = X86_64_INTEGER_CLASS;
3561 classes[1] = X86_64_INTEGER_CLASS;
3562 return 1 + (bytes > 8);
3566 /* Examine the argument and return set number of register required in each
3567 class. Return 0 iff parameter should be passed in memory. */
3569 examine_argument (enum machine_mode mode, tree type, int in_return,
3570 int *int_nregs, int *sse_nregs)
3572 enum x86_64_reg_class class[MAX_CLASSES];
3573 int n = classify_argument (mode, type, class, 0);
3579 for (n--; n >= 0; n--)
3582 case X86_64_INTEGER_CLASS:
3583 case X86_64_INTEGERSI_CLASS:
3586 case X86_64_SSE_CLASS:
3587 case X86_64_SSESF_CLASS:
3588 case X86_64_SSEDF_CLASS:
3591 case X86_64_NO_CLASS:
3592 case X86_64_SSEUP_CLASS:
3594 case X86_64_X87_CLASS:
3595 case X86_64_X87UP_CLASS:
3599 case X86_64_COMPLEX_X87_CLASS:
3600 return in_return ? 2 : 0;
3601 case X86_64_MEMORY_CLASS:
3607 /* Construct container for the argument used by GCC interface. See
3608 FUNCTION_ARG for the detailed description. */
3611 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3612 tree type, int in_return, int nintregs, int nsseregs,
3613 const int *intreg, int sse_regno)
3615 /* The following variables hold the static issued_error state. */
3616 static bool issued_sse_arg_error;
3617 static bool issued_sse_ret_error;
3618 static bool issued_x87_ret_error;
3620 enum machine_mode tmpmode;
3622 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3623 enum x86_64_reg_class class[MAX_CLASSES];
3627 int needed_sseregs, needed_intregs;
3628 rtx exp[MAX_CLASSES];
3631 n = classify_argument (mode, type, class, 0);
3632 if (TARGET_DEBUG_ARG)
3635 fprintf (stderr, "Memory class\n");
3638 fprintf (stderr, "Classes:");
3639 for (i = 0; i < n; i++)
3641 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3643 fprintf (stderr, "\n");
3648 if (!examine_argument (mode, type, in_return, &needed_intregs,
3651 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3654 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3655 some less clueful developer tries to use floating-point anyway. */
3656 if (needed_sseregs && !TARGET_SSE)
3660 if (!issued_sse_ret_error)
3662 error ("SSE register return with SSE disabled");
3663 issued_sse_ret_error = true;
3666 else if (!issued_sse_arg_error)
3668 error ("SSE register argument with SSE disabled");
3669 issued_sse_arg_error = true;
3674 /* Likewise, error if the ABI requires us to return values in the
3675 x87 registers and the user specified -mno-80387. */
3676 if (!TARGET_80387 && in_return)
3677 for (i = 0; i < n; i++)
3678 if (class[i] == X86_64_X87_CLASS
3679 || class[i] == X86_64_X87UP_CLASS
3680 || class[i] == X86_64_COMPLEX_X87_CLASS)
3682 if (!issued_x87_ret_error)
3684 error ("x87 register return with x87 disabled");
3685 issued_x87_ret_error = true;
3690 /* First construct simple cases. Avoid SCmode, since we want to use
3691 single register to pass this type. */
3692 if (n == 1 && mode != SCmode)
3695 case X86_64_INTEGER_CLASS:
3696 case X86_64_INTEGERSI_CLASS:
3697 return gen_rtx_REG (mode, intreg[0]);
3698 case X86_64_SSE_CLASS:
3699 case X86_64_SSESF_CLASS:
3700 case X86_64_SSEDF_CLASS:
3701 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3702 case X86_64_X87_CLASS:
3703 case X86_64_COMPLEX_X87_CLASS:
3704 return gen_rtx_REG (mode, FIRST_STACK_REG);
3705 case X86_64_NO_CLASS:
3706 /* Zero sized array, struct or class. */
3711 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3713 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3715 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3716 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3717 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3718 && class[1] == X86_64_INTEGER_CLASS
3719 && (mode == CDImode || mode == TImode || mode == TFmode)
3720 && intreg[0] + 1 == intreg[1])
3721 return gen_rtx_REG (mode, intreg[0]);
3723 /* Otherwise figure out the entries of the PARALLEL. */
3724 for (i = 0; i < n; i++)
3728 case X86_64_NO_CLASS:
3730 case X86_64_INTEGER_CLASS:
3731 case X86_64_INTEGERSI_CLASS:
3732 /* Merge TImodes on aligned occasions here too. */
3733 if (i * 8 + 8 > bytes)
3734 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3735 else if (class[i] == X86_64_INTEGERSI_CLASS)
3739 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3740 if (tmpmode == BLKmode)
3742 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3743 gen_rtx_REG (tmpmode, *intreg),
3747 case X86_64_SSESF_CLASS:
3748 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3749 gen_rtx_REG (SFmode,
3750 SSE_REGNO (sse_regno)),
3754 case X86_64_SSEDF_CLASS:
3755 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3756 gen_rtx_REG (DFmode,
3757 SSE_REGNO (sse_regno)),
3761 case X86_64_SSE_CLASS:
3762 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3766 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3767 gen_rtx_REG (tmpmode,
3768 SSE_REGNO (sse_regno)),
3770 if (tmpmode == TImode)
3779 /* Empty aligned struct, union or class. */
3783 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3784 for (i = 0; i < nexps; i++)
3785 XVECEXP (ret, 0, i) = exp [i];
3789 /* Update the data in CUM to advance over an argument
3790 of mode MODE and data type TYPE.
3791 (TYPE is null for libcalls where that information may not be available.) */
3794 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3795 tree type, int named)
3798 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3799 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3802 mode = type_natural_mode (type);
3804 if (TARGET_DEBUG_ARG)
3805 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3806 "mode=%s, named=%d)\n\n",
3807 words, cum->words, cum->nregs, cum->sse_nregs,
3808 GET_MODE_NAME (mode), named);
3812 int int_nregs, sse_nregs;
3813 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3814 cum->words += words;
3815 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3817 cum->nregs -= int_nregs;
3818 cum->sse_nregs -= sse_nregs;
3819 cum->regno += int_nregs;
3820 cum->sse_regno += sse_nregs;
3823 cum->words += words;
3841 cum->words += words;
3842 cum->nregs -= words;
3843 cum->regno += words;
3845 if (cum->nregs <= 0)
3853 if (cum->float_in_sse < 2)
3856 if (cum->float_in_sse < 1)
3867 if (!type || !AGGREGATE_TYPE_P (type))
3869 cum->sse_words += words;
3870 cum->sse_nregs -= 1;
3871 cum->sse_regno += 1;
3872 if (cum->sse_nregs <= 0)
3884 if (!type || !AGGREGATE_TYPE_P (type))
3886 cum->mmx_words += words;
3887 cum->mmx_nregs -= 1;
3888 cum->mmx_regno += 1;
3889 if (cum->mmx_nregs <= 0)
3900 /* Define where to put the arguments to a function.
3901 Value is zero to push the argument on the stack,
3902 or a hard register in which to store the argument.
3904 MODE is the argument's machine mode.
3905 TYPE is the data type of the argument (as a tree).
3906 This is null for libcalls where that information may
3908 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3909 the preceding args and about the function being called.
3910 NAMED is nonzero if this argument is a named parameter
3911 (otherwise it is an extra parameter matching an ellipsis). */
3914 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3915 tree type, int named)
3917 enum machine_mode mode = orig_mode;
3920 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3921 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3922 static bool warnedsse, warnedmmx;
3924 /* To simplify the code below, represent vector types with a vector mode
3925 even if MMX/SSE are not active. */
3926 if (type && TREE_CODE (type) == VECTOR_TYPE)
3927 mode = type_natural_mode (type);
3929 /* Handle a hidden AL argument containing number of registers for varargs
3930 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3932 if (mode == VOIDmode)
3935 return GEN_INT (cum->maybe_vaarg
3936 ? (cum->sse_nregs < 0
3944 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3946 &x86_64_int_parameter_registers [cum->regno],
3951 /* For now, pass fp/complex values on the stack. */
3963 if (words <= cum->nregs)
3965 int regno = cum->regno;
3967 /* Fastcall allocates the first two DWORD (SImode) or
3968 smaller arguments to ECX and EDX. */
3971 if (mode == BLKmode || mode == DImode)
3974 /* ECX not EAX is the first allocated register. */
3978 ret = gen_rtx_REG (mode, regno);
3982 if (cum->float_in_sse < 2)
3985 if (cum->float_in_sse < 1)
3995 if (!type || !AGGREGATE_TYPE_P (type))
3997 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4000 warning (0, "SSE vector argument without SSE enabled "
4004 ret = gen_reg_or_parallel (mode, orig_mode,
4005 cum->sse_regno + FIRST_SSE_REG);
4012 if (!type || !AGGREGATE_TYPE_P (type))
4014 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4017 warning (0, "MMX vector argument without MMX enabled "
4021 ret = gen_reg_or_parallel (mode, orig_mode,
4022 cum->mmx_regno + FIRST_MMX_REG);
4027 if (TARGET_DEBUG_ARG)
4030 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4031 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4034 print_simple_rtl (stderr, ret);
4036 fprintf (stderr, ", stack");
4038 fprintf (stderr, " )\n");
4044 /* A C expression that indicates when an argument must be passed by
4045 reference. If nonzero for an argument, a copy of that argument is
4046 made in memory and a pointer to the argument is passed instead of
4047 the argument itself. The pointer is passed in whatever way is
4048 appropriate for passing a pointer to that type. */
4051 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4052 enum machine_mode mode ATTRIBUTE_UNUSED,
4053 tree type, bool named ATTRIBUTE_UNUSED)
4058 if (type && int_size_in_bytes (type) == -1)
4060 if (TARGET_DEBUG_ARG)
4061 fprintf (stderr, "function_arg_pass_by_reference\n");
4068 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4069 ABI. Only called if TARGET_SSE. */
4071 contains_128bit_aligned_vector_p (tree type)
4073 enum machine_mode mode = TYPE_MODE (type);
4074 if (SSE_REG_MODE_P (mode)
4075 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4077 if (TYPE_ALIGN (type) < 128)
4080 if (AGGREGATE_TYPE_P (type))
4082 /* Walk the aggregates recursively. */
4083 switch (TREE_CODE (type))
4087 case QUAL_UNION_TYPE:
4091 if (TYPE_BINFO (type))
4093 tree binfo, base_binfo;
4096 for (binfo = TYPE_BINFO (type), i = 0;
4097 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4098 if (contains_128bit_aligned_vector_p
4099 (BINFO_TYPE (base_binfo)))
4102 /* And now merge the fields of structure. */
4103 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4105 if (TREE_CODE (field) == FIELD_DECL
4106 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4113 /* Just for use if some languages passes arrays by value. */
4114 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4125 /* Gives the alignment boundary, in bits, of an argument with the
4126 specified mode and type. */
4129 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4133 align = TYPE_ALIGN (type);
4135 align = GET_MODE_ALIGNMENT (mode);
4136 if (align < PARM_BOUNDARY)
4137 align = PARM_BOUNDARY;
4140 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4141 make an exception for SSE modes since these require 128bit
4144 The handling here differs from field_alignment. ICC aligns MMX
4145 arguments to 4 byte boundaries, while structure fields are aligned
4146 to 8 byte boundaries. */
4148 align = PARM_BOUNDARY;
4151 if (!SSE_REG_MODE_P (mode))
4152 align = PARM_BOUNDARY;
4156 if (!contains_128bit_aligned_vector_p (type))
4157 align = PARM_BOUNDARY;
4165 /* Return true if N is a possible register number of function value. */
4167 ix86_function_value_regno_p (int regno)
4173 return ((regno) == 0
4174 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4175 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4177 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4178 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4179 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4184 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4185 || (regno == FIRST_SSE_REG && TARGET_SSE))
4189 && (regno == FIRST_MMX_REG && TARGET_MMX))
4196 /* Define how to find the value returned by a function.
4197 VALTYPE is the data type of the value (as a tree).
4198 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4199 otherwise, FUNC is 0. */
4201 ix86_function_value (tree valtype, tree fntype_or_decl,
4202 bool outgoing ATTRIBUTE_UNUSED)
4204 enum machine_mode natmode = type_natural_mode (valtype);
4208 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4209 1, REGPARM_MAX, SSE_REGPARM_MAX,
4210 x86_64_int_return_registers, 0);
4211 /* For zero sized structures, construct_container return NULL, but we
4212 need to keep rest of compiler happy by returning meaningful value. */
4214 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4219 tree fn = NULL_TREE, fntype;
4221 && DECL_P (fntype_or_decl))
4222 fn = fntype_or_decl;
4223 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4224 return gen_rtx_REG (TYPE_MODE (valtype),
4225 ix86_value_regno (natmode, fn, fntype));
4229 /* Return true iff type is returned in memory. */
4231 ix86_return_in_memory (tree type)
4233 int needed_intregs, needed_sseregs, size;
4234 enum machine_mode mode = type_natural_mode (type);
4237 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4239 if (mode == BLKmode)
4242 size = int_size_in_bytes (type);
4244 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4247 if (VECTOR_MODE_P (mode) || mode == TImode)
4249 /* User-created vectors small enough to fit in EAX. */
4253 /* MMX/3dNow values are returned in MM0,
4254 except when it doesn't exits. */
4256 return (TARGET_MMX ? 0 : 1);
4258 /* SSE values are returned in XMM0, except when it doesn't exist. */
4260 return (TARGET_SSE ? 0 : 1);
4274 /* When returning SSE vector types, we have a choice of either
4275 (1) being abi incompatible with a -march switch, or
4276 (2) generating an error.
4277 Given no good solution, I think the safest thing is one warning.
4278 The user won't be able to use -Werror, but....
4280 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4281 called in response to actually generating a caller or callee that
4282 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4283 via aggregate_value_p for general type probing from tree-ssa. */
4286 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4288 static bool warnedsse, warnedmmx;
4292 /* Look at the return type of the function, not the function type. */
4293 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4295 if (!TARGET_SSE && !warnedsse)
4298 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4301 warning (0, "SSE vector return without SSE enabled "
4306 if (!TARGET_MMX && !warnedmmx)
4308 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4311 warning (0, "MMX vector return without MMX enabled "
4320 /* Define how to find the value returned by a library function
4321 assuming the value has mode MODE. */
4323 ix86_libcall_value (enum machine_mode mode)
4337 return gen_rtx_REG (mode, FIRST_SSE_REG);
4340 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4344 return gen_rtx_REG (mode, 0);
4348 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4351 /* Given a mode, return the register to use for a return value. */
4354 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4356 gcc_assert (!TARGET_64BIT);
4358 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4359 we normally prevent this case when mmx is not available. However
4360 some ABIs may require the result to be returned like DImode. */
4361 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4362 return TARGET_MMX ? FIRST_MMX_REG : 0;
4364 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4365 we prevent this case when sse is not available. However some ABIs
4366 may require the result to be returned like integer TImode. */
4367 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4368 return TARGET_SSE ? FIRST_SSE_REG : 0;
4370 /* Decimal floating point values can go in %eax, unlike other float modes. */
4371 if (DECIMAL_FLOAT_MODE_P (mode))
4374 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4375 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4378 /* Floating point return values in %st(0), except for local functions when
4379 SSE math is enabled or for functions with sseregparm attribute. */
4380 if ((func || fntype)
4381 && (mode == SFmode || mode == DFmode))
4383 int sse_level = ix86_function_sseregparm (fntype, func);
4384 if ((sse_level >= 1 && mode == SFmode)
4385 || (sse_level == 2 && mode == DFmode))
4386 return FIRST_SSE_REG;
4389 return FIRST_FLOAT_REG;
4392 /* Create the va_list data type. */
4395 ix86_build_builtin_va_list (void)
4397 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4399 /* For i386 we use plain pointer to argument area. */
4401 return build_pointer_type (char_type_node);
4403 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4404 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4406 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4407 unsigned_type_node);
4408 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4409 unsigned_type_node);
4410 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4412 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4415 va_list_gpr_counter_field = f_gpr;
4416 va_list_fpr_counter_field = f_fpr;
4418 DECL_FIELD_CONTEXT (f_gpr) = record;
4419 DECL_FIELD_CONTEXT (f_fpr) = record;
4420 DECL_FIELD_CONTEXT (f_ovf) = record;
4421 DECL_FIELD_CONTEXT (f_sav) = record;
4423 TREE_CHAIN (record) = type_decl;
4424 TYPE_NAME (record) = type_decl;
4425 TYPE_FIELDS (record) = f_gpr;
4426 TREE_CHAIN (f_gpr) = f_fpr;
4427 TREE_CHAIN (f_fpr) = f_ovf;
4428 TREE_CHAIN (f_ovf) = f_sav;
4430 layout_type (record);
4432 /* The correct type is an array type of one element. */
4433 return build_array_type (record, build_index_type (size_zero_node));
4436 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4439 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4440 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4443 CUMULATIVE_ARGS next_cum;
4444 rtx save_area = NULL_RTX, mem;
4457 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4460 /* Indicate to allocate space on the stack for varargs save area. */
4461 ix86_save_varrargs_registers = 1;
4463 cfun->stack_alignment_needed = 128;
4465 fntype = TREE_TYPE (current_function_decl);
4466 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4467 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4468 != void_type_node));
4470 /* For varargs, we do not want to skip the dummy va_dcl argument.
4471 For stdargs, we do want to skip the last named argument. */
4474 function_arg_advance (&next_cum, mode, type, 1);
4477 save_area = frame_pointer_rtx;
4479 set = get_varargs_alias_set ();
4481 for (i = next_cum.regno;
4483 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4486 mem = gen_rtx_MEM (Pmode,
4487 plus_constant (save_area, i * UNITS_PER_WORD));
4488 MEM_NOTRAP_P (mem) = 1;
4489 set_mem_alias_set (mem, set);
4490 emit_move_insn (mem, gen_rtx_REG (Pmode,
4491 x86_64_int_parameter_registers[i]));
4494 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4496 /* Now emit code to save SSE registers. The AX parameter contains number
4497 of SSE parameter registers used to call this function. We use
4498 sse_prologue_save insn template that produces computed jump across
4499 SSE saves. We need some preparation work to get this working. */
4501 label = gen_label_rtx ();
4502 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4504 /* Compute address to jump to :
4505 label - 5*eax + nnamed_sse_arguments*5 */
4506 tmp_reg = gen_reg_rtx (Pmode);
4507 nsse_reg = gen_reg_rtx (Pmode);
4508 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4509 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4510 gen_rtx_MULT (Pmode, nsse_reg,
4512 if (next_cum.sse_regno)
4515 gen_rtx_CONST (DImode,
4516 gen_rtx_PLUS (DImode,
4518 GEN_INT (next_cum.sse_regno * 4))));
4520 emit_move_insn (nsse_reg, label_ref);
4521 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4523 /* Compute address of memory block we save into. We always use pointer
4524 pointing 127 bytes after first byte to store - this is needed to keep
4525 instruction size limited by 4 bytes. */
4526 tmp_reg = gen_reg_rtx (Pmode);
4527 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4528 plus_constant (save_area,
4529 8 * REGPARM_MAX + 127)));
4530 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4531 MEM_NOTRAP_P (mem) = 1;
4532 set_mem_alias_set (mem, set);
4533 set_mem_align (mem, BITS_PER_WORD);
4535 /* And finally do the dirty job! */
4536 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4537 GEN_INT (next_cum.sse_regno), label));
4542 /* Implement va_start. */
4545 ix86_va_start (tree valist, rtx nextarg)
4547 HOST_WIDE_INT words, n_gpr, n_fpr;
4548 tree f_gpr, f_fpr, f_ovf, f_sav;
4549 tree gpr, fpr, ovf, sav, t;
4552 /* Only 64bit target needs something special. */
4555 std_expand_builtin_va_start (valist, nextarg);
4559 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4560 f_fpr = TREE_CHAIN (f_gpr);
4561 f_ovf = TREE_CHAIN (f_fpr);
4562 f_sav = TREE_CHAIN (f_ovf);
4564 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4565 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4566 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4567 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4568 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4570 /* Count number of gp and fp argument registers used. */
4571 words = current_function_args_info.words;
4572 n_gpr = current_function_args_info.regno;
4573 n_fpr = current_function_args_info.sse_regno;
4575 if (TARGET_DEBUG_ARG)
4576 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4577 (int) words, (int) n_gpr, (int) n_fpr);
4579 if (cfun->va_list_gpr_size)
4581 type = TREE_TYPE (gpr);
4582 t = build2 (MODIFY_EXPR, type, gpr,
4583 build_int_cst (type, n_gpr * 8));
4584 TREE_SIDE_EFFECTS (t) = 1;
4585 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4588 if (cfun->va_list_fpr_size)
4590 type = TREE_TYPE (fpr);
4591 t = build2 (MODIFY_EXPR, type, fpr,
4592 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4593 TREE_SIDE_EFFECTS (t) = 1;
4594 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4597 /* Find the overflow area. */
4598 type = TREE_TYPE (ovf);
4599 t = make_tree (type, virtual_incoming_args_rtx);
4601 t = build2 (PLUS_EXPR, type, t,
4602 build_int_cst (type, words * UNITS_PER_WORD));
4603 t = build2 (MODIFY_EXPR, type, ovf, t);
4604 TREE_SIDE_EFFECTS (t) = 1;
4605 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4607 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4609 /* Find the register save area.
4610 Prologue of the function save it right above stack frame. */
4611 type = TREE_TYPE (sav);
4612 t = make_tree (type, frame_pointer_rtx);
4613 t = build2 (MODIFY_EXPR, type, sav, t);
4614 TREE_SIDE_EFFECTS (t) = 1;
4615 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4619 /* Implement va_arg. */
4622 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4624 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4625 tree f_gpr, f_fpr, f_ovf, f_sav;
4626 tree gpr, fpr, ovf, sav, t;
4628 tree lab_false, lab_over = NULL_TREE;
4633 enum machine_mode nat_mode;
4635 /* Only 64bit target needs something special. */
4637 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4639 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4640 f_fpr = TREE_CHAIN (f_gpr);
4641 f_ovf = TREE_CHAIN (f_fpr);
4642 f_sav = TREE_CHAIN (f_ovf);
4644 valist = build_va_arg_indirect_ref (valist);
4645 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4646 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4647 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4648 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4650 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4652 type = build_pointer_type (type);
4653 size = int_size_in_bytes (type);
4654 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4656 nat_mode = type_natural_mode (type);
4657 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4658 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4660 /* Pull the value out of the saved registers. */
4662 addr = create_tmp_var (ptr_type_node, "addr");
4663 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4667 int needed_intregs, needed_sseregs;
4669 tree int_addr, sse_addr;
4671 lab_false = create_artificial_label ();
4672 lab_over = create_artificial_label ();
4674 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4676 need_temp = (!REG_P (container)
4677 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4678 || TYPE_ALIGN (type) > 128));
4680 /* In case we are passing structure, verify that it is consecutive block
4681 on the register save area. If not we need to do moves. */
4682 if (!need_temp && !REG_P (container))
4684 /* Verify that all registers are strictly consecutive */
4685 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4689 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4691 rtx slot = XVECEXP (container, 0, i);
4692 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4693 || INTVAL (XEXP (slot, 1)) != i * 16)
4701 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4703 rtx slot = XVECEXP (container, 0, i);
4704 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4705 || INTVAL (XEXP (slot, 1)) != i * 8)
4717 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4718 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4719 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4720 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4723 /* First ensure that we fit completely in registers. */
4726 t = build_int_cst (TREE_TYPE (gpr),
4727 (REGPARM_MAX - needed_intregs + 1) * 8);
4728 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4729 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4730 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4731 gimplify_and_add (t, pre_p);
4735 t = build_int_cst (TREE_TYPE (fpr),
4736 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4738 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4739 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4740 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4741 gimplify_and_add (t, pre_p);
4744 /* Compute index to start of area used for integer regs. */
4747 /* int_addr = gpr + sav; */
4748 t = fold_convert (ptr_type_node, gpr);
4749 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4750 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4751 gimplify_and_add (t, pre_p);
4755 /* sse_addr = fpr + sav; */
4756 t = fold_convert (ptr_type_node, fpr);
4757 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4758 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4759 gimplify_and_add (t, pre_p);
4764 tree temp = create_tmp_var (type, "va_arg_tmp");
4767 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4768 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4769 gimplify_and_add (t, pre_p);
4771 for (i = 0; i < XVECLEN (container, 0); i++)
4773 rtx slot = XVECEXP (container, 0, i);
4774 rtx reg = XEXP (slot, 0);
4775 enum machine_mode mode = GET_MODE (reg);
4776 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4777 tree addr_type = build_pointer_type (piece_type);
4780 tree dest_addr, dest;
4782 if (SSE_REGNO_P (REGNO (reg)))
4784 src_addr = sse_addr;
4785 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4789 src_addr = int_addr;
4790 src_offset = REGNO (reg) * 8;
4792 src_addr = fold_convert (addr_type, src_addr);
4793 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4794 size_int (src_offset)));
4795 src = build_va_arg_indirect_ref (src_addr);
4797 dest_addr = fold_convert (addr_type, addr);
4798 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4799 size_int (INTVAL (XEXP (slot, 1)))));
4800 dest = build_va_arg_indirect_ref (dest_addr);
4802 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4803 gimplify_and_add (t, pre_p);
4809 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4810 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4811 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4812 gimplify_and_add (t, pre_p);
4816 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4817 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4818 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4819 gimplify_and_add (t, pre_p);
4822 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4823 gimplify_and_add (t, pre_p);
4825 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4826 append_to_statement_list (t, pre_p);
4829 /* ... otherwise out of the overflow area. */
4831 /* Care for on-stack alignment if needed. */
4832 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4833 || integer_zerop (TYPE_SIZE (type)))
4837 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4838 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4839 build_int_cst (TREE_TYPE (ovf), align - 1));
4840 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4841 build_int_cst (TREE_TYPE (t), -align));
4843 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4845 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4846 gimplify_and_add (t2, pre_p);
4848 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4849 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4850 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4851 gimplify_and_add (t, pre_p);
4855 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4856 append_to_statement_list (t, pre_p);
4859 ptrtype = build_pointer_type (type);
4860 addr = fold_convert (ptrtype, addr);
4863 addr = build_va_arg_indirect_ref (addr);
4864 return build_va_arg_indirect_ref (addr);
4867 /* Return nonzero if OPNUM's MEM should be matched
4868 in movabs* patterns. */
4871 ix86_check_movabs (rtx insn, int opnum)
4875 set = PATTERN (insn);
4876 if (GET_CODE (set) == PARALLEL)
4877 set = XVECEXP (set, 0, 0);
4878 gcc_assert (GET_CODE (set) == SET);
4879 mem = XEXP (set, opnum);
4880 while (GET_CODE (mem) == SUBREG)
4881 mem = SUBREG_REG (mem);
4882 gcc_assert (GET_CODE (mem) == MEM);
4883 return (volatile_ok || !MEM_VOLATILE_P (mem));
4886 /* Initialize the table of extra 80387 mathematical constants. */
4889 init_ext_80387_constants (void)
4891 static const char * cst[5] =
4893 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4894 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4895 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4896 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4897 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4901 for (i = 0; i < 5; i++)
4903 real_from_string (&ext_80387_constants_table[i], cst[i]);
4904 /* Ensure each constant is rounded to XFmode precision. */
4905 real_convert (&ext_80387_constants_table[i],
4906 XFmode, &ext_80387_constants_table[i]);
4909 ext_80387_constants_init = 1;
4912 /* Return true if the constant is something that can be loaded with
4913 a special instruction. */
4916 standard_80387_constant_p (rtx x)
4918 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4921 if (x == CONST0_RTX (GET_MODE (x)))
4923 if (x == CONST1_RTX (GET_MODE (x)))
4926 /* For XFmode constants, try to find a special 80387 instruction when
4927 optimizing for size or on those CPUs that benefit from them. */
4928 if (GET_MODE (x) == XFmode
4929 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4934 if (! ext_80387_constants_init)
4935 init_ext_80387_constants ();
4937 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4938 for (i = 0; i < 5; i++)
4939 if (real_identical (&r, &ext_80387_constants_table[i]))
4946 /* Return the opcode of the special instruction to be used to load
4950 standard_80387_constant_opcode (rtx x)
4952 switch (standard_80387_constant_p (x))
4973 /* Return the CONST_DOUBLE representing the 80387 constant that is
4974 loaded by the specified special instruction. The argument IDX
4975 matches the return value from standard_80387_constant_p. */
4978 standard_80387_constant_rtx (int idx)
4982 if (! ext_80387_constants_init)
4983 init_ext_80387_constants ();
4999 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5003 /* Return 1 if mode is a valid mode for sse. */
5005 standard_sse_mode_p (enum machine_mode mode)
5022 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5025 standard_sse_constant_p (rtx x)
5027 enum machine_mode mode = GET_MODE (x);
5029 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5031 if (vector_all_ones_operand (x, mode)
5032 && standard_sse_mode_p (mode))
5033 return TARGET_SSE2 ? 2 : -1;
5038 /* Return the opcode of the special instruction to be used to load
5042 standard_sse_constant_opcode (rtx insn, rtx x)
5044 switch (standard_sse_constant_p (x))
5047 if (get_attr_mode (insn) == MODE_V4SF)
5048 return "xorps\t%0, %0";
5049 else if (get_attr_mode (insn) == MODE_V2DF)
5050 return "xorpd\t%0, %0";
5052 return "pxor\t%0, %0";
5054 return "pcmpeqd\t%0, %0";
5059 /* Returns 1 if OP contains a symbol reference */
5062 symbolic_reference_mentioned_p (rtx op)
5067 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5070 fmt = GET_RTX_FORMAT (GET_CODE (op));
5071 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5077 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5078 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5082 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5089 /* Return 1 if it is appropriate to emit `ret' instructions in the
5090 body of a function. Do this only if the epilogue is simple, needing a
5091 couple of insns. Prior to reloading, we can't tell how many registers
5092 must be saved, so return 0 then. Return 0 if there is no frame
5093 marker to de-allocate. */
5096 ix86_can_use_return_insn_p (void)
5098 struct ix86_frame frame;
5100 if (! reload_completed || frame_pointer_needed)
5103 /* Don't allow more than 32 pop, since that's all we can do
5104 with one instruction. */
5105 if (current_function_pops_args
5106 && current_function_args_size >= 32768)
5109 ix86_compute_frame_layout (&frame);
5110 return frame.to_allocate == 0 && frame.nregs == 0;
5113 /* Value should be nonzero if functions must have frame pointers.
5114 Zero means the frame pointer need not be set up (and parms may
5115 be accessed via the stack pointer) in functions that seem suitable. */
5118 ix86_frame_pointer_required (void)
5120 /* If we accessed previous frames, then the generated code expects
5121 to be able to access the saved ebp value in our frame. */
5122 if (cfun->machine->accesses_prev_frame)
5125 /* Several x86 os'es need a frame pointer for other reasons,
5126 usually pertaining to setjmp. */
5127 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5130 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5131 the frame pointer by default. Turn it back on now if we've not
5132 got a leaf function. */
5133 if (TARGET_OMIT_LEAF_FRAME_POINTER
5134 && (!current_function_is_leaf
5135 || ix86_current_function_calls_tls_descriptor))
5138 if (current_function_profile)
5144 /* Record that the current function accesses previous call frames. */
5147 ix86_setup_frame_addresses (void)
5149 cfun->machine->accesses_prev_frame = 1;
5152 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5153 # define USE_HIDDEN_LINKONCE 1
5155 # define USE_HIDDEN_LINKONCE 0
5158 static int pic_labels_used;
5160 /* Fills in the label name that should be used for a pc thunk for
5161 the given register. */
5164 get_pc_thunk_name (char name[32], unsigned int regno)
5166 gcc_assert (!TARGET_64BIT);
5168 if (USE_HIDDEN_LINKONCE)
5169 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5171 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5175 /* This function generates code for -fpic that loads %ebx with
5176 the return address of the caller and then returns. */
5179 ix86_file_end (void)
5184 for (regno = 0; regno < 8; ++regno)
5188 if (! ((pic_labels_used >> regno) & 1))
5191 get_pc_thunk_name (name, regno);
5196 switch_to_section (darwin_sections[text_coal_section]);
5197 fputs ("\t.weak_definition\t", asm_out_file);
5198 assemble_name (asm_out_file, name);
5199 fputs ("\n\t.private_extern\t", asm_out_file);
5200 assemble_name (asm_out_file, name);
5201 fputs ("\n", asm_out_file);
5202 ASM_OUTPUT_LABEL (asm_out_file, name);
5206 if (USE_HIDDEN_LINKONCE)
5210 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5212 TREE_PUBLIC (decl) = 1;
5213 TREE_STATIC (decl) = 1;
5214 DECL_ONE_ONLY (decl) = 1;
5216 (*targetm.asm_out.unique_section) (decl, 0);
5217 switch_to_section (get_named_section (decl, NULL, 0));
5219 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5220 fputs ("\t.hidden\t", asm_out_file);
5221 assemble_name (asm_out_file, name);
5222 fputc ('\n', asm_out_file);
5223 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5227 switch_to_section (text_section);
5228 ASM_OUTPUT_LABEL (asm_out_file, name);
5231 xops[0] = gen_rtx_REG (SImode, regno);
5232 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5233 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5234 output_asm_insn ("ret", xops);
5237 if (NEED_INDICATE_EXEC_STACK)
5238 file_end_indicate_exec_stack ();
5241 /* Emit code for the SET_GOT patterns. */
5244 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5249 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5251 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5253 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5256 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5258 output_asm_insn ("call\t%a2", xops);
5261 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5262 is what will be referenced by the Mach-O PIC subsystem. */
5264 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5267 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5268 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5271 output_asm_insn ("pop{l}\t%0", xops);
5276 get_pc_thunk_name (name, REGNO (dest));
5277 pic_labels_used |= 1 << REGNO (dest);
5279 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5280 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5281 output_asm_insn ("call\t%X2", xops);
5282 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5283 is what will be referenced by the Mach-O PIC subsystem. */
5286 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5288 targetm.asm_out.internal_label (asm_out_file, "L",
5289 CODE_LABEL_NUMBER (label));
5296 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5297 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5299 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5304 /* Generate an "push" pattern for input ARG. */
5309 return gen_rtx_SET (VOIDmode,
5311 gen_rtx_PRE_DEC (Pmode,
5312 stack_pointer_rtx)),
5316 /* Return >= 0 if there is an unused call-clobbered register available
5317 for the entire function. */
5320 ix86_select_alt_pic_regnum (void)
5322 if (current_function_is_leaf && !current_function_profile
5323 && !ix86_current_function_calls_tls_descriptor)
5326 for (i = 2; i >= 0; --i)
5327 if (!regs_ever_live[i])
5331 return INVALID_REGNUM;
5334 /* Return 1 if we need to save REGNO. */
5336 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5338 if (pic_offset_table_rtx
5339 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5340 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5341 || current_function_profile
5342 || current_function_calls_eh_return
5343 || current_function_uses_const_pool))
5345 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5350 if (current_function_calls_eh_return && maybe_eh_return)
5355 unsigned test = EH_RETURN_DATA_REGNO (i);
5356 if (test == INVALID_REGNUM)
5363 if (cfun->machine->force_align_arg_pointer
5364 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5367 return (regs_ever_live[regno]
5368 && !call_used_regs[regno]
5369 && !fixed_regs[regno]
5370 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5373 /* Return number of registers to be saved on the stack. */
5376 ix86_nsaved_regs (void)
5381 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5382 if (ix86_save_reg (regno, true))
5387 /* Return the offset between two registers, one to be eliminated, and the other
5388 its replacement, at the start of a routine. */
5391 ix86_initial_elimination_offset (int from, int to)
5393 struct ix86_frame frame;
5394 ix86_compute_frame_layout (&frame);
5396 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5397 return frame.hard_frame_pointer_offset;
5398 else if (from == FRAME_POINTER_REGNUM
5399 && to == HARD_FRAME_POINTER_REGNUM)
5400 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5403 gcc_assert (to == STACK_POINTER_REGNUM);
5405 if (from == ARG_POINTER_REGNUM)
5406 return frame.stack_pointer_offset;
5408 gcc_assert (from == FRAME_POINTER_REGNUM);
5409 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5413 /* Fill structure ix86_frame about frame of currently computed function. */
5416 ix86_compute_frame_layout (struct ix86_frame *frame)
5418 HOST_WIDE_INT total_size;
5419 unsigned int stack_alignment_needed;
5420 HOST_WIDE_INT offset;
5421 unsigned int preferred_alignment;
5422 HOST_WIDE_INT size = get_frame_size ();
5424 frame->nregs = ix86_nsaved_regs ();
5427 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5428 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5430 /* During reload iteration the amount of registers saved can change.
5431 Recompute the value as needed. Do not recompute when amount of registers
5432 didn't change as reload does multiple calls to the function and does not
5433 expect the decision to change within single iteration. */
5435 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5437 int count = frame->nregs;
5439 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5440 /* The fast prologue uses move instead of push to save registers. This
5441 is significantly longer, but also executes faster as modern hardware
5442 can execute the moves in parallel, but can't do that for push/pop.
5444 Be careful about choosing what prologue to emit: When function takes
5445 many instructions to execute we may use slow version as well as in
5446 case function is known to be outside hot spot (this is known with
5447 feedback only). Weight the size of function by number of registers
5448 to save as it is cheap to use one or two push instructions but very
5449 slow to use many of them. */
5451 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5452 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5453 || (flag_branch_probabilities
5454 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5455 cfun->machine->use_fast_prologue_epilogue = false;
5457 cfun->machine->use_fast_prologue_epilogue
5458 = !expensive_function_p (count);
5460 if (TARGET_PROLOGUE_USING_MOVE
5461 && cfun->machine->use_fast_prologue_epilogue)
5462 frame->save_regs_using_mov = true;
5464 frame->save_regs_using_mov = false;
5467 /* Skip return address and saved base pointer. */
5468 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5470 frame->hard_frame_pointer_offset = offset;
5472 /* Do some sanity checking of stack_alignment_needed and
5473 preferred_alignment, since i386 port is the only using those features
5474 that may break easily. */
5476 gcc_assert (!size || stack_alignment_needed);
5477 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5478 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5479 gcc_assert (stack_alignment_needed
5480 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5482 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5483 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5485 /* Register save area */
5486 offset += frame->nregs * UNITS_PER_WORD;
5489 if (ix86_save_varrargs_registers)
5491 offset += X86_64_VARARGS_SIZE;
5492 frame->va_arg_size = X86_64_VARARGS_SIZE;
5495 frame->va_arg_size = 0;
5497 /* Align start of frame for local function. */
5498 frame->padding1 = ((offset + stack_alignment_needed - 1)
5499 & -stack_alignment_needed) - offset;
5501 offset += frame->padding1;
5503 /* Frame pointer points here. */
5504 frame->frame_pointer_offset = offset;
5508 /* Add outgoing arguments area. Can be skipped if we eliminated
5509 all the function calls as dead code.
5510 Skipping is however impossible when function calls alloca. Alloca
5511 expander assumes that last current_function_outgoing_args_size
5512 of stack frame are unused. */
5513 if (ACCUMULATE_OUTGOING_ARGS
5514 && (!current_function_is_leaf || current_function_calls_alloca
5515 || ix86_current_function_calls_tls_descriptor))
5517 offset += current_function_outgoing_args_size;
5518 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5521 frame->outgoing_arguments_size = 0;
5523 /* Align stack boundary. Only needed if we're calling another function
5525 if (!current_function_is_leaf || current_function_calls_alloca
5526 || ix86_current_function_calls_tls_descriptor)
5527 frame->padding2 = ((offset + preferred_alignment - 1)
5528 & -preferred_alignment) - offset;
5530 frame->padding2 = 0;
5532 offset += frame->padding2;
5534 /* We've reached end of stack frame. */
5535 frame->stack_pointer_offset = offset;
5537 /* Size prologue needs to allocate. */
5538 frame->to_allocate =
5539 (size + frame->padding1 + frame->padding2
5540 + frame->outgoing_arguments_size + frame->va_arg_size);
5542 if ((!frame->to_allocate && frame->nregs <= 1)
5543 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5544 frame->save_regs_using_mov = false;
5546 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5547 && current_function_is_leaf
5548 && !ix86_current_function_calls_tls_descriptor)
5550 frame->red_zone_size = frame->to_allocate;
5551 if (frame->save_regs_using_mov)
5552 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5553 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5554 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5557 frame->red_zone_size = 0;
5558 frame->to_allocate -= frame->red_zone_size;
5559 frame->stack_pointer_offset -= frame->red_zone_size;
5561 fprintf (stderr, "nregs: %i\n", frame->nregs);
5562 fprintf (stderr, "size: %i\n", size);
5563 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5564 fprintf (stderr, "padding1: %i\n", frame->padding1);
5565 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5566 fprintf (stderr, "padding2: %i\n", frame->padding2);
5567 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5568 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5569 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5570 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5571 frame->hard_frame_pointer_offset);
5572 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5576 /* Emit code to save registers in the prologue. */
5579 ix86_emit_save_regs (void)
5584 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5585 if (ix86_save_reg (regno, true))
5587 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5588 RTX_FRAME_RELATED_P (insn) = 1;
5592 /* Emit code to save registers using MOV insns. First register
5593 is restored from POINTER + OFFSET. */
5595 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5600 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5601 if (ix86_save_reg (regno, true))
5603 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5605 gen_rtx_REG (Pmode, regno));
5606 RTX_FRAME_RELATED_P (insn) = 1;
5607 offset += UNITS_PER_WORD;
5611 /* Expand prologue or epilogue stack adjustment.
5612 The pattern exist to put a dependency on all ebp-based memory accesses.
5613 STYLE should be negative if instructions should be marked as frame related,
5614 zero if %r11 register is live and cannot be freely used and positive
5618 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5623 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5624 else if (x86_64_immediate_operand (offset, DImode))
5625 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5629 /* r11 is used by indirect sibcall return as well, set before the
5630 epilogue and used after the epilogue. ATM indirect sibcall
5631 shouldn't be used together with huge frame sizes in one
5632 function because of the frame_size check in sibcall.c. */
5634 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5635 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5637 RTX_FRAME_RELATED_P (insn) = 1;
5638 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5642 RTX_FRAME_RELATED_P (insn) = 1;
5645 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5648 ix86_internal_arg_pointer (void)
5650 bool has_force_align_arg_pointer =
5651 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5652 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5653 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5654 && DECL_NAME (current_function_decl)
5655 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5656 && DECL_FILE_SCOPE_P (current_function_decl))
5657 || ix86_force_align_arg_pointer
5658 || has_force_align_arg_pointer)
5660 /* Nested functions can't realign the stack due to a register
5662 if (DECL_CONTEXT (current_function_decl)
5663 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5665 if (ix86_force_align_arg_pointer)
5666 warning (0, "-mstackrealign ignored for nested functions");
5667 if (has_force_align_arg_pointer)
5668 error ("%s not supported for nested functions",
5669 ix86_force_align_arg_pointer_string);
5670 return virtual_incoming_args_rtx;
5672 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5673 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5676 return virtual_incoming_args_rtx;
5679 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5680 This is called from dwarf2out.c to emit call frame instructions
5681 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5683 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5685 rtx unspec = SET_SRC (pattern);
5686 gcc_assert (GET_CODE (unspec) == UNSPEC);
5690 case UNSPEC_REG_SAVE:
5691 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5692 SET_DEST (pattern));
5694 case UNSPEC_DEF_CFA:
5695 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5696 INTVAL (XVECEXP (unspec, 0, 0)));
5703 /* Expand the prologue into a bunch of separate insns. */
5706 ix86_expand_prologue (void)
5710 struct ix86_frame frame;
5711 HOST_WIDE_INT allocate;
5713 ix86_compute_frame_layout (&frame);
5715 if (cfun->machine->force_align_arg_pointer)
5719 /* Grab the argument pointer. */
5720 x = plus_constant (stack_pointer_rtx, 4);
5721 y = cfun->machine->force_align_arg_pointer;
5722 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5723 RTX_FRAME_RELATED_P (insn) = 1;
5725 /* The unwind info consists of two parts: install the fafp as the cfa,
5726 and record the fafp as the "save register" of the stack pointer.
5727 The later is there in order that the unwinder can see where it
5728 should restore the stack pointer across the and insn. */
5729 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5730 x = gen_rtx_SET (VOIDmode, y, x);
5731 RTX_FRAME_RELATED_P (x) = 1;
5732 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5734 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5735 RTX_FRAME_RELATED_P (y) = 1;
5736 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5737 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5738 REG_NOTES (insn) = x;
5740 /* Align the stack. */
5741 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5744 /* And here we cheat like madmen with the unwind info. We force the
5745 cfa register back to sp+4, which is exactly what it was at the
5746 start of the function. Re-pushing the return address results in
5747 the return at the same spot relative to the cfa, and thus is
5748 correct wrt the unwind info. */
5749 x = cfun->machine->force_align_arg_pointer;
5750 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5751 insn = emit_insn (gen_push (x));
5752 RTX_FRAME_RELATED_P (insn) = 1;
5755 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5756 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5757 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5758 REG_NOTES (insn) = x;
5761 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5762 slower on all targets. Also sdb doesn't like it. */
5764 if (frame_pointer_needed)
5766 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5767 RTX_FRAME_RELATED_P (insn) = 1;
5769 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5770 RTX_FRAME_RELATED_P (insn) = 1;
5773 allocate = frame.to_allocate;
5775 if (!frame.save_regs_using_mov)
5776 ix86_emit_save_regs ();
5778 allocate += frame.nregs * UNITS_PER_WORD;
5780 /* When using red zone we may start register saving before allocating
5781 the stack frame saving one cycle of the prologue. */
5782 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5783 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5784 : stack_pointer_rtx,
5785 -frame.nregs * UNITS_PER_WORD);
5789 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5790 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5791 GEN_INT (-allocate), -1);
5794 /* Only valid for Win32. */
5795 rtx eax = gen_rtx_REG (SImode, 0);
5796 bool eax_live = ix86_eax_live_at_start_p ();
5799 gcc_assert (!TARGET_64BIT);
5803 emit_insn (gen_push (eax));
5807 emit_move_insn (eax, GEN_INT (allocate));
5809 insn = emit_insn (gen_allocate_stack_worker (eax));
5810 RTX_FRAME_RELATED_P (insn) = 1;
5811 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5812 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5813 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5814 t, REG_NOTES (insn));
5818 if (frame_pointer_needed)
5819 t = plus_constant (hard_frame_pointer_rtx,
5822 - frame.nregs * UNITS_PER_WORD);
5824 t = plus_constant (stack_pointer_rtx, allocate);
5825 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5829 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5831 if (!frame_pointer_needed || !frame.to_allocate)
5832 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5834 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5835 -frame.nregs * UNITS_PER_WORD);
5838 pic_reg_used = false;
5839 if (pic_offset_table_rtx
5840 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5841 || current_function_profile))
5843 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5845 if (alt_pic_reg_used != INVALID_REGNUM)
5846 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5848 pic_reg_used = true;
5854 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5856 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5858 /* Even with accurate pre-reload life analysis, we can wind up
5859 deleting all references to the pic register after reload.
5860 Consider if cross-jumping unifies two sides of a branch
5861 controlled by a comparison vs the only read from a global.
5862 In which case, allow the set_got to be deleted, though we're
5863 too late to do anything about the ebx save in the prologue. */
5864 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5867 /* Prevent function calls from be scheduled before the call to mcount.
5868 In the pic_reg_used case, make sure that the got load isn't deleted. */
5869 if (current_function_profile)
5870 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5873 /* Emit code to restore saved registers using MOV insns. First register
5874 is restored from POINTER + OFFSET. */
5876 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5877 int maybe_eh_return)
5880 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5882 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5883 if (ix86_save_reg (regno, maybe_eh_return))
5885 /* Ensure that adjust_address won't be forced to produce pointer
5886 out of range allowed by x86-64 instruction set. */
5887 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5891 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5892 emit_move_insn (r11, GEN_INT (offset));
5893 emit_insn (gen_adddi3 (r11, r11, pointer));
5894 base_address = gen_rtx_MEM (Pmode, r11);
5897 emit_move_insn (gen_rtx_REG (Pmode, regno),
5898 adjust_address (base_address, Pmode, offset));
5899 offset += UNITS_PER_WORD;
5903 /* Restore function stack, frame, and registers. */
5906 ix86_expand_epilogue (int style)
5909 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5910 struct ix86_frame frame;
5911 HOST_WIDE_INT offset;
5913 ix86_compute_frame_layout (&frame);
5915 /* Calculate start of saved registers relative to ebp. Special care
5916 must be taken for the normal return case of a function using
5917 eh_return: the eax and edx registers are marked as saved, but not
5918 restored along this path. */
5919 offset = frame.nregs;
5920 if (current_function_calls_eh_return && style != 2)
5922 offset *= -UNITS_PER_WORD;
5924 /* If we're only restoring one register and sp is not valid then
5925 using a move instruction to restore the register since it's
5926 less work than reloading sp and popping the register.
5928 The default code result in stack adjustment using add/lea instruction,
5929 while this code results in LEAVE instruction (or discrete equivalent),
5930 so it is profitable in some other cases as well. Especially when there
5931 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5932 and there is exactly one register to pop. This heuristic may need some
5933 tuning in future. */
5934 if ((!sp_valid && frame.nregs <= 1)
5935 || (TARGET_EPILOGUE_USING_MOVE
5936 && cfun->machine->use_fast_prologue_epilogue
5937 && (frame.nregs > 1 || frame.to_allocate))
5938 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5939 || (frame_pointer_needed && TARGET_USE_LEAVE
5940 && cfun->machine->use_fast_prologue_epilogue
5941 && frame.nregs == 1)
5942 || current_function_calls_eh_return)
5944 /* Restore registers. We can use ebp or esp to address the memory
5945 locations. If both are available, default to ebp, since offsets
5946 are known to be small. Only exception is esp pointing directly to the
5947 end of block of saved registers, where we may simplify addressing
5950 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5951 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5952 frame.to_allocate, style == 2);
5954 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5955 offset, style == 2);
5957 /* eh_return epilogues need %ecx added to the stack pointer. */
5960 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5962 if (frame_pointer_needed)
5964 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5965 tmp = plus_constant (tmp, UNITS_PER_WORD);
5966 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5968 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5969 emit_move_insn (hard_frame_pointer_rtx, tmp);
5971 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5976 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5977 tmp = plus_constant (tmp, (frame.to_allocate
5978 + frame.nregs * UNITS_PER_WORD));
5979 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5982 else if (!frame_pointer_needed)
5983 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5984 GEN_INT (frame.to_allocate
5985 + frame.nregs * UNITS_PER_WORD),
5987 /* If not an i386, mov & pop is faster than "leave". */
5988 else if (TARGET_USE_LEAVE || optimize_size
5989 || !cfun->machine->use_fast_prologue_epilogue)
5990 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5993 pro_epilogue_adjust_stack (stack_pointer_rtx,
5994 hard_frame_pointer_rtx,
5997 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5999 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6004 /* First step is to deallocate the stack frame so that we can
6005 pop the registers. */
6008 gcc_assert (frame_pointer_needed);
6009 pro_epilogue_adjust_stack (stack_pointer_rtx,
6010 hard_frame_pointer_rtx,
6011 GEN_INT (offset), style);
6013 else if (frame.to_allocate)
6014 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6015 GEN_INT (frame.to_allocate), style);
6017 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6018 if (ix86_save_reg (regno, false))
6021 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6023 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6025 if (frame_pointer_needed)
6027 /* Leave results in shorter dependency chains on CPUs that are
6028 able to grok it fast. */
6029 if (TARGET_USE_LEAVE)
6030 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6031 else if (TARGET_64BIT)
6032 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6034 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6038 if (cfun->machine->force_align_arg_pointer)
6040 emit_insn (gen_addsi3 (stack_pointer_rtx,
6041 cfun->machine->force_align_arg_pointer,
6045 /* Sibcall epilogues don't want a return instruction. */
6049 if (current_function_pops_args && current_function_args_size)
6051 rtx popc = GEN_INT (current_function_pops_args);
6053 /* i386 can only pop 64K bytes. If asked to pop more, pop
6054 return address, do explicit add, and jump indirectly to the
6057 if (current_function_pops_args >= 65536)
6059 rtx ecx = gen_rtx_REG (SImode, 2);
6061 /* There is no "pascal" calling convention in 64bit ABI. */
6062 gcc_assert (!TARGET_64BIT);
6064 emit_insn (gen_popsi1 (ecx));
6065 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6066 emit_jump_insn (gen_return_indirect_internal (ecx));
6069 emit_jump_insn (gen_return_pop_internal (popc));
6072 emit_jump_insn (gen_return_internal ());
6075 /* Reset from the function's potential modifications. */
6078 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6079 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6081 if (pic_offset_table_rtx)
6082 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6084 /* Mach-O doesn't support labels at the end of objects, so if
6085 it looks like we might want one, insert a NOP. */
6087 rtx insn = get_last_insn ();
6090 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6091 insn = PREV_INSN (insn);
6095 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6096 fputs ("\tnop\n", file);
6102 /* Extract the parts of an RTL expression that is a valid memory address
6103 for an instruction. Return 0 if the structure of the address is
6104 grossly off. Return -1 if the address contains ASHIFT, so it is not
6105 strictly valid, but still used for computing length of lea instruction. */
6108 ix86_decompose_address (rtx addr, struct ix86_address *out)
6110 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6111 rtx base_reg, index_reg;
6112 HOST_WIDE_INT scale = 1;
6113 rtx scale_rtx = NULL_RTX;
6115 enum ix86_address_seg seg = SEG_DEFAULT;
6117 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6119 else if (GET_CODE (addr) == PLUS)
6129 addends[n++] = XEXP (op, 1);
6132 while (GET_CODE (op) == PLUS);
6137 for (i = n; i >= 0; --i)
6140 switch (GET_CODE (op))
6145 index = XEXP (op, 0);
6146 scale_rtx = XEXP (op, 1);
6150 if (XINT (op, 1) == UNSPEC_TP
6151 && TARGET_TLS_DIRECT_SEG_REFS
6152 && seg == SEG_DEFAULT)
6153 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6182 else if (GET_CODE (addr) == MULT)
6184 index = XEXP (addr, 0); /* index*scale */
6185 scale_rtx = XEXP (addr, 1);
6187 else if (GET_CODE (addr) == ASHIFT)
6191 /* We're called for lea too, which implements ashift on occasion. */
6192 index = XEXP (addr, 0);
6193 tmp = XEXP (addr, 1);
6194 if (GET_CODE (tmp) != CONST_INT)
6196 scale = INTVAL (tmp);
6197 if ((unsigned HOST_WIDE_INT) scale > 3)
6203 disp = addr; /* displacement */
6205 /* Extract the integral value of scale. */
6208 if (GET_CODE (scale_rtx) != CONST_INT)
6210 scale = INTVAL (scale_rtx);
6213 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6214 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6216 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6217 if (base_reg && index_reg && scale == 1
6218 && (index_reg == arg_pointer_rtx
6219 || index_reg == frame_pointer_rtx
6220 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6223 tmp = base, base = index, index = tmp;
6224 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6227 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6228 if ((base_reg == hard_frame_pointer_rtx
6229 || base_reg == frame_pointer_rtx
6230 || base_reg == arg_pointer_rtx) && !disp)
6233 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6234 Avoid this by transforming to [%esi+0]. */
6235 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6236 && base_reg && !index_reg && !disp
6238 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6241 /* Special case: encode reg+reg instead of reg*2. */
6242 if (!base && index && scale && scale == 2)
6243 base = index, base_reg = index_reg, scale = 1;
6245 /* Special case: scaling cannot be encoded without base or displacement. */
6246 if (!base && !disp && index && scale != 1)
6258 /* Return cost of the memory address x.
6259 For i386, it is better to use a complex address than let gcc copy
6260 the address into a reg and make a new pseudo. But not if the address
6261 requires to two regs - that would mean more pseudos with longer
6264 ix86_address_cost (rtx x)
6266 struct ix86_address parts;
6268 int ok = ix86_decompose_address (x, &parts);
6272 if (parts.base && GET_CODE (parts.base) == SUBREG)
6273 parts.base = SUBREG_REG (parts.base);
6274 if (parts.index && GET_CODE (parts.index) == SUBREG)
6275 parts.index = SUBREG_REG (parts.index);
6277 /* More complex memory references are better. */
6278 if (parts.disp && parts.disp != const0_rtx)
6280 if (parts.seg != SEG_DEFAULT)
6283 /* Attempt to minimize number of registers in the address. */
6285 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6287 && (!REG_P (parts.index)
6288 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6292 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6294 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6295 && parts.base != parts.index)
6298 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6299 since it's predecode logic can't detect the length of instructions
6300 and it degenerates to vector decoded. Increase cost of such
6301 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6302 to split such addresses or even refuse such addresses at all.
6304 Following addressing modes are affected:
6309 The first and last case may be avoidable by explicitly coding the zero in
6310 memory address, but I don't have AMD-K6 machine handy to check this
6314 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6315 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6316 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6322 /* If X is a machine specific address (i.e. a symbol or label being
6323 referenced as a displacement from the GOT implemented using an
6324 UNSPEC), then return the base term. Otherwise return X. */
6327 ix86_find_base_term (rtx x)
6333 if (GET_CODE (x) != CONST)
6336 if (GET_CODE (term) == PLUS
6337 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6338 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6339 term = XEXP (term, 0);
6340 if (GET_CODE (term) != UNSPEC
6341 || XINT (term, 1) != UNSPEC_GOTPCREL)
6344 term = XVECEXP (term, 0, 0);
6346 if (GET_CODE (term) != SYMBOL_REF
6347 && GET_CODE (term) != LABEL_REF)
6353 term = ix86_delegitimize_address (x);
6355 if (GET_CODE (term) != SYMBOL_REF
6356 && GET_CODE (term) != LABEL_REF)
6362 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6363 this is used for to form addresses to local data when -fPIC is in
6367 darwin_local_data_pic (rtx disp)
6369 if (GET_CODE (disp) == MINUS)
6371 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6372 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6373 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6375 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6376 if (! strcmp (sym_name, "<pic base>"))
6384 /* Determine if a given RTX is a valid constant. We already know this
6385 satisfies CONSTANT_P. */
6388 legitimate_constant_p (rtx x)
6390 switch (GET_CODE (x))
6395 if (GET_CODE (x) == PLUS)
6397 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6402 if (TARGET_MACHO && darwin_local_data_pic (x))
6405 /* Only some unspecs are valid as "constants". */
6406 if (GET_CODE (x) == UNSPEC)
6407 switch (XINT (x, 1))
6410 return TARGET_64BIT;
6413 x = XVECEXP (x, 0, 0);
6414 return (GET_CODE (x) == SYMBOL_REF
6415 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6417 x = XVECEXP (x, 0, 0);
6418 return (GET_CODE (x) == SYMBOL_REF
6419 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6424 /* We must have drilled down to a symbol. */
6425 if (GET_CODE (x) == LABEL_REF)
6427 if (GET_CODE (x) != SYMBOL_REF)
6432 /* TLS symbols are never valid. */
6433 if (SYMBOL_REF_TLS_MODEL (x))
6438 if (GET_MODE (x) == TImode
6439 && x != CONST0_RTX (TImode)
6445 if (x == CONST0_RTX (GET_MODE (x)))
6453 /* Otherwise we handle everything else in the move patterns. */
6457 /* Determine if it's legal to put X into the constant pool. This
6458 is not possible for the address of thread-local symbols, which
6459 is checked above. */
6462 ix86_cannot_force_const_mem (rtx x)
6464 /* We can always put integral constants and vectors in memory. */
6465 switch (GET_CODE (x))
6475 return !legitimate_constant_p (x);
6478 /* Determine if a given RTX is a valid constant address. */
6481 constant_address_p (rtx x)
6483 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6486 /* Nonzero if the constant value X is a legitimate general operand
6487 when generating PIC code. It is given that flag_pic is on and
6488 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6491 legitimate_pic_operand_p (rtx x)
6495 switch (GET_CODE (x))
6498 inner = XEXP (x, 0);
6499 if (GET_CODE (inner) == PLUS
6500 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6501 inner = XEXP (inner, 0);
6503 /* Only some unspecs are valid as "constants". */
6504 if (GET_CODE (inner) == UNSPEC)
6505 switch (XINT (inner, 1))
6508 return TARGET_64BIT;
6510 x = XVECEXP (inner, 0, 0);
6511 return (GET_CODE (x) == SYMBOL_REF
6512 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6520 return legitimate_pic_address_disp_p (x);
6527 /* Determine if a given CONST RTX is a valid memory displacement
6531 legitimate_pic_address_disp_p (rtx disp)
6535 /* In 64bit mode we can allow direct addresses of symbols and labels
6536 when they are not dynamic symbols. */
6539 rtx op0 = disp, op1;
6541 switch (GET_CODE (disp))
6547 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6549 op0 = XEXP (XEXP (disp, 0), 0);
6550 op1 = XEXP (XEXP (disp, 0), 1);
6551 if (GET_CODE (op1) != CONST_INT
6552 || INTVAL (op1) >= 16*1024*1024
6553 || INTVAL (op1) < -16*1024*1024)
6555 if (GET_CODE (op0) == LABEL_REF)
6557 if (GET_CODE (op0) != SYMBOL_REF)
6562 /* TLS references should always be enclosed in UNSPEC. */
6563 if (SYMBOL_REF_TLS_MODEL (op0))
6565 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6573 if (GET_CODE (disp) != CONST)
6575 disp = XEXP (disp, 0);
6579 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6580 of GOT tables. We should not need these anyway. */
6581 if (GET_CODE (disp) != UNSPEC
6582 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6583 && XINT (disp, 1) != UNSPEC_GOTOFF))
6586 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6587 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6593 if (GET_CODE (disp) == PLUS)
6595 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6597 disp = XEXP (disp, 0);
6601 if (TARGET_MACHO && darwin_local_data_pic (disp))
6604 if (GET_CODE (disp) != UNSPEC)
6607 switch (XINT (disp, 1))
6612 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6614 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6615 While ABI specify also 32bit relocation but we don't produce it in
6616 small PIC model at all. */
6617 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6618 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6620 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6622 case UNSPEC_GOTTPOFF:
6623 case UNSPEC_GOTNTPOFF:
6624 case UNSPEC_INDNTPOFF:
6627 disp = XVECEXP (disp, 0, 0);
6628 return (GET_CODE (disp) == SYMBOL_REF
6629 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6631 disp = XVECEXP (disp, 0, 0);
6632 return (GET_CODE (disp) == SYMBOL_REF
6633 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6635 disp = XVECEXP (disp, 0, 0);
6636 return (GET_CODE (disp) == SYMBOL_REF
6637 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6643 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6644 memory address for an instruction. The MODE argument is the machine mode
6645 for the MEM expression that wants to use this address.
6647 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6648 convert common non-canonical forms to canonical form so that they will
6652 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6654 struct ix86_address parts;
6655 rtx base, index, disp;
6656 HOST_WIDE_INT scale;
6657 const char *reason = NULL;
6658 rtx reason_rtx = NULL_RTX;
6660 if (TARGET_DEBUG_ADDR)
6663 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6664 GET_MODE_NAME (mode), strict);
6668 if (ix86_decompose_address (addr, &parts) <= 0)
6670 reason = "decomposition failed";
6675 index = parts.index;
6677 scale = parts.scale;
6679 /* Validate base register.
6681 Don't allow SUBREG's that span more than a word here. It can lead to spill
6682 failures when the base is one word out of a two word structure, which is
6683 represented internally as a DImode int. */
6692 else if (GET_CODE (base) == SUBREG
6693 && REG_P (SUBREG_REG (base))
6694 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6696 reg = SUBREG_REG (base);
6699 reason = "base is not a register";
6703 if (GET_MODE (base) != Pmode)
6705 reason = "base is not in Pmode";
6709 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6710 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6712 reason = "base is not valid";
6717 /* Validate index register.
6719 Don't allow SUBREG's that span more than a word here -- same as above. */
6728 else if (GET_CODE (index) == SUBREG
6729 && REG_P (SUBREG_REG (index))
6730 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6732 reg = SUBREG_REG (index);
6735 reason = "index is not a register";
6739 if (GET_MODE (index) != Pmode)
6741 reason = "index is not in Pmode";
6745 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6746 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6748 reason = "index is not valid";
6753 /* Validate scale factor. */
6756 reason_rtx = GEN_INT (scale);
6759 reason = "scale without index";
6763 if (scale != 2 && scale != 4 && scale != 8)
6765 reason = "scale is not a valid multiplier";
6770 /* Validate displacement. */
6775 if (GET_CODE (disp) == CONST
6776 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6777 switch (XINT (XEXP (disp, 0), 1))
6779 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6780 used. While ABI specify also 32bit relocations, we don't produce
6781 them at all and use IP relative instead. */
6784 gcc_assert (flag_pic);
6786 goto is_legitimate_pic;
6787 reason = "64bit address unspec";
6790 case UNSPEC_GOTPCREL:
6791 gcc_assert (flag_pic);
6792 goto is_legitimate_pic;
6794 case UNSPEC_GOTTPOFF:
6795 case UNSPEC_GOTNTPOFF:
6796 case UNSPEC_INDNTPOFF:
6802 reason = "invalid address unspec";
6806 else if (SYMBOLIC_CONST (disp)
6810 && MACHOPIC_INDIRECT
6811 && !machopic_operand_p (disp)
6817 if (TARGET_64BIT && (index || base))
6819 /* foo@dtpoff(%rX) is ok. */
6820 if (GET_CODE (disp) != CONST
6821 || GET_CODE (XEXP (disp, 0)) != PLUS
6822 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6823 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6824 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6825 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6827 reason = "non-constant pic memory reference";
6831 else if (! legitimate_pic_address_disp_p (disp))
6833 reason = "displacement is an invalid pic construct";
6837 /* This code used to verify that a symbolic pic displacement
6838 includes the pic_offset_table_rtx register.
6840 While this is good idea, unfortunately these constructs may
6841 be created by "adds using lea" optimization for incorrect
6850 This code is nonsensical, but results in addressing
6851 GOT table with pic_offset_table_rtx base. We can't
6852 just refuse it easily, since it gets matched by
6853 "addsi3" pattern, that later gets split to lea in the
6854 case output register differs from input. While this
6855 can be handled by separate addsi pattern for this case
6856 that never results in lea, this seems to be easier and
6857 correct fix for crash to disable this test. */
6859 else if (GET_CODE (disp) != LABEL_REF
6860 && GET_CODE (disp) != CONST_INT
6861 && (GET_CODE (disp) != CONST
6862 || !legitimate_constant_p (disp))
6863 && (GET_CODE (disp) != SYMBOL_REF
6864 || !legitimate_constant_p (disp)))
6866 reason = "displacement is not constant";
6869 else if (TARGET_64BIT
6870 && !x86_64_immediate_operand (disp, VOIDmode))
6872 reason = "displacement is out of range";
6877 /* Everything looks valid. */
6878 if (TARGET_DEBUG_ADDR)
6879 fprintf (stderr, "Success.\n");
6883 if (TARGET_DEBUG_ADDR)
6885 fprintf (stderr, "Error: %s\n", reason);
6886 debug_rtx (reason_rtx);
6891 /* Return a unique alias set for the GOT. */
6893 static HOST_WIDE_INT
6894 ix86_GOT_alias_set (void)
6896 static HOST_WIDE_INT set = -1;
6898 set = new_alias_set ();
6902 /* Return a legitimate reference for ORIG (an address) using the
6903 register REG. If REG is 0, a new pseudo is generated.
6905 There are two types of references that must be handled:
6907 1. Global data references must load the address from the GOT, via
6908 the PIC reg. An insn is emitted to do this load, and the reg is
6911 2. Static data references, constant pool addresses, and code labels
6912 compute the address as an offset from the GOT, whose base is in
6913 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6914 differentiate them from global data objects. The returned
6915 address is the PIC reg + an unspec constant.
6917 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6918 reg also appears in the address. */
6921 legitimize_pic_address (rtx orig, rtx reg)
6928 if (TARGET_MACHO && !TARGET_64BIT)
6931 reg = gen_reg_rtx (Pmode);
6932 /* Use the generic Mach-O PIC machinery. */
6933 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6937 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6939 else if (TARGET_64BIT
6940 && ix86_cmodel != CM_SMALL_PIC
6941 && local_symbolic_operand (addr, Pmode))
6944 /* This symbol may be referenced via a displacement from the PIC
6945 base address (@GOTOFF). */
6947 if (reload_in_progress)
6948 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6949 if (GET_CODE (addr) == CONST)
6950 addr = XEXP (addr, 0);
6951 if (GET_CODE (addr) == PLUS)
6953 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6954 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6957 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6958 new = gen_rtx_CONST (Pmode, new);
6960 tmpreg = gen_reg_rtx (Pmode);
6963 emit_move_insn (tmpreg, new);
6967 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6968 tmpreg, 1, OPTAB_DIRECT);
6971 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6973 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6975 /* This symbol may be referenced via a displacement from the PIC
6976 base address (@GOTOFF). */
6978 if (reload_in_progress)
6979 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6980 if (GET_CODE (addr) == CONST)
6981 addr = XEXP (addr, 0);
6982 if (GET_CODE (addr) == PLUS)
6984 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6985 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6988 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6989 new = gen_rtx_CONST (Pmode, new);
6990 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6994 emit_move_insn (reg, new);
6998 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7002 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7003 new = gen_rtx_CONST (Pmode, new);
7004 new = gen_const_mem (Pmode, new);
7005 set_mem_alias_set (new, ix86_GOT_alias_set ());
7008 reg = gen_reg_rtx (Pmode);
7009 /* Use directly gen_movsi, otherwise the address is loaded
7010 into register for CSE. We don't want to CSE this addresses,
7011 instead we CSE addresses from the GOT table, so skip this. */
7012 emit_insn (gen_movsi (reg, new));
7017 /* This symbol must be referenced via a load from the
7018 Global Offset Table (@GOT). */
7020 if (reload_in_progress)
7021 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7022 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7023 new = gen_rtx_CONST (Pmode, new);
7024 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7025 new = gen_const_mem (Pmode, new);
7026 set_mem_alias_set (new, ix86_GOT_alias_set ());
7029 reg = gen_reg_rtx (Pmode);
7030 emit_move_insn (reg, new);
7036 if (GET_CODE (addr) == CONST_INT
7037 && !x86_64_immediate_operand (addr, VOIDmode))
7041 emit_move_insn (reg, addr);
7045 new = force_reg (Pmode, addr);
7047 else if (GET_CODE (addr) == CONST)
7049 addr = XEXP (addr, 0);
7051 /* We must match stuff we generate before. Assume the only
7052 unspecs that can get here are ours. Not that we could do
7053 anything with them anyway.... */
7054 if (GET_CODE (addr) == UNSPEC
7055 || (GET_CODE (addr) == PLUS
7056 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7058 gcc_assert (GET_CODE (addr) == PLUS);
7060 if (GET_CODE (addr) == PLUS)
7062 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7064 /* Check first to see if this is a constant offset from a @GOTOFF
7065 symbol reference. */
7066 if (local_symbolic_operand (op0, Pmode)
7067 && GET_CODE (op1) == CONST_INT)
7071 if (reload_in_progress)
7072 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7073 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7075 new = gen_rtx_PLUS (Pmode, new, op1);
7076 new = gen_rtx_CONST (Pmode, new);
7077 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7081 emit_move_insn (reg, new);
7087 if (INTVAL (op1) < -16*1024*1024
7088 || INTVAL (op1) >= 16*1024*1024)
7090 if (!x86_64_immediate_operand (op1, Pmode))
7091 op1 = force_reg (Pmode, op1);
7092 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7098 base = legitimize_pic_address (XEXP (addr, 0), reg);
7099 new = legitimize_pic_address (XEXP (addr, 1),
7100 base == reg ? NULL_RTX : reg);
7102 if (GET_CODE (new) == CONST_INT)
7103 new = plus_constant (base, INTVAL (new));
7106 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7108 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7109 new = XEXP (new, 1);
7111 new = gen_rtx_PLUS (Pmode, base, new);
7119 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7122 get_thread_pointer (int to_reg)
7126 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7130 reg = gen_reg_rtx (Pmode);
7131 insn = gen_rtx_SET (VOIDmode, reg, tp);
7132 insn = emit_insn (insn);
7137 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7138 false if we expect this to be used for a memory address and true if
7139 we expect to load the address into a register. */
7142 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7144 rtx dest, base, off, pic, tp;
7149 case TLS_MODEL_GLOBAL_DYNAMIC:
7150 dest = gen_reg_rtx (Pmode);
7151 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7153 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7155 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7158 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7159 insns = get_insns ();
7162 emit_libcall_block (insns, dest, rax, x);
7164 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7165 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7167 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7169 if (TARGET_GNU2_TLS)
7171 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7173 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7177 case TLS_MODEL_LOCAL_DYNAMIC:
7178 base = gen_reg_rtx (Pmode);
7179 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7181 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7183 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7186 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7187 insns = get_insns ();
7190 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7191 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7192 emit_libcall_block (insns, base, rax, note);
7194 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7195 emit_insn (gen_tls_local_dynamic_base_64 (base));
7197 emit_insn (gen_tls_local_dynamic_base_32 (base));
7199 if (TARGET_GNU2_TLS)
7201 rtx x = ix86_tls_module_base ();
7203 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7204 gen_rtx_MINUS (Pmode, x, tp));
7207 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7208 off = gen_rtx_CONST (Pmode, off);
7210 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7212 if (TARGET_GNU2_TLS)
7214 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7216 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7221 case TLS_MODEL_INITIAL_EXEC:
7225 type = UNSPEC_GOTNTPOFF;
7229 if (reload_in_progress)
7230 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7231 pic = pic_offset_table_rtx;
7232 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7234 else if (!TARGET_ANY_GNU_TLS)
7236 pic = gen_reg_rtx (Pmode);
7237 emit_insn (gen_set_got (pic));
7238 type = UNSPEC_GOTTPOFF;
7243 type = UNSPEC_INDNTPOFF;
7246 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7247 off = gen_rtx_CONST (Pmode, off);
7249 off = gen_rtx_PLUS (Pmode, pic, off);
7250 off = gen_const_mem (Pmode, off);
7251 set_mem_alias_set (off, ix86_GOT_alias_set ());
7253 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7255 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7256 off = force_reg (Pmode, off);
7257 return gen_rtx_PLUS (Pmode, base, off);
7261 base = get_thread_pointer (true);
7262 dest = gen_reg_rtx (Pmode);
7263 emit_insn (gen_subsi3 (dest, base, off));
7267 case TLS_MODEL_LOCAL_EXEC:
7268 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7269 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7270 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7271 off = gen_rtx_CONST (Pmode, off);
7273 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7275 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7276 return gen_rtx_PLUS (Pmode, base, off);
7280 base = get_thread_pointer (true);
7281 dest = gen_reg_rtx (Pmode);
7282 emit_insn (gen_subsi3 (dest, base, off));
7293 /* Try machine-dependent ways of modifying an illegitimate address
7294 to be legitimate. If we find one, return the new, valid address.
7295 This macro is used in only one place: `memory_address' in explow.c.
7297 OLDX is the address as it was before break_out_memory_refs was called.
7298 In some cases it is useful to look at this to decide what needs to be done.
7300 MODE and WIN are passed so that this macro can use
7301 GO_IF_LEGITIMATE_ADDRESS.
7303 It is always safe for this macro to do nothing. It exists to recognize
7304 opportunities to optimize the output.
7306 For the 80386, we handle X+REG by loading X into a register R and
7307 using R+REG. R will go in a general reg and indexing will be used.
7308 However, if REG is a broken-out memory address or multiplication,
7309 nothing needs to be done because REG can certainly go in a general reg.
7311 When -fpic is used, special handling is needed for symbolic references.
7312 See comments by legitimize_pic_address in i386.c for details. */
7315 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7320 if (TARGET_DEBUG_ADDR)
7322 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7323 GET_MODE_NAME (mode));
7327 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7329 return legitimize_tls_address (x, log, false);
7330 if (GET_CODE (x) == CONST
7331 && GET_CODE (XEXP (x, 0)) == PLUS
7332 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7333 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7335 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7336 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7339 if (flag_pic && SYMBOLIC_CONST (x))
7340 return legitimize_pic_address (x, 0);
7342 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7343 if (GET_CODE (x) == ASHIFT
7344 && GET_CODE (XEXP (x, 1)) == CONST_INT
7345 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7348 log = INTVAL (XEXP (x, 1));
7349 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7350 GEN_INT (1 << log));
7353 if (GET_CODE (x) == PLUS)
7355 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7357 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7358 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7359 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7362 log = INTVAL (XEXP (XEXP (x, 0), 1));
7363 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7364 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7365 GEN_INT (1 << log));
7368 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7369 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7370 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7373 log = INTVAL (XEXP (XEXP (x, 1), 1));
7374 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7375 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7376 GEN_INT (1 << log));
7379 /* Put multiply first if it isn't already. */
7380 if (GET_CODE (XEXP (x, 1)) == MULT)
7382 rtx tmp = XEXP (x, 0);
7383 XEXP (x, 0) = XEXP (x, 1);
7388 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7389 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7390 created by virtual register instantiation, register elimination, and
7391 similar optimizations. */
7392 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7395 x = gen_rtx_PLUS (Pmode,
7396 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7397 XEXP (XEXP (x, 1), 0)),
7398 XEXP (XEXP (x, 1), 1));
7402 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7403 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7404 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7405 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7406 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7407 && CONSTANT_P (XEXP (x, 1)))
7410 rtx other = NULL_RTX;
7412 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7414 constant = XEXP (x, 1);
7415 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7417 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7419 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7420 other = XEXP (x, 1);
7428 x = gen_rtx_PLUS (Pmode,
7429 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7430 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7431 plus_constant (other, INTVAL (constant)));
7435 if (changed && legitimate_address_p (mode, x, FALSE))
7438 if (GET_CODE (XEXP (x, 0)) == MULT)
7441 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7444 if (GET_CODE (XEXP (x, 1)) == MULT)
7447 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7451 && GET_CODE (XEXP (x, 1)) == REG
7452 && GET_CODE (XEXP (x, 0)) == REG)
7455 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7458 x = legitimize_pic_address (x, 0);
7461 if (changed && legitimate_address_p (mode, x, FALSE))
7464 if (GET_CODE (XEXP (x, 0)) == REG)
7466 rtx temp = gen_reg_rtx (Pmode);
7467 rtx val = force_operand (XEXP (x, 1), temp);
7469 emit_move_insn (temp, val);
7475 else if (GET_CODE (XEXP (x, 1)) == REG)
7477 rtx temp = gen_reg_rtx (Pmode);
7478 rtx val = force_operand (XEXP (x, 0), temp);
7480 emit_move_insn (temp, val);
7490 /* Print an integer constant expression in assembler syntax. Addition
7491 and subtraction are the only arithmetic that may appear in these
7492 expressions. FILE is the stdio stream to write to, X is the rtx, and
7493 CODE is the operand print code from the output string. */
7496 output_pic_addr_const (FILE *file, rtx x, int code)
7500 switch (GET_CODE (x))
7503 gcc_assert (flag_pic);
7508 if (! TARGET_MACHO || TARGET_64BIT)
7509 output_addr_const (file, x);
7512 const char *name = XSTR (x, 0);
7514 /* Mark the decl as referenced so that cgraph will output the function. */
7515 if (SYMBOL_REF_DECL (x))
7516 mark_decl_referenced (SYMBOL_REF_DECL (x));
7519 if (MACHOPIC_INDIRECT
7520 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7521 name = machopic_indirection_name (x, /*stub_p=*/true);
7523 assemble_name (file, name);
7525 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7526 fputs ("@PLT", file);
7533 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7534 assemble_name (asm_out_file, buf);
7538 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7542 /* This used to output parentheses around the expression,
7543 but that does not work on the 386 (either ATT or BSD assembler). */
7544 output_pic_addr_const (file, XEXP (x, 0), code);
7548 if (GET_MODE (x) == VOIDmode)
7550 /* We can use %d if the number is <32 bits and positive. */
7551 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7552 fprintf (file, "0x%lx%08lx",
7553 (unsigned long) CONST_DOUBLE_HIGH (x),
7554 (unsigned long) CONST_DOUBLE_LOW (x));
7556 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7559 /* We can't handle floating point constants;
7560 PRINT_OPERAND must handle them. */
7561 output_operand_lossage ("floating constant misused");
7565 /* Some assemblers need integer constants to appear first. */
7566 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7568 output_pic_addr_const (file, XEXP (x, 0), code);
7570 output_pic_addr_const (file, XEXP (x, 1), code);
7574 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7575 output_pic_addr_const (file, XEXP (x, 1), code);
7577 output_pic_addr_const (file, XEXP (x, 0), code);
7583 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7584 output_pic_addr_const (file, XEXP (x, 0), code);
7586 output_pic_addr_const (file, XEXP (x, 1), code);
7588 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7592 gcc_assert (XVECLEN (x, 0) == 1);
7593 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7594 switch (XINT (x, 1))
7597 fputs ("@GOT", file);
7600 fputs ("@GOTOFF", file);
7602 case UNSPEC_GOTPCREL:
7603 fputs ("@GOTPCREL(%rip)", file);
7605 case UNSPEC_GOTTPOFF:
7606 /* FIXME: This might be @TPOFF in Sun ld too. */
7607 fputs ("@GOTTPOFF", file);
7610 fputs ("@TPOFF", file);
7614 fputs ("@TPOFF", file);
7616 fputs ("@NTPOFF", file);
7619 fputs ("@DTPOFF", file);
7621 case UNSPEC_GOTNTPOFF:
7623 fputs ("@GOTTPOFF(%rip)", file);
7625 fputs ("@GOTNTPOFF", file);
7627 case UNSPEC_INDNTPOFF:
7628 fputs ("@INDNTPOFF", file);
7631 output_operand_lossage ("invalid UNSPEC as operand");
7637 output_operand_lossage ("invalid expression as operand");
7641 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7642 We need to emit DTP-relative relocations. */
7645 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7647 fputs (ASM_LONG, file);
7648 output_addr_const (file, x);
7649 fputs ("@DTPOFF", file);
7655 fputs (", 0", file);
7662 /* In the name of slightly smaller debug output, and to cater to
7663 general assembler lossage, recognize PIC+GOTOFF and turn it back
7664 into a direct symbol reference.
7666 On Darwin, this is necessary to avoid a crash, because Darwin
7667 has a different PIC label for each routine but the DWARF debugging
7668 information is not associated with any particular routine, so it's
7669 necessary to remove references to the PIC label from RTL stored by
7670 the DWARF output code. */
7673 ix86_delegitimize_address (rtx orig_x)
7676 /* reg_addend is NULL or a multiple of some register. */
7677 rtx reg_addend = NULL_RTX;
7678 /* const_addend is NULL or a const_int. */
7679 rtx const_addend = NULL_RTX;
7680 /* This is the result, or NULL. */
7681 rtx result = NULL_RTX;
7683 if (GET_CODE (x) == MEM)
7688 if (GET_CODE (x) != CONST
7689 || GET_CODE (XEXP (x, 0)) != UNSPEC
7690 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7691 || GET_CODE (orig_x) != MEM)
7693 return XVECEXP (XEXP (x, 0), 0, 0);
7696 if (GET_CODE (x) != PLUS
7697 || GET_CODE (XEXP (x, 1)) != CONST)
7700 if (GET_CODE (XEXP (x, 0)) == REG
7701 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7702 /* %ebx + GOT/GOTOFF */
7704 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7706 /* %ebx + %reg * scale + GOT/GOTOFF */
7707 reg_addend = XEXP (x, 0);
7708 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7709 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7710 reg_addend = XEXP (reg_addend, 1);
7711 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7712 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7713 reg_addend = XEXP (reg_addend, 0);
7716 if (GET_CODE (reg_addend) != REG
7717 && GET_CODE (reg_addend) != MULT
7718 && GET_CODE (reg_addend) != ASHIFT)
7724 x = XEXP (XEXP (x, 1), 0);
7725 if (GET_CODE (x) == PLUS
7726 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7728 const_addend = XEXP (x, 1);
7732 if (GET_CODE (x) == UNSPEC
7733 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7734 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7735 result = XVECEXP (x, 0, 0);
7737 if (TARGET_MACHO && darwin_local_data_pic (x)
7738 && GET_CODE (orig_x) != MEM)
7739 result = XEXP (x, 0);
7745 result = gen_rtx_PLUS (Pmode, result, const_addend);
7747 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7752 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7757 if (mode == CCFPmode || mode == CCFPUmode)
7759 enum rtx_code second_code, bypass_code;
7760 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7761 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7762 code = ix86_fp_compare_code_to_integer (code);
7766 code = reverse_condition (code);
7777 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7781 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7782 Those same assemblers have the same but opposite lossage on cmov. */
7783 gcc_assert (mode == CCmode);
7784 suffix = fp ? "nbe" : "a";
7804 gcc_assert (mode == CCmode);
7826 gcc_assert (mode == CCmode);
7827 suffix = fp ? "nb" : "ae";
7830 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7834 gcc_assert (mode == CCmode);
7838 suffix = fp ? "u" : "p";
7841 suffix = fp ? "nu" : "np";
7846 fputs (suffix, file);
7849 /* Print the name of register X to FILE based on its machine mode and number.
7850 If CODE is 'w', pretend the mode is HImode.
7851 If CODE is 'b', pretend the mode is QImode.
7852 If CODE is 'k', pretend the mode is SImode.
7853 If CODE is 'q', pretend the mode is DImode.
7854 If CODE is 'h', pretend the reg is the 'high' byte register.
7855 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7858 print_reg (rtx x, int code, FILE *file)
7860 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7861 && REGNO (x) != FRAME_POINTER_REGNUM
7862 && REGNO (x) != FLAGS_REG
7863 && REGNO (x) != FPSR_REG);
7865 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7868 if (code == 'w' || MMX_REG_P (x))
7870 else if (code == 'b')
7872 else if (code == 'k')
7874 else if (code == 'q')
7876 else if (code == 'y')
7878 else if (code == 'h')
7881 code = GET_MODE_SIZE (GET_MODE (x));
7883 /* Irritatingly, AMD extended registers use different naming convention
7884 from the normal registers. */
7885 if (REX_INT_REG_P (x))
7887 gcc_assert (TARGET_64BIT);
7891 error ("extended registers have no high halves");
7894 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7897 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7900 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7903 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7906 error ("unsupported operand size for extended register");
7914 if (STACK_TOP_P (x))
7916 fputs ("st(0)", file);
7923 if (! ANY_FP_REG_P (x))
7924 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7929 fputs (hi_reg_name[REGNO (x)], file);
7932 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7934 fputs (qi_reg_name[REGNO (x)], file);
7937 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7939 fputs (qi_high_reg_name[REGNO (x)], file);
7946 /* Locate some local-dynamic symbol still in use by this function
7947 so that we can print its name in some tls_local_dynamic_base
7951 get_some_local_dynamic_name (void)
7955 if (cfun->machine->some_ld_name)
7956 return cfun->machine->some_ld_name;
7958 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7960 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7961 return cfun->machine->some_ld_name;
7967 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7971 if (GET_CODE (x) == SYMBOL_REF
7972 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7974 cfun->machine->some_ld_name = XSTR (x, 0);
7982 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7983 C -- print opcode suffix for set/cmov insn.
7984 c -- like C, but print reversed condition
7985 F,f -- likewise, but for floating-point.
7986 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7988 R -- print the prefix for register names.
7989 z -- print the opcode suffix for the size of the current operand.
7990 * -- print a star (in certain assembler syntax)
7991 A -- print an absolute memory reference.
7992 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7993 s -- print a shift double count, followed by the assemblers argument
7995 b -- print the QImode name of the register for the indicated operand.
7996 %b0 would print %al if operands[0] is reg 0.
7997 w -- likewise, print the HImode name of the register.
7998 k -- likewise, print the SImode name of the register.
7999 q -- likewise, print the DImode name of the register.
8000 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8001 y -- print "st(0)" instead of "st" as a register.
8002 D -- print condition for SSE cmp instruction.
8003 P -- if PIC, print an @PLT suffix.
8004 X -- don't print any sort of PIC '@' suffix for a symbol.
8005 & -- print some in-use local-dynamic symbol name.
8006 H -- print a memory address offset by 8; used for sse high-parts
8010 print_operand (FILE *file, rtx x, int code)
8017 if (ASSEMBLER_DIALECT == ASM_ATT)
8022 assemble_name (file, get_some_local_dynamic_name ());
8026 switch (ASSEMBLER_DIALECT)
8033 /* Intel syntax. For absolute addresses, registers should not
8034 be surrounded by braces. */
8035 if (GET_CODE (x) != REG)
8038 PRINT_OPERAND (file, x, 0);
8048 PRINT_OPERAND (file, x, 0);
8053 if (ASSEMBLER_DIALECT == ASM_ATT)
8058 if (ASSEMBLER_DIALECT == ASM_ATT)
8063 if (ASSEMBLER_DIALECT == ASM_ATT)
8068 if (ASSEMBLER_DIALECT == ASM_ATT)
8073 if (ASSEMBLER_DIALECT == ASM_ATT)
8078 if (ASSEMBLER_DIALECT == ASM_ATT)
8083 /* 387 opcodes don't get size suffixes if the operands are
8085 if (STACK_REG_P (x))
8088 /* Likewise if using Intel opcodes. */
8089 if (ASSEMBLER_DIALECT == ASM_INTEL)
8092 /* This is the size of op from size of operand. */
8093 switch (GET_MODE_SIZE (GET_MODE (x)))
8096 #ifdef HAVE_GAS_FILDS_FISTS
8102 if (GET_MODE (x) == SFmode)
8117 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8119 #ifdef GAS_MNEMONICS
8145 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8147 PRINT_OPERAND (file, x, 0);
8153 /* Little bit of braindamage here. The SSE compare instructions
8154 does use completely different names for the comparisons that the
8155 fp conditional moves. */
8156 switch (GET_CODE (x))
8171 fputs ("unord", file);
8175 fputs ("neq", file);
8179 fputs ("nlt", file);
8183 fputs ("nle", file);
8186 fputs ("ord", file);
8193 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8194 if (ASSEMBLER_DIALECT == ASM_ATT)
8196 switch (GET_MODE (x))
8198 case HImode: putc ('w', file); break;
8200 case SFmode: putc ('l', file); break;
8202 case DFmode: putc ('q', file); break;
8203 default: gcc_unreachable ();
8210 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8213 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8214 if (ASSEMBLER_DIALECT == ASM_ATT)
8217 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8220 /* Like above, but reverse condition */
8222 /* Check to see if argument to %c is really a constant
8223 and not a condition code which needs to be reversed. */
8224 if (!COMPARISON_P (x))
8226 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8229 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8232 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8233 if (ASSEMBLER_DIALECT == ASM_ATT)
8236 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8240 /* It doesn't actually matter what mode we use here, as we're
8241 only going to use this for printing. */
8242 x = adjust_address_nv (x, DImode, 8);
8249 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8252 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8255 int pred_val = INTVAL (XEXP (x, 0));
8257 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8258 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8260 int taken = pred_val > REG_BR_PROB_BASE / 2;
8261 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8263 /* Emit hints only in the case default branch prediction
8264 heuristics would fail. */
8265 if (taken != cputaken)
8267 /* We use 3e (DS) prefix for taken branches and
8268 2e (CS) prefix for not taken branches. */
8270 fputs ("ds ; ", file);
8272 fputs ("cs ; ", file);
8279 output_operand_lossage ("invalid operand code '%c'", code);
8283 if (GET_CODE (x) == REG)
8284 print_reg (x, code, file);
8286 else if (GET_CODE (x) == MEM)
8288 /* No `byte ptr' prefix for call instructions. */
8289 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8292 switch (GET_MODE_SIZE (GET_MODE (x)))
8294 case 1: size = "BYTE"; break;
8295 case 2: size = "WORD"; break;
8296 case 4: size = "DWORD"; break;
8297 case 8: size = "QWORD"; break;
8298 case 12: size = "XWORD"; break;
8299 case 16: size = "XMMWORD"; break;
8304 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8307 else if (code == 'w')
8309 else if (code == 'k')
8313 fputs (" PTR ", file);
8317 /* Avoid (%rip) for call operands. */
8318 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8319 && GET_CODE (x) != CONST_INT)
8320 output_addr_const (file, x);
8321 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8322 output_operand_lossage ("invalid constraints for operand");
8327 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8332 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8333 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8335 if (ASSEMBLER_DIALECT == ASM_ATT)
8337 fprintf (file, "0x%08lx", l);
8340 /* These float cases don't actually occur as immediate operands. */
8341 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8345 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8346 fprintf (file, "%s", dstr);
8349 else if (GET_CODE (x) == CONST_DOUBLE
8350 && GET_MODE (x) == XFmode)
8354 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8355 fprintf (file, "%s", dstr);
8360 /* We have patterns that allow zero sets of memory, for instance.
8361 In 64-bit mode, we should probably support all 8-byte vectors,
8362 since we can in fact encode that into an immediate. */
8363 if (GET_CODE (x) == CONST_VECTOR)
8365 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8371 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8373 if (ASSEMBLER_DIALECT == ASM_ATT)
8376 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8377 || GET_CODE (x) == LABEL_REF)
8379 if (ASSEMBLER_DIALECT == ASM_ATT)
8382 fputs ("OFFSET FLAT:", file);
8385 if (GET_CODE (x) == CONST_INT)
8386 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8388 output_pic_addr_const (file, x, code);
8390 output_addr_const (file, x);
8394 /* Print a memory operand whose address is ADDR. */
8397 print_operand_address (FILE *file, rtx addr)
8399 struct ix86_address parts;
8400 rtx base, index, disp;
8402 int ok = ix86_decompose_address (addr, &parts);
8407 index = parts.index;
8409 scale = parts.scale;
8417 if (USER_LABEL_PREFIX[0] == 0)
8419 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8425 if (!base && !index)
8427 /* Displacement only requires special attention. */
8429 if (GET_CODE (disp) == CONST_INT)
8431 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8433 if (USER_LABEL_PREFIX[0] == 0)
8435 fputs ("ds:", file);
8437 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8440 output_pic_addr_const (file, disp, 0);
8442 output_addr_const (file, disp);
8444 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8447 if (GET_CODE (disp) == CONST
8448 && GET_CODE (XEXP (disp, 0)) == PLUS
8449 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8450 disp = XEXP (XEXP (disp, 0), 0);
8451 if (GET_CODE (disp) == LABEL_REF
8452 || (GET_CODE (disp) == SYMBOL_REF
8453 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8454 fputs ("(%rip)", file);
8459 if (ASSEMBLER_DIALECT == ASM_ATT)
8464 output_pic_addr_const (file, disp, 0);
8465 else if (GET_CODE (disp) == LABEL_REF)
8466 output_asm_label (disp);
8468 output_addr_const (file, disp);
8473 print_reg (base, 0, file);
8477 print_reg (index, 0, file);
8479 fprintf (file, ",%d", scale);
8485 rtx offset = NULL_RTX;
8489 /* Pull out the offset of a symbol; print any symbol itself. */
8490 if (GET_CODE (disp) == CONST
8491 && GET_CODE (XEXP (disp, 0)) == PLUS
8492 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8494 offset = XEXP (XEXP (disp, 0), 1);
8495 disp = gen_rtx_CONST (VOIDmode,
8496 XEXP (XEXP (disp, 0), 0));
8500 output_pic_addr_const (file, disp, 0);
8501 else if (GET_CODE (disp) == LABEL_REF)
8502 output_asm_label (disp);
8503 else if (GET_CODE (disp) == CONST_INT)
8506 output_addr_const (file, disp);
8512 print_reg (base, 0, file);
8515 if (INTVAL (offset) >= 0)
8517 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8521 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8528 print_reg (index, 0, file);
8530 fprintf (file, "*%d", scale);
8538 output_addr_const_extra (FILE *file, rtx x)
8542 if (GET_CODE (x) != UNSPEC)
8545 op = XVECEXP (x, 0, 0);
8546 switch (XINT (x, 1))
8548 case UNSPEC_GOTTPOFF:
8549 output_addr_const (file, op);
8550 /* FIXME: This might be @TPOFF in Sun ld. */
8551 fputs ("@GOTTPOFF", file);
8554 output_addr_const (file, op);
8555 fputs ("@TPOFF", file);
8558 output_addr_const (file, op);
8560 fputs ("@TPOFF", file);
8562 fputs ("@NTPOFF", file);
8565 output_addr_const (file, op);
8566 fputs ("@DTPOFF", file);
8568 case UNSPEC_GOTNTPOFF:
8569 output_addr_const (file, op);
8571 fputs ("@GOTTPOFF(%rip)", file);
8573 fputs ("@GOTNTPOFF", file);
8575 case UNSPEC_INDNTPOFF:
8576 output_addr_const (file, op);
8577 fputs ("@INDNTPOFF", file);
8587 /* Split one or more DImode RTL references into pairs of SImode
8588 references. The RTL can be REG, offsettable MEM, integer constant, or
8589 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8590 split and "num" is its length. lo_half and hi_half are output arrays
8591 that parallel "operands". */
8594 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8598 rtx op = operands[num];
8600 /* simplify_subreg refuse to split volatile memory addresses,
8601 but we still have to handle it. */
8602 if (GET_CODE (op) == MEM)
8604 lo_half[num] = adjust_address (op, SImode, 0);
8605 hi_half[num] = adjust_address (op, SImode, 4);
8609 lo_half[num] = simplify_gen_subreg (SImode, op,
8610 GET_MODE (op) == VOIDmode
8611 ? DImode : GET_MODE (op), 0);
8612 hi_half[num] = simplify_gen_subreg (SImode, op,
8613 GET_MODE (op) == VOIDmode
8614 ? DImode : GET_MODE (op), 4);
8618 /* Split one or more TImode RTL references into pairs of DImode
8619 references. The RTL can be REG, offsettable MEM, integer constant, or
8620 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8621 split and "num" is its length. lo_half and hi_half are output arrays
8622 that parallel "operands". */
8625 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8629 rtx op = operands[num];
8631 /* simplify_subreg refuse to split volatile memory addresses, but we
8632 still have to handle it. */
8633 if (GET_CODE (op) == MEM)
8635 lo_half[num] = adjust_address (op, DImode, 0);
8636 hi_half[num] = adjust_address (op, DImode, 8);
8640 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8641 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8646 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8647 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8648 is the expression of the binary operation. The output may either be
8649 emitted here, or returned to the caller, like all output_* functions.
8651 There is no guarantee that the operands are the same mode, as they
8652 might be within FLOAT or FLOAT_EXTEND expressions. */
8654 #ifndef SYSV386_COMPAT
8655 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8656 wants to fix the assemblers because that causes incompatibility
8657 with gcc. No-one wants to fix gcc because that causes
8658 incompatibility with assemblers... You can use the option of
8659 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8660 #define SYSV386_COMPAT 1
8664 output_387_binary_op (rtx insn, rtx *operands)
8666 static char buf[30];
8669 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8671 #ifdef ENABLE_CHECKING
8672 /* Even if we do not want to check the inputs, this documents input
8673 constraints. Which helps in understanding the following code. */
8674 if (STACK_REG_P (operands[0])
8675 && ((REG_P (operands[1])
8676 && REGNO (operands[0]) == REGNO (operands[1])
8677 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8678 || (REG_P (operands[2])
8679 && REGNO (operands[0]) == REGNO (operands[2])
8680 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8681 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8684 gcc_assert (is_sse);
8687 switch (GET_CODE (operands[3]))
8690 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8691 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8699 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8700 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8708 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8709 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8717 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8718 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8732 if (GET_MODE (operands[0]) == SFmode)
8733 strcat (buf, "ss\t{%2, %0|%0, %2}");
8735 strcat (buf, "sd\t{%2, %0|%0, %2}");
8740 switch (GET_CODE (operands[3]))
8744 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8746 rtx temp = operands[2];
8747 operands[2] = operands[1];
8751 /* know operands[0] == operands[1]. */
8753 if (GET_CODE (operands[2]) == MEM)
8759 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8761 if (STACK_TOP_P (operands[0]))
8762 /* How is it that we are storing to a dead operand[2]?
8763 Well, presumably operands[1] is dead too. We can't
8764 store the result to st(0) as st(0) gets popped on this
8765 instruction. Instead store to operands[2] (which I
8766 think has to be st(1)). st(1) will be popped later.
8767 gcc <= 2.8.1 didn't have this check and generated
8768 assembly code that the Unixware assembler rejected. */
8769 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8771 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8775 if (STACK_TOP_P (operands[0]))
8776 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8778 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8783 if (GET_CODE (operands[1]) == MEM)
8789 if (GET_CODE (operands[2]) == MEM)
8795 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8798 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8799 derived assemblers, confusingly reverse the direction of
8800 the operation for fsub{r} and fdiv{r} when the
8801 destination register is not st(0). The Intel assembler
8802 doesn't have this brain damage. Read !SYSV386_COMPAT to
8803 figure out what the hardware really does. */
8804 if (STACK_TOP_P (operands[0]))
8805 p = "{p\t%0, %2|rp\t%2, %0}";
8807 p = "{rp\t%2, %0|p\t%0, %2}";
8809 if (STACK_TOP_P (operands[0]))
8810 /* As above for fmul/fadd, we can't store to st(0). */
8811 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8813 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8818 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8821 if (STACK_TOP_P (operands[0]))
8822 p = "{rp\t%0, %1|p\t%1, %0}";
8824 p = "{p\t%1, %0|rp\t%0, %1}";
8826 if (STACK_TOP_P (operands[0]))
8827 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8829 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8834 if (STACK_TOP_P (operands[0]))
8836 if (STACK_TOP_P (operands[1]))
8837 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8839 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8842 else if (STACK_TOP_P (operands[1]))
8845 p = "{\t%1, %0|r\t%0, %1}";
8847 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8853 p = "{r\t%2, %0|\t%0, %2}";
8855 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8868 /* Return needed mode for entity in optimize_mode_switching pass. */
8871 ix86_mode_needed (int entity, rtx insn)
8873 enum attr_i387_cw mode;
8875 /* The mode UNINITIALIZED is used to store control word after a
8876 function call or ASM pattern. The mode ANY specify that function
8877 has no requirements on the control word and make no changes in the
8878 bits we are interested in. */
8881 || (NONJUMP_INSN_P (insn)
8882 && (asm_noperands (PATTERN (insn)) >= 0
8883 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8884 return I387_CW_UNINITIALIZED;
8886 if (recog_memoized (insn) < 0)
8889 mode = get_attr_i387_cw (insn);
8894 if (mode == I387_CW_TRUNC)
8899 if (mode == I387_CW_FLOOR)
8904 if (mode == I387_CW_CEIL)
8909 if (mode == I387_CW_MASK_PM)
8920 /* Output code to initialize control word copies used by trunc?f?i and
8921 rounding patterns. CURRENT_MODE is set to current control word,
8922 while NEW_MODE is set to new control word. */
8925 emit_i387_cw_initialization (int mode)
8927 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8932 rtx reg = gen_reg_rtx (HImode);
8934 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8935 emit_move_insn (reg, stored_mode);
8937 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8942 /* round toward zero (truncate) */
8943 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8944 slot = SLOT_CW_TRUNC;
8948 /* round down toward -oo */
8949 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8950 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8951 slot = SLOT_CW_FLOOR;
8955 /* round up toward +oo */
8956 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8957 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8958 slot = SLOT_CW_CEIL;
8961 case I387_CW_MASK_PM:
8962 /* mask precision exception for nearbyint() */
8963 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8964 slot = SLOT_CW_MASK_PM;
8976 /* round toward zero (truncate) */
8977 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8978 slot = SLOT_CW_TRUNC;
8982 /* round down toward -oo */
8983 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8984 slot = SLOT_CW_FLOOR;
8988 /* round up toward +oo */
8989 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8990 slot = SLOT_CW_CEIL;
8993 case I387_CW_MASK_PM:
8994 /* mask precision exception for nearbyint() */
8995 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8996 slot = SLOT_CW_MASK_PM;
9004 gcc_assert (slot < MAX_386_STACK_LOCALS);
9006 new_mode = assign_386_stack_local (HImode, slot);
9007 emit_move_insn (new_mode, reg);
9010 /* Output code for INSN to convert a float to a signed int. OPERANDS
9011 are the insn operands. The output may be [HSD]Imode and the input
9012 operand may be [SDX]Fmode. */
9015 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9017 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9018 int dimode_p = GET_MODE (operands[0]) == DImode;
9019 int round_mode = get_attr_i387_cw (insn);
9021 /* Jump through a hoop or two for DImode, since the hardware has no
9022 non-popping instruction. We used to do this a different way, but
9023 that was somewhat fragile and broke with post-reload splitters. */
9024 if ((dimode_p || fisttp) && !stack_top_dies)
9025 output_asm_insn ("fld\t%y1", operands);
9027 gcc_assert (STACK_TOP_P (operands[1]));
9028 gcc_assert (GET_CODE (operands[0]) == MEM);
9031 output_asm_insn ("fisttp%z0\t%0", operands);
9034 if (round_mode != I387_CW_ANY)
9035 output_asm_insn ("fldcw\t%3", operands);
9036 if (stack_top_dies || dimode_p)
9037 output_asm_insn ("fistp%z0\t%0", operands);
9039 output_asm_insn ("fist%z0\t%0", operands);
9040 if (round_mode != I387_CW_ANY)
9041 output_asm_insn ("fldcw\t%2", operands);
9047 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9048 have the values zero or one, indicates the ffreep insn's operand
9049 from the OPERANDS array. */
9052 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9054 if (TARGET_USE_FFREEP)
9055 #if HAVE_AS_IX86_FFREEP
9056 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9058 switch (REGNO (operands[opno]))
9060 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9061 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9062 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9063 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9064 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9065 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9066 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9067 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9071 return opno ? "fstp\t%y1" : "fstp\t%y0";
9075 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9076 should be used. UNORDERED_P is true when fucom should be used. */
9079 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9082 rtx cmp_op0, cmp_op1;
9083 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9087 cmp_op0 = operands[0];
9088 cmp_op1 = operands[1];
9092 cmp_op0 = operands[1];
9093 cmp_op1 = operands[2];
9098 if (GET_MODE (operands[0]) == SFmode)
9100 return "ucomiss\t{%1, %0|%0, %1}";
9102 return "comiss\t{%1, %0|%0, %1}";
9105 return "ucomisd\t{%1, %0|%0, %1}";
9107 return "comisd\t{%1, %0|%0, %1}";
9110 gcc_assert (STACK_TOP_P (cmp_op0));
9112 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9114 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9118 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9119 return output_387_ffreep (operands, 1);
9122 return "ftst\n\tfnstsw\t%0";
9125 if (STACK_REG_P (cmp_op1)
9127 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9128 && REGNO (cmp_op1) != FIRST_STACK_REG)
9130 /* If both the top of the 387 stack dies, and the other operand
9131 is also a stack register that dies, then this must be a
9132 `fcompp' float compare */
9136 /* There is no double popping fcomi variant. Fortunately,
9137 eflags is immune from the fstp's cc clobbering. */
9139 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9141 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9142 return output_387_ffreep (operands, 0);
9147 return "fucompp\n\tfnstsw\t%0";
9149 return "fcompp\n\tfnstsw\t%0";
9154 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9156 static const char * const alt[16] =
9158 "fcom%z2\t%y2\n\tfnstsw\t%0",
9159 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9160 "fucom%z2\t%y2\n\tfnstsw\t%0",
9161 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9163 "ficom%z2\t%y2\n\tfnstsw\t%0",
9164 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9168 "fcomi\t{%y1, %0|%0, %y1}",
9169 "fcomip\t{%y1, %0|%0, %y1}",
9170 "fucomi\t{%y1, %0|%0, %y1}",
9171 "fucomip\t{%y1, %0|%0, %y1}",
9182 mask = eflags_p << 3;
9183 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9184 mask |= unordered_p << 1;
9185 mask |= stack_top_dies;
9187 gcc_assert (mask < 16);
9196 ix86_output_addr_vec_elt (FILE *file, int value)
9198 const char *directive = ASM_LONG;
9202 directive = ASM_QUAD;
9204 gcc_assert (!TARGET_64BIT);
9207 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9211 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9214 fprintf (file, "%s%s%d-%s%d\n",
9215 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9216 else if (HAVE_AS_GOTOFF_IN_DATA)
9217 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9219 else if (TARGET_MACHO)
9221 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9222 machopic_output_function_base_name (file);
9223 fprintf(file, "\n");
9227 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9228 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9231 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9235 ix86_expand_clear (rtx dest)
9239 /* We play register width games, which are only valid after reload. */
9240 gcc_assert (reload_completed);
9242 /* Avoid HImode and its attendant prefix byte. */
9243 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9244 dest = gen_rtx_REG (SImode, REGNO (dest));
9246 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9248 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9249 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9251 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9252 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9258 /* X is an unchanging MEM. If it is a constant pool reference, return
9259 the constant pool rtx, else NULL. */
9262 maybe_get_pool_constant (rtx x)
9264 x = ix86_delegitimize_address (XEXP (x, 0));
9266 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9267 return get_pool_constant (x);
9273 ix86_expand_move (enum machine_mode mode, rtx operands[])
9275 int strict = (reload_in_progress || reload_completed);
9277 enum tls_model model;
9282 if (GET_CODE (op1) == SYMBOL_REF)
9284 model = SYMBOL_REF_TLS_MODEL (op1);
9287 op1 = legitimize_tls_address (op1, model, true);
9288 op1 = force_operand (op1, op0);
9293 else if (GET_CODE (op1) == CONST
9294 && GET_CODE (XEXP (op1, 0)) == PLUS
9295 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9297 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9300 rtx addend = XEXP (XEXP (op1, 0), 1);
9301 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9302 op1 = force_operand (op1, NULL);
9303 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9304 op0, 1, OPTAB_DIRECT);
9310 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9312 if (TARGET_MACHO && !TARGET_64BIT)
9317 rtx temp = ((reload_in_progress
9318 || ((op0 && GET_CODE (op0) == REG)
9320 ? op0 : gen_reg_rtx (Pmode));
9321 op1 = machopic_indirect_data_reference (op1, temp);
9322 op1 = machopic_legitimize_pic_address (op1, mode,
9323 temp == op1 ? 0 : temp);
9325 else if (MACHOPIC_INDIRECT)
9326 op1 = machopic_indirect_data_reference (op1, 0);
9333 if (GET_CODE (op0) == MEM)
9334 op1 = force_reg (Pmode, op1);
9336 op1 = legitimize_address (op1, op1, Pmode);
9341 if (GET_CODE (op0) == MEM
9342 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9343 || !push_operand (op0, mode))
9344 && GET_CODE (op1) == MEM)
9345 op1 = force_reg (mode, op1);
9347 if (push_operand (op0, mode)
9348 && ! general_no_elim_operand (op1, mode))
9349 op1 = copy_to_mode_reg (mode, op1);
9351 /* Force large constants in 64bit compilation into register
9352 to get them CSEed. */
9353 if (TARGET_64BIT && mode == DImode
9354 && immediate_operand (op1, mode)
9355 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9356 && !register_operand (op0, mode)
9357 && optimize && !reload_completed && !reload_in_progress)
9358 op1 = copy_to_mode_reg (mode, op1);
9360 if (FLOAT_MODE_P (mode))
9362 /* If we are loading a floating point constant to a register,
9363 force the value to memory now, since we'll get better code
9364 out the back end. */
9368 else if (GET_CODE (op1) == CONST_DOUBLE)
9370 op1 = validize_mem (force_const_mem (mode, op1));
9371 if (!register_operand (op0, mode))
9373 rtx temp = gen_reg_rtx (mode);
9374 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9375 emit_move_insn (op0, temp);
9382 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9386 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9388 rtx op0 = operands[0], op1 = operands[1];
9390 /* Force constants other than zero into memory. We do not know how
9391 the instructions used to build constants modify the upper 64 bits
9392 of the register, once we have that information we may be able
9393 to handle some of them more efficiently. */
9394 if ((reload_in_progress | reload_completed) == 0
9395 && register_operand (op0, mode)
9397 && standard_sse_constant_p (op1) <= 0)
9398 op1 = validize_mem (force_const_mem (mode, op1));
9400 /* Make operand1 a register if it isn't already. */
9402 && !register_operand (op0, mode)
9403 && !register_operand (op1, mode))
9405 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9409 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9412 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9413 straight to ix86_expand_vector_move. */
9416 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9425 /* If we're optimizing for size, movups is the smallest. */
9428 op0 = gen_lowpart (V4SFmode, op0);
9429 op1 = gen_lowpart (V4SFmode, op1);
9430 emit_insn (gen_sse_movups (op0, op1));
9434 /* ??? If we have typed data, then it would appear that using
9435 movdqu is the only way to get unaligned data loaded with
9437 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9439 op0 = gen_lowpart (V16QImode, op0);
9440 op1 = gen_lowpart (V16QImode, op1);
9441 emit_insn (gen_sse2_movdqu (op0, op1));
9445 if (TARGET_SSE2 && mode == V2DFmode)
9449 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9451 op0 = gen_lowpart (V2DFmode, op0);
9452 op1 = gen_lowpart (V2DFmode, op1);
9453 emit_insn (gen_sse2_movupd (op0, op1));
9457 /* When SSE registers are split into halves, we can avoid
9458 writing to the top half twice. */
9459 if (TARGET_SSE_SPLIT_REGS)
9461 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9466 /* ??? Not sure about the best option for the Intel chips.
9467 The following would seem to satisfy; the register is
9468 entirely cleared, breaking the dependency chain. We
9469 then store to the upper half, with a dependency depth
9470 of one. A rumor has it that Intel recommends two movsd
9471 followed by an unpacklpd, but this is unconfirmed. And
9472 given that the dependency depth of the unpacklpd would
9473 still be one, I'm not sure why this would be better. */
9474 zero = CONST0_RTX (V2DFmode);
9477 m = adjust_address (op1, DFmode, 0);
9478 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9479 m = adjust_address (op1, DFmode, 8);
9480 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9484 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9486 op0 = gen_lowpart (V4SFmode, op0);
9487 op1 = gen_lowpart (V4SFmode, op1);
9488 emit_insn (gen_sse_movups (op0, op1));
9492 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9493 emit_move_insn (op0, CONST0_RTX (mode));
9495 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9497 if (mode != V4SFmode)
9498 op0 = gen_lowpart (V4SFmode, op0);
9499 m = adjust_address (op1, V2SFmode, 0);
9500 emit_insn (gen_sse_loadlps (op0, op0, m));
9501 m = adjust_address (op1, V2SFmode, 8);
9502 emit_insn (gen_sse_loadhps (op0, op0, m));
9505 else if (MEM_P (op0))
9507 /* If we're optimizing for size, movups is the smallest. */
9510 op0 = gen_lowpart (V4SFmode, op0);
9511 op1 = gen_lowpart (V4SFmode, op1);
9512 emit_insn (gen_sse_movups (op0, op1));
9516 /* ??? Similar to above, only less clear because of quote
9517 typeless stores unquote. */
9518 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9519 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9521 op0 = gen_lowpart (V16QImode, op0);
9522 op1 = gen_lowpart (V16QImode, op1);
9523 emit_insn (gen_sse2_movdqu (op0, op1));
9527 if (TARGET_SSE2 && mode == V2DFmode)
9529 m = adjust_address (op0, DFmode, 0);
9530 emit_insn (gen_sse2_storelpd (m, op1));
9531 m = adjust_address (op0, DFmode, 8);
9532 emit_insn (gen_sse2_storehpd (m, op1));
9536 if (mode != V4SFmode)
9537 op1 = gen_lowpart (V4SFmode, op1);
9538 m = adjust_address (op0, V2SFmode, 0);
9539 emit_insn (gen_sse_storelps (m, op1));
9540 m = adjust_address (op0, V2SFmode, 8);
9541 emit_insn (gen_sse_storehps (m, op1));
9548 /* Expand a push in MODE. This is some mode for which we do not support
9549 proper push instructions, at least from the registers that we expect
9550 the value to live in. */
9553 ix86_expand_push (enum machine_mode mode, rtx x)
9557 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9558 GEN_INT (-GET_MODE_SIZE (mode)),
9559 stack_pointer_rtx, 1, OPTAB_DIRECT);
9560 if (tmp != stack_pointer_rtx)
9561 emit_move_insn (stack_pointer_rtx, tmp);
9563 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9564 emit_move_insn (tmp, x);
9567 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9568 destination to use for the operation. If different from the true
9569 destination in operands[0], a copy operation will be required. */
9572 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9575 int matching_memory;
9576 rtx src1, src2, dst;
9582 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9583 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9584 && (rtx_equal_p (dst, src2)
9585 || immediate_operand (src1, mode)))
9592 /* If the destination is memory, and we do not have matching source
9593 operands, do things in registers. */
9594 matching_memory = 0;
9595 if (GET_CODE (dst) == MEM)
9597 if (rtx_equal_p (dst, src1))
9598 matching_memory = 1;
9599 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9600 && rtx_equal_p (dst, src2))
9601 matching_memory = 2;
9603 dst = gen_reg_rtx (mode);
9606 /* Both source operands cannot be in memory. */
9607 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9609 if (matching_memory != 2)
9610 src2 = force_reg (mode, src2);
9612 src1 = force_reg (mode, src1);
9615 /* If the operation is not commutable, source 1 cannot be a constant
9616 or non-matching memory. */
9617 if ((CONSTANT_P (src1)
9618 || (!matching_memory && GET_CODE (src1) == MEM))
9619 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9620 src1 = force_reg (mode, src1);
9622 src1 = operands[1] = src1;
9623 src2 = operands[2] = src2;
9627 /* Similarly, but assume that the destination has already been
9631 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9632 enum machine_mode mode, rtx operands[])
9634 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9635 gcc_assert (dst == operands[0]);
9638 /* Attempt to expand a binary operator. Make the expansion closer to the
9639 actual machine, then just general_operand, which will allow 3 separate
9640 memory references (one output, two input) in a single insn. */
9643 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9646 rtx src1, src2, dst, op, clob;
9648 dst = ix86_fixup_binary_operands (code, mode, operands);
9652 /* Emit the instruction. */
9654 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9655 if (reload_in_progress)
9657 /* Reload doesn't know about the flags register, and doesn't know that
9658 it doesn't want to clobber it. We can only do this with PLUS. */
9659 gcc_assert (code == PLUS);
9664 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9665 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9668 /* Fix up the destination if needed. */
9669 if (dst != operands[0])
9670 emit_move_insn (operands[0], dst);
9673 /* Return TRUE or FALSE depending on whether the binary operator meets the
9674 appropriate constraints. */
9677 ix86_binary_operator_ok (enum rtx_code code,
9678 enum machine_mode mode ATTRIBUTE_UNUSED,
9681 /* Both source operands cannot be in memory. */
9682 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9684 /* If the operation is not commutable, source 1 cannot be a constant. */
9685 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9687 /* If the destination is memory, we must have a matching source operand. */
9688 if (GET_CODE (operands[0]) == MEM
9689 && ! (rtx_equal_p (operands[0], operands[1])
9690 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9691 && rtx_equal_p (operands[0], operands[2]))))
9693 /* If the operation is not commutable and the source 1 is memory, we must
9694 have a matching destination. */
9695 if (GET_CODE (operands[1]) == MEM
9696 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9697 && ! rtx_equal_p (operands[0], operands[1]))
9702 /* Attempt to expand a unary operator. Make the expansion closer to the
9703 actual machine, then just general_operand, which will allow 2 separate
9704 memory references (one output, one input) in a single insn. */
9707 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9710 int matching_memory;
9711 rtx src, dst, op, clob;
9716 /* If the destination is memory, and we do not have matching source
9717 operands, do things in registers. */
9718 matching_memory = 0;
9721 if (rtx_equal_p (dst, src))
9722 matching_memory = 1;
9724 dst = gen_reg_rtx (mode);
9727 /* When source operand is memory, destination must match. */
9728 if (MEM_P (src) && !matching_memory)
9729 src = force_reg (mode, src);
9731 /* Emit the instruction. */
9733 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9734 if (reload_in_progress || code == NOT)
9736 /* Reload doesn't know about the flags register, and doesn't know that
9737 it doesn't want to clobber it. */
9738 gcc_assert (code == NOT);
9743 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9744 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9747 /* Fix up the destination if needed. */
9748 if (dst != operands[0])
9749 emit_move_insn (operands[0], dst);
9752 /* Return TRUE or FALSE depending on whether the unary operator meets the
9753 appropriate constraints. */
9756 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9757 enum machine_mode mode ATTRIBUTE_UNUSED,
9758 rtx operands[2] ATTRIBUTE_UNUSED)
9760 /* If one of operands is memory, source and destination must match. */
9761 if ((GET_CODE (operands[0]) == MEM
9762 || GET_CODE (operands[1]) == MEM)
9763 && ! rtx_equal_p (operands[0], operands[1]))
9768 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9769 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9770 true, then replicate the mask for all elements of the vector register.
9771 If INVERT is true, then create a mask excluding the sign bit. */
9774 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9776 enum machine_mode vec_mode;
9777 HOST_WIDE_INT hi, lo;
9782 /* Find the sign bit, sign extended to 2*HWI. */
9784 lo = 0x80000000, hi = lo < 0;
9785 else if (HOST_BITS_PER_WIDE_INT >= 64)
9786 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9788 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9793 /* Force this value into the low part of a fp vector constant. */
9794 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9795 mask = gen_lowpart (mode, mask);
9800 v = gen_rtvec (4, mask, mask, mask, mask);
9802 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9803 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9804 vec_mode = V4SFmode;
9809 v = gen_rtvec (2, mask, mask);
9811 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9812 vec_mode = V2DFmode;
9815 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9818 /* Generate code for floating point ABS or NEG. */
9821 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9824 rtx mask, set, use, clob, dst, src;
9825 bool matching_memory;
9826 bool use_sse = false;
9827 bool vector_mode = VECTOR_MODE_P (mode);
9828 enum machine_mode elt_mode = mode;
9832 elt_mode = GET_MODE_INNER (mode);
9835 else if (TARGET_SSE_MATH)
9836 use_sse = SSE_FLOAT_MODE_P (mode);
9838 /* NEG and ABS performed with SSE use bitwise mask operations.
9839 Create the appropriate mask now. */
9841 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9848 /* If the destination is memory, and we don't have matching source
9849 operands or we're using the x87, do things in registers. */
9850 matching_memory = false;
9853 if (use_sse && rtx_equal_p (dst, src))
9854 matching_memory = true;
9856 dst = gen_reg_rtx (mode);
9858 if (MEM_P (src) && !matching_memory)
9859 src = force_reg (mode, src);
9863 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9864 set = gen_rtx_SET (VOIDmode, dst, set);
9869 set = gen_rtx_fmt_e (code, mode, src);
9870 set = gen_rtx_SET (VOIDmode, dst, set);
9873 use = gen_rtx_USE (VOIDmode, mask);
9874 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9875 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9876 gen_rtvec (3, set, use, clob)));
9882 if (dst != operands[0])
9883 emit_move_insn (operands[0], dst);
9886 /* Expand a copysign operation. Special case operand 0 being a constant. */
9889 ix86_expand_copysign (rtx operands[])
9891 enum machine_mode mode, vmode;
9892 rtx dest, op0, op1, mask, nmask;
9898 mode = GET_MODE (dest);
9899 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9901 if (GET_CODE (op0) == CONST_DOUBLE)
9905 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9906 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9908 if (op0 == CONST0_RTX (mode))
9909 op0 = CONST0_RTX (vmode);
9913 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9914 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9916 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9917 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9920 mask = ix86_build_signbit_mask (mode, 0, 0);
9923 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9925 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9929 nmask = ix86_build_signbit_mask (mode, 0, 1);
9930 mask = ix86_build_signbit_mask (mode, 0, 0);
9933 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9935 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9939 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9940 be a constant, and so has already been expanded into a vector constant. */
9943 ix86_split_copysign_const (rtx operands[])
9945 enum machine_mode mode, vmode;
9946 rtx dest, op0, op1, mask, x;
9953 mode = GET_MODE (dest);
9954 vmode = GET_MODE (mask);
9956 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9957 x = gen_rtx_AND (vmode, dest, mask);
9958 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9960 if (op0 != CONST0_RTX (vmode))
9962 x = gen_rtx_IOR (vmode, dest, op0);
9963 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9967 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9968 so we have to do two masks. */
9971 ix86_split_copysign_var (rtx operands[])
9973 enum machine_mode mode, vmode;
9974 rtx dest, scratch, op0, op1, mask, nmask, x;
9977 scratch = operands[1];
9980 nmask = operands[4];
9983 mode = GET_MODE (dest);
9984 vmode = GET_MODE (mask);
9986 if (rtx_equal_p (op0, op1))
9988 /* Shouldn't happen often (it's useless, obviously), but when it does
9989 we'd generate incorrect code if we continue below. */
9990 emit_move_insn (dest, op0);
9994 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9996 gcc_assert (REGNO (op1) == REGNO (scratch));
9998 x = gen_rtx_AND (vmode, scratch, mask);
9999 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10002 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10003 x = gen_rtx_NOT (vmode, dest);
10004 x = gen_rtx_AND (vmode, x, op0);
10005 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10009 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10011 x = gen_rtx_AND (vmode, scratch, mask);
10013 else /* alternative 2,4 */
10015 gcc_assert (REGNO (mask) == REGNO (scratch));
10016 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10017 x = gen_rtx_AND (vmode, scratch, op1);
10019 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10021 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10023 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10024 x = gen_rtx_AND (vmode, dest, nmask);
10026 else /* alternative 3,4 */
10028 gcc_assert (REGNO (nmask) == REGNO (dest));
10030 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10031 x = gen_rtx_AND (vmode, dest, op0);
10033 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10036 x = gen_rtx_IOR (vmode, dest, scratch);
10037 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10040 /* Return TRUE or FALSE depending on whether the first SET in INSN
10041 has source and destination with matching CC modes, and that the
10042 CC mode is at least as constrained as REQ_MODE. */
10045 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10048 enum machine_mode set_mode;
10050 set = PATTERN (insn);
10051 if (GET_CODE (set) == PARALLEL)
10052 set = XVECEXP (set, 0, 0);
10053 gcc_assert (GET_CODE (set) == SET);
10054 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10056 set_mode = GET_MODE (SET_DEST (set));
10060 if (req_mode != CCNOmode
10061 && (req_mode != CCmode
10062 || XEXP (SET_SRC (set), 1) != const0_rtx))
10066 if (req_mode == CCGCmode)
10070 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10074 if (req_mode == CCZmode)
10081 gcc_unreachable ();
10084 return (GET_MODE (SET_SRC (set)) == set_mode);
10087 /* Generate insn patterns to do an integer compare of OPERANDS. */
10090 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10092 enum machine_mode cmpmode;
10095 cmpmode = SELECT_CC_MODE (code, op0, op1);
10096 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10098 /* This is very simple, but making the interface the same as in the
10099 FP case makes the rest of the code easier. */
10100 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10101 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10103 /* Return the test that should be put into the flags user, i.e.
10104 the bcc, scc, or cmov instruction. */
10105 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10108 /* Figure out whether to use ordered or unordered fp comparisons.
10109 Return the appropriate mode to use. */
10112 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10114 /* ??? In order to make all comparisons reversible, we do all comparisons
10115 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10116 all forms trapping and nontrapping comparisons, we can make inequality
10117 comparisons trapping again, since it results in better code when using
10118 FCOM based compares. */
10119 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10123 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10125 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10126 return ix86_fp_compare_mode (code);
10129 /* Only zero flag is needed. */
10130 case EQ: /* ZF=0 */
10131 case NE: /* ZF!=0 */
10133 /* Codes needing carry flag. */
10134 case GEU: /* CF=0 */
10135 case GTU: /* CF=0 & ZF=0 */
10136 case LTU: /* CF=1 */
10137 case LEU: /* CF=1 | ZF=1 */
10139 /* Codes possibly doable only with sign flag when
10140 comparing against zero. */
10141 case GE: /* SF=OF or SF=0 */
10142 case LT: /* SF<>OF or SF=1 */
10143 if (op1 == const0_rtx)
10146 /* For other cases Carry flag is not required. */
10148 /* Codes doable only with sign flag when comparing
10149 against zero, but we miss jump instruction for it
10150 so we need to use relational tests against overflow
10151 that thus needs to be zero. */
10152 case GT: /* ZF=0 & SF=OF */
10153 case LE: /* ZF=1 | SF<>OF */
10154 if (op1 == const0_rtx)
10158 /* strcmp pattern do (use flags) and combine may ask us for proper
10163 gcc_unreachable ();
10167 /* Return the fixed registers used for condition codes. */
10170 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10177 /* If two condition code modes are compatible, return a condition code
10178 mode which is compatible with both. Otherwise, return
10181 static enum machine_mode
10182 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10187 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10190 if ((m1 == CCGCmode && m2 == CCGOCmode)
10191 || (m1 == CCGOCmode && m2 == CCGCmode))
10197 gcc_unreachable ();
10219 /* These are only compatible with themselves, which we already
10225 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10228 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10230 enum rtx_code swapped_code = swap_condition (code);
10231 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10232 || (ix86_fp_comparison_cost (swapped_code)
10233 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10236 /* Swap, force into registers, or otherwise massage the two operands
10237 to a fp comparison. The operands are updated in place; the new
10238 comparison code is returned. */
10240 static enum rtx_code
10241 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10243 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10244 rtx op0 = *pop0, op1 = *pop1;
10245 enum machine_mode op_mode = GET_MODE (op0);
10246 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10248 /* All of the unordered compare instructions only work on registers.
10249 The same is true of the fcomi compare instructions. The XFmode
10250 compare instructions require registers except when comparing
10251 against zero or when converting operand 1 from fixed point to
10255 && (fpcmp_mode == CCFPUmode
10256 || (op_mode == XFmode
10257 && ! (standard_80387_constant_p (op0) == 1
10258 || standard_80387_constant_p (op1) == 1)
10259 && GET_CODE (op1) != FLOAT)
10260 || ix86_use_fcomi_compare (code)))
10262 op0 = force_reg (op_mode, op0);
10263 op1 = force_reg (op_mode, op1);
10267 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10268 things around if they appear profitable, otherwise force op0
10269 into a register. */
10271 if (standard_80387_constant_p (op0) == 0
10272 || (GET_CODE (op0) == MEM
10273 && ! (standard_80387_constant_p (op1) == 0
10274 || GET_CODE (op1) == MEM)))
10277 tmp = op0, op0 = op1, op1 = tmp;
10278 code = swap_condition (code);
10281 if (GET_CODE (op0) != REG)
10282 op0 = force_reg (op_mode, op0);
10284 if (CONSTANT_P (op1))
10286 int tmp = standard_80387_constant_p (op1);
10288 op1 = validize_mem (force_const_mem (op_mode, op1));
10292 op1 = force_reg (op_mode, op1);
10295 op1 = force_reg (op_mode, op1);
10299 /* Try to rearrange the comparison to make it cheaper. */
10300 if (ix86_fp_comparison_cost (code)
10301 > ix86_fp_comparison_cost (swap_condition (code))
10302 && (GET_CODE (op1) == REG || !no_new_pseudos))
10305 tmp = op0, op0 = op1, op1 = tmp;
10306 code = swap_condition (code);
10307 if (GET_CODE (op0) != REG)
10308 op0 = force_reg (op_mode, op0);
10316 /* Convert comparison codes we use to represent FP comparison to integer
10317 code that will result in proper branch. Return UNKNOWN if no such code
10321 ix86_fp_compare_code_to_integer (enum rtx_code code)
10350 /* Split comparison code CODE into comparisons we can do using branch
10351 instructions. BYPASS_CODE is comparison code for branch that will
10352 branch around FIRST_CODE and SECOND_CODE. If some of branches
10353 is not required, set value to UNKNOWN.
10354 We never require more than two branches. */
10357 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10358 enum rtx_code *first_code,
10359 enum rtx_code *second_code)
10361 *first_code = code;
10362 *bypass_code = UNKNOWN;
10363 *second_code = UNKNOWN;
10365 /* The fcomi comparison sets flags as follows:
10375 case GT: /* GTU - CF=0 & ZF=0 */
10376 case GE: /* GEU - CF=0 */
10377 case ORDERED: /* PF=0 */
10378 case UNORDERED: /* PF=1 */
10379 case UNEQ: /* EQ - ZF=1 */
10380 case UNLT: /* LTU - CF=1 */
10381 case UNLE: /* LEU - CF=1 | ZF=1 */
10382 case LTGT: /* EQ - ZF=0 */
10384 case LT: /* LTU - CF=1 - fails on unordered */
10385 *first_code = UNLT;
10386 *bypass_code = UNORDERED;
10388 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10389 *first_code = UNLE;
10390 *bypass_code = UNORDERED;
10392 case EQ: /* EQ - ZF=1 - fails on unordered */
10393 *first_code = UNEQ;
10394 *bypass_code = UNORDERED;
10396 case NE: /* NE - ZF=0 - fails on unordered */
10397 *first_code = LTGT;
10398 *second_code = UNORDERED;
10400 case UNGE: /* GEU - CF=0 - fails on unordered */
10402 *second_code = UNORDERED;
10404 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10406 *second_code = UNORDERED;
10409 gcc_unreachable ();
10411 if (!TARGET_IEEE_FP)
10413 *second_code = UNKNOWN;
10414 *bypass_code = UNKNOWN;
10418 /* Return cost of comparison done fcom + arithmetics operations on AX.
10419 All following functions do use number of instructions as a cost metrics.
10420 In future this should be tweaked to compute bytes for optimize_size and
10421 take into account performance of various instructions on various CPUs. */
10423 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10425 if (!TARGET_IEEE_FP)
10427 /* The cost of code output by ix86_expand_fp_compare. */
10451 gcc_unreachable ();
10455 /* Return cost of comparison done using fcomi operation.
10456 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10458 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10460 enum rtx_code bypass_code, first_code, second_code;
10461 /* Return arbitrarily high cost when instruction is not supported - this
10462 prevents gcc from using it. */
10465 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10466 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10469 /* Return cost of comparison done using sahf operation.
10470 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10472 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10474 enum rtx_code bypass_code, first_code, second_code;
10475 /* Return arbitrarily high cost when instruction is not preferred - this
10476 avoids gcc from using it. */
10477 if (!TARGET_USE_SAHF && !optimize_size)
10479 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10480 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10483 /* Compute cost of the comparison done using any method.
10484 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10486 ix86_fp_comparison_cost (enum rtx_code code)
10488 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10491 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10492 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10494 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10495 if (min > sahf_cost)
10497 if (min > fcomi_cost)
10502 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10505 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10506 rtx *second_test, rtx *bypass_test)
10508 enum machine_mode fpcmp_mode, intcmp_mode;
10510 int cost = ix86_fp_comparison_cost (code);
10511 enum rtx_code bypass_code, first_code, second_code;
10513 fpcmp_mode = ix86_fp_compare_mode (code);
10514 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10517 *second_test = NULL_RTX;
10519 *bypass_test = NULL_RTX;
10521 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10523 /* Do fcomi/sahf based test when profitable. */
10524 if ((bypass_code == UNKNOWN || bypass_test)
10525 && (second_code == UNKNOWN || second_test)
10526 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10530 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10531 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10537 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10538 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10540 scratch = gen_reg_rtx (HImode);
10541 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10542 emit_insn (gen_x86_sahf_1 (scratch));
10545 /* The FP codes work out to act like unsigned. */
10546 intcmp_mode = fpcmp_mode;
10548 if (bypass_code != UNKNOWN)
10549 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10550 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10552 if (second_code != UNKNOWN)
10553 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10554 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10559 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10560 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10561 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10563 scratch = gen_reg_rtx (HImode);
10564 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10566 /* In the unordered case, we have to check C2 for NaN's, which
10567 doesn't happen to work out to anything nice combination-wise.
10568 So do some bit twiddling on the value we've got in AH to come
10569 up with an appropriate set of condition codes. */
10571 intcmp_mode = CCNOmode;
10576 if (code == GT || !TARGET_IEEE_FP)
10578 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10583 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10584 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10585 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10586 intcmp_mode = CCmode;
10592 if (code == LT && TARGET_IEEE_FP)
10594 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10595 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10596 intcmp_mode = CCmode;
10601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10607 if (code == GE || !TARGET_IEEE_FP)
10609 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10614 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10615 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10622 if (code == LE && TARGET_IEEE_FP)
10624 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10625 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10626 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10627 intcmp_mode = CCmode;
10632 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10638 if (code == EQ && TARGET_IEEE_FP)
10640 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10641 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10642 intcmp_mode = CCmode;
10647 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10654 if (code == NE && TARGET_IEEE_FP)
10656 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10657 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10663 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10669 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10673 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10678 gcc_unreachable ();
10682 /* Return the test that should be put into the flags user, i.e.
10683 the bcc, scc, or cmov instruction. */
10684 return gen_rtx_fmt_ee (code, VOIDmode,
10685 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10690 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10693 op0 = ix86_compare_op0;
10694 op1 = ix86_compare_op1;
10697 *second_test = NULL_RTX;
10699 *bypass_test = NULL_RTX;
10701 if (ix86_compare_emitted)
10703 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10704 ix86_compare_emitted = NULL_RTX;
10706 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10707 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10708 second_test, bypass_test);
10710 ret = ix86_expand_int_compare (code, op0, op1);
10715 /* Return true if the CODE will result in nontrivial jump sequence. */
10717 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10719 enum rtx_code bypass_code, first_code, second_code;
10722 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10723 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10727 ix86_expand_branch (enum rtx_code code, rtx label)
10731 /* If we have emitted a compare insn, go straight to simple.
10732 ix86_expand_compare won't emit anything if ix86_compare_emitted
10734 if (ix86_compare_emitted)
10737 switch (GET_MODE (ix86_compare_op0))
10743 tmp = ix86_expand_compare (code, NULL, NULL);
10744 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10745 gen_rtx_LABEL_REF (VOIDmode, label),
10747 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10756 enum rtx_code bypass_code, first_code, second_code;
10758 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10759 &ix86_compare_op1);
10761 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10763 /* Check whether we will use the natural sequence with one jump. If
10764 so, we can expand jump early. Otherwise delay expansion by
10765 creating compound insn to not confuse optimizers. */
10766 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10769 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10770 gen_rtx_LABEL_REF (VOIDmode, label),
10771 pc_rtx, NULL_RTX, NULL_RTX);
10775 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10776 ix86_compare_op0, ix86_compare_op1);
10777 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10778 gen_rtx_LABEL_REF (VOIDmode, label),
10780 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10782 use_fcomi = ix86_use_fcomi_compare (code);
10783 vec = rtvec_alloc (3 + !use_fcomi);
10784 RTVEC_ELT (vec, 0) = tmp;
10786 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10788 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10791 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10793 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10802 /* Expand DImode branch into multiple compare+branch. */
10804 rtx lo[2], hi[2], label2;
10805 enum rtx_code code1, code2, code3;
10806 enum machine_mode submode;
10808 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10810 tmp = ix86_compare_op0;
10811 ix86_compare_op0 = ix86_compare_op1;
10812 ix86_compare_op1 = tmp;
10813 code = swap_condition (code);
10815 if (GET_MODE (ix86_compare_op0) == DImode)
10817 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10818 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10823 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10824 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10828 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10829 avoid two branches. This costs one extra insn, so disable when
10830 optimizing for size. */
10832 if ((code == EQ || code == NE)
10834 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10839 if (hi[1] != const0_rtx)
10840 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10841 NULL_RTX, 0, OPTAB_WIDEN);
10844 if (lo[1] != const0_rtx)
10845 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10846 NULL_RTX, 0, OPTAB_WIDEN);
10848 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10849 NULL_RTX, 0, OPTAB_WIDEN);
10851 ix86_compare_op0 = tmp;
10852 ix86_compare_op1 = const0_rtx;
10853 ix86_expand_branch (code, label);
10857 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10858 op1 is a constant and the low word is zero, then we can just
10859 examine the high word. */
10861 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10864 case LT: case LTU: case GE: case GEU:
10865 ix86_compare_op0 = hi[0];
10866 ix86_compare_op1 = hi[1];
10867 ix86_expand_branch (code, label);
10873 /* Otherwise, we need two or three jumps. */
10875 label2 = gen_label_rtx ();
10878 code2 = swap_condition (code);
10879 code3 = unsigned_condition (code);
10883 case LT: case GT: case LTU: case GTU:
10886 case LE: code1 = LT; code2 = GT; break;
10887 case GE: code1 = GT; code2 = LT; break;
10888 case LEU: code1 = LTU; code2 = GTU; break;
10889 case GEU: code1 = GTU; code2 = LTU; break;
10891 case EQ: code1 = UNKNOWN; code2 = NE; break;
10892 case NE: code2 = UNKNOWN; break;
10895 gcc_unreachable ();
10900 * if (hi(a) < hi(b)) goto true;
10901 * if (hi(a) > hi(b)) goto false;
10902 * if (lo(a) < lo(b)) goto true;
10906 ix86_compare_op0 = hi[0];
10907 ix86_compare_op1 = hi[1];
10909 if (code1 != UNKNOWN)
10910 ix86_expand_branch (code1, label);
10911 if (code2 != UNKNOWN)
10912 ix86_expand_branch (code2, label2);
10914 ix86_compare_op0 = lo[0];
10915 ix86_compare_op1 = lo[1];
10916 ix86_expand_branch (code3, label);
10918 if (code2 != UNKNOWN)
10919 emit_label (label2);
10924 gcc_unreachable ();
10928 /* Split branch based on floating point condition. */
10930 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10931 rtx target1, rtx target2, rtx tmp, rtx pushed)
10933 rtx second, bypass;
10934 rtx label = NULL_RTX;
10936 int bypass_probability = -1, second_probability = -1, probability = -1;
10939 if (target2 != pc_rtx)
10942 code = reverse_condition_maybe_unordered (code);
10947 condition = ix86_expand_fp_compare (code, op1, op2,
10948 tmp, &second, &bypass);
10950 /* Remove pushed operand from stack. */
10952 ix86_free_from_memory (GET_MODE (pushed));
10954 if (split_branch_probability >= 0)
10956 /* Distribute the probabilities across the jumps.
10957 Assume the BYPASS and SECOND to be always test
10959 probability = split_branch_probability;
10961 /* Value of 1 is low enough to make no need for probability
10962 to be updated. Later we may run some experiments and see
10963 if unordered values are more frequent in practice. */
10965 bypass_probability = 1;
10967 second_probability = 1;
10969 if (bypass != NULL_RTX)
10971 label = gen_label_rtx ();
10972 i = emit_jump_insn (gen_rtx_SET
10974 gen_rtx_IF_THEN_ELSE (VOIDmode,
10976 gen_rtx_LABEL_REF (VOIDmode,
10979 if (bypass_probability >= 0)
10981 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10982 GEN_INT (bypass_probability),
10985 i = emit_jump_insn (gen_rtx_SET
10987 gen_rtx_IF_THEN_ELSE (VOIDmode,
10988 condition, target1, target2)));
10989 if (probability >= 0)
10991 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10992 GEN_INT (probability),
10994 if (second != NULL_RTX)
10996 i = emit_jump_insn (gen_rtx_SET
10998 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11000 if (second_probability >= 0)
11002 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11003 GEN_INT (second_probability),
11006 if (label != NULL_RTX)
11007 emit_label (label);
11011 ix86_expand_setcc (enum rtx_code code, rtx dest)
11013 rtx ret, tmp, tmpreg, equiv;
11014 rtx second_test, bypass_test;
11016 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11017 return 0; /* FAIL */
11019 gcc_assert (GET_MODE (dest) == QImode);
11021 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11022 PUT_MODE (ret, QImode);
11027 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11028 if (bypass_test || second_test)
11030 rtx test = second_test;
11032 rtx tmp2 = gen_reg_rtx (QImode);
11035 gcc_assert (!second_test);
11036 test = bypass_test;
11038 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11040 PUT_MODE (test, QImode);
11041 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11044 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11046 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11049 /* Attach a REG_EQUAL note describing the comparison result. */
11050 if (ix86_compare_op0 && ix86_compare_op1)
11052 equiv = simplify_gen_relational (code, QImode,
11053 GET_MODE (ix86_compare_op0),
11054 ix86_compare_op0, ix86_compare_op1);
11055 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11058 return 1; /* DONE */
11061 /* Expand comparison setting or clearing carry flag. Return true when
11062 successful and set pop for the operation. */
11064 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11066 enum machine_mode mode =
11067 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11069 /* Do not handle DImode compares that go through special path. Also we can't
11070 deal with FP compares yet. This is possible to add. */
11071 if (mode == (TARGET_64BIT ? TImode : DImode))
11073 if (FLOAT_MODE_P (mode))
11075 rtx second_test = NULL, bypass_test = NULL;
11076 rtx compare_op, compare_seq;
11078 /* Shortcut: following common codes never translate into carry flag compares. */
11079 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11080 || code == ORDERED || code == UNORDERED)
11083 /* These comparisons require zero flag; swap operands so they won't. */
11084 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11085 && !TARGET_IEEE_FP)
11090 code = swap_condition (code);
11093 /* Try to expand the comparison and verify that we end up with carry flag
11094 based comparison. This is fails to be true only when we decide to expand
11095 comparison using arithmetic that is not too common scenario. */
11097 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11098 &second_test, &bypass_test);
11099 compare_seq = get_insns ();
11102 if (second_test || bypass_test)
11104 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11105 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11106 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11108 code = GET_CODE (compare_op);
11109 if (code != LTU && code != GEU)
11111 emit_insn (compare_seq);
11115 if (!INTEGRAL_MODE_P (mode))
11123 /* Convert a==0 into (unsigned)a<1. */
11126 if (op1 != const0_rtx)
11129 code = (code == EQ ? LTU : GEU);
11132 /* Convert a>b into b<a or a>=b-1. */
11135 if (GET_CODE (op1) == CONST_INT)
11137 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11138 /* Bail out on overflow. We still can swap operands but that
11139 would force loading of the constant into register. */
11140 if (op1 == const0_rtx
11141 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11143 code = (code == GTU ? GEU : LTU);
11150 code = (code == GTU ? LTU : GEU);
11154 /* Convert a>=0 into (unsigned)a<0x80000000. */
11157 if (mode == DImode || op1 != const0_rtx)
11159 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11160 code = (code == LT ? GEU : LTU);
11164 if (mode == DImode || op1 != constm1_rtx)
11166 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11167 code = (code == LE ? GEU : LTU);
11173 /* Swapping operands may cause constant to appear as first operand. */
11174 if (!nonimmediate_operand (op0, VOIDmode))
11176 if (no_new_pseudos)
11178 op0 = force_reg (mode, op0);
11180 ix86_compare_op0 = op0;
11181 ix86_compare_op1 = op1;
11182 *pop = ix86_expand_compare (code, NULL, NULL);
11183 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11188 ix86_expand_int_movcc (rtx operands[])
11190 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11191 rtx compare_seq, compare_op;
11192 rtx second_test, bypass_test;
11193 enum machine_mode mode = GET_MODE (operands[0]);
11194 bool sign_bit_compare_p = false;;
11197 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11198 compare_seq = get_insns ();
11201 compare_code = GET_CODE (compare_op);
11203 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11204 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11205 sign_bit_compare_p = true;
11207 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11208 HImode insns, we'd be swallowed in word prefix ops. */
11210 if ((mode != HImode || TARGET_FAST_PREFIX)
11211 && (mode != (TARGET_64BIT ? TImode : DImode))
11212 && GET_CODE (operands[2]) == CONST_INT
11213 && GET_CODE (operands[3]) == CONST_INT)
11215 rtx out = operands[0];
11216 HOST_WIDE_INT ct = INTVAL (operands[2]);
11217 HOST_WIDE_INT cf = INTVAL (operands[3]);
11218 HOST_WIDE_INT diff;
11221 /* Sign bit compares are better done using shifts than we do by using
11223 if (sign_bit_compare_p
11224 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11225 ix86_compare_op1, &compare_op))
11227 /* Detect overlap between destination and compare sources. */
11230 if (!sign_bit_compare_p)
11232 bool fpcmp = false;
11234 compare_code = GET_CODE (compare_op);
11236 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11237 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11240 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11243 /* To simplify rest of code, restrict to the GEU case. */
11244 if (compare_code == LTU)
11246 HOST_WIDE_INT tmp = ct;
11249 compare_code = reverse_condition (compare_code);
11250 code = reverse_condition (code);
11255 PUT_CODE (compare_op,
11256 reverse_condition_maybe_unordered
11257 (GET_CODE (compare_op)));
11259 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11263 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11264 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11265 tmp = gen_reg_rtx (mode);
11267 if (mode == DImode)
11268 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11270 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11274 if (code == GT || code == GE)
11275 code = reverse_condition (code);
11278 HOST_WIDE_INT tmp = ct;
11283 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11284 ix86_compare_op1, VOIDmode, 0, -1);
11297 tmp = expand_simple_binop (mode, PLUS,
11299 copy_rtx (tmp), 1, OPTAB_DIRECT);
11310 tmp = expand_simple_binop (mode, IOR,
11312 copy_rtx (tmp), 1, OPTAB_DIRECT);
11314 else if (diff == -1 && ct)
11324 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11326 tmp = expand_simple_binop (mode, PLUS,
11327 copy_rtx (tmp), GEN_INT (cf),
11328 copy_rtx (tmp), 1, OPTAB_DIRECT);
11336 * andl cf - ct, dest
11346 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11349 tmp = expand_simple_binop (mode, AND,
11351 gen_int_mode (cf - ct, mode),
11352 copy_rtx (tmp), 1, OPTAB_DIRECT);
11354 tmp = expand_simple_binop (mode, PLUS,
11355 copy_rtx (tmp), GEN_INT (ct),
11356 copy_rtx (tmp), 1, OPTAB_DIRECT);
11359 if (!rtx_equal_p (tmp, out))
11360 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11362 return 1; /* DONE */
11368 tmp = ct, ct = cf, cf = tmp;
11370 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11372 /* We may be reversing unordered compare to normal compare, that
11373 is not valid in general (we may convert non-trapping condition
11374 to trapping one), however on i386 we currently emit all
11375 comparisons unordered. */
11376 compare_code = reverse_condition_maybe_unordered (compare_code);
11377 code = reverse_condition_maybe_unordered (code);
11381 compare_code = reverse_condition (compare_code);
11382 code = reverse_condition (code);
11386 compare_code = UNKNOWN;
11387 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11388 && GET_CODE (ix86_compare_op1) == CONST_INT)
11390 if (ix86_compare_op1 == const0_rtx
11391 && (code == LT || code == GE))
11392 compare_code = code;
11393 else if (ix86_compare_op1 == constm1_rtx)
11397 else if (code == GT)
11402 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11403 if (compare_code != UNKNOWN
11404 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11405 && (cf == -1 || ct == -1))
11407 /* If lea code below could be used, only optimize
11408 if it results in a 2 insn sequence. */
11410 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11411 || diff == 3 || diff == 5 || diff == 9)
11412 || (compare_code == LT && ct == -1)
11413 || (compare_code == GE && cf == -1))
11416 * notl op1 (if necessary)
11424 code = reverse_condition (code);
11427 out = emit_store_flag (out, code, ix86_compare_op0,
11428 ix86_compare_op1, VOIDmode, 0, -1);
11430 out = expand_simple_binop (mode, IOR,
11432 out, 1, OPTAB_DIRECT);
11433 if (out != operands[0])
11434 emit_move_insn (operands[0], out);
11436 return 1; /* DONE */
11441 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11442 || diff == 3 || diff == 5 || diff == 9)
11443 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11445 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11451 * lea cf(dest*(ct-cf)),dest
11455 * This also catches the degenerate setcc-only case.
11461 out = emit_store_flag (out, code, ix86_compare_op0,
11462 ix86_compare_op1, VOIDmode, 0, 1);
11465 /* On x86_64 the lea instruction operates on Pmode, so we need
11466 to get arithmetics done in proper mode to match. */
11468 tmp = copy_rtx (out);
11472 out1 = copy_rtx (out);
11473 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11477 tmp = gen_rtx_PLUS (mode, tmp, out1);
11483 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11486 if (!rtx_equal_p (tmp, out))
11489 out = force_operand (tmp, copy_rtx (out));
11491 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11493 if (!rtx_equal_p (out, operands[0]))
11494 emit_move_insn (operands[0], copy_rtx (out));
11496 return 1; /* DONE */
11500 * General case: Jumpful:
11501 * xorl dest,dest cmpl op1, op2
11502 * cmpl op1, op2 movl ct, dest
11503 * setcc dest jcc 1f
11504 * decl dest movl cf, dest
11505 * andl (cf-ct),dest 1:
11508 * Size 20. Size 14.
11510 * This is reasonably steep, but branch mispredict costs are
11511 * high on modern cpus, so consider failing only if optimizing
11515 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11516 && BRANCH_COST >= 2)
11522 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11523 /* We may be reversing unordered compare to normal compare,
11524 that is not valid in general (we may convert non-trapping
11525 condition to trapping one), however on i386 we currently
11526 emit all comparisons unordered. */
11527 code = reverse_condition_maybe_unordered (code);
11530 code = reverse_condition (code);
11531 if (compare_code != UNKNOWN)
11532 compare_code = reverse_condition (compare_code);
11536 if (compare_code != UNKNOWN)
11538 /* notl op1 (if needed)
11543 For x < 0 (resp. x <= -1) there will be no notl,
11544 so if possible swap the constants to get rid of the
11546 True/false will be -1/0 while code below (store flag
11547 followed by decrement) is 0/-1, so the constants need
11548 to be exchanged once more. */
11550 if (compare_code == GE || !cf)
11552 code = reverse_condition (code);
11557 HOST_WIDE_INT tmp = cf;
11562 out = emit_store_flag (out, code, ix86_compare_op0,
11563 ix86_compare_op1, VOIDmode, 0, -1);
11567 out = emit_store_flag (out, code, ix86_compare_op0,
11568 ix86_compare_op1, VOIDmode, 0, 1);
11570 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11571 copy_rtx (out), 1, OPTAB_DIRECT);
11574 out = expand_simple_binop (mode, AND, copy_rtx (out),
11575 gen_int_mode (cf - ct, mode),
11576 copy_rtx (out), 1, OPTAB_DIRECT);
11578 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11579 copy_rtx (out), 1, OPTAB_DIRECT);
11580 if (!rtx_equal_p (out, operands[0]))
11581 emit_move_insn (operands[0], copy_rtx (out));
11583 return 1; /* DONE */
11587 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11589 /* Try a few things more with specific constants and a variable. */
11592 rtx var, orig_out, out, tmp;
11594 if (BRANCH_COST <= 2)
11595 return 0; /* FAIL */
11597 /* If one of the two operands is an interesting constant, load a
11598 constant with the above and mask it in with a logical operation. */
11600 if (GET_CODE (operands[2]) == CONST_INT)
11603 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11604 operands[3] = constm1_rtx, op = and_optab;
11605 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11606 operands[3] = const0_rtx, op = ior_optab;
11608 return 0; /* FAIL */
11610 else if (GET_CODE (operands[3]) == CONST_INT)
11613 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11614 operands[2] = constm1_rtx, op = and_optab;
11615 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11616 operands[2] = const0_rtx, op = ior_optab;
11618 return 0; /* FAIL */
11621 return 0; /* FAIL */
11623 orig_out = operands[0];
11624 tmp = gen_reg_rtx (mode);
11627 /* Recurse to get the constant loaded. */
11628 if (ix86_expand_int_movcc (operands) == 0)
11629 return 0; /* FAIL */
11631 /* Mask in the interesting variable. */
11632 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11634 if (!rtx_equal_p (out, orig_out))
11635 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11637 return 1; /* DONE */
11641 * For comparison with above,
11651 if (! nonimmediate_operand (operands[2], mode))
11652 operands[2] = force_reg (mode, operands[2]);
11653 if (! nonimmediate_operand (operands[3], mode))
11654 operands[3] = force_reg (mode, operands[3]);
11656 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11658 rtx tmp = gen_reg_rtx (mode);
11659 emit_move_insn (tmp, operands[3]);
11662 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11664 rtx tmp = gen_reg_rtx (mode);
11665 emit_move_insn (tmp, operands[2]);
11669 if (! register_operand (operands[2], VOIDmode)
11671 || ! register_operand (operands[3], VOIDmode)))
11672 operands[2] = force_reg (mode, operands[2]);
11675 && ! register_operand (operands[3], VOIDmode))
11676 operands[3] = force_reg (mode, operands[3]);
11678 emit_insn (compare_seq);
11679 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11680 gen_rtx_IF_THEN_ELSE (mode,
11681 compare_op, operands[2],
11684 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11685 gen_rtx_IF_THEN_ELSE (mode,
11687 copy_rtx (operands[3]),
11688 copy_rtx (operands[0]))));
11690 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11691 gen_rtx_IF_THEN_ELSE (mode,
11693 copy_rtx (operands[2]),
11694 copy_rtx (operands[0]))));
11696 return 1; /* DONE */
11699 /* Swap, force into registers, or otherwise massage the two operands
11700 to an sse comparison with a mask result. Thus we differ a bit from
11701 ix86_prepare_fp_compare_args which expects to produce a flags result.
11703 The DEST operand exists to help determine whether to commute commutative
11704 operators. The POP0/POP1 operands are updated in place. The new
11705 comparison code is returned, or UNKNOWN if not implementable. */
11707 static enum rtx_code
11708 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11709 rtx *pop0, rtx *pop1)
11717 /* We have no LTGT as an operator. We could implement it with
11718 NE & ORDERED, but this requires an extra temporary. It's
11719 not clear that it's worth it. */
11726 /* These are supported directly. */
11733 /* For commutative operators, try to canonicalize the destination
11734 operand to be first in the comparison - this helps reload to
11735 avoid extra moves. */
11736 if (!dest || !rtx_equal_p (dest, *pop1))
11744 /* These are not supported directly. Swap the comparison operands
11745 to transform into something that is supported. */
11749 code = swap_condition (code);
11753 gcc_unreachable ();
11759 /* Detect conditional moves that exactly match min/max operational
11760 semantics. Note that this is IEEE safe, as long as we don't
11761 interchange the operands.
11763 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11764 and TRUE if the operation is successful and instructions are emitted. */
11767 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11768 rtx cmp_op1, rtx if_true, rtx if_false)
11770 enum machine_mode mode;
11776 else if (code == UNGE)
11779 if_true = if_false;
11785 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11787 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11792 mode = GET_MODE (dest);
11794 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11795 but MODE may be a vector mode and thus not appropriate. */
11796 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11798 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11801 if_true = force_reg (mode, if_true);
11802 v = gen_rtvec (2, if_true, if_false);
11803 tmp = gen_rtx_UNSPEC (mode, v, u);
11807 code = is_min ? SMIN : SMAX;
11808 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11811 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11815 /* Expand an sse vector comparison. Return the register with the result. */
11818 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11819 rtx op_true, rtx op_false)
11821 enum machine_mode mode = GET_MODE (dest);
11824 cmp_op0 = force_reg (mode, cmp_op0);
11825 if (!nonimmediate_operand (cmp_op1, mode))
11826 cmp_op1 = force_reg (mode, cmp_op1);
11829 || reg_overlap_mentioned_p (dest, op_true)
11830 || reg_overlap_mentioned_p (dest, op_false))
11831 dest = gen_reg_rtx (mode);
11833 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11834 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11839 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11840 operations. This is used for both scalar and vector conditional moves. */
11843 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11845 enum machine_mode mode = GET_MODE (dest);
11848 if (op_false == CONST0_RTX (mode))
11850 op_true = force_reg (mode, op_true);
11851 x = gen_rtx_AND (mode, cmp, op_true);
11852 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11854 else if (op_true == CONST0_RTX (mode))
11856 op_false = force_reg (mode, op_false);
11857 x = gen_rtx_NOT (mode, cmp);
11858 x = gen_rtx_AND (mode, x, op_false);
11859 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11863 op_true = force_reg (mode, op_true);
11864 op_false = force_reg (mode, op_false);
11866 t2 = gen_reg_rtx (mode);
11868 t3 = gen_reg_rtx (mode);
11872 x = gen_rtx_AND (mode, op_true, cmp);
11873 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11875 x = gen_rtx_NOT (mode, cmp);
11876 x = gen_rtx_AND (mode, x, op_false);
11877 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11879 x = gen_rtx_IOR (mode, t3, t2);
11880 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11884 /* Expand a floating-point conditional move. Return true if successful. */
11887 ix86_expand_fp_movcc (rtx operands[])
11889 enum machine_mode mode = GET_MODE (operands[0]);
11890 enum rtx_code code = GET_CODE (operands[1]);
11891 rtx tmp, compare_op, second_test, bypass_test;
11893 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11895 enum machine_mode cmode;
11897 /* Since we've no cmove for sse registers, don't force bad register
11898 allocation just to gain access to it. Deny movcc when the
11899 comparison mode doesn't match the move mode. */
11900 cmode = GET_MODE (ix86_compare_op0);
11901 if (cmode == VOIDmode)
11902 cmode = GET_MODE (ix86_compare_op1);
11906 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11908 &ix86_compare_op1);
11909 if (code == UNKNOWN)
11912 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11913 ix86_compare_op1, operands[2],
11917 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11918 ix86_compare_op1, operands[2], operands[3]);
11919 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11923 /* The floating point conditional move instructions don't directly
11924 support conditions resulting from a signed integer comparison. */
11926 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11928 /* The floating point conditional move instructions don't directly
11929 support signed integer comparisons. */
11931 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11933 gcc_assert (!second_test && !bypass_test);
11934 tmp = gen_reg_rtx (QImode);
11935 ix86_expand_setcc (code, tmp);
11937 ix86_compare_op0 = tmp;
11938 ix86_compare_op1 = const0_rtx;
11939 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11941 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11943 tmp = gen_reg_rtx (mode);
11944 emit_move_insn (tmp, operands[3]);
11947 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11949 tmp = gen_reg_rtx (mode);
11950 emit_move_insn (tmp, operands[2]);
11954 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11955 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11956 operands[2], operands[3])));
11958 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11959 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11960 operands[3], operands[0])));
11962 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11963 gen_rtx_IF_THEN_ELSE (mode, second_test,
11964 operands[2], operands[0])));
11969 /* Expand a floating-point vector conditional move; a vcond operation
11970 rather than a movcc operation. */
11973 ix86_expand_fp_vcond (rtx operands[])
11975 enum rtx_code code = GET_CODE (operands[3]);
11978 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11979 &operands[4], &operands[5]);
11980 if (code == UNKNOWN)
11983 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11984 operands[5], operands[1], operands[2]))
11987 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11988 operands[1], operands[2]);
11989 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11993 /* Expand a signed integral vector conditional move. */
11996 ix86_expand_int_vcond (rtx operands[])
11998 enum machine_mode mode = GET_MODE (operands[0]);
11999 enum rtx_code code = GET_CODE (operands[3]);
12000 bool negate = false;
12003 cop0 = operands[4];
12004 cop1 = operands[5];
12006 /* Canonicalize the comparison to EQ, GT, GTU. */
12017 code = reverse_condition (code);
12023 code = reverse_condition (code);
12029 code = swap_condition (code);
12030 x = cop0, cop0 = cop1, cop1 = x;
12034 gcc_unreachable ();
12037 /* Unsigned parallel compare is not supported by the hardware. Play some
12038 tricks to turn this into a signed comparison against 0. */
12041 cop0 = force_reg (mode, cop0);
12049 /* Perform a parallel modulo subtraction. */
12050 t1 = gen_reg_rtx (mode);
12051 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12053 /* Extract the original sign bit of op0. */
12054 mask = GEN_INT (-0x80000000);
12055 mask = gen_rtx_CONST_VECTOR (mode,
12056 gen_rtvec (4, mask, mask, mask, mask));
12057 mask = force_reg (mode, mask);
12058 t2 = gen_reg_rtx (mode);
12059 emit_insn (gen_andv4si3 (t2, cop0, mask));
12061 /* XOR it back into the result of the subtraction. This results
12062 in the sign bit set iff we saw unsigned underflow. */
12063 x = gen_reg_rtx (mode);
12064 emit_insn (gen_xorv4si3 (x, t1, t2));
12072 /* Perform a parallel unsigned saturating subtraction. */
12073 x = gen_reg_rtx (mode);
12074 emit_insn (gen_rtx_SET (VOIDmode, x,
12075 gen_rtx_US_MINUS (mode, cop0, cop1)));
12082 gcc_unreachable ();
12086 cop1 = CONST0_RTX (mode);
12089 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12090 operands[1+negate], operands[2-negate]);
12092 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12093 operands[2-negate]);
12097 /* Expand conditional increment or decrement using adb/sbb instructions.
12098 The default case using setcc followed by the conditional move can be
12099 done by generic code. */
12101 ix86_expand_int_addcc (rtx operands[])
12103 enum rtx_code code = GET_CODE (operands[1]);
12105 rtx val = const0_rtx;
12106 bool fpcmp = false;
12107 enum machine_mode mode = GET_MODE (operands[0]);
12109 if (operands[3] != const1_rtx
12110 && operands[3] != constm1_rtx)
12112 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12113 ix86_compare_op1, &compare_op))
12115 code = GET_CODE (compare_op);
12117 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12118 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12121 code = ix86_fp_compare_code_to_integer (code);
12128 PUT_CODE (compare_op,
12129 reverse_condition_maybe_unordered
12130 (GET_CODE (compare_op)));
12132 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12134 PUT_MODE (compare_op, mode);
12136 /* Construct either adc or sbb insn. */
12137 if ((code == LTU) == (operands[3] == constm1_rtx))
12139 switch (GET_MODE (operands[0]))
12142 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12145 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12148 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12151 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12154 gcc_unreachable ();
12159 switch (GET_MODE (operands[0]))
12162 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12165 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12168 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12171 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12174 gcc_unreachable ();
12177 return 1; /* DONE */
12181 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12182 works for floating pointer parameters and nonoffsetable memories.
12183 For pushes, it returns just stack offsets; the values will be saved
12184 in the right order. Maximally three parts are generated. */
12187 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12192 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12194 size = (GET_MODE_SIZE (mode) + 4) / 8;
12196 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12197 gcc_assert (size >= 2 && size <= 3);
12199 /* Optimize constant pool reference to immediates. This is used by fp
12200 moves, that force all constants to memory to allow combining. */
12201 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12203 rtx tmp = maybe_get_pool_constant (operand);
12208 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12210 /* The only non-offsetable memories we handle are pushes. */
12211 int ok = push_operand (operand, VOIDmode);
12215 operand = copy_rtx (operand);
12216 PUT_MODE (operand, Pmode);
12217 parts[0] = parts[1] = parts[2] = operand;
12221 if (GET_CODE (operand) == CONST_VECTOR)
12223 enum machine_mode imode = int_mode_for_mode (mode);
12224 /* Caution: if we looked through a constant pool memory above,
12225 the operand may actually have a different mode now. That's
12226 ok, since we want to pun this all the way back to an integer. */
12227 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12228 gcc_assert (operand != NULL);
12234 if (mode == DImode)
12235 split_di (&operand, 1, &parts[0], &parts[1]);
12238 if (REG_P (operand))
12240 gcc_assert (reload_completed);
12241 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12242 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12244 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12246 else if (offsettable_memref_p (operand))
12248 operand = adjust_address (operand, SImode, 0);
12249 parts[0] = operand;
12250 parts[1] = adjust_address (operand, SImode, 4);
12252 parts[2] = adjust_address (operand, SImode, 8);
12254 else if (GET_CODE (operand) == CONST_DOUBLE)
12259 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12263 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12264 parts[2] = gen_int_mode (l[2], SImode);
12267 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12270 gcc_unreachable ();
12272 parts[1] = gen_int_mode (l[1], SImode);
12273 parts[0] = gen_int_mode (l[0], SImode);
12276 gcc_unreachable ();
12281 if (mode == TImode)
12282 split_ti (&operand, 1, &parts[0], &parts[1]);
12283 if (mode == XFmode || mode == TFmode)
12285 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12286 if (REG_P (operand))
12288 gcc_assert (reload_completed);
12289 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12290 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12292 else if (offsettable_memref_p (operand))
12294 operand = adjust_address (operand, DImode, 0);
12295 parts[0] = operand;
12296 parts[1] = adjust_address (operand, upper_mode, 8);
12298 else if (GET_CODE (operand) == CONST_DOUBLE)
12303 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12304 real_to_target (l, &r, mode);
12306 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12307 if (HOST_BITS_PER_WIDE_INT >= 64)
12310 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12311 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12314 parts[0] = immed_double_const (l[0], l[1], DImode);
12316 if (upper_mode == SImode)
12317 parts[1] = gen_int_mode (l[2], SImode);
12318 else if (HOST_BITS_PER_WIDE_INT >= 64)
12321 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12322 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12325 parts[1] = immed_double_const (l[2], l[3], DImode);
12328 gcc_unreachable ();
12335 /* Emit insns to perform a move or push of DI, DF, and XF values.
12336 Return false when normal moves are needed; true when all required
12337 insns have been emitted. Operands 2-4 contain the input values
12338 int the correct order; operands 5-7 contain the output values. */
12341 ix86_split_long_move (rtx operands[])
12346 int collisions = 0;
12347 enum machine_mode mode = GET_MODE (operands[0]);
12349 /* The DFmode expanders may ask us to move double.
12350 For 64bit target this is single move. By hiding the fact
12351 here we simplify i386.md splitters. */
12352 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12354 /* Optimize constant pool reference to immediates. This is used by
12355 fp moves, that force all constants to memory to allow combining. */
12357 if (GET_CODE (operands[1]) == MEM
12358 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12359 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12360 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12361 if (push_operand (operands[0], VOIDmode))
12363 operands[0] = copy_rtx (operands[0]);
12364 PUT_MODE (operands[0], Pmode);
12367 operands[0] = gen_lowpart (DImode, operands[0]);
12368 operands[1] = gen_lowpart (DImode, operands[1]);
12369 emit_move_insn (operands[0], operands[1]);
12373 /* The only non-offsettable memory we handle is push. */
12374 if (push_operand (operands[0], VOIDmode))
12377 gcc_assert (GET_CODE (operands[0]) != MEM
12378 || offsettable_memref_p (operands[0]));
12380 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12381 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12383 /* When emitting push, take care for source operands on the stack. */
12384 if (push && GET_CODE (operands[1]) == MEM
12385 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12388 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12389 XEXP (part[1][2], 0));
12390 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12391 XEXP (part[1][1], 0));
12394 /* We need to do copy in the right order in case an address register
12395 of the source overlaps the destination. */
12396 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12398 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12400 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12403 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12406 /* Collision in the middle part can be handled by reordering. */
12407 if (collisions == 1 && nparts == 3
12408 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12411 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12412 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12415 /* If there are more collisions, we can't handle it by reordering.
12416 Do an lea to the last part and use only one colliding move. */
12417 else if (collisions > 1)
12423 base = part[0][nparts - 1];
12425 /* Handle the case when the last part isn't valid for lea.
12426 Happens in 64-bit mode storing the 12-byte XFmode. */
12427 if (GET_MODE (base) != Pmode)
12428 base = gen_rtx_REG (Pmode, REGNO (base));
12430 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12431 part[1][0] = replace_equiv_address (part[1][0], base);
12432 part[1][1] = replace_equiv_address (part[1][1],
12433 plus_constant (base, UNITS_PER_WORD));
12435 part[1][2] = replace_equiv_address (part[1][2],
12436 plus_constant (base, 8));
12446 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12447 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12448 emit_move_insn (part[0][2], part[1][2]);
12453 /* In 64bit mode we don't have 32bit push available. In case this is
12454 register, it is OK - we will just use larger counterpart. We also
12455 retype memory - these comes from attempt to avoid REX prefix on
12456 moving of second half of TFmode value. */
12457 if (GET_MODE (part[1][1]) == SImode)
12459 switch (GET_CODE (part[1][1]))
12462 part[1][1] = adjust_address (part[1][1], DImode, 0);
12466 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12470 gcc_unreachable ();
12473 if (GET_MODE (part[1][0]) == SImode)
12474 part[1][0] = part[1][1];
12477 emit_move_insn (part[0][1], part[1][1]);
12478 emit_move_insn (part[0][0], part[1][0]);
12482 /* Choose correct order to not overwrite the source before it is copied. */
12483 if ((REG_P (part[0][0])
12484 && REG_P (part[1][1])
12485 && (REGNO (part[0][0]) == REGNO (part[1][1])
12487 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12489 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12493 operands[2] = part[0][2];
12494 operands[3] = part[0][1];
12495 operands[4] = part[0][0];
12496 operands[5] = part[1][2];
12497 operands[6] = part[1][1];
12498 operands[7] = part[1][0];
12502 operands[2] = part[0][1];
12503 operands[3] = part[0][0];
12504 operands[5] = part[1][1];
12505 operands[6] = part[1][0];
12512 operands[2] = part[0][0];
12513 operands[3] = part[0][1];
12514 operands[4] = part[0][2];
12515 operands[5] = part[1][0];
12516 operands[6] = part[1][1];
12517 operands[7] = part[1][2];
12521 operands[2] = part[0][0];
12522 operands[3] = part[0][1];
12523 operands[5] = part[1][0];
12524 operands[6] = part[1][1];
12528 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12531 if (GET_CODE (operands[5]) == CONST_INT
12532 && operands[5] != const0_rtx
12533 && REG_P (operands[2]))
12535 if (GET_CODE (operands[6]) == CONST_INT
12536 && INTVAL (operands[6]) == INTVAL (operands[5]))
12537 operands[6] = operands[2];
12540 && GET_CODE (operands[7]) == CONST_INT
12541 && INTVAL (operands[7]) == INTVAL (operands[5]))
12542 operands[7] = operands[2];
12546 && GET_CODE (operands[6]) == CONST_INT
12547 && operands[6] != const0_rtx
12548 && REG_P (operands[3])
12549 && GET_CODE (operands[7]) == CONST_INT
12550 && INTVAL (operands[7]) == INTVAL (operands[6]))
12551 operands[7] = operands[3];
12554 emit_move_insn (operands[2], operands[5]);
12555 emit_move_insn (operands[3], operands[6]);
12557 emit_move_insn (operands[4], operands[7]);
12562 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12563 left shift by a constant, either using a single shift or
12564 a sequence of add instructions. */
12567 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12571 emit_insn ((mode == DImode
12573 : gen_adddi3) (operand, operand, operand));
12575 else if (!optimize_size
12576 && count * ix86_cost->add <= ix86_cost->shift_const)
12579 for (i=0; i<count; i++)
12581 emit_insn ((mode == DImode
12583 : gen_adddi3) (operand, operand, operand));
12587 emit_insn ((mode == DImode
12589 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12593 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12595 rtx low[2], high[2];
12597 const int single_width = mode == DImode ? 32 : 64;
12599 if (GET_CODE (operands[2]) == CONST_INT)
12601 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12602 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12604 if (count >= single_width)
12606 emit_move_insn (high[0], low[1]);
12607 emit_move_insn (low[0], const0_rtx);
12609 if (count > single_width)
12610 ix86_expand_ashl_const (high[0], count - single_width, mode);
12614 if (!rtx_equal_p (operands[0], operands[1]))
12615 emit_move_insn (operands[0], operands[1]);
12616 emit_insn ((mode == DImode
12618 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12619 ix86_expand_ashl_const (low[0], count, mode);
12624 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12626 if (operands[1] == const1_rtx)
12628 /* Assuming we've chosen a QImode capable registers, then 1 << N
12629 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12630 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12632 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12634 ix86_expand_clear (low[0]);
12635 ix86_expand_clear (high[0]);
12636 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12638 d = gen_lowpart (QImode, low[0]);
12639 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12640 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12641 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12643 d = gen_lowpart (QImode, high[0]);
12644 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12645 s = gen_rtx_NE (QImode, flags, const0_rtx);
12646 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12649 /* Otherwise, we can get the same results by manually performing
12650 a bit extract operation on bit 5/6, and then performing the two
12651 shifts. The two methods of getting 0/1 into low/high are exactly
12652 the same size. Avoiding the shift in the bit extract case helps
12653 pentium4 a bit; no one else seems to care much either way. */
12658 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12659 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12661 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12662 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12664 emit_insn ((mode == DImode
12666 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12667 emit_insn ((mode == DImode
12669 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12670 emit_move_insn (low[0], high[0]);
12671 emit_insn ((mode == DImode
12673 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12676 emit_insn ((mode == DImode
12678 : gen_ashldi3) (low[0], low[0], operands[2]));
12679 emit_insn ((mode == DImode
12681 : gen_ashldi3) (high[0], high[0], operands[2]));
12685 if (operands[1] == constm1_rtx)
12687 /* For -1 << N, we can avoid the shld instruction, because we
12688 know that we're shifting 0...31/63 ones into a -1. */
12689 emit_move_insn (low[0], constm1_rtx);
12691 emit_move_insn (high[0], low[0]);
12693 emit_move_insn (high[0], constm1_rtx);
12697 if (!rtx_equal_p (operands[0], operands[1]))
12698 emit_move_insn (operands[0], operands[1]);
12700 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12701 emit_insn ((mode == DImode
12703 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12706 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12708 if (TARGET_CMOVE && scratch)
12710 ix86_expand_clear (scratch);
12711 emit_insn ((mode == DImode
12712 ? gen_x86_shift_adj_1
12713 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12716 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12720 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12722 rtx low[2], high[2];
12724 const int single_width = mode == DImode ? 32 : 64;
12726 if (GET_CODE (operands[2]) == CONST_INT)
12728 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12729 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12731 if (count == single_width * 2 - 1)
12733 emit_move_insn (high[0], high[1]);
12734 emit_insn ((mode == DImode
12736 : gen_ashrdi3) (high[0], high[0],
12737 GEN_INT (single_width - 1)));
12738 emit_move_insn (low[0], high[0]);
12741 else if (count >= single_width)
12743 emit_move_insn (low[0], high[1]);
12744 emit_move_insn (high[0], low[0]);
12745 emit_insn ((mode == DImode
12747 : gen_ashrdi3) (high[0], high[0],
12748 GEN_INT (single_width - 1)));
12749 if (count > single_width)
12750 emit_insn ((mode == DImode
12752 : gen_ashrdi3) (low[0], low[0],
12753 GEN_INT (count - single_width)));
12757 if (!rtx_equal_p (operands[0], operands[1]))
12758 emit_move_insn (operands[0], operands[1]);
12759 emit_insn ((mode == DImode
12761 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12762 emit_insn ((mode == DImode
12764 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12769 if (!rtx_equal_p (operands[0], operands[1]))
12770 emit_move_insn (operands[0], operands[1]);
12772 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12774 emit_insn ((mode == DImode
12776 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12777 emit_insn ((mode == DImode
12779 : gen_ashrdi3) (high[0], high[0], operands[2]));
12781 if (TARGET_CMOVE && scratch)
12783 emit_move_insn (scratch, high[0]);
12784 emit_insn ((mode == DImode
12786 : gen_ashrdi3) (scratch, scratch,
12787 GEN_INT (single_width - 1)));
12788 emit_insn ((mode == DImode
12789 ? gen_x86_shift_adj_1
12790 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12794 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12799 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12801 rtx low[2], high[2];
12803 const int single_width = mode == DImode ? 32 : 64;
12805 if (GET_CODE (operands[2]) == CONST_INT)
12807 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12808 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12810 if (count >= single_width)
12812 emit_move_insn (low[0], high[1]);
12813 ix86_expand_clear (high[0]);
12815 if (count > single_width)
12816 emit_insn ((mode == DImode
12818 : gen_lshrdi3) (low[0], low[0],
12819 GEN_INT (count - single_width)));
12823 if (!rtx_equal_p (operands[0], operands[1]))
12824 emit_move_insn (operands[0], operands[1]);
12825 emit_insn ((mode == DImode
12827 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12828 emit_insn ((mode == DImode
12830 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12835 if (!rtx_equal_p (operands[0], operands[1]))
12836 emit_move_insn (operands[0], operands[1]);
12838 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12840 emit_insn ((mode == DImode
12842 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12843 emit_insn ((mode == DImode
12845 : gen_lshrdi3) (high[0], high[0], operands[2]));
12847 /* Heh. By reversing the arguments, we can reuse this pattern. */
12848 if (TARGET_CMOVE && scratch)
12850 ix86_expand_clear (scratch);
12851 emit_insn ((mode == DImode
12852 ? gen_x86_shift_adj_1
12853 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12857 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12861 /* Helper function for the string operations below. Dest VARIABLE whether
12862 it is aligned to VALUE bytes. If true, jump to the label. */
12864 ix86_expand_aligntest (rtx variable, int value)
12866 rtx label = gen_label_rtx ();
12867 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12868 if (GET_MODE (variable) == DImode)
12869 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12871 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12872 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12877 /* Adjust COUNTER by the VALUE. */
12879 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12881 if (GET_MODE (countreg) == DImode)
12882 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12884 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12887 /* Zero extend possibly SImode EXP to Pmode register. */
12889 ix86_zero_extend_to_Pmode (rtx exp)
12892 if (GET_MODE (exp) == VOIDmode)
12893 return force_reg (Pmode, exp);
12894 if (GET_MODE (exp) == Pmode)
12895 return copy_to_mode_reg (Pmode, exp);
12896 r = gen_reg_rtx (Pmode);
12897 emit_insn (gen_zero_extendsidi2 (r, exp));
12901 /* Expand string move (memcpy) operation. Use i386 string operations when
12902 profitable. expand_clrmem contains similar code. */
12904 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12906 rtx srcreg, destreg, countreg, srcexp, destexp;
12907 enum machine_mode counter_mode;
12908 HOST_WIDE_INT align = 0;
12909 unsigned HOST_WIDE_INT count = 0;
12911 if (GET_CODE (align_exp) == CONST_INT)
12912 align = INTVAL (align_exp);
12914 /* Can't use any of this if the user has appropriated esi or edi. */
12915 if (global_regs[4] || global_regs[5])
12918 /* This simple hack avoids all inlining code and simplifies code below. */
12919 if (!TARGET_ALIGN_STRINGOPS)
12922 if (GET_CODE (count_exp) == CONST_INT)
12924 count = INTVAL (count_exp);
12925 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12929 /* Figure out proper mode for counter. For 32bits it is always SImode,
12930 for 64bits use SImode when possible, otherwise DImode.
12931 Set count to number of bytes copied when known at compile time. */
12933 || GET_MODE (count_exp) == SImode
12934 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12935 counter_mode = SImode;
12937 counter_mode = DImode;
12939 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12941 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12942 if (destreg != XEXP (dst, 0))
12943 dst = replace_equiv_address_nv (dst, destreg);
12944 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12945 if (srcreg != XEXP (src, 0))
12946 src = replace_equiv_address_nv (src, srcreg);
12948 /* When optimizing for size emit simple rep ; movsb instruction for
12949 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12950 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12951 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12952 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12953 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12954 known to be zero or not. The rep; movsb sequence causes higher
12955 register pressure though, so take that into account. */
12957 if ((!optimize || optimize_size)
12962 || (count & 3) + count / 4 > 6))))
12964 emit_insn (gen_cld ());
12965 countreg = ix86_zero_extend_to_Pmode (count_exp);
12966 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12967 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12968 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12972 /* For constant aligned (or small unaligned) copies use rep movsl
12973 followed by code copying the rest. For PentiumPro ensure 8 byte
12974 alignment to allow rep movsl acceleration. */
12976 else if (count != 0
12978 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12979 || optimize_size || count < (unsigned int) 64))
12981 unsigned HOST_WIDE_INT offset = 0;
12982 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12983 rtx srcmem, dstmem;
12985 emit_insn (gen_cld ());
12986 if (count & ~(size - 1))
12988 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12990 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12992 while (offset < (count & ~(size - 1)))
12994 srcmem = adjust_automodify_address_nv (src, movs_mode,
12996 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12998 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13004 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
13005 & (TARGET_64BIT ? -1 : 0x3fffffff));
13006 countreg = copy_to_mode_reg (counter_mode, countreg);
13007 countreg = ix86_zero_extend_to_Pmode (countreg);
13009 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13010 GEN_INT (size == 4 ? 2 : 3));
13011 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13012 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13014 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13015 countreg, destexp, srcexp));
13016 offset = count & ~(size - 1);
13019 if (size == 8 && (count & 0x04))
13021 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13023 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13025 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13030 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13032 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13034 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13039 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13041 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13043 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13046 /* The generic code based on the glibc implementation:
13047 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13048 allowing accelerated copying there)
13049 - copy the data using rep movsl
13050 - copy the rest. */
13055 rtx srcmem, dstmem;
13056 int desired_alignment = (TARGET_PENTIUMPRO
13057 && (count == 0 || count >= (unsigned int) 260)
13058 ? 8 : UNITS_PER_WORD);
13059 /* Get rid of MEM_OFFSETs, they won't be accurate. */
13060 dst = change_address (dst, BLKmode, destreg);
13061 src = change_address (src, BLKmode, srcreg);
13063 /* In case we don't know anything about the alignment, default to
13064 library version, since it is usually equally fast and result in
13067 Also emit call when we know that the count is large and call overhead
13068 will not be important. */
13069 if (!TARGET_INLINE_ALL_STRINGOPS
13070 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13073 if (TARGET_SINGLE_STRINGOP)
13074 emit_insn (gen_cld ());
13076 countreg2 = gen_reg_rtx (Pmode);
13077 countreg = copy_to_mode_reg (counter_mode, count_exp);
13079 /* We don't use loops to align destination and to copy parts smaller
13080 than 4 bytes, because gcc is able to optimize such code better (in
13081 the case the destination or the count really is aligned, gcc is often
13082 able to predict the branches) and also it is friendlier to the
13083 hardware branch prediction.
13085 Using loops is beneficial for generic case, because we can
13086 handle small counts using the loops. Many CPUs (such as Athlon)
13087 have large REP prefix setup costs.
13089 This is quite costly. Maybe we can revisit this decision later or
13090 add some customizability to this code. */
13092 if (count == 0 && align < desired_alignment)
13094 label = gen_label_rtx ();
13095 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13096 LEU, 0, counter_mode, 1, label);
13100 rtx label = ix86_expand_aligntest (destreg, 1);
13101 srcmem = change_address (src, QImode, srcreg);
13102 dstmem = change_address (dst, QImode, destreg);
13103 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13104 ix86_adjust_counter (countreg, 1);
13105 emit_label (label);
13106 LABEL_NUSES (label) = 1;
13110 rtx label = ix86_expand_aligntest (destreg, 2);
13111 srcmem = change_address (src, HImode, srcreg);
13112 dstmem = change_address (dst, HImode, destreg);
13113 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13114 ix86_adjust_counter (countreg, 2);
13115 emit_label (label);
13116 LABEL_NUSES (label) = 1;
13118 if (align <= 4 && desired_alignment > 4)
13120 rtx label = ix86_expand_aligntest (destreg, 4);
13121 srcmem = change_address (src, SImode, srcreg);
13122 dstmem = change_address (dst, SImode, destreg);
13123 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13124 ix86_adjust_counter (countreg, 4);
13125 emit_label (label);
13126 LABEL_NUSES (label) = 1;
13129 if (label && desired_alignment > 4 && !TARGET_64BIT)
13131 emit_label (label);
13132 LABEL_NUSES (label) = 1;
13135 if (!TARGET_SINGLE_STRINGOP)
13136 emit_insn (gen_cld ());
13139 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13141 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13145 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13146 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13148 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13149 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13150 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13151 countreg2, destexp, srcexp));
13155 emit_label (label);
13156 LABEL_NUSES (label) = 1;
13158 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13160 srcmem = change_address (src, SImode, srcreg);
13161 dstmem = change_address (dst, SImode, destreg);
13162 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13164 if ((align <= 4 || count == 0) && TARGET_64BIT)
13166 rtx label = ix86_expand_aligntest (countreg, 4);
13167 srcmem = change_address (src, SImode, srcreg);
13168 dstmem = change_address (dst, SImode, destreg);
13169 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13170 emit_label (label);
13171 LABEL_NUSES (label) = 1;
13173 if (align > 2 && count != 0 && (count & 2))
13175 srcmem = change_address (src, HImode, srcreg);
13176 dstmem = change_address (dst, HImode, destreg);
13177 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13179 if (align <= 2 || count == 0)
13181 rtx label = ix86_expand_aligntest (countreg, 2);
13182 srcmem = change_address (src, HImode, srcreg);
13183 dstmem = change_address (dst, HImode, destreg);
13184 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13185 emit_label (label);
13186 LABEL_NUSES (label) = 1;
13188 if (align > 1 && count != 0 && (count & 1))
13190 srcmem = change_address (src, QImode, srcreg);
13191 dstmem = change_address (dst, QImode, destreg);
13192 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13194 if (align <= 1 || count == 0)
13196 rtx label = ix86_expand_aligntest (countreg, 1);
13197 srcmem = change_address (src, QImode, srcreg);
13198 dstmem = change_address (dst, QImode, destreg);
13199 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13200 emit_label (label);
13201 LABEL_NUSES (label) = 1;
13208 /* Expand string clear operation (bzero). Use i386 string operations when
13209 profitable. expand_movmem contains similar code. */
13211 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13213 rtx destreg, zeroreg, countreg, destexp;
13214 enum machine_mode counter_mode;
13215 HOST_WIDE_INT align = 0;
13216 unsigned HOST_WIDE_INT count = 0;
13218 if (GET_CODE (align_exp) == CONST_INT)
13219 align = INTVAL (align_exp);
13221 /* Can't use any of this if the user has appropriated esi. */
13222 if (global_regs[4])
13225 /* This simple hack avoids all inlining code and simplifies code below. */
13226 if (!TARGET_ALIGN_STRINGOPS)
13229 if (GET_CODE (count_exp) == CONST_INT)
13231 count = INTVAL (count_exp);
13232 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13235 /* Figure out proper mode for counter. For 32bits it is always SImode,
13236 for 64bits use SImode when possible, otherwise DImode.
13237 Set count to number of bytes copied when known at compile time. */
13239 || GET_MODE (count_exp) == SImode
13240 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13241 counter_mode = SImode;
13243 counter_mode = DImode;
13245 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13246 if (destreg != XEXP (dst, 0))
13247 dst = replace_equiv_address_nv (dst, destreg);
13250 /* When optimizing for size emit simple rep ; movsb instruction for
13251 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13252 sequence is 7 bytes long, so if optimizing for size and count is
13253 small enough that some stosl, stosw and stosb instructions without
13254 rep are shorter, fall back into the next if. */
13256 if ((!optimize || optimize_size)
13259 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13261 emit_insn (gen_cld ());
13263 countreg = ix86_zero_extend_to_Pmode (count_exp);
13264 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13265 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13266 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13268 else if (count != 0
13270 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13271 || optimize_size || count < (unsigned int) 64))
13273 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13274 unsigned HOST_WIDE_INT offset = 0;
13276 emit_insn (gen_cld ());
13278 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13279 if (count & ~(size - 1))
13281 unsigned HOST_WIDE_INT repcount;
13282 unsigned int max_nonrep;
13284 repcount = count >> (size == 4 ? 2 : 3);
13286 repcount &= 0x3fffffff;
13288 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13289 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13290 bytes. In both cases the latter seems to be faster for small
13292 max_nonrep = size == 4 ? 7 : 4;
13293 if (!optimize_size)
13296 case PROCESSOR_PENTIUM4:
13297 case PROCESSOR_NOCONA:
13304 if (repcount <= max_nonrep)
13305 while (repcount-- > 0)
13307 rtx mem = adjust_automodify_address_nv (dst,
13308 GET_MODE (zeroreg),
13310 emit_insn (gen_strset (destreg, mem, zeroreg));
13315 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13316 countreg = ix86_zero_extend_to_Pmode (countreg);
13317 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13318 GEN_INT (size == 4 ? 2 : 3));
13319 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13320 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13322 offset = count & ~(size - 1);
13325 if (size == 8 && (count & 0x04))
13327 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13329 emit_insn (gen_strset (destreg, mem,
13330 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13335 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13337 emit_insn (gen_strset (destreg, mem,
13338 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13343 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13345 emit_insn (gen_strset (destreg, mem,
13346 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13353 /* Compute desired alignment of the string operation. */
13354 int desired_alignment = (TARGET_PENTIUMPRO
13355 && (count == 0 || count >= (unsigned int) 260)
13356 ? 8 : UNITS_PER_WORD);
13358 /* In case we don't know anything about the alignment, default to
13359 library version, since it is usually equally fast and result in
13362 Also emit call when we know that the count is large and call overhead
13363 will not be important. */
13364 if (!TARGET_INLINE_ALL_STRINGOPS
13365 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13368 if (TARGET_SINGLE_STRINGOP)
13369 emit_insn (gen_cld ());
13371 countreg2 = gen_reg_rtx (Pmode);
13372 countreg = copy_to_mode_reg (counter_mode, count_exp);
13373 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13374 /* Get rid of MEM_OFFSET, it won't be accurate. */
13375 dst = change_address (dst, BLKmode, destreg);
13377 if (count == 0 && align < desired_alignment)
13379 label = gen_label_rtx ();
13380 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13381 LEU, 0, counter_mode, 1, label);
13385 rtx label = ix86_expand_aligntest (destreg, 1);
13386 emit_insn (gen_strset (destreg, dst,
13387 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13388 ix86_adjust_counter (countreg, 1);
13389 emit_label (label);
13390 LABEL_NUSES (label) = 1;
13394 rtx label = ix86_expand_aligntest (destreg, 2);
13395 emit_insn (gen_strset (destreg, dst,
13396 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13397 ix86_adjust_counter (countreg, 2);
13398 emit_label (label);
13399 LABEL_NUSES (label) = 1;
13401 if (align <= 4 && desired_alignment > 4)
13403 rtx label = ix86_expand_aligntest (destreg, 4);
13404 emit_insn (gen_strset (destreg, dst,
13406 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13408 ix86_adjust_counter (countreg, 4);
13409 emit_label (label);
13410 LABEL_NUSES (label) = 1;
13413 if (label && desired_alignment > 4 && !TARGET_64BIT)
13415 emit_label (label);
13416 LABEL_NUSES (label) = 1;
13420 if (!TARGET_SINGLE_STRINGOP)
13421 emit_insn (gen_cld ());
13424 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13426 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13430 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13431 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13433 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13434 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13438 emit_label (label);
13439 LABEL_NUSES (label) = 1;
13442 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13443 emit_insn (gen_strset (destreg, dst,
13444 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13445 if (TARGET_64BIT && (align <= 4 || count == 0))
13447 rtx label = ix86_expand_aligntest (countreg, 4);
13448 emit_insn (gen_strset (destreg, dst,
13449 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13450 emit_label (label);
13451 LABEL_NUSES (label) = 1;
13453 if (align > 2 && count != 0 && (count & 2))
13454 emit_insn (gen_strset (destreg, dst,
13455 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13456 if (align <= 2 || count == 0)
13458 rtx label = ix86_expand_aligntest (countreg, 2);
13459 emit_insn (gen_strset (destreg, dst,
13460 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13461 emit_label (label);
13462 LABEL_NUSES (label) = 1;
13464 if (align > 1 && count != 0 && (count & 1))
13465 emit_insn (gen_strset (destreg, dst,
13466 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13467 if (align <= 1 || count == 0)
13469 rtx label = ix86_expand_aligntest (countreg, 1);
13470 emit_insn (gen_strset (destreg, dst,
13471 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13472 emit_label (label);
13473 LABEL_NUSES (label) = 1;
13479 /* Expand strlen. */
13481 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13483 rtx addr, scratch1, scratch2, scratch3, scratch4;
13485 /* The generic case of strlen expander is long. Avoid it's
13486 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13488 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13489 && !TARGET_INLINE_ALL_STRINGOPS
13491 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13494 addr = force_reg (Pmode, XEXP (src, 0));
13495 scratch1 = gen_reg_rtx (Pmode);
13497 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13500 /* Well it seems that some optimizer does not combine a call like
13501 foo(strlen(bar), strlen(bar));
13502 when the move and the subtraction is done here. It does calculate
13503 the length just once when these instructions are done inside of
13504 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13505 often used and I use one fewer register for the lifetime of
13506 output_strlen_unroll() this is better. */
13508 emit_move_insn (out, addr);
13510 ix86_expand_strlensi_unroll_1 (out, src, align);
13512 /* strlensi_unroll_1 returns the address of the zero at the end of
13513 the string, like memchr(), so compute the length by subtracting
13514 the start address. */
13516 emit_insn (gen_subdi3 (out, out, addr));
13518 emit_insn (gen_subsi3 (out, out, addr));
13523 scratch2 = gen_reg_rtx (Pmode);
13524 scratch3 = gen_reg_rtx (Pmode);
13525 scratch4 = force_reg (Pmode, constm1_rtx);
13527 emit_move_insn (scratch3, addr);
13528 eoschar = force_reg (QImode, eoschar);
13530 emit_insn (gen_cld ());
13531 src = replace_equiv_address_nv (src, scratch3);
13533 /* If .md starts supporting :P, this can be done in .md. */
13534 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13535 scratch4), UNSPEC_SCAS);
13536 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13539 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13540 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13544 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13545 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13551 /* Expand the appropriate insns for doing strlen if not just doing
13554 out = result, initialized with the start address
13555 align_rtx = alignment of the address.
13556 scratch = scratch register, initialized with the startaddress when
13557 not aligned, otherwise undefined
13559 This is just the body. It needs the initializations mentioned above and
13560 some address computing at the end. These things are done in i386.md. */
13563 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13567 rtx align_2_label = NULL_RTX;
13568 rtx align_3_label = NULL_RTX;
13569 rtx align_4_label = gen_label_rtx ();
13570 rtx end_0_label = gen_label_rtx ();
13572 rtx tmpreg = gen_reg_rtx (SImode);
13573 rtx scratch = gen_reg_rtx (SImode);
13577 if (GET_CODE (align_rtx) == CONST_INT)
13578 align = INTVAL (align_rtx);
13580 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13582 /* Is there a known alignment and is it less than 4? */
13585 rtx scratch1 = gen_reg_rtx (Pmode);
13586 emit_move_insn (scratch1, out);
13587 /* Is there a known alignment and is it not 2? */
13590 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13591 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13593 /* Leave just the 3 lower bits. */
13594 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13595 NULL_RTX, 0, OPTAB_WIDEN);
13597 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13598 Pmode, 1, align_4_label);
13599 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13600 Pmode, 1, align_2_label);
13601 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13602 Pmode, 1, align_3_label);
13606 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13607 check if is aligned to 4 - byte. */
13609 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13610 NULL_RTX, 0, OPTAB_WIDEN);
13612 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13613 Pmode, 1, align_4_label);
13616 mem = change_address (src, QImode, out);
13618 /* Now compare the bytes. */
13620 /* Compare the first n unaligned byte on a byte per byte basis. */
13621 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13622 QImode, 1, end_0_label);
13624 /* Increment the address. */
13626 emit_insn (gen_adddi3 (out, out, const1_rtx));
13628 emit_insn (gen_addsi3 (out, out, const1_rtx));
13630 /* Not needed with an alignment of 2 */
13633 emit_label (align_2_label);
13635 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13639 emit_insn (gen_adddi3 (out, out, const1_rtx));
13641 emit_insn (gen_addsi3 (out, out, const1_rtx));
13643 emit_label (align_3_label);
13646 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13650 emit_insn (gen_adddi3 (out, out, const1_rtx));
13652 emit_insn (gen_addsi3 (out, out, const1_rtx));
13655 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13656 align this loop. It gives only huge programs, but does not help to
13658 emit_label (align_4_label);
13660 mem = change_address (src, SImode, out);
13661 emit_move_insn (scratch, mem);
13663 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13665 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13667 /* This formula yields a nonzero result iff one of the bytes is zero.
13668 This saves three branches inside loop and many cycles. */
13670 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13671 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13672 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13673 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13674 gen_int_mode (0x80808080, SImode)));
13675 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13680 rtx reg = gen_reg_rtx (SImode);
13681 rtx reg2 = gen_reg_rtx (Pmode);
13682 emit_move_insn (reg, tmpreg);
13683 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13685 /* If zero is not in the first two bytes, move two bytes forward. */
13686 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13687 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13688 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13689 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13690 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13693 /* Emit lea manually to avoid clobbering of flags. */
13694 emit_insn (gen_rtx_SET (SImode, reg2,
13695 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13697 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13698 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13699 emit_insn (gen_rtx_SET (VOIDmode, out,
13700 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13707 rtx end_2_label = gen_label_rtx ();
13708 /* Is zero in the first two bytes? */
13710 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13711 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13712 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13713 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13714 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13716 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13717 JUMP_LABEL (tmp) = end_2_label;
13719 /* Not in the first two. Move two bytes forward. */
13720 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13722 emit_insn (gen_adddi3 (out, out, const2_rtx));
13724 emit_insn (gen_addsi3 (out, out, const2_rtx));
13726 emit_label (end_2_label);
13730 /* Avoid branch in fixing the byte. */
13731 tmpreg = gen_lowpart (QImode, tmpreg);
13732 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13733 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13735 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13737 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13739 emit_label (end_0_label);
13743 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13744 rtx callarg2 ATTRIBUTE_UNUSED,
13745 rtx pop, int sibcall)
13747 rtx use = NULL, call;
13749 if (pop == const0_rtx)
13751 gcc_assert (!TARGET_64BIT || !pop);
13753 if (TARGET_MACHO && !TARGET_64BIT)
13756 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13757 fnaddr = machopic_indirect_call_target (fnaddr);
13762 /* Static functions and indirect calls don't need the pic register. */
13763 if (! TARGET_64BIT && flag_pic
13764 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13765 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13766 use_reg (&use, pic_offset_table_rtx);
13769 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13771 rtx al = gen_rtx_REG (QImode, 0);
13772 emit_move_insn (al, callarg2);
13773 use_reg (&use, al);
13776 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13778 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13779 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13781 if (sibcall && TARGET_64BIT
13782 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13785 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13786 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13787 emit_move_insn (fnaddr, addr);
13788 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13791 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13793 call = gen_rtx_SET (VOIDmode, retval, call);
13796 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13797 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13798 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13801 call = emit_call_insn (call);
13803 CALL_INSN_FUNCTION_USAGE (call) = use;
13807 /* Clear stack slot assignments remembered from previous functions.
13808 This is called from INIT_EXPANDERS once before RTL is emitted for each
13811 static struct machine_function *
13812 ix86_init_machine_status (void)
13814 struct machine_function *f;
13816 f = ggc_alloc_cleared (sizeof (struct machine_function));
13817 f->use_fast_prologue_epilogue_nregs = -1;
13818 f->tls_descriptor_call_expanded_p = 0;
13823 /* Return a MEM corresponding to a stack slot with mode MODE.
13824 Allocate a new slot if necessary.
13826 The RTL for a function can have several slots available: N is
13827 which slot to use. */
13830 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13832 struct stack_local_entry *s;
13834 gcc_assert (n < MAX_386_STACK_LOCALS);
13836 /* Virtual slot is valid only before vregs are instantiated. */
13837 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13839 for (s = ix86_stack_locals; s; s = s->next)
13840 if (s->mode == mode && s->n == n)
13843 s = (struct stack_local_entry *)
13844 ggc_alloc (sizeof (struct stack_local_entry));
13847 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13849 s->next = ix86_stack_locals;
13850 ix86_stack_locals = s;
13854 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13856 static GTY(()) rtx ix86_tls_symbol;
13858 ix86_tls_get_addr (void)
13861 if (!ix86_tls_symbol)
13863 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13864 (TARGET_ANY_GNU_TLS
13866 ? "___tls_get_addr"
13867 : "__tls_get_addr");
13870 return ix86_tls_symbol;
13873 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13875 static GTY(()) rtx ix86_tls_module_base_symbol;
13877 ix86_tls_module_base (void)
13880 if (!ix86_tls_module_base_symbol)
13882 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13883 "_TLS_MODULE_BASE_");
13884 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13885 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13888 return ix86_tls_module_base_symbol;
13891 /* Calculate the length of the memory address in the instruction
13892 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13895 memory_address_length (rtx addr)
13897 struct ix86_address parts;
13898 rtx base, index, disp;
13902 if (GET_CODE (addr) == PRE_DEC
13903 || GET_CODE (addr) == POST_INC
13904 || GET_CODE (addr) == PRE_MODIFY
13905 || GET_CODE (addr) == POST_MODIFY)
13908 ok = ix86_decompose_address (addr, &parts);
13911 if (parts.base && GET_CODE (parts.base) == SUBREG)
13912 parts.base = SUBREG_REG (parts.base);
13913 if (parts.index && GET_CODE (parts.index) == SUBREG)
13914 parts.index = SUBREG_REG (parts.index);
13917 index = parts.index;
13922 - esp as the base always wants an index,
13923 - ebp as the base always wants a displacement. */
13925 /* Register Indirect. */
13926 if (base && !index && !disp)
13928 /* esp (for its index) and ebp (for its displacement) need
13929 the two-byte modrm form. */
13930 if (addr == stack_pointer_rtx
13931 || addr == arg_pointer_rtx
13932 || addr == frame_pointer_rtx
13933 || addr == hard_frame_pointer_rtx)
13937 /* Direct Addressing. */
13938 else if (disp && !base && !index)
13943 /* Find the length of the displacement constant. */
13946 if (base && satisfies_constraint_K (disp))
13951 /* ebp always wants a displacement. */
13952 else if (base == hard_frame_pointer_rtx)
13955 /* An index requires the two-byte modrm form.... */
13957 /* ...like esp, which always wants an index. */
13958 || base == stack_pointer_rtx
13959 || base == arg_pointer_rtx
13960 || base == frame_pointer_rtx)
13967 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13968 is set, expect that insn have 8bit immediate alternative. */
13970 ix86_attr_length_immediate_default (rtx insn, int shortform)
13974 extract_insn_cached (insn);
13975 for (i = recog_data.n_operands - 1; i >= 0; --i)
13976 if (CONSTANT_P (recog_data.operand[i]))
13979 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13983 switch (get_attr_mode (insn))
13994 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13999 fatal_insn ("unknown insn mode", insn);
14005 /* Compute default value for "length_address" attribute. */
14007 ix86_attr_length_address_default (rtx insn)
14011 if (get_attr_type (insn) == TYPE_LEA)
14013 rtx set = PATTERN (insn);
14015 if (GET_CODE (set) == PARALLEL)
14016 set = XVECEXP (set, 0, 0);
14018 gcc_assert (GET_CODE (set) == SET);
14020 return memory_address_length (SET_SRC (set));
14023 extract_insn_cached (insn);
14024 for (i = recog_data.n_operands - 1; i >= 0; --i)
14025 if (GET_CODE (recog_data.operand[i]) == MEM)
14027 return memory_address_length (XEXP (recog_data.operand[i], 0));
14033 /* Return the maximum number of instructions a cpu can issue. */
14036 ix86_issue_rate (void)
14040 case PROCESSOR_PENTIUM:
14044 case PROCESSOR_PENTIUMPRO:
14045 case PROCESSOR_PENTIUM4:
14046 case PROCESSOR_ATHLON:
14048 case PROCESSOR_AMDFAM10:
14049 case PROCESSOR_NOCONA:
14050 case PROCESSOR_GENERIC32:
14051 case PROCESSOR_GENERIC64:
14054 case PROCESSOR_CORE2:
14062 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14063 by DEP_INSN and nothing set by DEP_INSN. */
14066 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14070 /* Simplify the test for uninteresting insns. */
14071 if (insn_type != TYPE_SETCC
14072 && insn_type != TYPE_ICMOV
14073 && insn_type != TYPE_FCMOV
14074 && insn_type != TYPE_IBR)
14077 if ((set = single_set (dep_insn)) != 0)
14079 set = SET_DEST (set);
14082 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14083 && XVECLEN (PATTERN (dep_insn), 0) == 2
14084 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14085 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14087 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14088 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14093 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14096 /* This test is true if the dependent insn reads the flags but
14097 not any other potentially set register. */
14098 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14101 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14107 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14108 address with operands set by DEP_INSN. */
14111 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14115 if (insn_type == TYPE_LEA
14118 addr = PATTERN (insn);
14120 if (GET_CODE (addr) == PARALLEL)
14121 addr = XVECEXP (addr, 0, 0);
14123 gcc_assert (GET_CODE (addr) == SET);
14125 addr = SET_SRC (addr);
14130 extract_insn_cached (insn);
14131 for (i = recog_data.n_operands - 1; i >= 0; --i)
14132 if (GET_CODE (recog_data.operand[i]) == MEM)
14134 addr = XEXP (recog_data.operand[i], 0);
14141 return modified_in_p (addr, dep_insn);
14145 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14147 enum attr_type insn_type, dep_insn_type;
14148 enum attr_memory memory;
14150 int dep_insn_code_number;
14152 /* Anti and output dependencies have zero cost on all CPUs. */
14153 if (REG_NOTE_KIND (link) != 0)
14156 dep_insn_code_number = recog_memoized (dep_insn);
14158 /* If we can't recognize the insns, we can't really do anything. */
14159 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14162 insn_type = get_attr_type (insn);
14163 dep_insn_type = get_attr_type (dep_insn);
14167 case PROCESSOR_PENTIUM:
14168 /* Address Generation Interlock adds a cycle of latency. */
14169 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14172 /* ??? Compares pair with jump/setcc. */
14173 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14176 /* Floating point stores require value to be ready one cycle earlier. */
14177 if (insn_type == TYPE_FMOV
14178 && get_attr_memory (insn) == MEMORY_STORE
14179 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14183 case PROCESSOR_PENTIUMPRO:
14184 memory = get_attr_memory (insn);
14186 /* INT->FP conversion is expensive. */
14187 if (get_attr_fp_int_src (dep_insn))
14190 /* There is one cycle extra latency between an FP op and a store. */
14191 if (insn_type == TYPE_FMOV
14192 && (set = single_set (dep_insn)) != NULL_RTX
14193 && (set2 = single_set (insn)) != NULL_RTX
14194 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14195 && GET_CODE (SET_DEST (set2)) == MEM)
14198 /* Show ability of reorder buffer to hide latency of load by executing
14199 in parallel with previous instruction in case
14200 previous instruction is not needed to compute the address. */
14201 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14202 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14204 /* Claim moves to take one cycle, as core can issue one load
14205 at time and the next load can start cycle later. */
14206 if (dep_insn_type == TYPE_IMOV
14207 || dep_insn_type == TYPE_FMOV)
14215 memory = get_attr_memory (insn);
14217 /* The esp dependency is resolved before the instruction is really
14219 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14220 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14223 /* INT->FP conversion is expensive. */
14224 if (get_attr_fp_int_src (dep_insn))
14227 /* Show ability of reorder buffer to hide latency of load by executing
14228 in parallel with previous instruction in case
14229 previous instruction is not needed to compute the address. */
14230 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14231 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14233 /* Claim moves to take one cycle, as core can issue one load
14234 at time and the next load can start cycle later. */
14235 if (dep_insn_type == TYPE_IMOV
14236 || dep_insn_type == TYPE_FMOV)
14245 case PROCESSOR_ATHLON:
14247 case PROCESSOR_AMDFAM10:
14248 case PROCESSOR_GENERIC32:
14249 case PROCESSOR_GENERIC64:
14250 memory = get_attr_memory (insn);
14252 /* Show ability of reorder buffer to hide latency of load by executing
14253 in parallel with previous instruction in case
14254 previous instruction is not needed to compute the address. */
14255 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14256 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14258 enum attr_unit unit = get_attr_unit (insn);
14261 /* Because of the difference between the length of integer and
14262 floating unit pipeline preparation stages, the memory operands
14263 for floating point are cheaper.
14265 ??? For Athlon it the difference is most probably 2. */
14266 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14269 loadcost = TARGET_ATHLON ? 2 : 0;
14271 if (cost >= loadcost)
14284 /* How many alternative schedules to try. This should be as wide as the
14285 scheduling freedom in the DFA, but no wider. Making this value too
14286 large results extra work for the scheduler. */
14289 ia32_multipass_dfa_lookahead (void)
14291 if (ix86_tune == PROCESSOR_PENTIUM)
14294 if (ix86_tune == PROCESSOR_PENTIUMPRO
14295 || ix86_tune == PROCESSOR_K6)
14303 /* Compute the alignment given to a constant that is being placed in memory.
14304 EXP is the constant and ALIGN is the alignment that the object would
14306 The value of this function is used instead of that alignment to align
14310 ix86_constant_alignment (tree exp, int align)
14312 if (TREE_CODE (exp) == REAL_CST)
14314 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14316 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14319 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14320 && !TARGET_NO_ALIGN_LONG_STRINGS
14321 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14322 return BITS_PER_WORD;
14327 /* Compute the alignment for a static variable.
14328 TYPE is the data type, and ALIGN is the alignment that
14329 the object would ordinarily have. The value of this function is used
14330 instead of that alignment to align the object. */
14333 ix86_data_alignment (tree type, int align)
14335 int max_align = optimize_size ? BITS_PER_WORD : 256;
14337 if (AGGREGATE_TYPE_P (type)
14338 && TYPE_SIZE (type)
14339 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14340 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14341 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14342 && align < max_align)
14345 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14346 to 16byte boundary. */
14349 if (AGGREGATE_TYPE_P (type)
14350 && TYPE_SIZE (type)
14351 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14352 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14353 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14357 if (TREE_CODE (type) == ARRAY_TYPE)
14359 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14361 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14364 else if (TREE_CODE (type) == COMPLEX_TYPE)
14367 if (TYPE_MODE (type) == DCmode && align < 64)
14369 if (TYPE_MODE (type) == XCmode && align < 128)
14372 else if ((TREE_CODE (type) == RECORD_TYPE
14373 || TREE_CODE (type) == UNION_TYPE
14374 || TREE_CODE (type) == QUAL_UNION_TYPE)
14375 && TYPE_FIELDS (type))
14377 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14379 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14382 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14383 || TREE_CODE (type) == INTEGER_TYPE)
14385 if (TYPE_MODE (type) == DFmode && align < 64)
14387 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14394 /* Compute the alignment for a local variable.
14395 TYPE is the data type, and ALIGN is the alignment that
14396 the object would ordinarily have. The value of this macro is used
14397 instead of that alignment to align the object. */
14400 ix86_local_alignment (tree type, int align)
14402 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14403 to 16byte boundary. */
14406 if (AGGREGATE_TYPE_P (type)
14407 && TYPE_SIZE (type)
14408 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14409 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14410 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14413 if (TREE_CODE (type) == ARRAY_TYPE)
14415 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14417 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14420 else if (TREE_CODE (type) == COMPLEX_TYPE)
14422 if (TYPE_MODE (type) == DCmode && align < 64)
14424 if (TYPE_MODE (type) == XCmode && align < 128)
14427 else if ((TREE_CODE (type) == RECORD_TYPE
14428 || TREE_CODE (type) == UNION_TYPE
14429 || TREE_CODE (type) == QUAL_UNION_TYPE)
14430 && TYPE_FIELDS (type))
14432 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14434 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14437 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14438 || TREE_CODE (type) == INTEGER_TYPE)
14441 if (TYPE_MODE (type) == DFmode && align < 64)
14443 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14449 /* Emit RTL insns to initialize the variable parts of a trampoline.
14450 FNADDR is an RTX for the address of the function's pure code.
14451 CXT is an RTX for the static chain value for the function. */
14453 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14457 /* Compute offset from the end of the jmp to the target function. */
14458 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14459 plus_constant (tramp, 10),
14460 NULL_RTX, 1, OPTAB_DIRECT);
14461 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14462 gen_int_mode (0xb9, QImode));
14463 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14464 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14465 gen_int_mode (0xe9, QImode));
14466 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14471 /* Try to load address using shorter movl instead of movabs.
14472 We may want to support movq for kernel mode, but kernel does not use
14473 trampolines at the moment. */
14474 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14476 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14477 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14478 gen_int_mode (0xbb41, HImode));
14479 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14480 gen_lowpart (SImode, fnaddr));
14485 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14486 gen_int_mode (0xbb49, HImode));
14487 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14491 /* Load static chain using movabs to r10. */
14492 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14493 gen_int_mode (0xba49, HImode));
14494 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14497 /* Jump to the r11 */
14498 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14499 gen_int_mode (0xff49, HImode));
14500 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14501 gen_int_mode (0xe3, QImode));
14503 gcc_assert (offset <= TRAMPOLINE_SIZE);
14506 #ifdef ENABLE_EXECUTE_STACK
14507 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14508 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14512 /* Codes for all the SSE/MMX builtins. */
14515 IX86_BUILTIN_ADDPS,
14516 IX86_BUILTIN_ADDSS,
14517 IX86_BUILTIN_DIVPS,
14518 IX86_BUILTIN_DIVSS,
14519 IX86_BUILTIN_MULPS,
14520 IX86_BUILTIN_MULSS,
14521 IX86_BUILTIN_SUBPS,
14522 IX86_BUILTIN_SUBSS,
14524 IX86_BUILTIN_CMPEQPS,
14525 IX86_BUILTIN_CMPLTPS,
14526 IX86_BUILTIN_CMPLEPS,
14527 IX86_BUILTIN_CMPGTPS,
14528 IX86_BUILTIN_CMPGEPS,
14529 IX86_BUILTIN_CMPNEQPS,
14530 IX86_BUILTIN_CMPNLTPS,
14531 IX86_BUILTIN_CMPNLEPS,
14532 IX86_BUILTIN_CMPNGTPS,
14533 IX86_BUILTIN_CMPNGEPS,
14534 IX86_BUILTIN_CMPORDPS,
14535 IX86_BUILTIN_CMPUNORDPS,
14536 IX86_BUILTIN_CMPEQSS,
14537 IX86_BUILTIN_CMPLTSS,
14538 IX86_BUILTIN_CMPLESS,
14539 IX86_BUILTIN_CMPNEQSS,
14540 IX86_BUILTIN_CMPNLTSS,
14541 IX86_BUILTIN_CMPNLESS,
14542 IX86_BUILTIN_CMPNGTSS,
14543 IX86_BUILTIN_CMPNGESS,
14544 IX86_BUILTIN_CMPORDSS,
14545 IX86_BUILTIN_CMPUNORDSS,
14547 IX86_BUILTIN_COMIEQSS,
14548 IX86_BUILTIN_COMILTSS,
14549 IX86_BUILTIN_COMILESS,
14550 IX86_BUILTIN_COMIGTSS,
14551 IX86_BUILTIN_COMIGESS,
14552 IX86_BUILTIN_COMINEQSS,
14553 IX86_BUILTIN_UCOMIEQSS,
14554 IX86_BUILTIN_UCOMILTSS,
14555 IX86_BUILTIN_UCOMILESS,
14556 IX86_BUILTIN_UCOMIGTSS,
14557 IX86_BUILTIN_UCOMIGESS,
14558 IX86_BUILTIN_UCOMINEQSS,
14560 IX86_BUILTIN_CVTPI2PS,
14561 IX86_BUILTIN_CVTPS2PI,
14562 IX86_BUILTIN_CVTSI2SS,
14563 IX86_BUILTIN_CVTSI642SS,
14564 IX86_BUILTIN_CVTSS2SI,
14565 IX86_BUILTIN_CVTSS2SI64,
14566 IX86_BUILTIN_CVTTPS2PI,
14567 IX86_BUILTIN_CVTTSS2SI,
14568 IX86_BUILTIN_CVTTSS2SI64,
14570 IX86_BUILTIN_MAXPS,
14571 IX86_BUILTIN_MAXSS,
14572 IX86_BUILTIN_MINPS,
14573 IX86_BUILTIN_MINSS,
14575 IX86_BUILTIN_LOADUPS,
14576 IX86_BUILTIN_STOREUPS,
14577 IX86_BUILTIN_MOVSS,
14579 IX86_BUILTIN_MOVHLPS,
14580 IX86_BUILTIN_MOVLHPS,
14581 IX86_BUILTIN_LOADHPS,
14582 IX86_BUILTIN_LOADLPS,
14583 IX86_BUILTIN_STOREHPS,
14584 IX86_BUILTIN_STORELPS,
14586 IX86_BUILTIN_MASKMOVQ,
14587 IX86_BUILTIN_MOVMSKPS,
14588 IX86_BUILTIN_PMOVMSKB,
14590 IX86_BUILTIN_MOVNTPS,
14591 IX86_BUILTIN_MOVNTQ,
14593 IX86_BUILTIN_LOADDQU,
14594 IX86_BUILTIN_STOREDQU,
14596 IX86_BUILTIN_PACKSSWB,
14597 IX86_BUILTIN_PACKSSDW,
14598 IX86_BUILTIN_PACKUSWB,
14600 IX86_BUILTIN_PADDB,
14601 IX86_BUILTIN_PADDW,
14602 IX86_BUILTIN_PADDD,
14603 IX86_BUILTIN_PADDQ,
14604 IX86_BUILTIN_PADDSB,
14605 IX86_BUILTIN_PADDSW,
14606 IX86_BUILTIN_PADDUSB,
14607 IX86_BUILTIN_PADDUSW,
14608 IX86_BUILTIN_PSUBB,
14609 IX86_BUILTIN_PSUBW,
14610 IX86_BUILTIN_PSUBD,
14611 IX86_BUILTIN_PSUBQ,
14612 IX86_BUILTIN_PSUBSB,
14613 IX86_BUILTIN_PSUBSW,
14614 IX86_BUILTIN_PSUBUSB,
14615 IX86_BUILTIN_PSUBUSW,
14618 IX86_BUILTIN_PANDN,
14622 IX86_BUILTIN_PAVGB,
14623 IX86_BUILTIN_PAVGW,
14625 IX86_BUILTIN_PCMPEQB,
14626 IX86_BUILTIN_PCMPEQW,
14627 IX86_BUILTIN_PCMPEQD,
14628 IX86_BUILTIN_PCMPGTB,
14629 IX86_BUILTIN_PCMPGTW,
14630 IX86_BUILTIN_PCMPGTD,
14632 IX86_BUILTIN_PMADDWD,
14634 IX86_BUILTIN_PMAXSW,
14635 IX86_BUILTIN_PMAXUB,
14636 IX86_BUILTIN_PMINSW,
14637 IX86_BUILTIN_PMINUB,
14639 IX86_BUILTIN_PMULHUW,
14640 IX86_BUILTIN_PMULHW,
14641 IX86_BUILTIN_PMULLW,
14643 IX86_BUILTIN_PSADBW,
14644 IX86_BUILTIN_PSHUFW,
14646 IX86_BUILTIN_PSLLW,
14647 IX86_BUILTIN_PSLLD,
14648 IX86_BUILTIN_PSLLQ,
14649 IX86_BUILTIN_PSRAW,
14650 IX86_BUILTIN_PSRAD,
14651 IX86_BUILTIN_PSRLW,
14652 IX86_BUILTIN_PSRLD,
14653 IX86_BUILTIN_PSRLQ,
14654 IX86_BUILTIN_PSLLWI,
14655 IX86_BUILTIN_PSLLDI,
14656 IX86_BUILTIN_PSLLQI,
14657 IX86_BUILTIN_PSRAWI,
14658 IX86_BUILTIN_PSRADI,
14659 IX86_BUILTIN_PSRLWI,
14660 IX86_BUILTIN_PSRLDI,
14661 IX86_BUILTIN_PSRLQI,
14663 IX86_BUILTIN_PUNPCKHBW,
14664 IX86_BUILTIN_PUNPCKHWD,
14665 IX86_BUILTIN_PUNPCKHDQ,
14666 IX86_BUILTIN_PUNPCKLBW,
14667 IX86_BUILTIN_PUNPCKLWD,
14668 IX86_BUILTIN_PUNPCKLDQ,
14670 IX86_BUILTIN_SHUFPS,
14672 IX86_BUILTIN_RCPPS,
14673 IX86_BUILTIN_RCPSS,
14674 IX86_BUILTIN_RSQRTPS,
14675 IX86_BUILTIN_RSQRTSS,
14676 IX86_BUILTIN_SQRTPS,
14677 IX86_BUILTIN_SQRTSS,
14679 IX86_BUILTIN_UNPCKHPS,
14680 IX86_BUILTIN_UNPCKLPS,
14682 IX86_BUILTIN_ANDPS,
14683 IX86_BUILTIN_ANDNPS,
14685 IX86_BUILTIN_XORPS,
14688 IX86_BUILTIN_LDMXCSR,
14689 IX86_BUILTIN_STMXCSR,
14690 IX86_BUILTIN_SFENCE,
14692 /* 3DNow! Original */
14693 IX86_BUILTIN_FEMMS,
14694 IX86_BUILTIN_PAVGUSB,
14695 IX86_BUILTIN_PF2ID,
14696 IX86_BUILTIN_PFACC,
14697 IX86_BUILTIN_PFADD,
14698 IX86_BUILTIN_PFCMPEQ,
14699 IX86_BUILTIN_PFCMPGE,
14700 IX86_BUILTIN_PFCMPGT,
14701 IX86_BUILTIN_PFMAX,
14702 IX86_BUILTIN_PFMIN,
14703 IX86_BUILTIN_PFMUL,
14704 IX86_BUILTIN_PFRCP,
14705 IX86_BUILTIN_PFRCPIT1,
14706 IX86_BUILTIN_PFRCPIT2,
14707 IX86_BUILTIN_PFRSQIT1,
14708 IX86_BUILTIN_PFRSQRT,
14709 IX86_BUILTIN_PFSUB,
14710 IX86_BUILTIN_PFSUBR,
14711 IX86_BUILTIN_PI2FD,
14712 IX86_BUILTIN_PMULHRW,
14714 /* 3DNow! Athlon Extensions */
14715 IX86_BUILTIN_PF2IW,
14716 IX86_BUILTIN_PFNACC,
14717 IX86_BUILTIN_PFPNACC,
14718 IX86_BUILTIN_PI2FW,
14719 IX86_BUILTIN_PSWAPDSI,
14720 IX86_BUILTIN_PSWAPDSF,
14723 IX86_BUILTIN_ADDPD,
14724 IX86_BUILTIN_ADDSD,
14725 IX86_BUILTIN_DIVPD,
14726 IX86_BUILTIN_DIVSD,
14727 IX86_BUILTIN_MULPD,
14728 IX86_BUILTIN_MULSD,
14729 IX86_BUILTIN_SUBPD,
14730 IX86_BUILTIN_SUBSD,
14732 IX86_BUILTIN_CMPEQPD,
14733 IX86_BUILTIN_CMPLTPD,
14734 IX86_BUILTIN_CMPLEPD,
14735 IX86_BUILTIN_CMPGTPD,
14736 IX86_BUILTIN_CMPGEPD,
14737 IX86_BUILTIN_CMPNEQPD,
14738 IX86_BUILTIN_CMPNLTPD,
14739 IX86_BUILTIN_CMPNLEPD,
14740 IX86_BUILTIN_CMPNGTPD,
14741 IX86_BUILTIN_CMPNGEPD,
14742 IX86_BUILTIN_CMPORDPD,
14743 IX86_BUILTIN_CMPUNORDPD,
14744 IX86_BUILTIN_CMPNEPD,
14745 IX86_BUILTIN_CMPEQSD,
14746 IX86_BUILTIN_CMPLTSD,
14747 IX86_BUILTIN_CMPLESD,
14748 IX86_BUILTIN_CMPNEQSD,
14749 IX86_BUILTIN_CMPNLTSD,
14750 IX86_BUILTIN_CMPNLESD,
14751 IX86_BUILTIN_CMPORDSD,
14752 IX86_BUILTIN_CMPUNORDSD,
14753 IX86_BUILTIN_CMPNESD,
14755 IX86_BUILTIN_COMIEQSD,
14756 IX86_BUILTIN_COMILTSD,
14757 IX86_BUILTIN_COMILESD,
14758 IX86_BUILTIN_COMIGTSD,
14759 IX86_BUILTIN_COMIGESD,
14760 IX86_BUILTIN_COMINEQSD,
14761 IX86_BUILTIN_UCOMIEQSD,
14762 IX86_BUILTIN_UCOMILTSD,
14763 IX86_BUILTIN_UCOMILESD,
14764 IX86_BUILTIN_UCOMIGTSD,
14765 IX86_BUILTIN_UCOMIGESD,
14766 IX86_BUILTIN_UCOMINEQSD,
14768 IX86_BUILTIN_MAXPD,
14769 IX86_BUILTIN_MAXSD,
14770 IX86_BUILTIN_MINPD,
14771 IX86_BUILTIN_MINSD,
14773 IX86_BUILTIN_ANDPD,
14774 IX86_BUILTIN_ANDNPD,
14776 IX86_BUILTIN_XORPD,
14778 IX86_BUILTIN_SQRTPD,
14779 IX86_BUILTIN_SQRTSD,
14781 IX86_BUILTIN_UNPCKHPD,
14782 IX86_BUILTIN_UNPCKLPD,
14784 IX86_BUILTIN_SHUFPD,
14786 IX86_BUILTIN_LOADUPD,
14787 IX86_BUILTIN_STOREUPD,
14788 IX86_BUILTIN_MOVSD,
14790 IX86_BUILTIN_LOADHPD,
14791 IX86_BUILTIN_LOADLPD,
14793 IX86_BUILTIN_CVTDQ2PD,
14794 IX86_BUILTIN_CVTDQ2PS,
14796 IX86_BUILTIN_CVTPD2DQ,
14797 IX86_BUILTIN_CVTPD2PI,
14798 IX86_BUILTIN_CVTPD2PS,
14799 IX86_BUILTIN_CVTTPD2DQ,
14800 IX86_BUILTIN_CVTTPD2PI,
14802 IX86_BUILTIN_CVTPI2PD,
14803 IX86_BUILTIN_CVTSI2SD,
14804 IX86_BUILTIN_CVTSI642SD,
14806 IX86_BUILTIN_CVTSD2SI,
14807 IX86_BUILTIN_CVTSD2SI64,
14808 IX86_BUILTIN_CVTSD2SS,
14809 IX86_BUILTIN_CVTSS2SD,
14810 IX86_BUILTIN_CVTTSD2SI,
14811 IX86_BUILTIN_CVTTSD2SI64,
14813 IX86_BUILTIN_CVTPS2DQ,
14814 IX86_BUILTIN_CVTPS2PD,
14815 IX86_BUILTIN_CVTTPS2DQ,
14817 IX86_BUILTIN_MOVNTI,
14818 IX86_BUILTIN_MOVNTPD,
14819 IX86_BUILTIN_MOVNTDQ,
14822 IX86_BUILTIN_MASKMOVDQU,
14823 IX86_BUILTIN_MOVMSKPD,
14824 IX86_BUILTIN_PMOVMSKB128,
14826 IX86_BUILTIN_PACKSSWB128,
14827 IX86_BUILTIN_PACKSSDW128,
14828 IX86_BUILTIN_PACKUSWB128,
14830 IX86_BUILTIN_PADDB128,
14831 IX86_BUILTIN_PADDW128,
14832 IX86_BUILTIN_PADDD128,
14833 IX86_BUILTIN_PADDQ128,
14834 IX86_BUILTIN_PADDSB128,
14835 IX86_BUILTIN_PADDSW128,
14836 IX86_BUILTIN_PADDUSB128,
14837 IX86_BUILTIN_PADDUSW128,
14838 IX86_BUILTIN_PSUBB128,
14839 IX86_BUILTIN_PSUBW128,
14840 IX86_BUILTIN_PSUBD128,
14841 IX86_BUILTIN_PSUBQ128,
14842 IX86_BUILTIN_PSUBSB128,
14843 IX86_BUILTIN_PSUBSW128,
14844 IX86_BUILTIN_PSUBUSB128,
14845 IX86_BUILTIN_PSUBUSW128,
14847 IX86_BUILTIN_PAND128,
14848 IX86_BUILTIN_PANDN128,
14849 IX86_BUILTIN_POR128,
14850 IX86_BUILTIN_PXOR128,
14852 IX86_BUILTIN_PAVGB128,
14853 IX86_BUILTIN_PAVGW128,
14855 IX86_BUILTIN_PCMPEQB128,
14856 IX86_BUILTIN_PCMPEQW128,
14857 IX86_BUILTIN_PCMPEQD128,
14858 IX86_BUILTIN_PCMPGTB128,
14859 IX86_BUILTIN_PCMPGTW128,
14860 IX86_BUILTIN_PCMPGTD128,
14862 IX86_BUILTIN_PMADDWD128,
14864 IX86_BUILTIN_PMAXSW128,
14865 IX86_BUILTIN_PMAXUB128,
14866 IX86_BUILTIN_PMINSW128,
14867 IX86_BUILTIN_PMINUB128,
14869 IX86_BUILTIN_PMULUDQ,
14870 IX86_BUILTIN_PMULUDQ128,
14871 IX86_BUILTIN_PMULHUW128,
14872 IX86_BUILTIN_PMULHW128,
14873 IX86_BUILTIN_PMULLW128,
14875 IX86_BUILTIN_PSADBW128,
14876 IX86_BUILTIN_PSHUFHW,
14877 IX86_BUILTIN_PSHUFLW,
14878 IX86_BUILTIN_PSHUFD,
14880 IX86_BUILTIN_PSLLW128,
14881 IX86_BUILTIN_PSLLD128,
14882 IX86_BUILTIN_PSLLQ128,
14883 IX86_BUILTIN_PSRAW128,
14884 IX86_BUILTIN_PSRAD128,
14885 IX86_BUILTIN_PSRLW128,
14886 IX86_BUILTIN_PSRLD128,
14887 IX86_BUILTIN_PSRLQ128,
14888 IX86_BUILTIN_PSLLDQI128,
14889 IX86_BUILTIN_PSLLWI128,
14890 IX86_BUILTIN_PSLLDI128,
14891 IX86_BUILTIN_PSLLQI128,
14892 IX86_BUILTIN_PSRAWI128,
14893 IX86_BUILTIN_PSRADI128,
14894 IX86_BUILTIN_PSRLDQI128,
14895 IX86_BUILTIN_PSRLWI128,
14896 IX86_BUILTIN_PSRLDI128,
14897 IX86_BUILTIN_PSRLQI128,
14899 IX86_BUILTIN_PUNPCKHBW128,
14900 IX86_BUILTIN_PUNPCKHWD128,
14901 IX86_BUILTIN_PUNPCKHDQ128,
14902 IX86_BUILTIN_PUNPCKHQDQ128,
14903 IX86_BUILTIN_PUNPCKLBW128,
14904 IX86_BUILTIN_PUNPCKLWD128,
14905 IX86_BUILTIN_PUNPCKLDQ128,
14906 IX86_BUILTIN_PUNPCKLQDQ128,
14908 IX86_BUILTIN_CLFLUSH,
14909 IX86_BUILTIN_MFENCE,
14910 IX86_BUILTIN_LFENCE,
14912 /* Prescott New Instructions. */
14913 IX86_BUILTIN_ADDSUBPS,
14914 IX86_BUILTIN_HADDPS,
14915 IX86_BUILTIN_HSUBPS,
14916 IX86_BUILTIN_MOVSHDUP,
14917 IX86_BUILTIN_MOVSLDUP,
14918 IX86_BUILTIN_ADDSUBPD,
14919 IX86_BUILTIN_HADDPD,
14920 IX86_BUILTIN_HSUBPD,
14921 IX86_BUILTIN_LDDQU,
14923 IX86_BUILTIN_MONITOR,
14924 IX86_BUILTIN_MWAIT,
14927 IX86_BUILTIN_PHADDW,
14928 IX86_BUILTIN_PHADDD,
14929 IX86_BUILTIN_PHADDSW,
14930 IX86_BUILTIN_PHSUBW,
14931 IX86_BUILTIN_PHSUBD,
14932 IX86_BUILTIN_PHSUBSW,
14933 IX86_BUILTIN_PMADDUBSW,
14934 IX86_BUILTIN_PMULHRSW,
14935 IX86_BUILTIN_PSHUFB,
14936 IX86_BUILTIN_PSIGNB,
14937 IX86_BUILTIN_PSIGNW,
14938 IX86_BUILTIN_PSIGND,
14939 IX86_BUILTIN_PALIGNR,
14940 IX86_BUILTIN_PABSB,
14941 IX86_BUILTIN_PABSW,
14942 IX86_BUILTIN_PABSD,
14944 IX86_BUILTIN_PHADDW128,
14945 IX86_BUILTIN_PHADDD128,
14946 IX86_BUILTIN_PHADDSW128,
14947 IX86_BUILTIN_PHSUBW128,
14948 IX86_BUILTIN_PHSUBD128,
14949 IX86_BUILTIN_PHSUBSW128,
14950 IX86_BUILTIN_PMADDUBSW128,
14951 IX86_BUILTIN_PMULHRSW128,
14952 IX86_BUILTIN_PSHUFB128,
14953 IX86_BUILTIN_PSIGNB128,
14954 IX86_BUILTIN_PSIGNW128,
14955 IX86_BUILTIN_PSIGND128,
14956 IX86_BUILTIN_PALIGNR128,
14957 IX86_BUILTIN_PABSB128,
14958 IX86_BUILTIN_PABSW128,
14959 IX86_BUILTIN_PABSD128,
14961 /* AMDFAM10 - SSE4A New Instructions. */
14962 IX86_BUILTIN_MOVNTSD,
14963 IX86_BUILTIN_MOVNTSS,
14964 IX86_BUILTIN_EXTRQI,
14965 IX86_BUILTIN_EXTRQ,
14966 IX86_BUILTIN_INSERTQI,
14967 IX86_BUILTIN_INSERTQ,
14969 IX86_BUILTIN_VEC_INIT_V2SI,
14970 IX86_BUILTIN_VEC_INIT_V4HI,
14971 IX86_BUILTIN_VEC_INIT_V8QI,
14972 IX86_BUILTIN_VEC_EXT_V2DF,
14973 IX86_BUILTIN_VEC_EXT_V2DI,
14974 IX86_BUILTIN_VEC_EXT_V4SF,
14975 IX86_BUILTIN_VEC_EXT_V4SI,
14976 IX86_BUILTIN_VEC_EXT_V8HI,
14977 IX86_BUILTIN_VEC_EXT_V16QI,
14978 IX86_BUILTIN_VEC_EXT_V2SI,
14979 IX86_BUILTIN_VEC_EXT_V4HI,
14980 IX86_BUILTIN_VEC_SET_V8HI,
14981 IX86_BUILTIN_VEC_SET_V4HI,
14986 #define def_builtin(MASK, NAME, TYPE, CODE) \
14988 if ((MASK) & target_flags \
14989 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14990 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14991 NULL, NULL_TREE); \
14994 /* Bits for builtin_description.flag. */
14996 /* Set when we don't support the comparison natively, and should
14997 swap_comparison in order to support it. */
14998 #define BUILTIN_DESC_SWAP_OPERANDS 1
15000 struct builtin_description
15002 const unsigned int mask;
15003 const enum insn_code icode;
15004 const char *const name;
15005 const enum ix86_builtins code;
15006 const enum rtx_code comparison;
15007 const unsigned int flag;
15010 static const struct builtin_description bdesc_comi[] =
15012 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15013 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15014 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15015 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15016 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15017 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15018 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15019 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15020 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15021 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15022 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15023 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15024 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15025 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15026 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15027 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15028 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15029 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15030 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15031 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15032 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15033 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15038 static const struct builtin_description bdesc_2arg[] =
15041 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15042 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15043 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15044 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15045 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15046 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15047 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15048 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15050 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15051 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15052 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15054 BUILTIN_DESC_SWAP_OPERANDS },
15055 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15056 BUILTIN_DESC_SWAP_OPERANDS },
15057 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15058 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15059 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15060 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15061 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15062 BUILTIN_DESC_SWAP_OPERANDS },
15063 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15064 BUILTIN_DESC_SWAP_OPERANDS },
15065 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15066 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15067 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15068 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15069 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15070 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15071 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15072 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15073 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15074 BUILTIN_DESC_SWAP_OPERANDS },
15075 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15076 BUILTIN_DESC_SWAP_OPERANDS },
15077 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15079 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15080 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15081 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15082 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15084 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15085 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15086 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15087 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15089 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15090 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15091 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15092 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15093 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15096 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15097 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15098 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15099 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15100 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15101 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15102 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15103 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15105 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15106 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15107 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15108 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15109 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15110 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15111 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15112 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15114 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15115 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15116 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15118 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15119 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15120 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15121 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15123 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15124 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15126 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15127 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15128 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15129 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15130 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15131 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15133 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15134 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15135 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15136 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15138 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15139 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15140 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15141 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15142 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15143 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15146 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15147 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15148 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15150 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15151 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15152 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15154 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15155 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15156 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15157 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15158 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15159 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15161 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15162 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15163 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15164 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15165 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15166 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15168 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15169 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15170 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15171 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15173 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15174 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15177 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15178 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15179 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15180 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15181 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15182 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15183 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15184 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15186 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15187 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15188 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15190 BUILTIN_DESC_SWAP_OPERANDS },
15191 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15192 BUILTIN_DESC_SWAP_OPERANDS },
15193 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15194 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15195 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15196 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15197 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15198 BUILTIN_DESC_SWAP_OPERANDS },
15199 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15200 BUILTIN_DESC_SWAP_OPERANDS },
15201 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15202 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15203 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15204 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15205 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15206 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15207 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15208 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15209 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15211 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15212 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15213 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15214 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15216 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15217 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15218 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15219 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15221 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15222 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15223 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15226 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15227 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15228 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15229 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15230 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15231 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15232 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15233 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15235 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15236 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15237 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15238 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15239 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15240 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15241 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15242 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15244 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15245 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15247 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15248 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15249 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15250 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15252 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15253 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15255 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15256 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15257 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15258 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15259 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15260 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15262 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15263 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15264 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15265 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15267 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15268 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15269 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15270 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15271 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15272 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15273 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15274 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15276 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15277 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15278 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15280 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15281 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15283 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15284 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15286 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15287 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15288 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15290 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15291 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15292 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15294 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15295 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15297 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15299 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15300 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15301 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15302 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15305 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15306 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15307 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15308 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15309 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15310 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15313 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15314 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15315 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15316 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15317 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15318 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15319 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15320 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15321 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15322 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15323 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15324 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15325 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15326 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15327 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15328 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15329 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15330 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15331 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15332 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15333 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15334 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15335 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15336 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15339 static const struct builtin_description bdesc_1arg[] =
15341 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15342 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15344 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15345 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15346 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15348 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15349 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15350 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15351 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15352 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15353 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15355 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15356 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15358 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15360 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15361 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15363 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15364 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15365 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15366 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15367 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15369 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15371 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15372 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15373 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15374 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15376 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15377 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15378 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15381 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15382 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15385 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15386 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15387 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15388 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15389 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15390 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15394 ix86_init_builtins (void)
15397 ix86_init_mmx_sse_builtins ();
15400 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15401 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15404 ix86_init_mmx_sse_builtins (void)
15406 const struct builtin_description * d;
15409 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15410 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15411 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15412 tree V2DI_type_node
15413 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15414 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15415 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15416 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15417 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15418 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15419 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15421 tree pchar_type_node = build_pointer_type (char_type_node);
15422 tree pcchar_type_node = build_pointer_type (
15423 build_type_variant (char_type_node, 1, 0));
15424 tree pfloat_type_node = build_pointer_type (float_type_node);
15425 tree pcfloat_type_node = build_pointer_type (
15426 build_type_variant (float_type_node, 1, 0));
15427 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15428 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15429 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15432 tree int_ftype_v4sf_v4sf
15433 = build_function_type_list (integer_type_node,
15434 V4SF_type_node, V4SF_type_node, NULL_TREE);
15435 tree v4si_ftype_v4sf_v4sf
15436 = build_function_type_list (V4SI_type_node,
15437 V4SF_type_node, V4SF_type_node, NULL_TREE);
15438 /* MMX/SSE/integer conversions. */
15439 tree int_ftype_v4sf
15440 = build_function_type_list (integer_type_node,
15441 V4SF_type_node, NULL_TREE);
15442 tree int64_ftype_v4sf
15443 = build_function_type_list (long_long_integer_type_node,
15444 V4SF_type_node, NULL_TREE);
15445 tree int_ftype_v8qi
15446 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15447 tree v4sf_ftype_v4sf_int
15448 = build_function_type_list (V4SF_type_node,
15449 V4SF_type_node, integer_type_node, NULL_TREE);
15450 tree v4sf_ftype_v4sf_int64
15451 = build_function_type_list (V4SF_type_node,
15452 V4SF_type_node, long_long_integer_type_node,
15454 tree v4sf_ftype_v4sf_v2si
15455 = build_function_type_list (V4SF_type_node,
15456 V4SF_type_node, V2SI_type_node, NULL_TREE);
15458 /* Miscellaneous. */
15459 tree v8qi_ftype_v4hi_v4hi
15460 = build_function_type_list (V8QI_type_node,
15461 V4HI_type_node, V4HI_type_node, NULL_TREE);
15462 tree v4hi_ftype_v2si_v2si
15463 = build_function_type_list (V4HI_type_node,
15464 V2SI_type_node, V2SI_type_node, NULL_TREE);
15465 tree v4sf_ftype_v4sf_v4sf_int
15466 = build_function_type_list (V4SF_type_node,
15467 V4SF_type_node, V4SF_type_node,
15468 integer_type_node, NULL_TREE);
15469 tree v2si_ftype_v4hi_v4hi
15470 = build_function_type_list (V2SI_type_node,
15471 V4HI_type_node, V4HI_type_node, NULL_TREE);
15472 tree v4hi_ftype_v4hi_int
15473 = build_function_type_list (V4HI_type_node,
15474 V4HI_type_node, integer_type_node, NULL_TREE);
15475 tree v4hi_ftype_v4hi_di
15476 = build_function_type_list (V4HI_type_node,
15477 V4HI_type_node, long_long_unsigned_type_node,
15479 tree v2si_ftype_v2si_di
15480 = build_function_type_list (V2SI_type_node,
15481 V2SI_type_node, long_long_unsigned_type_node,
15483 tree void_ftype_void
15484 = build_function_type (void_type_node, void_list_node);
15485 tree void_ftype_unsigned
15486 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15487 tree void_ftype_unsigned_unsigned
15488 = build_function_type_list (void_type_node, unsigned_type_node,
15489 unsigned_type_node, NULL_TREE);
15490 tree void_ftype_pcvoid_unsigned_unsigned
15491 = build_function_type_list (void_type_node, const_ptr_type_node,
15492 unsigned_type_node, unsigned_type_node,
15494 tree unsigned_ftype_void
15495 = build_function_type (unsigned_type_node, void_list_node);
15496 tree v2si_ftype_v4sf
15497 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15498 /* Loads/stores. */
15499 tree void_ftype_v8qi_v8qi_pchar
15500 = build_function_type_list (void_type_node,
15501 V8QI_type_node, V8QI_type_node,
15502 pchar_type_node, NULL_TREE);
15503 tree v4sf_ftype_pcfloat
15504 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15505 /* @@@ the type is bogus */
15506 tree v4sf_ftype_v4sf_pv2si
15507 = build_function_type_list (V4SF_type_node,
15508 V4SF_type_node, pv2si_type_node, NULL_TREE);
15509 tree void_ftype_pv2si_v4sf
15510 = build_function_type_list (void_type_node,
15511 pv2si_type_node, V4SF_type_node, NULL_TREE);
15512 tree void_ftype_pfloat_v4sf
15513 = build_function_type_list (void_type_node,
15514 pfloat_type_node, V4SF_type_node, NULL_TREE);
15515 tree void_ftype_pdi_di
15516 = build_function_type_list (void_type_node,
15517 pdi_type_node, long_long_unsigned_type_node,
15519 tree void_ftype_pv2di_v2di
15520 = build_function_type_list (void_type_node,
15521 pv2di_type_node, V2DI_type_node, NULL_TREE);
15522 /* Normal vector unops. */
15523 tree v4sf_ftype_v4sf
15524 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15525 tree v16qi_ftype_v16qi
15526 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15527 tree v8hi_ftype_v8hi
15528 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15529 tree v4si_ftype_v4si
15530 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15531 tree v8qi_ftype_v8qi
15532 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15533 tree v4hi_ftype_v4hi
15534 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15536 /* Normal vector binops. */
15537 tree v4sf_ftype_v4sf_v4sf
15538 = build_function_type_list (V4SF_type_node,
15539 V4SF_type_node, V4SF_type_node, NULL_TREE);
15540 tree v8qi_ftype_v8qi_v8qi
15541 = build_function_type_list (V8QI_type_node,
15542 V8QI_type_node, V8QI_type_node, NULL_TREE);
15543 tree v4hi_ftype_v4hi_v4hi
15544 = build_function_type_list (V4HI_type_node,
15545 V4HI_type_node, V4HI_type_node, NULL_TREE);
15546 tree v2si_ftype_v2si_v2si
15547 = build_function_type_list (V2SI_type_node,
15548 V2SI_type_node, V2SI_type_node, NULL_TREE);
15549 tree di_ftype_di_di
15550 = build_function_type_list (long_long_unsigned_type_node,
15551 long_long_unsigned_type_node,
15552 long_long_unsigned_type_node, NULL_TREE);
15554 tree di_ftype_di_di_int
15555 = build_function_type_list (long_long_unsigned_type_node,
15556 long_long_unsigned_type_node,
15557 long_long_unsigned_type_node,
15558 integer_type_node, NULL_TREE);
15560 tree v2si_ftype_v2sf
15561 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15562 tree v2sf_ftype_v2si
15563 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15564 tree v2si_ftype_v2si
15565 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15566 tree v2sf_ftype_v2sf
15567 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15568 tree v2sf_ftype_v2sf_v2sf
15569 = build_function_type_list (V2SF_type_node,
15570 V2SF_type_node, V2SF_type_node, NULL_TREE);
15571 tree v2si_ftype_v2sf_v2sf
15572 = build_function_type_list (V2SI_type_node,
15573 V2SF_type_node, V2SF_type_node, NULL_TREE);
15574 tree pint_type_node = build_pointer_type (integer_type_node);
15575 tree pdouble_type_node = build_pointer_type (double_type_node);
15576 tree pcdouble_type_node = build_pointer_type (
15577 build_type_variant (double_type_node, 1, 0));
15578 tree int_ftype_v2df_v2df
15579 = build_function_type_list (integer_type_node,
15580 V2DF_type_node, V2DF_type_node, NULL_TREE);
15582 tree void_ftype_pcvoid
15583 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15584 tree v4sf_ftype_v4si
15585 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15586 tree v4si_ftype_v4sf
15587 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15588 tree v2df_ftype_v4si
15589 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15590 tree v4si_ftype_v2df
15591 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15592 tree v2si_ftype_v2df
15593 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15594 tree v4sf_ftype_v2df
15595 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15596 tree v2df_ftype_v2si
15597 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15598 tree v2df_ftype_v4sf
15599 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15600 tree int_ftype_v2df
15601 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15602 tree int64_ftype_v2df
15603 = build_function_type_list (long_long_integer_type_node,
15604 V2DF_type_node, NULL_TREE);
15605 tree v2df_ftype_v2df_int
15606 = build_function_type_list (V2DF_type_node,
15607 V2DF_type_node, integer_type_node, NULL_TREE);
15608 tree v2df_ftype_v2df_int64
15609 = build_function_type_list (V2DF_type_node,
15610 V2DF_type_node, long_long_integer_type_node,
15612 tree v4sf_ftype_v4sf_v2df
15613 = build_function_type_list (V4SF_type_node,
15614 V4SF_type_node, V2DF_type_node, NULL_TREE);
15615 tree v2df_ftype_v2df_v4sf
15616 = build_function_type_list (V2DF_type_node,
15617 V2DF_type_node, V4SF_type_node, NULL_TREE);
15618 tree v2df_ftype_v2df_v2df_int
15619 = build_function_type_list (V2DF_type_node,
15620 V2DF_type_node, V2DF_type_node,
15623 tree v2df_ftype_v2df_pcdouble
15624 = build_function_type_list (V2DF_type_node,
15625 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15626 tree void_ftype_pdouble_v2df
15627 = build_function_type_list (void_type_node,
15628 pdouble_type_node, V2DF_type_node, NULL_TREE);
15629 tree void_ftype_pint_int
15630 = build_function_type_list (void_type_node,
15631 pint_type_node, integer_type_node, NULL_TREE);
15632 tree void_ftype_v16qi_v16qi_pchar
15633 = build_function_type_list (void_type_node,
15634 V16QI_type_node, V16QI_type_node,
15635 pchar_type_node, NULL_TREE);
15636 tree v2df_ftype_pcdouble
15637 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15638 tree v2df_ftype_v2df_v2df
15639 = build_function_type_list (V2DF_type_node,
15640 V2DF_type_node, V2DF_type_node, NULL_TREE);
15641 tree v16qi_ftype_v16qi_v16qi
15642 = build_function_type_list (V16QI_type_node,
15643 V16QI_type_node, V16QI_type_node, NULL_TREE);
15644 tree v8hi_ftype_v8hi_v8hi
15645 = build_function_type_list (V8HI_type_node,
15646 V8HI_type_node, V8HI_type_node, NULL_TREE);
15647 tree v4si_ftype_v4si_v4si
15648 = build_function_type_list (V4SI_type_node,
15649 V4SI_type_node, V4SI_type_node, NULL_TREE);
15650 tree v2di_ftype_v2di_v2di
15651 = build_function_type_list (V2DI_type_node,
15652 V2DI_type_node, V2DI_type_node, NULL_TREE);
15653 tree v2di_ftype_v2df_v2df
15654 = build_function_type_list (V2DI_type_node,
15655 V2DF_type_node, V2DF_type_node, NULL_TREE);
15656 tree v2df_ftype_v2df
15657 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15658 tree v2di_ftype_v2di_int
15659 = build_function_type_list (V2DI_type_node,
15660 V2DI_type_node, integer_type_node, NULL_TREE);
15661 tree v2di_ftype_v2di_v2di_int
15662 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15663 V2DI_type_node, integer_type_node, NULL_TREE);
15664 tree v4si_ftype_v4si_int
15665 = build_function_type_list (V4SI_type_node,
15666 V4SI_type_node, integer_type_node, NULL_TREE);
15667 tree v8hi_ftype_v8hi_int
15668 = build_function_type_list (V8HI_type_node,
15669 V8HI_type_node, integer_type_node, NULL_TREE);
15670 tree v4si_ftype_v8hi_v8hi
15671 = build_function_type_list (V4SI_type_node,
15672 V8HI_type_node, V8HI_type_node, NULL_TREE);
15673 tree di_ftype_v8qi_v8qi
15674 = build_function_type_list (long_long_unsigned_type_node,
15675 V8QI_type_node, V8QI_type_node, NULL_TREE);
15676 tree di_ftype_v2si_v2si
15677 = build_function_type_list (long_long_unsigned_type_node,
15678 V2SI_type_node, V2SI_type_node, NULL_TREE);
15679 tree v2di_ftype_v16qi_v16qi
15680 = build_function_type_list (V2DI_type_node,
15681 V16QI_type_node, V16QI_type_node, NULL_TREE);
15682 tree v2di_ftype_v4si_v4si
15683 = build_function_type_list (V2DI_type_node,
15684 V4SI_type_node, V4SI_type_node, NULL_TREE);
15685 tree int_ftype_v16qi
15686 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15687 tree v16qi_ftype_pcchar
15688 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15689 tree void_ftype_pchar_v16qi
15690 = build_function_type_list (void_type_node,
15691 pchar_type_node, V16QI_type_node, NULL_TREE);
15693 tree v2di_ftype_v2di_unsigned_unsigned
15694 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15695 unsigned_type_node, unsigned_type_node,
15697 tree v2di_ftype_v2di_v2di_unsigned_unsigned
15698 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
15699 unsigned_type_node, unsigned_type_node,
15701 tree v2di_ftype_v2di_v16qi
15702 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
15706 tree float128_type;
15709 /* The __float80 type. */
15710 if (TYPE_MODE (long_double_type_node) == XFmode)
15711 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15715 /* The __float80 type. */
15716 float80_type = make_node (REAL_TYPE);
15717 TYPE_PRECISION (float80_type) = 80;
15718 layout_type (float80_type);
15719 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15724 float128_type = make_node (REAL_TYPE);
15725 TYPE_PRECISION (float128_type) = 128;
15726 layout_type (float128_type);
15727 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15730 /* Add all builtins that are more or less simple operations on two
15732 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15734 /* Use one of the operands; the target can have a different mode for
15735 mask-generating compares. */
15736 enum machine_mode mode;
15741 mode = insn_data[d->icode].operand[1].mode;
15746 type = v16qi_ftype_v16qi_v16qi;
15749 type = v8hi_ftype_v8hi_v8hi;
15752 type = v4si_ftype_v4si_v4si;
15755 type = v2di_ftype_v2di_v2di;
15758 type = v2df_ftype_v2df_v2df;
15761 type = v4sf_ftype_v4sf_v4sf;
15764 type = v8qi_ftype_v8qi_v8qi;
15767 type = v4hi_ftype_v4hi_v4hi;
15770 type = v2si_ftype_v2si_v2si;
15773 type = di_ftype_di_di;
15777 gcc_unreachable ();
15780 /* Override for comparisons. */
15781 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15782 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15783 type = v4si_ftype_v4sf_v4sf;
15785 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15786 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15787 type = v2di_ftype_v2df_v2df;
15789 def_builtin (d->mask, d->name, type, d->code);
15792 /* Add all builtins that are more or less simple operations on 1 operand. */
15793 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15795 enum machine_mode mode;
15800 mode = insn_data[d->icode].operand[1].mode;
15805 type = v16qi_ftype_v16qi;
15808 type = v8hi_ftype_v8hi;
15811 type = v4si_ftype_v4si;
15814 type = v2df_ftype_v2df;
15817 type = v4sf_ftype_v4sf;
15820 type = v8qi_ftype_v8qi;
15823 type = v4hi_ftype_v4hi;
15826 type = v2si_ftype_v2si;
15833 def_builtin (d->mask, d->name, type, d->code);
15836 /* Add the remaining MMX insns with somewhat more complicated types. */
15837 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15838 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15839 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15840 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15842 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15843 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15844 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15846 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15847 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15849 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15850 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15852 /* comi/ucomi insns. */
15853 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15854 if (d->mask == MASK_SSE2)
15855 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15857 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15859 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15860 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15861 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15863 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15864 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15865 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15866 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15867 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15868 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15869 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15870 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15871 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15872 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15873 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15875 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15877 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15878 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15880 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15881 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15882 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15883 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15885 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15886 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15887 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15888 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15890 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15892 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15894 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15895 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15896 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15897 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15898 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15899 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15901 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15903 /* Original 3DNow! */
15904 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15905 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15906 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15907 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15908 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15909 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15910 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15911 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15912 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15913 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15914 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15915 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15916 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15917 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15918 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15919 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15920 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15921 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15922 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15923 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15925 /* 3DNow! extension as used in the Athlon CPU. */
15926 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15927 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15928 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15929 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15930 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15931 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15934 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15936 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15937 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15939 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15940 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15942 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15943 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15944 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15945 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15946 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15948 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15949 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15950 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15951 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15953 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15954 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15956 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15958 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15959 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15961 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15962 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15963 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15964 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15965 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15967 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15969 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15970 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15971 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15972 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15974 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15975 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15976 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15978 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15979 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15980 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15981 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15983 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15984 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15985 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15987 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15988 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15990 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15991 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15993 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15994 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15995 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15997 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15998 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15999 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16001 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
16002 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
16004 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16005 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16006 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16007 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16009 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16010 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16011 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16012 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16014 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16015 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16017 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16019 /* Prescott New Instructions. */
16020 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16021 void_ftype_pcvoid_unsigned_unsigned,
16022 IX86_BUILTIN_MONITOR);
16023 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16024 void_ftype_unsigned_unsigned,
16025 IX86_BUILTIN_MWAIT);
16026 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16028 IX86_BUILTIN_MOVSHDUP);
16029 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16031 IX86_BUILTIN_MOVSLDUP);
16032 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16033 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16036 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16037 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16038 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16039 IX86_BUILTIN_PALIGNR);
16041 /* AMDFAM10 SSE4A New built-ins */
16042 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
16043 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
16044 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
16045 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
16046 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
16047 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
16048 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
16049 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
16050 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
16051 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
16052 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
16053 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
16055 /* Access to the vec_init patterns. */
16056 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16057 integer_type_node, NULL_TREE);
16058 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16059 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16061 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16062 short_integer_type_node,
16063 short_integer_type_node,
16064 short_integer_type_node, NULL_TREE);
16065 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16066 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16068 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16069 char_type_node, char_type_node,
16070 char_type_node, char_type_node,
16071 char_type_node, char_type_node,
16072 char_type_node, NULL_TREE);
16073 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16074 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16076 /* Access to the vec_extract patterns. */
16077 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16078 integer_type_node, NULL_TREE);
16079 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
16080 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16082 ftype = build_function_type_list (long_long_integer_type_node,
16083 V2DI_type_node, integer_type_node,
16085 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
16086 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16088 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16089 integer_type_node, NULL_TREE);
16090 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16091 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16093 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16094 integer_type_node, NULL_TREE);
16095 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
16096 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16098 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16099 integer_type_node, NULL_TREE);
16100 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
16101 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16103 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16104 integer_type_node, NULL_TREE);
16105 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16106 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16108 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16109 integer_type_node, NULL_TREE);
16110 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16111 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16113 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16114 integer_type_node, NULL_TREE);
16115 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
16117 /* Access to the vec_set patterns. */
16118 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16120 integer_type_node, NULL_TREE);
16121 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
16122 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16124 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16126 integer_type_node, NULL_TREE);
16127 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16128 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16131 /* Errors in the source file can cause expand_expr to return const0_rtx
16132 where we expect a vector. To avoid crashing, use one of the vector
16133 clear instructions. */
16135 safe_vector_operand (rtx x, enum machine_mode mode)
16137 if (x == const0_rtx)
16138 x = CONST0_RTX (mode);
16142 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16145 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16148 tree arg0 = TREE_VALUE (arglist);
16149 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16150 rtx op0 = expand_normal (arg0);
16151 rtx op1 = expand_normal (arg1);
16152 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16153 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16154 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16156 if (VECTOR_MODE_P (mode0))
16157 op0 = safe_vector_operand (op0, mode0);
16158 if (VECTOR_MODE_P (mode1))
16159 op1 = safe_vector_operand (op1, mode1);
16161 if (optimize || !target
16162 || GET_MODE (target) != tmode
16163 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16164 target = gen_reg_rtx (tmode);
16166 if (GET_MODE (op1) == SImode && mode1 == TImode)
16168 rtx x = gen_reg_rtx (V4SImode);
16169 emit_insn (gen_sse2_loadd (x, op1));
16170 op1 = gen_lowpart (TImode, x);
16173 /* The insn must want input operands in the same modes as the
16175 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16176 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16178 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16179 op0 = copy_to_mode_reg (mode0, op0);
16180 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16181 op1 = copy_to_mode_reg (mode1, op1);
16183 /* ??? Using ix86_fixup_binary_operands is problematic when
16184 we've got mismatched modes. Fake it. */
16190 if (tmode == mode0 && tmode == mode1)
16192 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16196 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16198 op0 = force_reg (mode0, op0);
16199 op1 = force_reg (mode1, op1);
16200 target = gen_reg_rtx (tmode);
16203 pat = GEN_FCN (icode) (target, op0, op1);
16210 /* Subroutine of ix86_expand_builtin to take care of stores. */
16213 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16216 tree arg0 = TREE_VALUE (arglist);
16217 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16218 rtx op0 = expand_normal (arg0);
16219 rtx op1 = expand_normal (arg1);
16220 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16221 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16223 if (VECTOR_MODE_P (mode1))
16224 op1 = safe_vector_operand (op1, mode1);
16226 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16227 op1 = copy_to_mode_reg (mode1, op1);
16229 pat = GEN_FCN (icode) (op0, op1);
16235 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16238 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16239 rtx target, int do_load)
16242 tree arg0 = TREE_VALUE (arglist);
16243 rtx op0 = expand_normal (arg0);
16244 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16245 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16247 if (optimize || !target
16248 || GET_MODE (target) != tmode
16249 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16250 target = gen_reg_rtx (tmode);
16252 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16255 if (VECTOR_MODE_P (mode0))
16256 op0 = safe_vector_operand (op0, mode0);
16258 if ((optimize && !register_operand (op0, mode0))
16259 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16260 op0 = copy_to_mode_reg (mode0, op0);
16263 pat = GEN_FCN (icode) (target, op0);
16270 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16271 sqrtss, rsqrtss, rcpss. */
16274 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16277 tree arg0 = TREE_VALUE (arglist);
16278 rtx op1, op0 = expand_normal (arg0);
16279 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16280 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16282 if (optimize || !target
16283 || GET_MODE (target) != tmode
16284 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16285 target = gen_reg_rtx (tmode);
16287 if (VECTOR_MODE_P (mode0))
16288 op0 = safe_vector_operand (op0, mode0);
16290 if ((optimize && !register_operand (op0, mode0))
16291 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16292 op0 = copy_to_mode_reg (mode0, op0);
16295 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16296 op1 = copy_to_mode_reg (mode0, op1);
16298 pat = GEN_FCN (icode) (target, op0, op1);
16305 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16308 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16312 tree arg0 = TREE_VALUE (arglist);
16313 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16314 rtx op0 = expand_normal (arg0);
16315 rtx op1 = expand_normal (arg1);
16317 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16318 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16319 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16320 enum rtx_code comparison = d->comparison;
16322 if (VECTOR_MODE_P (mode0))
16323 op0 = safe_vector_operand (op0, mode0);
16324 if (VECTOR_MODE_P (mode1))
16325 op1 = safe_vector_operand (op1, mode1);
16327 /* Swap operands if we have a comparison that isn't available in
16329 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16331 rtx tmp = gen_reg_rtx (mode1);
16332 emit_move_insn (tmp, op1);
16337 if (optimize || !target
16338 || GET_MODE (target) != tmode
16339 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16340 target = gen_reg_rtx (tmode);
16342 if ((optimize && !register_operand (op0, mode0))
16343 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16344 op0 = copy_to_mode_reg (mode0, op0);
16345 if ((optimize && !register_operand (op1, mode1))
16346 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16347 op1 = copy_to_mode_reg (mode1, op1);
16349 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16350 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16357 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16360 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16364 tree arg0 = TREE_VALUE (arglist);
16365 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16366 rtx op0 = expand_normal (arg0);
16367 rtx op1 = expand_normal (arg1);
16369 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16370 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16371 enum rtx_code comparison = d->comparison;
16373 if (VECTOR_MODE_P (mode0))
16374 op0 = safe_vector_operand (op0, mode0);
16375 if (VECTOR_MODE_P (mode1))
16376 op1 = safe_vector_operand (op1, mode1);
16378 /* Swap operands if we have a comparison that isn't available in
16380 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16387 target = gen_reg_rtx (SImode);
16388 emit_move_insn (target, const0_rtx);
16389 target = gen_rtx_SUBREG (QImode, target, 0);
16391 if ((optimize && !register_operand (op0, mode0))
16392 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16393 op0 = copy_to_mode_reg (mode0, op0);
16394 if ((optimize && !register_operand (op1, mode1))
16395 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16396 op1 = copy_to_mode_reg (mode1, op1);
16398 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16399 pat = GEN_FCN (d->icode) (op0, op1);
16403 emit_insn (gen_rtx_SET (VOIDmode,
16404 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16405 gen_rtx_fmt_ee (comparison, QImode,
16409 return SUBREG_REG (target);
16412 /* Return the integer constant in ARG. Constrain it to be in the range
16413 of the subparts of VEC_TYPE; issue an error if not. */
16416 get_element_number (tree vec_type, tree arg)
16418 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16420 if (!host_integerp (arg, 1)
16421 || (elt = tree_low_cst (arg, 1), elt > max))
16423 error ("selector must be an integer constant in the range 0..%wi", max);
16430 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16431 ix86_expand_vector_init. We DO have language-level syntax for this, in
16432 the form of (type){ init-list }. Except that since we can't place emms
16433 instructions from inside the compiler, we can't allow the use of MMX
16434 registers unless the user explicitly asks for it. So we do *not* define
16435 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16436 we have builtins invoked by mmintrin.h that gives us license to emit
16437 these sorts of instructions. */
16440 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16442 enum machine_mode tmode = TYPE_MODE (type);
16443 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16444 int i, n_elt = GET_MODE_NUNITS (tmode);
16445 rtvec v = rtvec_alloc (n_elt);
16447 gcc_assert (VECTOR_MODE_P (tmode));
16449 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16451 rtx x = expand_normal (TREE_VALUE (arglist));
16452 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16455 gcc_assert (arglist == NULL);
16457 if (!target || !register_operand (target, tmode))
16458 target = gen_reg_rtx (tmode);
16460 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16464 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16465 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16466 had a language-level syntax for referencing vector elements. */
16469 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16471 enum machine_mode tmode, mode0;
16476 arg0 = TREE_VALUE (arglist);
16477 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16479 op0 = expand_normal (arg0);
16480 elt = get_element_number (TREE_TYPE (arg0), arg1);
16482 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16483 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16484 gcc_assert (VECTOR_MODE_P (mode0));
16486 op0 = force_reg (mode0, op0);
16488 if (optimize || !target || !register_operand (target, tmode))
16489 target = gen_reg_rtx (tmode);
16491 ix86_expand_vector_extract (true, target, op0, elt);
16496 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16497 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16498 a language-level syntax for referencing vector elements. */
16501 ix86_expand_vec_set_builtin (tree arglist)
16503 enum machine_mode tmode, mode1;
16504 tree arg0, arg1, arg2;
16506 rtx op0, op1, target;
16508 arg0 = TREE_VALUE (arglist);
16509 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16510 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16512 tmode = TYPE_MODE (TREE_TYPE (arg0));
16513 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16514 gcc_assert (VECTOR_MODE_P (tmode));
16516 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16517 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16518 elt = get_element_number (TREE_TYPE (arg0), arg2);
16520 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16521 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16523 op0 = force_reg (tmode, op0);
16524 op1 = force_reg (mode1, op1);
16526 /* OP0 is the source of these builtin functions and shouldn't be
16527 modified. Create a copy, use it and return it as target. */
16528 target = gen_reg_rtx (tmode);
16529 emit_move_insn (target, op0);
16530 ix86_expand_vector_set (true, target, op1, elt);
16535 /* Expand an expression EXP that calls a built-in function,
16536 with result going to TARGET if that's convenient
16537 (and in mode MODE if that's convenient).
16538 SUBTARGET may be used as the target for computing one of EXP's operands.
16539 IGNORE is nonzero if the value is to be ignored. */
16542 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16543 enum machine_mode mode ATTRIBUTE_UNUSED,
16544 int ignore ATTRIBUTE_UNUSED)
16546 const struct builtin_description *d;
16548 enum insn_code icode;
16549 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16550 tree arglist = TREE_OPERAND (exp, 1);
16551 tree arg0, arg1, arg2, arg3;
16552 rtx op0, op1, op2, op3, pat;
16553 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16554 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16558 case IX86_BUILTIN_EMMS:
16559 emit_insn (gen_mmx_emms ());
16562 case IX86_BUILTIN_SFENCE:
16563 emit_insn (gen_sse_sfence ());
16566 case IX86_BUILTIN_MASKMOVQ:
16567 case IX86_BUILTIN_MASKMOVDQU:
16568 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16569 ? CODE_FOR_mmx_maskmovq
16570 : CODE_FOR_sse2_maskmovdqu);
16571 /* Note the arg order is different from the operand order. */
16572 arg1 = TREE_VALUE (arglist);
16573 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16574 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16575 op0 = expand_normal (arg0);
16576 op1 = expand_normal (arg1);
16577 op2 = expand_normal (arg2);
16578 mode0 = insn_data[icode].operand[0].mode;
16579 mode1 = insn_data[icode].operand[1].mode;
16580 mode2 = insn_data[icode].operand[2].mode;
16582 op0 = force_reg (Pmode, op0);
16583 op0 = gen_rtx_MEM (mode1, op0);
16585 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16586 op0 = copy_to_mode_reg (mode0, op0);
16587 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16588 op1 = copy_to_mode_reg (mode1, op1);
16589 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16590 op2 = copy_to_mode_reg (mode2, op2);
16591 pat = GEN_FCN (icode) (op0, op1, op2);
16597 case IX86_BUILTIN_SQRTSS:
16598 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16599 case IX86_BUILTIN_RSQRTSS:
16600 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16601 case IX86_BUILTIN_RCPSS:
16602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16604 case IX86_BUILTIN_LOADUPS:
16605 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16607 case IX86_BUILTIN_STOREUPS:
16608 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16610 case IX86_BUILTIN_LOADHPS:
16611 case IX86_BUILTIN_LOADLPS:
16612 case IX86_BUILTIN_LOADHPD:
16613 case IX86_BUILTIN_LOADLPD:
16614 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16615 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16616 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16617 : CODE_FOR_sse2_loadlpd);
16618 arg0 = TREE_VALUE (arglist);
16619 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16620 op0 = expand_normal (arg0);
16621 op1 = expand_normal (arg1);
16622 tmode = insn_data[icode].operand[0].mode;
16623 mode0 = insn_data[icode].operand[1].mode;
16624 mode1 = insn_data[icode].operand[2].mode;
16626 op0 = force_reg (mode0, op0);
16627 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16628 if (optimize || target == 0
16629 || GET_MODE (target) != tmode
16630 || !register_operand (target, tmode))
16631 target = gen_reg_rtx (tmode);
16632 pat = GEN_FCN (icode) (target, op0, op1);
16638 case IX86_BUILTIN_STOREHPS:
16639 case IX86_BUILTIN_STORELPS:
16640 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16641 : CODE_FOR_sse_storelps);
16642 arg0 = TREE_VALUE (arglist);
16643 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16644 op0 = expand_normal (arg0);
16645 op1 = expand_normal (arg1);
16646 mode0 = insn_data[icode].operand[0].mode;
16647 mode1 = insn_data[icode].operand[1].mode;
16649 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16650 op1 = force_reg (mode1, op1);
16652 pat = GEN_FCN (icode) (op0, op1);
16658 case IX86_BUILTIN_MOVNTPS:
16659 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16660 case IX86_BUILTIN_MOVNTQ:
16661 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16663 case IX86_BUILTIN_LDMXCSR:
16664 op0 = expand_normal (TREE_VALUE (arglist));
16665 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16666 emit_move_insn (target, op0);
16667 emit_insn (gen_sse_ldmxcsr (target));
16670 case IX86_BUILTIN_STMXCSR:
16671 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16672 emit_insn (gen_sse_stmxcsr (target));
16673 return copy_to_mode_reg (SImode, target);
16675 case IX86_BUILTIN_SHUFPS:
16676 case IX86_BUILTIN_SHUFPD:
16677 icode = (fcode == IX86_BUILTIN_SHUFPS
16678 ? CODE_FOR_sse_shufps
16679 : CODE_FOR_sse2_shufpd);
16680 arg0 = TREE_VALUE (arglist);
16681 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16682 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16683 op0 = expand_normal (arg0);
16684 op1 = expand_normal (arg1);
16685 op2 = expand_normal (arg2);
16686 tmode = insn_data[icode].operand[0].mode;
16687 mode0 = insn_data[icode].operand[1].mode;
16688 mode1 = insn_data[icode].operand[2].mode;
16689 mode2 = insn_data[icode].operand[3].mode;
16691 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16692 op0 = copy_to_mode_reg (mode0, op0);
16693 if ((optimize && !register_operand (op1, mode1))
16694 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16695 op1 = copy_to_mode_reg (mode1, op1);
16696 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16698 /* @@@ better error message */
16699 error ("mask must be an immediate");
16700 return gen_reg_rtx (tmode);
16702 if (optimize || target == 0
16703 || GET_MODE (target) != tmode
16704 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16705 target = gen_reg_rtx (tmode);
16706 pat = GEN_FCN (icode) (target, op0, op1, op2);
16712 case IX86_BUILTIN_PSHUFW:
16713 case IX86_BUILTIN_PSHUFD:
16714 case IX86_BUILTIN_PSHUFHW:
16715 case IX86_BUILTIN_PSHUFLW:
16716 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16717 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16718 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16719 : CODE_FOR_mmx_pshufw);
16720 arg0 = TREE_VALUE (arglist);
16721 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16722 op0 = expand_normal (arg0);
16723 op1 = expand_normal (arg1);
16724 tmode = insn_data[icode].operand[0].mode;
16725 mode1 = insn_data[icode].operand[1].mode;
16726 mode2 = insn_data[icode].operand[2].mode;
16728 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16729 op0 = copy_to_mode_reg (mode1, op0);
16730 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16732 /* @@@ better error message */
16733 error ("mask must be an immediate");
16737 || GET_MODE (target) != tmode
16738 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16739 target = gen_reg_rtx (tmode);
16740 pat = GEN_FCN (icode) (target, op0, op1);
16746 case IX86_BUILTIN_PSLLWI128:
16747 icode = CODE_FOR_ashlv8hi3;
16749 case IX86_BUILTIN_PSLLDI128:
16750 icode = CODE_FOR_ashlv4si3;
16752 case IX86_BUILTIN_PSLLQI128:
16753 icode = CODE_FOR_ashlv2di3;
16755 case IX86_BUILTIN_PSRAWI128:
16756 icode = CODE_FOR_ashrv8hi3;
16758 case IX86_BUILTIN_PSRADI128:
16759 icode = CODE_FOR_ashrv4si3;
16761 case IX86_BUILTIN_PSRLWI128:
16762 icode = CODE_FOR_lshrv8hi3;
16764 case IX86_BUILTIN_PSRLDI128:
16765 icode = CODE_FOR_lshrv4si3;
16767 case IX86_BUILTIN_PSRLQI128:
16768 icode = CODE_FOR_lshrv2di3;
16771 arg0 = TREE_VALUE (arglist);
16772 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16773 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16774 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16776 if (GET_CODE (op1) != CONST_INT)
16778 error ("shift must be an immediate");
16781 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16782 op1 = GEN_INT (255);
16784 tmode = insn_data[icode].operand[0].mode;
16785 mode1 = insn_data[icode].operand[1].mode;
16786 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16787 op0 = copy_to_reg (op0);
16789 target = gen_reg_rtx (tmode);
16790 pat = GEN_FCN (icode) (target, op0, op1);
16796 case IX86_BUILTIN_PSLLW128:
16797 icode = CODE_FOR_ashlv8hi3;
16799 case IX86_BUILTIN_PSLLD128:
16800 icode = CODE_FOR_ashlv4si3;
16802 case IX86_BUILTIN_PSLLQ128:
16803 icode = CODE_FOR_ashlv2di3;
16805 case IX86_BUILTIN_PSRAW128:
16806 icode = CODE_FOR_ashrv8hi3;
16808 case IX86_BUILTIN_PSRAD128:
16809 icode = CODE_FOR_ashrv4si3;
16811 case IX86_BUILTIN_PSRLW128:
16812 icode = CODE_FOR_lshrv8hi3;
16814 case IX86_BUILTIN_PSRLD128:
16815 icode = CODE_FOR_lshrv4si3;
16817 case IX86_BUILTIN_PSRLQ128:
16818 icode = CODE_FOR_lshrv2di3;
16821 arg0 = TREE_VALUE (arglist);
16822 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16823 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16824 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16826 tmode = insn_data[icode].operand[0].mode;
16827 mode1 = insn_data[icode].operand[1].mode;
16829 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16830 op0 = copy_to_reg (op0);
16832 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16833 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16834 op1 = copy_to_reg (op1);
16836 target = gen_reg_rtx (tmode);
16837 pat = GEN_FCN (icode) (target, op0, op1);
16843 case IX86_BUILTIN_PSLLDQI128:
16844 case IX86_BUILTIN_PSRLDQI128:
16845 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16846 : CODE_FOR_sse2_lshrti3);
16847 arg0 = TREE_VALUE (arglist);
16848 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16849 op0 = expand_normal (arg0);
16850 op1 = expand_normal (arg1);
16851 tmode = insn_data[icode].operand[0].mode;
16852 mode1 = insn_data[icode].operand[1].mode;
16853 mode2 = insn_data[icode].operand[2].mode;
16855 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16857 op0 = copy_to_reg (op0);
16858 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16860 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16862 error ("shift must be an immediate");
16865 target = gen_reg_rtx (V2DImode);
16866 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16873 case IX86_BUILTIN_FEMMS:
16874 emit_insn (gen_mmx_femms ());
16877 case IX86_BUILTIN_PAVGUSB:
16878 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16880 case IX86_BUILTIN_PF2ID:
16881 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16883 case IX86_BUILTIN_PFACC:
16884 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16886 case IX86_BUILTIN_PFADD:
16887 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16889 case IX86_BUILTIN_PFCMPEQ:
16890 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16892 case IX86_BUILTIN_PFCMPGE:
16893 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16895 case IX86_BUILTIN_PFCMPGT:
16896 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16898 case IX86_BUILTIN_PFMAX:
16899 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16901 case IX86_BUILTIN_PFMIN:
16902 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16904 case IX86_BUILTIN_PFMUL:
16905 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16907 case IX86_BUILTIN_PFRCP:
16908 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16910 case IX86_BUILTIN_PFRCPIT1:
16911 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16913 case IX86_BUILTIN_PFRCPIT2:
16914 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16916 case IX86_BUILTIN_PFRSQIT1:
16917 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16919 case IX86_BUILTIN_PFRSQRT:
16920 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16922 case IX86_BUILTIN_PFSUB:
16923 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16925 case IX86_BUILTIN_PFSUBR:
16926 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16928 case IX86_BUILTIN_PI2FD:
16929 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16931 case IX86_BUILTIN_PMULHRW:
16932 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16934 case IX86_BUILTIN_PF2IW:
16935 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16937 case IX86_BUILTIN_PFNACC:
16938 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16940 case IX86_BUILTIN_PFPNACC:
16941 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16943 case IX86_BUILTIN_PI2FW:
16944 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16946 case IX86_BUILTIN_PSWAPDSI:
16947 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16949 case IX86_BUILTIN_PSWAPDSF:
16950 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16952 case IX86_BUILTIN_SQRTSD:
16953 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16954 case IX86_BUILTIN_LOADUPD:
16955 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16956 case IX86_BUILTIN_STOREUPD:
16957 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16959 case IX86_BUILTIN_MFENCE:
16960 emit_insn (gen_sse2_mfence ());
16962 case IX86_BUILTIN_LFENCE:
16963 emit_insn (gen_sse2_lfence ());
16966 case IX86_BUILTIN_CLFLUSH:
16967 arg0 = TREE_VALUE (arglist);
16968 op0 = expand_normal (arg0);
16969 icode = CODE_FOR_sse2_clflush;
16970 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16971 op0 = copy_to_mode_reg (Pmode, op0);
16973 emit_insn (gen_sse2_clflush (op0));
16976 case IX86_BUILTIN_MOVNTPD:
16977 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16978 case IX86_BUILTIN_MOVNTDQ:
16979 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16980 case IX86_BUILTIN_MOVNTI:
16981 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16983 case IX86_BUILTIN_LOADDQU:
16984 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16985 case IX86_BUILTIN_STOREDQU:
16986 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16988 case IX86_BUILTIN_MONITOR:
16989 arg0 = TREE_VALUE (arglist);
16990 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16991 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16992 op0 = expand_normal (arg0);
16993 op1 = expand_normal (arg1);
16994 op2 = expand_normal (arg2);
16996 op0 = copy_to_mode_reg (Pmode, op0);
16998 op1 = copy_to_mode_reg (SImode, op1);
17000 op2 = copy_to_mode_reg (SImode, op2);
17002 emit_insn (gen_sse3_monitor (op0, op1, op2));
17004 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17007 case IX86_BUILTIN_MWAIT:
17008 arg0 = TREE_VALUE (arglist);
17009 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17010 op0 = expand_normal (arg0);
17011 op1 = expand_normal (arg1);
17013 op0 = copy_to_mode_reg (SImode, op0);
17015 op1 = copy_to_mode_reg (SImode, op1);
17016 emit_insn (gen_sse3_mwait (op0, op1));
17019 case IX86_BUILTIN_LDDQU:
17020 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17023 case IX86_BUILTIN_PALIGNR:
17024 case IX86_BUILTIN_PALIGNR128:
17025 if (fcode == IX86_BUILTIN_PALIGNR)
17027 icode = CODE_FOR_ssse3_palignrdi;
17032 icode = CODE_FOR_ssse3_palignrti;
17035 arg0 = TREE_VALUE (arglist);
17036 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17037 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17038 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17039 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17040 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17041 tmode = insn_data[icode].operand[0].mode;
17042 mode1 = insn_data[icode].operand[1].mode;
17043 mode2 = insn_data[icode].operand[2].mode;
17044 mode3 = insn_data[icode].operand[3].mode;
17046 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17048 op0 = copy_to_reg (op0);
17049 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17051 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17053 op1 = copy_to_reg (op1);
17054 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17056 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17058 error ("shift must be an immediate");
17061 target = gen_reg_rtx (mode);
17062 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17069 case IX86_BUILTIN_MOVNTSD:
17070 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17072 case IX86_BUILTIN_MOVNTSS:
17073 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17075 case IX86_BUILTIN_INSERTQ:
17076 case IX86_BUILTIN_EXTRQ:
17077 icode = (fcode == IX86_BUILTIN_EXTRQ
17078 ? CODE_FOR_sse4a_extrq
17079 : CODE_FOR_sse4a_insertq);
17080 arg0 = TREE_VALUE (arglist);
17081 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17082 op0 = expand_normal (arg0);
17083 op1 = expand_normal (arg1);
17084 tmode = insn_data[icode].operand[0].mode;
17085 mode1 = insn_data[icode].operand[1].mode;
17086 mode2 = insn_data[icode].operand[2].mode;
17087 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17088 op0 = copy_to_mode_reg (mode1, op0);
17089 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17090 op1 = copy_to_mode_reg (mode2, op1);
17091 if (optimize || target == 0
17092 || GET_MODE (target) != tmode
17093 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17094 target = gen_reg_rtx (tmode);
17095 pat = GEN_FCN (icode) (target, op0, op1);
17101 case IX86_BUILTIN_EXTRQI:
17102 icode = CODE_FOR_sse4a_extrqi;
17103 arg0 = TREE_VALUE (arglist);
17104 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17105 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17106 op0 = expand_normal (arg0);
17107 op1 = expand_normal (arg1);
17108 op2 = expand_normal (arg2);
17109 tmode = insn_data[icode].operand[0].mode;
17110 mode1 = insn_data[icode].operand[1].mode;
17111 mode2 = insn_data[icode].operand[2].mode;
17112 mode3 = insn_data[icode].operand[3].mode;
17113 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17114 op0 = copy_to_mode_reg (mode1, op0);
17115 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17117 error ("index mask must be an immediate");
17118 return gen_reg_rtx (tmode);
17120 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17122 error ("length mask must be an immediate");
17123 return gen_reg_rtx (tmode);
17125 if (optimize || target == 0
17126 || GET_MODE (target) != tmode
17127 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17128 target = gen_reg_rtx (tmode);
17129 pat = GEN_FCN (icode) (target, op0, op1, op2);
17135 case IX86_BUILTIN_INSERTQI:
17136 icode = CODE_FOR_sse4a_insertqi;
17137 arg0 = TREE_VALUE (arglist);
17138 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17139 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17140 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17141 op0 = expand_normal (arg0);
17142 op1 = expand_normal (arg1);
17143 op2 = expand_normal (arg2);
17144 op3 = expand_normal (arg3);
17145 tmode = insn_data[icode].operand[0].mode;
17146 mode1 = insn_data[icode].operand[1].mode;
17147 mode2 = insn_data[icode].operand[2].mode;
17148 mode3 = insn_data[icode].operand[3].mode;
17149 mode4 = insn_data[icode].operand[4].mode;
17151 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17152 op0 = copy_to_mode_reg (mode1, op0);
17154 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17155 op1 = copy_to_mode_reg (mode2, op1);
17157 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17159 error ("index mask must be an immediate");
17160 return gen_reg_rtx (tmode);
17162 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17164 error ("length mask must be an immediate");
17165 return gen_reg_rtx (tmode);
17167 if (optimize || target == 0
17168 || GET_MODE (target) != tmode
17169 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17170 target = gen_reg_rtx (tmode);
17171 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17177 case IX86_BUILTIN_VEC_INIT_V2SI:
17178 case IX86_BUILTIN_VEC_INIT_V4HI:
17179 case IX86_BUILTIN_VEC_INIT_V8QI:
17180 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17182 case IX86_BUILTIN_VEC_EXT_V2DF:
17183 case IX86_BUILTIN_VEC_EXT_V2DI:
17184 case IX86_BUILTIN_VEC_EXT_V4SF:
17185 case IX86_BUILTIN_VEC_EXT_V4SI:
17186 case IX86_BUILTIN_VEC_EXT_V8HI:
17187 case IX86_BUILTIN_VEC_EXT_V16QI:
17188 case IX86_BUILTIN_VEC_EXT_V2SI:
17189 case IX86_BUILTIN_VEC_EXT_V4HI:
17190 return ix86_expand_vec_ext_builtin (arglist, target);
17192 case IX86_BUILTIN_VEC_SET_V8HI:
17193 case IX86_BUILTIN_VEC_SET_V4HI:
17194 return ix86_expand_vec_set_builtin (arglist);
17200 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17201 if (d->code == fcode)
17203 /* Compares are treated specially. */
17204 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17205 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17206 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17207 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17208 return ix86_expand_sse_compare (d, arglist, target);
17210 return ix86_expand_binop_builtin (d->icode, arglist, target);
17213 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17214 if (d->code == fcode)
17215 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17217 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17218 if (d->code == fcode)
17219 return ix86_expand_sse_comi (d, arglist, target);
17221 gcc_unreachable ();
17224 /* Store OPERAND to the memory after reload is completed. This means
17225 that we can't easily use assign_stack_local. */
17227 ix86_force_to_memory (enum machine_mode mode, rtx operand)
17231 gcc_assert (reload_completed);
17232 if (TARGET_RED_ZONE)
17234 result = gen_rtx_MEM (mode,
17235 gen_rtx_PLUS (Pmode,
17237 GEN_INT (-RED_ZONE_SIZE)));
17238 emit_move_insn (result, operand);
17240 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17246 operand = gen_lowpart (DImode, operand);
17250 gen_rtx_SET (VOIDmode,
17251 gen_rtx_MEM (DImode,
17252 gen_rtx_PRE_DEC (DImode,
17253 stack_pointer_rtx)),
17257 gcc_unreachable ();
17259 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17268 split_di (&operand, 1, operands, operands + 1);
17270 gen_rtx_SET (VOIDmode,
17271 gen_rtx_MEM (SImode,
17272 gen_rtx_PRE_DEC (Pmode,
17273 stack_pointer_rtx)),
17276 gen_rtx_SET (VOIDmode,
17277 gen_rtx_MEM (SImode,
17278 gen_rtx_PRE_DEC (Pmode,
17279 stack_pointer_rtx)),
17284 /* Store HImodes as SImodes. */
17285 operand = gen_lowpart (SImode, operand);
17289 gen_rtx_SET (VOIDmode,
17290 gen_rtx_MEM (GET_MODE (operand),
17291 gen_rtx_PRE_DEC (SImode,
17292 stack_pointer_rtx)),
17296 gcc_unreachable ();
17298 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17303 /* Free operand from the memory. */
17305 ix86_free_from_memory (enum machine_mode mode)
17307 if (!TARGET_RED_ZONE)
17311 if (mode == DImode || TARGET_64BIT)
17315 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17316 to pop or add instruction if registers are available. */
17317 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17318 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17323 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17324 QImode must go into class Q_REGS.
17325 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17326 movdf to do mem-to-mem moves through integer regs. */
17328 ix86_preferred_reload_class (rtx x, enum reg_class class)
17330 enum machine_mode mode = GET_MODE (x);
17332 /* We're only allowed to return a subclass of CLASS. Many of the
17333 following checks fail for NO_REGS, so eliminate that early. */
17334 if (class == NO_REGS)
17337 /* All classes can load zeros. */
17338 if (x == CONST0_RTX (mode))
17341 /* Force constants into memory if we are loading a (nonzero) constant into
17342 an MMX or SSE register. This is because there are no MMX/SSE instructions
17343 to load from a constant. */
17345 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17348 /* Prefer SSE regs only, if we can use them for math. */
17349 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17350 return SSE_CLASS_P (class) ? class : NO_REGS;
17352 /* Floating-point constants need more complex checks. */
17353 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17355 /* General regs can load everything. */
17356 if (reg_class_subset_p (class, GENERAL_REGS))
17359 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17360 zero above. We only want to wind up preferring 80387 registers if
17361 we plan on doing computation with them. */
17363 && standard_80387_constant_p (x))
17365 /* Limit class to non-sse. */
17366 if (class == FLOAT_SSE_REGS)
17368 if (class == FP_TOP_SSE_REGS)
17370 if (class == FP_SECOND_SSE_REGS)
17371 return FP_SECOND_REG;
17372 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17379 /* Generally when we see PLUS here, it's the function invariant
17380 (plus soft-fp const_int). Which can only be computed into general
17382 if (GET_CODE (x) == PLUS)
17383 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17385 /* QImode constants are easy to load, but non-constant QImode data
17386 must go into Q_REGS. */
17387 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17389 if (reg_class_subset_p (class, Q_REGS))
17391 if (reg_class_subset_p (Q_REGS, class))
17399 /* Discourage putting floating-point values in SSE registers unless
17400 SSE math is being used, and likewise for the 387 registers. */
17402 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17404 enum machine_mode mode = GET_MODE (x);
17406 /* Restrict the output reload class to the register bank that we are doing
17407 math on. If we would like not to return a subset of CLASS, reject this
17408 alternative: if reload cannot do this, it will still use its choice. */
17409 mode = GET_MODE (x);
17410 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17411 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17413 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17415 if (class == FP_TOP_SSE_REGS)
17417 else if (class == FP_SECOND_SSE_REGS)
17418 return FP_SECOND_REG;
17420 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17426 /* If we are copying between general and FP registers, we need a memory
17427 location. The same is true for SSE and MMX registers.
17429 The macro can't work reliably when one of the CLASSES is class containing
17430 registers from multiple units (SSE, MMX, integer). We avoid this by never
17431 combining those units in single alternative in the machine description.
17432 Ensure that this constraint holds to avoid unexpected surprises.
17434 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17435 enforce these sanity checks. */
17438 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17439 enum machine_mode mode, int strict)
17441 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17442 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17443 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17444 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17445 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17446 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17448 gcc_assert (!strict);
17452 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17455 /* ??? This is a lie. We do have moves between mmx/general, and for
17456 mmx/sse2. But by saying we need secondary memory we discourage the
17457 register allocator from using the mmx registers unless needed. */
17458 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17461 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17463 /* SSE1 doesn't have any direct moves from other classes. */
17467 /* If the target says that inter-unit moves are more expensive
17468 than moving through memory, then don't generate them. */
17469 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17472 /* Between SSE and general, we have moves no larger than word size. */
17473 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17476 /* ??? For the cost of one register reformat penalty, we could use
17477 the same instructions to move SFmode and DFmode data, but the
17478 relevant move patterns don't support those alternatives. */
17479 if (mode == SFmode || mode == DFmode)
17486 /* Return true if the registers in CLASS cannot represent the change from
17487 modes FROM to TO. */
17490 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17491 enum reg_class class)
17496 /* x87 registers can't do subreg at all, as all values are reformatted
17497 to extended precision. */
17498 if (MAYBE_FLOAT_CLASS_P (class))
17501 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17503 /* Vector registers do not support QI or HImode loads. If we don't
17504 disallow a change to these modes, reload will assume it's ok to
17505 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17506 the vec_dupv4hi pattern. */
17507 if (GET_MODE_SIZE (from) < 4)
17510 /* Vector registers do not support subreg with nonzero offsets, which
17511 are otherwise valid for integer registers. Since we can't see
17512 whether we have a nonzero offset from here, prohibit all
17513 nonparadoxical subregs changing size. */
17514 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17521 /* Return the cost of moving data from a register in class CLASS1 to
17522 one in class CLASS2.
17524 It is not required that the cost always equal 2 when FROM is the same as TO;
17525 on some machines it is expensive to move between registers if they are not
17526 general registers. */
17529 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17530 enum reg_class class2)
17532 /* In case we require secondary memory, compute cost of the store followed
17533 by load. In order to avoid bad register allocation choices, we need
17534 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17536 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17540 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17541 MEMORY_MOVE_COST (mode, class1, 1));
17542 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17543 MEMORY_MOVE_COST (mode, class2, 1));
17545 /* In case of copying from general_purpose_register we may emit multiple
17546 stores followed by single load causing memory size mismatch stall.
17547 Count this as arbitrarily high cost of 20. */
17548 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17551 /* In the case of FP/MMX moves, the registers actually overlap, and we
17552 have to switch modes in order to treat them differently. */
17553 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17554 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17560 /* Moves between SSE/MMX and integer unit are expensive. */
17561 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17562 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17563 return ix86_cost->mmxsse_to_integer;
17564 if (MAYBE_FLOAT_CLASS_P (class1))
17565 return ix86_cost->fp_move;
17566 if (MAYBE_SSE_CLASS_P (class1))
17567 return ix86_cost->sse_move;
17568 if (MAYBE_MMX_CLASS_P (class1))
17569 return ix86_cost->mmx_move;
17573 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17576 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17578 /* Flags and only flags can only hold CCmode values. */
17579 if (CC_REGNO_P (regno))
17580 return GET_MODE_CLASS (mode) == MODE_CC;
17581 if (GET_MODE_CLASS (mode) == MODE_CC
17582 || GET_MODE_CLASS (mode) == MODE_RANDOM
17583 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17585 if (FP_REGNO_P (regno))
17586 return VALID_FP_MODE_P (mode);
17587 if (SSE_REGNO_P (regno))
17589 /* We implement the move patterns for all vector modes into and
17590 out of SSE registers, even when no operation instructions
17592 return (VALID_SSE_REG_MODE (mode)
17593 || VALID_SSE2_REG_MODE (mode)
17594 || VALID_MMX_REG_MODE (mode)
17595 || VALID_MMX_REG_MODE_3DNOW (mode));
17597 if (MMX_REGNO_P (regno))
17599 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17600 so if the register is available at all, then we can move data of
17601 the given mode into or out of it. */
17602 return (VALID_MMX_REG_MODE (mode)
17603 || VALID_MMX_REG_MODE_3DNOW (mode));
17606 if (mode == QImode)
17608 /* Take care for QImode values - they can be in non-QI regs,
17609 but then they do cause partial register stalls. */
17610 if (regno < 4 || TARGET_64BIT)
17612 if (!TARGET_PARTIAL_REG_STALL)
17614 return reload_in_progress || reload_completed;
17616 /* We handle both integer and floats in the general purpose registers. */
17617 else if (VALID_INT_MODE_P (mode))
17619 else if (VALID_FP_MODE_P (mode))
17621 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17622 on to use that value in smaller contexts, this can easily force a
17623 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17624 supporting DImode, allow it. */
17625 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17631 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17632 tieable integer mode. */
17635 ix86_tieable_integer_mode_p (enum machine_mode mode)
17644 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17647 return TARGET_64BIT;
17654 /* Return true if MODE1 is accessible in a register that can hold MODE2
17655 without copying. That is, all register classes that can hold MODE2
17656 can also hold MODE1. */
17659 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17661 if (mode1 == mode2)
17664 if (ix86_tieable_integer_mode_p (mode1)
17665 && ix86_tieable_integer_mode_p (mode2))
17668 /* MODE2 being XFmode implies fp stack or general regs, which means we
17669 can tie any smaller floating point modes to it. Note that we do not
17670 tie this with TFmode. */
17671 if (mode2 == XFmode)
17672 return mode1 == SFmode || mode1 == DFmode;
17674 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17675 that we can tie it with SFmode. */
17676 if (mode2 == DFmode)
17677 return mode1 == SFmode;
17679 /* If MODE2 is only appropriate for an SSE register, then tie with
17680 any other mode acceptable to SSE registers. */
17681 if (GET_MODE_SIZE (mode2) >= 8
17682 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17683 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17685 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17686 with any other mode acceptable to MMX registers. */
17687 if (GET_MODE_SIZE (mode2) == 8
17688 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17689 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17694 /* Return the cost of moving data of mode M between a
17695 register and memory. A value of 2 is the default; this cost is
17696 relative to those in `REGISTER_MOVE_COST'.
17698 If moving between registers and memory is more expensive than
17699 between two registers, you should define this macro to express the
17702 Model also increased moving costs of QImode registers in non
17706 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17708 if (FLOAT_CLASS_P (class))
17725 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17727 if (SSE_CLASS_P (class))
17730 switch (GET_MODE_SIZE (mode))
17744 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17746 if (MMX_CLASS_P (class))
17749 switch (GET_MODE_SIZE (mode))
17760 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17762 switch (GET_MODE_SIZE (mode))
17766 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17767 : ix86_cost->movzbl_load);
17769 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17770 : ix86_cost->int_store[0] + 4);
17773 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17775 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17776 if (mode == TFmode)
17778 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17779 * (((int) GET_MODE_SIZE (mode)
17780 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17784 /* Compute a (partial) cost for rtx X. Return true if the complete
17785 cost has been computed, and false if subexpressions should be
17786 scanned. In either case, *TOTAL contains the cost result. */
17789 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17791 enum machine_mode mode = GET_MODE (x);
17799 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17801 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17803 else if (flag_pic && SYMBOLIC_CONST (x)
17805 || (!GET_CODE (x) != LABEL_REF
17806 && (GET_CODE (x) != SYMBOL_REF
17807 || !SYMBOL_REF_LOCAL_P (x)))))
17814 if (mode == VOIDmode)
17817 switch (standard_80387_constant_p (x))
17822 default: /* Other constants */
17827 /* Start with (MEM (SYMBOL_REF)), since that's where
17828 it'll probably end up. Add a penalty for size. */
17829 *total = (COSTS_N_INSNS (1)
17830 + (flag_pic != 0 && !TARGET_64BIT)
17831 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17837 /* The zero extensions is often completely free on x86_64, so make
17838 it as cheap as possible. */
17839 if (TARGET_64BIT && mode == DImode
17840 && GET_MODE (XEXP (x, 0)) == SImode)
17842 else if (TARGET_ZERO_EXTEND_WITH_AND)
17843 *total = ix86_cost->add;
17845 *total = ix86_cost->movzx;
17849 *total = ix86_cost->movsx;
17853 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17854 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17856 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17859 *total = ix86_cost->add;
17862 if ((value == 2 || value == 3)
17863 && ix86_cost->lea <= ix86_cost->shift_const)
17865 *total = ix86_cost->lea;
17875 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17877 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17879 if (INTVAL (XEXP (x, 1)) > 32)
17880 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17882 *total = ix86_cost->shift_const * 2;
17886 if (GET_CODE (XEXP (x, 1)) == AND)
17887 *total = ix86_cost->shift_var * 2;
17889 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17894 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17895 *total = ix86_cost->shift_const;
17897 *total = ix86_cost->shift_var;
17902 if (FLOAT_MODE_P (mode))
17904 *total = ix86_cost->fmul;
17909 rtx op0 = XEXP (x, 0);
17910 rtx op1 = XEXP (x, 1);
17912 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17914 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17915 for (nbits = 0; value != 0; value &= value - 1)
17919 /* This is arbitrary. */
17922 /* Compute costs correctly for widening multiplication. */
17923 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17924 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17925 == GET_MODE_SIZE (mode))
17927 int is_mulwiden = 0;
17928 enum machine_mode inner_mode = GET_MODE (op0);
17930 if (GET_CODE (op0) == GET_CODE (op1))
17931 is_mulwiden = 1, op1 = XEXP (op1, 0);
17932 else if (GET_CODE (op1) == CONST_INT)
17934 if (GET_CODE (op0) == SIGN_EXTEND)
17935 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17938 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17942 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17945 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17946 + nbits * ix86_cost->mult_bit
17947 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17956 if (FLOAT_MODE_P (mode))
17957 *total = ix86_cost->fdiv;
17959 *total = ix86_cost->divide[MODE_INDEX (mode)];
17963 if (FLOAT_MODE_P (mode))
17964 *total = ix86_cost->fadd;
17965 else if (GET_MODE_CLASS (mode) == MODE_INT
17966 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17968 if (GET_CODE (XEXP (x, 0)) == PLUS
17969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17970 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17971 && CONSTANT_P (XEXP (x, 1)))
17973 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17974 if (val == 2 || val == 4 || val == 8)
17976 *total = ix86_cost->lea;
17977 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17978 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17980 *total += rtx_cost (XEXP (x, 1), outer_code);
17984 else if (GET_CODE (XEXP (x, 0)) == MULT
17985 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17987 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17988 if (val == 2 || val == 4 || val == 8)
17990 *total = ix86_cost->lea;
17991 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17992 *total += rtx_cost (XEXP (x, 1), outer_code);
17996 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17998 *total = ix86_cost->lea;
17999 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18000 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18001 *total += rtx_cost (XEXP (x, 1), outer_code);
18008 if (FLOAT_MODE_P (mode))
18010 *total = ix86_cost->fadd;
18018 if (!TARGET_64BIT && mode == DImode)
18020 *total = (ix86_cost->add * 2
18021 + (rtx_cost (XEXP (x, 0), outer_code)
18022 << (GET_MODE (XEXP (x, 0)) != DImode))
18023 + (rtx_cost (XEXP (x, 1), outer_code)
18024 << (GET_MODE (XEXP (x, 1)) != DImode)));
18030 if (FLOAT_MODE_P (mode))
18032 *total = ix86_cost->fchs;
18038 if (!TARGET_64BIT && mode == DImode)
18039 *total = ix86_cost->add * 2;
18041 *total = ix86_cost->add;
18045 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18046 && XEXP (XEXP (x, 0), 1) == const1_rtx
18047 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18048 && XEXP (x, 1) == const0_rtx)
18050 /* This kind of construct is implemented using test[bwl].
18051 Treat it as if we had an AND. */
18052 *total = (ix86_cost->add
18053 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18054 + rtx_cost (const1_rtx, outer_code));
18060 if (!TARGET_SSE_MATH
18062 || (mode == DFmode && !TARGET_SSE2))
18063 /* For standard 80387 constants, raise the cost to prevent
18064 compress_float_constant() to generate load from memory. */
18065 switch (standard_80387_constant_p (XEXP (x, 0)))
18075 *total = (x86_ext_80387_constants & TUNEMASK
18082 if (FLOAT_MODE_P (mode))
18083 *total = ix86_cost->fabs;
18087 if (FLOAT_MODE_P (mode))
18088 *total = ix86_cost->fsqrt;
18092 if (XINT (x, 1) == UNSPEC_TP)
18103 static int current_machopic_label_num;
18105 /* Given a symbol name and its associated stub, write out the
18106 definition of the stub. */
18109 machopic_output_stub (FILE *file, const char *symb, const char *stub)
18111 unsigned int length;
18112 char *binder_name, *symbol_name, lazy_ptr_name[32];
18113 int label = ++current_machopic_label_num;
18115 /* For 64-bit we shouldn't get here. */
18116 gcc_assert (!TARGET_64BIT);
18118 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18119 symb = (*targetm.strip_name_encoding) (symb);
18121 length = strlen (stub);
18122 binder_name = alloca (length + 32);
18123 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18125 length = strlen (symb);
18126 symbol_name = alloca (length + 32);
18127 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18129 sprintf (lazy_ptr_name, "L%d$lz", label);
18132 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18134 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18136 fprintf (file, "%s:\n", stub);
18137 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18141 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18142 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18143 fprintf (file, "\tjmp\t*%%edx\n");
18146 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18148 fprintf (file, "%s:\n", binder_name);
18152 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18153 fprintf (file, "\tpushl\t%%eax\n");
18156 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18158 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18160 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18161 fprintf (file, "%s:\n", lazy_ptr_name);
18162 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18163 fprintf (file, "\t.long %s\n", binder_name);
18167 darwin_x86_file_end (void)
18169 darwin_file_end ();
18172 #endif /* TARGET_MACHO */
18174 /* Order the registers for register allocator. */
18177 x86_order_regs_for_local_alloc (void)
18182 /* First allocate the local general purpose registers. */
18183 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18184 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18185 reg_alloc_order [pos++] = i;
18187 /* Global general purpose registers. */
18188 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18189 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18190 reg_alloc_order [pos++] = i;
18192 /* x87 registers come first in case we are doing FP math
18194 if (!TARGET_SSE_MATH)
18195 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18196 reg_alloc_order [pos++] = i;
18198 /* SSE registers. */
18199 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18200 reg_alloc_order [pos++] = i;
18201 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18202 reg_alloc_order [pos++] = i;
18204 /* x87 registers. */
18205 if (TARGET_SSE_MATH)
18206 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18207 reg_alloc_order [pos++] = i;
18209 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18210 reg_alloc_order [pos++] = i;
18212 /* Initialize the rest of array as we do not allocate some registers
18214 while (pos < FIRST_PSEUDO_REGISTER)
18215 reg_alloc_order [pos++] = 0;
18218 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18219 struct attribute_spec.handler. */
18221 ix86_handle_struct_attribute (tree *node, tree name,
18222 tree args ATTRIBUTE_UNUSED,
18223 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18226 if (DECL_P (*node))
18228 if (TREE_CODE (*node) == TYPE_DECL)
18229 type = &TREE_TYPE (*node);
18234 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18235 || TREE_CODE (*type) == UNION_TYPE)))
18237 warning (OPT_Wattributes, "%qs attribute ignored",
18238 IDENTIFIER_POINTER (name));
18239 *no_add_attrs = true;
18242 else if ((is_attribute_p ("ms_struct", name)
18243 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18244 || ((is_attribute_p ("gcc_struct", name)
18245 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18247 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18248 IDENTIFIER_POINTER (name));
18249 *no_add_attrs = true;
18256 ix86_ms_bitfield_layout_p (tree record_type)
18258 return (TARGET_MS_BITFIELD_LAYOUT &&
18259 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18260 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18263 /* Returns an expression indicating where the this parameter is
18264 located on entry to the FUNCTION. */
18267 x86_this_parameter (tree function)
18269 tree type = TREE_TYPE (function);
18273 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18274 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18277 if (ix86_function_regparm (type, function) > 0)
18281 parm = TYPE_ARG_TYPES (type);
18282 /* Figure out whether or not the function has a variable number of
18284 for (; parm; parm = TREE_CHAIN (parm))
18285 if (TREE_VALUE (parm) == void_type_node)
18287 /* If not, the this parameter is in the first argument. */
18291 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18293 return gen_rtx_REG (SImode, regno);
18297 if (aggregate_value_p (TREE_TYPE (type), type))
18298 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18300 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18303 /* Determine whether x86_output_mi_thunk can succeed. */
18306 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18307 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18308 HOST_WIDE_INT vcall_offset, tree function)
18310 /* 64-bit can handle anything. */
18314 /* For 32-bit, everything's fine if we have one free register. */
18315 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18318 /* Need a free register for vcall_offset. */
18322 /* Need a free register for GOT references. */
18323 if (flag_pic && !(*targetm.binds_local_p) (function))
18326 /* Otherwise ok. */
18330 /* Output the assembler code for a thunk function. THUNK_DECL is the
18331 declaration for the thunk function itself, FUNCTION is the decl for
18332 the target function. DELTA is an immediate constant offset to be
18333 added to THIS. If VCALL_OFFSET is nonzero, the word at
18334 *(*this + vcall_offset) should be added to THIS. */
18337 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18338 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18339 HOST_WIDE_INT vcall_offset, tree function)
18342 rtx this = x86_this_parameter (function);
18345 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18346 pull it in now and let DELTA benefit. */
18349 else if (vcall_offset)
18351 /* Put the this parameter into %eax. */
18353 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18354 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18357 this_reg = NULL_RTX;
18359 /* Adjust the this parameter by a fixed constant. */
18362 xops[0] = GEN_INT (delta);
18363 xops[1] = this_reg ? this_reg : this;
18366 if (!x86_64_general_operand (xops[0], DImode))
18368 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18370 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18374 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18377 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18380 /* Adjust the this parameter by a value stored in the vtable. */
18384 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18387 int tmp_regno = 2 /* ECX */;
18388 if (lookup_attribute ("fastcall",
18389 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18390 tmp_regno = 0 /* EAX */;
18391 tmp = gen_rtx_REG (SImode, tmp_regno);
18394 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18397 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18399 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18401 /* Adjust the this parameter. */
18402 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18403 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18405 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18406 xops[0] = GEN_INT (vcall_offset);
18408 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18409 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18411 xops[1] = this_reg;
18413 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18415 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18418 /* If necessary, drop THIS back to its stack slot. */
18419 if (this_reg && this_reg != this)
18421 xops[0] = this_reg;
18423 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18426 xops[0] = XEXP (DECL_RTL (function), 0);
18429 if (!flag_pic || (*targetm.binds_local_p) (function))
18430 output_asm_insn ("jmp\t%P0", xops);
18433 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18434 tmp = gen_rtx_CONST (Pmode, tmp);
18435 tmp = gen_rtx_MEM (QImode, tmp);
18437 output_asm_insn ("jmp\t%A0", xops);
18442 if (!flag_pic || (*targetm.binds_local_p) (function))
18443 output_asm_insn ("jmp\t%P0", xops);
18448 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18449 tmp = (gen_rtx_SYMBOL_REF
18451 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18452 tmp = gen_rtx_MEM (QImode, tmp);
18454 output_asm_insn ("jmp\t%0", xops);
18457 #endif /* TARGET_MACHO */
18459 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18460 output_set_got (tmp, NULL_RTX);
18463 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18464 output_asm_insn ("jmp\t{*}%1", xops);
18470 x86_file_start (void)
18472 default_file_start ();
18474 darwin_file_start ();
18476 if (X86_FILE_START_VERSION_DIRECTIVE)
18477 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18478 if (X86_FILE_START_FLTUSED)
18479 fputs ("\t.global\t__fltused\n", asm_out_file);
18480 if (ix86_asm_dialect == ASM_INTEL)
18481 fputs ("\t.intel_syntax\n", asm_out_file);
18485 x86_field_alignment (tree field, int computed)
18487 enum machine_mode mode;
18488 tree type = TREE_TYPE (field);
18490 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18492 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18493 ? get_inner_array_type (type) : type);
18494 if (mode == DFmode || mode == DCmode
18495 || GET_MODE_CLASS (mode) == MODE_INT
18496 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18497 return MIN (32, computed);
18501 /* Output assembler code to FILE to increment profiler label # LABELNO
18502 for profiling a function entry. */
18504 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18509 #ifndef NO_PROFILE_COUNTERS
18510 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18512 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18516 #ifndef NO_PROFILE_COUNTERS
18517 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18519 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18523 #ifndef NO_PROFILE_COUNTERS
18524 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18525 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18527 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18531 #ifndef NO_PROFILE_COUNTERS
18532 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18533 PROFILE_COUNT_REGISTER);
18535 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18539 /* We don't have exact information about the insn sizes, but we may assume
18540 quite safely that we are informed about all 1 byte insns and memory
18541 address sizes. This is enough to eliminate unnecessary padding in
18545 min_insn_size (rtx insn)
18549 if (!INSN_P (insn) || !active_insn_p (insn))
18552 /* Discard alignments we've emit and jump instructions. */
18553 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18554 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18556 if (GET_CODE (insn) == JUMP_INSN
18557 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18558 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18561 /* Important case - calls are always 5 bytes.
18562 It is common to have many calls in the row. */
18563 if (GET_CODE (insn) == CALL_INSN
18564 && symbolic_reference_mentioned_p (PATTERN (insn))
18565 && !SIBLING_CALL_P (insn))
18567 if (get_attr_length (insn) <= 1)
18570 /* For normal instructions we may rely on the sizes of addresses
18571 and the presence of symbol to require 4 bytes of encoding.
18572 This is not the case for jumps where references are PC relative. */
18573 if (GET_CODE (insn) != JUMP_INSN)
18575 l = get_attr_length_address (insn);
18576 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18585 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18589 ix86_avoid_jump_misspredicts (void)
18591 rtx insn, start = get_insns ();
18592 int nbytes = 0, njumps = 0;
18595 /* Look for all minimal intervals of instructions containing 4 jumps.
18596 The intervals are bounded by START and INSN. NBYTES is the total
18597 size of instructions in the interval including INSN and not including
18598 START. When the NBYTES is smaller than 16 bytes, it is possible
18599 that the end of START and INSN ends up in the same 16byte page.
18601 The smallest offset in the page INSN can start is the case where START
18602 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
18603 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
18605 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18608 nbytes += min_insn_size (insn);
18610 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18611 INSN_UID (insn), min_insn_size (insn));
18612 if ((GET_CODE (insn) == JUMP_INSN
18613 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18614 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18615 || GET_CODE (insn) == CALL_INSN)
18622 start = NEXT_INSN (start);
18623 if ((GET_CODE (start) == JUMP_INSN
18624 && GET_CODE (PATTERN (start)) != ADDR_VEC
18625 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18626 || GET_CODE (start) == CALL_INSN)
18627 njumps--, isjump = 1;
18630 nbytes -= min_insn_size (start);
18632 gcc_assert (njumps >= 0);
18634 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18635 INSN_UID (start), INSN_UID (insn), nbytes);
18637 if (njumps == 3 && isjump && nbytes < 16)
18639 int padsize = 15 - nbytes + min_insn_size (insn);
18642 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18643 INSN_UID (insn), padsize);
18644 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18649 /* AMD Athlon works faster
18650 when RET is not destination of conditional jump or directly preceded
18651 by other jump instruction. We avoid the penalty by inserting NOP just
18652 before the RET instructions in such cases. */
18654 ix86_pad_returns (void)
18659 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18661 basic_block bb = e->src;
18662 rtx ret = BB_END (bb);
18664 bool replace = false;
18666 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18667 || !maybe_hot_bb_p (bb))
18669 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18670 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18672 if (prev && GET_CODE (prev) == CODE_LABEL)
18677 FOR_EACH_EDGE (e, ei, bb->preds)
18678 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18679 && !(e->flags & EDGE_FALLTHRU))
18684 prev = prev_active_insn (ret);
18686 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18687 || GET_CODE (prev) == CALL_INSN))
18689 /* Empty functions get branch mispredict even when the jump destination
18690 is not visible to us. */
18691 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18696 emit_insn_before (gen_return_internal_long (), ret);
18702 /* Implement machine specific optimizations. We implement padding of returns
18703 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18707 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18708 ix86_pad_returns ();
18709 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18710 ix86_avoid_jump_misspredicts ();
18713 /* Return nonzero when QImode register that must be represented via REX prefix
18716 x86_extended_QIreg_mentioned_p (rtx insn)
18719 extract_insn_cached (insn);
18720 for (i = 0; i < recog_data.n_operands; i++)
18721 if (REG_P (recog_data.operand[i])
18722 && REGNO (recog_data.operand[i]) >= 4)
18727 /* Return nonzero when P points to register encoded via REX prefix.
18728 Called via for_each_rtx. */
18730 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18732 unsigned int regno;
18735 regno = REGNO (*p);
18736 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18739 /* Return true when INSN mentions register that must be encoded using REX
18742 x86_extended_reg_mentioned_p (rtx insn)
18744 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18747 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18748 optabs would emit if we didn't have TFmode patterns. */
18751 x86_emit_floatuns (rtx operands[2])
18753 rtx neglab, donelab, i0, i1, f0, in, out;
18754 enum machine_mode mode, inmode;
18756 inmode = GET_MODE (operands[1]);
18757 gcc_assert (inmode == SImode || inmode == DImode);
18760 in = force_reg (inmode, operands[1]);
18761 mode = GET_MODE (out);
18762 neglab = gen_label_rtx ();
18763 donelab = gen_label_rtx ();
18764 i1 = gen_reg_rtx (Pmode);
18765 f0 = gen_reg_rtx (mode);
18767 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18769 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18770 emit_jump_insn (gen_jump (donelab));
18773 emit_label (neglab);
18775 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18776 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18777 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18778 expand_float (f0, i0, 0);
18779 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18781 emit_label (donelab);
18784 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18785 with all elements equal to VAR. Return true if successful. */
18788 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18789 rtx target, rtx val)
18791 enum machine_mode smode, wsmode, wvmode;
18806 val = force_reg (GET_MODE_INNER (mode), val);
18807 x = gen_rtx_VEC_DUPLICATE (mode, val);
18808 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18814 if (TARGET_SSE || TARGET_3DNOW_A)
18816 val = gen_lowpart (SImode, val);
18817 x = gen_rtx_TRUNCATE (HImode, val);
18818 x = gen_rtx_VEC_DUPLICATE (mode, x);
18819 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18841 /* Extend HImode to SImode using a paradoxical SUBREG. */
18842 tmp1 = gen_reg_rtx (SImode);
18843 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18844 /* Insert the SImode value as low element of V4SImode vector. */
18845 tmp2 = gen_reg_rtx (V4SImode);
18846 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18847 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18848 CONST0_RTX (V4SImode),
18850 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18851 /* Cast the V4SImode vector back to a V8HImode vector. */
18852 tmp1 = gen_reg_rtx (V8HImode);
18853 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18854 /* Duplicate the low short through the whole low SImode word. */
18855 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18856 /* Cast the V8HImode vector back to a V4SImode vector. */
18857 tmp2 = gen_reg_rtx (V4SImode);
18858 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18859 /* Replicate the low element of the V4SImode vector. */
18860 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18861 /* Cast the V2SImode back to V8HImode, and store in target. */
18862 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18873 /* Extend QImode to SImode using a paradoxical SUBREG. */
18874 tmp1 = gen_reg_rtx (SImode);
18875 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18876 /* Insert the SImode value as low element of V4SImode vector. */
18877 tmp2 = gen_reg_rtx (V4SImode);
18878 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18879 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18880 CONST0_RTX (V4SImode),
18882 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18883 /* Cast the V4SImode vector back to a V16QImode vector. */
18884 tmp1 = gen_reg_rtx (V16QImode);
18885 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18886 /* Duplicate the low byte through the whole low SImode word. */
18887 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18888 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18889 /* Cast the V16QImode vector back to a V4SImode vector. */
18890 tmp2 = gen_reg_rtx (V4SImode);
18891 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18892 /* Replicate the low element of the V4SImode vector. */
18893 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18894 /* Cast the V2SImode back to V16QImode, and store in target. */
18895 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18903 /* Replicate the value once into the next wider mode and recurse. */
18904 val = convert_modes (wsmode, smode, val, true);
18905 x = expand_simple_binop (wsmode, ASHIFT, val,
18906 GEN_INT (GET_MODE_BITSIZE (smode)),
18907 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18908 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18910 x = gen_reg_rtx (wvmode);
18911 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18912 gcc_unreachable ();
18913 emit_move_insn (target, gen_lowpart (mode, x));
18921 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18922 whose ONE_VAR element is VAR, and other elements are zero. Return true
18926 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18927 rtx target, rtx var, int one_var)
18929 enum machine_mode vsimode;
18945 var = force_reg (GET_MODE_INNER (mode), var);
18946 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18947 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18952 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18953 new_target = gen_reg_rtx (mode);
18955 new_target = target;
18956 var = force_reg (GET_MODE_INNER (mode), var);
18957 x = gen_rtx_VEC_DUPLICATE (mode, var);
18958 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18959 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18962 /* We need to shuffle the value to the correct position, so
18963 create a new pseudo to store the intermediate result. */
18965 /* With SSE2, we can use the integer shuffle insns. */
18966 if (mode != V4SFmode && TARGET_SSE2)
18968 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18970 GEN_INT (one_var == 1 ? 0 : 1),
18971 GEN_INT (one_var == 2 ? 0 : 1),
18972 GEN_INT (one_var == 3 ? 0 : 1)));
18973 if (target != new_target)
18974 emit_move_insn (target, new_target);
18978 /* Otherwise convert the intermediate result to V4SFmode and
18979 use the SSE1 shuffle instructions. */
18980 if (mode != V4SFmode)
18982 tmp = gen_reg_rtx (V4SFmode);
18983 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18988 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18990 GEN_INT (one_var == 1 ? 0 : 1),
18991 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18992 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18994 if (mode != V4SFmode)
18995 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18996 else if (tmp != target)
18997 emit_move_insn (target, tmp);
18999 else if (target != new_target)
19000 emit_move_insn (target, new_target);
19005 vsimode = V4SImode;
19011 vsimode = V2SImode;
19017 /* Zero extend the variable element to SImode and recurse. */
19018 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19020 x = gen_reg_rtx (vsimode);
19021 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19023 gcc_unreachable ();
19025 emit_move_insn (target, gen_lowpart (mode, x));
19033 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19034 consisting of the values in VALS. It is known that all elements
19035 except ONE_VAR are constants. Return true if successful. */
19038 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19039 rtx target, rtx vals, int one_var)
19041 rtx var = XVECEXP (vals, 0, one_var);
19042 enum machine_mode wmode;
19045 const_vec = copy_rtx (vals);
19046 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19047 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19055 /* For the two element vectors, it's just as easy to use
19056 the general case. */
19072 /* There's no way to set one QImode entry easily. Combine
19073 the variable value with its adjacent constant value, and
19074 promote to an HImode set. */
19075 x = XVECEXP (vals, 0, one_var ^ 1);
19078 var = convert_modes (HImode, QImode, var, true);
19079 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19080 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19081 x = GEN_INT (INTVAL (x) & 0xff);
19085 var = convert_modes (HImode, QImode, var, true);
19086 x = gen_int_mode (INTVAL (x) << 8, HImode);
19088 if (x != const0_rtx)
19089 var = expand_simple_binop (HImode, IOR, var, x, var,
19090 1, OPTAB_LIB_WIDEN);
19092 x = gen_reg_rtx (wmode);
19093 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19094 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19096 emit_move_insn (target, gen_lowpart (mode, x));
19103 emit_move_insn (target, const_vec);
19104 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19108 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19109 all values variable, and none identical. */
19112 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19113 rtx target, rtx vals)
19115 enum machine_mode half_mode = GET_MODE_INNER (mode);
19116 rtx op0 = NULL, op1 = NULL;
19117 bool use_vec_concat = false;
19123 if (!mmx_ok && !TARGET_SSE)
19129 /* For the two element vectors, we always implement VEC_CONCAT. */
19130 op0 = XVECEXP (vals, 0, 0);
19131 op1 = XVECEXP (vals, 0, 1);
19132 use_vec_concat = true;
19136 half_mode = V2SFmode;
19139 half_mode = V2SImode;
19145 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19146 Recurse to load the two halves. */
19148 op0 = gen_reg_rtx (half_mode);
19149 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19150 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19152 op1 = gen_reg_rtx (half_mode);
19153 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19154 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19156 use_vec_concat = true;
19167 gcc_unreachable ();
19170 if (use_vec_concat)
19172 if (!register_operand (op0, half_mode))
19173 op0 = force_reg (half_mode, op0);
19174 if (!register_operand (op1, half_mode))
19175 op1 = force_reg (half_mode, op1);
19177 emit_insn (gen_rtx_SET (VOIDmode, target,
19178 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19182 int i, j, n_elts, n_words, n_elt_per_word;
19183 enum machine_mode inner_mode;
19184 rtx words[4], shift;
19186 inner_mode = GET_MODE_INNER (mode);
19187 n_elts = GET_MODE_NUNITS (mode);
19188 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19189 n_elt_per_word = n_elts / n_words;
19190 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19192 for (i = 0; i < n_words; ++i)
19194 rtx word = NULL_RTX;
19196 for (j = 0; j < n_elt_per_word; ++j)
19198 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19199 elt = convert_modes (word_mode, inner_mode, elt, true);
19205 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19206 word, 1, OPTAB_LIB_WIDEN);
19207 word = expand_simple_binop (word_mode, IOR, word, elt,
19208 word, 1, OPTAB_LIB_WIDEN);
19216 emit_move_insn (target, gen_lowpart (mode, words[0]));
19217 else if (n_words == 2)
19219 rtx tmp = gen_reg_rtx (mode);
19220 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19221 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19222 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19223 emit_move_insn (target, tmp);
19225 else if (n_words == 4)
19227 rtx tmp = gen_reg_rtx (V4SImode);
19228 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19229 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19230 emit_move_insn (target, gen_lowpart (mode, tmp));
19233 gcc_unreachable ();
19237 /* Initialize vector TARGET via VALS. Suppress the use of MMX
19238 instructions unless MMX_OK is true. */
19241 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19243 enum machine_mode mode = GET_MODE (target);
19244 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19245 int n_elts = GET_MODE_NUNITS (mode);
19246 int n_var = 0, one_var = -1;
19247 bool all_same = true, all_const_zero = true;
19251 for (i = 0; i < n_elts; ++i)
19253 x = XVECEXP (vals, 0, i);
19254 if (!CONSTANT_P (x))
19255 n_var++, one_var = i;
19256 else if (x != CONST0_RTX (inner_mode))
19257 all_const_zero = false;
19258 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19262 /* Constants are best loaded from the constant pool. */
19265 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19269 /* If all values are identical, broadcast the value. */
19271 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19272 XVECEXP (vals, 0, 0)))
19275 /* Values where only one field is non-constant are best loaded from
19276 the pool and overwritten via move later. */
19280 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19281 XVECEXP (vals, 0, one_var),
19285 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19289 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19293 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19295 enum machine_mode mode = GET_MODE (target);
19296 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19297 bool use_vec_merge = false;
19306 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19307 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19309 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19311 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19312 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19322 /* For the two element vectors, we implement a VEC_CONCAT with
19323 the extraction of the other element. */
19325 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19326 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19329 op0 = val, op1 = tmp;
19331 op0 = tmp, op1 = val;
19333 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19334 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19342 use_vec_merge = true;
19346 /* tmp = target = A B C D */
19347 tmp = copy_to_reg (target);
19348 /* target = A A B B */
19349 emit_insn (gen_sse_unpcklps (target, target, target));
19350 /* target = X A B B */
19351 ix86_expand_vector_set (false, target, val, 0);
19352 /* target = A X C D */
19353 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19354 GEN_INT (1), GEN_INT (0),
19355 GEN_INT (2+4), GEN_INT (3+4)));
19359 /* tmp = target = A B C D */
19360 tmp = copy_to_reg (target);
19361 /* tmp = X B C D */
19362 ix86_expand_vector_set (false, tmp, val, 0);
19363 /* target = A B X D */
19364 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19365 GEN_INT (0), GEN_INT (1),
19366 GEN_INT (0+4), GEN_INT (3+4)));
19370 /* tmp = target = A B C D */
19371 tmp = copy_to_reg (target);
19372 /* tmp = X B C D */
19373 ix86_expand_vector_set (false, tmp, val, 0);
19374 /* target = A B X D */
19375 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19376 GEN_INT (0), GEN_INT (1),
19377 GEN_INT (2+4), GEN_INT (0+4)));
19381 gcc_unreachable ();
19386 /* Element 0 handled by vec_merge below. */
19389 use_vec_merge = true;
19395 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19396 store into element 0, then shuffle them back. */
19400 order[0] = GEN_INT (elt);
19401 order[1] = const1_rtx;
19402 order[2] = const2_rtx;
19403 order[3] = GEN_INT (3);
19404 order[elt] = const0_rtx;
19406 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19407 order[1], order[2], order[3]));
19409 ix86_expand_vector_set (false, target, val, 0);
19411 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19412 order[1], order[2], order[3]));
19416 /* For SSE1, we have to reuse the V4SF code. */
19417 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19418 gen_lowpart (SFmode, val), elt);
19423 use_vec_merge = TARGET_SSE2;
19426 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19437 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19438 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19439 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19443 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19445 emit_move_insn (mem, target);
19447 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19448 emit_move_insn (tmp, val);
19450 emit_move_insn (target, mem);
19455 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19457 enum machine_mode mode = GET_MODE (vec);
19458 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19459 bool use_vec_extr = false;
19472 use_vec_extr = true;
19484 tmp = gen_reg_rtx (mode);
19485 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19486 GEN_INT (elt), GEN_INT (elt),
19487 GEN_INT (elt+4), GEN_INT (elt+4)));
19491 tmp = gen_reg_rtx (mode);
19492 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19496 gcc_unreachable ();
19499 use_vec_extr = true;
19514 tmp = gen_reg_rtx (mode);
19515 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19516 GEN_INT (elt), GEN_INT (elt),
19517 GEN_INT (elt), GEN_INT (elt)));
19521 tmp = gen_reg_rtx (mode);
19522 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19526 gcc_unreachable ();
19529 use_vec_extr = true;
19534 /* For SSE1, we have to reuse the V4SF code. */
19535 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19536 gen_lowpart (V4SFmode, vec), elt);
19542 use_vec_extr = TARGET_SSE2;
19545 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19550 /* ??? Could extract the appropriate HImode element and shift. */
19557 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19558 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19560 /* Let the rtl optimizers know about the zero extension performed. */
19561 if (inner_mode == HImode)
19563 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19564 target = gen_lowpart (SImode, target);
19567 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19571 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19573 emit_move_insn (mem, vec);
19575 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19576 emit_move_insn (target, tmp);
19580 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19581 pattern to reduce; DEST is the destination; IN is the input vector. */
19584 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19586 rtx tmp1, tmp2, tmp3;
19588 tmp1 = gen_reg_rtx (V4SFmode);
19589 tmp2 = gen_reg_rtx (V4SFmode);
19590 tmp3 = gen_reg_rtx (V4SFmode);
19592 emit_insn (gen_sse_movhlps (tmp1, in, in));
19593 emit_insn (fn (tmp2, tmp1, in));
19595 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19596 GEN_INT (1), GEN_INT (1),
19597 GEN_INT (1+4), GEN_INT (1+4)));
19598 emit_insn (fn (dest, tmp2, tmp3));
19601 /* Target hook for scalar_mode_supported_p. */
19603 ix86_scalar_mode_supported_p (enum machine_mode mode)
19605 if (DECIMAL_FLOAT_MODE_P (mode))
19608 return default_scalar_mode_supported_p (mode);
19611 /* Implements target hook vector_mode_supported_p. */
19613 ix86_vector_mode_supported_p (enum machine_mode mode)
19615 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19617 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19619 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19621 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19626 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19628 We do this in the new i386 backend to maintain source compatibility
19629 with the old cc0-based compiler. */
19632 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19633 tree inputs ATTRIBUTE_UNUSED,
19636 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19638 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19640 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19645 /* Return true if this goes in small data/bss. */
19648 ix86_in_large_data_p (tree exp)
19650 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19653 /* Functions are never large data. */
19654 if (TREE_CODE (exp) == FUNCTION_DECL)
19657 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19659 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19660 if (strcmp (section, ".ldata") == 0
19661 || strcmp (section, ".lbss") == 0)
19667 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19669 /* If this is an incomplete type with size 0, then we can't put it
19670 in data because it might be too big when completed. */
19671 if (!size || size > ix86_section_threshold)
19678 ix86_encode_section_info (tree decl, rtx rtl, int first)
19680 default_encode_section_info (decl, rtl, first);
19682 if (TREE_CODE (decl) == VAR_DECL
19683 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19684 && ix86_in_large_data_p (decl))
19685 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19688 /* Worker function for REVERSE_CONDITION. */
19691 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19693 return (mode != CCFPmode && mode != CCFPUmode
19694 ? reverse_condition (code)
19695 : reverse_condition_maybe_unordered (code));
19698 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19702 output_387_reg_move (rtx insn, rtx *operands)
19704 if (REG_P (operands[1])
19705 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19707 if (REGNO (operands[0]) == FIRST_STACK_REG)
19708 return output_387_ffreep (operands, 0);
19709 return "fstp\t%y0";
19711 if (STACK_TOP_P (operands[0]))
19712 return "fld%z1\t%y1";
19716 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19717 FP status register is set. */
19720 ix86_emit_fp_unordered_jump (rtx label)
19722 rtx reg = gen_reg_rtx (HImode);
19725 emit_insn (gen_x86_fnstsw_1 (reg));
19727 if (TARGET_USE_SAHF)
19729 emit_insn (gen_x86_sahf_1 (reg));
19731 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19732 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19736 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19738 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19739 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19742 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19743 gen_rtx_LABEL_REF (VOIDmode, label),
19745 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19746 emit_jump_insn (temp);
19749 /* Output code to perform a log1p XFmode calculation. */
19751 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19753 rtx label1 = gen_label_rtx ();
19754 rtx label2 = gen_label_rtx ();
19756 rtx tmp = gen_reg_rtx (XFmode);
19757 rtx tmp2 = gen_reg_rtx (XFmode);
19759 emit_insn (gen_absxf2 (tmp, op1));
19760 emit_insn (gen_cmpxf (tmp,
19761 CONST_DOUBLE_FROM_REAL_VALUE (
19762 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19764 emit_jump_insn (gen_bge (label1));
19766 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19767 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19768 emit_jump (label2);
19770 emit_label (label1);
19771 emit_move_insn (tmp, CONST1_RTX (XFmode));
19772 emit_insn (gen_addxf3 (tmp, op1, tmp));
19773 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19774 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19776 emit_label (label2);
19779 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19782 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19785 /* With Binutils 2.15, the "@unwind" marker must be specified on
19786 every occurrence of the ".eh_frame" section, not just the first
19789 && strcmp (name, ".eh_frame") == 0)
19791 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19792 flags & SECTION_WRITE ? "aw" : "a");
19795 default_elf_asm_named_section (name, flags, decl);
19798 /* Return the mangling of TYPE if it is an extended fundamental type. */
19800 static const char *
19801 ix86_mangle_fundamental_type (tree type)
19803 switch (TYPE_MODE (type))
19806 /* __float128 is "g". */
19809 /* "long double" or __float80 is "e". */
19816 /* For 32-bit code we can save PIC register setup by using
19817 __stack_chk_fail_local hidden function instead of calling
19818 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19819 register, so it is better to call __stack_chk_fail directly. */
19822 ix86_stack_protect_fail (void)
19824 return TARGET_64BIT
19825 ? default_external_stack_protect_fail ()
19826 : default_hidden_stack_protect_fail ();
19829 /* Select a format to encode pointers in exception handling data. CODE
19830 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19831 true if the symbol may be affected by dynamic relocations.
19833 ??? All x86 object file formats are capable of representing this.
19834 After all, the relocation needed is the same as for the call insn.
19835 Whether or not a particular assembler allows us to enter such, I
19836 guess we'll have to see. */
19838 asm_preferred_eh_data_format (int code, int global)
19842 int type = DW_EH_PE_sdata8;
19844 || ix86_cmodel == CM_SMALL_PIC
19845 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19846 type = DW_EH_PE_sdata4;
19847 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19849 if (ix86_cmodel == CM_SMALL
19850 || (ix86_cmodel == CM_MEDIUM && code))
19851 return DW_EH_PE_udata4;
19852 return DW_EH_PE_absptr;
19855 #include "gt-i386.h"