/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
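
/* Illustrative sketch (editorial, not from the original source): the
   QI..DI rows of the multiply and divide cost arrays below are selected
   with MODE_INDEX, along the lines of

     cost = ix86_cost->mult_init[MODE_INDEX (mode)];

   in ix86_rtx_costs later in this file, assuming the mult_init[] and
   divide[] field names that i386.h declares for these tables.  */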
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
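
/* Worked example of the shared cost scale (an editorial note, assuming
   COSTS_N_INSNS (N) really is ((N) * 4)): COSTS_N_INSNS (1) == 4 for the
   one-insn add, and COSTS_N_BYTES (2) == 4 for the 2-byte add, so the
   size-tuned table below and the speed-tuned tables that follow can be
   consulted through the same ix86_cost pointer without any rescaling.  */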
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					     MOVD reg64, xmmreg	Double	FSTORE 4
					     MOVD reg32, xmmreg	Double	FSTORE 4
					   On AMDFAM10:
					     MOVD reg64, xmmreg	Double	FADD 3
					     MOVD reg32, xmmreg	Double	FADD 3 */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/* HI */
   COSTS_N_INSNS (22),			/* SI */
   COSTS_N_INSNS (22),			/* DI */
   COSTS_N_INSNS (22)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
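
/* How ix86_cost gets repointed (a sketch of what override_options does
   later in this file once -mtune has been processed):

     ix86_cost = processor_target_table[ix86_tune].cost;

   where processor_target_table maps each PROCESSOR_* value to the
   matching cost table above.  */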
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
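
/* Illustrative sketch of how these bitmasks are consumed: i386.h wraps
   each flag below in a TARGET_* macro that masks it with the bit of the
   CPU being tuned for, along the lines of

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so "if (TARGET_USE_LEAVE)" asks whether the -mtune CPU's m_* bit is
   set in x86_use_leave.  */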
/* Generic instruction choice should be the common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */

/* Using leave does not affect Nocona SPEC2000 results negatively, so enabling
   it for Generic64 seems like a good code size tradeoff.  We can't enable it
   for 32bit generic because it does not work well with PPro based chips.  */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
			  | m_GENERIC64;

const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
			    | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Enable zero extending of integer registers to avoid partial register
   dependencies.  */
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
		     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
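/* Illustrative example of the partial register problem x86_movx avoids:
   "movb mem, %al" writes only the low 8 bits of %eax, so a following use
   of %eax depends on the register's previous contents; "movzbl mem, %eax"
   rewrites all 32 bits and breaks that false dependency.  */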
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
			      | m_K6 | m_CORE2 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
		      | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
			    | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation results.  But after P4 was
   made, no performance benefit was observed with branch hints; they also
   increase code size.  As a result, icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
			 /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However, in the current implementation the
   partial register stalls are not eliminated very well - they can be
   introduced via subregs synthesized by combine and can happen in
   caller/callee saving sequences.  Because this option pays back little on
   PPro based chips and is in conflict with the partial register dependencies
   used by Athlon/P4 based chips, it is better to leave it off for generic32
   for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
				  | m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
			       | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;

const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Enable if an add/sub of the stack pointer is preferred over 1 or 2
   push/pop operations.  */
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
			  | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
			  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
			  | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
			  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
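/* Illustrative example for the four flags above: releasing 8 bytes of
   stack may be emitted either as
     addl $8, %esp
   or as two pops into a scratch register
     popl %ecx
     popl %ecx
   and likewise "subl $N, %esp" versus one or two pushes when allocating;
   the x86_sub_esp_4/8 and x86_add_esp_4/8 bits say which form the tuned
   CPU prefers.  */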
/* Enable if integer moves are preferred for DFmode copies.  */
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
				       | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
				       | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
				      | m_CORE2 | m_GENERIC;
/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
   for outgoing arguments will be computed and placed into the variable
   `current_function_outgoing_args_size'.  No space will be pushed onto the
   stack for each call; instead, the function prologue should increase the
   stack frame size by this amount.  Setting both PUSH_ARGS and
   ACCUMULATE_OUTGOING_ARGS is not proper.  */
const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
					 | m_NOCONA | m_PPRO | m_CORE2
					 | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
					   | m_ATHLON_K8_AMDFAM10 | m_PENT4
					   | m_NOCONA | m_CORE2 | m_GENERIC;
/* In the Generic model we have a conflict here between PPro/Pentium4 based
   chips that treat 128bit SSE registers as single units and K8 based chips
   that divide SSE registers into two 64bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results show that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly a 2.4% regression that can be partly masked by careful
   scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
					   | m_GENERIC | m_AMDFAM10;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     if (x86_sse_partial_reg_dependency == true)
       xorps reg, reg; movlps mem, reg; movhps mem+8, reg
     else
       movlps mem, reg; movhps mem+8, reg

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     if (x86_sse_split_regs == true)
       movlpd mem, reg; movhpd mem+8, reg
     else
       movsd mem, reg; movhpd mem+8, reg  */
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
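/* Illustrative example for x86_use_incdec: "incl %eax" leaves CF
   untouched and so writes only part of EFLAGS, creating a partial flag
   register stall on P4-class chips; "addl $1, %eax" rewrites all the
   arithmetic flags and avoids the dependency at the cost of a longer
   encoding.  */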
/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
				    | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
				| m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
			 | m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
 */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
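
/* Sketch of how the maps above are consumed (this is the shape of the
   DBX_REGISTER_NUMBER definition in i386.h; -1 entries mean the register
   has no number in that debug format):

     #define DBX_REGISTER_NUMBER(n) \
       (TARGET_64BIT ? dbx64_register_map[n] : dbx_register_map[n])

   with SVR4-style targets substituting svr4_dbx_register_map.  */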
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
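
/* Worked example (assuming the usual 64-bit ABI values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8): the va_arg register save
   area is 6 * 8 = 48 bytes of integer registers followed by 8 * 16 = 128
   bytes of SSE registers, so X86_64_VARARGS_SIZE is 176 bytes.  */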
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  HOST_WIDE_INT frame;
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;
/* True if cmpxchg16b is supported.  */
int x86_cmpxchg16b;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
1334 static bool ix86_handle_option (size_t, const char *, int);
1335 static void output_pic_addr_const (FILE *, rtx, int);
1336 static void put_condition_code (enum rtx_code, enum machine_mode,
1338 static const char *get_some_local_dynamic_name (void);
1339 static int get_some_local_dynamic_name_1 (rtx *, void *);
1340 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1341 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1343 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1344 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1346 static rtx get_thread_pointer (int);
1347 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1348 static void get_pc_thunk_name (char [32], unsigned int);
1349 static rtx gen_push (rtx);
1350 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1351 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1352 static struct machine_function * ix86_init_machine_status (void);
1353 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1354 static int ix86_nsaved_regs (void);
1355 static void ix86_emit_save_regs (void);
1356 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1357 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1358 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1359 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1360 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1361 static rtx ix86_expand_aligntest (rtx, int);
1362 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1363 static int ix86_issue_rate (void);
1364 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1365 static int ia32_multipass_dfa_lookahead (void);
1366 static void ix86_init_mmx_sse_builtins (void);
1367 static rtx x86_this_parameter (tree);
1368 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1369 HOST_WIDE_INT, tree);
1370 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1371 static void x86_file_start (void);
1372 static void ix86_reorg (void);
1373 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1374 static tree ix86_build_builtin_va_list (void);
1375 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1377 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1378 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1379 static bool ix86_vector_mode_supported_p (enum machine_mode);
1381 static int ix86_address_cost (rtx);
1382 static bool ix86_cannot_force_const_mem (rtx);
1383 static rtx ix86_delegitimize_address (rtx);
1385 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1387 struct builtin_description;
1388 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1390 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1392 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1393 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1394 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1395 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1396 static rtx safe_vector_operand (rtx, enum machine_mode);
1397 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1398 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1399 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1400 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1401 static int ix86_fp_comparison_cost (enum rtx_code code);
1402 static unsigned int ix86_select_alt_pic_regnum (void);
1403 static int ix86_save_reg (unsigned int, int);
1404 static void ix86_compute_frame_layout (struct ix86_frame *);
1405 static int ix86_comp_type_attributes (tree, tree);
1406 static int ix86_function_regparm (tree, tree);
1407 const struct attribute_spec ix86_attribute_table[];
1408 static bool ix86_function_ok_for_sibcall (tree, tree);
1409 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1410 static int ix86_value_regno (enum machine_mode, tree, tree);
1411 static bool contains_128bit_aligned_vector_p (tree);
1412 static rtx ix86_struct_value_rtx (tree, int);
1413 static bool ix86_ms_bitfield_layout_p (tree);
1414 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1415 static int extended_reg_mentioned_1 (rtx *, void *);
1416 static bool ix86_rtx_costs (rtx, int, int, int *);
1417 static int min_insn_size (rtx);
1418 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1419 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1420 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1422 static void ix86_init_builtins (void);
1423 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1424 static const char *ix86_mangle_fundamental_type (tree);
1425 static tree ix86_stack_protect_fail (void);
1426 static rtx ix86_internal_arg_pointer (void);
1427 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1429 /* This function is only used on Solaris. */
1430 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1431   ATTRIBUTE_UNUSED;
1433 /* Register class used for passing the given 64bit part of the argument.
1434 These represent classes as documented by the PS ABI, with the exception
1435 of the SSESF and SSEDF classes, which are basically the SSE class except
1436 that gcc will use SF or DFmode moves instead of DImode moves to avoid
1437 reformatting penalties.
1438 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1439 whenever possible (the upper half then being just padding).  */
1441 enum x86_64_reg_class
1442 {
1443   X86_64_NO_CLASS,
1444   X86_64_INTEGER_CLASS,
1445   X86_64_INTEGERSI_CLASS,
1446   X86_64_SSE_CLASS,
1447   X86_64_SSESF_CLASS,
1448   X86_64_SSEDF_CLASS,
1449   X86_64_SSEUP_CLASS,
1450   X86_64_X87_CLASS,
1451   X86_64_X87UP_CLASS,
1452   X86_64_COMPLEX_X87_CLASS,
1453   X86_64_MEMORY_CLASS
1454 };
1455 static const char * const x86_64_reg_class_name[] = {
1456 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1457 "sseup", "x87", "x87up", "cplx87", "no"
1460 #define MAX_CLASSES 4
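/* An illustrative pairing of these classes (a sketch following the
   notes above, not part of the original table): a
   struct { double d; long l; } occupies two eightbytes; the first is
   classified X86_64_SSEDF_CLASS and the second X86_64_INTEGER_CLASS,
   so the struct is passed in one SSE and one integer register.  */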
1462 /* Table of constants used by fldpi, fldln2, etc.... */
1463 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1464 static bool ext_80387_constants_init = 0;
1465 static void init_ext_80387_constants (void);
1466 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1467 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1468 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1469 static section *x86_64_elf_select_section (tree decl, int reloc,
1470 unsigned HOST_WIDE_INT align)
1471   ATTRIBUTE_UNUSED;
1473 /* Initialize the GCC target structure. */
1474 #undef TARGET_ATTRIBUTE_TABLE
1475 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1476 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1477 # undef TARGET_MERGE_DECL_ATTRIBUTES
1478 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1479 #endif
1481 #undef TARGET_COMP_TYPE_ATTRIBUTES
1482 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1484 #undef TARGET_INIT_BUILTINS
1485 #define TARGET_INIT_BUILTINS ix86_init_builtins
1486 #undef TARGET_EXPAND_BUILTIN
1487 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1489 #undef TARGET_ASM_FUNCTION_EPILOGUE
1490 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1492 #undef TARGET_ENCODE_SECTION_INFO
1493 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1494 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1495 #else
1496 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1497 #endif
1499 #undef TARGET_ASM_OPEN_PAREN
1500 #define TARGET_ASM_OPEN_PAREN ""
1501 #undef TARGET_ASM_CLOSE_PAREN
1502 #define TARGET_ASM_CLOSE_PAREN ""
1504 #undef TARGET_ASM_ALIGNED_HI_OP
1505 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1506 #undef TARGET_ASM_ALIGNED_SI_OP
1507 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1508 #ifdef ASM_QUAD
1509 #undef TARGET_ASM_ALIGNED_DI_OP
1510 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1511 #endif
1513 #undef TARGET_ASM_UNALIGNED_HI_OP
1514 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1515 #undef TARGET_ASM_UNALIGNED_SI_OP
1516 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1517 #undef TARGET_ASM_UNALIGNED_DI_OP
1518 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1520 #undef TARGET_SCHED_ADJUST_COST
1521 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1522 #undef TARGET_SCHED_ISSUE_RATE
1523 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1524 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1525 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1526 ia32_multipass_dfa_lookahead
1528 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1529 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1532 #undef TARGET_HAVE_TLS
1533 #define TARGET_HAVE_TLS true
1535 #undef TARGET_CANNOT_FORCE_CONST_MEM
1536 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1537 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1538 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1540 #undef TARGET_DELEGITIMIZE_ADDRESS
1541 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1543 #undef TARGET_MS_BITFIELD_LAYOUT_P
1544 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1546 #if TARGET_MACHO
1547 #undef TARGET_BINDS_LOCAL_P
1548 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1549 #endif
1551 #undef TARGET_ASM_OUTPUT_MI_THUNK
1552 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1553 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1554 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1556 #undef TARGET_ASM_FILE_START
1557 #define TARGET_ASM_FILE_START x86_file_start
1559 #undef TARGET_DEFAULT_TARGET_FLAGS
1560 #define TARGET_DEFAULT_TARGET_FLAGS \
1561   (TARGET_DEFAULT \
1562 | TARGET_64BIT_DEFAULT \
1563 | TARGET_SUBTARGET_DEFAULT \
1564 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1566 #undef TARGET_HANDLE_OPTION
1567 #define TARGET_HANDLE_OPTION ix86_handle_option
1569 #undef TARGET_RTX_COSTS
1570 #define TARGET_RTX_COSTS ix86_rtx_costs
1571 #undef TARGET_ADDRESS_COST
1572 #define TARGET_ADDRESS_COST ix86_address_cost
1574 #undef TARGET_FIXED_CONDITION_CODE_REGS
1575 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1576 #undef TARGET_CC_MODES_COMPATIBLE
1577 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1579 #undef TARGET_MACHINE_DEPENDENT_REORG
1580 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1582 #undef TARGET_BUILD_BUILTIN_VA_LIST
1583 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1585 #undef TARGET_MD_ASM_CLOBBERS
1586 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1588 #undef TARGET_PROMOTE_PROTOTYPES
1589 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1590 #undef TARGET_STRUCT_VALUE_RTX
1591 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1592 #undef TARGET_SETUP_INCOMING_VARARGS
1593 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1594 #undef TARGET_MUST_PASS_IN_STACK
1595 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1596 #undef TARGET_PASS_BY_REFERENCE
1597 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1598 #undef TARGET_INTERNAL_ARG_POINTER
1599 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1600 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1601 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1603 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1604 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1606 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1607 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1609 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1610 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1612 #ifdef HAVE_AS_TLS
1613 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1614 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1615 #endif
1617 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1618 #undef TARGET_INSERT_ATTRIBUTES
1619 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1620 #endif
1622 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1623 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1625 #undef TARGET_STACK_PROTECT_FAIL
1626 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1628 #undef TARGET_FUNCTION_VALUE
1629 #define TARGET_FUNCTION_VALUE ix86_function_value
1631 struct gcc_target targetm = TARGET_INITIALIZER;
1634 /* The svr4 ABI for the i386 says that records and unions are returned
1635 in memory.  */
1636 #ifndef DEFAULT_PCC_STRUCT_RETURN
1637 #define DEFAULT_PCC_STRUCT_RETURN 1
1640 /* Implement TARGET_HANDLE_OPTION. */
1642 static bool
1643 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1650 target_flags &= ~MASK_3DNOW_A;
1651 target_flags_explicit |= MASK_3DNOW_A;
1658 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1659 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1666 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1667 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1674 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1675 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1682 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1683 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1692 /* Sometimes certain combinations of command options do not make
1693 sense on a particular target machine. You can define a macro
1694 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1695 defined, is executed once just after all the command options have
1696 been parsed.
1698 Don't use this macro to turn on various extra optimizations for
1699 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1701 void
1702 override_options (void)
1705 int ix86_tune_defaulted = 0;
1707 /* Comes from final.c -- no real reason to change it. */
1708 #define MAX_CODE_ALIGN 16
1710 static struct ptt
1711 {
1712 const struct processor_costs *cost; /* Processor costs */
1713 const int target_enable; /* Target flags to enable. */
1714 const int target_disable; /* Target flags to disable. */
1715 const int align_loop; /* Default alignments. */
1716 const int align_loop_max_skip;
1717 const int align_jump;
1718 const int align_jump_max_skip;
1719 const int align_func;
1720 }
1721 const processor_target_table[PROCESSOR_max] =
1723 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1724 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1725 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1726 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1727 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1728 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1729 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1730 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1731 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1732 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1733 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1734 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1735 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1736 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1739 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1740 static struct pta
1741 {
1742 const char *const name; /* processor name or nickname. */
1743 const enum processor_type processor;
1744 const enum pta_flags
1745   {
1746     PTA_SSE = 1,
1747     PTA_SSE2 = 2,
1748     PTA_SSE3 = 4,
1749     PTA_MMX = 8,
1750     PTA_PREFETCH_SSE = 16,
1751     PTA_3DNOW = 32,
1752     PTA_3DNOW_A = 64,
1753     PTA_64BIT = 128,
1754     PTA_SSSE3 = 256,
1755     PTA_CX16 = 512,
1756     PTA_POPCNT = 1024,
1757     PTA_ABM = 2048,
1758     PTA_SSE4A = 4096
1759   } flags;
1760 }
1761 const processor_alias_table[] =
1763 {"i386", PROCESSOR_I386, 0},
1764 {"i486", PROCESSOR_I486, 0},
1765 {"i586", PROCESSOR_PENTIUM, 0},
1766 {"pentium", PROCESSOR_PENTIUM, 0},
1767 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1768 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1769 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1770 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1771 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1772 {"i686", PROCESSOR_PENTIUMPRO, 0},
1773 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1774 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1775 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1776 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1777 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1778 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1779 | PTA_MMX | PTA_PREFETCH_SSE},
1780 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1781 | PTA_MMX | PTA_PREFETCH_SSE},
1782 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1783 | PTA_MMX | PTA_PREFETCH_SSE},
1784 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1785 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1786 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1787 | PTA_64BIT | PTA_MMX
1788 | PTA_PREFETCH_SSE | PTA_CX16},
1789 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1791 {"k6", PROCESSOR_K6, PTA_MMX},
1792 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1793 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1794 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1796 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1797 | PTA_3DNOW | PTA_3DNOW_A},
1798 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1799 | PTA_3DNOW_A | PTA_SSE},
1800 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1801 | PTA_3DNOW_A | PTA_SSE},
1802 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1803 | PTA_3DNOW_A | PTA_SSE},
1804 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1805 | PTA_SSE | PTA_SSE2 },
1806 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1807 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1808 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1809 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1811 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1812 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1813 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1814 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1816 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1818 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1819 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1821 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1823 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1824 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1825 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1826 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1827 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1828 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1829 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1830 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1831 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1832 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1835 int const pta_size = ARRAY_SIZE (processor_alias_table);
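/* Illustrative walk-through of the tables above (a sketch, not
   compiler code): given -march=k8, the lookup below sets ix86_arch to
   PROCESSOR_K8 and, unless the user explicitly disabled them, turns on
   the MMX, 3DNOW, 3DNOW_A, SSE and SSE2 masks named in that entry's
   PTA flags; -mtune, when absent, then defaults to the same entry.  */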
1837 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1838 SUBTARGET_OVERRIDE_OPTIONS;
1839 #endif
1841 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1842 SUBSUBTARGET_OVERRIDE_OPTIONS;
1843 #endif
1845 /* -fPIC is the default for x86_64. */
1846 if (TARGET_MACHO && TARGET_64BIT)
1847   flag_pic = 2;
1849 /* Set the default values for switches whose default depends on TARGET_64BIT
1850 in case they weren't overwritten by command line options. */
1851 if (TARGET_64BIT)
1852   {
1853   /* Mach-O doesn't support omitting the frame pointer for now. */
1854 if (flag_omit_frame_pointer == 2)
1855 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1856 if (flag_asynchronous_unwind_tables == 2)
1857 flag_asynchronous_unwind_tables = 1;
1858 if (flag_pcc_struct_return == 2)
1859 flag_pcc_struct_return = 0;
1860   }
1861 else
1862   {
1863   if (flag_omit_frame_pointer == 2)
1864 flag_omit_frame_pointer = 0;
1865 if (flag_asynchronous_unwind_tables == 2)
1866 flag_asynchronous_unwind_tables = 0;
1867 if (flag_pcc_struct_return == 2)
1868   flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1869   }
1871 /* Need to check -mtune=generic first. */
1872 if (ix86_tune_string)
1874 if (!strcmp (ix86_tune_string, "generic")
1875 || !strcmp (ix86_tune_string, "i686")
1876 /* As special support for cross compilers we read -mtune=native
1877 as -mtune=generic. With native compilers we won't see the
1878 -mtune=native, as it was changed by the driver. */
1879 || !strcmp (ix86_tune_string, "native"))
1882 ix86_tune_string = "generic64";
1884 ix86_tune_string = "generic32";
1886 else if (!strncmp (ix86_tune_string, "generic", 7))
1887 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1889 else
1890   {
1891   if (ix86_arch_string)
1892 ix86_tune_string = ix86_arch_string;
1893 if (!ix86_tune_string)
1894   {
1895   ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1896   ix86_tune_defaulted = 1;
1897   }
1898   }
1899 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1900 need to use a sensible tune option. */
1901 if (!strcmp (ix86_tune_string, "generic")
1902 || !strcmp (ix86_tune_string, "x86-64")
1903 || !strcmp (ix86_tune_string, "i686"))
1906 ix86_tune_string = "generic64";
1908 ix86_tune_string = "generic32";
1911 if (!strcmp (ix86_tune_string, "x86-64"))
1912 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1913 "-mtune=generic instead as appropriate.");
1915 if (!ix86_arch_string)
1916 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1917 if (!strcmp (ix86_arch_string, "generic"))
1918 error ("generic CPU can be used only for -mtune= switch");
1919 if (!strncmp (ix86_arch_string, "generic", 7))
1920 error ("bad value (%s) for -march= switch", ix86_arch_string);
1922 if (ix86_cmodel_string != 0)
1924 if (!strcmp (ix86_cmodel_string, "small"))
1925 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1926 else if (!strcmp (ix86_cmodel_string, "medium"))
1927 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1929 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1930 else if (!strcmp (ix86_cmodel_string, "32"))
1931 ix86_cmodel = CM_32;
1932 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1933 ix86_cmodel = CM_KERNEL;
1934 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1935 ix86_cmodel = CM_LARGE;
1937 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1939 else
1940 {
1941   ix86_cmodel = CM_32;
1942   if (TARGET_64BIT)
1943   ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1944 }
1945 if (ix86_asm_string != 0)
1948 && !strcmp (ix86_asm_string, "intel"))
1949 ix86_asm_dialect = ASM_INTEL;
1950 else if (!strcmp (ix86_asm_string, "att"))
1951 ix86_asm_dialect = ASM_ATT;
1953 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1955 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1956 error ("code model %qs not supported in the %s bit mode",
1957 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1958 if (ix86_cmodel == CM_LARGE)
1959 sorry ("code model %<large%> not supported yet");
1960 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1961 sorry ("%i-bit mode not compiled in",
1962 (target_flags & MASK_64BIT) ? 64 : 32);
1964 for (i = 0; i < pta_size; i++)
1965 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1967 ix86_arch = processor_alias_table[i].processor;
1968 /* Default cpu tuning to the architecture. */
1969 ix86_tune = ix86_arch;
1970 if (processor_alias_table[i].flags & PTA_MMX
1971 && !(target_flags_explicit & MASK_MMX))
1972 target_flags |= MASK_MMX;
1973 if (processor_alias_table[i].flags & PTA_3DNOW
1974 && !(target_flags_explicit & MASK_3DNOW))
1975 target_flags |= MASK_3DNOW;
1976 if (processor_alias_table[i].flags & PTA_3DNOW_A
1977 && !(target_flags_explicit & MASK_3DNOW_A))
1978 target_flags |= MASK_3DNOW_A;
1979 if (processor_alias_table[i].flags & PTA_SSE
1980 && !(target_flags_explicit & MASK_SSE))
1981 target_flags |= MASK_SSE;
1982 if (processor_alias_table[i].flags & PTA_SSE2
1983 && !(target_flags_explicit & MASK_SSE2))
1984 target_flags |= MASK_SSE2;
1985 if (processor_alias_table[i].flags & PTA_SSE3
1986 && !(target_flags_explicit & MASK_SSE3))
1987 target_flags |= MASK_SSE3;
1988 if (processor_alias_table[i].flags & PTA_SSSE3
1989 && !(target_flags_explicit & MASK_SSSE3))
1990 target_flags |= MASK_SSSE3;
1991 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1992 x86_prefetch_sse = true;
1993 if (processor_alias_table[i].flags & PTA_CX16)
1994 x86_cmpxchg16b = true;
1995 if (processor_alias_table[i].flags & PTA_POPCNT
1996 && !(target_flags_explicit & MASK_POPCNT))
1997 target_flags |= MASK_POPCNT;
1998 if (processor_alias_table[i].flags & PTA_ABM
1999 && !(target_flags_explicit & MASK_ABM))
2000 target_flags |= MASK_ABM;
2001 if (processor_alias_table[i].flags & PTA_SSE4A
2002 && !(target_flags_explicit & MASK_SSE4A))
2003 target_flags |= MASK_SSE4A;
2004 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2005 error ("CPU you selected does not support x86-64 "
2011 error ("bad value (%s) for -march= switch", ix86_arch_string);
2013 for (i = 0; i < pta_size; i++)
2014 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2016 ix86_tune = processor_alias_table[i].processor;
2017 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2019 if (ix86_tune_defaulted)
2021 ix86_tune_string = "x86-64";
2022 for (i = 0; i < pta_size; i++)
2023 if (! strcmp (ix86_tune_string,
2024 processor_alias_table[i].name))
2026 ix86_tune = processor_alias_table[i].processor;
2029 error ("CPU you selected does not support x86-64 "
2032 /* Intel CPUs have always interpreted SSE prefetch instructions as
2033 NOPs; so, we can enable SSE prefetch instructions even when
2034 -mtune (rather than -march) points us to a processor that has them.
2035 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2036 higher processors. */
2037 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2038 x86_prefetch_sse = true;
2042 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2045 ix86_cost = &size_cost;
2047 ix86_cost = processor_target_table[ix86_tune].cost;
2048 target_flags |= processor_target_table[ix86_tune].target_enable;
2049 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2051 /* Arrange to set up i386_stack_locals for all functions. */
2052 init_machine_status = ix86_init_machine_status;
2054 /* Validate -mregparm= value. */
2055 if (ix86_regparm_string)
2057 i = atoi (ix86_regparm_string);
2058 if (i < 0 || i > REGPARM_MAX)
2059 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2060   else
2061     ix86_regparm = i;
2064 if (TARGET_64BIT)
2065   ix86_regparm = REGPARM_MAX;
2067 /* If the user has provided any of the -malign-* options,
2068 warn and use that value only if -falign-* is not set.
2069 Remove this code in GCC 3.2 or later. */
2070 if (ix86_align_loops_string)
2072 warning (0, "-malign-loops is obsolete, use -falign-loops");
2073 if (align_loops == 0)
2075 i = atoi (ix86_align_loops_string);
2076 if (i < 0 || i > MAX_CODE_ALIGN)
2077 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2079 align_loops = 1 << i;
2083 if (ix86_align_jumps_string)
2085 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2086 if (align_jumps == 0)
2088 i = atoi (ix86_align_jumps_string);
2089 if (i < 0 || i > MAX_CODE_ALIGN)
2090 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2092 align_jumps = 1 << i;
2096 if (ix86_align_funcs_string)
2098 warning (0, "-malign-functions is obsolete, use -falign-functions");
2099 if (align_functions == 0)
2101 i = atoi (ix86_align_funcs_string);
2102 if (i < 0 || i > MAX_CODE_ALIGN)
2103 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2105 align_functions = 1 << i;
2109 /* Default align_* from the processor table. */
2110 if (align_loops == 0)
2112 align_loops = processor_target_table[ix86_tune].align_loop;
2113 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2115 if (align_jumps == 0)
2117 align_jumps = processor_target_table[ix86_tune].align_jump;
2118 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2120 if (align_functions == 0)
2122 align_functions = processor_target_table[ix86_tune].align_func;
2125 /* Validate -mbranch-cost= value, or provide default. */
2126 ix86_branch_cost = ix86_cost->branch_cost;
2127 if (ix86_branch_cost_string)
2129 i = atoi (ix86_branch_cost_string);
2131 error ("-mbranch-cost=%d is not between 0 and 5", i);
2132 else
2133   ix86_branch_cost = i;
2135 if (ix86_section_threshold_string)
2137 i = atoi (ix86_section_threshold_string);
2139 error ("-mlarge-data-threshold=%d is negative", i);
2140 else
2141   ix86_section_threshold = i;
2144 if (ix86_tls_dialect_string)
2146 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2147 ix86_tls_dialect = TLS_DIALECT_GNU;
2148 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2149 ix86_tls_dialect = TLS_DIALECT_GNU2;
2150 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2151 ix86_tls_dialect = TLS_DIALECT_SUN;
2153 error ("bad value (%s) for -mtls-dialect= switch",
2154 ix86_tls_dialect_string);
2157 /* Keep nonleaf frame pointers. */
2158 if (flag_omit_frame_pointer)
2159 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2160 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2161 flag_omit_frame_pointer = 1;
2163 /* If we're doing fast math, we don't care about comparison order
2164 wrt NaNs. This lets us use a shorter comparison sequence. */
2165 if (flag_finite_math_only)
2166 target_flags &= ~MASK_IEEE_FP;
2168 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2169 since the insns won't need emulation. */
2170 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2171 target_flags &= ~MASK_NO_FANCY_MATH_387;
2173 /* Likewise, if the target doesn't have a 387, or we've specified
2174 software floating point, don't use 387 inline intrinsics. */
2175 if (!TARGET_80387)
2176   target_flags |= MASK_NO_FANCY_MATH_387;
2178 /* Turn on SSE3 builtins for -mssse3. */
2179 if (TARGET_SSSE3)
2180   target_flags |= MASK_SSE3;
2182 /* Turn on SSE3 builtins for -msse4a. */
2183 if (TARGET_SSE4A)
2184   target_flags |= MASK_SSE3;
2186 /* Turn on SSE2 builtins for -msse3. */
2187 if (TARGET_SSE3)
2188   target_flags |= MASK_SSE2;
2190 /* Turn on SSE builtins for -msse2. */
2191 if (TARGET_SSE2)
2192   target_flags |= MASK_SSE;
2194 /* Turn on MMX builtins for -msse. */
2197 target_flags |= MASK_MMX & ~target_flags_explicit;
2198   x86_prefetch_sse = true;
2199   }
2201 /* Turn on MMX builtins for 3Dnow. */
2202 if (TARGET_3DNOW)
2203   target_flags |= MASK_MMX;
2205 /* Turn on POPCNT builtins for -mabm. */
2206 if (TARGET_ABM)
2207   target_flags |= MASK_POPCNT;
2211 if (TARGET_ALIGN_DOUBLE)
2212 error ("-malign-double makes no sense in the 64bit mode");
2214 error ("-mrtd calling convention not supported in the 64bit mode");
2216 /* Enable by default the SSE and MMX builtins. Do allow the user to
2217 explicitly disable any of these. In particular, disabling SSE and
2218 MMX for kernel code is extremely useful. */
2220 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2221 & ~target_flags_explicit);
2225 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2226 when the programmer takes care to keep the stack from being destroyed. */
2227 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2228 target_flags |= MASK_NO_RED_ZONE;
2231 /* Validate -mpreferred-stack-boundary= value, or provide default.
2232 The default of 128 bits is for Pentium III's SSE __m128. We can't
2233 change it because of optimize_size. Otherwise, we can't mix object
2234 files compiled with -Os and -On. */
2235 ix86_preferred_stack_boundary = 128;
2236 if (ix86_preferred_stack_boundary_string)
2238 i = atoi (ix86_preferred_stack_boundary_string);
2239 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2240 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2241 TARGET_64BIT ? 4 : 2);
2243 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2246 /* Accept -msseregparm only if at least SSE support is enabled. */
2247 if (TARGET_SSEREGPARM
2249 error ("-msseregparm used without SSE enabled");
2251 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2253 if (ix86_fpmath_string != 0)
2255 if (! strcmp (ix86_fpmath_string, "387"))
2256 ix86_fpmath = FPMATH_387;
2257 else if (! strcmp (ix86_fpmath_string, "sse"))
2261 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2262 ix86_fpmath = FPMATH_387;
2263   }
2264 else
2265   ix86_fpmath = FPMATH_SSE;
2267 else if (! strcmp (ix86_fpmath_string, "387,sse")
2268 || ! strcmp (ix86_fpmath_string, "sse,387"))
2272 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2273 ix86_fpmath = FPMATH_387;
2274   }
2275 else if (!TARGET_80387)
2276   {
2277 warning (0, "387 instruction set disabled, using SSE arithmetics");
2278 ix86_fpmath = FPMATH_SSE;
2279   }
2280 else
2281   ix86_fpmath = FPMATH_SSE | FPMATH_387;
2284 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2287 /* If the i387 is disabled, then do not return values in it. */
2289 target_flags &= ~MASK_FLOAT_RETURNS;
2291 if ((x86_accumulate_outgoing_args & TUNEMASK)
2292 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2293     && !optimize_size)
2294   target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2296 /* ??? Unwind info is not correct around the CFG unless either a frame
2297 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2298 unwind info generation to be aware of the CFG and propagating states
2299 around edges. */
2300 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2301 || flag_exceptions || flag_non_call_exceptions)
2302 && flag_omit_frame_pointer
2303 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2305 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2306 warning (0, "unwind tables currently require either a frame pointer "
2307 "or -maccumulate-outgoing-args for correctness");
2308 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2311 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2312 {
2313   char *p;
2314   ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2315 p = strchr (internal_label_prefix, 'X');
2316   internal_label_prefix_len = p - internal_label_prefix;
2317   *p = '\0';
2318 }
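/* With a typical ELF definition of ASM_GENERATE_INTERNAL_LABEL this
   produces something like "*.LX0"; the 'X' is then found at offset 3,
   so internal_label_prefix becomes "*.L" with
   internal_label_prefix_len == 3 (an illustration only; the exact
   string is target-dependent).  */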
2320 /* When the scheduling description is not available, disable the scheduler
2321 pass so it won't slow down compilation and make x87 code slower. */
2322 if (!TARGET_SCHEDULE)
2323 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2326 /* Switch to the appropriate section for output of DECL.
2327 DECL is either a `VAR_DECL' node or a constant of some sort.
2328 RELOC indicates whether forming the initial value of DECL requires
2329 link-time relocations. */
2331 static section *
2332 x86_64_elf_select_section (tree decl, int reloc,
2333 unsigned HOST_WIDE_INT align)
2335 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2336 && ix86_in_large_data_p (decl))
2338 const char *sname = NULL;
2339 unsigned int flags = SECTION_WRITE;
2340 switch (categorize_decl_for_section (decl, reloc))
2345 case SECCAT_DATA_REL:
2346 sname = ".ldata.rel";
2348 case SECCAT_DATA_REL_LOCAL:
2349 sname = ".ldata.rel.local";
2351 case SECCAT_DATA_REL_RO:
2352 sname = ".ldata.rel.ro";
2354 case SECCAT_DATA_REL_RO_LOCAL:
2355 sname = ".ldata.rel.ro.local";
2359 flags |= SECTION_BSS;
2362 case SECCAT_RODATA_MERGE_STR:
2363 case SECCAT_RODATA_MERGE_STR_INIT:
2364 case SECCAT_RODATA_MERGE_CONST:
2368 case SECCAT_SRODATA:
2375 /* We don't split these for medium model. Place them into
2376 default sections and hope for the best. */
2381 /* We might get called with string constants, but get_named_section
2382 doesn't like them as they are not DECLs. Also, we need to set
2383 flags in that case. */
2385 return get_section (sname, flags, NULL);
2386 return get_named_section (decl, sname, reloc);
2389 return default_elf_select_section (decl, reloc, align);
2392 /* Build up a unique section name, expressed as a
2393 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2394 RELOC indicates whether the initial value of EXP requires
2395 link-time relocations. */
2397 static void
2398 x86_64_elf_unique_section (tree decl, int reloc)
2400 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2401 && ix86_in_large_data_p (decl))
2403 const char *prefix = NULL;
2404 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2405 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2407 switch (categorize_decl_for_section (decl, reloc))
2410 case SECCAT_DATA_REL:
2411 case SECCAT_DATA_REL_LOCAL:
2412 case SECCAT_DATA_REL_RO:
2413 case SECCAT_DATA_REL_RO_LOCAL:
2414 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2417 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2420 case SECCAT_RODATA_MERGE_STR:
2421 case SECCAT_RODATA_MERGE_STR_INIT:
2422 case SECCAT_RODATA_MERGE_CONST:
2423 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2425 case SECCAT_SRODATA:
2432 /* We don't split these for medium model. Place them into
2433 default sections and hope for the best. */
2441 plen = strlen (prefix);
2443 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2444 name = targetm.strip_name_encoding (name);
2445 nlen = strlen (name);
2447 string = alloca (nlen + plen + 1);
2448 memcpy (string, prefix, plen);
2449 memcpy (string + plen, name, nlen + 1);
2451 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2455 default_unique_section (decl, reloc);
2458 #ifdef COMMON_ASM_OP
2459 /* This says how to output assembler code to declare an
2460 uninitialized external linkage data object.
2462 For medium model x86-64 we need to use the .largecomm opcode for
2463 large objects. */
2464 void
2465 x86_elf_aligned_common (FILE *file,
2466 const char *name, unsigned HOST_WIDE_INT size,
2467 			int align)
2468 {
2469   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2470 && size > (unsigned int)ix86_section_threshold)
2471 fprintf (file, ".largecomm\t");
2473 fprintf (file, "%s", COMMON_ASM_OP);
2474 assemble_name (file, name);
2475 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2476 size, align / BITS_PER_UNIT);
2479 /* Utility function for targets to use in implementing
2480 ASM_OUTPUT_ALIGNED_BSS. */
2482 void
2483 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2484 const char *name, unsigned HOST_WIDE_INT size,
2485 			int align)
2486 {
2487   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2488 && size > (unsigned int)ix86_section_threshold)
2489 switch_to_section (get_named_section (decl, ".lbss", 0));
2491 switch_to_section (bss_section);
2492 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2493 #ifdef ASM_DECLARE_OBJECT_NAME
2494 last_assemble_variable_decl = decl;
2495 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2497 /* The standard thing is to just output a label for the object. */
2498 ASM_OUTPUT_LABEL (file, name);
2499 #endif /* ASM_DECLARE_OBJECT_NAME */
2500 ASM_OUTPUT_SKIP (file, size ? size : 1);
2504 void
2505 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2507 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2508 make the problem with not enough registers even worse. */
2509 #ifdef INSN_SCHEDULING
2510 if (level > 1)
2511   flag_schedule_insns = 0;
2512 #endif
2514 if (TARGET_MACHO)
2515 /* The Darwin libraries never set errno, so we might as well
2516 avoid calling them when that's the only reason we would. */
2517 flag_errno_math = 0;
2519 /* The default values of these switches depend on TARGET_64BIT,
2520 which is not known at this moment. Mark these values with 2 and
2521 let the user override them. In case there is no command line option
2522 specifying them, we will set the defaults in override_options. */
2524 flag_omit_frame_pointer = 2;
2525 flag_pcc_struct_return = 2;
2526 flag_asynchronous_unwind_tables = 2;
2527 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2528 SUBTARGET_OPTIMIZATION_OPTIONS;
2529 #endif
2530 }
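/* A sketch of the '2' sentinel protocol used above: an explicit
   -f(no-)omit-frame-pointer overwrites the 2 with 0 or 1 during option
   processing, so any flag still equal to 2 when override_options runs
   is known to be unset and gets the TARGET_64BIT- and Mach-O-dependent
   default there.  */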
2532 /* Table of valid machine attributes. */
2533 const struct attribute_spec ix86_attribute_table[] =
2535 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2536 /* Stdcall attribute says callee is responsible for popping arguments
2537 if they are not variable. */
2538 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2539 /* Fastcall attribute says callee is responsible for popping arguments
2540 if they are not variable. */
2541 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2542 /* Cdecl attribute says the callee is a normal C declaration */
2543 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2544 /* Regparm attribute specifies how many integer arguments are to be
2545 passed in registers. */
2546 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2547 /* Sseregparm attribute says we are using x86_64 calling conventions
2548 for FP arguments. */
2549 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2550 /* force_align_arg_pointer says this function realigns the stack at entry. */
2551 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2552 false, true, true, ix86_handle_cconv_attribute },
2553 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2554 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2555 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2556 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2558 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2559 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2560 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2561 SUBTARGET_ATTRIBUTE_TABLE,
2562 #endif
2563 { NULL, 0, 0, false, false, false, NULL }
2564 };
2566 /* Decide whether we can make a sibling call to a function. DECL is the
2567 declaration of the function being targeted by the call and EXP is the
2568 CALL_EXPR representing the call. */
2570 static bool
2571 ix86_function_ok_for_sibcall (tree decl, tree exp)
2576 /* If we are generating position-independent code, we cannot sibcall
2577 optimize any indirect call, or a direct call to a global function,
2578 as the PLT requires %ebx be live. */
2579 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2586 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2587 if (POINTER_TYPE_P (func))
2588 func = TREE_TYPE (func);
2591 /* Check that the return value locations are the same. Like
2592 if we are returning floats on the 80387 register stack, we cannot
2593 make a sibcall from a function that doesn't return a float to a
2594 function that does or, conversely, from a function that does return
2595 a float to a function that doesn't; the necessary stack adjustment
2596 would not be executed. This is also the place we notice
2597 differences in the return value ABI. Note that it is ok for one
2598 of the functions to have void return type as long as the return
2599 value of the other is passed in a register. */
2600 a = ix86_function_value (TREE_TYPE (exp), func, false);
2601 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2603 if (STACK_REG_P (a) || STACK_REG_P (b))
2605 if (!rtx_equal_p (a, b))
2608 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2610 else if (!rtx_equal_p (a, b))
2613 /* If this call is indirect, we'll need to be able to use a call-clobbered
2614 register for the address of the target function. Make sure that all
2615 such registers are not used for passing parameters. */
2616 if (!decl && !TARGET_64BIT)
2620 /* We're looking at the CALL_EXPR, we need the type of the function. */
2621 type = TREE_OPERAND (exp, 0); /* pointer expression */
2622 type = TREE_TYPE (type); /* pointer type */
2623 type = TREE_TYPE (type); /* function type */
2625 if (ix86_function_regparm (type, NULL) >= 3)
2627 /* ??? Need to count the actual number of registers to be used,
2628 not the possible number of registers. Fix later. */
2633 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2634 /* Dllimport'd functions are also called indirectly. */
2635 if (decl && DECL_DLLIMPORT_P (decl)
2636 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2640 /* If we force-aligned the stack, then sibcalling would unalign the
2641 stack, which may break the called function. */
2642 if (cfun->machine->force_align_arg_pointer)
2645 /* Otherwise okay. That also includes certain types of indirect calls. */
2646 return true;
2647 }
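/* A worked instance of the return-value check above (illustrative):
   a callee returning double in %st(0) must not be sibcalled from a
   caller returning int, because the x87 stack pop owed by the caller
   would never execute; both returning double is fine, as is a void
   caller paired with a callee whose result comes back in a register.  */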
2649 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2650 calling convention attributes;
2651 arguments as in struct attribute_spec.handler. */
2653 static tree
2654 ix86_handle_cconv_attribute (tree *node, tree name,
2655 			     tree args,
2656 int flags ATTRIBUTE_UNUSED,
2657 			     bool *no_add_attrs)
2658 {
2659   if (TREE_CODE (*node) != FUNCTION_TYPE
2660 && TREE_CODE (*node) != METHOD_TYPE
2661 && TREE_CODE (*node) != FIELD_DECL
2662 && TREE_CODE (*node) != TYPE_DECL)
2664 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2665 IDENTIFIER_POINTER (name));
2666 *no_add_attrs = true;
2670 /* Can combine regparm with all attributes but fastcall. */
2671 if (is_attribute_p ("regparm", name))
2675 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2677 error ("fastcall and regparm attributes are not compatible");
2680 cst = TREE_VALUE (args);
2681 if (TREE_CODE (cst) != INTEGER_CST)
2683 warning (OPT_Wattributes,
2684 "%qs attribute requires an integer constant argument",
2685 IDENTIFIER_POINTER (name));
2686 *no_add_attrs = true;
2688 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2690 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2691 IDENTIFIER_POINTER (name), REGPARM_MAX);
2692 *no_add_attrs = true;
2696 && lookup_attribute (ix86_force_align_arg_pointer_string,
2697 TYPE_ATTRIBUTES (*node))
2698 && compare_tree_int (cst, REGPARM_MAX-1))
2700 error ("%s functions limited to %d register parameters",
2701 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2709 warning (OPT_Wattributes, "%qs attribute ignored",
2710 IDENTIFIER_POINTER (name));
2711 *no_add_attrs = true;
2715 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2716 if (is_attribute_p ("fastcall", name))
2718 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2720 error ("fastcall and cdecl attributes are not compatible");
2722 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2724 error ("fastcall and stdcall attributes are not compatible");
2726 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2728 error ("fastcall and regparm attributes are not compatible");
2732 /* Can combine stdcall with fastcall (redundant), regparm and
2734 else if (is_attribute_p ("stdcall", name))
2736 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2738 error ("stdcall and cdecl attributes are not compatible");
2740 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2742 error ("stdcall and fastcall attributes are not compatible");
2746 /* Can combine cdecl with regparm and sseregparm. */
2747 else if (is_attribute_p ("cdecl", name))
2749 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2751 error ("stdcall and cdecl attributes are not compatible");
2753 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2755 error ("fastcall and cdecl attributes are not compatible");
2759 /* Can combine sseregparm with all attributes. */
2764 /* Return 0 if the attributes for two types are incompatible, 1 if they
2765 are compatible, and 2 if they are nearly compatible (which causes a
2766 warning to be generated). */
2768 static int
2769 ix86_comp_type_attributes (tree type1, tree type2)
2771 /* Check for mismatch of non-default calling convention. */
2772 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2774 if (TREE_CODE (type1) != FUNCTION_TYPE)
2777 /* Check for mismatched fastcall/regparm types. */
2778 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2779 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2780 || (ix86_function_regparm (type1, NULL)
2781 != ix86_function_regparm (type2, NULL)))
2784 /* Check for mismatched sseregparm types. */
2785 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2786 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2789 /* Check for mismatched return types (cdecl vs stdcall). */
2790 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2791 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2797 /* Return the regparm value for a function with the indicated TYPE and DECL.
2798 DECL may be NULL when calling function indirectly
2799 or considering a libcall. */
2801 static int
2802 ix86_function_regparm (tree type, tree decl)
2805 int regparm = ix86_regparm;
2806 bool user_convention = false;
2810 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2813 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2814 user_convention = true;
2817 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2820 user_convention = true;
2823 /* Use register calling convention for local functions when possible. */
2824 if (!TARGET_64BIT && !user_convention && decl
2825 && flag_unit_at_a_time && !profile_flag)
2827 struct cgraph_local_info *i = cgraph_local_info (decl);
2828 if (i && i->local)
2829   {
2830   int local_regparm, globals = 0, regno;
2832 /* Make sure no regparm register is taken by a global register
2833 variable. */
2834 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2835   if (global_regs[local_regparm])
2836     break;
2837 /* We can't use regparm(3) for nested functions as these use
2838 static chain pointer in third argument. */
2839 if (local_regparm == 3
2840 && decl_function_context (decl)
2841 && !DECL_NO_STATIC_CHAIN (decl))
2843 /* If the function realigns its stack pointer, the
2844 prologue will clobber %ecx. If we've already
2845 generated code for the callee, the callee
2846 DECL_STRUCT_FUNCTION is gone, so we fall back to
2847 scanning the attributes for the self-realigning
2849 if ((DECL_STRUCT_FUNCTION (decl)
2850 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2851 || (!DECL_STRUCT_FUNCTION (decl)
2852 && lookup_attribute (ix86_force_align_arg_pointer_string,
2853     TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2854   local_regparm = 2;
2855 /* Each global register variable increases register pressure, so the
2856 more global reg vars there are, the less useful the regparm
2857 optimization is, unless explicitly requested by the user. */
2858 for (regno = 0; regno < 6; regno++)
2859 if (global_regs[regno])
2862 = globals < local_regparm ? local_regparm - globals : 0;
2864 if (local_regparm > regparm)
2865 regparm = local_regparm;
2872 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2873 DFmode (2) arguments in SSE registers for a function with the
2874 indicated TYPE and DECL. DECL may be NULL when calling function
2875 indirectly or considering a libcall. Otherwise return 0. */
2877 static int
2878 ix86_function_sseregparm (tree type, tree decl)
2880 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2881 by the sseregparm attribute. */
2882 if (TARGET_SSEREGPARM
2884 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2889 error ("Calling %qD with attribute sseregparm without "
2890 "SSE/SSE2 enabled", decl);
2892 error ("Calling %qT with attribute sseregparm without "
2893 "SSE/SSE2 enabled", type);
2900 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2901 (and DFmode for SSE2) arguments in SSE registers,
2902 even for 32-bit targets. */
2903 if (!TARGET_64BIT && decl
2904 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2906 struct cgraph_local_info *i = cgraph_local_info (decl);
2907 if (i && i->local)
2908   return TARGET_SSE2 ? 2 : 1;
2914 /* Return true if EAX is live at the start of the function. Used by
2915 ix86_expand_prologue to determine if we need special help before
2916 calling allocate_stack_worker. */
2918 static bool
2919 ix86_eax_live_at_start_p (void)
2921 /* Cheat. Don't bother working forward from ix86_function_regparm
2922 to the function type to whether an actual argument is located in
2923 eax. Instead just look at cfg info, which is still close enough
2924 to correct at this point. This gives false positives for broken
2925 functions that might use uninitialized data that happens to be
2926 allocated in eax, but who cares? */
2927 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2930 /* Value is the number of bytes of arguments automatically
2931 popped when returning from a subroutine call.
2932 FUNDECL is the declaration node of the function (as a tree),
2933 FUNTYPE is the data type of the function (as a tree),
2934 or for a library call it is an identifier node for the subroutine name.
2935 SIZE is the number of bytes of arguments passed on the stack.
2937 On the 80386, the RTD insn may be used to pop them if the number
2938 of args is fixed, but if the number is variable then the caller
2939 must pop them all. RTD can't be used for library calls now
2940 because the library is compiled with the Unix compiler.
2941 Use of RTD is a selectable option, since it is incompatible with
2942 standard Unix calling sequences. If the option is not selected,
2943 the caller must always pop the args.
2945 The attribute stdcall is equivalent to RTD on a per module basis. */
2947 int
2948 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2950 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2952 /* Cdecl functions override -mrtd, and never pop the stack. */
2953 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2955 /* Stdcall and fastcall functions will pop the stack if not
2956    variable args. */
2957 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2958 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2962 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2963 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2964 	== void_type_node)))
2965   return size;
2966 }
2968 /* Lose any fake structure return argument if it is passed on the stack. */
2969 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2971 && !KEEP_AGGREGATE_RETURN_POINTER)
2973 int nregs = ix86_function_regparm (funtype, fundecl);
2975 if (!nregs)
2976   return GET_MODE_SIZE (Pmode);
2979 return 0;
2980 }
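/* Illustrative applications of the rules above: a stdcall function
   taking (int, int) returns with "ret 8"; the same prototype with a
   trailing ellipsis pops nothing, since the caller must clean up
   variable arguments; and a function returning a struct through a
   stack-passed hidden pointer pops that pointer's
   GET_MODE_SIZE (Pmode) == 4 bytes in 32-bit mode.  */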
2982 /* Argument support functions. */
2984 /* Return true when register may be used to pass function parameters. */
2985 int
2986 ix86_function_arg_regno_p (int regno)
2992 return (regno < REGPARM_MAX
2993 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2995 return (regno < REGPARM_MAX
2996 || (TARGET_MMX && MMX_REGNO_P (regno)
2997 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2998 || (TARGET_SSE && SSE_REGNO_P (regno)
2999 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3004 if (SSE_REGNO_P (regno) && TARGET_SSE)
3009 if (TARGET_SSE && SSE_REGNO_P (regno)
3010 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3013 /* RAX is used as hidden argument to va_arg functions. */
3016 for (i = 0; i < REGPARM_MAX; i++)
3017 if (regno == x86_64_int_parameter_registers[i])
3022 /* Return true if we do not know how to pass TYPE solely in registers. */
3024 static bool
3025 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3026 {
3027 if (must_pass_in_stack_var_size_or_pad (mode, type))
3028   return true;
3030 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3031 The layout_type routine is crafty and tries to trick us into passing
3032 currently unsupported vector types on the stack by using TImode. */
3033 return (!TARGET_64BIT && mode == TImode
3034 && type && TREE_CODE (type) != VECTOR_TYPE);
3037 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3038 for a call to a function whose data type is FNTYPE.
3039 For a library call, FNTYPE is 0. */
3042 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3043 tree fntype, /* tree ptr for function decl */
3044 rtx libname, /* SYMBOL_REF of library name or 0 */
3045 		      tree fndecl)
3046 {
3047   static CUMULATIVE_ARGS zero_cum;
3048 tree param, next_param;
3050 if (TARGET_DEBUG_ARG)
3052 fprintf (stderr, "\ninit_cumulative_args (");
3054 fprintf (stderr, "fntype code = %s, ret code = %s",
3055 tree_code_name[(int) TREE_CODE (fntype)],
3056 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3058 fprintf (stderr, "no fntype");
3061 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3066 /* Set up the number of registers to use for passing arguments. */
3067 cum->nregs = ix86_regparm;
3069 cum->sse_nregs = SSE_REGPARM_MAX;
3071 cum->mmx_nregs = MMX_REGPARM_MAX;
3072 cum->warn_sse = true;
3073 cum->warn_mmx = true;
3074 cum->maybe_vaarg = false;
3076 /* Use ecx and edx registers if function has fastcall attribute,
3077 else look for regparm information. */
3078 if (fntype && !TARGET_64BIT)
3080 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3086 cum->nregs = ix86_function_regparm (fntype, fndecl);
3089 /* Set up the number of SSE registers used for passing SFmode
3090 and DFmode arguments. Warn for mismatching ABI. */
3091 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3093 /* Determine if this function has variable arguments. This is
3094 indicated by the last argument being 'void_type_node' if there
3095 are no variable arguments. If there are variable arguments, then
3096 we won't pass anything in registers in 32-bit mode. */
3098 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3100 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3101 param != 0; param = next_param)
3103 next_param = TREE_CHAIN (param);
3104 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3114 cum->float_in_sse = 0;
3116 cum->maybe_vaarg = true;
3120 if ((!fntype && !libname)
3121 || (fntype && !TYPE_ARG_TYPES (fntype)))
3122 cum->maybe_vaarg = true;
3124 if (TARGET_DEBUG_ARG)
3125 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3130 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3131 But in the case of vector types, it is some vector mode.
3133 When we have only some of our vector isa extensions enabled, then there
3134 are some modes for which vector_mode_supported_p is false. For these
3135 modes, the generic vector support in gcc will choose some non-vector mode
3136 in order to implement the type. By computing the natural mode, we'll
3137 select the proper ABI location for the operand and not depend on whatever
3138 the middle-end decides to do with these vector types. */
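/* A sketch of the intent: with only -msse, a 16-byte GCC vector of
   four ints is not a supported vector mode, so the middle end may lay
   it out as TImode (or BLKmode); the scan from MIN_MODE_VECTOR_INT
   below still recovers V4SImode, and the argument gets the ABI slot it
   would have had if V4SImode were supported.  */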
3140 static enum machine_mode
3141 type_natural_mode (tree type)
3143 enum machine_mode mode = TYPE_MODE (type);
3145 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3147 HOST_WIDE_INT size = int_size_in_bytes (type);
3148 if ((size == 8 || size == 16)
3149 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3150 && TYPE_VECTOR_SUBPARTS (type) > 1)
3152 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3154 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3155 mode = MIN_MODE_VECTOR_FLOAT;
3156 else
3157   mode = MIN_MODE_VECTOR_INT;
3159 /* Get the mode which has this inner mode and number of units. */
3160 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3161 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3162 && GET_MODE_INNER (mode) == innermode)
3172 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3173 this may not agree with the mode that the type system has chosen for the
3174 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3175 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3177 static rtx
3178 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3179 		     unsigned int regno)
3180 {
3181   rtx tmp;
3183 if (orig_mode != BLKmode)
3184 tmp = gen_rtx_REG (orig_mode, regno);
3185 else
3186   {
3187   tmp = gen_rtx_REG (mode, regno);
3188 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3189   tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3190   }
3192 return tmp;
3193 }
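/* For instance (illustrative), an 8-byte BLKmode aggregate placed in
   an SSE register cannot be expressed as a plain (reg:BLK xmm0), so it
   comes back as (parallel:BLK [(expr_list (reg:V2SF xmm0)
   (const_int 0))]), which the middle end can move piecewise.  */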
3195 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3196 of this code is to classify each 8bytes of incoming argument by the register
3197 class and assign registers accordingly. */
3199 /* Return the union class of CLASS1 and CLASS2.
3200 See the x86-64 PS ABI for details. */
3202 static enum x86_64_reg_class
3203 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3205 /* Rule #1: If both classes are equal, this is the resulting class. */
3206 if (class1 == class2)
3209 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3211 if (class1 == X86_64_NO_CLASS)
3213 if (class2 == X86_64_NO_CLASS)
3216 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3217 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3218 return X86_64_MEMORY_CLASS;
3220 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3221 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3222 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3223 return X86_64_INTEGERSI_CLASS;
3224 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3225 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3226 return X86_64_INTEGER_CLASS;
3228 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3230 if (class1 == X86_64_X87_CLASS
3231 || class1 == X86_64_X87UP_CLASS
3232 || class1 == X86_64_COMPLEX_X87_CLASS
3233 || class2 == X86_64_X87_CLASS
3234 || class2 == X86_64_X87UP_CLASS
3235 || class2 == X86_64_COMPLEX_X87_CLASS)
3236 return X86_64_MEMORY_CLASS;
3238 /* Rule #6: Otherwise class SSE is used. */
3239 return X86_64_SSE_CLASS;
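/* A worked instance of rule #4 (illustrative): in
   struct { int i; float f; } both fields share one eightbyte, so the
   int's INTEGERSI class and the float's SSESF class merge as

     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGERSI_CLASS

   and the pair is passed in a general-purpose register.  */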
3242 /* Classify the argument of type TYPE and mode MODE.
3243 CLASSES will be filled by the register class used to pass each word
3244 of the operand. The number of words is returned. In case the parameter
3245 should be passed in memory, 0 is returned. As a special case for zero
3246 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3248 BIT_OFFSET is used internally for handling records; it gives the
3249 offset in bits, taken modulo 256 to avoid overflow cases.
3251 See the x86-64 PS ABI for details.  */
3254 static int
3255 classify_argument (enum machine_mode mode, tree type,
3256 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3258 HOST_WIDE_INT bytes =
3259 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3260 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
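/* Example of the size arithmetic above: a 20-byte BLKmode struct at
   bit_offset 0 needs (20 + 0 + 7) / 8 == 3 eightbyte words, though an
   aggregate that large is then rejected anyway by the 16-byte limit
   enforced below.  */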
3262 /* Variable sized entities are always passed/returned in memory. */
3263 if (bytes < 0)
3264   return 0;
3266 if (mode != VOIDmode
3267 && targetm.calls.must_pass_in_stack (mode, type))
3270 if (type && AGGREGATE_TYPE_P (type))
3274 enum x86_64_reg_class subclasses[MAX_CLASSES];
3276 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3277 if (bytes > 16)
3278   return 0;
3280 for (i = 0; i < words; i++)
3281 classes[i] = X86_64_NO_CLASS;
3283 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3284 signal the memory class, so handle it as a special case. */
3287 classes[0] = X86_64_NO_CLASS;
3291 /* Classify each field of record and merge classes. */
3292 switch (TREE_CODE (type))
3295 /* For classes, first merge in the fields of the subclasses. */
3296 if (TYPE_BINFO (type))
3298 tree binfo, base_binfo;
3301 for (binfo = TYPE_BINFO (type), basenum = 0;
3302 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3305 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3306 tree type = BINFO_TYPE (base_binfo);
3308 num = classify_argument (TYPE_MODE (type),
3310 (offset + bit_offset) % 256);
3313 for (i = 0; i < num; i++)
3315 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3317 merge_classes (subclasses[i], classes[i + pos]);
3321 /* And now merge the fields of the structure. */
3322 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3324 if (TREE_CODE (field) == FIELD_DECL)
3328 if (TREE_TYPE (field) == error_mark_node)
3331 /* Bitfields are always classified as integer. Handle them
3332 early, since later code would consider them to be
3333 misaligned integers. */
3334 if (DECL_BIT_FIELD (field))
3336 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3337 i < ((int_bit_position (field) + (bit_offset % 64))
3338 + tree_low_cst (DECL_SIZE (field), 0)
3341 merge_classes (X86_64_INTEGER_CLASS,
3346 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3347 TREE_TYPE (field), subclasses,
3348 (int_bit_position (field)
3349 + bit_offset) % 256);
3352 for (i = 0; i < num; i++)
3355 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3357 merge_classes (subclasses[i], classes[i + pos]);
3365 /* Arrays are handled as small records. */
3368 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3369 TREE_TYPE (type), subclasses, bit_offset);
3373 /* The partial classes are now full classes. */
3374 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3375 subclasses[0] = X86_64_SSE_CLASS;
3376 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3377 subclasses[0] = X86_64_INTEGER_CLASS;
3379 for (i = 0; i < words; i++)
3380 classes[i] = subclasses[i % num];
3385 case QUAL_UNION_TYPE:
3386 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3389 /* Unions are not derived. */
3390 gcc_assert (!TYPE_BINFO (type)
3391 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3392 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3394 if (TREE_CODE (field) == FIELD_DECL)
3398 if (TREE_TYPE (field) == error_mark_node)
3401 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3402 TREE_TYPE (field), subclasses,
3406 for (i = 0; i < num; i++)
3407 classes[i] = merge_classes (subclasses[i], classes[i]);
3416 /* Final merger cleanup. */
3417 for (i = 0; i < words; i++)
3419 /* If one class is MEMORY, everything should be passed in
memory. */
3421 if (classes[i] == X86_64_MEMORY_CLASS)
3424 /* The X86_64_SSEUP_CLASS should always be preceded by
3425 X86_64_SSE_CLASS. */
3426 if (classes[i] == X86_64_SSEUP_CLASS
3427 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3428 classes[i] = X86_64_SSE_CLASS;
3430 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3431 if (classes[i] == X86_64_X87UP_CLASS
3432 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3433 classes[i] = X86_64_SSE_CLASS;
3438 /* Compute the alignment needed. We align all types to their natural boundaries,
3439 with the exception of XFmode, which is aligned to 64 bits. */
3440 if (mode != VOIDmode && mode != BLKmode)
3442 int mode_alignment = GET_MODE_BITSIZE (mode);
3445 mode_alignment = 128;
3446 else if (mode == XCmode)
3447 mode_alignment = 256;
3448 if (COMPLEX_MODE_P (mode))
3449 mode_alignment /= 2;
3450 /* Misaligned fields are always returned in memory. */
3451 if (bit_offset % mode_alignment)
3455 /* For V1xx modes, just use the base mode. */
3456 if (VECTOR_MODE_P (mode)
3457 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3458 mode = GET_MODE_INNER (mode);
3460 /* Classification of atomic types. */
3465 classes[0] = X86_64_SSE_CLASS;
3468 classes[0] = X86_64_SSE_CLASS;
3469 classes[1] = X86_64_SSEUP_CLASS;
3478 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3479 classes[0] = X86_64_INTEGERSI_CLASS;
3481 classes[0] = X86_64_INTEGER_CLASS;
3485 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3490 if (!(bit_offset % 64))
3491 classes[0] = X86_64_SSESF_CLASS;
3493 classes[0] = X86_64_SSE_CLASS;
3496 classes[0] = X86_64_SSEDF_CLASS;
3499 classes[0] = X86_64_X87_CLASS;
3500 classes[1] = X86_64_X87UP_CLASS;
3503 classes[0] = X86_64_SSE_CLASS;
3504 classes[1] = X86_64_SSEUP_CLASS;
3507 classes[0] = X86_64_SSE_CLASS;
3510 classes[0] = X86_64_SSEDF_CLASS;
3511 classes[1] = X86_64_SSEDF_CLASS;
3514 classes[0] = X86_64_COMPLEX_X87_CLASS;
3517 /* These modes are larger than 16 bytes. */
3525 classes[0] = X86_64_SSE_CLASS;
3526 classes[1] = X86_64_SSEUP_CLASS;
3532 classes[0] = X86_64_SSE_CLASS;
3538 gcc_assert (VECTOR_MODE_P (mode));
3543 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3545 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3546 classes[0] = X86_64_INTEGERSI_CLASS;
3548 classes[0] = X86_64_INTEGER_CLASS;
3549 classes[1] = X86_64_INTEGER_CLASS;
3550 return 1 + (bytes > 8);
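/* Worked examples for the scalar cases above (illustrative, assuming
   the usual LP64 type-to-mode mapping and bit_offset 0):
     int          SImode -> INTEGERSI           (1 word)
     __int128     TImode -> INTEGER, INTEGER    (2 words)
     double       DFmode -> SSEDF               (1 word)
     long double  XFmode -> X87, X87UP          (2 words)  */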
3554 /* Examine the argument and return the number of registers required in each
3555 register class. Return 0 iff the parameter should be passed in memory. */
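/* For instance (illustrative): for
     struct s { double d; long l; };
   classify_argument yields {SSEDF, INTEGER}, so this function sets
   *sse_nregs = 1 and *int_nregs = 1 and returns nonzero; the structure
   travels in one SSE and one integer register when both are free.  */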
3557 examine_argument (enum machine_mode mode, tree type, int in_return,
3558 int *int_nregs, int *sse_nregs)
3560 enum x86_64_reg_class class[MAX_CLASSES];
3561 int n = classify_argument (mode, type, class, 0);
3567 for (n--; n >= 0; n--)
3570 case X86_64_INTEGER_CLASS:
3571 case X86_64_INTEGERSI_CLASS:
3574 case X86_64_SSE_CLASS:
3575 case X86_64_SSESF_CLASS:
3576 case X86_64_SSEDF_CLASS:
3579 case X86_64_NO_CLASS:
3580 case X86_64_SSEUP_CLASS:
3582 case X86_64_X87_CLASS:
3583 case X86_64_X87UP_CLASS:
3587 case X86_64_COMPLEX_X87_CLASS:
3588 return in_return ? 2 : 0;
3589 case X86_64_MEMORY_CLASS:
3595 /* Construct a container for the argument used by the GCC interface. See
3596 FUNCTION_ARG for a detailed description. */
3599 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3600 tree type, int in_return, int nintregs, int nsseregs,
3601 const int *intreg, int sse_regno)
3603 /* The following variables hold the static issued_error state. */
3604 static bool issued_sse_arg_error;
3605 static bool issued_sse_ret_error;
3606 static bool issued_x87_ret_error;
3608 enum machine_mode tmpmode;
3610 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3611 enum x86_64_reg_class class[MAX_CLASSES];
3615 int needed_sseregs, needed_intregs;
3616 rtx exp[MAX_CLASSES];
3619 n = classify_argument (mode, type, class, 0);
3620 if (TARGET_DEBUG_ARG)
3623 fprintf (stderr, "Memory class\n");
3626 fprintf (stderr, "Classes:");
3627 for (i = 0; i < n; i++)
3629 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3631 fprintf (stderr, "\n");
3636 if (!examine_argument (mode, type, in_return, &needed_intregs,
3639 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3642 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3643 some less clueful developer tries to use floating-point anyway. */
3644 if (needed_sseregs && !TARGET_SSE)
3648 if (!issued_sse_ret_error)
3650 error ("SSE register return with SSE disabled");
3651 issued_sse_ret_error = true;
3654 else if (!issued_sse_arg_error)
3656 error ("SSE register argument with SSE disabled");
3657 issued_sse_arg_error = true;
3662 /* Likewise, error if the ABI requires us to return values in the
3663 x87 registers and the user specified -mno-80387. */
3664 if (!TARGET_80387 && in_return)
3665 for (i = 0; i < n; i++)
3666 if (class[i] == X86_64_X87_CLASS
3667 || class[i] == X86_64_X87UP_CLASS
3668 || class[i] == X86_64_COMPLEX_X87_CLASS)
3670 if (!issued_x87_ret_error)
3672 error ("x87 register return with x87 disabled");
3673 issued_x87_ret_error = true;
3678 /* First construct the simple cases. Avoid SCmode, since we want to use a
3679 single register to pass this type. */
3680 if (n == 1 && mode != SCmode)
3683 case X86_64_INTEGER_CLASS:
3684 case X86_64_INTEGERSI_CLASS:
3685 return gen_rtx_REG (mode, intreg[0]);
3686 case X86_64_SSE_CLASS:
3687 case X86_64_SSESF_CLASS:
3688 case X86_64_SSEDF_CLASS:
3689 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3690 case X86_64_X87_CLASS:
3691 case X86_64_COMPLEX_X87_CLASS:
3692 return gen_rtx_REG (mode, FIRST_STACK_REG);
3693 case X86_64_NO_CLASS:
3694 /* Zero sized array, struct or class. */
3699 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3701 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3703 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3704 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3705 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3706 && class[1] == X86_64_INTEGER_CLASS
3707 && (mode == CDImode || mode == TImode || mode == TFmode)
3708 && intreg[0] + 1 == intreg[1])
3709 return gen_rtx_REG (mode, intreg[0]);
3711 /* Otherwise figure out the entries of the PARALLEL. */
3712 for (i = 0; i < n; i++)
3716 case X86_64_NO_CLASS:
3718 case X86_64_INTEGER_CLASS:
3719 case X86_64_INTEGERSI_CLASS:
3720 /* Merge TImodes on aligned occasions here too. */
3721 if (i * 8 + 8 > bytes)
3722 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3723 else if (class[i] == X86_64_INTEGERSI_CLASS)
3727 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3728 if (tmpmode == BLKmode)
3730 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3731 gen_rtx_REG (tmpmode, *intreg),
3735 case X86_64_SSESF_CLASS:
3736 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3737 gen_rtx_REG (SFmode,
3738 SSE_REGNO (sse_regno)),
3742 case X86_64_SSEDF_CLASS:
3743 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3744 gen_rtx_REG (DFmode,
3745 SSE_REGNO (sse_regno)),
3749 case X86_64_SSE_CLASS:
3750 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3754 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3755 gen_rtx_REG (tmpmode,
3756 SSE_REGNO (sse_regno)),
3758 if (tmpmode == TImode)
3767 /* Empty aligned struct, union or class. */
3771 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3772 for (i = 0; i < nexps; i++)
3773 XVECEXP (ret, 0, i) = exp [i];
3777 /* Update the data in CUM to advance over an argument
3778 of mode MODE and data type TYPE.
3779 (TYPE is null for libcalls where that information may not be available.) */
3782 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3783 tree type, int named)
3786 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3787 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3790 mode = type_natural_mode (type);
3792 if (TARGET_DEBUG_ARG)
3793 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3794 "mode=%s, named=%d)\n\n",
3795 words, cum->words, cum->nregs, cum->sse_nregs,
3796 GET_MODE_NAME (mode), named);
3800 int int_nregs, sse_nregs;
3801 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3802 cum->words += words;
3803 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3805 cum->nregs -= int_nregs;
3806 cum->sse_nregs -= sse_nregs;
3807 cum->regno += int_nregs;
3808 cum->sse_regno += sse_nregs;
3811 cum->words += words;
3829 cum->words += words;
3830 cum->nregs -= words;
3831 cum->regno += words;
3833 if (cum->nregs <= 0)
3841 if (cum->float_in_sse < 2)
3844 if (cum->float_in_sse < 1)
3855 if (!type || !AGGREGATE_TYPE_P (type))
3857 cum->sse_words += words;
3858 cum->sse_nregs -= 1;
3859 cum->sse_regno += 1;
3860 if (cum->sse_nregs <= 0)
3872 if (!type || !AGGREGATE_TYPE_P (type))
3874 cum->mmx_words += words;
3875 cum->mmx_nregs -= 1;
3876 cum->mmx_regno += 1;
3877 if (cum->mmx_nregs <= 0)
3888 /* Define where to put the arguments to a function.
3889 Value is zero to push the argument on the stack,
3890 or a hard register in which to store the argument.
3892 MODE is the argument's machine mode.
3893 TYPE is the data type of the argument (as a tree).
3894 This is null for libcalls where that information may
not be available.
3896 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3897 the preceding args and about the function being called.
3898 NAMED is nonzero if this argument is a named parameter
3899 (otherwise it is an extra parameter matching an ellipsis). */
3902 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3903 tree type, int named)
3905 enum machine_mode mode = orig_mode;
3908 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3909 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3910 static bool warnedsse, warnedmmx;
3912 /* To simplify the code below, represent vector types with a vector mode
3913 even if MMX/SSE are not active. */
3914 if (type && TREE_CODE (type) == VECTOR_TYPE)
3915 mode = type_natural_mode (type);
3917 /* Handle a hidden AL argument containing the number of registers for varargs
3918 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
any AL settings. */
3920 if (mode == VOIDmode)
3923 return GEN_INT (cum->maybe_vaarg
3924 ? (cum->sse_nregs < 0
3932 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3934 &x86_64_int_parameter_registers [cum->regno],
3939 /* For now, pass fp/complex values on the stack. */
3951 if (words <= cum->nregs)
3953 int regno = cum->regno;
3955 /* Fastcall allocates the first two DWORD (SImode) or
3956 smaller arguments to ECX and EDX. */
3959 if (mode == BLKmode || mode == DImode)
3962 /* ECX not EAX is the first allocated register. */
3966 ret = gen_rtx_REG (mode, regno);
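/* E.g. (illustrative): given
     void __attribute__((fastcall)) f (int a, int b, int c);
   A is passed in %ecx, B in %edx, and C on the stack, matching the
   regno adjustment above.  */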
3970 if (cum->float_in_sse < 2)
3973 if (cum->float_in_sse < 1)
3983 if (!type || !AGGREGATE_TYPE_P (type))
3985 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3988 warning (0, "SSE vector argument without SSE enabled "
3992 ret = gen_reg_or_parallel (mode, orig_mode,
3993 cum->sse_regno + FIRST_SSE_REG);
4000 if (!type || !AGGREGATE_TYPE_P (type))
4002 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4005 warning (0, "MMX vector argument without MMX enabled "
4009 ret = gen_reg_or_parallel (mode, orig_mode,
4010 cum->mmx_regno + FIRST_MMX_REG);
4015 if (TARGET_DEBUG_ARG)
4018 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4019 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4022 print_simple_rtl (stderr, ret);
4024 fprintf (stderr, ", stack");
4026 fprintf (stderr, " )\n");
4032 /* A C expression that indicates when an argument must be passed by
4033 reference. If nonzero for an argument, a copy of that argument is
4034 made in memory and a pointer to the argument is passed instead of
4035 the argument itself. The pointer is passed in whatever way is
4036 appropriate for passing a pointer to that type. */
4039 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4040 enum machine_mode mode ATTRIBUTE_UNUSED,
4041 tree type, bool named ATTRIBUTE_UNUSED)
4046 if (type && int_size_in_bytes (type) == -1)
4048 if (TARGET_DEBUG_ARG)
4049 fprintf (stderr, "function_arg_pass_by_reference\n");
4056 /* Return true when TYPE should be 128bit aligned for the 32bit argument
4057 passing ABI. Only called if TARGET_SSE. */
4059 contains_128bit_aligned_vector_p (tree type)
4061 enum machine_mode mode = TYPE_MODE (type);
4062 if (SSE_REG_MODE_P (mode)
4063 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4065 if (TYPE_ALIGN (type) < 128)
4068 if (AGGREGATE_TYPE_P (type))
4070 /* Walk the aggregates recursively. */
4071 switch (TREE_CODE (type))
4075 case QUAL_UNION_TYPE:
4079 if (TYPE_BINFO (type))
4081 tree binfo, base_binfo;
4084 for (binfo = TYPE_BINFO (type), i = 0;
4085 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4086 if (contains_128bit_aligned_vector_p
4087 (BINFO_TYPE (base_binfo)))
4090 /* And now merge the fields of the structure. */
4091 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4093 if (TREE_CODE (field) == FIELD_DECL
4094 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4101 /* Just for use if some languages pass arrays by value. */
4102 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4113 /* Gives the alignment boundary, in bits, of an argument with the
4114 specified mode and type. */
4117 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4121 align = TYPE_ALIGN (type);
4123 align = GET_MODE_ALIGNMENT (mode);
4124 if (align < PARM_BOUNDARY)
4125 align = PARM_BOUNDARY;
4128 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4129 make an exception for SSE modes since these require 128bit
alignment.
4132 The handling here differs from field_alignment. ICC aligns MMX
4133 arguments to 4 byte boundaries, while structure fields are aligned
4134 to 8 byte boundaries. */
4136 align = PARM_BOUNDARY;
4139 if (!SSE_REG_MODE_P (mode))
4140 align = PARM_BOUNDARY;
4144 if (!contains_128bit_aligned_vector_p (type))
4145 align = PARM_BOUNDARY;
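/* Illustrative outcomes of the rules above for the 32-bit ABI: ints
   and aggregates of ints get PARM_BOUNDARY alignment, while a __m128
   argument, or an aggregate containing one, keeps its 128bit
   alignment on the stack.  */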
4153 /* Return true if N is a possible register number of a function value. */
4155 ix86_function_value_regno_p (int regno)
4161 return ((regno) == 0
4162 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4163 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4165 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4166 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4167 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4172 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4173 || (regno == FIRST_SSE_REG && TARGET_SSE))
4177 && (regno == FIRST_MMX_REG && TARGET_MMX))
4184 /* Define how to find the value returned by a function.
4185 VALTYPE is the data type of the value (as a tree).
4186 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4187 otherwise, FUNC is 0. */
4189 ix86_function_value (tree valtype, tree fntype_or_decl,
4190 bool outgoing ATTRIBUTE_UNUSED)
4192 enum machine_mode natmode = type_natural_mode (valtype);
4196 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4197 1, REGPARM_MAX, SSE_REGPARM_MAX,
4198 x86_64_int_return_registers, 0);
4199 /* For zero sized structures, construct_container returns NULL, but we
4200 need to keep the rest of the compiler happy by returning a meaningful value. */
4202 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4207 tree fn = NULL_TREE, fntype;
4209 && DECL_P (fntype_or_decl))
4210 fn = fntype_or_decl;
4211 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4212 return gen_rtx_REG (TYPE_MODE (valtype),
4213 ix86_value_regno (natmode, fn, fntype));
4217 /* Return true iff type is returned in memory. */
4219 ix86_return_in_memory (tree type)
4221 int needed_intregs, needed_sseregs, size;
4222 enum machine_mode mode = type_natural_mode (type);
4225 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4227 if (mode == BLKmode)
4230 size = int_size_in_bytes (type);
4232 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4235 if (VECTOR_MODE_P (mode) || mode == TImode)
4237 /* User-created vectors small enough to fit in EAX. */
4241 /* MMX/3dNow values are returned in MM0,
4242 except when it doesn't exist. */
4244 return (TARGET_MMX ? 0 : 1);
4246 /* SSE values are returned in XMM0, except when it doesn't exist. */
4248 return (TARGET_SSE ? 0 : 1);
4262 /* When returning SSE vector types, we have a choice of either
4263 (1) being abi incompatible with a -march switch, or
4264 (2) generating an error.
4265 Given no good solution, I think the safest thing is one warning.
4266 The user won't be able to use -Werror, but....
4268 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4269 called in response to actually generating a caller or callee that
4270 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4271 via aggregate_value_p for general type probing from tree-ssa. */
4274 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4276 static bool warnedsse, warnedmmx;
4280 /* Look at the return type of the function, not the function type. */
4281 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4283 if (!TARGET_SSE && !warnedsse)
4286 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4289 warning (0, "SSE vector return without SSE enabled "
4294 if (!TARGET_MMX && !warnedmmx)
4296 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4299 warning (0, "MMX vector return without MMX enabled "
4308 /* Define how to find the value returned by a library function
4309 assuming the value has mode MODE. */
4311 ix86_libcall_value (enum machine_mode mode)
4325 return gen_rtx_REG (mode, FIRST_SSE_REG);
4328 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4332 return gen_rtx_REG (mode, 0);
4336 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4339 /* Given a mode, return the register to use for a return value. */
4342 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4344 gcc_assert (!TARGET_64BIT);
4346 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4347 we normally prevent this case when mmx is not available. However
4348 some ABIs may require the result to be returned like DImode. */
4349 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4350 return TARGET_MMX ? FIRST_MMX_REG : 0;
4352 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4353 we prevent this case when sse is not available. However some ABIs
4354 may require the result to be returned like integer TImode. */
4355 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4356 return TARGET_SSE ? FIRST_SSE_REG : 0;
4358 /* Decimal floating point values can go in %eax, unlike other float modes. */
4359 if (DECIMAL_FLOAT_MODE_P (mode))
4362 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4363 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4366 /* Floating point return values in %st(0), except for local functions when
4367 SSE math is enabled or for functions with sseregparm attribute. */
4368 if ((func || fntype)
4369 && (mode == SFmode || mode == DFmode))
4371 int sse_level = ix86_function_sseregparm (fntype, func);
4372 if ((sse_level >= 1 && mode == SFmode)
4373 || (sse_level == 2 && mode == DFmode))
4374 return FIRST_SSE_REG;
4377 return FIRST_FLOAT_REG;
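/* Summary of the above (illustrative): integral values return in
   %eax; with the default -mfp-ret-in-387, SFmode and DFmode values
   return in %st(0), unless a local function with SSE math or the
   sseregparm attribute selects %xmm0 instead.  */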
4380 /* Create the va_list data type. */
4383 ix86_build_builtin_va_list (void)
4385 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4387 /* For i386 we use a plain pointer to the argument area. */
4389 return build_pointer_type (char_type_node);
4391 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4392 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4394 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4395 unsigned_type_node);
4396 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4397 unsigned_type_node);
4398 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4400 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4403 va_list_gpr_counter_field = f_gpr;
4404 va_list_fpr_counter_field = f_fpr;
4406 DECL_FIELD_CONTEXT (f_gpr) = record;
4407 DECL_FIELD_CONTEXT (f_fpr) = record;
4408 DECL_FIELD_CONTEXT (f_ovf) = record;
4409 DECL_FIELD_CONTEXT (f_sav) = record;
4411 TREE_CHAIN (record) = type_decl;
4412 TYPE_NAME (record) = type_decl;
4413 TYPE_FIELDS (record) = f_gpr;
4414 TREE_CHAIN (f_gpr) = f_fpr;
4415 TREE_CHAIN (f_fpr) = f_ovf;
4416 TREE_CHAIN (f_ovf) = f_sav;
4418 layout_type (record);
4420 /* The correct type is an array type of one element. */
4421 return build_array_type (record, build_index_type (size_zero_node));
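/* The record built above corresponds to the familiar C-level picture
   of the 64-bit va_list (shown for illustration only; the pointer
   field types are assumptions of this sketch):
     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */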
4424 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4427 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4428 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4431 CUMULATIVE_ARGS next_cum;
4432 rtx save_area = NULL_RTX, mem;
4445 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4448 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4449 ix86_save_varrargs_registers = 1;
4451 cfun->stack_alignment_needed = 128;
4453 fntype = TREE_TYPE (current_function_decl);
4454 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4455 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4456 != void_type_node));
4458 /* For varargs, we do not want to skip the dummy va_dcl argument.
4459 For stdargs, we do want to skip the last named argument. */
4462 function_arg_advance (&next_cum, mode, type, 1);
4465 save_area = frame_pointer_rtx;
4467 set = get_varargs_alias_set ();
4469 for (i = next_cum.regno;
4471 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4474 mem = gen_rtx_MEM (Pmode,
4475 plus_constant (save_area, i * UNITS_PER_WORD));
4476 MEM_NOTRAP_P (mem) = 1;
4477 set_mem_alias_set (mem, set);
4478 emit_move_insn (mem, gen_rtx_REG (Pmode,
4479 x86_64_int_parameter_registers[i]));
4482 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4484 /* Now emit code to save SSE registers. The AX parameter contains the number
4485 of SSE parameter registers used to call this function. We use the
4486 sse_prologue_save insn template, which produces a computed jump across
4487 the SSE saves. We need some preparation work to get this working. */
4489 label = gen_label_rtx ();
4490 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4492 /* Compute address to jump to :
4493 label - 5*eax + nnamed_sse_arguments*5 */
4494 tmp_reg = gen_reg_rtx (Pmode);
4495 nsse_reg = gen_reg_rtx (Pmode);
4496 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4497 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4498 gen_rtx_MULT (Pmode, nsse_reg,
4500 if (next_cum.sse_regno)
4503 gen_rtx_CONST (DImode,
4504 gen_rtx_PLUS (DImode,
4506 GEN_INT (next_cum.sse_regno * 4))));
4508 emit_move_insn (nsse_reg, label_ref);
4509 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4511 /* Compute the address of the memory block we save into. We always use a
4512 pointer pointing 127 bytes after the first byte to store - this is needed to
4513 keep the instruction size limited to 4 bytes. */
4514 tmp_reg = gen_reg_rtx (Pmode);
4515 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4516 plus_constant (save_area,
4517 8 * REGPARM_MAX + 127)));
4518 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4519 MEM_NOTRAP_P (mem) = 1;
4520 set_mem_alias_set (mem, set);
4521 set_mem_align (mem, BITS_PER_WORD);
4523 /* And finally do the dirty job! */
4524 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4525 GEN_INT (next_cum.sse_regno), label));
4530 /* Implement va_start. */
4533 ix86_va_start (tree valist, rtx nextarg)
4535 HOST_WIDE_INT words, n_gpr, n_fpr;
4536 tree f_gpr, f_fpr, f_ovf, f_sav;
4537 tree gpr, fpr, ovf, sav, t;
4540 /* Only 64bit target needs something special. */
4543 std_expand_builtin_va_start (valist, nextarg);
4547 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4548 f_fpr = TREE_CHAIN (f_gpr);
4549 f_ovf = TREE_CHAIN (f_fpr);
4550 f_sav = TREE_CHAIN (f_ovf);
4552 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4553 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4554 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4555 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4556 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4558 /* Count number of gp and fp argument registers used. */
4559 words = current_function_args_info.words;
4560 n_gpr = current_function_args_info.regno;
4561 n_fpr = current_function_args_info.sse_regno;
4563 if (TARGET_DEBUG_ARG)
4564 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4565 (int) words, (int) n_gpr, (int) n_fpr);
4567 if (cfun->va_list_gpr_size)
4569 type = TREE_TYPE (gpr);
4570 t = build2 (MODIFY_EXPR, type, gpr,
4571 build_int_cst (type, n_gpr * 8));
4572 TREE_SIDE_EFFECTS (t) = 1;
4573 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4576 if (cfun->va_list_fpr_size)
4578 type = TREE_TYPE (fpr);
4579 t = build2 (MODIFY_EXPR, type, fpr,
4580 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4581 TREE_SIDE_EFFECTS (t) = 1;
4582 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
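/* Worked numbers (illustrative, assuming REGPARM_MAX == 6): after two
   named integer arguments and one named SSE argument, gp_offset
   becomes 2 * 8 == 16 and fp_offset becomes 6 * 8 + 1 * 16 == 64,
   since the save area holds eight bytes per GP register followed by
   sixteen bytes per SSE register.  */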
4585 /* Find the overflow area. */
4586 type = TREE_TYPE (ovf);
4587 t = make_tree (type, virtual_incoming_args_rtx);
4589 t = build2 (PLUS_EXPR, type, t,
4590 build_int_cst (type, words * UNITS_PER_WORD));
4591 t = build2 (MODIFY_EXPR, type, ovf, t);
4592 TREE_SIDE_EFFECTS (t) = 1;
4593 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4595 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4597 /* Find the register save area.
4598 The prologue of the function saves it right above the stack frame. */
4599 type = TREE_TYPE (sav);
4600 t = make_tree (type, frame_pointer_rtx);
4601 t = build2 (MODIFY_EXPR, type, sav, t);
4602 TREE_SIDE_EFFECTS (t) = 1;
4603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4607 /* Implement va_arg. */
4610 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4612 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4613 tree f_gpr, f_fpr, f_ovf, f_sav;
4614 tree gpr, fpr, ovf, sav, t;
4616 tree lab_false, lab_over = NULL_TREE;
4621 enum machine_mode nat_mode;
4623 /* Only 64bit target needs something special. */
4625 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4627 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4628 f_fpr = TREE_CHAIN (f_gpr);
4629 f_ovf = TREE_CHAIN (f_fpr);
4630 f_sav = TREE_CHAIN (f_ovf);
4632 valist = build_va_arg_indirect_ref (valist);
4633 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4634 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4635 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4636 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4638 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4640 type = build_pointer_type (type);
4641 size = int_size_in_bytes (type);
4642 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4644 nat_mode = type_natural_mode (type);
4645 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4646 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4648 /* Pull the value out of the saved registers. */
4650 addr = create_tmp_var (ptr_type_node, "addr");
4651 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4655 int needed_intregs, needed_sseregs;
4657 tree int_addr, sse_addr;
4659 lab_false = create_artificial_label ();
4660 lab_over = create_artificial_label ();
4662 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4664 need_temp = (!REG_P (container)
4665 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4666 || TYPE_ALIGN (type) > 128));
4668 /* In case we are passing a structure, verify that it is a consecutive block
4669 in the register save area. If not, we need to do moves. */
4670 if (!need_temp && !REG_P (container))
4672 /* Verify that all registers are strictly consecutive. */
4673 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4677 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4679 rtx slot = XVECEXP (container, 0, i);
4680 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4681 || INTVAL (XEXP (slot, 1)) != i * 16)
4689 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4691 rtx slot = XVECEXP (container, 0, i);
4692 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4693 || INTVAL (XEXP (slot, 1)) != i * 8)
4705 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4706 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4707 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4708 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4711 /* First ensure that we fit completely in registers. */
4714 t = build_int_cst (TREE_TYPE (gpr),
4715 (REGPARM_MAX - needed_intregs + 1) * 8);
4716 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4717 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4718 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4719 gimplify_and_add (t, pre_p);
4723 t = build_int_cst (TREE_TYPE (fpr),
4724 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4726 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4727 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4728 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4729 gimplify_and_add (t, pre_p);
4732 /* Compute index to start of area used for integer regs. */
4735 /* int_addr = gpr + sav; */
4736 t = fold_convert (ptr_type_node, gpr);
4737 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4738 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4739 gimplify_and_add (t, pre_p);
4743 /* sse_addr = fpr + sav; */
4744 t = fold_convert (ptr_type_node, fpr);
4745 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4746 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4747 gimplify_and_add (t, pre_p);
4752 tree temp = create_tmp_var (type, "va_arg_tmp");
4755 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4756 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4757 gimplify_and_add (t, pre_p);
4759 for (i = 0; i < XVECLEN (container, 0); i++)
4761 rtx slot = XVECEXP (container, 0, i);
4762 rtx reg = XEXP (slot, 0);
4763 enum machine_mode mode = GET_MODE (reg);
4764 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4765 tree addr_type = build_pointer_type (piece_type);
4768 tree dest_addr, dest;
4770 if (SSE_REGNO_P (REGNO (reg)))
4772 src_addr = sse_addr;
4773 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4777 src_addr = int_addr;
4778 src_offset = REGNO (reg) * 8;
4780 src_addr = fold_convert (addr_type, src_addr);
4781 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4782 size_int (src_offset)));
4783 src = build_va_arg_indirect_ref (src_addr);
4785 dest_addr = fold_convert (addr_type, addr);
4786 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4787 size_int (INTVAL (XEXP (slot, 1)))));
4788 dest = build_va_arg_indirect_ref (dest_addr);
4790 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4791 gimplify_and_add (t, pre_p);
4797 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4798 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4799 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4800 gimplify_and_add (t, pre_p);
4804 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4805 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4806 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4807 gimplify_and_add (t, pre_p);
4810 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4811 gimplify_and_add (t, pre_p);
4813 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4814 append_to_statement_list (t, pre_p);
4817 /* ... otherwise out of the overflow area. */
4819 /* Care for on-stack alignment if needed. */
4820 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4821 || integer_zerop (TYPE_SIZE (type)))
4825 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4826 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4827 build_int_cst (TREE_TYPE (ovf), align - 1));
4828 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4829 build_int_cst (TREE_TYPE (t), -align));
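/* The two statements above round the overflow pointer up to a
   multiple of ALIGN, i.e. t = (ovf + align - 1) & -align.  For
   example, with align == 16 a pointer of 0x1008 is bumped to 0x1010.  */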
4831 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4833 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4834 gimplify_and_add (t2, pre_p);
4836 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4837 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4838 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4839 gimplify_and_add (t, pre_p);
4843 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4844 append_to_statement_list (t, pre_p);
4847 ptrtype = build_pointer_type (type);
4848 addr = fold_convert (ptrtype, addr);
4851 addr = build_va_arg_indirect_ref (addr);
4852 return build_va_arg_indirect_ref (addr);
4855 /* Return nonzero if OPNUM's MEM should be matched
4856 in movabs* patterns. */
4859 ix86_check_movabs (rtx insn, int opnum)
4863 set = PATTERN (insn);
4864 if (GET_CODE (set) == PARALLEL)
4865 set = XVECEXP (set, 0, 0);
4866 gcc_assert (GET_CODE (set) == SET);
4867 mem = XEXP (set, opnum);
4868 while (GET_CODE (mem) == SUBREG)
4869 mem = SUBREG_REG (mem);
4870 gcc_assert (GET_CODE (mem) == MEM);
4871 return (volatile_ok || !MEM_VOLATILE_P (mem));
4874 /* Initialize the table of extra 80387 mathematical constants. */
4877 init_ext_80387_constants (void)
4879 static const char * cst[5] =
4881 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4882 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4883 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4884 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4885 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4889 for (i = 0; i < 5; i++)
4891 real_from_string (&ext_80387_constants_table[i], cst[i]);
4892 /* Ensure each constant is rounded to XFmode precision. */
4893 real_convert (&ext_80387_constants_table[i],
4894 XFmode, &ext_80387_constants_table[i]);
4897 ext_80387_constants_init = 1;
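/* Together with fldz and fld1 for 0.0 and 1.0, the table above covers
   the full set of x87 constants loadable by a single instruction;
   e.g. fldpi pushes pi onto the register stack without touching
   memory.  */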
4900 /* Return true if the constant is something that can be loaded with
4901 a special instruction. */
4904 standard_80387_constant_p (rtx x)
4906 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4909 if (x == CONST0_RTX (GET_MODE (x)))
4911 if (x == CONST1_RTX (GET_MODE (x)))
4914 /* For XFmode constants, try to find a special 80387 instruction when
4915 optimizing for size or on those CPUs that benefit from them. */
4916 if (GET_MODE (x) == XFmode
4917 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4922 if (! ext_80387_constants_init)
4923 init_ext_80387_constants ();
4925 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4926 for (i = 0; i < 5; i++)
4927 if (real_identical (&r, &ext_80387_constants_table[i]))
4934 /* Return the opcode of the special instruction to be used to load
the constant X. */
4938 standard_80387_constant_opcode (rtx x)
4940 switch (standard_80387_constant_p (x))
4961 /* Return the CONST_DOUBLE representing the 80387 constant that is
4962 loaded by the specified special instruction. The argument IDX
4963 matches the return value from standard_80387_constant_p. */
4966 standard_80387_constant_rtx (int idx)
4970 if (! ext_80387_constants_init)
4971 init_ext_80387_constants ();
4987 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4991 /* Return 1 if MODE is a valid mode for SSE. */
4993 standard_sse_mode_p (enum machine_mode mode)
5010 /* Return 1 if X is an FP constant we can load into an SSE register without
using memory. */
5013 standard_sse_constant_p (rtx x)
5015 enum machine_mode mode = GET_MODE (x);
5017 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5019 if (vector_all_ones_operand (x, mode)
5020 && standard_sse_mode_p (mode))
5021 return TARGET_SSE2 ? 2 : -1;
5026 /* Return the opcode of the special instruction to be used to load
the constant X. */
5030 standard_sse_constant_opcode (rtx insn, rtx x)
5032 switch (standard_sse_constant_p (x))
5035 if (get_attr_mode (insn) == MODE_V4SF)
5036 return "xorps\t%0, %0";
5037 else if (get_attr_mode (insn) == MODE_V2DF)
5038 return "xorpd\t%0, %0";
5040 return "pxor\t%0, %0";
5042 return "pcmpeqd\t%0, %0";
5047 /* Returns 1 if OP contains a symbol reference. */
5050 symbolic_reference_mentioned_p (rtx op)
5055 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5058 fmt = GET_RTX_FORMAT (GET_CODE (op));
5059 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5065 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5066 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5070 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5077 /* Return 1 if it is appropriate to emit `ret' instructions in the
5078 body of a function. Do this only if the epilogue is simple, needing a
5079 couple of insns. Prior to reloading, we can't tell how many registers
5080 must be saved, so return 0 then. Return 0 if there is no frame
5081 marker to de-allocate. */
5084 ix86_can_use_return_insn_p (void)
5086 struct ix86_frame frame;
5088 if (! reload_completed || frame_pointer_needed)
5091 /* Don't allow more than 32k pop, since that's all we can do
5092 with one instruction. */
5093 if (current_function_pops_args
5094 && current_function_args_size >= 32768)
5097 ix86_compute_frame_layout (&frame);
5098 return frame.to_allocate == 0 && frame.nregs == 0;
5101 /* Value should be nonzero if functions must have frame pointers.
5102 Zero means the frame pointer need not be set up (and parms may
5103 be accessed via the stack pointer) in functions that seem suitable. */
5106 ix86_frame_pointer_required (void)
5108 /* If we accessed previous frames, then the generated code expects
5109 to be able to access the saved ebp value in our frame. */
5110 if (cfun->machine->accesses_prev_frame)
5113 /* Several x86 os'es need a frame pointer for other reasons,
5114 usually pertaining to setjmp. */
5115 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5118 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5119 the frame pointer by default. Turn it back on now if we've not
5120 got a leaf function. */
5121 if (TARGET_OMIT_LEAF_FRAME_POINTER
5122 && (!current_function_is_leaf
5123 || ix86_current_function_calls_tls_descriptor))
5126 if (current_function_profile)
5132 /* Record that the current function accesses previous call frames. */
5135 ix86_setup_frame_addresses (void)
5137 cfun->machine->accesses_prev_frame = 1;
5140 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5141 # define USE_HIDDEN_LINKONCE 1
5143 # define USE_HIDDEN_LINKONCE 0
5146 static int pic_labels_used;
5148 /* Fills in the label name that should be used for a pc thunk for
5149 the given register. */
5152 get_pc_thunk_name (char name[32], unsigned int regno)
5154 gcc_assert (!TARGET_64BIT);
5156 if (USE_HIDDEN_LINKONCE)
5157 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5159 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
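/* E.g. (illustrative): for the %ebx thunk this yields
   "__i686.get_pc_thunk.bx" in the hidden-linkonce case, and an
   internal "LPR"-prefixed label otherwise.  */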
5163 /* This function generates code for -fpic that loads %ebx with
5164 the return address of the caller and then returns. */
5167 ix86_file_end (void)
5172 for (regno = 0; regno < 8; ++regno)
5176 if (! ((pic_labels_used >> regno) & 1))
5179 get_pc_thunk_name (name, regno);
5184 switch_to_section (darwin_sections[text_coal_section]);
5185 fputs ("\t.weak_definition\t", asm_out_file);
5186 assemble_name (asm_out_file, name);
5187 fputs ("\n\t.private_extern\t", asm_out_file);
5188 assemble_name (asm_out_file, name);
5189 fputs ("\n", asm_out_file);
5190 ASM_OUTPUT_LABEL (asm_out_file, name);
5194 if (USE_HIDDEN_LINKONCE)
5198 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5200 TREE_PUBLIC (decl) = 1;
5201 TREE_STATIC (decl) = 1;
5202 DECL_ONE_ONLY (decl) = 1;
5204 (*targetm.asm_out.unique_section) (decl, 0);
5205 switch_to_section (get_named_section (decl, NULL, 0));
5207 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5208 fputs ("\t.hidden\t", asm_out_file);
5209 assemble_name (asm_out_file, name);
5210 fputc ('\n', asm_out_file);
5211 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5215 switch_to_section (text_section);
5216 ASM_OUTPUT_LABEL (asm_out_file, name);
5219 xops[0] = gen_rtx_REG (SImode, regno);
5220 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5221 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5222 output_asm_insn ("ret", xops);
5225 if (NEED_INDICATE_EXEC_STACK)
5226 file_end_indicate_exec_stack ();
5229 /* Emit code for the SET_GOT patterns. */
5232 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5237 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5239 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5241 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5244 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5246 output_asm_insn ("call\t%a2", xops);
5249 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5250 is what will be referenced by the Mach-O PIC subsystem. */
5252 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5255 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5256 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5259 output_asm_insn ("pop{l}\t%0", xops);
5264 get_pc_thunk_name (name, REGNO (dest));
5265 pic_labels_used |= 1 << REGNO (dest);
5267 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5268 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5269 output_asm_insn ("call\t%X2", xops);
5270 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5271 is what will be referenced by the Mach-O PIC subsystem. */
5274 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5276 targetm.asm_out.internal_label (asm_out_file, "L",
5277 CODE_LABEL_NUMBER (label));
5284 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5285 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5287 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
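/* With PIC and deep branch prediction the emitted sequence is roughly
   (illustrative):
     call __i686.get_pc_thunk.bx
     addl $_GLOBAL_OFFSET_TABLE_, %ebx
   while the fallback path above uses an inline call/pop pair and then
   adds the label-relative GOT offset instead.  */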
5292 /* Generate a "push" pattern for input ARG. */
5297 return gen_rtx_SET (VOIDmode,
gen_rtx_MEM (Pmode,
5299 gen_rtx_PRE_DEC (Pmode,
5300 stack_pointer_rtx)),
arg);
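/* The resulting pattern has the shape (illustrative; on 32-bit
   targets Pmode is SImode):
     (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)
   i.e. an ordinary push of ARG through the stack pointer.  */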
5304 /* Return >= 0 if there is an unused call-clobbered register available
5305 for the entire function. */
5308 ix86_select_alt_pic_regnum (void)
5310 if (current_function_is_leaf && !current_function_profile
5311 && !ix86_current_function_calls_tls_descriptor)
5314 for (i = 2; i >= 0; --i)
5315 if (!regs_ever_live[i])
5319 return INVALID_REGNUM;
5322 /* Return 1 if we need to save REGNO. */
5324 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5326 if (pic_offset_table_rtx
5327 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5328 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5329 || current_function_profile
5330 || current_function_calls_eh_return
5331 || current_function_uses_const_pool))
5333 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5338 if (current_function_calls_eh_return && maybe_eh_return)
5343 unsigned test = EH_RETURN_DATA_REGNO (i);
5344 if (test == INVALID_REGNUM)
5351 if (cfun->machine->force_align_arg_pointer
5352 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5355 return (regs_ever_live[regno]
5356 && !call_used_regs[regno]
5357 && !fixed_regs[regno]
5358 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5361 /* Return the number of registers to be saved on the stack. */
5364 ix86_nsaved_regs (void)
5369 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5370 if (ix86_save_reg (regno, true))
5375 /* Return the offset between two registers, one to be eliminated, and the other
5376 its replacement, at the start of a routine. */
5379 ix86_initial_elimination_offset (int from, int to)
5381 struct ix86_frame frame;
5382 ix86_compute_frame_layout (&frame);
5384 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5385 return frame.hard_frame_pointer_offset;
5386 else if (from == FRAME_POINTER_REGNUM
5387 && to == HARD_FRAME_POINTER_REGNUM)
5388 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5391 gcc_assert (to == STACK_POINTER_REGNUM);
5393 if (from == ARG_POINTER_REGNUM)
5394 return frame.stack_pointer_offset;
5396 gcc_assert (from == FRAME_POINTER_REGNUM);
5397 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5401 /* Fill in the ix86_frame structure describing the frame of the currently compiled function. */
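/* Sketch of the layout being computed, from higher toward lower
   addresses (illustrative):
     return address
     saved frame pointer (if used)  <- hard_frame_pointer_offset
     saved registers
     va_arg save area (64-bit only)
     padding1
     local variables                <- frame_pointer_offset
     outgoing arguments
     padding2                       <- stack_pointer_offset  */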
5404 ix86_compute_frame_layout (struct ix86_frame *frame)
5406 HOST_WIDE_INT total_size;
5407 unsigned int stack_alignment_needed;
5408 HOST_WIDE_INT offset;
5409 unsigned int preferred_alignment;
5410 HOST_WIDE_INT size = get_frame_size ();
5412 frame->nregs = ix86_nsaved_regs ();
5415 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5416 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5418 /* During reload iteration the number of registers saved can change.
5419 Recompute the value as needed. Do not recompute when the number of registers
5420 didn't change, as reload does multiple calls to the function and does not
5421 expect the decision to change within a single iteration. */
5423 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5425 int count = frame->nregs;
5427 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5428 /* The fast prologue uses move instead of push to save registers. This
5429 is significantly longer, but also executes faster, as modern hardware
5430 can execute the moves in parallel, but can't do that for push/pop.
5432 Be careful about choosing which prologue to emit: when the function takes
5433 many instructions to execute, we may use the slow version, as well as when
5434 the function is known to be outside a hot spot (this is known with
5435 feedback only). Weight the size of the function by the number of registers
5436 to save, as it is cheap to use one or two push instructions but very
5437 slow to use many of them. */
5439 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5440 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5441 || (flag_branch_probabilities
5442 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5443 cfun->machine->use_fast_prologue_epilogue = false;
5445 cfun->machine->use_fast_prologue_epilogue
5446 = !expensive_function_p (count);
5448 if (TARGET_PROLOGUE_USING_MOVE
5449 && cfun->machine->use_fast_prologue_epilogue)
5450 frame->save_regs_using_mov = true;
5452 frame->save_regs_using_mov = false;
5455 /* Skip return address and saved base pointer. */
5456 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5458 frame->hard_frame_pointer_offset = offset;
5460 /* Do some sanity checking of stack_alignment_needed and
5461 preferred_alignment, since the i386 port is the only one using those
5462 features, and they may break easily. */
5464 gcc_assert (!size || stack_alignment_needed);
5465 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5466 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5467 gcc_assert (stack_alignment_needed
5468 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5470 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5471 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5473 /* Register save area */
5474 offset += frame->nregs * UNITS_PER_WORD;
5477 if (ix86_save_varrargs_registers)
5479 offset += X86_64_VARARGS_SIZE;
5480 frame->va_arg_size = X86_64_VARARGS_SIZE;
5483 frame->va_arg_size = 0;
5485 /* Align start of frame for local function. */
5486 frame->padding1 = ((offset + stack_alignment_needed - 1)
5487 & -stack_alignment_needed) - offset;
5489 offset += frame->padding1;
5491 /* Frame pointer points here. */
5492 frame->frame_pointer_offset = offset;
5496 /* Add the outgoing arguments area. It can be skipped if we eliminated
5497 all the function calls as dead code.
5498 Skipping is however impossible when the function calls alloca. The alloca
5499 expander assumes that the last current_function_outgoing_args_size bytes
5500 of the stack frame are unused. */
5501 if (ACCUMULATE_OUTGOING_ARGS
5502 && (!current_function_is_leaf || current_function_calls_alloca
5503 || ix86_current_function_calls_tls_descriptor))
5505 offset += current_function_outgoing_args_size;
5506 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5509 frame->outgoing_arguments_size = 0;
5511 /* Align the stack boundary. Only needed if we're calling another function
or using alloca. */
5513 if (!current_function_is_leaf || current_function_calls_alloca
5514 || ix86_current_function_calls_tls_descriptor)
5515 frame->padding2 = ((offset + preferred_alignment - 1)
5516 & -preferred_alignment) - offset;
5518 frame->padding2 = 0;
5520 offset += frame->padding2;
5522 /* We've reached the end of the stack frame. */
5523 frame->stack_pointer_offset = offset;
5525 /* The size the prologue needs to allocate. */
5526 frame->to_allocate =
5527 (size + frame->padding1 + frame->padding2
5528 + frame->outgoing_arguments_size + frame->va_arg_size);
5530 if ((!frame->to_allocate && frame->nregs <= 1)
5531 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5532 frame->save_regs_using_mov = false;
5534 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5535 && current_function_is_leaf
5536 && !ix86_current_function_calls_tls_descriptor)
5538 frame->red_zone_size = frame->to_allocate;
5539 if (frame->save_regs_using_mov)
5540 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5541 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5542 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5545 frame->red_zone_size = 0;
5546 frame->to_allocate -= frame->red_zone_size;
5547 frame->stack_pointer_offset -= frame->red_zone_size;
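/* Illustrative effect of the above: a 64-bit leaf function whose
   locals and register saves fit within the ABI-defined 128-byte red
   zone below the stack pointer ends up with to_allocate == 0, so the
   prologue emits no stack pointer adjustment at all.  */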
5549 fprintf (stderr, "nregs: %i\n", frame->nregs);
5550 fprintf (stderr, "size: %i\n", size);
5551 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5552 fprintf (stderr, "padding1: %i\n", frame->padding1);
5553 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5554 fprintf (stderr, "padding2: %i\n", frame->padding2);
5555 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5556 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5557 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5558 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5559 frame->hard_frame_pointer_offset);
5560 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5564 /* Emit code to save registers in the prologue. */
5567 ix86_emit_save_regs (void)
5572 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5573 if (ix86_save_reg (regno, true))
5575 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5576 RTX_FRAME_RELATED_P (insn) = 1;
5580 /* Emit code to save registers using MOV insns. The first register
5581 is saved at POINTER + OFFSET. */
5583 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5588 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5589 if (ix86_save_reg (regno, true))
5591 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5593 gen_rtx_REG (Pmode, regno));
5594 RTX_FRAME_RELATED_P (insn) = 1;
5595 offset += UNITS_PER_WORD;
5599 /* Expand prologue or epilogue stack adjustment.
5600 The pattern exists to put a dependency on all ebp-based memory accesses.
5601 STYLE should be negative if instructions should be marked as frame related,
5602 zero if the %r11 register is live and cannot be freely used, and positive
otherwise. */
5606 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5611 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5612 else if (x86_64_immediate_operand (offset, DImode))
5613 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5617 /* r11 is used by indirect sibcall return as well, set before the
5618 epilogue and used after the epilogue. ATM indirect sibcall
5619 shouldn't be used together with huge frame sizes in one
5620 function because of the frame_size check in sibcall.c. */
5622 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5623 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5625 RTX_FRAME_RELATED_P (insn) = 1;
5626 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5630 RTX_FRAME_RELATED_P (insn) = 1;
5633 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5636 ix86_internal_arg_pointer (void)
5638 bool has_force_align_arg_pointer =
5639 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5640 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5641 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5642 && DECL_NAME (current_function_decl)
5643 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5644 && DECL_FILE_SCOPE_P (current_function_decl))
5645 || ix86_force_align_arg_pointer
5646 || has_force_align_arg_pointer)
5648 /* Nested functions can't realign the stack due to a register
conflict. */
5650 if (DECL_CONTEXT (current_function_decl)
5651 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5653 if (ix86_force_align_arg_pointer)
5654 warning (0, "-mstackrealign ignored for nested functions");
5655 if (has_force_align_arg_pointer)
5656 error ("%s not supported for nested functions",
5657 ix86_force_align_arg_pointer_string);
5658 return virtual_incoming_args_rtx;
5660 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5661 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5664 return virtual_incoming_args_rtx;
5667 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5668 This is called from dwarf2out.c to emit call frame instructions
5669 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5671 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5673 rtx unspec = SET_SRC (pattern);
5674 gcc_assert (GET_CODE (unspec) == UNSPEC);
5678 case UNSPEC_REG_SAVE:
5679 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5680 SET_DEST (pattern));
5682 case UNSPEC_DEF_CFA:
5683 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5684 INTVAL (XVECEXP (unspec, 0, 0)));
5691 /* Expand the prologue into a bunch of separate insns. */
5694 ix86_expand_prologue (void)
5698 struct ix86_frame frame;
5699 HOST_WIDE_INT allocate;
5701 ix86_compute_frame_layout (&frame);
5703 if (cfun->machine->force_align_arg_pointer)
5707 /* Grab the argument pointer. */
5708 x = plus_constant (stack_pointer_rtx, 4);
5709 y = cfun->machine->force_align_arg_pointer;
5710 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5711 RTX_FRAME_RELATED_P (insn) = 1;
5713 /* The unwind info consists of two parts: install the fafp as the cfa,
5714 and record the fafp as the "save register" of the stack pointer.
5715 The latter is there so that the unwinder can see where it
5716 should restore the stack pointer across the stack-aligning `and' insn.  */
5717 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5718 x = gen_rtx_SET (VOIDmode, y, x);
5719 RTX_FRAME_RELATED_P (x) = 1;
5720 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5722 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5723 RTX_FRAME_RELATED_P (y) = 1;
5724 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5725 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5726 REG_NOTES (insn) = x;
5728 /* Align the stack. */
5729 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5732 /* And here we cheat like madmen with the unwind info. We force the
5733 cfa register back to sp+4, which is exactly what it was at the
5734 start of the function. Re-pushing the return address results in
5735 the return at the same spot relative to the cfa, and thus is
5736 correct wrt the unwind info. */
5737 x = cfun->machine->force_align_arg_pointer;
5738 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5739 insn = emit_insn (gen_push (x));
5740 RTX_FRAME_RELATED_P (insn) = 1;
5743 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5744 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5745 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5746 REG_NOTES (insn) = x;
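/* For illustration, the realignment sequence built above is roughly
   (assuming a 16-byte preferred boundary, with %ecx as the scratch reg):

	leal	4(%esp), %ecx		# grab the argument pointer
	andl	$-16, %esp		# align the stack
	pushl	-4(%ecx)		# re-push the return address  */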
5749 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5750 slower on all targets. Also sdb doesn't like it. */
5752 if (frame_pointer_needed)
5754 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5755 RTX_FRAME_RELATED_P (insn) = 1;
5757 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5758 RTX_FRAME_RELATED_P (insn) = 1;
5761 allocate = frame.to_allocate;
5763 if (!frame.save_regs_using_mov)
5764 ix86_emit_save_regs ();
5766 allocate += frame.nregs * UNITS_PER_WORD;
5768 /* When using the red zone we may start register saving before allocating
5769 the stack frame, saving one cycle of the prologue.  */
5770 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5771 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5772 : stack_pointer_rtx,
5773 -frame.nregs * UNITS_PER_WORD);
5777 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5778 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5779 GEN_INT (-allocate), -1);
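/* For illustration, together with the frame pointer setup above this
   yields the familiar 32-bit prologue, roughly

	pushl	%ebp
	movl	%esp, %ebp
	subl	$24, %esp		# frame.to_allocate + register saves  */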
5782 /* Only valid for Win32. */
5783 rtx eax = gen_rtx_REG (SImode, 0);
5784 bool eax_live = ix86_eax_live_at_start_p ();
5787 gcc_assert (!TARGET_64BIT);
5791 emit_insn (gen_push (eax));
5795 emit_move_insn (eax, GEN_INT (allocate));
5797 insn = emit_insn (gen_allocate_stack_worker (eax));
5798 RTX_FRAME_RELATED_P (insn) = 1;
5799 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5800 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5801 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5802 t, REG_NOTES (insn));
5806 if (frame_pointer_needed)
5807 t = plus_constant (hard_frame_pointer_rtx,
5810 - frame.nregs * UNITS_PER_WORD);
5812 t = plus_constant (stack_pointer_rtx, allocate);
5813 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5817 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5819 if (!frame_pointer_needed || !frame.to_allocate)
5820 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5822 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5823 -frame.nregs * UNITS_PER_WORD);
5826 pic_reg_used = false;
5827 if (pic_offset_table_rtx
5828 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5829 || current_function_profile))
5831 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5833 if (alt_pic_reg_used != INVALID_REGNUM)
5834 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5836 pic_reg_used = true;
5842 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5844 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5846 /* Even with accurate pre-reload life analysis, we can wind up
5847 deleting all references to the pic register after reload.
5848 Consider if cross-jumping unifies two sides of a branch
5849 controlled by a comparison vs the only read from a global.
5850 In which case, allow the set_got to be deleted, though we're
5851 too late to do anything about the ebx save in the prologue. */
5852 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5855 /* Prevent function calls from being scheduled before the call to mcount.
5856 In the pic_reg_used case, make sure that the got load isn't deleted.  */
5857 if (current_function_profile)
5858 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
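/* For illustration, one common form of the set_got sequence on 32-bit
   ELF targets (the exact code emitted depends on the target) is

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */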
5861 /* Emit code to restore saved registers using MOV insns. First register
5862 is restored from POINTER + OFFSET. */
5864 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5865 int maybe_eh_return)
5868 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5871 if (ix86_save_reg (regno, maybe_eh_return))
5873 /* Ensure that adjust_address won't be forced to produce a pointer
5874 outside the range allowed by the x86-64 instruction set.  */
5875 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5879 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5880 emit_move_insn (r11, GEN_INT (offset));
5881 emit_insn (gen_adddi3 (r11, r11, pointer));
5882 base_address = gen_rtx_MEM (Pmode, r11);
5885 emit_move_insn (gen_rtx_REG (Pmode, regno),
5886 adjust_address (base_address, Pmode, offset));
5887 offset += UNITS_PER_WORD;
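/* For illustration, a sketch of the large-offset path above: when the
   offset does not fit in 32 bits, a restore becomes roughly

	movabsq	$0x123456789, %r11
	addq	%rbp, %r11
	movq	(%r11), %rbx  */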
5891 /* Restore function stack, frame, and registers. */
5894 ix86_expand_epilogue (int style)
5897 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5898 struct ix86_frame frame;
5899 HOST_WIDE_INT offset;
5901 ix86_compute_frame_layout (&frame);
5903 /* Calculate start of saved registers relative to ebp. Special care
5904 must be taken for the normal return case of a function using
5905 eh_return: the eax and edx registers are marked as saved, but not
5906 restored along this path. */
5907 offset = frame.nregs;
5908 if (current_function_calls_eh_return && style != 2)
5910 offset *= -UNITS_PER_WORD;
5912 /* If we're only restoring one register and sp is not valid, then
5913 use a move instruction to restore the register, since it's
5914 less work than reloading sp and popping the register.
5916 The default code results in a stack adjustment using an add/lea instruction,
5917 while this code results in LEAVE instruction (or discrete equivalent),
5918 so it is profitable in some other cases as well. Especially when there
5919 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5920 and there is exactly one register to pop. This heuristic may need some
5921 tuning in the future.  */
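/* For illustration, a sketch contrasting the two strategies with a frame
   pointer and one register to restore: the move-based epilogue is roughly

	movl	-4(%ebp), %ebx
	leave

   versus the default pop-based form, roughly

	addl	$20, %esp
	popl	%ebx
	popl	%ebp  */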
5922 if ((!sp_valid && frame.nregs <= 1)
5923 || (TARGET_EPILOGUE_USING_MOVE
5924 && cfun->machine->use_fast_prologue_epilogue
5925 && (frame.nregs > 1 || frame.to_allocate))
5926 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5927 || (frame_pointer_needed && TARGET_USE_LEAVE
5928 && cfun->machine->use_fast_prologue_epilogue
5929 && frame.nregs == 1)
5930 || current_function_calls_eh_return)
5932 /* Restore registers. We can use ebp or esp to address the memory
5933 locations. If both are available, default to ebp, since offsets
5934 are known to be small.  The only exception is esp pointing directly to the
5935 end of the block of saved registers, where we may simplify the addressing mode.  */
5938 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5939 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5940 frame.to_allocate, style == 2);
5942 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5943 offset, style == 2);
5945 /* eh_return epilogues need %ecx added to the stack pointer. */
5948 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5950 if (frame_pointer_needed)
5952 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5953 tmp = plus_constant (tmp, UNITS_PER_WORD);
5954 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5956 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5957 emit_move_insn (hard_frame_pointer_rtx, tmp);
5959 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5964 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5965 tmp = plus_constant (tmp, (frame.to_allocate
5966 + frame.nregs * UNITS_PER_WORD));
5967 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5970 else if (!frame_pointer_needed)
5971 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5972 GEN_INT (frame.to_allocate
5973 + frame.nregs * UNITS_PER_WORD),
5975 /* If not an i386, mov & pop is faster than "leave". */
5976 else if (TARGET_USE_LEAVE || optimize_size
5977 || !cfun->machine->use_fast_prologue_epilogue)
5978 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5981 pro_epilogue_adjust_stack (stack_pointer_rtx,
5982 hard_frame_pointer_rtx,
5985 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5987 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5992 /* First step is to deallocate the stack frame so that we can
5993 pop the registers. */
5996 gcc_assert (frame_pointer_needed);
5997 pro_epilogue_adjust_stack (stack_pointer_rtx,
5998 hard_frame_pointer_rtx,
5999 GEN_INT (offset), style);
6001 else if (frame.to_allocate)
6002 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6003 GEN_INT (frame.to_allocate), style);
6005 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6006 if (ix86_save_reg (regno, false))
6009 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6011 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6013 if (frame_pointer_needed)
6015 /* Leave results in shorter dependency chains on CPUs that are
6016 able to grok it fast. */
6017 if (TARGET_USE_LEAVE)
6018 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6019 else if (TARGET_64BIT)
6020 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6022 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6026 if (cfun->machine->force_align_arg_pointer)
6028 emit_insn (gen_addsi3 (stack_pointer_rtx,
6029 cfun->machine->force_align_arg_pointer,
6033 /* Sibcall epilogues don't want a return instruction. */
6037 if (current_function_pops_args && current_function_args_size)
6039 rtx popc = GEN_INT (current_function_pops_args);
6041 /* i386 can only pop 64K bytes. If asked to pop more, pop
6042 return address, do an explicit add, and jump indirectly to the caller.  */
6045 if (current_function_pops_args >= 65536)
6047 rtx ecx = gen_rtx_REG (SImode, 2);
6049 /* There is no "pascal" calling convention in the 64bit ABI.  */
6050 gcc_assert (!TARGET_64BIT);
6052 emit_insn (gen_popsi1 (ecx));
6053 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6054 emit_jump_insn (gen_return_indirect_internal (ecx));
6057 emit_jump_insn (gen_return_pop_internal (popc));
6060 emit_jump_insn (gen_return_internal ());
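/* For illustration, a stdcall function popping 8 bytes of arguments
   returns with

	ret	$8

   while one asked to pop 64K or more uses the indirect form above, roughly

	popl	%ecx
	addl	$65536, %esp
	jmp	*%ecx  */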
6063 /* Reset from the function's potential modifications. */
6066 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6067 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6069 if (pic_offset_table_rtx)
6070 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6072 /* Mach-O doesn't support labels at the end of objects, so if
6073 it looks like we might want one, insert a NOP. */
6075 rtx insn = get_last_insn ();
6078 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6079 insn = PREV_INSN (insn);
6083 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6084 fputs ("\tnop\n", file);
6090 /* Extract the parts of an RTL expression that is a valid memory address
6091 for an instruction. Return 0 if the structure of the address is
6092 grossly off. Return -1 if the address contains ASHIFT, so it is not
6093 strictly valid, but still used for computing the length of an lea instruction.  */
6096 ix86_decompose_address (rtx addr, struct ix86_address *out)
6098 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6099 rtx base_reg, index_reg;
6100 HOST_WIDE_INT scale = 1;
6101 rtx scale_rtx = NULL_RTX;
6103 enum ix86_address_seg seg = SEG_DEFAULT;
6105 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6107 else if (GET_CODE (addr) == PLUS)
6117 addends[n++] = XEXP (op, 1);
6120 while (GET_CODE (op) == PLUS);
6125 for (i = n; i >= 0; --i)
6128 switch (GET_CODE (op))
6133 index = XEXP (op, 0);
6134 scale_rtx = XEXP (op, 1);
6138 if (XINT (op, 1) == UNSPEC_TP
6139 && TARGET_TLS_DIRECT_SEG_REFS
6140 && seg == SEG_DEFAULT)
6141 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6170 else if (GET_CODE (addr) == MULT)
6172 index = XEXP (addr, 0); /* index*scale */
6173 scale_rtx = XEXP (addr, 1);
6175 else if (GET_CODE (addr) == ASHIFT)
6179 /* We're called for lea too, which implements ashift on occasion. */
6180 index = XEXP (addr, 0);
6181 tmp = XEXP (addr, 1);
6182 if (GET_CODE (tmp) != CONST_INT)
6184 scale = INTVAL (tmp);
6185 if ((unsigned HOST_WIDE_INT) scale > 3)
6191 disp = addr; /* displacement */
6193 /* Extract the integral value of scale. */
6196 if (GET_CODE (scale_rtx) != CONST_INT)
6198 scale = INTVAL (scale_rtx);
6201 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6202 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6204 /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
6205 if (base_reg && index_reg && scale == 1
6206 && (index_reg == arg_pointer_rtx
6207 || index_reg == frame_pointer_rtx
6208 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6211 tmp = base, base = index, index = tmp;
6212 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6215 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6216 if ((base_reg == hard_frame_pointer_rtx
6217 || base_reg == frame_pointer_rtx
6218 || base_reg == arg_pointer_rtx) && !disp)
6221 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6222 Avoid this by transforming to [%esi+0]. */
6223 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6224 && base_reg && !index_reg && !disp
6226 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6229 /* Special case: encode reg+reg instead of reg*2. */
6230 if (!base && index && scale && scale == 2)
6231 base = index, base_reg = index_reg, scale = 1;
6233 /* Special case: scaling cannot be encoded without base or displacement. */
6234 if (!base && !disp && index && scale != 1)
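/* For illustration, a sketch of the last two rewrites: with no base,
   "index*2" would need a SIB byte plus a four-byte zero displacement,
   so (,%eax,2) is encoded as the shorter (%eax,%eax); a scaled index
   that still has neither base nor displacement cannot be encoded at
   all without adding one of them.  */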
6246 /* Return cost of the memory address x.
6247 For i386, it is better to use a complex address than let gcc copy
6248 the address into a reg and make a new pseudo. But not if the address
6249 requires two regs - that would mean more pseudos with longer lifetimes.  */
6252 ix86_address_cost (rtx x)
6254 struct ix86_address parts;
6256 int ok = ix86_decompose_address (x, &parts);
6260 if (parts.base && GET_CODE (parts.base) == SUBREG)
6261 parts.base = SUBREG_REG (parts.base);
6262 if (parts.index && GET_CODE (parts.index) == SUBREG)
6263 parts.index = SUBREG_REG (parts.index);
6265 /* More complex memory references are better. */
6266 if (parts.disp && parts.disp != const0_rtx)
6268 if (parts.seg != SEG_DEFAULT)
6271 /* Attempt to minimize number of registers in the address. */
6273 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6275 && (!REG_P (parts.index)
6276 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6280 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6282 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6283 && parts.base != parts.index)
6286 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6287 since its predecode logic can't detect the length of instructions
6288 and it degenerates to vector decoding.  Increase the cost of such
6289 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
6290 to split such addresses or even refuse such addresses at all.

6292 Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

6297 The first and last case may be avoidable by explicitly coding the zero in
6298 the memory address, but I don't have an AMD-K6 machine handy to check this theory.  */
6302 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6303 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6304 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6310 /* If X is a machine specific address (i.e. a symbol or label being
6311 referenced as a displacement from the GOT implemented using an
6312 UNSPEC), then return the base term. Otherwise return X. */
6315 ix86_find_base_term (rtx x)
6321 if (GET_CODE (x) != CONST)
6324 if (GET_CODE (term) == PLUS
6325 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6326 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6327 term = XEXP (term, 0);
6328 if (GET_CODE (term) != UNSPEC
6329 || XINT (term, 1) != UNSPEC_GOTPCREL)
6332 term = XVECEXP (term, 0, 0);
6334 if (GET_CODE (term) != SYMBOL_REF
6335 && GET_CODE (term) != LABEL_REF)
6341 term = ix86_delegitimize_address (x);
6343 if (GET_CODE (term) != SYMBOL_REF
6344 && GET_CODE (term) != LABEL_REF)
6350 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6351 this is used to form addresses to local data when -fPIC is in effect.  */
6355 darwin_local_data_pic (rtx disp)
6357 if (GET_CODE (disp) == MINUS)
6359 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6360 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6361 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6363 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6364 if (! strcmp (sym_name, "<pic base>"))
6372 /* Determine if a given RTX is a valid constant. We already know this
6373 satisfies CONSTANT_P. */
6376 legitimate_constant_p (rtx x)
6378 switch (GET_CODE (x))
6383 if (GET_CODE (x) == PLUS)
6385 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6390 if (TARGET_MACHO && darwin_local_data_pic (x))
6393 /* Only some unspecs are valid as "constants". */
6394 if (GET_CODE (x) == UNSPEC)
6395 switch (XINT (x, 1))
6398 return TARGET_64BIT;
6401 x = XVECEXP (x, 0, 0);
6402 return (GET_CODE (x) == SYMBOL_REF
6403 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6405 x = XVECEXP (x, 0, 0);
6406 return (GET_CODE (x) == SYMBOL_REF
6407 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6412 /* We must have drilled down to a symbol. */
6413 if (GET_CODE (x) == LABEL_REF)
6415 if (GET_CODE (x) != SYMBOL_REF)
6420 /* TLS symbols are never valid. */
6421 if (SYMBOL_REF_TLS_MODEL (x))
6426 if (GET_MODE (x) == TImode
6427 && x != CONST0_RTX (TImode)
6433 if (x == CONST0_RTX (GET_MODE (x)))
6441 /* Otherwise we handle everything else in the move patterns. */
6445 /* Determine if it's legal to put X into the constant pool. This
6446 is not possible for the address of thread-local symbols, which
6447 is checked above. */
6450 ix86_cannot_force_const_mem (rtx x)
6452 /* We can always put integral constants and vectors in memory. */
6453 switch (GET_CODE (x))
6463 return !legitimate_constant_p (x);
6466 /* Determine if a given RTX is a valid constant address. */
6469 constant_address_p (rtx x)
6471 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6474 /* Nonzero if the constant value X is a legitimate general operand
6475 when generating PIC code. It is given that flag_pic is on and
6476 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6479 legitimate_pic_operand_p (rtx x)
6483 switch (GET_CODE (x))
6486 inner = XEXP (x, 0);
6487 if (GET_CODE (inner) == PLUS
6488 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6489 inner = XEXP (inner, 0);
6491 /* Only some unspecs are valid as "constants". */
6492 if (GET_CODE (inner) == UNSPEC)
6493 switch (XINT (inner, 1))
6496 return TARGET_64BIT;
6498 x = XVECEXP (inner, 0, 0);
6499 return (GET_CODE (x) == SYMBOL_REF
6500 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6508 return legitimate_pic_address_disp_p (x);
6515 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode.  */
6519 legitimate_pic_address_disp_p (rtx disp)
6523 /* In 64bit mode we can allow direct addresses of symbols and labels
6524 when they are not dynamic symbols. */
6527 rtx op0 = disp, op1;
6529 switch (GET_CODE (disp))
6535 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6537 op0 = XEXP (XEXP (disp, 0), 0);
6538 op1 = XEXP (XEXP (disp, 0), 1);
6539 if (GET_CODE (op1) != CONST_INT
6540 || INTVAL (op1) >= 16*1024*1024
6541 || INTVAL (op1) < -16*1024*1024)
6543 if (GET_CODE (op0) == LABEL_REF)
6545 if (GET_CODE (op0) != SYMBOL_REF)
6550 /* TLS references should always be enclosed in UNSPEC. */
6551 if (SYMBOL_REF_TLS_MODEL (op0))
6553 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6561 if (GET_CODE (disp) != CONST)
6563 disp = XEXP (disp, 0);
6567 /* It is unsafe to allow PLUS expressions here; this limits the allowed
6568 distance of GOT table references.  We should not need these anyway.  */
6569 if (GET_CODE (disp) != UNSPEC
6570 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6571 && XINT (disp, 1) != UNSPEC_GOTOFF))
6574 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6575 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6581 if (GET_CODE (disp) == PLUS)
6583 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6585 disp = XEXP (disp, 0);
6589 if (TARGET_MACHO && darwin_local_data_pic (disp))
6592 if (GET_CODE (disp) != UNSPEC)
6595 switch (XINT (disp, 1))
6600 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6602 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6603 While the ABI also specifies a 32bit relocation, we don't produce it in
6604 the small PIC model at all.  */
6605 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6606 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6608 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6610 case UNSPEC_GOTTPOFF:
6611 case UNSPEC_GOTNTPOFF:
6612 case UNSPEC_INDNTPOFF:
6615 disp = XVECEXP (disp, 0, 0);
6616 return (GET_CODE (disp) == SYMBOL_REF
6617 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6619 disp = XVECEXP (disp, 0, 0);
6620 return (GET_CODE (disp) == SYMBOL_REF
6621 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6623 disp = XVECEXP (disp, 0, 0);
6624 return (GET_CODE (disp) == SYMBOL_REF
6625 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6631 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6632 memory address for an instruction. The MODE argument is the machine mode
6633 for the MEM expression that wants to use this address.
6635 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6636 convert common non-canonical forms to canonical form so that they will be recognized.  */
6640 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6642 struct ix86_address parts;
6643 rtx base, index, disp;
6644 HOST_WIDE_INT scale;
6645 const char *reason = NULL;
6646 rtx reason_rtx = NULL_RTX;
6648 if (TARGET_DEBUG_ADDR)
6651 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6652 GET_MODE_NAME (mode), strict);
6656 if (ix86_decompose_address (addr, &parts) <= 0)
6658 reason = "decomposition failed";
6663 index = parts.index;
6665 scale = parts.scale;
6667 /* Validate base register.
6669 Don't allow SUBREG's that span more than a word here. It can lead to spill
6670 failures when the base is one word out of a two word structure, which is
6671 represented internally as a DImode int. */
6680 else if (GET_CODE (base) == SUBREG
6681 && REG_P (SUBREG_REG (base))
6682 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6684 reg = SUBREG_REG (base);
6687 reason = "base is not a register";
6691 if (GET_MODE (base) != Pmode)
6693 reason = "base is not in Pmode";
6697 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6698 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6700 reason = "base is not valid";
6705 /* Validate index register.
6707 Don't allow SUBREG's that span more than a word here -- same as above. */
6716 else if (GET_CODE (index) == SUBREG
6717 && REG_P (SUBREG_REG (index))
6718 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6720 reg = SUBREG_REG (index);
6723 reason = "index is not a register";
6727 if (GET_MODE (index) != Pmode)
6729 reason = "index is not in Pmode";
6733 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6734 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6736 reason = "index is not valid";
6741 /* Validate scale factor. */
6744 reason_rtx = GEN_INT (scale);
6747 reason = "scale without index";
6751 if (scale != 2 && scale != 4 && scale != 8)
6753 reason = "scale is not a valid multiplier";
6758 /* Validate displacement. */
6763 if (GET_CODE (disp) == CONST
6764 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6765 switch (XINT (XEXP (disp, 0), 1))
6767 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6768 used.  While the ABI also specifies 32bit relocations, we don't produce
6769 them at all and use IP-relative addressing instead.  */
6772 gcc_assert (flag_pic);
6774 goto is_legitimate_pic;
6775 reason = "64bit address unspec";
6778 case UNSPEC_GOTPCREL:
6779 gcc_assert (flag_pic);
6780 goto is_legitimate_pic;
6782 case UNSPEC_GOTTPOFF:
6783 case UNSPEC_GOTNTPOFF:
6784 case UNSPEC_INDNTPOFF:
6790 reason = "invalid address unspec";
6794 else if (SYMBOLIC_CONST (disp)
6798 && MACHOPIC_INDIRECT
6799 && !machopic_operand_p (disp)
6805 if (TARGET_64BIT && (index || base))
6807 /* foo@dtpoff(%rX) is ok. */
6808 if (GET_CODE (disp) != CONST
6809 || GET_CODE (XEXP (disp, 0)) != PLUS
6810 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6811 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6812 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6813 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6815 reason = "non-constant pic memory reference";
6819 else if (! legitimate_pic_address_disp_p (disp))
6821 reason = "displacement is an invalid pic construct";
6825 /* This code used to verify that a symbolic pic displacement
6826 includes the pic_offset_table_rtx register.
6828 While this is a good idea, unfortunately these constructs may
6829 be created by the "adds using lea" optimization for incorrect
code like:

     int a;
     int foo (int i)
       {
         return *(&a + i);
       }

6838 This code is nonsensical, but results in addressing the
6839 GOT table with a pic_offset_table_rtx base.  We can't
6840 just refuse it easily, since it gets matched by the
6841 "addsi3" pattern, which later gets split to lea in the
6842 case the output register differs from the input.  While this
6843 could be handled by a separate addsi pattern for this case
6844 that never results in lea, disabling this test seems to be the
6845 easier and correct fix for the crash.  */
6847 else if (GET_CODE (disp) != LABEL_REF
6848 && GET_CODE (disp) != CONST_INT
6849 && (GET_CODE (disp) != CONST
6850 || !legitimate_constant_p (disp))
6851 && (GET_CODE (disp) != SYMBOL_REF
6852 || !legitimate_constant_p (disp)))
6854 reason = "displacement is not constant";
6857 else if (TARGET_64BIT
6858 && !x86_64_immediate_operand (disp, VOIDmode))
6860 reason = "displacement is out of range";
6865 /* Everything looks valid. */
6866 if (TARGET_DEBUG_ADDR)
6867 fprintf (stderr, "Success.\n");
6871 if (TARGET_DEBUG_ADDR)
6873 fprintf (stderr, "Error: %s\n", reason);
6874 debug_rtx (reason_rtx);
6879 /* Return a unique alias set for the GOT. */
6881 static HOST_WIDE_INT
6882 ix86_GOT_alias_set (void)
6884 static HOST_WIDE_INT set = -1;
6886 set = new_alias_set ();
6890 /* Return a legitimate reference for ORIG (an address) using the
6891 register REG. If REG is 0, a new pseudo is generated.
6893 There are two types of references that must be handled:
6895 1. Global data references must load the address from the GOT, via
6896 the PIC reg. An insn is emitted to do this load, and the reg is
6899 2. Static data references, constant pool addresses, and code labels
6900 compute the address as an offset from the GOT, whose base is in
6901 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6902 differentiate them from global data objects. The returned
6903 address is the PIC reg + an unspec constant.
6905 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6906 reg also appears in the address. */
6909 legitimize_pic_address (rtx orig, rtx reg)
6916 if (TARGET_MACHO && !TARGET_64BIT)
6919 reg = gen_reg_rtx (Pmode);
6920 /* Use the generic Mach-O PIC machinery. */
6921 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6925 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6927 else if (TARGET_64BIT
6928 && ix86_cmodel != CM_SMALL_PIC
6929 && local_symbolic_operand (addr, Pmode))
6932 /* This symbol may be referenced via a displacement from the PIC
6933 base address (@GOTOFF). */
6935 if (reload_in_progress)
6936 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6937 if (GET_CODE (addr) == CONST)
6938 addr = XEXP (addr, 0);
6939 if (GET_CODE (addr) == PLUS)
6941 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6942 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6945 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6946 new = gen_rtx_CONST (Pmode, new);
6948 tmpreg = gen_reg_rtx (Pmode);
6951 emit_move_insn (tmpreg, new);
6955 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6956 tmpreg, 1, OPTAB_DIRECT);
6959 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6961 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6963 /* This symbol may be referenced via a displacement from the PIC
6964 base address (@GOTOFF). */
6966 if (reload_in_progress)
6967 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6968 if (GET_CODE (addr) == CONST)
6969 addr = XEXP (addr, 0);
6970 if (GET_CODE (addr) == PLUS)
6972 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6973 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6976 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6977 new = gen_rtx_CONST (Pmode, new);
6978 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6982 emit_move_insn (reg, new);
6986 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6990 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6991 new = gen_rtx_CONST (Pmode, new);
6992 new = gen_const_mem (Pmode, new);
6993 set_mem_alias_set (new, ix86_GOT_alias_set ());
6996 reg = gen_reg_rtx (Pmode);
6997 /* Use gen_movsi directly, otherwise the address is loaded
6998 into a register for CSE.  We don't want to CSE these addresses;
6999 instead we CSE addresses from the GOT table, so skip this.  */
7000 emit_insn (gen_movsi (reg, new));
7005 /* This symbol must be referenced via a load from the
7006 Global Offset Table (@GOT). */
7008 if (reload_in_progress)
7009 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7010 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7011 new = gen_rtx_CONST (Pmode, new);
7012 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7013 new = gen_const_mem (Pmode, new);
7014 set_mem_alias_set (new, ix86_GOT_alias_set ());
7017 reg = gen_reg_rtx (Pmode);
7018 emit_move_insn (reg, new);
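/* For illustration, the loads built above come out roughly as

	movl	foo@GOT(%ebx), %eax	# 32-bit PIC

   or, in 64-bit mode via the UNSPEC_GOTPCREL case,

	movq	foo@GOTPCREL(%rip), %rax  */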
7024 if (GET_CODE (addr) == CONST_INT
7025 && !x86_64_immediate_operand (addr, VOIDmode))
7029 emit_move_insn (reg, addr);
7033 new = force_reg (Pmode, addr);
7035 else if (GET_CODE (addr) == CONST)
7037 addr = XEXP (addr, 0);
7039 /* We must match stuff we generate before. Assume the only
7040 unspecs that can get here are ours. Not that we could do
7041 anything with them anyway.... */
7042 if (GET_CODE (addr) == UNSPEC
7043 || (GET_CODE (addr) == PLUS
7044 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7046 gcc_assert (GET_CODE (addr) == PLUS);
7048 if (GET_CODE (addr) == PLUS)
7050 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7052 /* Check first to see if this is a constant offset from a @GOTOFF
7053 symbol reference. */
7054 if (local_symbolic_operand (op0, Pmode)
7055 && GET_CODE (op1) == CONST_INT)
7059 if (reload_in_progress)
7060 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7061 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7063 new = gen_rtx_PLUS (Pmode, new, op1);
7064 new = gen_rtx_CONST (Pmode, new);
7065 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7069 emit_move_insn (reg, new);
7075 if (INTVAL (op1) < -16*1024*1024
7076 || INTVAL (op1) >= 16*1024*1024)
7078 if (!x86_64_immediate_operand (op1, Pmode))
7079 op1 = force_reg (Pmode, op1);
7080 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7086 base = legitimize_pic_address (XEXP (addr, 0), reg);
7087 new = legitimize_pic_address (XEXP (addr, 1),
7088 base == reg ? NULL_RTX : reg);
7090 if (GET_CODE (new) == CONST_INT)
7091 new = plus_constant (base, INTVAL (new));
7094 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7096 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7097 new = XEXP (new, 1);
7099 new = gen_rtx_PLUS (Pmode, base, new);
7107 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7110 get_thread_pointer (int to_reg)
7114 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7118 reg = gen_reg_rtx (Pmode);
7119 insn = gen_rtx_SET (VOIDmode, reg, tp);
7120 insn = emit_insn (insn);
7125 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7126 false if we expect this to be used for a memory address and true if
7127 we expect to load the address into a register. */
7130 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7132 rtx dest, base, off, pic, tp;
7137 case TLS_MODEL_GLOBAL_DYNAMIC:
7138 dest = gen_reg_rtx (Pmode);
7139 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7141 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7143 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7146 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7147 insns = get_insns ();
7150 emit_libcall_block (insns, dest, rax, x);
7152 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7153 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7155 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7157 if (TARGET_GNU2_TLS)
7159 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7161 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7165 case TLS_MODEL_LOCAL_DYNAMIC:
7166 base = gen_reg_rtx (Pmode);
7167 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7169 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7171 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7174 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7175 insns = get_insns ();
7178 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7179 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7180 emit_libcall_block (insns, base, rax, note);
7182 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7183 emit_insn (gen_tls_local_dynamic_base_64 (base));
7185 emit_insn (gen_tls_local_dynamic_base_32 (base));
7187 if (TARGET_GNU2_TLS)
7189 rtx x = ix86_tls_module_base ();
7191 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7192 gen_rtx_MINUS (Pmode, x, tp));
7195 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7196 off = gen_rtx_CONST (Pmode, off);
7198 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7200 if (TARGET_GNU2_TLS)
7202 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7204 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7209 case TLS_MODEL_INITIAL_EXEC:
7213 type = UNSPEC_GOTNTPOFF;
7217 if (reload_in_progress)
7218 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7219 pic = pic_offset_table_rtx;
7220 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7222 else if (!TARGET_ANY_GNU_TLS)
7224 pic = gen_reg_rtx (Pmode);
7225 emit_insn (gen_set_got (pic));
7226 type = UNSPEC_GOTTPOFF;
7231 type = UNSPEC_INDNTPOFF;
7234 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7235 off = gen_rtx_CONST (Pmode, off);
7237 off = gen_rtx_PLUS (Pmode, pic, off);
7238 off = gen_const_mem (Pmode, off);
7239 set_mem_alias_set (off, ix86_GOT_alias_set ());
7241 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7243 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7244 off = force_reg (Pmode, off);
7245 return gen_rtx_PLUS (Pmode, base, off);
7249 base = get_thread_pointer (true);
7250 dest = gen_reg_rtx (Pmode);
7251 emit_insn (gen_subsi3 (dest, base, off));
7255 case TLS_MODEL_LOCAL_EXEC:
7256 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7257 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7258 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7259 off = gen_rtx_CONST (Pmode, off);
7261 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7263 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7264 return gen_rtx_PLUS (Pmode, base, off);
7268 base = get_thread_pointer (true);
7269 dest = gen_reg_rtx (Pmode);
7270 emit_insn (gen_subsi3 (dest, base, off));
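/* For illustration, sketches of the standard GNU TLS sequences the last
   two models expand to on 32-bit targets: initial-exec is roughly

	movl	%gs:0, %eax
	addl	x@gotntpoff(%ebx), %eax

   and local-exec is roughly

	movl	%gs:0, %eax
	leal	x@ntpoff(%eax), %eax  */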
7281 /* Try machine-dependent ways of modifying an illegitimate address
7282 to be legitimate. If we find one, return the new, valid address.
7283 This macro is used in only one place: `memory_address' in explow.c.
7285 OLDX is the address as it was before break_out_memory_refs was called.
7286 In some cases it is useful to look at this to decide what needs to be done.
7288 MODE and WIN are passed so that this macro can use
7289 GO_IF_LEGITIMATE_ADDRESS.
7291 It is always safe for this macro to do nothing. It exists to recognize
7292 opportunities to optimize the output.
7294 For the 80386, we handle X+REG by loading X into a register R and
7295 using R+REG. R will go in a general reg and indexing will be used.
7296 However, if REG is a broken-out memory address or multiplication,
7297 nothing needs to be done because REG can certainly go in a general reg.
7299 When -fpic is used, special handling is needed for symbolic references.
7300 See comments by legitimize_pic_address in i386.c for details. */
7303 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7308 if (TARGET_DEBUG_ADDR)
7310 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7311 GET_MODE_NAME (mode));
7315 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7317 return legitimize_tls_address (x, log, false);
7318 if (GET_CODE (x) == CONST
7319 && GET_CODE (XEXP (x, 0)) == PLUS
7320 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7321 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7323 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7324 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7327 if (flag_pic && SYMBOLIC_CONST (x))
7328 return legitimize_pic_address (x, 0);
7330 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7331 if (GET_CODE (x) == ASHIFT
7332 && GET_CODE (XEXP (x, 1)) == CONST_INT
7333 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7336 log = INTVAL (XEXP (x, 1));
7337 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7338 GEN_INT (1 << log));
7341 if (GET_CODE (x) == PLUS)
7343 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7345 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7346 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7347 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7350 log = INTVAL (XEXP (XEXP (x, 0), 1));
7351 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7352 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7353 GEN_INT (1 << log));
7356 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7357 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7358 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7361 log = INTVAL (XEXP (XEXP (x, 1), 1));
7362 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7363 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7364 GEN_INT (1 << log));
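/* For illustration, after these rewrites an address such as
   (plus (ashift (reg) (const_int 2)) (reg)) has become
   (plus (mult (reg) (const_int 4)) (reg)), matching the scaled-index
   form that ix86_decompose_address and lea expect.  */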
7367 /* Put multiply first if it isn't already. */
7368 if (GET_CODE (XEXP (x, 1)) == MULT)
7370 rtx tmp = XEXP (x, 0);
7371 XEXP (x, 0) = XEXP (x, 1);
7376 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7377 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7378 created by virtual register instantiation, register elimination, and
7379 similar optimizations. */
7380 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7383 x = gen_rtx_PLUS (Pmode,
7384 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7385 XEXP (XEXP (x, 1), 0)),
7386 XEXP (XEXP (x, 1), 1));
7390 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7391 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7392 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7393 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7394 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7395 && CONSTANT_P (XEXP (x, 1)))
7398 rtx other = NULL_RTX;
7400 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7402 constant = XEXP (x, 1);
7403 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7405 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7407 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7408 other = XEXP (x, 1);
7416 x = gen_rtx_PLUS (Pmode,
7417 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7418 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7419 plus_constant (other, INTVAL (constant)));
7423 if (changed && legitimate_address_p (mode, x, FALSE))
7426 if (GET_CODE (XEXP (x, 0)) == MULT)
7429 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7432 if (GET_CODE (XEXP (x, 1)) == MULT)
7435 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7439 && GET_CODE (XEXP (x, 1)) == REG
7440 && GET_CODE (XEXP (x, 0)) == REG)
7443 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7446 x = legitimize_pic_address (x, 0);
7449 if (changed && legitimate_address_p (mode, x, FALSE))
7452 if (GET_CODE (XEXP (x, 0)) == REG)
7454 rtx temp = gen_reg_rtx (Pmode);
7455 rtx val = force_operand (XEXP (x, 1), temp);
7457 emit_move_insn (temp, val);
7463 else if (GET_CODE (XEXP (x, 1)) == REG)
7465 rtx temp = gen_reg_rtx (Pmode);
7466 rtx val = force_operand (XEXP (x, 0), temp);
7468 emit_move_insn (temp, val);
7478 /* Print an integer constant expression in assembler syntax. Addition
7479 and subtraction are the only arithmetic that may appear in these
7480 expressions. FILE is the stdio stream to write to, X is the rtx, and
7481 CODE is the operand print code from the output string. */
7484 output_pic_addr_const (FILE *file, rtx x, int code)
7488 switch (GET_CODE (x))
7491 gcc_assert (flag_pic);
7496 if (! TARGET_MACHO || TARGET_64BIT)
7497 output_addr_const (file, x);
7500 const char *name = XSTR (x, 0);
7502 /* Mark the decl as referenced so that cgraph will output the function. */
7503 if (SYMBOL_REF_DECL (x))
7504 mark_decl_referenced (SYMBOL_REF_DECL (x));
7507 if (MACHOPIC_INDIRECT
7508 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7509 name = machopic_indirection_name (x, /*stub_p=*/true);
7511 assemble_name (file, name);
7513 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7514 fputs ("@PLT", file);
7521 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7522 assemble_name (asm_out_file, buf);
7526 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7530 /* This used to output parentheses around the expression,
7531 but that does not work on the 386 (either ATT or BSD assembler). */
7532 output_pic_addr_const (file, XEXP (x, 0), code);
7536 if (GET_MODE (x) == VOIDmode)
7538 /* We can use %d if the number is <32 bits and positive. */
7539 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7540 fprintf (file, "0x%lx%08lx",
7541 (unsigned long) CONST_DOUBLE_HIGH (x),
7542 (unsigned long) CONST_DOUBLE_LOW (x));
7544 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7547 /* We can't handle floating point constants;
7548 PRINT_OPERAND must handle them. */
7549 output_operand_lossage ("floating constant misused");
7553 /* Some assemblers need integer constants to appear first. */
7554 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7556 output_pic_addr_const (file, XEXP (x, 0), code);
7558 output_pic_addr_const (file, XEXP (x, 1), code);
7562 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7563 output_pic_addr_const (file, XEXP (x, 1), code);
7565 output_pic_addr_const (file, XEXP (x, 0), code);
7571 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7572 output_pic_addr_const (file, XEXP (x, 0), code);
7574 output_pic_addr_const (file, XEXP (x, 1), code);
7576 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7580 gcc_assert (XVECLEN (x, 0) == 1);
7581 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7582 switch (XINT (x, 1))
7585 fputs ("@GOT", file);
7588 fputs ("@GOTOFF", file);
7590 case UNSPEC_GOTPCREL:
7591 fputs ("@GOTPCREL(%rip)", file);
7593 case UNSPEC_GOTTPOFF:
7594 /* FIXME: This might be @TPOFF in Sun ld too. */
7595 fputs ("@GOTTPOFF", file);
7598 fputs ("@TPOFF", file);
7602 fputs ("@TPOFF", file);
7604 fputs ("@NTPOFF", file);
7607 fputs ("@DTPOFF", file);
7609 case UNSPEC_GOTNTPOFF:
7611 fputs ("@GOTTPOFF(%rip)", file);
7613 fputs ("@GOTNTPOFF", file);
7615 case UNSPEC_INDNTPOFF:
7616 fputs ("@INDNTPOFF", file);
7619 output_operand_lossage ("invalid UNSPEC as operand");
7625 output_operand_lossage ("invalid expression as operand");
7629 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7630 We need to emit DTP-relative relocations. */
7633 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7635 fputs (ASM_LONG, file);
7636 output_addr_const (file, x);
7637 fputs ("@DTPOFF", file);
7643 fputs (", 0", file);
7650 /* In the name of slightly smaller debug output, and to cater to
7651 general assembler lossage, recognize PIC+GOTOFF and turn it back
7652 into a direct symbol reference.
7654 On Darwin, this is necessary to avoid a crash, because Darwin
7655 has a different PIC label for each routine but the DWARF debugging
7656 information is not associated with any particular routine, so it's
7657 necessary to remove references to the PIC label from RTL stored by
7658 the DWARF output code. */
7661 ix86_delegitimize_address (rtx orig_x)
7664 /* reg_addend is NULL or a multiple of some register. */
7665 rtx reg_addend = NULL_RTX;
7666 /* const_addend is NULL or a const_int. */
7667 rtx const_addend = NULL_RTX;
7668 /* This is the result, or NULL. */
7669 rtx result = NULL_RTX;
7671 if (GET_CODE (x) == MEM)
7676 if (GET_CODE (x) != CONST
7677 || GET_CODE (XEXP (x, 0)) != UNSPEC
7678 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7679 || GET_CODE (orig_x) != MEM)
7681 return XVECEXP (XEXP (x, 0), 0, 0);
7684 if (GET_CODE (x) != PLUS
7685 || GET_CODE (XEXP (x, 1)) != CONST)
7688 if (GET_CODE (XEXP (x, 0)) == REG
7689 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7690 /* %ebx + GOT/GOTOFF */
7692 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7694 /* %ebx + %reg * scale + GOT/GOTOFF */
7695 reg_addend = XEXP (x, 0);
7696 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7697 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7698 reg_addend = XEXP (reg_addend, 1);
7699 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7700 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7701 reg_addend = XEXP (reg_addend, 0);
7704 if (GET_CODE (reg_addend) != REG
7705 && GET_CODE (reg_addend) != MULT
7706 && GET_CODE (reg_addend) != ASHIFT)
7712 x = XEXP (XEXP (x, 1), 0);
7713 if (GET_CODE (x) == PLUS
7714 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7716 const_addend = XEXP (x, 1);
7720 if (GET_CODE (x) == UNSPEC
7721 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7722 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7723 result = XVECEXP (x, 0, 0);
7725 if (TARGET_MACHO && darwin_local_data_pic (x)
7726 && GET_CODE (orig_x) != MEM)
7727 result = XEXP (x, 0);
7733 result = gen_rtx_PLUS (Pmode, result, const_addend);
7735 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7740 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7745 if (mode == CCFPmode || mode == CCFPUmode)
7747 enum rtx_code second_code, bypass_code;
7748 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7749 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7750 code = ix86_fp_compare_code_to_integer (code);
7754 code = reverse_condition (code);
7765 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7769 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7770 Those same assemblers have the same but opposite lossage on cmov. */
7771 gcc_assert (mode == CCmode);
7772 suffix = fp ? "nbe" : "a";
7792 gcc_assert (mode == CCmode);
7814 gcc_assert (mode == CCmode);
7815 suffix = fp ? "nb" : "ae";
7818 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7822 gcc_assert (mode == CCmode);
7826 suffix = fp ? "u" : "p";
7829 suffix = fp ? "nu" : "np";
7834 fputs (suffix, file);
7837 /* Print the name of register X to FILE based on its machine mode and number.
7838 If CODE is 'w', pretend the mode is HImode.
7839 If CODE is 'b', pretend the mode is QImode.
7840 If CODE is 'k', pretend the mode is SImode.
7841 If CODE is 'q', pretend the mode is DImode.
7842 If CODE is 'h', pretend the reg is the 'high' byte register.
7843 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7846 print_reg (rtx x, int code, FILE *file)
7848 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7849 && REGNO (x) != FRAME_POINTER_REGNUM
7850 && REGNO (x) != FLAGS_REG
7851 && REGNO (x) != FPSR_REG);
7853 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7856 if (code == 'w' || MMX_REG_P (x))
7858 else if (code == 'b')
7860 else if (code == 'k')
7862 else if (code == 'q')
7864 else if (code == 'y')
7866 else if (code == 'h')
7869 code = GET_MODE_SIZE (GET_MODE (x));
7871 /* Irritatingly, AMD extended registers use a different naming convention
7872 from the normal registers.  */
7873 if (REX_INT_REG_P (x))
7875 gcc_assert (TARGET_64BIT);
7879 error ("extended registers have no high halves");
7882 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7885 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7888 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7891 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7894 error ("unsupported operand size for extended register");
7902 if (STACK_TOP_P (x))
7904 fputs ("st(0)", file);
7911 if (! ANY_FP_REG_P (x))
7912 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7917 fputs (hi_reg_name[REGNO (x)], file);
7920 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7922 fputs (qi_reg_name[REGNO (x)], file);
7925 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7927 fputs (qi_high_reg_name[REGNO (x)], file);
7934 /* Locate some local-dynamic symbol still in use by this function
7935 so that we can print its name in some tls_local_dynamic_base pattern.  */
7939 get_some_local_dynamic_name (void)
7943 if (cfun->machine->some_ld_name)
7944 return cfun->machine->some_ld_name;
7946 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7948 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7949 return cfun->machine->some_ld_name;
7955 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7959 if (GET_CODE (x) == SYMBOL_REF
7960 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7962 cfun->machine->some_ld_name = XSTR (x, 0);
7970 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7971 C -- print opcode suffix for set/cmov insn.
7972 c -- like C, but print reversed condition
7973 F,f -- likewise, but for floating-point.
7974 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing
7976 R -- print the prefix for register names.
7977 z -- print the opcode suffix for the size of the current operand.
7978 * -- print a star (in certain assembler syntax)
7979 A -- print an absolute memory reference.
7980 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7981 s -- print a shift double count, followed by the assembler's argument delimiter.
7983 b -- print the QImode name of the register for the indicated operand.
7984 %b0 would print %al if operands[0] is reg 0.
7985 w -- likewise, print the HImode name of the register.
7986 k -- likewise, print the SImode name of the register.
7987 q -- likewise, print the DImode name of the register.
7988 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7989 y -- print "st(0)" instead of "st" as a register.
7990 D -- print condition for SSE cmp instruction.
7991 P -- if PIC, print an @PLT suffix.
7992 X -- don't print any sort of PIC '@' suffix for a symbol.
7993 & -- print some in-use local-dynamic symbol name.
7994 H -- print a memory address offset by 8; used for sse high-parts
7998 print_operand (FILE *file, rtx x, int code)
8005 if (ASSEMBLER_DIALECT == ASM_ATT)
8010 assemble_name (file, get_some_local_dynamic_name ());
8014 switch (ASSEMBLER_DIALECT)
8021 /* Intel syntax. For absolute addresses, registers should not
8022 be surrounded by braces. */
8023 if (GET_CODE (x) != REG)
8026 PRINT_OPERAND (file, x, 0);
8036 PRINT_OPERAND (file, x, 0);
8041 if (ASSEMBLER_DIALECT == ASM_ATT)
8046 if (ASSEMBLER_DIALECT == ASM_ATT)
8051 if (ASSEMBLER_DIALECT == ASM_ATT)
8056 if (ASSEMBLER_DIALECT == ASM_ATT)
8061 if (ASSEMBLER_DIALECT == ASM_ATT)
8066 if (ASSEMBLER_DIALECT == ASM_ATT)
8071 /* 387 opcodes don't get size suffixes if the operands are registers.  */
8073 if (STACK_REG_P (x))
8076 /* Likewise if using Intel opcodes. */
8077 if (ASSEMBLER_DIALECT == ASM_INTEL)
8080 /* This is the size of op from size of operand. */
8081 switch (GET_MODE_SIZE (GET_MODE (x)))
8084 #ifdef HAVE_GAS_FILDS_FISTS
8090 if (GET_MODE (x) == SFmode)
8105 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8107 #ifdef GAS_MNEMONICS
8133 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8135 PRINT_OPERAND (file, x, 0);
8141 /* A little bit of braindamage here.  The SSE compare instructions
8142 use completely different names for the comparisons than the
8143 fp conditional moves.  */
8144 switch (GET_CODE (x))
8159 fputs ("unord", file);
8163 fputs ("neq", file);
8167 fputs ("nlt", file);
8171 fputs ("nle", file);
8174 fputs ("ord", file);
8181 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8182 if (ASSEMBLER_DIALECT == ASM_ATT)
8184 switch (GET_MODE (x))
8186 case HImode: putc ('w', file); break;
8188 case SFmode: putc ('l', file); break;
8190 case DFmode: putc ('q', file); break;
8191 default: gcc_unreachable ();
8198 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8201 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8202 if (ASSEMBLER_DIALECT == ASM_ATT)
8205 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8208 /* Like above, but reverse condition */
8210 /* Check to see if argument to %c is really a constant
8211 and not a condition code which needs to be reversed. */
8212 if (!COMPARISON_P (x))
8214 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8217 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8220 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8221 if (ASSEMBLER_DIALECT == ASM_ATT)
8224 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8228 /* It doesn't actually matter what mode we use here, as we're
8229 only going to use this for printing. */
8230 x = adjust_address_nv (x, DImode, 8);
8237 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8240 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8243 int pred_val = INTVAL (XEXP (x, 0));
8245 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8246 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8248 int taken = pred_val > REG_BR_PROB_BASE / 2;
8249 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8251 /* Emit hints only in the case the default branch prediction
8252 heuristics would fail.  */
8253 if (taken != cputaken)
8255 /* We use 3e (DS) prefix for taken branches and
8256 2e (CS) prefix for not taken branches. */
8258 fputs ("ds ; ", file);
8260 fputs ("cs ; ", file);
8267 output_operand_lossage ("invalid operand code '%c'", code);
8271 if (GET_CODE (x) == REG)
8272 print_reg (x, code, file);
8274 else if (GET_CODE (x) == MEM)
8276 /* No `byte ptr' prefix for call instructions. */
8277 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8280 switch (GET_MODE_SIZE (GET_MODE (x)))
8282 case 1: size = "BYTE"; break;
8283 case 2: size = "WORD"; break;
8284 case 4: size = "DWORD"; break;
8285 case 8: size = "QWORD"; break;
8286 case 12: size = "XWORD"; break;
8287 case 16: size = "XMMWORD"; break;
8292 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8295 else if (code == 'w')
8297 else if (code == 'k')
8301 fputs (" PTR ", file);
8305 /* Avoid (%rip) for call operands. */
8306 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8307 && GET_CODE (x) != CONST_INT)
8308 output_addr_const (file, x);
8309 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8310 output_operand_lossage ("invalid constraints for operand");
8315 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8320 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8321 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8323 if (ASSEMBLER_DIALECT == ASM_ATT)
8325 fprintf (file, "0x%08lx", l);
8328 /* These float cases don't actually occur as immediate operands. */
8329 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8333 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8334 fprintf (file, "%s", dstr);
8337 else if (GET_CODE (x) == CONST_DOUBLE
8338 && GET_MODE (x) == XFmode)
8342 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8343 fprintf (file, "%s", dstr);
8348 /* We have patterns that allow zero sets of memory, for instance.
8349 In 64-bit mode, we should probably support all 8-byte vectors,
8350 since we can in fact encode that into an immediate. */
8351 if (GET_CODE (x) == CONST_VECTOR)
8353 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8359 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8361 if (ASSEMBLER_DIALECT == ASM_ATT)
8364 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8365 || GET_CODE (x) == LABEL_REF)
8367 if (ASSEMBLER_DIALECT == ASM_ATT)
8370 fputs ("OFFSET FLAT:", file);
8373 if (GET_CODE (x) == CONST_INT)
8374 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8376 output_pic_addr_const (file, x, code);
8378 output_addr_const (file, x);
8382 /* Print a memory operand whose address is ADDR. */
8385 print_operand_address (FILE *file, rtx addr)
8387 struct ix86_address parts;
8388 rtx base, index, disp;
8390 int ok = ix86_decompose_address (addr, &parts);
8395 index = parts.index;
8397 scale = parts.scale;
8405 if (USER_LABEL_PREFIX[0] == 0)
8407 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8413 if (!base && !index)
8415 /* A displacement-only address requires special attention.  */
8417 if (GET_CODE (disp) == CONST_INT)
8419 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8421 if (USER_LABEL_PREFIX[0] == 0)
8423 fputs ("ds:", file);
8425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8428 output_pic_addr_const (file, disp, 0);
8430 output_addr_const (file, disp);
8432 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
8435 if (GET_CODE (disp) == CONST
8436 && GET_CODE (XEXP (disp, 0)) == PLUS
8437 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8438 disp = XEXP (XEXP (disp, 0), 0);
8439 if (GET_CODE (disp) == LABEL_REF
8440 || (GET_CODE (disp) == SYMBOL_REF
8441 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8442 fputs ("(%rip)", file);
8447 if (ASSEMBLER_DIALECT == ASM_ATT)
8452 output_pic_addr_const (file, disp, 0);
8453 else if (GET_CODE (disp) == LABEL_REF)
8454 output_asm_label (disp);
8456 output_addr_const (file, disp);
8461 print_reg (base, 0, file);
8465 print_reg (index, 0, file);
8467 fprintf (file, ",%d", scale);
8473 rtx offset = NULL_RTX;
8477 /* Pull out the offset of a symbol; print any symbol itself. */
8478 if (GET_CODE (disp) == CONST
8479 && GET_CODE (XEXP (disp, 0)) == PLUS
8480 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8482 offset = XEXP (XEXP (disp, 0), 1);
8483 disp = gen_rtx_CONST (VOIDmode,
8484 XEXP (XEXP (disp, 0), 0));
8488 output_pic_addr_const (file, disp, 0);
8489 else if (GET_CODE (disp) == LABEL_REF)
8490 output_asm_label (disp);
8491 else if (GET_CODE (disp) == CONST_INT)
8494 output_addr_const (file, disp);
8500 print_reg (base, 0, file);
8503 if (INTVAL (offset) >= 0)
8505 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8509 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8516 print_reg (index, 0, file);
8518 fprintf (file, "*%d", scale);
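/* Illustrative example (not from the original sources): an address that
   decomposes as base = %ebp, index = %eax, scale = 4, disp = -8 prints as

       AT&T:   -8(%ebp,%eax,4)
       Intel:  [ebp+eax*4-8]

   which is why the Intel branch above keeps the displacement inside the
   brackets and writes the scale as "*4" after the index register.  */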
8526 output_addr_const_extra (FILE *file, rtx x)
8530 if (GET_CODE (x) != UNSPEC)
8533 op = XVECEXP (x, 0, 0);
8534 switch (XINT (x, 1))
8536 case UNSPEC_GOTTPOFF:
8537 output_addr_const (file, op);
8538 /* FIXME: This might be @TPOFF in Sun ld. */
8539 fputs ("@GOTTPOFF", file);
8542 output_addr_const (file, op);
8543 fputs ("@TPOFF", file);
8546 output_addr_const (file, op);
8548 fputs ("@TPOFF", file);
8550 fputs ("@NTPOFF", file);
8553 output_addr_const (file, op);
8554 fputs ("@DTPOFF", file);
8556 case UNSPEC_GOTNTPOFF:
8557 output_addr_const (file, op);
8559 fputs ("@GOTTPOFF(%rip)", file);
8561 fputs ("@GOTNTPOFF", file);
8563 case UNSPEC_INDNTPOFF:
8564 output_addr_const (file, op);
8565 fputs ("@INDNTPOFF", file);
8575 /* Split one or more DImode RTL references into pairs of SImode
8576 references. The RTL can be REG, offsettable MEM, integer constant, or
8577 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8578 split and "num" is its length. lo_half and hi_half are output arrays
8579 that parallel "operands". */
8582 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8586 rtx op = operands[num];
8588 /* simplify_subreg refuses to split volatile memory addresses,
8589 but we still have to handle them.  */
8590 if (GET_CODE (op) == MEM)
8592 lo_half[num] = adjust_address (op, SImode, 0);
8593 hi_half[num] = adjust_address (op, SImode, 4);
8597 lo_half[num] = simplify_gen_subreg (SImode, op,
8598 GET_MODE (op) == VOIDmode
8599 ? DImode : GET_MODE (op), 0);
8600 hi_half[num] = simplify_gen_subreg (SImode, op,
8601 GET_MODE (op) == VOIDmode
8602 ? DImode : GET_MODE (op), 4);
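/* A sketch of the effect (illustrative, not from the original sources):
   a DImode stack slot such as (mem:DI (plus:SI (reg:SI sp) (const_int 16)))
   comes back as the SImode pair

       lo_half = (mem:SI (plus:SI (reg:SI sp) (const_int 16)))
       hi_half = (mem:SI (plus:SI (reg:SI sp) (const_int 20)))

   while a DImode register is returned as the two SImode subregs at byte
   offsets 0 and 4.  */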
8606 /* Split one or more TImode RTL references into pairs of DImode
8607 references. The RTL can be REG, offsettable MEM, integer constant, or
8608 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8609 split and "num" is its length. lo_half and hi_half are output arrays
8610 that parallel "operands". */
8613 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8617 rtx op = operands[num];
8619 /* simplify_subreg refuses to split volatile memory addresses, but we
8620 still have to handle them.  */
8621 if (GET_CODE (op) == MEM)
8623 lo_half[num] = adjust_address (op, DImode, 0);
8624 hi_half[num] = adjust_address (op, DImode, 8);
8628 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8629 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8634 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8635 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8636 is the expression of the binary operation. The output may either be
8637 emitted here, or returned to the caller, like all output_* functions.
8639 There is no guarantee that the operands are the same mode, as they
8640 might be within FLOAT or FLOAT_EXTEND expressions. */
8642 #ifndef SYSV386_COMPAT
8643 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8644 wants to fix the assemblers because that causes incompatibility
8645 with gcc. No-one wants to fix gcc because that causes
8646 incompatibility with assemblers... You can use the option of
8647 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8648 #define SYSV386_COMPAT 1
8652 output_387_binary_op (rtx insn, rtx *operands)
8654 static char buf[30];
8657 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8659 #ifdef ENABLE_CHECKING
8660 /* Even if we do not want to check the inputs, this documents input
8661 constraints, which helps in understanding the following code.  */
8662 if (STACK_REG_P (operands[0])
8663 && ((REG_P (operands[1])
8664 && REGNO (operands[0]) == REGNO (operands[1])
8665 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8666 || (REG_P (operands[2])
8667 && REGNO (operands[0]) == REGNO (operands[2])
8668 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8669 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8672 gcc_assert (is_sse);
8675 switch (GET_CODE (operands[3]))
8678 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8679 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8687 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8688 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8696 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8697 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8705 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8706 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8720 if (GET_MODE (operands[0]) == SFmode)
8721 strcat (buf, "ss\t{%2, %0|%0, %2}");
8723 strcat (buf, "sd\t{%2, %0|%0, %2}");
8728 switch (GET_CODE (operands[3]))
8732 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8734 rtx temp = operands[2];
8735 operands[2] = operands[1];
8739 /* We know operands[0] == operands[1].  */
8741 if (GET_CODE (operands[2]) == MEM)
8747 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8749 if (STACK_TOP_P (operands[0]))
8750 /* How is it that we are storing to a dead operand[2]?
8751 Well, presumably operands[1] is dead too. We can't
8752 store the result to st(0) as st(0) gets popped on this
8753 instruction. Instead store to operands[2] (which I
8754 think has to be st(1)). st(1) will be popped later.
8755 gcc <= 2.8.1 didn't have this check and generated
8756 assembly code that the Unixware assembler rejected. */
8757 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8759 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8763 if (STACK_TOP_P (operands[0]))
8764 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8766 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8771 if (GET_CODE (operands[1]) == MEM)
8777 if (GET_CODE (operands[2]) == MEM)
8783 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8786 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8787 derived assemblers, confusingly reverse the direction of
8788 the operation for fsub{r} and fdiv{r} when the
8789 destination register is not st(0). The Intel assembler
8790 doesn't have this brain damage. Read !SYSV386_COMPAT to
8791 figure out what the hardware really does. */
8792 if (STACK_TOP_P (operands[0]))
8793 p = "{p\t%0, %2|rp\t%2, %0}";
8795 p = "{rp\t%2, %0|p\t%0, %2}";
8797 if (STACK_TOP_P (operands[0]))
8798 /* As above for fmul/fadd, we can't store to st(0). */
8799 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8801 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8806 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8809 if (STACK_TOP_P (operands[0]))
8810 p = "{rp\t%0, %1|p\t%1, %0}";
8812 p = "{p\t%1, %0|rp\t%0, %1}";
8814 if (STACK_TOP_P (operands[0]))
8815 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8817 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8822 if (STACK_TOP_P (operands[0]))
8824 if (STACK_TOP_P (operands[1]))
8825 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8827 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8830 else if (STACK_TOP_P (operands[1]))
8833 p = "{\t%1, %0|r\t%0, %1}";
8835 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8841 p = "{r\t%2, %0|\t%0, %2}";
8843 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8856 /* Return the mode needed for an entity in the optimize_mode_switching pass.  */
8859 ix86_mode_needed (int entity, rtx insn)
8861 enum attr_i387_cw mode;
8863 /* The mode UNINITIALIZED is used to store the control word after a
8864 function call or ASM pattern.  The mode ANY specifies that the function
8865 has no requirements on the control word and makes no changes in the
8866 bits we are interested in.  */
8869 || (NONJUMP_INSN_P (insn)
8870 && (asm_noperands (PATTERN (insn)) >= 0
8871 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8872 return I387_CW_UNINITIALIZED;
8874 if (recog_memoized (insn) < 0)
8877 mode = get_attr_i387_cw (insn);
8882 if (mode == I387_CW_TRUNC)
8887 if (mode == I387_CW_FLOOR)
8892 if (mode == I387_CW_CEIL)
8897 if (mode == I387_CW_MASK_PM)
8908 /* Output code to initialize the control word copies used by the trunc?f?i
8909 and rounding patterns.  CURRENT_MODE is set to the current control word,
8910 while NEW_MODE is set to the new control word.  */
8913 emit_i387_cw_initialization (int mode)
8915 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8920 rtx reg = gen_reg_rtx (HImode);
8922 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8923 emit_move_insn (reg, stored_mode);
8925 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8930 /* round toward zero (truncate) */
8931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8932 slot = SLOT_CW_TRUNC;
8936 /* round down toward -oo */
8937 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8938 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8939 slot = SLOT_CW_FLOOR;
8943 /* round up toward +oo */
8944 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8946 slot = SLOT_CW_CEIL;
8949 case I387_CW_MASK_PM:
8950 /* mask precision exception for nearbyint() */
8951 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8952 slot = SLOT_CW_MASK_PM;
8964 /* round toward zero (truncate) */
8965 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8966 slot = SLOT_CW_TRUNC;
8970 /* round down toward -oo */
8971 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8972 slot = SLOT_CW_FLOOR;
8976 /* round up toward +oo */
8977 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8978 slot = SLOT_CW_CEIL;
8981 case I387_CW_MASK_PM:
8982 /* mask precision exception for nearbyint() */
8983 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8984 slot = SLOT_CW_MASK_PM;
8992 gcc_assert (slot < MAX_386_STACK_LOCALS);
8994 new_mode = assign_386_stack_local (HImode, slot);
8995 emit_move_insn (new_mode, reg);
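/* A worked example of the control word bits used above (an illustrative
   comment, not from the original sources): bits 10-11 of the 387 control
   word hold the rounding control field and bit 5 masks the precision
   exception, so the general path computes

       trunc:	  cw |= 0x0c00;			  RC = 11: toward zero
       floor:	  cw = (cw & ~0x0c00) | 0x0400;	  RC = 01: toward -inf
       ceil:	  cw = (cw & ~0x0c00) | 0x0800;	  RC = 10: toward +inf
       nearbyint: cw |= 0x0020;			  mask precision exception

   while the TARGET_PARTIAL_REG_STALL path reaches the same rounding bits
   by inserting 0xc, 0x4 or 0x8 into the high byte with gen_movsi_insv_1,
   avoiding a HImode partial-register operation.  */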
8998 /* Output code for INSN to convert a float to a signed int. OPERANDS
8999 are the insn operands. The output may be [HSD]Imode and the input
9000 operand may be [SDX]Fmode. */
9003 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9005 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9006 int dimode_p = GET_MODE (operands[0]) == DImode;
9007 int round_mode = get_attr_i387_cw (insn);
9009 /* Jump through a hoop or two for DImode, since the hardware has no
9010 non-popping instruction. We used to do this a different way, but
9011 that was somewhat fragile and broke with post-reload splitters. */
9012 if ((dimode_p || fisttp) && !stack_top_dies)
9013 output_asm_insn ("fld\t%y1", operands);
9015 gcc_assert (STACK_TOP_P (operands[1]));
9016 gcc_assert (GET_CODE (operands[0]) == MEM);
9019 output_asm_insn ("fisttp%z0\t%0", operands);
9022 if (round_mode != I387_CW_ANY)
9023 output_asm_insn ("fldcw\t%3", operands);
9024 if (stack_top_dies || dimode_p)
9025 output_asm_insn ("fistp%z0\t%0", operands);
9027 output_asm_insn ("fist%z0\t%0", operands);
9028 if (round_mode != I387_CW_ANY)
9029 output_asm_insn ("fldcw\t%2", operands);
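/* For illustration (an assumed operand layout, not from the original
   sources), a DImode fix from DFmode with a live stack top and a non-ANY
   rounding mode comes out roughly as

       fld	%st(0)		# duplicate the still-live input
       fldcw	%3		# load the truncating control word
       fistpll	%0		# popping 64-bit store
       fldcw	%2		# restore the original control word

   using the two control word slots prepared by
   emit_i387_cw_initialization above.  */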
9035 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9036 have the values zero or one, indicates the ffreep insn's operand
9037 from the OPERANDS array. */
9040 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9042 if (TARGET_USE_FFREEP)
9043 #if HAVE_AS_IX86_FFREEP
9044 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9046 switch (REGNO (operands[opno]))
9048 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9049 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9050 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9051 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9052 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9053 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9054 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9055 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9059 return opno ? "fstp\t%y1" : "fstp\t%y0";
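/* Illustrative note (not from the original sources): 0xdf 0xc0+i is the
   two-byte opcode of "ffreep %st(i)", and .word stores its operand little
   endian, so e.g. ".word 0xc0df" lays down the bytes df c0 -- an ffreep
   st(0) spelled out for assemblers that lack the mnemonic.  */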
9063 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9064 should be used. UNORDERED_P is true when fucom should be used. */
9067 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9070 rtx cmp_op0, cmp_op1;
9071 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9075 cmp_op0 = operands[0];
9076 cmp_op1 = operands[1];
9080 cmp_op0 = operands[1];
9081 cmp_op1 = operands[2];
9086 if (GET_MODE (operands[0]) == SFmode)
9088 return "ucomiss\t{%1, %0|%0, %1}";
9090 return "comiss\t{%1, %0|%0, %1}";
9093 return "ucomisd\t{%1, %0|%0, %1}";
9095 return "comisd\t{%1, %0|%0, %1}";
9098 gcc_assert (STACK_TOP_P (cmp_op0));
9100 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9102 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9106 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9107 return output_387_ffreep (operands, 1);
9110 return "ftst\n\tfnstsw\t%0";
9113 if (STACK_REG_P (cmp_op1)
9115 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9116 && REGNO (cmp_op1) != FIRST_STACK_REG)
9118 /* If the top of the 387 stack dies, and the other operand
9119 is also a stack register that dies, then this must be a
9120 `fcompp' float compare.  */
9124 /* There is no double popping fcomi variant.  Fortunately,
9125 eflags is immune to the fstp's cc clobbering.  */
9127 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9129 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9130 return output_387_ffreep (operands, 0);
9135 return "fucompp\n\tfnstsw\t%0";
9137 return "fcompp\n\tfnstsw\t%0";
9142 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9144 static const char * const alt[16] =
9146 "fcom%z2\t%y2\n\tfnstsw\t%0",
9147 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9148 "fucom%z2\t%y2\n\tfnstsw\t%0",
9149 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9151 "ficom%z2\t%y2\n\tfnstsw\t%0",
9152 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9156 "fcomi\t{%y1, %0|%0, %y1}",
9157 "fcomip\t{%y1, %0|%0, %y1}",
9158 "fucomi\t{%y1, %0|%0, %y1}",
9159 "fucomip\t{%y1, %0|%0, %y1}",
9170 mask = eflags_p << 3;
9171 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9172 mask |= unordered_p << 1;
9173 mask |= stack_top_dies;
9175 gcc_assert (mask < 16);
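/* A worked example of the encoding (illustrative, not from the original
   sources): an fcomip of a dying stack top against another stack register
   has eflags_p = 1, a non-integer operand, unordered_p = 0 and
   stack_top_dies = 1, so mask = 0b1001 = 9, selecting
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */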
9184 ix86_output_addr_vec_elt (FILE *file, int value)
9186 const char *directive = ASM_LONG;
9190 directive = ASM_QUAD;
9192 gcc_assert (!TARGET_64BIT);
9195 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9199 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9202 fprintf (file, "%s%s%d-%s%d\n",
9203 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9204 else if (HAVE_AS_GOTOFF_IN_DATA)
9205 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9207 else if (TARGET_MACHO)
9209 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9210 machopic_output_function_base_name (file);
9211 fprintf(file, "\n");
9215 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9216 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9219 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9223 ix86_expand_clear (rtx dest)
9227 /* We play register width games, which are only valid after reload. */
9228 gcc_assert (reload_completed);
9230 /* Avoid HImode and its attendant prefix byte. */
9231 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9232 dest = gen_rtx_REG (SImode, REGNO (dest));
9234 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9236 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9237 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9239 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9240 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
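/* A sketch of the result (illustrative, not from the original sources):
   clearing (reg:SI ax) under -Os emits the two-element PARALLEL

       (parallel [(set (reg:SI ax) (const_int 0))
		  (clobber (reg:CC 17))])

   which matches movsi_xor and assembles to "xorl %eax, %eax"; without the
   flags clobber, the longer "movl $0, %eax" form is kept for chips where
   TARGET_USE_MOV0 holds and we are not optimizing for size.  */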
9246 /* X is an unchanging MEM. If it is a constant pool reference, return
9247 the constant pool rtx, else NULL. */
9250 maybe_get_pool_constant (rtx x)
9252 x = ix86_delegitimize_address (XEXP (x, 0));
9254 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9255 return get_pool_constant (x);
9261 ix86_expand_move (enum machine_mode mode, rtx operands[])
9263 int strict = (reload_in_progress || reload_completed);
9265 enum tls_model model;
9270 if (GET_CODE (op1) == SYMBOL_REF)
9272 model = SYMBOL_REF_TLS_MODEL (op1);
9275 op1 = legitimize_tls_address (op1, model, true);
9276 op1 = force_operand (op1, op0);
9281 else if (GET_CODE (op1) == CONST
9282 && GET_CODE (XEXP (op1, 0)) == PLUS
9283 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9285 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9288 rtx addend = XEXP (XEXP (op1, 0), 1);
9289 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9290 op1 = force_operand (op1, NULL);
9291 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9292 op0, 1, OPTAB_DIRECT);
9298 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9300 if (TARGET_MACHO && !TARGET_64BIT)
9305 rtx temp = ((reload_in_progress
9306 || ((op0 && GET_CODE (op0) == REG)
9308 ? op0 : gen_reg_rtx (Pmode));
9309 op1 = machopic_indirect_data_reference (op1, temp);
9310 op1 = machopic_legitimize_pic_address (op1, mode,
9311 temp == op1 ? 0 : temp);
9313 else if (MACHOPIC_INDIRECT)
9314 op1 = machopic_indirect_data_reference (op1, 0);
9321 if (GET_CODE (op0) == MEM)
9322 op1 = force_reg (Pmode, op1);
9324 op1 = legitimize_address (op1, op1, Pmode);
9329 if (GET_CODE (op0) == MEM
9330 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9331 || !push_operand (op0, mode))
9332 && GET_CODE (op1) == MEM)
9333 op1 = force_reg (mode, op1);
9335 if (push_operand (op0, mode)
9336 && ! general_no_elim_operand (op1, mode))
9337 op1 = copy_to_mode_reg (mode, op1);
9339 /* Force large constants in 64-bit compilation into registers
9340 to get them CSEed.  */
9341 if (TARGET_64BIT && mode == DImode
9342 && immediate_operand (op1, mode)
9343 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9344 && !register_operand (op0, mode)
9345 && optimize && !reload_completed && !reload_in_progress)
9346 op1 = copy_to_mode_reg (mode, op1);
9348 if (FLOAT_MODE_P (mode))
9350 /* If we are loading a floating point constant to a register,
9351 force the value to memory now, since we'll get better code
9352 out the back end. */
9356 else if (GET_CODE (op1) == CONST_DOUBLE)
9358 op1 = validize_mem (force_const_mem (mode, op1));
9359 if (!register_operand (op0, mode))
9361 rtx temp = gen_reg_rtx (mode);
9362 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9363 emit_move_insn (op0, temp);
9370 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9374 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9376 rtx op0 = operands[0], op1 = operands[1];
9378 /* Force constants other than zero into memory.  We do not know how
9379 the instructions used to build constants modify the upper 64 bits
9380 of the register; once we have that information we may be able
9381 to handle some of them more efficiently.  */
9382 if ((reload_in_progress | reload_completed) == 0
9383 && register_operand (op0, mode)
9385 && standard_sse_constant_p (op1) <= 0)
9386 op1 = validize_mem (force_const_mem (mode, op1));
9388 /* Make operand1 a register if it isn't already. */
9390 && !register_operand (op0, mode)
9391 && !register_operand (op1, mode))
9393 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9397 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9400 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9401 straight to ix86_expand_vector_move. */
9404 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9413 /* If we're optimizing for size, movups is the smallest. */
9416 op0 = gen_lowpart (V4SFmode, op0);
9417 op1 = gen_lowpart (V4SFmode, op1);
9418 emit_insn (gen_sse_movups (op0, op1));
9422 /* ??? If we have typed data, then it would appear that using
9423 movdqu is the only way to get unaligned data loaded with
9425 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9427 op0 = gen_lowpart (V16QImode, op0);
9428 op1 = gen_lowpart (V16QImode, op1);
9429 emit_insn (gen_sse2_movdqu (op0, op1));
9433 if (TARGET_SSE2 && mode == V2DFmode)
9437 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9439 op0 = gen_lowpart (V2DFmode, op0);
9440 op1 = gen_lowpart (V2DFmode, op1);
9441 emit_insn (gen_sse2_movupd (op0, op1));
9445 /* When SSE registers are split into halves, we can avoid
9446 writing to the top half twice. */
9447 if (TARGET_SSE_SPLIT_REGS)
9449 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9454 /* ??? Not sure about the best option for the Intel chips.
9455 The following would seem to satisfy; the register is
9456 entirely cleared, breaking the dependency chain. We
9457 then store to the upper half, with a dependency depth
9458 of one. A rumor has it that Intel recommends two movsd
9459 followed by an unpacklpd, but this is unconfirmed. And
9460 given that the dependency depth of the unpacklpd would
9461 still be one, I'm not sure why this would be better. */
9462 zero = CONST0_RTX (V2DFmode);
9465 m = adjust_address (op1, DFmode, 0);
9466 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9467 m = adjust_address (op1, DFmode, 8);
9468 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9472 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9474 op0 = gen_lowpart (V4SFmode, op0);
9475 op1 = gen_lowpart (V4SFmode, op1);
9476 emit_insn (gen_sse_movups (op0, op1));
9480 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9481 emit_move_insn (op0, CONST0_RTX (mode));
9483 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9485 if (mode != V4SFmode)
9486 op0 = gen_lowpart (V4SFmode, op0);
9487 m = adjust_address (op1, V2SFmode, 0);
9488 emit_insn (gen_sse_loadlps (op0, op0, m));
9489 m = adjust_address (op1, V2SFmode, 8);
9490 emit_insn (gen_sse_loadhps (op0, op0, m));
9493 else if (MEM_P (op0))
9495 /* If we're optimizing for size, movups is the smallest. */
9498 op0 = gen_lowpart (V4SFmode, op0);
9499 op1 = gen_lowpart (V4SFmode, op1);
9500 emit_insn (gen_sse_movups (op0, op1));
9504 /* ??? Similar to above, only less clear because of quote
9505 typeless stores unquote. */
9506 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9507 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9509 op0 = gen_lowpart (V16QImode, op0);
9510 op1 = gen_lowpart (V16QImode, op1);
9511 emit_insn (gen_sse2_movdqu (op0, op1));
9515 if (TARGET_SSE2 && mode == V2DFmode)
9517 m = adjust_address (op0, DFmode, 0);
9518 emit_insn (gen_sse2_storelpd (m, op1));
9519 m = adjust_address (op0, DFmode, 8);
9520 emit_insn (gen_sse2_storehpd (m, op1));
9524 if (mode != V4SFmode)
9525 op1 = gen_lowpart (V4SFmode, op1);
9526 m = adjust_address (op0, V2SFmode, 0);
9527 emit_insn (gen_sse_storelps (m, op1));
9528 m = adjust_address (op0, V2SFmode, 8);
9529 emit_insn (gen_sse_storehps (m, op1));
9536 /* Expand a push in MODE. This is some mode for which we do not support
9537 proper push instructions, at least from the registers that we expect
9538 the value to live in. */
9541 ix86_expand_push (enum machine_mode mode, rtx x)
9545 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9546 GEN_INT (-GET_MODE_SIZE (mode)),
9547 stack_pointer_rtx, 1, OPTAB_DIRECT);
9548 if (tmp != stack_pointer_rtx)
9549 emit_move_insn (stack_pointer_rtx, tmp);
9551 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9552 emit_move_insn (tmp, x);
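/* Illustrative sketch (not from the original sources): for a 16-byte
   vector mode this expands to the equivalent of

       sub	$16, %esp
       movups	%xmm0, (%esp)

   i.e. an explicit stack pointer adjustment followed by an ordinary
   store, since no push instruction covers that mode and register.  */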
9555 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9556 destination to use for the operation. If different from the true
9557 destination in operands[0], a copy operation will be required. */
9560 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9563 int matching_memory;
9564 rtx src1, src2, dst;
9570 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9571 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9572 && (rtx_equal_p (dst, src2)
9573 || immediate_operand (src1, mode)))
9580 /* If the destination is memory, and we do not have matching source
9581 operands, do things in registers. */
9582 matching_memory = 0;
9583 if (GET_CODE (dst) == MEM)
9585 if (rtx_equal_p (dst, src1))
9586 matching_memory = 1;
9587 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9588 && rtx_equal_p (dst, src2))
9589 matching_memory = 2;
9591 dst = gen_reg_rtx (mode);
9594 /* The source operands cannot both be in memory.  */
9595 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9597 if (matching_memory != 2)
9598 src2 = force_reg (mode, src2);
9600 src1 = force_reg (mode, src1);
9603 /* If the operation is not commutative, source 1 cannot be a constant
9604 or non-matching memory.  */
9605 if ((CONSTANT_P (src1)
9606 || (!matching_memory && GET_CODE (src1) == MEM))
9607 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9608 src1 = force_reg (mode, src1);
9610 src1 = operands[1] = src1;
9611 src2 = operands[2] = src2;
9615 /* Similarly, but assume that the destination has already been
9619 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9620 enum machine_mode mode, rtx operands[])
9622 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9623 gcc_assert (dst == operands[0]);
9626 /* Attempt to expand a binary operator.  Make the expansion closer to the
9627 actual machine than just general_operand, which would allow 3 separate
9628 memory references (one output, two input) in a single insn.  */
9631 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9634 rtx src1, src2, dst, op, clob;
9636 dst = ix86_fixup_binary_operands (code, mode, operands);
9640 /* Emit the instruction. */
9642 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9643 if (reload_in_progress)
9645 /* Reload doesn't know about the flags register, and doesn't know that
9646 it doesn't want to clobber it. We can only do this with PLUS. */
9647 gcc_assert (code == PLUS);
9652 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9653 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9656 /* Fix up the destination if needed. */
9657 if (dst != operands[0])
9658 emit_move_insn (operands[0], dst);
9661 /* Return TRUE or FALSE depending on whether the binary operator meets the
9662 appropriate constraints. */
9665 ix86_binary_operator_ok (enum rtx_code code,
9666 enum machine_mode mode ATTRIBUTE_UNUSED,
9669 /* The source operands cannot both be in memory.  */
9670 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9672 /* If the operation is not commutative, source 1 cannot be a constant.  */
9673 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9675 /* If the destination is memory, we must have a matching source operand. */
9676 if (GET_CODE (operands[0]) == MEM
9677 && ! (rtx_equal_p (operands[0], operands[1])
9678 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9679 && rtx_equal_p (operands[0], operands[2]))))
9681 /* If the operation is not commutative and source 1 is memory, we must
9682 have a matching destination.  */
9683 if (GET_CODE (operands[1]) == MEM
9684 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9685 && ! rtx_equal_p (operands[0], operands[1]))
9690 /* Attempt to expand a unary operator.  Make the expansion closer to the
9691 actual machine than just general_operand, which would allow 2 separate
9692 memory references (one output, one input) in a single insn.  */
9695 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9698 int matching_memory;
9699 rtx src, dst, op, clob;
9704 /* If the destination is memory, and we do not have matching source
9705 operands, do things in registers. */
9706 matching_memory = 0;
9709 if (rtx_equal_p (dst, src))
9710 matching_memory = 1;
9712 dst = gen_reg_rtx (mode);
9715 /* When source operand is memory, destination must match. */
9716 if (MEM_P (src) && !matching_memory)
9717 src = force_reg (mode, src);
9719 /* Emit the instruction. */
9721 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9722 if (reload_in_progress || code == NOT)
9724 /* Reload doesn't know about the flags register, and doesn't know that
9725 it doesn't want to clobber it. */
9726 gcc_assert (code == NOT);
9731 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9732 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9735 /* Fix up the destination if needed. */
9736 if (dst != operands[0])
9737 emit_move_insn (operands[0], dst);
9740 /* Return TRUE or FALSE depending on whether the unary operator meets the
9741 appropriate constraints. */
9744 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9745 enum machine_mode mode ATTRIBUTE_UNUSED,
9746 rtx operands[2] ATTRIBUTE_UNUSED)
9748 /* If one of the operands is memory, the source and destination must match.  */
9749 if ((GET_CODE (operands[0]) == MEM
9750 || GET_CODE (operands[1]) == MEM)
9751 && ! rtx_equal_p (operands[0], operands[1]))
9756 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9757 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9758 true, then replicate the mask for all elements of the vector register.
9759 If INVERT is true, then create a mask excluding the sign bit. */
9762 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9764 enum machine_mode vec_mode;
9765 HOST_WIDE_INT hi, lo;
9770 /* Find the sign bit, sign extended to 2*HWI. */
9772 lo = 0x80000000, hi = lo < 0;
9773 else if (HOST_BITS_PER_WIDE_INT >= 64)
9774 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9776 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9781 /* Force this value into the low part of a fp vector constant. */
9782 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9783 mask = gen_lowpart (mode, mask);
9788 v = gen_rtvec (4, mask, mask, mask, mask);
9790 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9791 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9792 vec_mode = V4SFmode;
9797 v = gen_rtvec (2, mask, mask);
9799 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9800 vec_mode = V2DFmode;
9803 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
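/* Worked example (illustrative, not from the original sources): for
   SFmode the sign bit is bit 31, so lo = 0x80000000 and gen_lowpart
   reinterprets it as the single-float bit pattern of -0.0.  The mask
   vector loaded into the SSE register is then

       vect = false:  { -0.0f,  0.0f,  0.0f,  0.0f }
       vect = true:   { -0.0f, -0.0f, -0.0f, -0.0f }

   and with INVERT each element instead carries ~0x80000000, a mask that
   clears the sign bit -- NEG uses the former with XOR and ABS uses the
   latter with AND below.  */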
9806 /* Generate code for floating point ABS or NEG. */
9809 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9812 rtx mask, set, use, clob, dst, src;
9813 bool matching_memory;
9814 bool use_sse = false;
9815 bool vector_mode = VECTOR_MODE_P (mode);
9816 enum machine_mode elt_mode = mode;
9820 elt_mode = GET_MODE_INNER (mode);
9823 else if (TARGET_SSE_MATH)
9824 use_sse = SSE_FLOAT_MODE_P (mode);
9826 /* NEG and ABS performed with SSE use bitwise mask operations.
9827 Create the appropriate mask now. */
9829 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9836 /* If the destination is memory, and we don't have matching source
9837 operands or we're using the x87, do things in registers. */
9838 matching_memory = false;
9841 if (use_sse && rtx_equal_p (dst, src))
9842 matching_memory = true;
9844 dst = gen_reg_rtx (mode);
9846 if (MEM_P (src) && !matching_memory)
9847 src = force_reg (mode, src);
9851 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9852 set = gen_rtx_SET (VOIDmode, dst, set);
9857 set = gen_rtx_fmt_e (code, mode, src);
9858 set = gen_rtx_SET (VOIDmode, dst, set);
9861 use = gen_rtx_USE (VOIDmode, mask);
9862 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9863 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9864 gen_rtvec (3, set, use, clob)));
9870 if (dst != operands[0])
9871 emit_move_insn (operands[0], dst);
9874 /* Expand a copysign operation. Special case operand 0 being a constant. */
9877 ix86_expand_copysign (rtx operands[])
9879 enum machine_mode mode, vmode;
9880 rtx dest, op0, op1, mask, nmask;
9886 mode = GET_MODE (dest);
9887 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9889 if (GET_CODE (op0) == CONST_DOUBLE)
9893 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9894 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9896 if (op0 == CONST0_RTX (mode))
9897 op0 = CONST0_RTX (vmode);
9901 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9902 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9904 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9905 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9908 mask = ix86_build_signbit_mask (mode, 0, 0);
9911 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9913 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9917 nmask = ix86_build_signbit_mask (mode, 0, 1);
9918 mask = ix86_build_signbit_mask (mode, 0, 0);
9921 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9923 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9927 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9928 be a constant, and so has already been expanded into a vector constant. */
9931 ix86_split_copysign_const (rtx operands[])
9933 enum machine_mode mode, vmode;
9934 rtx dest, op0, op1, mask, x;
9941 mode = GET_MODE (dest);
9942 vmode = GET_MODE (mask);
9944 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9945 x = gen_rtx_AND (vmode, dest, mask);
9946 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9948 if (op0 != CONST0_RTX (vmode))
9950 x = gen_rtx_IOR (vmode, dest, op0);
9951 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9955 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9956 so we have to do two masks. */
9959 ix86_split_copysign_var (rtx operands[])
9961 enum machine_mode mode, vmode;
9962 rtx dest, scratch, op0, op1, mask, nmask, x;
9965 scratch = operands[1];
9968 nmask = operands[4];
9971 mode = GET_MODE (dest);
9972 vmode = GET_MODE (mask);
9974 if (rtx_equal_p (op0, op1))
9976 /* Shouldn't happen often (it's useless, obviously), but when it does
9977 we'd generate incorrect code if we continue below. */
9978 emit_move_insn (dest, op0);
9982 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9984 gcc_assert (REGNO (op1) == REGNO (scratch));
9986 x = gen_rtx_AND (vmode, scratch, mask);
9987 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9990 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9991 x = gen_rtx_NOT (vmode, dest);
9992 x = gen_rtx_AND (vmode, x, op0);
9993 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9997 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9999 x = gen_rtx_AND (vmode, scratch, mask);
10001 else /* alternative 2,4 */
10003 gcc_assert (REGNO (mask) == REGNO (scratch));
10004 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10005 x = gen_rtx_AND (vmode, scratch, op1);
10007 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10009 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10011 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10012 x = gen_rtx_AND (vmode, dest, nmask);
10014 else /* alternative 3,4 */
10016 gcc_assert (REGNO (nmask) == REGNO (dest));
10018 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10019 x = gen_rtx_AND (vmode, dest, op0);
10021 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10024 x = gen_rtx_IOR (vmode, dest, scratch);
10025 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10028 /* Return TRUE or FALSE depending on whether the first SET in INSN
10029 has source and destination with matching CC modes, and whether the
10030 CC mode is at least as constrained as REQ_MODE.  */
10033 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10036 enum machine_mode set_mode;
10038 set = PATTERN (insn);
10039 if (GET_CODE (set) == PARALLEL)
10040 set = XVECEXP (set, 0, 0);
10041 gcc_assert (GET_CODE (set) == SET);
10042 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10044 set_mode = GET_MODE (SET_DEST (set));
10048 if (req_mode != CCNOmode
10049 && (req_mode != CCmode
10050 || XEXP (SET_SRC (set), 1) != const0_rtx))
10054 if (req_mode == CCGCmode)
10058 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10062 if (req_mode == CCZmode)
10069 gcc_unreachable ();
10072 return (GET_MODE (SET_SRC (set)) == set_mode);
10075 /* Generate insn patterns to do an integer compare of OPERANDS. */
10078 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10080 enum machine_mode cmpmode;
10083 cmpmode = SELECT_CC_MODE (code, op0, op1);
10084 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10086 /* This is very simple, but making the interface the same as in the
10087 FP case makes the rest of the code easier. */
10088 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10089 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10091 /* Return the test that should be put into the flags user, i.e.
10092 the bcc, scc, or cmov instruction. */
10093 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
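/* For example (illustrative, not from the original sources): an EQ
   compare of a register against zero selects CCZmode, emitting

       (set (reg:CCZ 17) (compare:CCZ (reg:SI x) (const_int 0)))

   and handing back (eq (reg:CCZ 17) (const_int 0)) for the bcc, scc or
   cmov expander to consume.  */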
10096 /* Figure out whether to use ordered or unordered fp comparisons.
10097 Return the appropriate mode to use. */
10100 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10102 /* ??? In order to make all comparisons reversible, we do all comparisons
10103 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
10104 all forms of trapping and nontrapping comparisons, we can make inequality
10105 comparisons trapping again, since it results in better code when using
10106 FCOM based compares.  */
10107 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10111 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10113 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10114 return ix86_fp_compare_mode (code);
10117 /* Only zero flag is needed. */
10118 case EQ: /* ZF=0 */
10119 case NE: /* ZF!=0 */
10121 /* Codes needing carry flag. */
10122 case GEU: /* CF=0 */
10123 case GTU: /* CF=0 & ZF=0 */
10124 case LTU: /* CF=1 */
10125 case LEU: /* CF=1 | ZF=1 */
10127 /* Codes possibly doable with only the sign flag when
10128 comparing against zero.  */
10129 case GE: /* SF=OF or SF=0 */
10130 case LT: /* SF<>OF or SF=1 */
10131 if (op1 == const0_rtx)
10134 /* For the other cases the carry flag is not required.  */
10136 /* Codes doable only with the sign flag when comparing
10137 against zero, but for which we miss the jump instruction,
10138 so we need to use relational tests against the overflow
10139 flag, which thus needs to be zero.  */
10140 case GT: /* ZF=0 & SF=OF */
10141 case LE: /* ZF=1 | SF<>OF */
10142 if (op1 == const0_rtx)
10146 /* The strcmp pattern does (use flags), and combine may ask us for a proper
10151 gcc_unreachable ();
10155 /* Return the fixed registers used for condition codes. */
10158 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10165 /* If two condition code modes are compatible, return a condition code
10166 mode which is compatible with both. Otherwise, return
10169 static enum machine_mode
10170 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10175 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10178 if ((m1 == CCGCmode && m2 == CCGOCmode)
10179 || (m1 == CCGOCmode && m2 == CCGCmode))
10185 gcc_unreachable ();
10207 /* These are only compatible with themselves, which we already
10213 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10216 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10218 enum rtx_code swapped_code = swap_condition (code);
10219 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10220 || (ix86_fp_comparison_cost (swapped_code)
10221 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10224 /* Swap, force into registers, or otherwise massage the two operands
10225 to a fp comparison. The operands are updated in place; the new
10226 comparison code is returned. */
10228 static enum rtx_code
10229 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10231 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10232 rtx op0 = *pop0, op1 = *pop1;
10233 enum machine_mode op_mode = GET_MODE (op0);
10234 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10236 /* All of the unordered compare instructions only work on registers.
10237 The same is true of the fcomi compare instructions. The XFmode
10238 compare instructions require registers except when comparing
10239 against zero or when converting operand 1 from fixed point to
10243 && (fpcmp_mode == CCFPUmode
10244 || (op_mode == XFmode
10245 && ! (standard_80387_constant_p (op0) == 1
10246 || standard_80387_constant_p (op1) == 1)
10247 && GET_CODE (op1) != FLOAT)
10248 || ix86_use_fcomi_compare (code)))
10250 op0 = force_reg (op_mode, op0);
10251 op1 = force_reg (op_mode, op1);
10255 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10256 things around if they appear profitable, otherwise force op0
10257 into a register. */
10259 if (standard_80387_constant_p (op0) == 0
10260 || (GET_CODE (op0) == MEM
10261 && ! (standard_80387_constant_p (op1) == 0
10262 || GET_CODE (op1) == MEM)))
10265 tmp = op0, op0 = op1, op1 = tmp;
10266 code = swap_condition (code);
10269 if (GET_CODE (op0) != REG)
10270 op0 = force_reg (op_mode, op0);
10272 if (CONSTANT_P (op1))
10274 int tmp = standard_80387_constant_p (op1);
10276 op1 = validize_mem (force_const_mem (op_mode, op1));
10280 op1 = force_reg (op_mode, op1);
10283 op1 = force_reg (op_mode, op1);
10287 /* Try to rearrange the comparison to make it cheaper. */
10288 if (ix86_fp_comparison_cost (code)
10289 > ix86_fp_comparison_cost (swap_condition (code))
10290 && (GET_CODE (op1) == REG || !no_new_pseudos))
10293 tmp = op0, op0 = op1, op1 = tmp;
10294 code = swap_condition (code);
10295 if (GET_CODE (op0) != REG)
10296 op0 = force_reg (op_mode, op0);
10304 /* Convert the comparison codes we use to represent FP comparisons to the
10305 integer codes that will result in a proper branch.  Return UNKNOWN if no such code
10309 ix86_fp_compare_code_to_integer (enum rtx_code code)
10338 /* Split comparison code CODE into comparisons we can do using branch
10339 instructions.  BYPASS_CODE is the comparison code for the branch that will
10340 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10341 is not required, its value is set to UNKNOWN.
10342 We never require more than two branches.  */
10345 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10346 enum rtx_code *first_code,
10347 enum rtx_code *second_code)
10349 *first_code = code;
10350 *bypass_code = UNKNOWN;
10351 *second_code = UNKNOWN;
10353 /* The fcomi comparison sets flags as follows:
10363 case GT: /* GTU - CF=0 & ZF=0 */
10364 case GE: /* GEU - CF=0 */
10365 case ORDERED: /* PF=0 */
10366 case UNORDERED: /* PF=1 */
10367 case UNEQ: /* EQ - ZF=1 */
10368 case UNLT: /* LTU - CF=1 */
10369 case UNLE: /* LEU - CF=1 | ZF=1 */
10370 case LTGT: /* EQ - ZF=0 */
10372 case LT: /* LTU - CF=1 - fails on unordered */
10373 *first_code = UNLT;
10374 *bypass_code = UNORDERED;
10376 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10377 *first_code = UNLE;
10378 *bypass_code = UNORDERED;
10380 case EQ: /* EQ - ZF=1 - fails on unordered */
10381 *first_code = UNEQ;
10382 *bypass_code = UNORDERED;
10384 case NE: /* NE - ZF=0 - fails on unordered */
10385 *first_code = LTGT;
10386 *second_code = UNORDERED;
10388 case UNGE: /* GEU - CF=0 - fails on unordered */
10390 *second_code = UNORDERED;
10392 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10394 *second_code = UNORDERED;
10397 gcc_unreachable ();
10399 if (!TARGET_IEEE_FP)
10401 *second_code = UNKNOWN;
10402 *bypass_code = UNKNOWN;
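/* Worked example (illustrative, not from the original sources): with
   TARGET_IEEE_FP, code == LT splits into first_code = UNLT guarded by
   bypass_code = UNORDERED, i.e. roughly

       jp	.Lbypass	# PF=1: unordered operands skip the real test
       jb	.Ltaken		# CF=1: strictly less than
   .Lbypass:

   whereas NE keeps first_code = LTGT and adds second_code = UNORDERED,
   since an unordered result must also count as "not equal".  */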
10406 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10407 All following functions use the number of instructions as the cost metric.
10408 In the future this should be tweaked to compute bytes for optimize_size and
10409 take into account the performance of various instructions on various CPUs.  */
10411 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10413 if (!TARGET_IEEE_FP)
10415 /* The cost of code output by ix86_expand_fp_compare. */
10439 gcc_unreachable ();
10443 /* Return cost of comparison done using fcomi operation.
10444 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10446 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10448 enum rtx_code bypass_code, first_code, second_code;
10449 /* Return an arbitrarily high cost when the instruction is not supported -
10450 this prevents gcc from using it.  */
10453 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10454 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10457 /* Return cost of comparison done using sahf operation.
10458 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10460 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10462 enum rtx_code bypass_code, first_code, second_code;
10463 /* Return an arbitrarily high cost when the instruction is not preferred -
10464 this prevents gcc from using it.  */
10465 if (!TARGET_USE_SAHF && !optimize_size)
10467 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10468 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10471 /* Compute cost of the comparison done using any method.
10472 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10474 ix86_fp_comparison_cost (enum rtx_code code)
10476 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10479 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10480 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10482 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10483 if (min > sahf_cost)
10485 if (min > fcomi_cost)
10490 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10493 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10494 rtx *second_test, rtx *bypass_test)
10496 enum machine_mode fpcmp_mode, intcmp_mode;
10498 int cost = ix86_fp_comparison_cost (code);
10499 enum rtx_code bypass_code, first_code, second_code;
10501 fpcmp_mode = ix86_fp_compare_mode (code);
10502 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10505 *second_test = NULL_RTX;
10507 *bypass_test = NULL_RTX;
10509 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10511 /* Do fcomi/sahf based test when profitable. */
10512 if ((bypass_code == UNKNOWN || bypass_test)
10513 && (second_code == UNKNOWN || second_test)
10514 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10518 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10519 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10525 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10526 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10528 scratch = gen_reg_rtx (HImode);
10529 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10530 emit_insn (gen_x86_sahf_1 (scratch));
10533 /* The FP codes work out to act like unsigned. */
10534 intcmp_mode = fpcmp_mode;
10536 if (bypass_code != UNKNOWN)
10537 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10538 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10540 if (second_code != UNKNOWN)
10541 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10542 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10547 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10548 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10549 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10551 scratch = gen_reg_rtx (HImode);
10552 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10554 /* In the unordered case, we have to check C2 for NaNs, which
10555 doesn't happen to work out to anything nice combination-wise.
10556 So do some bit twiddling on the value we've got in AH to come
10557 up with an appropriate set of condition codes.  */
10559 intcmp_mode = CCNOmode;
10564 if (code == GT || !TARGET_IEEE_FP)
10566 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10571 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10572 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10573 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10574 intcmp_mode = CCmode;
10580 if (code == LT && TARGET_IEEE_FP)
10582 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10583 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10584 intcmp_mode = CCmode;
10589 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10595 if (code == GE || !TARGET_IEEE_FP)
10597 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10602 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10603 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10610 if (code == LE && TARGET_IEEE_FP)
10612 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10613 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10614 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10615 intcmp_mode = CCmode;
10620 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10626 if (code == EQ && TARGET_IEEE_FP)
10628 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10629 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10630 intcmp_mode = CCmode;
10635 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10642 if (code == NE && TARGET_IEEE_FP)
10644 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10645 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10651 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10657 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10661 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10666 gcc_unreachable ();
10670 /* Return the test that should be put into the flags user, i.e.
10671 the bcc, scc, or cmov instruction. */
10672 return gen_rtx_fmt_ee (code, VOIDmode,
10673 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10678 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10681 op0 = ix86_compare_op0;
10682 op1 = ix86_compare_op1;
10685 *second_test = NULL_RTX;
10687 *bypass_test = NULL_RTX;
10689 if (ix86_compare_emitted)
10691 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10692 ix86_compare_emitted = NULL_RTX;
10694 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10695 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10696 second_test, bypass_test);
10698 ret = ix86_expand_int_compare (code, op0, op1);
10703 /* Return true if the CODE will result in a nontrivial jump sequence.  */
10705 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10707 enum rtx_code bypass_code, first_code, second_code;
10710 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10711 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10715 ix86_expand_branch (enum rtx_code code, rtx label)
10719 /* If we have emitted a compare insn, go straight to simple.
10720 ix86_expand_compare won't emit anything if ix86_compare_emitted
10721 is non-NULL.  */
10722 if (ix86_compare_emitted)
10725 switch (GET_MODE (ix86_compare_op0))
10731 tmp = ix86_expand_compare (code, NULL, NULL);
10732 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10733 gen_rtx_LABEL_REF (VOIDmode, label),
10735 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10744 enum rtx_code bypass_code, first_code, second_code;
10746 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10747 &ix86_compare_op1);
10749 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10751 /* Check whether we will use the natural sequence with one jump.  If
10752 so, we can expand the jump early.  Otherwise delay expansion by
10753 creating a compound insn so as not to confuse the optimizers.  */
10754 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10757 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10758 gen_rtx_LABEL_REF (VOIDmode, label),
10759 pc_rtx, NULL_RTX, NULL_RTX);
10763 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10764 ix86_compare_op0, ix86_compare_op1);
10765 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10766 gen_rtx_LABEL_REF (VOIDmode, label),
10768 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10770 use_fcomi = ix86_use_fcomi_compare (code);
10771 vec = rtvec_alloc (3 + !use_fcomi);
10772 RTVEC_ELT (vec, 0) = tmp;
10774 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10776 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10779 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10781 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10790 /* Expand DImode branch into multiple compare+branch. */
10792 rtx lo[2], hi[2], label2;
10793 enum rtx_code code1, code2, code3;
10794 enum machine_mode submode;
10796 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10798 tmp = ix86_compare_op0;
10799 ix86_compare_op0 = ix86_compare_op1;
10800 ix86_compare_op1 = tmp;
10801 code = swap_condition (code);
10803 if (GET_MODE (ix86_compare_op0) == DImode)
10805 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10806 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10811 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10812 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10816 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10817 avoid two branches. This costs one extra insn, so disable when
10818 optimizing for size. */
10820 if ((code == EQ || code == NE)
10822 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10827 if (hi[1] != const0_rtx)
10828 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10829 NULL_RTX, 0, OPTAB_WIDEN);
10832 if (lo[1] != const0_rtx)
10833 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10834 NULL_RTX, 0, OPTAB_WIDEN);
10836 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10837 NULL_RTX, 0, OPTAB_WIDEN);
10839 ix86_compare_op0 = tmp;
10840 ix86_compare_op1 = const0_rtx;
10841 ix86_expand_branch (code, label);
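/* The identity relied on above (illustrative restatement, not from the
   original sources): for double-word a and b,

       a == b	<==>   ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0

   so an OR of two XORs reduces the equality test to one word-mode compare
   against zero, trading the extra arithmetic insn for a second branch.  */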
10845 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10846 op1 is a constant, and the low word is zero, then we can just
10847 examine the high word.  */
10849 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10852 case LT: case LTU: case GE: case GEU:
10853 ix86_compare_op0 = hi[0];
10854 ix86_compare_op1 = hi[1];
10855 ix86_expand_branch (code, label);
10861 /* Otherwise, we need two or three jumps. */
10863 label2 = gen_label_rtx ();
10866 code2 = swap_condition (code);
10867 code3 = unsigned_condition (code);
10871 case LT: case GT: case LTU: case GTU:
10874 case LE: code1 = LT; code2 = GT; break;
10875 case GE: code1 = GT; code2 = LT; break;
10876 case LEU: code1 = LTU; code2 = GTU; break;
10877 case GEU: code1 = GTU; code2 = LTU; break;
10879 case EQ: code1 = UNKNOWN; code2 = NE; break;
10880 case NE: code2 = UNKNOWN; break;
10883 gcc_unreachable ();
10888 * if (hi(a) < hi(b)) goto true;
10889 * if (hi(a) > hi(b)) goto false;
10890 * if (lo(a) < lo(b)) goto true;
10894 ix86_compare_op0 = hi[0];
10895 ix86_compare_op1 = hi[1];
10897 if (code1 != UNKNOWN)
10898 ix86_expand_branch (code1, label);
10899 if (code2 != UNKNOWN)
10900 ix86_expand_branch (code2, label2);
10902 ix86_compare_op0 = lo[0];
10903 ix86_compare_op1 = lo[1];
10904 ix86_expand_branch (code3, label);
10906 if (code2 != UNKNOWN)
10907 emit_label (label2);
10912 gcc_unreachable ();
10916 /* Split branch based on floating point condition. */
10918 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10919 rtx target1, rtx target2, rtx tmp, rtx pushed)
10921 rtx second, bypass;
10922 rtx label = NULL_RTX;
10924 int bypass_probability = -1, second_probability = -1, probability = -1;
10927 if (target2 != pc_rtx)
10930 code = reverse_condition_maybe_unordered (code);
10935 condition = ix86_expand_fp_compare (code, op1, op2,
10936 tmp, &second, &bypass);
10938 /* Remove pushed operand from stack. */
10940 ix86_free_from_memory (GET_MODE (pushed));
10942 if (split_branch_probability >= 0)
10944 /* Distribute the probabilities across the jumps.
10945 Assume that BYPASS and SECOND always test for UNORDERED. */
10947 probability = split_branch_probability;
10949 /* A value of 1 is low enough that there is no need for the
10950 probability to be updated. Later we may run some experiments and
10951 see if unordered values are more frequent in practice. */
10953 bypass_probability = 1;
10955 second_probability = 1;
10957 if (bypass != NULL_RTX)
10959 label = gen_label_rtx ();
10960 i = emit_jump_insn (gen_rtx_SET
10962 gen_rtx_IF_THEN_ELSE (VOIDmode,
10964 gen_rtx_LABEL_REF (VOIDmode,
10967 if (bypass_probability >= 0)
10969 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10970 GEN_INT (bypass_probability),
10973 i = emit_jump_insn (gen_rtx_SET
10975 gen_rtx_IF_THEN_ELSE (VOIDmode,
10976 condition, target1, target2)));
10977 if (probability >= 0)
10979 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10980 GEN_INT (probability),
10982 if (second != NULL_RTX)
10984 i = emit_jump_insn (gen_rtx_SET
10986 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10988 if (second_probability >= 0)
10990 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10991 GEN_INT (second_probability),
10994 if (label != NULL_RTX)
10995 emit_label (label);
10999 ix86_expand_setcc (enum rtx_code code, rtx dest)
11001 rtx ret, tmp, tmpreg, equiv;
11002 rtx second_test, bypass_test;
11004 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11005 return 0; /* FAIL */
11007 gcc_assert (GET_MODE (dest) == QImode);
11009 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11010 PUT_MODE (ret, QImode);
11015 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11016 if (bypass_test || second_test)
11018 rtx test = second_test;
11020 rtx tmp2 = gen_reg_rtx (QImode);
11023 gcc_assert (!second_test);
11024 test = bypass_test;
11026 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11028 PUT_MODE (test, QImode);
11029 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11032 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11034 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11037 /* Attach a REG_EQUAL note describing the comparison result. */
11038 if (ix86_compare_op0 && ix86_compare_op1)
11040 equiv = simplify_gen_relational (code, QImode,
11041 GET_MODE (ix86_compare_op0),
11042 ix86_compare_op0, ix86_compare_op1);
11043 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11046 return 1; /* DONE */
11049 /* Expand a comparison setting or clearing the carry flag. Return true
11050 when successful and set *POP to the expanded comparison rtx. */
11052 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11054 enum machine_mode mode =
11055 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11057 /* Do not handle DImode compares that go through the special path.
11058 Also we can't deal with FP compares yet. This is possible to add. */
11059 if (mode == (TARGET_64BIT ? TImode : DImode))
11061 if (FLOAT_MODE_P (mode))
11063 rtx second_test = NULL, bypass_test = NULL;
11064 rtx compare_op, compare_seq;
11066 /* Shortcut: the following common codes never translate into carry flag compares. */
11067 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11068 || code == ORDERED || code == UNORDERED)
11071 /* These comparisons require zero flag; swap operands so they won't. */
11072 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11073 && !TARGET_IEEE_FP)
11078 code = swap_condition (code);
11081 /* Try to expand the comparison and verify that we end up with a carry
11082 flag based comparison. This fails to be true only when we decide to
11083 expand the comparison using arithmetic, which is not a common scenario. */
11085 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11086 &second_test, &bypass_test);
11087 compare_seq = get_insns ();
11090 if (second_test || bypass_test)
11092 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11093 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11094 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11096 code = GET_CODE (compare_op);
11097 if (code != LTU && code != GEU)
11099 emit_insn (compare_seq);
11103 if (!INTEGRAL_MODE_P (mode))
11111 /* Convert a==0 into (unsigned)a<1. */
11114 if (op1 != const0_rtx)
11117 code = (code == EQ ? LTU : GEU);
11120 /* Convert a>b into b<a or a>=b+1. */
11123 if (GET_CODE (op1) == CONST_INT)
11125 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11126 /* Bail out on overflow. We still can swap operands but that
11127 would force loading of the constant into a register. */
11128 if (op1 == const0_rtx
11129 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11131 code = (code == GTU ? GEU : LTU);
11138 code = (code == GTU ? LTU : GEU);
11142 /* Convert a>=0 into (unsigned)a<0x80000000. */
11145 if (mode == DImode || op1 != const0_rtx)
11147 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11148 code = (code == LT ? GEU : LTU);
11152 if (mode == DImode || op1 != constm1_rtx)
11154 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11155 code = (code == LE ? GEU : LTU);
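/* Worked example (SImode): a <= -1 is the same as a < 0, i.e.
   (unsigned) a >= 0x80000000, so LE against constm1 becomes GEU
   against the sign-bit constant built above.  */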
11161 /* Swapping operands may cause the constant to appear as the first operand. */
11162 if (!nonimmediate_operand (op0, VOIDmode))
11164 if (no_new_pseudos)
11166 op0 = force_reg (mode, op0);
11168 ix86_compare_op0 = op0;
11169 ix86_compare_op1 = op1;
11170 *pop = ix86_expand_compare (code, NULL, NULL);
11171 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
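/* Note: every successful path above reduces to LTU ("below", carry set)
   or GEU ("above or equal", carry clear), the two conditions that a
   following sbb or adc can consume directly.  */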
11176 ix86_expand_int_movcc (rtx operands[])
11178 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11179 rtx compare_seq, compare_op;
11180 rtx second_test, bypass_test;
11181 enum machine_mode mode = GET_MODE (operands[0]);
11182 bool sign_bit_compare_p = false;
11185 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11186 compare_seq = get_insns ();
11189 compare_code = GET_CODE (compare_op);
11191 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11192 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11193 sign_bit_compare_p = true;
11195 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11196 HImode insns, we'd be swallowed in word prefix ops. */
11198 if ((mode != HImode || TARGET_FAST_PREFIX)
11199 && (mode != (TARGET_64BIT ? TImode : DImode))
11200 && GET_CODE (operands[2]) == CONST_INT
11201 && GET_CODE (operands[3]) == CONST_INT)
11203 rtx out = operands[0];
11204 HOST_WIDE_INT ct = INTVAL (operands[2]);
11205 HOST_WIDE_INT cf = INTVAL (operands[3]);
11206 HOST_WIDE_INT diff;
11209 /* Sign bit compares are better done using shifts than by using sbb. */
11211 if (sign_bit_compare_p
11212 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11213 ix86_compare_op1, &compare_op))
11215 /* Detect overlap between destination and compare sources. */
11218 if (!sign_bit_compare_p)
11220 bool fpcmp = false;
11222 compare_code = GET_CODE (compare_op);
11224 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11225 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11228 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11231 /* To simplify the rest of the code, restrict to the GEU case. */
11232 if (compare_code == LTU)
11234 HOST_WIDE_INT tmp = ct;
11237 compare_code = reverse_condition (compare_code);
11238 code = reverse_condition (code);
11243 PUT_CODE (compare_op,
11244 reverse_condition_maybe_unordered
11245 (GET_CODE (compare_op)));
11247 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11251 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11252 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11253 tmp = gen_reg_rtx (mode);
11255 if (mode == DImode)
11256 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11258 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
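/* The movcc_0_m1 patterns expand to a lone sbb reg,reg, so tmp ends up
   as -1 when the carry is set and 0 otherwise; e.g. with diff == ct - cf
   == 1, a later addition of ct maps {-1, 0} onto {cf, ct} branch-free.  */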
11262 if (code == GT || code == GE)
11263 code = reverse_condition (code);
11266 HOST_WIDE_INT tmp = ct;
11271 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11272 ix86_compare_op1, VOIDmode, 0, -1);
11285 tmp = expand_simple_binop (mode, PLUS,
11287 copy_rtx (tmp), 1, OPTAB_DIRECT);
11298 tmp = expand_simple_binop (mode, IOR,
11300 copy_rtx (tmp), 1, OPTAB_DIRECT);
11302 else if (diff == -1 && ct)
11312 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11314 tmp = expand_simple_binop (mode, PLUS,
11315 copy_rtx (tmp), GEN_INT (cf),
11316 copy_rtx (tmp), 1, OPTAB_DIRECT);
11324 * andl cf - ct, dest
11334 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11337 tmp = expand_simple_binop (mode, AND,
11339 gen_int_mode (cf - ct, mode),
11340 copy_rtx (tmp), 1, OPTAB_DIRECT);
11342 tmp = expand_simple_binop (mode, PLUS,
11343 copy_rtx (tmp), GEN_INT (ct),
11344 copy_rtx (tmp), 1, OPTAB_DIRECT);
11347 if (!rtx_equal_p (tmp, out))
11348 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11350 return 1; /* DONE */
11356 tmp = ct, ct = cf, cf = tmp;
11358 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11360 /* We may be reversing an unordered compare to a normal compare, which
11361 is not valid in general (we may convert a non-trapping condition
11362 to a trapping one); however, on i386 we currently emit all
11363 comparisons unordered. */
11364 compare_code = reverse_condition_maybe_unordered (compare_code);
11365 code = reverse_condition_maybe_unordered (code);
11369 compare_code = reverse_condition (compare_code);
11370 code = reverse_condition (code);
11374 compare_code = UNKNOWN;
11375 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11376 && GET_CODE (ix86_compare_op1) == CONST_INT)
11378 if (ix86_compare_op1 == const0_rtx
11379 && (code == LT || code == GE))
11380 compare_code = code;
11381 else if (ix86_compare_op1 == constm1_rtx)
11385 else if (code == GT)
11390 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11391 if (compare_code != UNKNOWN
11392 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11393 && (cf == -1 || ct == -1))
11395 /* If lea code below could be used, only optimize
11396 if it results in a 2 insn sequence. */
11398 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11399 || diff == 3 || diff == 5 || diff == 9)
11400 || (compare_code == LT && ct == -1)
11401 || (compare_code == GE && cf == -1))
11404 * notl op1 (if necessary)
11412 code = reverse_condition (code);
11415 out = emit_store_flag (out, code, ix86_compare_op0,
11416 ix86_compare_op1, VOIDmode, 0, -1);
11418 out = expand_simple_binop (mode, IOR,
11420 out, 1, OPTAB_DIRECT);
11421 if (out != operands[0])
11422 emit_move_insn (operands[0], out);
11424 return 1; /* DONE */
11429 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11430 || diff == 3 || diff == 5 || diff == 9)
11431 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11433 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11439 * lea cf(dest*(ct-cf)),dest
11443 * This also catches the degenerate setcc-only case.
11449 out = emit_store_flag (out, code, ix86_compare_op0,
11450 ix86_compare_op1, VOIDmode, 0, 1);
11453 /* On x86_64 the lea instruction operates on Pmode, so we need
11454 to get the arithmetic done in the proper mode to match. */
11456 tmp = copy_rtx (out);
11460 out1 = copy_rtx (out);
11461 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11465 tmp = gen_rtx_PLUS (mode, tmp, out1);
11471 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11474 if (!rtx_equal_p (tmp, out))
11477 out = force_operand (tmp, copy_rtx (out));
11479 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11481 if (!rtx_equal_p (out, operands[0]))
11482 emit_move_insn (operands[0], copy_rtx (out));
11484 return 1; /* DONE */
11488 * General case: Jumpful:
11489 * xorl dest,dest cmpl op1, op2
11490 * cmpl op1, op2 movl ct, dest
11491 * setcc dest jcc 1f
11492 * decl dest movl cf, dest
11493 * andl (cf-ct),dest 1:
11496 * Size 20. Size 14.
11498 * This is reasonably steep, but branch mispredict costs are
11499 * high on modern cpus, so consider failing only if optimizing for size.
11503 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11504 && BRANCH_COST >= 2)
11510 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11511 /* We may be reversing an unordered compare to a normal compare,
11512 which is not valid in general (we may convert a non-trapping
11513 condition to a trapping one); however, on i386 we currently
11514 emit all comparisons unordered. */
11515 code = reverse_condition_maybe_unordered (code);
11518 code = reverse_condition (code);
11519 if (compare_code != UNKNOWN)
11520 compare_code = reverse_condition (compare_code);
11524 if (compare_code != UNKNOWN)
11526 /* notl op1 (if needed)
11531 For x < 0 (resp. x <= -1) there will be no notl,
11532 so if possible swap the constants to get rid of the complement.
11534 True/false will be -1/0 while the code below (store flag
11535 followed by decrement) is 0/-1, so the constants need
11536 to be exchanged once more. */
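/* As plain arithmetic, with mask m in {0, -1} from the arithmetic shift:
   (m & (cf - ct)) + ct yields ct when m == 0 and cf when m == -1, so a
   single and/add pair performs the select once the constants are
   arranged as above.  */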
11538 if (compare_code == GE || !cf)
11540 code = reverse_condition (code);
11545 HOST_WIDE_INT tmp = cf;
11550 out = emit_store_flag (out, code, ix86_compare_op0,
11551 ix86_compare_op1, VOIDmode, 0, -1);
11555 out = emit_store_flag (out, code, ix86_compare_op0,
11556 ix86_compare_op1, VOIDmode, 0, 1);
11558 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11559 copy_rtx (out), 1, OPTAB_DIRECT);
11562 out = expand_simple_binop (mode, AND, copy_rtx (out),
11563 gen_int_mode (cf - ct, mode),
11564 copy_rtx (out), 1, OPTAB_DIRECT);
11566 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11567 copy_rtx (out), 1, OPTAB_DIRECT);
11568 if (!rtx_equal_p (out, operands[0]))
11569 emit_move_insn (operands[0], copy_rtx (out));
11571 return 1; /* DONE */
11575 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11577 /* Try a few things more with specific constants and a variable. */
11580 rtx var, orig_out, out, tmp;
11582 if (BRANCH_COST <= 2)
11583 return 0; /* FAIL */
11585 /* If one of the two operands is an interesting constant, load a
11586 constant with the above and mask it in with a logical operation. */
11588 if (GET_CODE (operands[2]) == CONST_INT)
11591 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11592 operands[3] = constm1_rtx, op = and_optab;
11593 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11594 operands[3] = const0_rtx, op = ior_optab;
11596 return 0; /* FAIL */
11598 else if (GET_CODE (operands[3]) == CONST_INT)
11601 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11602 operands[2] = constm1_rtx, op = and_optab;
11603 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11604 operands[2] = const0_rtx, op = ior_optab;
11606 return 0; /* FAIL */
11609 return 0; /* FAIL */
11611 orig_out = operands[0];
11612 tmp = gen_reg_rtx (mode);
11615 /* Recurse to get the constant loaded. */
11616 if (ix86_expand_int_movcc (operands) == 0)
11617 return 0; /* FAIL */
11619 /* Mask in the interesting variable. */
11620 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11622 if (!rtx_equal_p (out, orig_out))
11623 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11625 return 1; /* DONE */
11629 * For comparison with above,
11639 if (! nonimmediate_operand (operands[2], mode))
11640 operands[2] = force_reg (mode, operands[2]);
11641 if (! nonimmediate_operand (operands[3], mode))
11642 operands[3] = force_reg (mode, operands[3]);
11644 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11646 rtx tmp = gen_reg_rtx (mode);
11647 emit_move_insn (tmp, operands[3]);
11650 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11652 rtx tmp = gen_reg_rtx (mode);
11653 emit_move_insn (tmp, operands[2]);
11657 if (! register_operand (operands[2], VOIDmode)
11659 || ! register_operand (operands[3], VOIDmode)))
11660 operands[2] = force_reg (mode, operands[2]);
11663 && ! register_operand (operands[3], VOIDmode))
11664 operands[3] = force_reg (mode, operands[3]);
11666 emit_insn (compare_seq);
11667 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11668 gen_rtx_IF_THEN_ELSE (mode,
11669 compare_op, operands[2],
11672 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11673 gen_rtx_IF_THEN_ELSE (mode,
11675 copy_rtx (operands[3]),
11676 copy_rtx (operands[0]))));
11678 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11679 gen_rtx_IF_THEN_ELSE (mode,
11681 copy_rtx (operands[2]),
11682 copy_rtx (operands[0]))));
11684 return 1; /* DONE */
11687 /* Swap, force into registers, or otherwise massage the two operands
11688 to an sse comparison with a mask result. Thus we differ a bit from
11689 ix86_prepare_fp_compare_args which expects to produce a flags result.
11691 The DEST operand exists to help determine whether to commute commutative
11692 operators. The POP0/POP1 operands are updated in place. The new
11693 comparison code is returned, or UNKNOWN if not implementable. */
11695 static enum rtx_code
11696 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11697 rtx *pop0, rtx *pop1)
11705 /* We have no LTGT as an operator. We could implement it with
11706 NE & ORDERED, but this requires an extra temporary. It's
11707 not clear that it's worth it. */
11714 /* These are supported directly. */
11721 /* For commutative operators, try to canonicalize the destination
11722 operand to be first in the comparison - this helps reload to
11723 avoid extra moves. */
11724 if (!dest || !rtx_equal_p (dest, *pop1))
11732 /* These are not supported directly. Swap the comparison operands
11733 to transform into something that is supported. */
11737 code = swap_condition (code);
11741 gcc_unreachable ();
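/* For instance, SSE cmpps/cmpsd provide no GT or GE predicates, so a
   GT test is rewritten above as LT with the operands swapped.  */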
11747 /* Detect conditional moves that exactly match min/max operational
11748 semantics. Note that this is IEEE safe, as long as we don't
11749 interchange the operands.
11751 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11752 and TRUE if the operation is successful and instructions are emitted. */
11755 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11756 rtx cmp_op1, rtx if_true, rtx if_false)
11758 enum machine_mode mode;
11764 else if (code == UNGE)
11767 if_true = if_false;
11773 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11775 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11780 mode = GET_MODE (dest);
11782 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11783 but MODE may be a vector mode and thus not appropriate. */
11784 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11786 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11789 if_true = force_reg (mode, if_true);
11790 v = gen_rtvec (2, if_true, if_false);
11791 tmp = gen_rtx_UNSPEC (mode, v, u);
11795 code = is_min ? SMIN : SMAX;
11796 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11799 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
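/* A note on why operand order matters: minps/maxps return the second
   operand when the comparison is unordered, so the bare SMIN/SMAX form
   is only used when NaNs and signed zeros need not be honored, matching
   the UNSPEC_IEEE_* guard above.  */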
11803 /* Expand an sse vector comparison. Return the register with the result. */
11806 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11807 rtx op_true, rtx op_false)
11809 enum machine_mode mode = GET_MODE (dest);
11812 cmp_op0 = force_reg (mode, cmp_op0);
11813 if (!nonimmediate_operand (cmp_op1, mode))
11814 cmp_op1 = force_reg (mode, cmp_op1);
11817 || reg_overlap_mentioned_p (dest, op_true)
11818 || reg_overlap_mentioned_p (dest, op_false))
11819 dest = gen_reg_rtx (mode);
11821 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11822 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11827 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11828 operations. This is used for both scalar and vector conditional moves. */
11831 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11833 enum machine_mode mode = GET_MODE (dest);
11836 if (op_false == CONST0_RTX (mode))
11838 op_true = force_reg (mode, op_true);
11839 x = gen_rtx_AND (mode, cmp, op_true);
11840 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11842 else if (op_true == CONST0_RTX (mode))
11844 op_false = force_reg (mode, op_false);
11845 x = gen_rtx_NOT (mode, cmp);
11846 x = gen_rtx_AND (mode, x, op_false);
11847 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11851 op_true = force_reg (mode, op_true);
11852 op_false = force_reg (mode, op_false);
11854 t2 = gen_reg_rtx (mode);
11856 t3 = gen_reg_rtx (mode);
11860 x = gen_rtx_AND (mode, op_true, cmp);
11861 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11863 x = gen_rtx_NOT (mode, cmp);
11864 x = gen_rtx_AND (mode, x, op_false);
11865 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11867 x = gen_rtx_IOR (mode, t3, t2);
11868 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
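/* The general case above is the classic branch-free select:
       dest = (cmp & op_true) | (~cmp & op_false)
   where cmp is the all-ones / all-zeros mask produced by
   ix86_expand_sse_cmp.  */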
11872 /* Expand a floating-point conditional move. Return true if successful. */
11875 ix86_expand_fp_movcc (rtx operands[])
11877 enum machine_mode mode = GET_MODE (operands[0]);
11878 enum rtx_code code = GET_CODE (operands[1]);
11879 rtx tmp, compare_op, second_test, bypass_test;
11881 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11883 enum machine_mode cmode;
11885 /* Since we've no cmove for sse registers, don't force bad register
11886 allocation just to gain access to it. Deny movcc when the
11887 comparison mode doesn't match the move mode. */
11888 cmode = GET_MODE (ix86_compare_op0);
11889 if (cmode == VOIDmode)
11890 cmode = GET_MODE (ix86_compare_op1);
11894 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11896 &ix86_compare_op1);
11897 if (code == UNKNOWN)
11900 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11901 ix86_compare_op1, operands[2],
11905 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11906 ix86_compare_op1, operands[2], operands[3]);
11907 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11911 /* The floating point conditional move instructions don't directly
11912 support conditions resulting from a signed integer comparison. */
11914 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11919 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11921 gcc_assert (!second_test && !bypass_test);
11922 tmp = gen_reg_rtx (QImode);
11923 ix86_expand_setcc (code, tmp);
11925 ix86_compare_op0 = tmp;
11926 ix86_compare_op1 = const0_rtx;
11927 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11929 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11931 tmp = gen_reg_rtx (mode);
11932 emit_move_insn (tmp, operands[3]);
11935 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11937 tmp = gen_reg_rtx (mode);
11938 emit_move_insn (tmp, operands[2]);
11942 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11943 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11944 operands[2], operands[3])));
11946 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11947 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11948 operands[3], operands[0])));
11950 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11951 gen_rtx_IF_THEN_ELSE (mode, second_test,
11952 operands[2], operands[0])));
11957 /* Expand a floating-point vector conditional move; a vcond operation
11958 rather than a movcc operation. */
11961 ix86_expand_fp_vcond (rtx operands[])
11963 enum rtx_code code = GET_CODE (operands[3]);
11966 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11967 &operands[4], &operands[5]);
11968 if (code == UNKNOWN)
11971 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11972 operands[5], operands[1], operands[2]))
11975 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11976 operands[1], operands[2]);
11977 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11981 /* Expand a signed integral vector conditional move. */
11984 ix86_expand_int_vcond (rtx operands[])
11986 enum machine_mode mode = GET_MODE (operands[0]);
11987 enum rtx_code code = GET_CODE (operands[3]);
11988 bool negate = false;
11991 cop0 = operands[4];
11992 cop1 = operands[5];
11994 /* Canonicalize the comparison to EQ, GT, GTU. */
12005 code = reverse_condition (code);
12011 code = reverse_condition (code);
12017 code = swap_condition (code);
12018 x = cop0, cop0 = cop1, cop1 = x;
12022 gcc_unreachable ();
12025 /* Unsigned parallel compare is not supported by the hardware. Play some
12026 tricks to turn this into a signed comparison against 0. */
12029 cop0 = force_reg (mode, cop0);
12037 /* Perform a parallel modulo subtraction. */
12038 t1 = gen_reg_rtx (mode);
12039 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12041 /* Extract the original sign bit of op0. */
12042 mask = GEN_INT (-0x80000000);
12043 mask = gen_rtx_CONST_VECTOR (mode,
12044 gen_rtvec (4, mask, mask, mask, mask));
12045 mask = force_reg (mode, mask);
12046 t2 = gen_reg_rtx (mode);
12047 emit_insn (gen_andv4si3 (t2, cop0, mask));
12049 /* XOR it back into the result of the subtraction. This results
12050 in the sign bit set iff we saw unsigned underflow. */
12051 x = gen_reg_rtx (mode);
12052 emit_insn (gen_xorv4si3 (x, t1, t2));
12060 /* Perform a parallel unsigned saturating subtraction. */
12061 x = gen_reg_rtx (mode);
12062 emit_insn (gen_rtx_SET (VOIDmode, x,
12063 gen_rtx_US_MINUS (mode, cop0, cop1)));
12070 gcc_unreachable ();
12074 cop1 = CONST0_RTX (mode);
12077 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12078 operands[1+negate], operands[2-negate]);
12080 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12081 operands[2-negate]);
12085 /* Expand conditional increment or decrement using adc/sbb instructions.
12086 The default case using setcc followed by the conditional move can be
12087 done by generic code. */
12089 ix86_expand_int_addcc (rtx operands[])
12091 enum rtx_code code = GET_CODE (operands[1]);
12093 rtx val = const0_rtx;
12094 bool fpcmp = false;
12095 enum machine_mode mode = GET_MODE (operands[0]);
12097 if (operands[3] != const1_rtx
12098 && operands[3] != constm1_rtx)
12100 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12101 ix86_compare_op1, &compare_op))
12103 code = GET_CODE (compare_op);
12105 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12106 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12109 code = ix86_fp_compare_code_to_integer (code);
12116 PUT_CODE (compare_op,
12117 reverse_condition_maybe_unordered
12118 (GET_CODE (compare_op)));
12120 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12122 PUT_MODE (compare_op, mode);
12124 /* Construct either adc or sbb insn. */
12125 if ((code == LTU) == (operands[3] == constm1_rtx))
12127 switch (GET_MODE (operands[0]))
12130 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12133 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12136 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12139 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12142 gcc_unreachable ();
12147 switch (GET_MODE (operands[0]))
12150 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12153 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12156 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12159 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12162 gcc_unreachable ();
12165 return 1; /* DONE */
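/* Sketch (AT&T syntax, register names illustrative): for unsigned
   operands, x = x + (a < b) becomes
       cmpl %ebx, %eax       ; carry set iff a < b
       adcl $0, %ecx         ; fold the carry into x
   with no setcc and no branch.  */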
12169 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12170 works for floating point parameters and non-offsettable memories.
12171 For pushes, it returns just stack offsets; the values will be saved
12172 in the right order. Maximally three parts are generated. */
12175 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12180 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12182 size = (GET_MODE_SIZE (mode) + 4) / 8;
12184 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12185 gcc_assert (size >= 2 && size <= 3);
12187 /* Optimize constant pool reference to immediates. This is used by fp
12188 moves that force all constants to memory to allow combining. */
12189 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12191 rtx tmp = maybe_get_pool_constant (operand);
12196 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12198 /* The only non-offsettable memories we handle are pushes. */
12199 int ok = push_operand (operand, VOIDmode);
12203 operand = copy_rtx (operand);
12204 PUT_MODE (operand, Pmode);
12205 parts[0] = parts[1] = parts[2] = operand;
12209 if (GET_CODE (operand) == CONST_VECTOR)
12211 enum machine_mode imode = int_mode_for_mode (mode);
12212 /* Caution: if we looked through a constant pool memory above,
12213 the operand may actually have a different mode now. That's
12214 ok, since we want to pun this all the way back to an integer. */
12215 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12216 gcc_assert (operand != NULL);
12222 if (mode == DImode)
12223 split_di (&operand, 1, &parts[0], &parts[1]);
12226 if (REG_P (operand))
12228 gcc_assert (reload_completed);
12229 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12230 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12232 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12234 else if (offsettable_memref_p (operand))
12236 operand = adjust_address (operand, SImode, 0);
12237 parts[0] = operand;
12238 parts[1] = adjust_address (operand, SImode, 4);
12240 parts[2] = adjust_address (operand, SImode, 8);
12242 else if (GET_CODE (operand) == CONST_DOUBLE)
12247 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12251 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12252 parts[2] = gen_int_mode (l[2], SImode);
12255 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12258 gcc_unreachable ();
12260 parts[1] = gen_int_mode (l[1], SImode);
12261 parts[0] = gen_int_mode (l[0], SImode);
12264 gcc_unreachable ();
12269 if (mode == TImode)
12270 split_ti (&operand, 1, &parts[0], &parts[1]);
12271 if (mode == XFmode || mode == TFmode)
12273 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12274 if (REG_P (operand))
12276 gcc_assert (reload_completed);
12277 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12278 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12280 else if (offsettable_memref_p (operand))
12282 operand = adjust_address (operand, DImode, 0);
12283 parts[0] = operand;
12284 parts[1] = adjust_address (operand, upper_mode, 8);
12286 else if (GET_CODE (operand) == CONST_DOUBLE)
12291 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12292 real_to_target (l, &r, mode);
12294 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12295 if (HOST_BITS_PER_WIDE_INT >= 64)
12298 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12299 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12302 parts[0] = immed_double_const (l[0], l[1], DImode);
12304 if (upper_mode == SImode)
12305 parts[1] = gen_int_mode (l[2], SImode);
12306 else if (HOST_BITS_PER_WIDE_INT >= 64)
12309 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12310 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12313 parts[1] = immed_double_const (l[2], l[3], DImode);
12316 gcc_unreachable ();
12323 /* Emit insns to perform a move or push of DI, DF, and XF values.
12324 Return false when normal moves are needed; true when all required
12325 insns have been emitted. Operands 2-4 contain the input values
12326 in the correct order; operands 5-7 contain the output values. */
12329 ix86_split_long_move (rtx operands[])
12334 int collisions = 0;
12335 enum machine_mode mode = GET_MODE (operands[0]);
12337 /* The DFmode expanders may ask us to move a double.
12338 For a 64-bit target this is a single move. By hiding the fact
12339 here we simplify the i386.md splitters. */
12340 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12342 /* Optimize constant pool reference to immediates. This is used by
12343 fp moves that force all constants to memory to allow combining. */
12345 if (GET_CODE (operands[1]) == MEM
12346 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12347 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12348 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12349 if (push_operand (operands[0], VOIDmode))
12351 operands[0] = copy_rtx (operands[0]);
12352 PUT_MODE (operands[0], Pmode);
12355 operands[0] = gen_lowpart (DImode, operands[0]);
12356 operands[1] = gen_lowpart (DImode, operands[1]);
12357 emit_move_insn (operands[0], operands[1]);
12361 /* The only non-offsettable memory we handle is push. */
12362 if (push_operand (operands[0], VOIDmode))
12365 gcc_assert (GET_CODE (operands[0]) != MEM
12366 || offsettable_memref_p (operands[0]));
12368 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12369 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12371 /* When emitting a push, take care of source operands on the stack. */
12372 if (push && GET_CODE (operands[1]) == MEM
12373 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12376 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12377 XEXP (part[1][2], 0));
12378 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12379 XEXP (part[1][1], 0));
12382 /* We need to do the copy in the right order in case an address
12383 register of the source overlaps the destination. */
12384 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12386 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12388 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12391 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12394 /* Collision in the middle part can be handled by reordering. */
12395 if (collisions == 1 && nparts == 3
12396 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12399 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12400 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12403 /* If there are more collisions, we can't handle it by reordering.
12404 Do an lea to the last part and use only one colliding move. */
12405 else if (collisions > 1)
12411 base = part[0][nparts - 1];
12413 /* Handle the case when the last part isn't valid for lea.
12414 Happens in 64-bit mode storing the 12-byte XFmode. */
12415 if (GET_MODE (base) != Pmode)
12416 base = gen_rtx_REG (Pmode, REGNO (base));
12418 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12419 part[1][0] = replace_equiv_address (part[1][0], base);
12420 part[1][1] = replace_equiv_address (part[1][1],
12421 plus_constant (base, UNITS_PER_WORD));
12423 part[1][2] = replace_equiv_address (part[1][2],
12424 plus_constant (base, 8));
12434 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12435 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12436 emit_move_insn (part[0][2], part[1][2]);
12441 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12442 a register, it is OK - we will just use the larger counterpart. We also
12443 retype memory - these come from an attempt to avoid the REX prefix on
12444 moving the second half of a TFmode value. */
12445 if (GET_MODE (part[1][1]) == SImode)
12447 switch (GET_CODE (part[1][1]))
12450 part[1][1] = adjust_address (part[1][1], DImode, 0);
12454 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12458 gcc_unreachable ();
12461 if (GET_MODE (part[1][0]) == SImode)
12462 part[1][0] = part[1][1];
12465 emit_move_insn (part[0][1], part[1][1]);
12466 emit_move_insn (part[0][0], part[1][0]);
12470 /* Choose the correct order so as not to overwrite the source before it is copied. */
12471 if ((REG_P (part[0][0])
12472 && REG_P (part[1][1])
12473 && (REGNO (part[0][0]) == REGNO (part[1][1])
12475 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12477 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12481 operands[2] = part[0][2];
12482 operands[3] = part[0][1];
12483 operands[4] = part[0][0];
12484 operands[5] = part[1][2];
12485 operands[6] = part[1][1];
12486 operands[7] = part[1][0];
12490 operands[2] = part[0][1];
12491 operands[3] = part[0][0];
12492 operands[5] = part[1][1];
12493 operands[6] = part[1][0];
12500 operands[2] = part[0][0];
12501 operands[3] = part[0][1];
12502 operands[4] = part[0][2];
12503 operands[5] = part[1][0];
12504 operands[6] = part[1][1];
12505 operands[7] = part[1][2];
12509 operands[2] = part[0][0];
12510 operands[3] = part[0][1];
12511 operands[5] = part[1][0];
12512 operands[6] = part[1][1];
12516 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12519 if (GET_CODE (operands[5]) == CONST_INT
12520 && operands[5] != const0_rtx
12521 && REG_P (operands[2]))
12523 if (GET_CODE (operands[6]) == CONST_INT
12524 && INTVAL (operands[6]) == INTVAL (operands[5]))
12525 operands[6] = operands[2];
12528 && GET_CODE (operands[7]) == CONST_INT
12529 && INTVAL (operands[7]) == INTVAL (operands[5]))
12530 operands[7] = operands[2];
12534 && GET_CODE (operands[6]) == CONST_INT
12535 && operands[6] != const0_rtx
12536 && REG_P (operands[3])
12537 && GET_CODE (operands[7]) == CONST_INT
12538 && INTVAL (operands[7]) == INTVAL (operands[6]))
12539 operands[7] = operands[3];
12542 emit_move_insn (operands[2], operands[5]);
12543 emit_move_insn (operands[3], operands[6]);
12545 emit_move_insn (operands[4], operands[7]);
12550 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12551 left shift by a constant, either using a single shift or
12552 a sequence of add instructions. */
12555 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12559 emit_insn ((mode == DImode
12561 : gen_adddi3) (operand, operand, operand));
12563 else if (!optimize_size
12564 && count * ix86_cost->add <= ix86_cost->shift_const)
12567 for (i=0; i<count; i++)
12569 emit_insn ((mode == DImode
12571 : gen_adddi3) (operand, operand, operand));
12575 emit_insn ((mode == DImode
12577 : gen_ashldi3) (operand, operand, GEN_INT (count)));
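/* For example, with cheap adds and !optimize_size, a constant shift
   left by 2 comes out as two self-additions, each one doubling the
   value (register choice illustrative):
       addl %eax, %eax
       addl %eax, %eax
   which is equivalent to a single shll $2, %eax.  */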
12581 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12583 rtx low[2], high[2];
12585 const int single_width = mode == DImode ? 32 : 64;
12587 if (GET_CODE (operands[2]) == CONST_INT)
12589 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12590 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12592 if (count >= single_width)
12594 emit_move_insn (high[0], low[1]);
12595 emit_move_insn (low[0], const0_rtx);
12597 if (count > single_width)
12598 ix86_expand_ashl_const (high[0], count - single_width, mode);
12602 if (!rtx_equal_p (operands[0], operands[1]))
12603 emit_move_insn (operands[0], operands[1]);
12604 emit_insn ((mode == DImode
12606 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12607 ix86_expand_ashl_const (low[0], count, mode);
12612 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12614 if (operands[1] == const1_rtx)
12616 /* Assuming we've chosen QImode-capable registers, 1 << N
12617 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12618 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12620 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12622 ix86_expand_clear (low[0]);
12623 ix86_expand_clear (high[0]);
12624 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12626 d = gen_lowpart (QImode, low[0]);
12627 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12628 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12629 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12631 d = gen_lowpart (QImode, high[0]);
12632 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12633 s = gen_rtx_NE (QImode, flags, const0_rtx);
12634 emit_insn (gen_rtx_SET (VOIDmode, d, s));
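/* A rough sketch of the idiom emitted above for DImode (operand
   placement illustrative):
       xorl  low, low
       xorl  high, high
       testb $32, %cl
       sete  low_b           ; low  = (count & 32) == 0
       setne high_b          ; high = (count & 32) != 0
   after which the common tail below shifts both halves by the count;
   the hardware masks the shift amount, so the single one bit lands in
   the correct half.  */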
12637 /* Otherwise, we can get the same results by manually performing
12638 a bit extract operation on bit 5/6, and then performing the two
12639 shifts. The two methods of getting 0/1 into low/high are exactly
12640 the same size. Avoiding the shift in the bit extract case helps
12641 pentium4 a bit; no one else seems to care much either way. */
12646 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12647 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12649 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12650 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12652 emit_insn ((mode == DImode
12654 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12655 emit_insn ((mode == DImode
12657 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12658 emit_move_insn (low[0], high[0]);
12659 emit_insn ((mode == DImode
12661 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
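/* At this point high = (count >> 5) & 1 (bit 6 for the TImode case)
   and low = high ^ 1; the shared variable shifts below then move the
   single one bit into place, again relying on the hardware masking
   the shift count.  */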
12664 emit_insn ((mode == DImode
12666 : gen_ashldi3) (low[0], low[0], operands[2]));
12667 emit_insn ((mode == DImode
12669 : gen_ashldi3) (high[0], high[0], operands[2]));
12673 if (operands[1] == constm1_rtx)
12675 /* For -1 << N, we can avoid the shld instruction, because we
12676 know that we're shifting 0...31/63 ones into a -1. */
12677 emit_move_insn (low[0], constm1_rtx);
12679 emit_move_insn (high[0], low[0]);
12681 emit_move_insn (high[0], constm1_rtx);
12685 if (!rtx_equal_p (operands[0], operands[1]))
12686 emit_move_insn (operands[0], operands[1]);
12688 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12689 emit_insn ((mode == DImode
12691 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12694 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12696 if (TARGET_CMOVE && scratch)
12698 ix86_expand_clear (scratch);
12699 emit_insn ((mode == DImode
12700 ? gen_x86_shift_adj_1
12701 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12704 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12708 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12710 rtx low[2], high[2];
12712 const int single_width = mode == DImode ? 32 : 64;
12714 if (GET_CODE (operands[2]) == CONST_INT)
12716 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12717 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12719 if (count == single_width * 2 - 1)
12721 emit_move_insn (high[0], high[1]);
12722 emit_insn ((mode == DImode
12724 : gen_ashrdi3) (high[0], high[0],
12725 GEN_INT (single_width - 1)));
12726 emit_move_insn (low[0], high[0]);
12729 else if (count >= single_width)
12731 emit_move_insn (low[0], high[1]);
12732 emit_move_insn (high[0], low[0]);
12733 emit_insn ((mode == DImode
12735 : gen_ashrdi3) (high[0], high[0],
12736 GEN_INT (single_width - 1)));
12737 if (count > single_width)
12738 emit_insn ((mode == DImode
12740 : gen_ashrdi3) (low[0], low[0],
12741 GEN_INT (count - single_width)));
12745 if (!rtx_equal_p (operands[0], operands[1]))
12746 emit_move_insn (operands[0], operands[1]);
12747 emit_insn ((mode == DImode
12749 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12750 emit_insn ((mode == DImode
12752 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12757 if (!rtx_equal_p (operands[0], operands[1]))
12758 emit_move_insn (operands[0], operands[1]);
12760 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12762 emit_insn ((mode == DImode
12764 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12765 emit_insn ((mode == DImode
12767 : gen_ashrdi3) (high[0], high[0], operands[2]));
12769 if (TARGET_CMOVE && scratch)
12771 emit_move_insn (scratch, high[0]);
12772 emit_insn ((mode == DImode
12774 : gen_ashrdi3) (scratch, scratch,
12775 GEN_INT (single_width - 1)));
12776 emit_insn ((mode == DImode
12777 ? gen_x86_shift_adj_1
12778 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12782 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12787 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12789 rtx low[2], high[2];
12791 const int single_width = mode == DImode ? 32 : 64;
12793 if (GET_CODE (operands[2]) == CONST_INT)
12795 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12796 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12798 if (count >= single_width)
12800 emit_move_insn (low[0], high[1]);
12801 ix86_expand_clear (high[0]);
12803 if (count > single_width)
12804 emit_insn ((mode == DImode
12806 : gen_lshrdi3) (low[0], low[0],
12807 GEN_INT (count - single_width)));
12811 if (!rtx_equal_p (operands[0], operands[1]))
12812 emit_move_insn (operands[0], operands[1]);
12813 emit_insn ((mode == DImode
12815 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12816 emit_insn ((mode == DImode
12818 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12823 if (!rtx_equal_p (operands[0], operands[1]))
12824 emit_move_insn (operands[0], operands[1]);
12826 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12828 emit_insn ((mode == DImode
12830 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12831 emit_insn ((mode == DImode
12833 : gen_lshrdi3) (high[0], high[0], operands[2]));
12835 /* Heh. By reversing the arguments, we can reuse this pattern. */
12836 if (TARGET_CMOVE && scratch)
12838 ix86_expand_clear (scratch);
12839 emit_insn ((mode == DImode
12840 ? gen_x86_shift_adj_1
12841 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12845 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12849 /* Helper function for the string operations below. Test whether VARIABLE
12850 is aligned to VALUE bytes. If true, jump to the label. */
12852 ix86_expand_aligntest (rtx variable, int value)
12854 rtx label = gen_label_rtx ();
12855 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12856 if (GET_MODE (variable) == DImode)
12857 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12859 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12860 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
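/* Typical use (as in the expanders below), fixing up a 2-byte chunk:
       rtx label = ix86_expand_aligntest (destreg, 2);
       ... emit one HImode move ...
       emit_label (label);
   i.e. the fixup code is skipped when the tested address bit is clear.  */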
12865 /* Adjust COUNTER by the VALUE. */
12867 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12869 if (GET_MODE (countreg) == DImode)
12870 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12872 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12875 /* Zero extend possibly SImode EXP to Pmode register. */
12877 ix86_zero_extend_to_Pmode (rtx exp)
12880 if (GET_MODE (exp) == VOIDmode)
12881 return force_reg (Pmode, exp);
12882 if (GET_MODE (exp) == Pmode)
12883 return copy_to_mode_reg (Pmode, exp);
12884 r = gen_reg_rtx (Pmode);
12885 emit_insn (gen_zero_extendsidi2 (r, exp));
12889 /* Expand string move (memcpy) operation. Use i386 string operations when
12890 profitable. expand_clrmem contains similar code. */
12892 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12894 rtx srcreg, destreg, countreg, srcexp, destexp;
12895 enum machine_mode counter_mode;
12896 HOST_WIDE_INT align = 0;
12897 unsigned HOST_WIDE_INT count = 0;
12899 if (GET_CODE (align_exp) == CONST_INT)
12900 align = INTVAL (align_exp);
12902 /* Can't use any of this if the user has appropriated esi or edi. */
12903 if (global_regs[4] || global_regs[5])
12906 /* This simple hack avoids all inlining code and simplifies code below. */
12907 if (!TARGET_ALIGN_STRINGOPS)
12910 if (GET_CODE (count_exp) == CONST_INT)
12912 count = INTVAL (count_exp);
12913 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12917 /* Figure out the proper mode for the counter. For 32 bits it is
12918 always SImode; for 64 bits use SImode when possible, otherwise DImode.
12919 Set count to the number of bytes copied when known at compile time. */
12921 || GET_MODE (count_exp) == SImode
12922 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12923 counter_mode = SImode;
12925 counter_mode = DImode;
12927 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12929 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12930 if (destreg != XEXP (dst, 0))
12931 dst = replace_equiv_address_nv (dst, destreg);
12932 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12933 if (srcreg != XEXP (src, 0))
12934 src = replace_equiv_address_nv (src, srcreg);
12936 /* When optimizing for size emit a simple rep ; movsb instruction for
12937 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12938 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12939 Size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12940 count / 4 + (count & 3) bytes; the other sequence is either 4 or 7 bytes,
12941 but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12942 known to be zero or not. The rep; movsb sequence causes higher
12943 register pressure though, so take that into account. */
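/* Worked size example: count == 6 gives 6/4 + (6 & 3) = 3 bytes
   (one movsl plus one movsw), beating both the 4-byte
   movb $6, %cl; rep; movsb and the 7-byte movl form.  */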
12945 if ((!optimize || optimize_size)
12950 || (count & 3) + count / 4 > 6))))
12952 emit_insn (gen_cld ());
12953 countreg = ix86_zero_extend_to_Pmode (count_exp);
12954 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12955 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12956 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12960 /* For constant aligned (or small unaligned) copies use rep movsl
12961 followed by code copying the rest. For PentiumPro ensure 8 byte
12962 alignment to allow rep movsl acceleration. */
12964 else if (count != 0
12966 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12967 || optimize_size || count < (unsigned int) 64))
12969 unsigned HOST_WIDE_INT offset = 0;
12970 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12971 rtx srcmem, dstmem;
12973 emit_insn (gen_cld ());
12974 if (count & ~(size - 1))
12976 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12978 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12980 while (offset < (count & ~(size - 1)))
12982 srcmem = adjust_automodify_address_nv (src, movs_mode,
12984 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12986 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12992 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12993 & (TARGET_64BIT ? -1 : 0x3fffffff));
12994 countreg = copy_to_mode_reg (counter_mode, countreg);
12995 countreg = ix86_zero_extend_to_Pmode (countreg);
12997 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12998 GEN_INT (size == 4 ? 2 : 3));
12999 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13000 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13002 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13003 countreg, destexp, srcexp));
13004 offset = count & ~(size - 1);
13007 if (size == 8 && (count & 0x04))
13009 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13011 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13013 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13018 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13020 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13022 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13027 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13029 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13031 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13034 /* The generic code based on the glibc implementation:
13035 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13036 allowing accelerated copying there)
13037 - copy the data using rep movsl
13038 - copy the rest. */
13043 rtx srcmem, dstmem;
13044 int desired_alignment = (TARGET_PENTIUMPRO
13045 && (count == 0 || count >= (unsigned int) 260)
13046 ? 8 : UNITS_PER_WORD);
13047 /* Get rid of MEM_OFFSETs, they won't be accurate. */
13048 dst = change_address (dst, BLKmode, destreg);
13049 src = change_address (src, BLKmode, srcreg);
13051 /* In case we don't know anything about the alignment, default to the
13052 library version, since it is usually equally fast and results in shorter code.
13055 Also emit the call when we know that the count is large and call overhead
13056 will not be important. */
13057 if (!TARGET_INLINE_ALL_STRINGOPS
13058 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13061 if (TARGET_SINGLE_STRINGOP)
13062 emit_insn (gen_cld ());
13064 countreg2 = gen_reg_rtx (Pmode);
13065 countreg = copy_to_mode_reg (counter_mode, count_exp);
13067 /* We don't use loops to align destination and to copy parts smaller
13068 than 4 bytes, because gcc is able to optimize such code better (in
13069 the case the destination or the count really is aligned, gcc is often
13070 able to predict the branches) and also it is friendlier to the
13071 hardware branch prediction.
13073 Using loops is beneficial for the generic case, because we can
13074 handle small counts using the loops. Many CPUs (such as Athlon)
13075 have large REP prefix setup costs.
13077 This is quite costly. Maybe we can revisit this decision later or
13078 add some customizability to this code. */
13080 if (count == 0 && align < desired_alignment)
13082 label = gen_label_rtx ();
13083 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13084 LEU, 0, counter_mode, 1, label);
13088 rtx label = ix86_expand_aligntest (destreg, 1);
13089 srcmem = change_address (src, QImode, srcreg);
13090 dstmem = change_address (dst, QImode, destreg);
13091 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13092 ix86_adjust_counter (countreg, 1);
13093 emit_label (label);
13094 LABEL_NUSES (label) = 1;
13098 rtx label = ix86_expand_aligntest (destreg, 2);
13099 srcmem = change_address (src, HImode, srcreg);
13100 dstmem = change_address (dst, HImode, destreg);
13101 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13102 ix86_adjust_counter (countreg, 2);
13103 emit_label (label);
13104 LABEL_NUSES (label) = 1;
13106 if (align <= 4 && desired_alignment > 4)
13108 rtx label = ix86_expand_aligntest (destreg, 4);
13109 srcmem = change_address (src, SImode, srcreg);
13110 dstmem = change_address (dst, SImode, destreg);
13111 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13112 ix86_adjust_counter (countreg, 4);
13113 emit_label (label);
13114 LABEL_NUSES (label) = 1;
13117 if (label && desired_alignment > 4 && !TARGET_64BIT)
13119 emit_label (label);
13120 LABEL_NUSES (label) = 1;
13123 if (!TARGET_SINGLE_STRINGOP)
13124 emit_insn (gen_cld ());
13127 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13129 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13133 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13134 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13136 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13137 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13138 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13139 countreg2, destexp, srcexp));
13143 emit_label (label);
13144 LABEL_NUSES (label) = 1;
13146 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13148 srcmem = change_address (src, SImode, srcreg);
13149 dstmem = change_address (dst, SImode, destreg);
13150 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13152 if ((align <= 4 || count == 0) && TARGET_64BIT)
13154 rtx label = ix86_expand_aligntest (countreg, 4);
13155 srcmem = change_address (src, SImode, srcreg);
13156 dstmem = change_address (dst, SImode, destreg);
13157 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13158 emit_label (label);
13159 LABEL_NUSES (label) = 1;
13161 if (align > 2 && count != 0 && (count & 2))
13163 srcmem = change_address (src, HImode, srcreg);
13164 dstmem = change_address (dst, HImode, destreg);
13165 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13167 if (align <= 2 || count == 0)
13169 rtx label = ix86_expand_aligntest (countreg, 2);
13170 srcmem = change_address (src, HImode, srcreg);
13171 dstmem = change_address (dst, HImode, destreg);
13172 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13173 emit_label (label);
13174 LABEL_NUSES (label) = 1;
13176 if (align > 1 && count != 0 && (count & 1))
13178 srcmem = change_address (src, QImode, srcreg);
13179 dstmem = change_address (dst, QImode, destreg);
13180 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13182 if (align <= 1 || count == 0)
13184 rtx label = ix86_expand_aligntest (countreg, 1);
13185 srcmem = change_address (src, QImode, srcreg);
13186 dstmem = change_address (dst, QImode, destreg);
13187 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13188 emit_label (label);
13189 LABEL_NUSES (label) = 1;
13196 /* Expand string clear operation (bzero). Use i386 string operations when
13197 profitable. ix86_expand_movmem contains similar code. */
13199 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13201 rtx destreg, zeroreg, countreg, destexp;
13202 enum machine_mode counter_mode;
13203 HOST_WIDE_INT align = 0;
13204 unsigned HOST_WIDE_INT count = 0;
13206 if (GET_CODE (align_exp) == CONST_INT)
13207 align = INTVAL (align_exp);
13209 /* Can't use any of this if the user has appropriated esi. */
13210 if (global_regs[4])
13213 /* This simple hack avoids all the inlining code and simplifies the code below. */
13214 if (!TARGET_ALIGN_STRINGOPS)
13217 if (GET_CODE (count_exp) == CONST_INT)
13219 count = INTVAL (count_exp);
13220 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13223 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
13224 for 64 bits use SImode when possible, otherwise DImode.
13225 Set count to the number of bytes cleared when known at compile time. */
13227 || GET_MODE (count_exp) == SImode
13228 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13229 counter_mode = SImode;
13231 counter_mode = DImode;
13233 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13234 if (destreg != XEXP (dst, 0))
13235 dst = replace_equiv_address_nv (dst, destreg);
13238 /* When optimizing for size emit the simple rep ; stosb instruction for
13239 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13240 sequence is 7 bytes long, so if optimizing for size and the count is
13241 small enough that some stosl, stosw and stosb instructions without
13242 rep are shorter, fall through into the next if. */
13244 if ((!optimize || optimize_size)
13247 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13249 emit_insn (gen_cld ());
13251 countreg = ix86_zero_extend_to_Pmode (count_exp);
13252 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13253 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13254 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13256 else if (count != 0
13258 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13259 || optimize_size || count < (unsigned int) 64))
13261 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13262 unsigned HOST_WIDE_INT offset = 0;
13264 emit_insn (gen_cld ());
13266 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13267 if (count & ~(size - 1))
13269 unsigned HOST_WIDE_INT repcount;
13270 unsigned int max_nonrep;
13272 repcount = count >> (size == 4 ? 2 : 3);
13274 repcount &= 0x3fffffff;
13276 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13277 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13278 bytes. In both cases the latter seems to be faster for small
13279 values of N. */
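/* Editorial check of the thresholds below: stosl encodes in 1 byte and
   stosq (rex.w + stos) in 2, so seven stosl's (7 bytes) match the 7-byte
   movl $N, %ecx; rep; stosl sequence and four stosq's (8 bytes) match
   the 8-byte rep stosq sequence -- hence 7 and 4.  */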
13280 max_nonrep = size == 4 ? 7 : 4;
13281 if (!optimize_size)
13284 case PROCESSOR_PENTIUM4:
13285 case PROCESSOR_NOCONA:
13292 if (repcount <= max_nonrep)
13293 while (repcount-- > 0)
13295 rtx mem = adjust_automodify_address_nv (dst,
13296 GET_MODE (zeroreg),
13298 emit_insn (gen_strset (destreg, mem, zeroreg));
13303 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13304 countreg = ix86_zero_extend_to_Pmode (countreg);
13305 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13306 GEN_INT (size == 4 ? 2 : 3));
13307 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13308 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13310 offset = count & ~(size - 1);
13313 if (size == 8 && (count & 0x04))
13315 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13317 emit_insn (gen_strset (destreg, mem,
13318 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13323 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13325 emit_insn (gen_strset (destreg, mem,
13326 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13331 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13333 emit_insn (gen_strset (destreg, mem,
13334 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13341 /* Compute desired alignment of the string operation. */
13342 int desired_alignment = (TARGET_PENTIUMPRO
13343 && (count == 0 || count >= (unsigned int) 260)
13344 ? 8 : UNITS_PER_WORD);
13346 /* In case we don't know anything about the alignment, default to
13347 the library version, since it is usually equally fast and results in
13348 shorter code.
13350 Also emit the call when we know that the count is large and call overhead
13351 will not be important. */
13352 if (!TARGET_INLINE_ALL_STRINGOPS
13353 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13356 if (TARGET_SINGLE_STRINGOP)
13357 emit_insn (gen_cld ());
13359 countreg2 = gen_reg_rtx (Pmode);
13360 countreg = copy_to_mode_reg (counter_mode, count_exp);
13361 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13362 /* Get rid of MEM_OFFSET, it won't be accurate. */
13363 dst = change_address (dst, BLKmode, destreg);
13365 if (count == 0 && align < desired_alignment)
13367 label = gen_label_rtx ();
13368 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13369 LEU, 0, counter_mode, 1, label);
13373 rtx label = ix86_expand_aligntest (destreg, 1);
13374 emit_insn (gen_strset (destreg, dst,
13375 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13376 ix86_adjust_counter (countreg, 1);
13377 emit_label (label);
13378 LABEL_NUSES (label) = 1;
13382 rtx label = ix86_expand_aligntest (destreg, 2);
13383 emit_insn (gen_strset (destreg, dst,
13384 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13385 ix86_adjust_counter (countreg, 2);
13386 emit_label (label);
13387 LABEL_NUSES (label) = 1;
13389 if (align <= 4 && desired_alignment > 4)
13391 rtx label = ix86_expand_aligntest (destreg, 4);
13392 emit_insn (gen_strset (destreg, dst,
13393 (TARGET_64BIT
13394 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13395 : zeroreg)));
13396 ix86_adjust_counter (countreg, 4);
13397 emit_label (label);
13398 LABEL_NUSES (label) = 1;
13401 if (label && desired_alignment > 4 && !TARGET_64BIT)
13403 emit_label (label);
13404 LABEL_NUSES (label) = 1;
13408 if (!TARGET_SINGLE_STRINGOP)
13409 emit_insn (gen_cld ());
13412 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13414 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13418 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13419 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13421 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13422 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13426 emit_label (label);
13427 LABEL_NUSES (label) = 1;
13430 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13431 emit_insn (gen_strset (destreg, dst,
13432 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13433 if (TARGET_64BIT && (align <= 4 || count == 0))
13435 rtx label = ix86_expand_aligntest (countreg, 4);
13436 emit_insn (gen_strset (destreg, dst,
13437 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13438 emit_label (label);
13439 LABEL_NUSES (label) = 1;
13441 if (align > 2 && count != 0 && (count & 2))
13442 emit_insn (gen_strset (destreg, dst,
13443 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13444 if (align <= 2 || count == 0)
13446 rtx label = ix86_expand_aligntest (countreg, 2);
13447 emit_insn (gen_strset (destreg, dst,
13448 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13449 emit_label (label);
13450 LABEL_NUSES (label) = 1;
13452 if (align > 1 && count != 0 && (count & 1))
13453 emit_insn (gen_strset (destreg, dst,
13454 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13455 if (align <= 1 || count == 0)
13457 rtx label = ix86_expand_aligntest (countreg, 1);
13458 emit_insn (gen_strset (destreg, dst,
13459 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13460 emit_label (label);
13461 LABEL_NUSES (label) = 1;
13467 /* Expand strlen. */
13469 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13471 rtx addr, scratch1, scratch2, scratch3, scratch4;
13473 /* The generic case of the strlen expander is long. Avoid its
13474 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
13476 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13477 && !TARGET_INLINE_ALL_STRINGOPS
13479 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13482 addr = force_reg (Pmode, XEXP (src, 0));
13483 scratch1 = gen_reg_rtx (Pmode);
13485 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13488 /* Well, it seems that some optimizer does not combine a call like
13489 foo(strlen(bar), strlen(bar));
13490 when the move and the subtraction are done here. It does calculate
13491 the length just once when these instructions are emitted inside of
13492 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
13493 often used and I use one fewer register for the lifetime of
13494 output_strlen_unroll(), this is better. */
13496 emit_move_insn (out, addr);
13498 ix86_expand_strlensi_unroll_1 (out, src, align);
13500 /* strlensi_unroll_1 returns the address of the zero at the end of
13501 the string, like memchr(), so compute the length by subtracting
13502 the start address. */
13504 emit_insn (gen_subdi3 (out, out, addr));
13506 emit_insn (gen_subsi3 (out, out, addr));
13511 scratch2 = gen_reg_rtx (Pmode);
13512 scratch3 = gen_reg_rtx (Pmode);
13513 scratch4 = force_reg (Pmode, constm1_rtx);
13515 emit_move_insn (scratch3, addr);
13516 eoschar = force_reg (QImode, eoschar);
13518 emit_insn (gen_cld ());
13519 src = replace_equiv_address_nv (src, scratch3);
13521 /* If .md starts supporting :P, this can be done in .md. */
13522 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13523 scratch4), UNSPEC_SCAS);
13524 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
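/* Editorial note: repnz; scasb starts with the count register at -1 and
   also consumes the terminating zero, so for a string of length n it
   leaves -(n + 2) behind; the one's complement plus -1 below recovers
   n, since ~(-(n + 2)) - 1 == (n + 1) - 1 == n.  */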
13527 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13528 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13532 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13533 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13539 /* Expand the appropriate insns for doing strlen if not just doing
13540 repnz; scasb
13542 out = result, initialized with the start address
13543 align_rtx = alignment of the address.
13544 scratch = scratch register, initialized with the start address when
13545 not aligned, otherwise undefined
13547 This is just the body. It needs the initializations mentioned above and
13548 some address computation at the end. These things are done in i386.md. */
13551 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13555 rtx align_2_label = NULL_RTX;
13556 rtx align_3_label = NULL_RTX;
13557 rtx align_4_label = gen_label_rtx ();
13558 rtx end_0_label = gen_label_rtx ();
13560 rtx tmpreg = gen_reg_rtx (SImode);
13561 rtx scratch = gen_reg_rtx (SImode);
13565 if (GET_CODE (align_rtx) == CONST_INT)
13566 align = INTVAL (align_rtx);
13568 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13570 /* Is there a known alignment and is it less than 4? */
13573 rtx scratch1 = gen_reg_rtx (Pmode);
13574 emit_move_insn (scratch1, out);
13575 /* Is there a known alignment and is it not 2? */
13578 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13579 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13581 /* Leave just the two lower bits of the address. */
13582 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13583 NULL_RTX, 0, OPTAB_WIDEN);
13585 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13586 Pmode, 1, align_4_label);
13587 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13588 Pmode, 1, align_2_label);
13589 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13590 Pmode, 1, align_3_label);
13594 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13595 check whether it is aligned to a 4-byte boundary. */
13597 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13598 NULL_RTX, 0, OPTAB_WIDEN);
13600 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13601 Pmode, 1, align_4_label);
13604 mem = change_address (src, QImode, out);
13606 /* Now compare the bytes. */
13608 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
13609 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13610 QImode, 1, end_0_label);
13612 /* Increment the address. */
13614 emit_insn (gen_adddi3 (out, out, const1_rtx));
13616 emit_insn (gen_addsi3 (out, out, const1_rtx));
13618 /* Not needed with an alignment of 2. */
13621 emit_label (align_2_label);
13623 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13627 emit_insn (gen_adddi3 (out, out, const1_rtx));
13629 emit_insn (gen_addsi3 (out, out, const1_rtx));
13631 emit_label (align_3_label);
13634 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13638 emit_insn (gen_adddi3 (out, out, const1_rtx));
13640 emit_insn (gen_addsi3 (out, out, const1_rtx));
13643 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13644 align this loop: that only enlarges the program and does not help to
13645 speed it up. */
13646 emit_label (align_4_label);
13648 mem = change_address (src, SImode, out);
13649 emit_move_insn (scratch, mem);
13651 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13653 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13655 /* This formula yields a nonzero result iff one of the bytes is zero.
13656 This saves three branches inside the loop and many cycles. */
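/* Worked example (editorial note): for scratch = 0x00614243,
   scratch - 0x01010101 wraps to 0xff604142 and ~scratch is 0xff9ebdbc;
   their AND is 0xff000100, and masking with 0x80808080 leaves
   0x80000000 -- nonzero because the top byte is zero.  When no byte is
   zero, the final mask yields 0.  */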
13658 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13659 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13660 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13661 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13662 gen_int_mode (0x80808080, SImode)));
13663 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13668 rtx reg = gen_reg_rtx (SImode);
13669 rtx reg2 = gen_reg_rtx (Pmode);
13670 emit_move_insn (reg, tmpreg);
13671 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13673 /* If zero is not in the first two bytes, move two bytes forward. */
13674 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13675 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13676 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13677 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13678 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13681 /* Emit the lea manually to avoid clobbering the flags. */
13682 emit_insn (gen_rtx_SET (SImode, reg2,
13683 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13685 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13686 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13687 emit_insn (gen_rtx_SET (VOIDmode, out,
13688 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13695 rtx end_2_label = gen_label_rtx ();
13696 /* Is zero in the first two bytes? */
13698 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13699 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13700 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13701 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13702 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13704 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13705 JUMP_LABEL (tmp) = end_2_label;
13707 /* Not in the first two. Move two bytes forward. */
13708 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13710 emit_insn (gen_adddi3 (out, out, const2_rtx));
13712 emit_insn (gen_addsi3 (out, out, const2_rtx));
13714 emit_label (end_2_label);
13718 /* Avoid a branch when fixing up the byte. */
13719 tmpreg = gen_lowpart (QImode, tmpreg);
13720 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13721 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13723 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13725 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
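/* Editorial note on the branchless fixup above: OUT points 4 bytes past
   the start of the word (or 6 after the two-byte step), i.e. 3 or 4
   bytes past the zero byte.  Doubling the low byte of TMPREG moves its
   0x80 flag into the carry, so OUT - 3 - carry lands exactly on the
   zero byte.  */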
13727 emit_label (end_0_label);
13731 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13732 rtx callarg2 ATTRIBUTE_UNUSED,
13733 rtx pop, int sibcall)
13735 rtx use = NULL, call;
13737 if (pop == const0_rtx)
13738 pop = NULL;
13739 gcc_assert (!TARGET_64BIT || !pop);
13741 if (TARGET_MACHO && !TARGET_64BIT)
13744 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13745 fnaddr = machopic_indirect_call_target (fnaddr);
13750 /* Static functions and indirect calls don't need the pic register. */
13751 if (! TARGET_64BIT && flag_pic
13752 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13753 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13754 use_reg (&use, pic_offset_table_rtx);
13757 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13759 rtx al = gen_rtx_REG (QImode, 0);
13760 emit_move_insn (al, callarg2);
13761 use_reg (&use, al);
13764 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13766 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13767 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13769 if (sibcall && TARGET_64BIT
13770 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13773 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13774 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13775 emit_move_insn (fnaddr, addr);
13776 fnaddr = gen_rtx_MEM (QImode, fnaddr);
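/* Editorial note: R11 is call-clobbered and not used for argument
   passing in the x86-64 ABI, so it can safely carry the target address
   through the sibcall epilogue.  */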
13779 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13781 call = gen_rtx_SET (VOIDmode, retval, call);
13784 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13785 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13786 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13789 call = emit_call_insn (call);
13791 CALL_INSN_FUNCTION_USAGE (call) = use;
13795 /* Clear stack slot assignments remembered from previous functions.
13796 This is called from INIT_EXPANDERS once before RTL is emitted for each
13797 function. */
13799 static struct machine_function *
13800 ix86_init_machine_status (void)
13802 struct machine_function *f;
13804 f = ggc_alloc_cleared (sizeof (struct machine_function));
13805 f->use_fast_prologue_epilogue_nregs = -1;
13806 f->tls_descriptor_call_expanded_p = 0;
13811 /* Return a MEM corresponding to a stack slot with mode MODE.
13812 Allocate a new slot if necessary.
13814 The RTL for a function can have several slots available: N is
13815 which slot to use. */
13818 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13820 struct stack_local_entry *s;
13822 gcc_assert (n < MAX_386_STACK_LOCALS);
13824 /* Virtual slot is valid only before vregs are instantiated. */
13825 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13827 for (s = ix86_stack_locals; s; s = s->next)
13828 if (s->mode == mode && s->n == n)
13831 s = (struct stack_local_entry *)
13832 ggc_alloc (sizeof (struct stack_local_entry));
13835 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13837 s->next = ix86_stack_locals;
13838 ix86_stack_locals = s;
13842 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13844 static GTY(()) rtx ix86_tls_symbol;
13846 ix86_tls_get_addr (void)
13849 if (!ix86_tls_symbol)
13851 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13852 (TARGET_ANY_GNU_TLS
13854 ? "___tls_get_addr"
13855 : "__tls_get_addr");
13858 return ix86_tls_symbol;
13861 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13863 static GTY(()) rtx ix86_tls_module_base_symbol;
13865 ix86_tls_module_base (void)
13868 if (!ix86_tls_module_base_symbol)
13870 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13871 "_TLS_MODULE_BASE_");
13872 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13873 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13876 return ix86_tls_module_base_symbol;
13879 /* Calculate the length of the memory address in the instruction
13880 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13883 memory_address_length (rtx addr)
13885 struct ix86_address parts;
13886 rtx base, index, disp;
13890 if (GET_CODE (addr) == PRE_DEC
13891 || GET_CODE (addr) == POST_INC
13892 || GET_CODE (addr) == PRE_MODIFY
13893 || GET_CODE (addr) == POST_MODIFY)
13896 ok = ix86_decompose_address (addr, &parts);
13899 if (parts.base && GET_CODE (parts.base) == SUBREG)
13900 parts.base = SUBREG_REG (parts.base);
13901 if (parts.index && GET_CODE (parts.index) == SUBREG)
13902 parts.index = SUBREG_REG (parts.index);
13905 index = parts.index;
13909 /* Rule of thumb:
13910 - esp as the base always wants an index,
13911 - ebp as the base always wants a displacement. */
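/* Editorial examples: (%eax) fits in the modrm byte alone (0 extra
   bytes); (%esp) needs a SIB byte (+1); (%ebp) is encoded as 0(%ebp)
   with a disp8 (+1); 0x1234(%eax,%ebx,4) needs a SIB byte plus a 4-byte
   displacement (+5).  */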
13913 /* Register Indirect. */
13914 if (base && !index && !disp)
13916 /* esp (for its index) and ebp (for its displacement) need
13917 the two-byte modrm form. */
13918 if (addr == stack_pointer_rtx
13919 || addr == arg_pointer_rtx
13920 || addr == frame_pointer_rtx
13921 || addr == hard_frame_pointer_rtx)
13925 /* Direct Addressing. */
13926 else if (disp && !base && !index)
13931 /* Find the length of the displacement constant. */
13934 if (base && satisfies_constraint_K (disp))
13939 /* ebp always wants a displacement. */
13940 else if (base == hard_frame_pointer_rtx)
13943 /* An index requires the two-byte modrm form.... */
13945 /* ...like esp, which always wants an index. */
13946 || base == stack_pointer_rtx
13947 || base == arg_pointer_rtx
13948 || base == frame_pointer_rtx)
13955 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
13956 is set, expect that the insn has an 8-bit immediate alternative. */
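/* Editorial example: "addl $4, %eax" can use the sign-extended 8-bit
   immediate form (1 immediate byte), while "addl $400, %eax" needs the
   full 32-bit immediate (4 bytes); satisfies_constraint_K below tests
   for the short form.  */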
13958 ix86_attr_length_immediate_default (rtx insn, int shortform)
13962 extract_insn_cached (insn);
13963 for (i = recog_data.n_operands - 1; i >= 0; --i)
13964 if (CONSTANT_P (recog_data.operand[i]))
13967 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13971 switch (get_attr_mode (insn))
13982 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13987 fatal_insn ("unknown insn mode", insn);
13993 /* Compute the default value for the "length_address" attribute. */
13995 ix86_attr_length_address_default (rtx insn)
13999 if (get_attr_type (insn) == TYPE_LEA)
14001 rtx set = PATTERN (insn);
14003 if (GET_CODE (set) == PARALLEL)
14004 set = XVECEXP (set, 0, 0);
14006 gcc_assert (GET_CODE (set) == SET);
14008 return memory_address_length (SET_SRC (set));
14011 extract_insn_cached (insn);
14012 for (i = recog_data.n_operands - 1; i >= 0; --i)
14013 if (GET_CODE (recog_data.operand[i]) == MEM)
14015 return memory_address_length (XEXP (recog_data.operand[i], 0));
14021 /* Return the maximum number of instructions a cpu can issue. */
14024 ix86_issue_rate (void)
14028 case PROCESSOR_PENTIUM:
14032 case PROCESSOR_PENTIUMPRO:
14033 case PROCESSOR_PENTIUM4:
14034 case PROCESSOR_ATHLON:
14036 case PROCESSOR_AMDFAM10:
14037 case PROCESSOR_NOCONA:
14038 case PROCESSOR_GENERIC32:
14039 case PROCESSOR_GENERIC64:
14042 case PROCESSOR_CORE2:
14050 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
14051 by DEP_INSN but nothing else set by DEP_INSN. */
14054 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14058 /* Simplify the test for uninteresting insns. */
14059 if (insn_type != TYPE_SETCC
14060 && insn_type != TYPE_ICMOV
14061 && insn_type != TYPE_FCMOV
14062 && insn_type != TYPE_IBR)
14065 if ((set = single_set (dep_insn)) != 0)
14067 set = SET_DEST (set);
14070 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14071 && XVECLEN (PATTERN (dep_insn), 0) == 2
14072 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14073 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14075 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14076 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14081 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14084 /* This test is true if the dependent insn reads the flags but
14085 not any other potentially set register. */
14086 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14089 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14095 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14096 address with operands set by DEP_INSN. */
14099 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14103 if (insn_type == TYPE_LEA
14106 addr = PATTERN (insn);
14108 if (GET_CODE (addr) == PARALLEL)
14109 addr = XVECEXP (addr, 0, 0);
14111 gcc_assert (GET_CODE (addr) == SET);
14113 addr = SET_SRC (addr);
14118 extract_insn_cached (insn);
14119 for (i = recog_data.n_operands - 1; i >= 0; --i)
14120 if (GET_CODE (recog_data.operand[i]) == MEM)
14122 addr = XEXP (recog_data.operand[i], 0);
14129 return modified_in_p (addr, dep_insn);
14133 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14135 enum attr_type insn_type, dep_insn_type;
14136 enum attr_memory memory;
14138 int dep_insn_code_number;
14140 /* Anti and output dependencies have zero cost on all CPUs. */
14141 if (REG_NOTE_KIND (link) != 0)
14144 dep_insn_code_number = recog_memoized (dep_insn);
14146 /* If we can't recognize the insns, we can't really do anything. */
14147 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14150 insn_type = get_attr_type (insn);
14151 dep_insn_type = get_attr_type (dep_insn);
14155 case PROCESSOR_PENTIUM:
14156 /* Address Generation Interlock adds a cycle of latency. */
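/* Editorial example: in "movl %eax, %ebx; movl (%ebx), %ecx" the load's
   address depends on the %ebx written by the previous insn, so the
   Pentium stalls for an extra cycle.  */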
14157 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14160 /* ??? Compares pair with jump/setcc. */
14161 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14164 /* Floating point stores require the value to be ready one cycle earlier. */
14165 if (insn_type == TYPE_FMOV
14166 && get_attr_memory (insn) == MEMORY_STORE
14167 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14171 case PROCESSOR_PENTIUMPRO:
14172 memory = get_attr_memory (insn);
14174 /* INT->FP conversion is expensive. */
14175 if (get_attr_fp_int_src (dep_insn))
14178 /* There is one extra cycle of latency between an FP op and a store. */
14179 if (insn_type == TYPE_FMOV
14180 && (set = single_set (dep_insn)) != NULL_RTX
14181 && (set2 = single_set (insn)) != NULL_RTX
14182 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14183 && GET_CODE (SET_DEST (set2)) == MEM)
14186 /* Show the ability of the reorder buffer to hide the latency of a load
14187 by executing it in parallel with the previous instruction, in case the
14188 previous instruction is not needed to compute the address. */
14189 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14190 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14192 /* Claim moves to take one cycle, as the core can issue one load
14193 at a time and the next load can start a cycle later. */
14194 if (dep_insn_type == TYPE_IMOV
14195 || dep_insn_type == TYPE_FMOV)
14203 memory = get_attr_memory (insn);
14205 /* The esp dependency is resolved before the instruction is really
14206 finished. */
14207 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14208 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14211 /* INT->FP conversion is expensive. */
14212 if (get_attr_fp_int_src (dep_insn))
14215 /* Show the ability of the reorder buffer to hide the latency of a load
14216 by executing it in parallel with the previous instruction, in case the
14217 previous instruction is not needed to compute the address. */
14218 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14219 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14221 /* Claim moves to take one cycle, as the core can issue one load
14222 at a time and the next load can start a cycle later. */
14223 if (dep_insn_type == TYPE_IMOV
14224 || dep_insn_type == TYPE_FMOV)
14233 case PROCESSOR_ATHLON:
14235 case PROCESSOR_AMDFAM10:
14236 case PROCESSOR_GENERIC32:
14237 case PROCESSOR_GENERIC64:
14238 memory = get_attr_memory (insn);
14240 /* Show the ability of the reorder buffer to hide the latency of a load
14241 by executing it in parallel with the previous instruction, in case the
14242 previous instruction is not needed to compute the address. */
14243 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14244 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14246 enum attr_unit unit = get_attr_unit (insn);
14249 /* Because of the difference between the length of the integer and
14250 floating unit pipeline preparation stages, the memory operands
14251 for floating point are cheaper.
14253 ??? For Athlon the difference is most probably 2. */
14254 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14257 loadcost = TARGET_ATHLON ? 2 : 0;
14259 if (cost >= loadcost)
14272 /* How many alternative schedules to try. This should be as wide as the
14273 scheduling freedom in the DFA, but no wider. Making this value too
14274 large results in extra work for the scheduler. */
14277 ia32_multipass_dfa_lookahead (void)
14279 if (ix86_tune == PROCESSOR_PENTIUM)
14282 if (ix86_tune == PROCESSOR_PENTIUMPRO
14283 || ix86_tune == PROCESSOR_K6)
14291 /* Compute the alignment given to a constant that is being placed in memory.
14292 EXP is the constant and ALIGN is the alignment that the object would
14293 ordinarily have.
14294 The value of this function is used instead of that alignment to align
14295 the object. */
14298 ix86_constant_alignment (tree exp, int align)
14300 if (TREE_CODE (exp) == REAL_CST)
14302 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14304 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14307 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14308 && !TARGET_NO_ALIGN_LONG_STRINGS
14309 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14310 return BITS_PER_WORD;
14315 /* Compute the alignment for a static variable.
14316 TYPE is the data type, and ALIGN is the alignment that
14317 the object would ordinarily have. The value of this function is used
14318 instead of that alignment to align the object. */
14321 ix86_data_alignment (tree type, int align)
14323 int max_align = optimize_size ? BITS_PER_WORD : 256;
14325 if (AGGREGATE_TYPE_P (type)
14326 && TYPE_SIZE (type)
14327 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14328 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14329 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14330 && align < max_align)
14333 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14334 to a 16-byte boundary. */
14337 if (AGGREGATE_TYPE_P (type)
14338 && TYPE_SIZE (type)
14339 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14340 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14341 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14345 if (TREE_CODE (type) == ARRAY_TYPE)
14347 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14349 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14352 else if (TREE_CODE (type) == COMPLEX_TYPE)
14355 if (TYPE_MODE (type) == DCmode && align < 64)
14357 if (TYPE_MODE (type) == XCmode && align < 128)
14360 else if ((TREE_CODE (type) == RECORD_TYPE
14361 || TREE_CODE (type) == UNION_TYPE
14362 || TREE_CODE (type) == QUAL_UNION_TYPE)
14363 && TYPE_FIELDS (type))
14365 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14367 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14370 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14371 || TREE_CODE (type) == INTEGER_TYPE)
14373 if (TYPE_MODE (type) == DFmode && align < 64)
14375 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14382 /* Compute the alignment for a local variable.
14383 TYPE is the data type, and ALIGN is the alignment that
14384 the object would ordinarily have. The value of this macro is used
14385 instead of that alignment to align the object. */
14388 ix86_local_alignment (tree type, int align)
14390 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14391 to a 16-byte boundary. */
14394 if (AGGREGATE_TYPE_P (type)
14395 && TYPE_SIZE (type)
14396 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14397 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14398 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14401 if (TREE_CODE (type) == ARRAY_TYPE)
14403 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14405 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14408 else if (TREE_CODE (type) == COMPLEX_TYPE)
14410 if (TYPE_MODE (type) == DCmode && align < 64)
14412 if (TYPE_MODE (type) == XCmode && align < 128)
14415 else if ((TREE_CODE (type) == RECORD_TYPE
14416 || TREE_CODE (type) == UNION_TYPE
14417 || TREE_CODE (type) == QUAL_UNION_TYPE)
14418 && TYPE_FIELDS (type))
14420 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14422 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14425 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14426 || TREE_CODE (type) == INTEGER_TYPE)
14429 if (TYPE_MODE (type) == DFmode && align < 64)
14431 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14437 /* Emit RTL insns to initialize the variable parts of a trampoline.
14438 FNADDR is an RTX for the address of the function's pure code.
14439 CXT is an RTX for the static chain value for the function. */
14441 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14445 /* Compute offset from the end of the jmp to the target function. */
14446 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14447 plus_constant (tramp, 10),
14448 NULL_RTX, 1, OPTAB_DIRECT);
14449 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14450 gen_int_mode (0xb9, QImode));
14451 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14452 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14453 gen_int_mode (0xe9, QImode));
14454 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
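/* Resulting 10-byte i386 trampoline (editorial sketch):
     +0: b9 <cxt>   movl $cxt, %ecx   (static chain)
     +5: e9 <disp>  jmp  fnaddr       (disp is relative to tramp + 10)  */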
14459 /* Try to load the address using the shorter movl instead of movabs.
14460 We may want to support movq for kernel mode, but the kernel does not use
14461 trampolines at the moment. */
14462 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14464 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14465 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14466 gen_int_mode (0xbb41, HImode));
14467 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14468 gen_lowpart (SImode, fnaddr));
14473 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14474 gen_int_mode (0xbb49, HImode));
14475 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14479 /* Load static chain using movabs to r10. */
14480 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14481 gen_int_mode (0xba49, HImode));
14482 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14485 /* Jump to the address in r11. */
14486 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14487 gen_int_mode (0xff49, HImode));
14488 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14489 gen_int_mode (0xe3, QImode));
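/* Resulting byte sequence (editorial sketch): either 41 bb <imm32>
   (movl into %r11d, zero-extending) or 49 bb <imm64> (movabs into %r11)
   to load FNADDR, then 49 ba <imm64> (movabs the static chain into
   %r10), then 49 ff e3 (jmpq *%r11).  */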
14491 gcc_assert (offset <= TRAMPOLINE_SIZE);
14494 #ifdef ENABLE_EXECUTE_STACK
14495 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14496 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14500 /* Codes for all the SSE/MMX builtins. */
14503 IX86_BUILTIN_ADDPS,
14504 IX86_BUILTIN_ADDSS,
14505 IX86_BUILTIN_DIVPS,
14506 IX86_BUILTIN_DIVSS,
14507 IX86_BUILTIN_MULPS,
14508 IX86_BUILTIN_MULSS,
14509 IX86_BUILTIN_SUBPS,
14510 IX86_BUILTIN_SUBSS,
14512 IX86_BUILTIN_CMPEQPS,
14513 IX86_BUILTIN_CMPLTPS,
14514 IX86_BUILTIN_CMPLEPS,
14515 IX86_BUILTIN_CMPGTPS,
14516 IX86_BUILTIN_CMPGEPS,
14517 IX86_BUILTIN_CMPNEQPS,
14518 IX86_BUILTIN_CMPNLTPS,
14519 IX86_BUILTIN_CMPNLEPS,
14520 IX86_BUILTIN_CMPNGTPS,
14521 IX86_BUILTIN_CMPNGEPS,
14522 IX86_BUILTIN_CMPORDPS,
14523 IX86_BUILTIN_CMPUNORDPS,
14524 IX86_BUILTIN_CMPEQSS,
14525 IX86_BUILTIN_CMPLTSS,
14526 IX86_BUILTIN_CMPLESS,
14527 IX86_BUILTIN_CMPNEQSS,
14528 IX86_BUILTIN_CMPNLTSS,
14529 IX86_BUILTIN_CMPNLESS,
14530 IX86_BUILTIN_CMPNGTSS,
14531 IX86_BUILTIN_CMPNGESS,
14532 IX86_BUILTIN_CMPORDSS,
14533 IX86_BUILTIN_CMPUNORDSS,
14535 IX86_BUILTIN_COMIEQSS,
14536 IX86_BUILTIN_COMILTSS,
14537 IX86_BUILTIN_COMILESS,
14538 IX86_BUILTIN_COMIGTSS,
14539 IX86_BUILTIN_COMIGESS,
14540 IX86_BUILTIN_COMINEQSS,
14541 IX86_BUILTIN_UCOMIEQSS,
14542 IX86_BUILTIN_UCOMILTSS,
14543 IX86_BUILTIN_UCOMILESS,
14544 IX86_BUILTIN_UCOMIGTSS,
14545 IX86_BUILTIN_UCOMIGESS,
14546 IX86_BUILTIN_UCOMINEQSS,
14548 IX86_BUILTIN_CVTPI2PS,
14549 IX86_BUILTIN_CVTPS2PI,
14550 IX86_BUILTIN_CVTSI2SS,
14551 IX86_BUILTIN_CVTSI642SS,
14552 IX86_BUILTIN_CVTSS2SI,
14553 IX86_BUILTIN_CVTSS2SI64,
14554 IX86_BUILTIN_CVTTPS2PI,
14555 IX86_BUILTIN_CVTTSS2SI,
14556 IX86_BUILTIN_CVTTSS2SI64,
14558 IX86_BUILTIN_MAXPS,
14559 IX86_BUILTIN_MAXSS,
14560 IX86_BUILTIN_MINPS,
14561 IX86_BUILTIN_MINSS,
14563 IX86_BUILTIN_LOADUPS,
14564 IX86_BUILTIN_STOREUPS,
14565 IX86_BUILTIN_MOVSS,
14567 IX86_BUILTIN_MOVHLPS,
14568 IX86_BUILTIN_MOVLHPS,
14569 IX86_BUILTIN_LOADHPS,
14570 IX86_BUILTIN_LOADLPS,
14571 IX86_BUILTIN_STOREHPS,
14572 IX86_BUILTIN_STORELPS,
14574 IX86_BUILTIN_MASKMOVQ,
14575 IX86_BUILTIN_MOVMSKPS,
14576 IX86_BUILTIN_PMOVMSKB,
14578 IX86_BUILTIN_MOVNTPS,
14579 IX86_BUILTIN_MOVNTQ,
14581 IX86_BUILTIN_LOADDQU,
14582 IX86_BUILTIN_STOREDQU,
14584 IX86_BUILTIN_PACKSSWB,
14585 IX86_BUILTIN_PACKSSDW,
14586 IX86_BUILTIN_PACKUSWB,
14588 IX86_BUILTIN_PADDB,
14589 IX86_BUILTIN_PADDW,
14590 IX86_BUILTIN_PADDD,
14591 IX86_BUILTIN_PADDQ,
14592 IX86_BUILTIN_PADDSB,
14593 IX86_BUILTIN_PADDSW,
14594 IX86_BUILTIN_PADDUSB,
14595 IX86_BUILTIN_PADDUSW,
14596 IX86_BUILTIN_PSUBB,
14597 IX86_BUILTIN_PSUBW,
14598 IX86_BUILTIN_PSUBD,
14599 IX86_BUILTIN_PSUBQ,
14600 IX86_BUILTIN_PSUBSB,
14601 IX86_BUILTIN_PSUBSW,
14602 IX86_BUILTIN_PSUBUSB,
14603 IX86_BUILTIN_PSUBUSW,
14606 IX86_BUILTIN_PANDN,
14610 IX86_BUILTIN_PAVGB,
14611 IX86_BUILTIN_PAVGW,
14613 IX86_BUILTIN_PCMPEQB,
14614 IX86_BUILTIN_PCMPEQW,
14615 IX86_BUILTIN_PCMPEQD,
14616 IX86_BUILTIN_PCMPGTB,
14617 IX86_BUILTIN_PCMPGTW,
14618 IX86_BUILTIN_PCMPGTD,
14620 IX86_BUILTIN_PMADDWD,
14622 IX86_BUILTIN_PMAXSW,
14623 IX86_BUILTIN_PMAXUB,
14624 IX86_BUILTIN_PMINSW,
14625 IX86_BUILTIN_PMINUB,
14627 IX86_BUILTIN_PMULHUW,
14628 IX86_BUILTIN_PMULHW,
14629 IX86_BUILTIN_PMULLW,
14631 IX86_BUILTIN_PSADBW,
14632 IX86_BUILTIN_PSHUFW,
14634 IX86_BUILTIN_PSLLW,
14635 IX86_BUILTIN_PSLLD,
14636 IX86_BUILTIN_PSLLQ,
14637 IX86_BUILTIN_PSRAW,
14638 IX86_BUILTIN_PSRAD,
14639 IX86_BUILTIN_PSRLW,
14640 IX86_BUILTIN_PSRLD,
14641 IX86_BUILTIN_PSRLQ,
14642 IX86_BUILTIN_PSLLWI,
14643 IX86_BUILTIN_PSLLDI,
14644 IX86_BUILTIN_PSLLQI,
14645 IX86_BUILTIN_PSRAWI,
14646 IX86_BUILTIN_PSRADI,
14647 IX86_BUILTIN_PSRLWI,
14648 IX86_BUILTIN_PSRLDI,
14649 IX86_BUILTIN_PSRLQI,
14651 IX86_BUILTIN_PUNPCKHBW,
14652 IX86_BUILTIN_PUNPCKHWD,
14653 IX86_BUILTIN_PUNPCKHDQ,
14654 IX86_BUILTIN_PUNPCKLBW,
14655 IX86_BUILTIN_PUNPCKLWD,
14656 IX86_BUILTIN_PUNPCKLDQ,
14658 IX86_BUILTIN_SHUFPS,
14660 IX86_BUILTIN_RCPPS,
14661 IX86_BUILTIN_RCPSS,
14662 IX86_BUILTIN_RSQRTPS,
14663 IX86_BUILTIN_RSQRTSS,
14664 IX86_BUILTIN_SQRTPS,
14665 IX86_BUILTIN_SQRTSS,
14667 IX86_BUILTIN_UNPCKHPS,
14668 IX86_BUILTIN_UNPCKLPS,
14670 IX86_BUILTIN_ANDPS,
14671 IX86_BUILTIN_ANDNPS,
14673 IX86_BUILTIN_XORPS,
14676 IX86_BUILTIN_LDMXCSR,
14677 IX86_BUILTIN_STMXCSR,
14678 IX86_BUILTIN_SFENCE,
14680 /* 3DNow! Original */
14681 IX86_BUILTIN_FEMMS,
14682 IX86_BUILTIN_PAVGUSB,
14683 IX86_BUILTIN_PF2ID,
14684 IX86_BUILTIN_PFACC,
14685 IX86_BUILTIN_PFADD,
14686 IX86_BUILTIN_PFCMPEQ,
14687 IX86_BUILTIN_PFCMPGE,
14688 IX86_BUILTIN_PFCMPGT,
14689 IX86_BUILTIN_PFMAX,
14690 IX86_BUILTIN_PFMIN,
14691 IX86_BUILTIN_PFMUL,
14692 IX86_BUILTIN_PFRCP,
14693 IX86_BUILTIN_PFRCPIT1,
14694 IX86_BUILTIN_PFRCPIT2,
14695 IX86_BUILTIN_PFRSQIT1,
14696 IX86_BUILTIN_PFRSQRT,
14697 IX86_BUILTIN_PFSUB,
14698 IX86_BUILTIN_PFSUBR,
14699 IX86_BUILTIN_PI2FD,
14700 IX86_BUILTIN_PMULHRW,
14702 /* 3DNow! Athlon Extensions */
14703 IX86_BUILTIN_PF2IW,
14704 IX86_BUILTIN_PFNACC,
14705 IX86_BUILTIN_PFPNACC,
14706 IX86_BUILTIN_PI2FW,
14707 IX86_BUILTIN_PSWAPDSI,
14708 IX86_BUILTIN_PSWAPDSF,
14711 IX86_BUILTIN_ADDPD,
14712 IX86_BUILTIN_ADDSD,
14713 IX86_BUILTIN_DIVPD,
14714 IX86_BUILTIN_DIVSD,
14715 IX86_BUILTIN_MULPD,
14716 IX86_BUILTIN_MULSD,
14717 IX86_BUILTIN_SUBPD,
14718 IX86_BUILTIN_SUBSD,
14720 IX86_BUILTIN_CMPEQPD,
14721 IX86_BUILTIN_CMPLTPD,
14722 IX86_BUILTIN_CMPLEPD,
14723 IX86_BUILTIN_CMPGTPD,
14724 IX86_BUILTIN_CMPGEPD,
14725 IX86_BUILTIN_CMPNEQPD,
14726 IX86_BUILTIN_CMPNLTPD,
14727 IX86_BUILTIN_CMPNLEPD,
14728 IX86_BUILTIN_CMPNGTPD,
14729 IX86_BUILTIN_CMPNGEPD,
14730 IX86_BUILTIN_CMPORDPD,
14731 IX86_BUILTIN_CMPUNORDPD,
14732 IX86_BUILTIN_CMPNEPD,
14733 IX86_BUILTIN_CMPEQSD,
14734 IX86_BUILTIN_CMPLTSD,
14735 IX86_BUILTIN_CMPLESD,
14736 IX86_BUILTIN_CMPNEQSD,
14737 IX86_BUILTIN_CMPNLTSD,
14738 IX86_BUILTIN_CMPNLESD,
14739 IX86_BUILTIN_CMPORDSD,
14740 IX86_BUILTIN_CMPUNORDSD,
14741 IX86_BUILTIN_CMPNESD,
14743 IX86_BUILTIN_COMIEQSD,
14744 IX86_BUILTIN_COMILTSD,
14745 IX86_BUILTIN_COMILESD,
14746 IX86_BUILTIN_COMIGTSD,
14747 IX86_BUILTIN_COMIGESD,
14748 IX86_BUILTIN_COMINEQSD,
14749 IX86_BUILTIN_UCOMIEQSD,
14750 IX86_BUILTIN_UCOMILTSD,
14751 IX86_BUILTIN_UCOMILESD,
14752 IX86_BUILTIN_UCOMIGTSD,
14753 IX86_BUILTIN_UCOMIGESD,
14754 IX86_BUILTIN_UCOMINEQSD,
14756 IX86_BUILTIN_MAXPD,
14757 IX86_BUILTIN_MAXSD,
14758 IX86_BUILTIN_MINPD,
14759 IX86_BUILTIN_MINSD,
14761 IX86_BUILTIN_ANDPD,
14762 IX86_BUILTIN_ANDNPD,
14764 IX86_BUILTIN_XORPD,
14766 IX86_BUILTIN_SQRTPD,
14767 IX86_BUILTIN_SQRTSD,
14769 IX86_BUILTIN_UNPCKHPD,
14770 IX86_BUILTIN_UNPCKLPD,
14772 IX86_BUILTIN_SHUFPD,
14774 IX86_BUILTIN_LOADUPD,
14775 IX86_BUILTIN_STOREUPD,
14776 IX86_BUILTIN_MOVSD,
14778 IX86_BUILTIN_LOADHPD,
14779 IX86_BUILTIN_LOADLPD,
14781 IX86_BUILTIN_CVTDQ2PD,
14782 IX86_BUILTIN_CVTDQ2PS,
14784 IX86_BUILTIN_CVTPD2DQ,
14785 IX86_BUILTIN_CVTPD2PI,
14786 IX86_BUILTIN_CVTPD2PS,
14787 IX86_BUILTIN_CVTTPD2DQ,
14788 IX86_BUILTIN_CVTTPD2PI,
14790 IX86_BUILTIN_CVTPI2PD,
14791 IX86_BUILTIN_CVTSI2SD,
14792 IX86_BUILTIN_CVTSI642SD,
14794 IX86_BUILTIN_CVTSD2SI,
14795 IX86_BUILTIN_CVTSD2SI64,
14796 IX86_BUILTIN_CVTSD2SS,
14797 IX86_BUILTIN_CVTSS2SD,
14798 IX86_BUILTIN_CVTTSD2SI,
14799 IX86_BUILTIN_CVTTSD2SI64,
14801 IX86_BUILTIN_CVTPS2DQ,
14802 IX86_BUILTIN_CVTPS2PD,
14803 IX86_BUILTIN_CVTTPS2DQ,
14805 IX86_BUILTIN_MOVNTI,
14806 IX86_BUILTIN_MOVNTPD,
14807 IX86_BUILTIN_MOVNTDQ,
14810 IX86_BUILTIN_MASKMOVDQU,
14811 IX86_BUILTIN_MOVMSKPD,
14812 IX86_BUILTIN_PMOVMSKB128,
14814 IX86_BUILTIN_PACKSSWB128,
14815 IX86_BUILTIN_PACKSSDW128,
14816 IX86_BUILTIN_PACKUSWB128,
14818 IX86_BUILTIN_PADDB128,
14819 IX86_BUILTIN_PADDW128,
14820 IX86_BUILTIN_PADDD128,
14821 IX86_BUILTIN_PADDQ128,
14822 IX86_BUILTIN_PADDSB128,
14823 IX86_BUILTIN_PADDSW128,
14824 IX86_BUILTIN_PADDUSB128,
14825 IX86_BUILTIN_PADDUSW128,
14826 IX86_BUILTIN_PSUBB128,
14827 IX86_BUILTIN_PSUBW128,
14828 IX86_BUILTIN_PSUBD128,
14829 IX86_BUILTIN_PSUBQ128,
14830 IX86_BUILTIN_PSUBSB128,
14831 IX86_BUILTIN_PSUBSW128,
14832 IX86_BUILTIN_PSUBUSB128,
14833 IX86_BUILTIN_PSUBUSW128,
14835 IX86_BUILTIN_PAND128,
14836 IX86_BUILTIN_PANDN128,
14837 IX86_BUILTIN_POR128,
14838 IX86_BUILTIN_PXOR128,
14840 IX86_BUILTIN_PAVGB128,
14841 IX86_BUILTIN_PAVGW128,
14843 IX86_BUILTIN_PCMPEQB128,
14844 IX86_BUILTIN_PCMPEQW128,
14845 IX86_BUILTIN_PCMPEQD128,
14846 IX86_BUILTIN_PCMPGTB128,
14847 IX86_BUILTIN_PCMPGTW128,
14848 IX86_BUILTIN_PCMPGTD128,
14850 IX86_BUILTIN_PMADDWD128,
14852 IX86_BUILTIN_PMAXSW128,
14853 IX86_BUILTIN_PMAXUB128,
14854 IX86_BUILTIN_PMINSW128,
14855 IX86_BUILTIN_PMINUB128,
14857 IX86_BUILTIN_PMULUDQ,
14858 IX86_BUILTIN_PMULUDQ128,
14859 IX86_BUILTIN_PMULHUW128,
14860 IX86_BUILTIN_PMULHW128,
14861 IX86_BUILTIN_PMULLW128,
14863 IX86_BUILTIN_PSADBW128,
14864 IX86_BUILTIN_PSHUFHW,
14865 IX86_BUILTIN_PSHUFLW,
14866 IX86_BUILTIN_PSHUFD,
14868 IX86_BUILTIN_PSLLW128,
14869 IX86_BUILTIN_PSLLD128,
14870 IX86_BUILTIN_PSLLQ128,
14871 IX86_BUILTIN_PSRAW128,
14872 IX86_BUILTIN_PSRAD128,
14873 IX86_BUILTIN_PSRLW128,
14874 IX86_BUILTIN_PSRLD128,
14875 IX86_BUILTIN_PSRLQ128,
14876 IX86_BUILTIN_PSLLDQI128,
14877 IX86_BUILTIN_PSLLWI128,
14878 IX86_BUILTIN_PSLLDI128,
14879 IX86_BUILTIN_PSLLQI128,
14880 IX86_BUILTIN_PSRAWI128,
14881 IX86_BUILTIN_PSRADI128,
14882 IX86_BUILTIN_PSRLDQI128,
14883 IX86_BUILTIN_PSRLWI128,
14884 IX86_BUILTIN_PSRLDI128,
14885 IX86_BUILTIN_PSRLQI128,
14887 IX86_BUILTIN_PUNPCKHBW128,
14888 IX86_BUILTIN_PUNPCKHWD128,
14889 IX86_BUILTIN_PUNPCKHDQ128,
14890 IX86_BUILTIN_PUNPCKHQDQ128,
14891 IX86_BUILTIN_PUNPCKLBW128,
14892 IX86_BUILTIN_PUNPCKLWD128,
14893 IX86_BUILTIN_PUNPCKLDQ128,
14894 IX86_BUILTIN_PUNPCKLQDQ128,
14896 IX86_BUILTIN_CLFLUSH,
14897 IX86_BUILTIN_MFENCE,
14898 IX86_BUILTIN_LFENCE,
14900 /* Prescott New Instructions. */
14901 IX86_BUILTIN_ADDSUBPS,
14902 IX86_BUILTIN_HADDPS,
14903 IX86_BUILTIN_HSUBPS,
14904 IX86_BUILTIN_MOVSHDUP,
14905 IX86_BUILTIN_MOVSLDUP,
14906 IX86_BUILTIN_ADDSUBPD,
14907 IX86_BUILTIN_HADDPD,
14908 IX86_BUILTIN_HSUBPD,
14909 IX86_BUILTIN_LDDQU,
14911 IX86_BUILTIN_MONITOR,
14912 IX86_BUILTIN_MWAIT,
14915 IX86_BUILTIN_PHADDW,
14916 IX86_BUILTIN_PHADDD,
14917 IX86_BUILTIN_PHADDSW,
14918 IX86_BUILTIN_PHSUBW,
14919 IX86_BUILTIN_PHSUBD,
14920 IX86_BUILTIN_PHSUBSW,
14921 IX86_BUILTIN_PMADDUBSW,
14922 IX86_BUILTIN_PMULHRSW,
14923 IX86_BUILTIN_PSHUFB,
14924 IX86_BUILTIN_PSIGNB,
14925 IX86_BUILTIN_PSIGNW,
14926 IX86_BUILTIN_PSIGND,
14927 IX86_BUILTIN_PALIGNR,
14928 IX86_BUILTIN_PABSB,
14929 IX86_BUILTIN_PABSW,
14930 IX86_BUILTIN_PABSD,
14932 IX86_BUILTIN_PHADDW128,
14933 IX86_BUILTIN_PHADDD128,
14934 IX86_BUILTIN_PHADDSW128,
14935 IX86_BUILTIN_PHSUBW128,
14936 IX86_BUILTIN_PHSUBD128,
14937 IX86_BUILTIN_PHSUBSW128,
14938 IX86_BUILTIN_PMADDUBSW128,
14939 IX86_BUILTIN_PMULHRSW128,
14940 IX86_BUILTIN_PSHUFB128,
14941 IX86_BUILTIN_PSIGNB128,
14942 IX86_BUILTIN_PSIGNW128,
14943 IX86_BUILTIN_PSIGND128,
14944 IX86_BUILTIN_PALIGNR128,
14945 IX86_BUILTIN_PABSB128,
14946 IX86_BUILTIN_PABSW128,
14947 IX86_BUILTIN_PABSD128,
14949 /* AMDFAM10 - SSE4A New Instructions. */
14950 IX86_BUILTIN_MOVNTSD,
14951 IX86_BUILTIN_MOVNTSS,
14952 IX86_BUILTIN_EXTRQI,
14953 IX86_BUILTIN_EXTRQ,
14954 IX86_BUILTIN_INSERTQI,
14955 IX86_BUILTIN_INSERTQ,
14957 IX86_BUILTIN_VEC_INIT_V2SI,
14958 IX86_BUILTIN_VEC_INIT_V4HI,
14959 IX86_BUILTIN_VEC_INIT_V8QI,
14960 IX86_BUILTIN_VEC_EXT_V2DF,
14961 IX86_BUILTIN_VEC_EXT_V2DI,
14962 IX86_BUILTIN_VEC_EXT_V4SF,
14963 IX86_BUILTIN_VEC_EXT_V4SI,
14964 IX86_BUILTIN_VEC_EXT_V8HI,
14965 IX86_BUILTIN_VEC_EXT_V16QI,
14966 IX86_BUILTIN_VEC_EXT_V2SI,
14967 IX86_BUILTIN_VEC_EXT_V4HI,
14968 IX86_BUILTIN_VEC_SET_V8HI,
14969 IX86_BUILTIN_VEC_SET_V4HI,
14974 #define def_builtin(MASK, NAME, TYPE, CODE) \
14975 do { \
14976 if ((MASK) & target_flags \
14977 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14978 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14979 NULL, NULL_TREE); \
14980 } while (0)
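/* Typical use, editorial sketch (the v4sf_ftype_v4sf_v4sf type node is
   assumed to be built elsewhere in this file):
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);  */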
14982 /* Bits for builtin_description.flag. */
14984 /* Set when we don't support the comparison natively, and should
14985 swap the comparison operands in order to support it. */
14986 #define BUILTIN_DESC_SWAP_OPERANDS 1
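/* Editorial example: there is no native cmpgtps, so the
   __builtin_ia32_cmpgtps entry in bdesc_2arg below uses the LT
   comparison with BUILTIN_DESC_SWAP_OPERANDS set; a > b is emitted as
   b < a.  */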
14988 struct builtin_description
14990 const unsigned int mask;
14991 const enum insn_code icode;
14992 const char *const name;
14993 const enum ix86_builtins code;
14994 const enum rtx_code comparison;
14995 const unsigned int flag;
14998 static const struct builtin_description bdesc_comi[] =
15000 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15001 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15002 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15003 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15004 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15005 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15006 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15007 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15008 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15009 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15010 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15011 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15012 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15013 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15014 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15015 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15016 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15017 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15018 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15019 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15020 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15021 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15022 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15023 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15026 static const struct builtin_description bdesc_2arg[] =
15029 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15030 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15031 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15032 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15033 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15034 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15035 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15036 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15038 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15039 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15040 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15041 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15042 BUILTIN_DESC_SWAP_OPERANDS },
15043 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15044 BUILTIN_DESC_SWAP_OPERANDS },
15045 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15046 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15047 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15048 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15049 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15050 BUILTIN_DESC_SWAP_OPERANDS },
15051 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15052 BUILTIN_DESC_SWAP_OPERANDS },
15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15054 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15055 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15056 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15057 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15058 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15059 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15060 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15061 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15062 BUILTIN_DESC_SWAP_OPERANDS },
15063 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15064 BUILTIN_DESC_SWAP_OPERANDS },
15065 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15067 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15068 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15069 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15070 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15072 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15073 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15074 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15075 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15077 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15078 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15079 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15080 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15081 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },

  /* SSSE3 */
  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
};
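
/* Illustrative sketch (not part of this table): each row above binds one
   named builtin to an insn pattern.  Assuming the usual intrinsic headers,
   a row such as the IX86_BUILTIN_PADDW128 one is what ultimately backs

     #include <emmintrin.h>
     __m128i add16 (__m128i a, __m128i b)
     {
       return _mm_add_epi16 (a, b);  /* expands to __builtin_ia32_paddw128 */
     }

   compiled with -msse2.  The mask field gates registration, the icode
   selects the named insn pattern, and the last two fields (comparison
   code and flags) are only significant for the compare rows.  */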

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },

  /* SSSE3 */
  { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
};
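
/* Illustrative sketch (not part of this table): rows with a non-null name
   are registered directly by the bdesc_1arg loop below; for example the
   IX86_BUILTIN_PABSB128 row is what backs tmmintrin.h's

     __m128i mag (__m128i x) { return _mm_abs_epi8 (x); }

   Rows whose name field is 0 (sqrtps, the cvt* conversions, ...) are only
   used to look up the insn code at expansion time; their user-visible
   builtins are defined by hand further down with more precise types.  */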
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
			    build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
			     build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree di_ftype_di_di_int
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				integer_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
			      build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
				NULL_TREE);

  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  if (TARGET_64BIT)
    {
      float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
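
  /* For illustration (a sketch, not code executed here): for the
     IX86_BUILTIN_ANDPD row above, operand 1 of CODE_FOR_andv2df3 has mode
     V2DFmode, so the loop selects v2df_ftype_v2df_v2df and the call
     reduces to

       def_builtin (MASK_SSE2, "__builtin_ia32_andpd",
		    v2df_ftype_v2df_v2df, IX86_BUILTIN_ANDPD);
   */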

  /* Add all builtins that are more or less simple operations on 1 operand.  */
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
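
  /* For illustration (a sketch, not code executed here): every comi/ucomi
     builtin returns int, so e.g. __builtin_ia32_comilt, registered from
     bdesc_comi just above, is what backs xmmintrin.h's

       int lt (__m128 a, __m128 b) { return _mm_comilt_ss (a, b); }
   */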

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
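
  /* For illustration (a sketch, not code executed here): with -m3dnow these
     map onto the mm3dnow.h intrinsics, e.g. __builtin_ia32_pfadd backs

       __m64 add2f (__m64 a, __m64 b) { return _m_pfadd (a, b); }
   */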

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* SSSE3.  */
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
	       IX86_BUILTIN_PALIGNR);

  /* AMDFAM10 SSE4A new built-ins.  */
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
	       void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
	       void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
	       v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
	       v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
	       v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
	       v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
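
  /* For illustration (a sketch, not code executed here): the vec_init
     builtins build a vector from scalars, and mmintrin.h uses them
     directly, e.g. _mm_set_pi32 is roughly

       __m64 pair (int hi, int lo)
       {
	 return (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
       }
   */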

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
				    V2DI_type_node, integer_type_node,
				    NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi",
	       ftype, IX86_BUILTIN_VEC_EXT_V16QI);
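
  /* For illustration (a sketch, not code executed here): vec_ext is what
     _mm_extract_epi16 reduces to, e.g.

       int lane3 (__m128i x) { return _mm_extract_epi16 (x, 3); }

     becomes a call to __builtin_ia32_vec_ext_v8hi (x, 3).  */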

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
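
/* For illustration (a sketch, not code executed here): for source that is
   already in error, expand_expr may hand back const0_rtx where a vector
   operand was expected; safe_vector_operand rewrites it as, say,
   CONST0_RTX (V4SImode), an all-zero vector constant of the right mode,
   so the expanders below never feed a scalar zero into a vector insn.  */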

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
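
/* For illustration (a sketch, not code executed here): expanding
   __builtin_ia32_andpd arrives here roughly as

     ix86_expand_binop_builtin (CODE_FOR_andv2df3, arglist, target);

   the two arguments are expanded to rtx, coerced until the pattern's
   operand predicates accept them, and a single two-input insn is
   emitted whose destination is returned.  */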

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
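
/* For illustration (a sketch, not code executed here): this handles
   builtins whose first argument is a pointer, e.g. __builtin_ia32_movntpd
   behind _mm_stream_pd.  The pointer operand becomes a MEM of the insn's
   mode and the value operand is forced into a register; the store insn
   is emitted purely for its side effect, hence the constant 0 return.  */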
16223 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16226 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16227 rtx target, int do_load)
16230 tree arg0 = TREE_VALUE (arglist);
16231 rtx op0 = expand_normal (arg0);
16232 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16233 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16235 if (optimize || !target
16236 || GET_MODE (target) != tmode
16237 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16238 target = gen_reg_rtx (tmode);
16240 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16243 if (VECTOR_MODE_P (mode0))
16244 op0 = safe_vector_operand (op0, mode0);
16246 if ((optimize && !register_operand (op0, mode0))
16247 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16248         op0 = copy_to_mode_reg (mode0, op0);
16249     }
16251   pat = GEN_FCN (icode) (target, op0);
16252   if (! pat)
16253     return 0;
16254   emit_insn (pat);
16255   return target;
16256 }
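/* Editor's addition: sketch of the do_load path of
   ix86_expand_unop_builtin, assuming the GCC 4.x builtin
   __builtin_ia32_loadups behind _mm_loadu_ps.  With do_load nonzero the
   argument is treated as an address and wrapped in a MEM of mode0.

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       load4 (float const *p)
       {
         return __builtin_ia32_loadups (p);
       }
*/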
16258 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16259 sqrtss, rsqrtss, rcpss. */
16261 static rtx
16262 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16263 {
16264   rtx pat;
16265 tree arg0 = TREE_VALUE (arglist);
16266 rtx op1, op0 = expand_normal (arg0);
16267 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16268 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16270 if (optimize || !target
16271 || GET_MODE (target) != tmode
16272 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16273 target = gen_reg_rtx (tmode);
16275 if (VECTOR_MODE_P (mode0))
16276 op0 = safe_vector_operand (op0, mode0);
16278 if ((optimize && !register_operand (op0, mode0))
16279 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16280 op0 = copy_to_mode_reg (mode0, op0);
16282   op1 = op0;
16283   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16284 op1 = copy_to_mode_reg (mode0, op1);
16286   pat = GEN_FCN (icode) (target, op0, op1);
16287   if (! pat)
16288     return 0;
16289   emit_insn (pat);
16290   return target;
16291 }
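/* Editor's addition: sketch for ix86_expand_unop1_builtin, assuming the
   GCC 4.x builtin __builtin_ia32_sqrtss behind _mm_sqrt_ss.  Passing op0
   as both inputs lets the vm pattern compute sqrt of element 0 while the
   remaining elements are copied from the same source.

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       sqrt_low (__v4sf a)
       {
         return __builtin_ia32_sqrtss (a);
       }
*/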
16293 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16295 static rtx
16296 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16297                          rtx target)
16298 {
16299   rtx pat;
16300 tree arg0 = TREE_VALUE (arglist);
16301 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16302 rtx op0 = expand_normal (arg0);
16303   rtx op1 = expand_normal (arg1);
16304   rtx op2;
16305 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16306 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16307 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16308 enum rtx_code comparison = d->comparison;
16310 if (VECTOR_MODE_P (mode0))
16311 op0 = safe_vector_operand (op0, mode0);
16312 if (VECTOR_MODE_P (mode1))
16313 op1 = safe_vector_operand (op1, mode1);
16315   /* Swap operands if we have a comparison that isn't available in
16316      hardware.  */
16317   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16318     {
16319       rtx tmp = gen_reg_rtx (mode1);
16320       emit_move_insn (tmp, op1);
16321       op1 = op0;
16322       op0 = tmp;
16323     }
16325 if (optimize || !target
16326 || GET_MODE (target) != tmode
16327 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16328 target = gen_reg_rtx (tmode);
16330 if ((optimize && !register_operand (op0, mode0))
16331 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16332 op0 = copy_to_mode_reg (mode0, op0);
16333 if ((optimize && !register_operand (op1, mode1))
16334 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16335 op1 = copy_to_mode_reg (mode1, op1);
16337 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16338   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16339   if (! pat)
16340     return 0;
16341   emit_insn (pat);
16342   return target;
16343 }
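/* Editor's addition: illustration of BUILTIN_DESC_SWAP_OPERANDS,
   assuming (as in the GCC 4.x builtin tables) that
   __builtin_ia32_cmpgtps is described with comparison code LT plus the
   swap flag, since cmpps has no GT encoding.

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       gt_mask (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_cmpgtps (a, b);
       }

   The expander above then emits the LT comparison with a and b
   exchanged, which produces the identical mask.  */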
16345 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16347 static rtx
16348 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16349                       rtx target)
16350 {
16351   rtx pat;
16352 tree arg0 = TREE_VALUE (arglist);
16353 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16354 rtx op0 = expand_normal (arg0);
16355   rtx op1 = expand_normal (arg1);
16356   rtx op2;
16357 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16358 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16359 enum rtx_code comparison = d->comparison;
16361 if (VECTOR_MODE_P (mode0))
16362 op0 = safe_vector_operand (op0, mode0);
16363 if (VECTOR_MODE_P (mode1))
16364 op1 = safe_vector_operand (op1, mode1);
16366   /* Swap operands if we have a comparison that isn't available in
16367      hardware.  */
16368   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16369     {
16370       rtx tmp = op1;
16371       op1 = op0;
16372       op0 = tmp;
16373     }
16375 target = gen_reg_rtx (SImode);
16376 emit_move_insn (target, const0_rtx);
16377 target = gen_rtx_SUBREG (QImode, target, 0);
16379 if ((optimize && !register_operand (op0, mode0))
16380 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16381 op0 = copy_to_mode_reg (mode0, op0);
16382 if ((optimize && !register_operand (op1, mode1))
16383 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16384 op1 = copy_to_mode_reg (mode1, op1);
16386 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16387   pat = GEN_FCN (d->icode) (op0, op1);
16388   if (! pat)
16389     return 0;
16390   emit_insn (pat);
16391 emit_insn (gen_rtx_SET (VOIDmode,
16392 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16393                           gen_rtx_fmt_ee (comparison, QImode,
16394                                           SET_DEST (pat),
16395                                           const0_rtx)));
16397   return SUBREG_REG (target);
16398 }
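/* Editor's addition: sketch for ix86_expand_sse_comi, assuming the GCC
   4.x builtin __builtin_ia32_comilt behind _mm_comilt_ss.  The comi insn
   sets the flags; the code above materializes the QImode flag value
   through a STRICT_LOW_PART store into an SImode pseudo, which is why
   SUBREG_REG (target) is returned.

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       int
       lt_low (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_comilt (a, b);
       }
*/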
16400 /* Return the integer constant in ARG. Constrain it to be in the range
16401 of the subparts of VEC_TYPE; issue an error if not. */
16403 static unsigned HOST_WIDE_INT
16404 get_element_number (tree vec_type, tree arg)
16405 {
16406 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16408   if (!host_integerp (arg, 1)
16409       || (elt = tree_low_cst (arg, 1), elt > max))
16410     {
16411       error ("selector must be an integer constant in the range 0..%wi", max);
16412       return 0;
16413     }
16415   return elt;
16416 }
16418 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16419 ix86_expand_vector_init. We DO have language-level syntax for this, in
16420 the form of (type){ init-list }. Except that since we can't place emms
16421 instructions from inside the compiler, we can't allow the use of MMX
16422 registers unless the user explicitly asks for it. So we do *not* define
16423 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16424 we have builtins invoked by mmintrin.h that give us license to emit
16425 these sorts of instructions. */
16427 static rtx
16428 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16429 {
16430 enum machine_mode tmode = TYPE_MODE (type);
16431 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16432 int i, n_elt = GET_MODE_NUNITS (tmode);
16433 rtvec v = rtvec_alloc (n_elt);
16435 gcc_assert (VECTOR_MODE_P (tmode));
16437 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16438     {
16439       rtx x = expand_normal (TREE_VALUE (arglist));
16440       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16441     }
16443 gcc_assert (arglist == NULL);
16445 if (!target || !register_operand (target, tmode))
16446 target = gen_reg_rtx (tmode);
16448   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16449   return target;
16450 }
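/* Editor's addition: sketch for ix86_expand_vec_init_builtin, assuming
   the GCC 4.x spelling __builtin_ia32_vec_init_v2si used by _mm_set_pi32
   in mmintrin.h.  Element arguments arrive lowest-numbered first in the
   arglist.

       typedef int __v2si __attribute__ ((__vector_size__ (8)));

       __v2si
       make2 (int lo, int hi)
       {
         return __builtin_ia32_vec_init_v2si (lo, hi);
       }
*/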
16452 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16453 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16454 had a language-level syntax for referencing vector elements. */
16456 static rtx
16457 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16458 {
16459   enum machine_mode tmode, mode0;
16460   tree arg0, arg1;
16461   int elt;
16462   rtx op0;
16464 arg0 = TREE_VALUE (arglist);
16465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16467 op0 = expand_normal (arg0);
16468 elt = get_element_number (TREE_TYPE (arg0), arg1);
16470 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16471 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16472 gcc_assert (VECTOR_MODE_P (mode0));
16474 op0 = force_reg (mode0, op0);
16476 if (optimize || !target || !register_operand (target, tmode))
16477 target = gen_reg_rtx (tmode);
16479   ix86_expand_vector_extract (true, target, op0, elt);
16481   return target;
16482 }
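/* Editor's addition: sketch for ix86_expand_vec_ext_builtin, assuming
   the GCC 4.x spelling __builtin_ia32_vec_ext_v8hi behind
   _mm_extract_epi16.  The selector must be a constant in range;
   get_element_number above diagnoses anything else.

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       int
       third (__v8hi v)
       {
         return __builtin_ia32_vec_ext_v8hi (v, 3);
       }
*/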
16484 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16485 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16486 a language-level syntax for referencing vector elements. */
16488 static rtx
16489 ix86_expand_vec_set_builtin (tree arglist)
16490 {
16491   enum machine_mode tmode, mode1;
16492   tree arg0, arg1, arg2;
16493   int elt;
16494   rtx op0, op1, target;
16496 arg0 = TREE_VALUE (arglist);
16497 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16498 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16500 tmode = TYPE_MODE (TREE_TYPE (arg0));
16501 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16502 gcc_assert (VECTOR_MODE_P (tmode));
16504 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16505 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16506 elt = get_element_number (TREE_TYPE (arg0), arg2);
16508 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16509 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16511 op0 = force_reg (tmode, op0);
16512 op1 = force_reg (mode1, op1);
16514 /* OP0 is the source of these builtin functions and shouldn't be
16515 modified. Create a copy, use it and return it as target. */
16516 target = gen_reg_rtx (tmode);
16517 emit_move_insn (target, op0);
16518   ix86_expand_vector_set (true, target, op1, elt);
16520   return target;
16521 }
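/* Editor's addition: sketch for ix86_expand_vec_set_builtin, assuming
   the GCC 4.x spelling __builtin_ia32_vec_set_v8hi behind
   _mm_insert_epi16.  Because the expander copies op0 into a fresh
   register first, the builtin is functional: it returns the updated
   vector instead of modifying its argument.

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       __v8hi
       put3 (__v8hi v, int x)
       {
         return __builtin_ia32_vec_set_v8hi (v, x, 3);
       }
*/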
16523 /* Expand an expression EXP that calls a built-in function,
16524 with result going to TARGET if that's convenient
16525 (and in mode MODE if that's convenient).
16526 SUBTARGET may be used as the target for computing one of EXP's operands.
16527 IGNORE is nonzero if the value is to be ignored. */
16529 static rtx
16530 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16531                      enum machine_mode mode ATTRIBUTE_UNUSED,
16532                      int ignore ATTRIBUTE_UNUSED)
16533 {
16534   const struct builtin_description *d;
16535   size_t i;
16536   enum insn_code icode;
16537   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16538   tree arglist = TREE_OPERAND (exp, 1);
16539   tree arg0, arg1, arg2, arg3;
16540   rtx op0, op1, op2, op3, pat;
16541   enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16542   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16544   switch (fcode)
16545     {
16546     case IX86_BUILTIN_EMMS:
16547       emit_insn (gen_mmx_emms ());
16548       return 0;
16550     case IX86_BUILTIN_SFENCE:
16551       emit_insn (gen_sse_sfence ());
16552       return 0;
16554 case IX86_BUILTIN_MASKMOVQ:
16555 case IX86_BUILTIN_MASKMOVDQU:
16556 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16557 ? CODE_FOR_mmx_maskmovq
16558 : CODE_FOR_sse2_maskmovdqu);
16559 /* Note the arg order is different from the operand order. */
16560 arg1 = TREE_VALUE (arglist);
16561 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16562 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16563 op0 = expand_normal (arg0);
16564 op1 = expand_normal (arg1);
16565 op2 = expand_normal (arg2);
16566 mode0 = insn_data[icode].operand[0].mode;
16567 mode1 = insn_data[icode].operand[1].mode;
16568 mode2 = insn_data[icode].operand[2].mode;
16570 op0 = force_reg (Pmode, op0);
16571 op0 = gen_rtx_MEM (mode1, op0);
16573 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16574 op0 = copy_to_mode_reg (mode0, op0);
16575 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16576 op1 = copy_to_mode_reg (mode1, op1);
16577 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16578 op2 = copy_to_mode_reg (mode2, op2);
16579       pat = GEN_FCN (icode) (op0, op1, op2);
16580       if (! pat)
16581         return 0;
16582       emit_insn (pat);
16583       return 0;
16585 case IX86_BUILTIN_SQRTSS:
16586 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16587 case IX86_BUILTIN_RSQRTSS:
16588 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16589 case IX86_BUILTIN_RCPSS:
16590 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16592 case IX86_BUILTIN_LOADUPS:
16593 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16595 case IX86_BUILTIN_STOREUPS:
16596 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16598 case IX86_BUILTIN_LOADHPS:
16599 case IX86_BUILTIN_LOADLPS:
16600 case IX86_BUILTIN_LOADHPD:
16601 case IX86_BUILTIN_LOADLPD:
16602 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16603 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16604 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16605 : CODE_FOR_sse2_loadlpd);
16606 arg0 = TREE_VALUE (arglist);
16607 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16608 op0 = expand_normal (arg0);
16609 op1 = expand_normal (arg1);
16610 tmode = insn_data[icode].operand[0].mode;
16611 mode0 = insn_data[icode].operand[1].mode;
16612 mode1 = insn_data[icode].operand[2].mode;
16614 op0 = force_reg (mode0, op0);
16615 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16616 if (optimize || target == 0
16617 || GET_MODE (target) != tmode
16618 || !register_operand (target, tmode))
16619 target = gen_reg_rtx (tmode);
16620       pat = GEN_FCN (icode) (target, op0, op1);
16621       if (! pat)
16622         return 0;
16623       emit_insn (pat);
16624       return target;
16626 case IX86_BUILTIN_STOREHPS:
16627 case IX86_BUILTIN_STORELPS:
16628 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16629 : CODE_FOR_sse_storelps);
16630 arg0 = TREE_VALUE (arglist);
16631 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16632 op0 = expand_normal (arg0);
16633 op1 = expand_normal (arg1);
16634 mode0 = insn_data[icode].operand[0].mode;
16635 mode1 = insn_data[icode].operand[1].mode;
16637 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16638 op1 = force_reg (mode1, op1);
16640       pat = GEN_FCN (icode) (op0, op1);
16641       if (! pat)
16642         return const0_rtx;
16643       emit_insn (pat);
16644       return const0_rtx;
16646 case IX86_BUILTIN_MOVNTPS:
16647 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16648 case IX86_BUILTIN_MOVNTQ:
16649 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16651 case IX86_BUILTIN_LDMXCSR:
16652 op0 = expand_normal (TREE_VALUE (arglist));
16653 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16654 emit_move_insn (target, op0);
16655       emit_insn (gen_sse_ldmxcsr (target));
16656       return 0;
16658 case IX86_BUILTIN_STMXCSR:
16659 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16660 emit_insn (gen_sse_stmxcsr (target));
16661 return copy_to_mode_reg (SImode, target);
16663 case IX86_BUILTIN_SHUFPS:
16664 case IX86_BUILTIN_SHUFPD:
16665 icode = (fcode == IX86_BUILTIN_SHUFPS
16666 ? CODE_FOR_sse_shufps
16667 : CODE_FOR_sse2_shufpd);
16668 arg0 = TREE_VALUE (arglist);
16669 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16670 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16671 op0 = expand_normal (arg0);
16672 op1 = expand_normal (arg1);
16673 op2 = expand_normal (arg2);
16674 tmode = insn_data[icode].operand[0].mode;
16675 mode0 = insn_data[icode].operand[1].mode;
16676 mode1 = insn_data[icode].operand[2].mode;
16677 mode2 = insn_data[icode].operand[3].mode;
16679 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16680 op0 = copy_to_mode_reg (mode0, op0);
16681 if ((optimize && !register_operand (op1, mode1))
16682 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16683 op1 = copy_to_mode_reg (mode1, op1);
16684       if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16685         {
16686           /* @@@ better error message */
16687           error ("mask must be an immediate");
16688           return gen_reg_rtx (tmode);
16689         }
16690 if (optimize || target == 0
16691 || GET_MODE (target) != tmode
16692 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16693 target = gen_reg_rtx (tmode);
16694       pat = GEN_FCN (icode) (target, op0, op1, op2);
16695       if (! pat)
16696         return 0;
16697       emit_insn (pat);
16698       return target;
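/* Editor's addition: sketch for the SHUFPS case, assuming the GCC 4.x
   builtin __builtin_ia32_shufps behind _mm_shuffle_ps.  The third
   operand must satisfy the immediate predicate checked above; 0x00
   broadcasts element 0 of the first source.

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       splat0 (__v4sf a)
       {
         return __builtin_ia32_shufps (a, a, 0x00);
       }
*/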
16700 case IX86_BUILTIN_PSHUFW:
16701 case IX86_BUILTIN_PSHUFD:
16702 case IX86_BUILTIN_PSHUFHW:
16703 case IX86_BUILTIN_PSHUFLW:
16704 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16705 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16706 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16707 : CODE_FOR_mmx_pshufw);
16708 arg0 = TREE_VALUE (arglist);
16709 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16710 op0 = expand_normal (arg0);
16711 op1 = expand_normal (arg1);
16712 tmode = insn_data[icode].operand[0].mode;
16713 mode1 = insn_data[icode].operand[1].mode;
16714 mode2 = insn_data[icode].operand[2].mode;
16716 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16717 op0 = copy_to_mode_reg (mode1, op0);
16718       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16719         {
16720           /* @@@ better error message */
16721           error ("mask must be an immediate");
16722           return const0_rtx;
16723         }
16724       if (target == 0
16725           || GET_MODE (target) != tmode
16726           || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16727         target = gen_reg_rtx (tmode);
16728       pat = GEN_FCN (icode) (target, op0, op1);
16729       if (! pat)
16730         return 0;
16731       emit_insn (pat);
16732       return target;
16734     case IX86_BUILTIN_PSLLWI128:
16735       icode = CODE_FOR_ashlv8hi3;
16736       goto do_pshifti;
16737     case IX86_BUILTIN_PSLLDI128:
16738       icode = CODE_FOR_ashlv4si3;
16739       goto do_pshifti;
16740     case IX86_BUILTIN_PSLLQI128:
16741       icode = CODE_FOR_ashlv2di3;
16742       goto do_pshifti;
16743     case IX86_BUILTIN_PSRAWI128:
16744       icode = CODE_FOR_ashrv8hi3;
16745       goto do_pshifti;
16746     case IX86_BUILTIN_PSRADI128:
16747       icode = CODE_FOR_ashrv4si3;
16748       goto do_pshifti;
16749     case IX86_BUILTIN_PSRLWI128:
16750       icode = CODE_FOR_lshrv8hi3;
16751       goto do_pshifti;
16752     case IX86_BUILTIN_PSRLDI128:
16753       icode = CODE_FOR_lshrv4si3;
16754       goto do_pshifti;
16755     case IX86_BUILTIN_PSRLQI128:
16756       icode = CODE_FOR_lshrv2di3;
16757       goto do_pshifti;
16758     do_pshifti:
16759 arg0 = TREE_VALUE (arglist);
16760 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16761 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16762 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16764       if (GET_CODE (op1) != CONST_INT)
16765         {
16766           error ("shift must be an immediate");
16767           return const0_rtx;
16768         }
16769       if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16770 op1 = GEN_INT (255);
16772 tmode = insn_data[icode].operand[0].mode;
16773 mode1 = insn_data[icode].operand[1].mode;
16774 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16775 op0 = copy_to_reg (op0);
16777 target = gen_reg_rtx (tmode);
16778       pat = GEN_FCN (icode) (target, op0, op1);
16779       if (!pat)
16780         return 0;
16781       emit_insn (pat);
16782       return target;
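/* Editor's addition: sketch for the immediate-count shift cases,
   assuming the GCC 4.x builtin __builtin_ia32_psllwi128 behind
   _mm_slli_epi16.  A non-constant count is rejected above, and counts
   outside 0..255 are clamped to 255, which shifts every element to
   zero.

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       __v8hi
       twice (__v8hi v)
       {
         return __builtin_ia32_psllwi128 (v, 1);
       }
*/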
16784     case IX86_BUILTIN_PSLLW128:
16785       icode = CODE_FOR_ashlv8hi3;
16786       goto do_pshift;
16787     case IX86_BUILTIN_PSLLD128:
16788       icode = CODE_FOR_ashlv4si3;
16789       goto do_pshift;
16790     case IX86_BUILTIN_PSLLQ128:
16791       icode = CODE_FOR_ashlv2di3;
16792       goto do_pshift;
16793     case IX86_BUILTIN_PSRAW128:
16794       icode = CODE_FOR_ashrv8hi3;
16795       goto do_pshift;
16796     case IX86_BUILTIN_PSRAD128:
16797       icode = CODE_FOR_ashrv4si3;
16798       goto do_pshift;
16799     case IX86_BUILTIN_PSRLW128:
16800       icode = CODE_FOR_lshrv8hi3;
16801       goto do_pshift;
16802     case IX86_BUILTIN_PSRLD128:
16803       icode = CODE_FOR_lshrv4si3;
16804       goto do_pshift;
16805     case IX86_BUILTIN_PSRLQ128:
16806       icode = CODE_FOR_lshrv2di3;
16807       goto do_pshift;
16808     do_pshift:
16809 arg0 = TREE_VALUE (arglist);
16810 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16811 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16812 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16814 tmode = insn_data[icode].operand[0].mode;
16815 mode1 = insn_data[icode].operand[1].mode;
16817 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16818 op0 = copy_to_reg (op0);
16820 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16821 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16822 op1 = copy_to_reg (op1);
16824 target = gen_reg_rtx (tmode);
16825       pat = GEN_FCN (icode) (target, op0, op1);
16826       if (!pat)
16827         return 0;
16828       emit_insn (pat);
16829       return target;
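/* Editor's addition: sketch for the register-count shift cases.  The
   builtin name __builtin_ia32_psllw128 and the argument types follow
   the GCC 4.x headers behind _mm_sll_epi16 and are assumptions here;
   the point is that the count operand is a vector whose low quadword
   holds the shift amount, reinterpreted as TImode by the subreg above.

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       __v8hi
       shl8 (__v8hi v, __v8hi count)
       {
         return __builtin_ia32_psllw128 (v, count);
       }
*/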
16831 case IX86_BUILTIN_PSLLDQI128:
16832 case IX86_BUILTIN_PSRLDQI128:
16833 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16834 : CODE_FOR_sse2_lshrti3);
16835 arg0 = TREE_VALUE (arglist);
16836 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16837 op0 = expand_normal (arg0);
16838 op1 = expand_normal (arg1);
16839 tmode = insn_data[icode].operand[0].mode;
16840 mode1 = insn_data[icode].operand[1].mode;
16841 mode2 = insn_data[icode].operand[2].mode;
16843       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16844         {
16845           op0 = copy_to_reg (op0);
16846           op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16847         }
16848       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16849         {
16850           error ("shift must be an immediate");
16851           return const0_rtx;
16852         }
16853       target = gen_reg_rtx (V2DImode);
16854       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16855                              op0, op1);
16856       if (! pat)
16857         return 0;
16858       emit_insn (pat);
16859       return target;
16861     case IX86_BUILTIN_FEMMS:
16862       emit_insn (gen_mmx_femms ());
16863       return 0;
16865 case IX86_BUILTIN_PAVGUSB:
16866 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16868 case IX86_BUILTIN_PF2ID:
16869 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16871 case IX86_BUILTIN_PFACC:
16872 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16874 case IX86_BUILTIN_PFADD:
16875 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16877 case IX86_BUILTIN_PFCMPEQ:
16878 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16880 case IX86_BUILTIN_PFCMPGE:
16881 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16883 case IX86_BUILTIN_PFCMPGT:
16884 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16886 case IX86_BUILTIN_PFMAX:
16887 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16889 case IX86_BUILTIN_PFMIN:
16890 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16892 case IX86_BUILTIN_PFMUL:
16893 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16895 case IX86_BUILTIN_PFRCP:
16896 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16898 case IX86_BUILTIN_PFRCPIT1:
16899 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16901 case IX86_BUILTIN_PFRCPIT2:
16902 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16904 case IX86_BUILTIN_PFRSQIT1:
16905 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16907 case IX86_BUILTIN_PFRSQRT:
16908 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16910 case IX86_BUILTIN_PFSUB:
16911 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16913 case IX86_BUILTIN_PFSUBR:
16914 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16916 case IX86_BUILTIN_PI2FD:
16917 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16919 case IX86_BUILTIN_PMULHRW:
16920 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16922 case IX86_BUILTIN_PF2IW:
16923 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16925 case IX86_BUILTIN_PFNACC:
16926 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16928 case IX86_BUILTIN_PFPNACC:
16929 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16931 case IX86_BUILTIN_PI2FW:
16932 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16934 case IX86_BUILTIN_PSWAPDSI:
16935 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16937 case IX86_BUILTIN_PSWAPDSF:
16938 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16940 case IX86_BUILTIN_SQRTSD:
16941 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16942 case IX86_BUILTIN_LOADUPD:
16943 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16944 case IX86_BUILTIN_STOREUPD:
16945 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16947     case IX86_BUILTIN_MFENCE:
16948       emit_insn (gen_sse2_mfence ());
16949       return 0;
16950     case IX86_BUILTIN_LFENCE:
16951       emit_insn (gen_sse2_lfence ());
16952       return 0;
16954     case IX86_BUILTIN_CLFLUSH:
16955       arg0 = TREE_VALUE (arglist);
16956       op0 = expand_normal (arg0);
16957       icode = CODE_FOR_sse2_clflush;
16958       if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16959         op0 = copy_to_mode_reg (Pmode, op0);
16961       emit_insn (gen_sse2_clflush (op0));
16962       return 0;
16964 case IX86_BUILTIN_MOVNTPD:
16965 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16966 case IX86_BUILTIN_MOVNTDQ:
16967 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16968 case IX86_BUILTIN_MOVNTI:
16969 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16971 case IX86_BUILTIN_LOADDQU:
16972 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16973 case IX86_BUILTIN_STOREDQU:
16974 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16976     case IX86_BUILTIN_MONITOR:
16977       arg0 = TREE_VALUE (arglist);
16978       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16979       arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16980       op0 = expand_normal (arg0);
16981       op1 = expand_normal (arg1);
16982       op2 = expand_normal (arg2);
16983       if (!REG_P (op0))
16984         op0 = copy_to_mode_reg (Pmode, op0);
16985       if (!REG_P (op1))
16986         op1 = copy_to_mode_reg (SImode, op1);
16987       if (!REG_P (op2))
16988         op2 = copy_to_mode_reg (SImode, op2);
16989       if (!TARGET_64BIT)
16990         emit_insn (gen_sse3_monitor (op0, op1, op2));
16991       else
16992         emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16993       return 0;
16995     case IX86_BUILTIN_MWAIT:
16996       arg0 = TREE_VALUE (arglist);
16997       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16998       op0 = expand_normal (arg0);
16999       op1 = expand_normal (arg1);
17000       if (!REG_P (op0))
17001         op0 = copy_to_mode_reg (SImode, op0);
17002       if (!REG_P (op1))
17003         op1 = copy_to_mode_reg (SImode, op1);
17004       emit_insn (gen_sse3_mwait (op0, op1));
17005       return 0;
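/* Editor's addition: sketch for the SSE3 MONITOR/MWAIT cases, assuming
   the GCC 4.x builtins __builtin_ia32_monitor and __builtin_ia32_mwait
   behind _mm_monitor and _mm_mwait in pmmintrin.h.

       void
       wait_on (void const *p)
       {
         __builtin_ia32_monitor (p, 0, 0);
         __builtin_ia32_mwait (0, 0);
       }
*/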
17007 case IX86_BUILTIN_LDDQU:
17008       return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17009                                        target, 1);
17011 case IX86_BUILTIN_PALIGNR:
17012 case IX86_BUILTIN_PALIGNR128:
17013       if (fcode == IX86_BUILTIN_PALIGNR)
17014         {
17015           icode = CODE_FOR_ssse3_palignrdi;
17016           mode = DImode;
17017         }
17018       else
17019         {
17020           icode = CODE_FOR_ssse3_palignrti;
17021           mode = TImode;
17022         }
17023 arg0 = TREE_VALUE (arglist);
17024 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17025 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17026 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17027 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17028 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17029 tmode = insn_data[icode].operand[0].mode;
17030 mode1 = insn_data[icode].operand[1].mode;
17031 mode2 = insn_data[icode].operand[2].mode;
17032 mode3 = insn_data[icode].operand[3].mode;
17034       if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17035         {
17036           op0 = copy_to_reg (op0);
17037           op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17038         }
17039       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17040         {
17041           op1 = copy_to_reg (op1);
17042           op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17043         }
17044       if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17045         {
17046           error ("shift must be an immediate");
17047           return const0_rtx;
17048         }
17049       target = gen_reg_rtx (mode);
17050       pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17051                              op0, op1, op2);
17052       if (! pat)
17053         return 0;
17054       emit_insn (pat);
17055       return target;
17057 case IX86_BUILTIN_MOVNTSD:
17058 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17060 case IX86_BUILTIN_MOVNTSS:
17061 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17063 case IX86_BUILTIN_INSERTQ:
17064 case IX86_BUILTIN_EXTRQ:
17065 icode = (fcode == IX86_BUILTIN_EXTRQ
17066 ? CODE_FOR_sse4a_extrq
17067 : CODE_FOR_sse4a_insertq);
17068 arg0 = TREE_VALUE (arglist);
17069 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17070 op0 = expand_normal (arg0);
17071 op1 = expand_normal (arg1);
17072 tmode = insn_data[icode].operand[0].mode;
17073 mode1 = insn_data[icode].operand[1].mode;
17074 mode2 = insn_data[icode].operand[2].mode;
17075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17076 op0 = copy_to_mode_reg (mode1, op0);
17077 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17078 op1 = copy_to_mode_reg (mode2, op1);
17079 if (optimize || target == 0
17080 || GET_MODE (target) != tmode
17081 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17082 target = gen_reg_rtx (tmode);
17083       pat = GEN_FCN (icode) (target, op0, op1);
17084       if (! pat)
17085         return NULL_RTX;
17086       emit_insn (pat);
17087       return target;
17089 case IX86_BUILTIN_EXTRQI:
17090 icode = CODE_FOR_sse4a_extrqi;
17091 arg0 = TREE_VALUE (arglist);
17092 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17093 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17094 op0 = expand_normal (arg0);
17095 op1 = expand_normal (arg1);
17096 op2 = expand_normal (arg2);
17097 tmode = insn_data[icode].operand[0].mode;
17098 mode1 = insn_data[icode].operand[1].mode;
17099 mode2 = insn_data[icode].operand[2].mode;
17100 mode3 = insn_data[icode].operand[3].mode;
17101 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17102 op0 = copy_to_mode_reg (mode1, op0);
17103       if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17104         {
17105           error ("index mask must be an immediate");
17106           return gen_reg_rtx (tmode);
17107         }
17108       if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17109         {
17110           error ("length mask must be an immediate");
17111           return gen_reg_rtx (tmode);
17112         }
17113 if (optimize || target == 0
17114 || GET_MODE (target) != tmode
17115 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17116 target = gen_reg_rtx (tmode);
17117       pat = GEN_FCN (icode) (target, op0, op1, op2);
17118       if (! pat)
17119         return NULL_RTX;
17120       emit_insn (pat);
17121       return target;
17123 case IX86_BUILTIN_INSERTQI:
17124 icode = CODE_FOR_sse4a_insertqi;
17125 arg0 = TREE_VALUE (arglist);
17126 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17127 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17128 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17129 op0 = expand_normal (arg0);
17130 op1 = expand_normal (arg1);
17131 op2 = expand_normal (arg2);
17132 op3 = expand_normal (arg3);
17133 tmode = insn_data[icode].operand[0].mode;
17134 mode1 = insn_data[icode].operand[1].mode;
17135 mode2 = insn_data[icode].operand[2].mode;
17136 mode3 = insn_data[icode].operand[3].mode;
17137 mode4 = insn_data[icode].operand[4].mode;
17139 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17140 op0 = copy_to_mode_reg (mode1, op0);
17142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17143 op1 = copy_to_mode_reg (mode2, op1);
17145       if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17146         {
17147           error ("index mask must be an immediate");
17148           return gen_reg_rtx (tmode);
17149         }
17150       if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17151         {
17152           error ("length mask must be an immediate");
17153           return gen_reg_rtx (tmode);
17154         }
17155 if (optimize || target == 0
17156 || GET_MODE (target) != tmode
17157 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17158 target = gen_reg_rtx (tmode);
17159       pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17160       if (! pat)
17161         return NULL_RTX;
17162       emit_insn (pat);
17163       return target;
17165 case IX86_BUILTIN_VEC_INIT_V2SI:
17166 case IX86_BUILTIN_VEC_INIT_V4HI:
17167 case IX86_BUILTIN_VEC_INIT_V8QI:
17168 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17170 case IX86_BUILTIN_VEC_EXT_V2DF:
17171 case IX86_BUILTIN_VEC_EXT_V2DI:
17172 case IX86_BUILTIN_VEC_EXT_V4SF:
17173 case IX86_BUILTIN_VEC_EXT_V4SI:
17174 case IX86_BUILTIN_VEC_EXT_V8HI:
17175 case IX86_BUILTIN_VEC_EXT_V16QI:
17176 case IX86_BUILTIN_VEC_EXT_V2SI:
17177 case IX86_BUILTIN_VEC_EXT_V4HI:
17178 return ix86_expand_vec_ext_builtin (arglist, target);
17180 case IX86_BUILTIN_VEC_SET_V8HI:
17181 case IX86_BUILTIN_VEC_SET_V4HI:
17182       return ix86_expand_vec_set_builtin (arglist);
17184     default:
17185       break;
17186     }
17188 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17189 if (d->code == fcode)
17190       {
17191         /* Compares are treated specially.  */
17192 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17193 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17194 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17195 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17196 return ix86_expand_sse_compare (d, arglist, target);
17198         return ix86_expand_binop_builtin (d->icode, arglist, target);
17199       }
17201 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17202 if (d->code == fcode)
17203 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17205 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17206 if (d->code == fcode)
17207 return ix86_expand_sse_comi (d, arglist, target);
17209   gcc_unreachable ();
17210 }
17212 /* Store OPERAND to the memory after reload is completed. This means
17213 that we can't easily use assign_stack_local. */
17214 rtx
17215 ix86_force_to_memory (enum machine_mode mode, rtx operand)
17216 {
17217   rtx result;
17219   gcc_assert (reload_completed);
17220   if (TARGET_RED_ZONE)
17221     {
17222       result = gen_rtx_MEM (mode,
17223                             gen_rtx_PLUS (Pmode,
17224                                           stack_pointer_rtx,
17225                                           GEN_INT (-RED_ZONE_SIZE)));
17226       emit_move_insn (result, operand);
17227     }
17228   else if (!TARGET_RED_ZONE && TARGET_64BIT)
17229     {
17230       switch (mode)
17231         {
17232         case HImode:
17233         case SImode:
17234           operand = gen_lowpart (DImode, operand);
17235           /* FALLTHRU */
17236         case DImode:
17237           emit_insn (
17238                       gen_rtx_SET (VOIDmode,
17239                                    gen_rtx_MEM (DImode,
17240                                                 gen_rtx_PRE_DEC (DImode,
17241                                                         stack_pointer_rtx)),
17242                                    operand));
17243           break;
17244         default:
17245           gcc_unreachable ();
17246         }
17247       result = gen_rtx_MEM (mode, stack_pointer_rtx);
17248     }
17249   else
17250     {
17251       switch (mode)
17252         {
17253         case DImode:
17254           {
17255             rtx operands[2];
17256             split_di (&operand, 1, operands, operands + 1);
17257             emit_insn (
17258                         gen_rtx_SET (VOIDmode,
17259                                      gen_rtx_MEM (SImode,
17260                                                   gen_rtx_PRE_DEC (Pmode,
17261                                                           stack_pointer_rtx)),
17262                                      operands[1]));
17263             emit_insn (
17264                         gen_rtx_SET (VOIDmode,
17265                                      gen_rtx_MEM (SImode,
17266                                                   gen_rtx_PRE_DEC (Pmode,
17267                                                           stack_pointer_rtx)),
17268                                      operands[0]));
17269           }
17270           break;
17271         case HImode:
17272           /* Store HImodes as SImodes.  */
17273           operand = gen_lowpart (SImode, operand);
17274           /* FALLTHRU */
17275         case SImode:
17276           emit_insn (
17277                       gen_rtx_SET (VOIDmode,
17278                                    gen_rtx_MEM (GET_MODE (operand),
17279                                                 gen_rtx_PRE_DEC (SImode,
17280                                                         stack_pointer_rtx)),
17281                                    operand));
17282           break;
17283         default:
17284           gcc_unreachable ();
17285         }
17286       result = gen_rtx_MEM (mode, stack_pointer_rtx);
17287     }
17288   return result;
17289 }
17291 /* Free operand from the memory. */
17292 void
17293 ix86_free_from_memory (enum machine_mode mode)
17294 {
17295   if (!TARGET_RED_ZONE)
17296     {
17297       int size;
17299       if (mode == DImode || TARGET_64BIT)
17300         size = 8;
17301       else
17302         size = 4;
17303       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
17304          to pop or add instruction if registers are available.  */
17305       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17306                               gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17307                                             GEN_INT (size))));
17308     }
17309 }
17311 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17312 QImode must go into class Q_REGS.
17313 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17314 movdf to do mem-to-mem moves through integer regs. */
17315 enum reg_class
17316 ix86_preferred_reload_class (rtx x, enum reg_class class)
17317 {
17318   enum machine_mode mode = GET_MODE (x);
17320   /* We're only allowed to return a subclass of CLASS.  Many of the
17321      following checks fail for NO_REGS, so eliminate that early.  */
17322   if (class == NO_REGS)
17323     return NO_REGS;
17325   /* All classes can load zeros.  */
17326   if (x == CONST0_RTX (mode))
17327     return class;
17329   /* Force constants into memory if we are loading a (nonzero) constant into
17330      an MMX or SSE register.  This is because there are no MMX/SSE instructions
17331      to load from a constant.  */
17332   if (CONSTANT_P (x)
17333       && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17334     return NO_REGS;
17336 /* Prefer SSE regs only, if we can use them for math. */
17337 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17338 return SSE_CLASS_P (class) ? class : NO_REGS;
17340 /* Floating-point constants need more complex checks. */
17341   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17342     {
17343       /* General regs can load everything.  */
17344       if (reg_class_subset_p (class, GENERAL_REGS))
17345         return class;
17347       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
17348          zero above.  We only want to wind up preferring 80387 registers if
17349          we plan on doing computation with them.  */
17350       if (TARGET_80387
17351           && standard_80387_constant_p (x))
17352         {
17353           /* Limit class to non-sse.  */
17354           if (class == FLOAT_SSE_REGS)
17355             return FLOAT_REGS;
17356           if (class == FP_TOP_SSE_REGS)
17357             return FP_TOP_REG;
17358           if (class == FP_SECOND_SSE_REGS)
17359             return FP_SECOND_REG;
17360           if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17361             return class;
17362         }
17364       return NO_REGS;
17365     }
17367 /* Generally when we see PLUS here, it's the function invariant
17368      (plus soft-fp const_int).  Which can only be computed into general
17369      regs.  */
17370 if (GET_CODE (x) == PLUS)
17371 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17373 /* QImode constants are easy to load, but non-constant QImode data
17374 must go into Q_REGS. */
17375   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17376     {
17377       if (reg_class_subset_p (class, Q_REGS))
17378         return class;
17379       if (reg_class_subset_p (Q_REGS, class))
17380         return Q_REGS;
17381       return NO_REGS;
17382     }
17384   return class;
17385 }
17387 /* Discourage putting floating-point values in SSE registers unless
17388 SSE math is being used, and likewise for the 387 registers. */
17389 enum reg_class
17390 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17391 {
17392   enum machine_mode mode;
17394 /* Restrict the output reload class to the register bank that we are doing
17395 math on. If we would like not to return a subset of CLASS, reject this
17396 alternative: if reload cannot do this, it will still use its choice. */
17397 mode = GET_MODE (x);
17398 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17399 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17401 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17402     {
17403       if (class == FP_TOP_SSE_REGS)
17404         return FP_TOP_REG;
17405       else if (class == FP_SECOND_SSE_REGS)
17406         return FP_SECOND_REG;
17407       else
17408         return FLOAT_CLASS_P (class) ? class : NO_REGS;
17409     }
17411   return class;
17412 }
17414 /* If we are copying between general and FP registers, we need a memory
17415 location. The same is true for SSE and MMX registers.
17417 The macro can't work reliably when one of the CLASSES is class containing
17418 registers from multiple units (SSE, MMX, integer). We avoid this by never
17419 combining those units in single alternative in the machine description.
17420 Ensure that this constraint holds to avoid unexpected surprises.
17422 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17423 enforce these sanity checks. */
17425 int
17426 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17427                               enum machine_mode mode, int strict)
17428 {
17429 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17430 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17431 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17432 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17433 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17434       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17435     {
17436       gcc_assert (!strict);
17437       return true;
17438     }
17440   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17441     return true;
17443 /* ??? This is a lie. We do have moves between mmx/general, and for
17444 mmx/sse2. But by saying we need secondary memory we discourage the
17445 register allocator from using the mmx registers unless needed. */
17446   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17447     return true;
17449   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17450     {
17451       /* SSE1 doesn't have any direct moves from other classes.  */
17452       if (!TARGET_SSE2)
17453         return true;
17455       /* If the target says that inter-unit moves are more expensive
17456          than moving through memory, then don't generate them.  */
17457       if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17458         return true;
17460       /* Between SSE and general, we have moves no larger than word size.  */
17461       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17462         return true;
17464       /* ??? For the cost of one register reformat penalty, we could use
17465          the same instructions to move SFmode and DFmode data, but the
17466          relevant move patterns don't support those alternatives.  */
17467       if (mode == SFmode || mode == DFmode)
17468         return true;
17469     }
17471   return false;
17472 }
17474 /* Return true if the registers in CLASS cannot represent the change from
17475 modes FROM to TO. */
17477 bool
17478 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17479                                enum reg_class class)
17480 {
17481   if (from == to)
17482     return false;
17484 /* x87 registers can't do subreg at all, as all values are reformatted
17485 to extended precision. */
17486   if (MAYBE_FLOAT_CLASS_P (class))
17487     return true;
17489   if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17490     {
17491 /* Vector registers do not support QI or HImode loads. If we don't
17492 disallow a change to these modes, reload will assume it's ok to
17493 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17494 the vec_dupv4hi pattern. */
17495       if (GET_MODE_SIZE (from) < 4)
17496         return true;
17498 /* Vector registers do not support subreg with nonzero offsets, which
17499 are otherwise valid for integer registers. Since we can't see
17500 whether we have a nonzero offset from here, prohibit all
17501 nonparadoxical subregs changing size. */
17502       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17503         return true;
17504     }
17506   return false;
17507 }
17509 /* Return the cost of moving data from a register in class CLASS1 to
17510 one in class CLASS2.
17512 It is not required that the cost always equal 2 when FROM is the same as TO;
17513 on some machines it is expensive to move between registers if they are not
17514 general registers. */
17516 int
17517 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17518                          enum reg_class class2)
17519 {
17520 /* In case we require secondary memory, compute cost of the store followed
17521 by load. In order to avoid bad register allocation choices, we need
17522 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17524   if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17525     {
17526       int cost = 1;
17528       cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17529 MEMORY_MOVE_COST (mode, class1, 1));
17530 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17531 MEMORY_MOVE_COST (mode, class2, 1));
17533 /* In case of copying from general_purpose_register we may emit multiple
17534 stores followed by single load causing memory size mismatch stall.
17535 Count this as arbitrarily high cost of 20. */
17536       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17537         cost += 20;
17539 /* In the case of FP/MMX moves, the registers actually overlap, and we
17540 have to switch modes in order to treat them differently. */
17541 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17542           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17543         cost += 20;
17545       return cost;
17546     }
17548 /* Moves between SSE/MMX and integer unit are expensive. */
17549 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17550 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17551 return ix86_cost->mmxsse_to_integer;
17552 if (MAYBE_FLOAT_CLASS_P (class1))
17553 return ix86_cost->fp_move;
17554 if (MAYBE_SSE_CLASS_P (class1))
17555 return ix86_cost->sse_move;
17556 if (MAYBE_MMX_CLASS_P (class1))
17557     return ix86_cost->mmx_move;
17558   return 2;
17559 }
17561 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17563 int
17564 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17565 {
17566   /* Flags and only flags can only hold CCmode values.  */
17567   if (CC_REGNO_P (regno))
17568     return GET_MODE_CLASS (mode) == MODE_CC;
17569   if (GET_MODE_CLASS (mode) == MODE_CC
17570       || GET_MODE_CLASS (mode) == MODE_RANDOM
17571       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17572     return 0;
17573   if (FP_REGNO_P (regno))
17574     return VALID_FP_MODE_P (mode);
17575   if (SSE_REGNO_P (regno))
17576     {
17577       /* We implement the move patterns for all vector modes into and
17578          out of SSE registers, even when no operation instructions
17579          are available.  */
17580 return (VALID_SSE_REG_MODE (mode)
17581 || VALID_SSE2_REG_MODE (mode)
17582 || VALID_MMX_REG_MODE (mode)
17583               || VALID_MMX_REG_MODE_3DNOW (mode));
17584     }
17585   if (MMX_REGNO_P (regno))
17586     {
17587 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17588 so if the register is available at all, then we can move data of
17589 the given mode into or out of it. */
17590 return (VALID_MMX_REG_MODE (mode)
17591               || VALID_MMX_REG_MODE_3DNOW (mode));
17592     }
17594   if (mode == QImode)
17595     {
17596 /* Take care for QImode values - they can be in non-QI regs,
17597 but then they do cause partial register stalls. */
17598       if (regno < 4 || TARGET_64BIT)
17599         return 1;
17600       if (!TARGET_PARTIAL_REG_STALL)
17601         return 1;
17602       return reload_in_progress || reload_completed;
17603     }
17604 /* We handle both integer and floats in the general purpose registers. */
17605   else if (VALID_INT_MODE_P (mode))
17606     return 1;
17607   else if (VALID_FP_MODE_P (mode))
17608     return 1;
17609 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17610 on to use that value in smaller contexts, this can easily force a
17611 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17612 supporting DImode, allow it. */
17613   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17614     return 1;
17615   return 0;
17616 }
17619 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17620 tieable integer mode. */
17622 static bool
17623 ix86_tieable_integer_mode_p (enum machine_mode mode)
17624 {
17625   switch (mode)
17626     {
17627     case HImode:
17628     case SImode:
17629       return true;
17631     case QImode:
17632       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17634     case DImode:
17635       return TARGET_64BIT;
17637     default:
17638       return false;
17639     }
17640 }
17642 /* Return true if MODE1 is accessible in a register that can hold MODE2
17643 without copying. That is, all register classes that can hold MODE2
17644 can also hold MODE1. */
17646 static bool
17647 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17648 {
17649   if (mode1 == mode2)
17650     return true;
17652   if (ix86_tieable_integer_mode_p (mode1)
17653       && ix86_tieable_integer_mode_p (mode2))
17654     return true;
17656 /* MODE2 being XFmode implies fp stack or general regs, which means we
17657 can tie any smaller floating point modes to it. Note that we do not
17658 tie this with TFmode. */
17659 if (mode2 == XFmode)
17660 return mode1 == SFmode || mode1 == DFmode;
17662 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17663 that we can tie it with SFmode. */
17664 if (mode2 == DFmode)
17665 return mode1 == SFmode;
17667 /* If MODE2 is only appropriate for an SSE register, then tie with
17668 any other mode acceptable to SSE registers. */
17669 if (GET_MODE_SIZE (mode2) >= 8
17670 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17671 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17673 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17674 with any other mode acceptable to MMX registers. */
17675 if (GET_MODE_SIZE (mode2) == 8
17676 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17677     return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17679   return false;
17680 }
17682 /* Return the cost of moving data of mode M between a
17683 register and memory. A value of 2 is the default; this cost is
17684 relative to those in `REGISTER_MOVE_COST'.
17686 If moving between registers and memory is more expensive than
17687    between two registers, you should define this macro to express the
17688    relative cost.
17690    Model also increased moving costs of QImode registers in non
17691    Q_REGS classes.
17692  */
17693 int
17694 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17695 {
17696   if (FLOAT_CLASS_P (class))
17697     {
17698       int index;
17699       switch (mode)
17700         {
17701         case SFmode:
17702           index = 0;
17703           break;
17704         case DFmode:
17705           index = 1;
17706           break;
17707         case XFmode:
17708           index = 2;
17709           break;
17710         default:
17711           return 100;
17712         }
17713       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17714     }
17715 if (SSE_CLASS_P (class))
17716     {
17717       int index;
17718       switch (GET_MODE_SIZE (mode))
17719         {
17720         case 4:
17721           index = 0;
17722           break;
17723         case 8:
17724           index = 1;
17725           break;
17726         case 16:
17727           index = 2;
17728           break;
17729         default:
17730           return 100;
17731         }
17732       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17733     }
17734 if (MMX_CLASS_P (class))
17735     {
17736       int index;
17737       switch (GET_MODE_SIZE (mode))
17738         {
17739         case 4:
17740           index = 0;
17741           break;
17742         case 8:
17743           index = 1;
17744           break;
17745         default:
17746           return 100;
17747         }
17748       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17749     }
17750   switch (GET_MODE_SIZE (mode))
17751     {
17752       case 1:
17753         if (in)
17754           return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17755                   : ix86_cost->movzbl_load);
17756         else
17757           return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17758                   : ix86_cost->int_store[0] + 4);
17759         break;
17760       case 2:
17761         return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17762       default:
17763         /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
17764         if (mode == TFmode)
17765           mode = XFmode;
17766 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17767 * (((int) GET_MODE_SIZE (mode)
17768                 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17769     }
17770 }
17772 /* Compute a (partial) cost for rtx X. Return true if the complete
17773 cost has been computed, and false if subexpressions should be
17774 scanned. In either case, *TOTAL contains the cost result. */
17776 static bool
17777 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17778 {
17779   enum machine_mode mode = GET_MODE (x);
17781   switch (code)
17782     {
17783     case CONST_INT:
17784     case CONST:
17785     case LABEL_REF:
17786     case SYMBOL_REF:
17787       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17788         *total = 3;
17789       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17790         *total = 2;
17791       else if (flag_pic && SYMBOLIC_CONST (x)
17792                && (!TARGET_64BIT
17793                    || (GET_CODE (x) != LABEL_REF
17794                        && (GET_CODE (x) != SYMBOL_REF
17795                            || !SYMBOL_REF_LOCAL_P (x)))))
17796         *total = 1;
17797       else
17798         *total = 0;
17799       break;
17801     case CONST_DOUBLE:
17802       if (mode == VOIDmode)
17803         *total = 0;
17804       else
17805         switch (standard_80387_constant_p (x))
17806           {
17807           case 1: /* 0.0 */
17808             *total = 1;
17809             break;
17810           default: /* Other constants */
17811             *total = 2;
17812             break;
17813           case 0:
17814           case -1:
17815             /* Start with (MEM (SYMBOL_REF)), since that's where
17816 it'll probably end up. Add a penalty for size. */
17817 *total = (COSTS_N_INSNS (1)
17818 + (flag_pic != 0 && !TARGET_64BIT)
17819                       + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17820             break;
17821           }
17822       break;
17824     case ZERO_EXTEND:
17825       /* Zero extension is often completely free on x86_64, so make
17826 it as cheap as possible. */
17827 if (TARGET_64BIT && mode == DImode
17828 && GET_MODE (XEXP (x, 0)) == SImode)
17829         *total = 1;
17830       else if (TARGET_ZERO_EXTEND_WITH_AND)
17831         *total = ix86_cost->add;
17832       else
17833         *total = ix86_cost->movzx;
17834       break;
17836     case SIGN_EXTEND:
17837       *total = ix86_cost->movsx;
17838       break;
17840     case ASHIFT:
17841       if (GET_CODE (XEXP (x, 1)) == CONST_INT
17842 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17843         {
17844           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17845           if (value == 1)
17846             {
17847               *total = ix86_cost->add;
17848               return false;
17849             }
17850           if ((value == 2 || value == 3)
17851               && ix86_cost->lea <= ix86_cost->shift_const)
17852             {
17853               *total = ix86_cost->lea;
17854               return false;
17855             }
17856         }
17857       /* FALLTHRU */
17859     case ROTATE:
17860     case ASHIFTRT:
17861     case LSHIFTRT:
17862     case ROTATERT:
17863 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17864         {
17865           if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17866             {
17867               if (INTVAL (XEXP (x, 1)) > 32)
17868                 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17869               else
17870                 *total = ix86_cost->shift_const * 2;
17871             }
17872           else
17873             {
17874               if (GET_CODE (XEXP (x, 1)) == AND)
17875                 *total = ix86_cost->shift_var * 2;
17876               else
17877                 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17878             }
17879         }
17880       else
17881         {
17882           if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17883             *total = ix86_cost->shift_const;
17884           else
17885             *total = ix86_cost->shift_var;
17886         }
17887       break;
17889     case MULT:
17890       if (FLOAT_MODE_P (mode))
17891         {
17892           *total = ix86_cost->fmul;
17893           return false;
17894         }
17895       else
17896         {
17897           rtx op0 = XEXP (x, 0);
17898           rtx op1 = XEXP (x, 1);
17899           int nbits;
17900           if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17901             {
17902               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17903               for (nbits = 0; value != 0; value &= value - 1)
17904                 nbits++;
17905             }
17906           else
17907             /* This is arbitrary.  */
17908             nbits = 7;
17910 /* Compute costs correctly for widening multiplication. */
17911           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17912 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17913 == GET_MODE_SIZE (mode))
17914             {
17915               int is_mulwiden = 0;
17916               enum machine_mode inner_mode = GET_MODE (op0);
17918               if (GET_CODE (op0) == GET_CODE (op1))
17919                 is_mulwiden = 1, op1 = XEXP (op1, 0);
17920               else if (GET_CODE (op1) == CONST_INT)
17921                 {
17922                   if (GET_CODE (op0) == SIGN_EXTEND)
17923                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17924                                   == INTVAL (op1);
17925                   else
17926                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17927                 }
17929               if (is_mulwiden)
17930                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17931             }
17933           *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17934                     + nbits * ix86_cost->mult_bit
17935                     + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17937           return true;
17938         }
17940     case DIV:
17941     case UDIV:
17942     case MOD:
17943     case UMOD:
17944       if (FLOAT_MODE_P (mode))
17945         *total = ix86_cost->fdiv;
17946       else
17947         *total = ix86_cost->divide[MODE_INDEX (mode)];
17948       break;
17950     case PLUS:
17951       if (FLOAT_MODE_P (mode))
17952 *total = ix86_cost->fadd;
17953 else if (GET_MODE_CLASS (mode) == MODE_INT
17954                && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17955         {
17956           if (GET_CODE (XEXP (x, 0)) == PLUS
17957               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17958               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17959               && CONSTANT_P (XEXP (x, 1)))
17960             {
17961               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17962               if (val == 2 || val == 4 || val == 8)
17963                 {
17964                   *total = ix86_cost->lea;
17965                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17966                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17967                                       outer_code);
17968                   *total += rtx_cost (XEXP (x, 1), outer_code);
17969                   return true;
17970                 }
17971             }
17972           else if (GET_CODE (XEXP (x, 0)) == MULT
17973                    && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17974             {
17975               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17976               if (val == 2 || val == 4 || val == 8)
17977                 {
17978                   *total = ix86_cost->lea;
17979                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17980                   *total += rtx_cost (XEXP (x, 1), outer_code);
17981                   return true;
17982                 }
17983             }
17984           else if (GET_CODE (XEXP (x, 0)) == PLUS)
17985             {
17986               *total = ix86_cost->lea;
17987               *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17988               *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17989               *total += rtx_cost (XEXP (x, 1), outer_code);
17990               return true;
17991             }
17992         }
17993       /* FALLTHRU */
17995     case MINUS:
17996       if (FLOAT_MODE_P (mode))
17997         {
17998           *total = ix86_cost->fadd;
17999           return false;
18000         }
18001       /* FALLTHRU */
18003     case AND:
18004     case IOR:
18005     case XOR:
18006       if (!TARGET_64BIT && mode == DImode)
18007         {
18008           *total = (ix86_cost->add * 2
18009 + (rtx_cost (XEXP (x, 0), outer_code)
18010 << (GET_MODE (XEXP (x, 0)) != DImode))
18011 + (rtx_cost (XEXP (x, 1), outer_code)
18012                      << (GET_MODE (XEXP (x, 1)) != DImode)));
18013           return true;
18014         }
18015       /* FALLTHRU */
18017     case NEG:
18018       if (FLOAT_MODE_P (mode))
18019         {
18020           *total = ix86_cost->fchs;
18021           return false;
18022         }
18023       /* FALLTHRU */
18025     case NOT:
18026       if (!TARGET_64BIT && mode == DImode)
18027         *total = ix86_cost->add * 2;
18028       else
18029         *total = ix86_cost->add;
18030       break;
18032     case COMPARE:
18033       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18034 && XEXP (XEXP (x, 0), 1) == const1_rtx
18035 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18036 && XEXP (x, 1) == const0_rtx)
18037         {
18038           /* This kind of construct is implemented using test[bwl].
18039 Treat it as if we had an AND. */
18040 *total = (ix86_cost->add
18041 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18042                     + rtx_cost (const1_rtx, outer_code));
18043           return true;
18044         }
18045       return false;
18047     case FLOAT_EXTEND:
18048       if (!TARGET_SSE_MATH
18049           || mode == XFmode
18050           || (mode == DFmode && !TARGET_SSE2))
18051         /* For standard 80387 constants, raise the cost to prevent
18052            compress_float_constant() from generating a load from memory.  */
18053         switch (standard_80387_constant_p (XEXP (x, 0)))
18054           {
18055           case -1:
18056           case 0:
18057             *total = 0;
18058             break;
18059           case 1: /* 0.0 */
18060             *total = 1;
18061             break;
18062           default:
18063             *total = (x86_ext_80387_constants & TUNEMASK
18064                       || optimize_size
18065                       ? 1 : 0);
18066           }
18067       break;
18069     case ABS:
18070 if (FLOAT_MODE_P (mode))
18071         *total = ix86_cost->fabs;
18072       break;
18074     case SQRT:
18075       if (FLOAT_MODE_P (mode))
18076         *total = ix86_cost->fsqrt;
18077       break;
18079     case UNSPEC:
18080       if (XINT (x, 1) == UNSPEC_TP)
18081         *total = 0;
18082       break;
18084     default:
18085       return false;
18086     }
18087 }
18089 #if TARGET_MACHO
18091 static int current_machopic_label_num;
18093 /* Given a symbol name and its associated stub, write out the
18094 definition of the stub. */
18096 void
18097 machopic_output_stub (FILE *file, const char *symb, const char *stub)
18098 {
18099 unsigned int length;
18100 char *binder_name, *symbol_name, lazy_ptr_name[32];
18101 int label = ++current_machopic_label_num;
18103 /* For 64-bit we shouldn't get here. */
18104 gcc_assert (!TARGET_64BIT);
18106 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18107 symb = (*targetm.strip_name_encoding) (symb);
18109 length = strlen (stub);
18110 binder_name = alloca (length + 32);
18111 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18113 length = strlen (symb);
18114 symbol_name = alloca (length + 32);
18115 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18117 sprintf (lazy_ptr_name, "L%d$lz", label);
18119   if (MACHOPIC_PURE)
18120     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18121   else
18122     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18124 fprintf (file, "%s:\n", stub);
18125 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18127   if (MACHOPIC_PURE)
18128     {
18129       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18130       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18131       fprintf (file, "\tjmp\t*%%edx\n");
18132     }
18133   else
18134     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18136   fprintf (file, "%s:\n", binder_name);
18138   if (MACHOPIC_PURE)
18139     {
18140       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18141       fprintf (file, "\tpushl\t%%eax\n");
18142     }
18143   else
18144     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18146   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18148 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18149 fprintf (file, "%s:\n", lazy_ptr_name);
18150 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18151 fprintf (file, "\t.long %s\n", binder_name);
18152 }
18154 void
18155 darwin_x86_file_end (void)
18156 {
18157   darwin_file_end ();
18158   ix86_file_end ();
18159 }
18160 #endif /* TARGET_MACHO */
18162 /* Order the registers for register allocator. */
18164 void
18165 x86_order_regs_for_local_alloc (void)
18166 {
18167    int pos = 0;
18168    int i;
18170 /* First allocate the local general purpose registers. */
18171 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18172 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18173 reg_alloc_order [pos++] = i;
18175 /* Global general purpose registers. */
18176 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18177 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18178 reg_alloc_order [pos++] = i;
18180    /* x87 registers come first in case we are doing FP math
18181       using them.  */
18182 if (!TARGET_SSE_MATH)
18183 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18184 reg_alloc_order [pos++] = i;
18186 /* SSE registers. */
18187 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18188 reg_alloc_order [pos++] = i;
18189 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18190 reg_alloc_order [pos++] = i;
18192 /* x87 registers. */
18193 if (TARGET_SSE_MATH)
18194 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18195 reg_alloc_order [pos++] = i;
18197 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18198 reg_alloc_order [pos++] = i;
18200    /* Initialize the rest of array as we do not allocate some registers
18201       at all.  */
18202    while (pos < FIRST_PSEUDO_REGISTER)
18203      reg_alloc_order [pos++] = 0;
18204 }
18206 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18207 struct attribute_spec.handler. */
18208 static tree
18209 ix86_handle_struct_attribute (tree *node, tree name,
18210                               tree args ATTRIBUTE_UNUSED,
18211                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18212 {
18213   tree *type = NULL;
18214   if (DECL_P (*node))
18215     {
18216 if (TREE_CODE (*node) == TYPE_DECL)
18217         type = &TREE_TYPE (*node);
18218     }
18219   else
18220     type = node;
18222   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18223                  || TREE_CODE (*type) == UNION_TYPE)))
18224     {
18225       warning (OPT_Wattributes, "%qs attribute ignored",
18226 IDENTIFIER_POINTER (name));
18227       *no_add_attrs = true;
18228     }
18230 else if ((is_attribute_p ("ms_struct", name)
18231 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18232 || ((is_attribute_p ("gcc_struct", name)
18233 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18235 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18236 IDENTIFIER_POINTER (name));
18237       *no_add_attrs = true;
18238     }
18240   return NULL_TREE;
18241 }
18243 static bool
18244 ix86_ms_bitfield_layout_p (tree record_type)
18245 {
18246   return (TARGET_MS_BITFIELD_LAYOUT &&
18247           !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18248          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18249 }
18251 /* Returns an expression indicating where the this parameter is
18252 located on entry to the FUNCTION. */
18254 static rtx
18255 x86_this_parameter (tree function)
18256 {
18257   tree type = TREE_TYPE (function);
18259   if (TARGET_64BIT)
18260     {
18261       int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18262       return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18263     }
18265   if (ix86_function_regparm (type, function) > 0)
18266     {
18267       tree parm;
18269       parm = TYPE_ARG_TYPES (type);
18270       /* Figure out whether or not the function has a variable number of
18271          arguments.  */
18272       for (; parm; parm = TREE_CHAIN (parm))
18273         if (TREE_VALUE (parm) == void_type_node)
18274           break;
18275       /* If not, the this parameter is in the first argument.  */
18276       if (parm != NULL_TREE)
18277         {
18278           int regno = 0;
18279           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18280             regno = 2;
18281           return gen_rtx_REG (SImode, regno);
18282         }
18283     }
18285 if (aggregate_value_p (TREE_TYPE (type), type))
18286 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18288   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18289 }
18291 /* Determine whether x86_output_mi_thunk can succeed.  */
18293 static bool
18294 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18295                          HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18296                          HOST_WIDE_INT vcall_offset, tree function)
18297 {
18298   /* 64-bit can handle anything.  */
18299   if (TARGET_64BIT)
18300     return true;
18302 /* For 32-bit, everything's fine if we have one free register. */
18303   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18304     return true;
18306   /* Need a free register for vcall_offset.  */
18307   if (vcall_offset)
18308     return false;
18310   /* Need a free register for GOT references.  */
18311   if (flag_pic && !(*targetm.binds_local_p) (function))
18312     return false;
18314   /* Otherwise ok.  */
18315   return true;
18316 }
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */
static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
	    tmp = (gen_rtx_SYMBOL_REF
		   (Pmode,
		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp, NULL_RTX);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
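/* For reference, a typical 32-bit thunk with both adjustments emits
   assembly along these lines (editor's sketch; the exact registers
   depend on the calling convention detected above):

	movl	4(%esp), %eax		# load THIS
	addl	$DELTA, %eax		# constant adjustment
	movl	(%eax), %ecx		# load the vtable pointer
	addl	VCALL_OFFSET(%ecx), %eax # adjustment from the vtable
	movl	%eax, 4(%esp)		# store THIS back
	jmp	function		# tail call the real method  */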
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
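/* For example (editor's sketch, assuming the default MCOUNT_NAME of
   "mcount" and %edx as PROFILE_COUNT_REGISTER), the 32-bit non-PIC
   sequence is simply

	movl	$LP0,%edx
	call	mcount

   while the PIC variants address both the counter label and mcount
   through the GOT so that no text relocation is needed.  */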
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      nbytes += min_insn_size (insn);
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
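/* Worked example (editor's illustration): suppose INSN is the fourth
   jump seen and the loop above has shrunk the window so that the three
   preceding jumps plus INSN total NBYTES = 12 bytes.  Then INSN could
   begin as early as offset 12 - sizeof (INSN) within a 16-byte page,
   and PADSIZE = 15 - 12 + sizeof (INSN) bytes of padding guarantee
   that the four jumps can no longer share one 16-byte window.  */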
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
static void
ix86_reorg (void)
{
  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
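/* The sequence above computes, in scalar terms (editor's model of the
   emitted RTL, not code from the original source):

     if ((int64_t) x >= 0)
       result = (double) (int64_t) x;            -- plain signed convert
     else
       {
	 uint64_t half = (x >> 1) | (x & 1);     -- halve, keep sticky bit
	 result = 2.0 * (double) (int64_t) half; -- convert and double
       }

   ORing the shifted-out bit back in keeps the final rounding correct.  */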
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V8HImode vector.  */
	  tmp1 = gen_reg_rtx (V8HImode);
	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
	  /* Duplicate the low short through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
	  /* Cast the V8HImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
	  return true;
	}
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V16QImode vector.  */
	  tmp1 = gen_reg_rtx (V16QImode);
	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
	  /* Duplicate the low byte through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  /* Cast the V16QImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
	  return true;
	}
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
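/* Example of the widening path (editor's illustration): broadcasting
   the QImode value 0x12 into V8QImode first computes the HImode value
   (0x12 << 8) | 0x12 = 0x1212, then recurses to broadcast 0x1212 into
   V4HImode, which the MMX/SSE patterns above can handle directly.  */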
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    GEN_INT (1),
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
				       GEN_INT (1),
				       GEN_INT (one_var == 1 ? 0 : 1),
				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var >> 1))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
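/* Example (editor's illustration): setting element 5 of a V16QImode
   vector to the variable value V pairs it with constant element
   5 ^ 1 = 4.  Since 5 is odd, V lands in the high byte:
   var = (V << 8) | elt4, and the combined HImode value is stored as
   element 5 >> 1 = 2 of the corresponding V8HImode vector.  */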
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
	rtx v;

	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
	   Recurse to load the two halves.  */

	op0 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

	op1 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

	use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
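/* Lane-by-lane trace (editor's illustration) for IN = [a b c d] with
   FN = addition:

     tmp1 = movhlps (in, in)      = [c d c d]
     tmp2 = tmp1 + in             = [a+c b+d ...]
     tmp3 = shufps (tmp2, elt 1)  = [b+d b+d ...]
     dest = tmp2 + tmp3           = [a+b+c+d ...]

   so element 0 of DEST holds the full reduction.  */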
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG)
	return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
/* Output code to perform a conditional jump to LABEL, if the C2 flag in
   the FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
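/* The two variants boil down to (editor's sketch):

	fnstsw	%ax		# copy the FP status word into AX
	sahf			# TARGET_USE_SAHF: C2 lands in PF ...
	jp	label		#   ... so jump on parity

   versus

	fnstsw	%ax
	testb	$4, %ah		# test C2 directly (bit 10 of the FSW,
	jne	label		# i.e. bit 2 of %ah)  */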
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
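/* The magic constant above is 1 - sqrt(2)/2 = 0.29289...: fyl2xp1 is
   only specified for |x| < 1 - sqrt(2)/2, where computing log2(1+x)
   without explicitly forming 1+x avoids catastrophic cancellation.
   Outside that range the code falls back to fyl2x on the explicitly
   formed 1+x; either way the result is scaled by ln(2) (fldln2) to
   turn the base-2 logarithm into a natural one.  */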
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
19786 /* Return the mangling of TYPE if it is an extended fundamental type. */
19788 static const char *
19789 ix86_mangle_fundamental_type (tree type)
19791 switch (TYPE_MODE (type))
19794 /* __float128 is "g". */
19797 /* "long double" or __float80 is "e". */
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
#include "gt-i386.h"