1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
32 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
46 #include "basic-block.h"
49 #include "target-def.h"
50 #include "langhooks.h"
52 #include "tree-gimple.h"
54 #include "tm-constrs.h"
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
60 /* Return index of given mode in mult and division cost tables. */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
73 struct processor_costs size_cost = { /* costs for tuning for size */
74 COSTS_N_BYTES (2), /* cost of an add instruction */
75 COSTS_N_BYTES (3), /* cost of a lea instruction */
76 COSTS_N_BYTES (2), /* variable shift costs */
77 COSTS_N_BYTES (3), /* constant shift costs */
78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
79 COSTS_N_BYTES (3), /* HI */
80 COSTS_N_BYTES (3), /* SI */
81 COSTS_N_BYTES (3), /* DI */
82 COSTS_N_BYTES (5)}, /* other */
83 0, /* cost of multiply per each bit set */
84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 COSTS_N_BYTES (3), /* cost of movsx */
90 COSTS_N_BYTES (3), /* cost of movzx */
93 2, /* cost for loading QImode using movzbl */
94 {2, 2, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 2, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {2, 2, 2}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {2, 2, 2}, /* cost of storing fp registers
102 in SFmode, DFmode and XFmode */
103 3, /* cost of moving MMX register */
104 {3, 3}, /* cost of loading MMX registers
105 in SImode and DImode */
106 {3, 3}, /* cost of storing MMX registers
107 in SImode and DImode */
108 3, /* cost of moving SSE register */
109 {3, 3, 3}, /* cost of loading SSE registers
110 in SImode, DImode and TImode */
111 {3, 3, 3}, /* cost of storing SSE registers
112 in SImode, DImode and TImode */
113 3, /* MMX or SSE register to integer */
114 0, /* size of prefetch block */
115 0, /* number of parallel prefetches */
117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
120 COSTS_N_BYTES (2), /* cost of FABS instruction. */
121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 /* Processor costs (relative to an add) */
127 struct processor_costs i386_cost = { /* 386 specific costs */
128 COSTS_N_INSNS (1), /* cost of an add instruction */
129 COSTS_N_INSNS (1), /* cost of a lea instruction */
130 COSTS_N_INSNS (3), /* variable shift costs */
131 COSTS_N_INSNS (2), /* constant shift costs */
132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
133 COSTS_N_INSNS (6), /* HI */
134 COSTS_N_INSNS (6), /* SI */
135 COSTS_N_INSNS (6), /* DI */
136 COSTS_N_INSNS (6)}, /* other */
137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
139 COSTS_N_INSNS (23), /* HI */
140 COSTS_N_INSNS (23), /* SI */
141 COSTS_N_INSNS (23), /* DI */
142 COSTS_N_INSNS (23)}, /* other */
143 COSTS_N_INSNS (3), /* cost of movsx */
144 COSTS_N_INSNS (2), /* cost of movzx */
145 15, /* "large" insn */
147 4, /* cost for loading QImode using movzbl */
148 {2, 4, 2}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 4, 2}, /* cost of storing integer registers */
152 2, /* cost of reg,reg fld/fst */
153 {8, 8, 8}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {8, 8, 8}, /* cost of storing fp registers
156 in SFmode, DFmode and XFmode */
157 2, /* cost of moving MMX register */
158 {4, 8}, /* cost of loading MMX registers
159 in SImode and DImode */
160 {4, 8}, /* cost of storing MMX registers
161 in SImode and DImode */
162 2, /* cost of moving SSE register */
163 {4, 8, 16}, /* cost of loading SSE registers
164 in SImode, DImode and TImode */
165 {4, 8, 16}, /* cost of storing SSE registers
166 in SImode, DImode and TImode */
167 3, /* MMX or SSE register to integer */
168 0, /* size of prefetch block */
169 0, /* number of parallel prefetches */
171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
174 COSTS_N_INSNS (22), /* cost of FABS instruction. */
175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
180 struct processor_costs i486_cost = { /* 486 specific costs */
181 COSTS_N_INSNS (1), /* cost of an add instruction */
182 COSTS_N_INSNS (1), /* cost of a lea instruction */
183 COSTS_N_INSNS (3), /* variable shift costs */
184 COSTS_N_INSNS (2), /* constant shift costs */
185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
186 COSTS_N_INSNS (12), /* HI */
187 COSTS_N_INSNS (12), /* SI */
188 COSTS_N_INSNS (12), /* DI */
189 COSTS_N_INSNS (12)}, /* other */
190 1, /* cost of multiply per each bit set */
191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
192 COSTS_N_INSNS (40), /* HI */
193 COSTS_N_INSNS (40), /* SI */
194 COSTS_N_INSNS (40), /* DI */
195 COSTS_N_INSNS (40)}, /* other */
196 COSTS_N_INSNS (3), /* cost of movsx */
197 COSTS_N_INSNS (2), /* cost of movzx */
198 15, /* "large" insn */
200 4, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {8, 8, 8}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {8, 8, 8}, /* cost of storing fp registers
209 in SFmode, DFmode and XFmode */
210 2, /* cost of moving MMX register */
211 {4, 8}, /* cost of loading MMX registers
212 in SImode and DImode */
213 {4, 8}, /* cost of storing MMX registers
214 in SImode and DImode */
215 2, /* cost of moving SSE register */
216 {4, 8, 16}, /* cost of loading SSE registers
217 in SImode, DImode and TImode */
218 {4, 8, 16}, /* cost of storing SSE registers
219 in SImode, DImode and TImode */
220 3, /* MMX or SSE register to integer */
221 0, /* size of prefetch block */
222 0, /* number of parallel prefetches */
224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
227 COSTS_N_INSNS (3), /* cost of FABS instruction. */
228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
233 struct processor_costs pentium_cost = {
234 COSTS_N_INSNS (1), /* cost of an add instruction */
235 COSTS_N_INSNS (1), /* cost of a lea instruction */
236 COSTS_N_INSNS (4), /* variable shift costs */
237 COSTS_N_INSNS (1), /* constant shift costs */
238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
239 COSTS_N_INSNS (11), /* HI */
240 COSTS_N_INSNS (11), /* SI */
241 COSTS_N_INSNS (11), /* DI */
242 COSTS_N_INSNS (11)}, /* other */
243 0, /* cost of multiply per each bit set */
244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
245 COSTS_N_INSNS (25), /* HI */
246 COSTS_N_INSNS (25), /* SI */
247 COSTS_N_INSNS (25), /* DI */
248 COSTS_N_INSNS (25)}, /* other */
249 COSTS_N_INSNS (3), /* cost of movsx */
250 COSTS_N_INSNS (2), /* cost of movzx */
251 8, /* "large" insn */
253 6, /* cost for loading QImode using movzbl */
254 {2, 4, 2}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 4, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
262 in SFmode, DFmode and XFmode */
263 8, /* cost of moving MMX register */
264 {8, 8}, /* cost of loading MMX registers
265 in SImode and DImode */
266 {8, 8}, /* cost of storing MMX registers
267 in SImode and DImode */
268 2, /* cost of moving SSE register */
269 {4, 8, 16}, /* cost of loading SSE registers
270 in SImode, DImode and TImode */
271 {4, 8, 16}, /* cost of storing SSE registers
272 in SImode, DImode and TImode */
273 3, /* MMX or SSE register to integer */
274 0, /* size of prefetch block */
275 0, /* number of parallel prefetches */
277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
278 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
280 COSTS_N_INSNS (1), /* cost of FABS instruction. */
281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
286 struct processor_costs pentiumpro_cost = {
287 COSTS_N_INSNS (1), /* cost of an add instruction */
288 COSTS_N_INSNS (1), /* cost of a lea instruction */
289 COSTS_N_INSNS (1), /* variable shift costs */
290 COSTS_N_INSNS (1), /* constant shift costs */
291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
292 COSTS_N_INSNS (4), /* HI */
293 COSTS_N_INSNS (4), /* SI */
294 COSTS_N_INSNS (4), /* DI */
295 COSTS_N_INSNS (4)}, /* other */
296 0, /* cost of multiply per each bit set */
297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
298 COSTS_N_INSNS (17), /* HI */
299 COSTS_N_INSNS (17), /* SI */
300 COSTS_N_INSNS (17), /* DI */
301 COSTS_N_INSNS (17)}, /* other */
302 COSTS_N_INSNS (1), /* cost of movsx */
303 COSTS_N_INSNS (1), /* cost of movzx */
304 8, /* "large" insn */
306 2, /* cost for loading QImode using movzbl */
307 {4, 4, 4}, /* cost of loading integer registers
308 in QImode, HImode and SImode.
309 Relative to reg-reg move (2). */
310 {2, 2, 2}, /* cost of storing integer registers */
311 2, /* cost of reg,reg fld/fst */
312 {2, 2, 6}, /* cost of loading fp registers
313 in SFmode, DFmode and XFmode */
314 {4, 4, 6}, /* cost of storing fp registers
315 in SFmode, DFmode and XFmode */
316 2, /* cost of moving MMX register */
317 {2, 2}, /* cost of loading MMX registers
318 in SImode and DImode */
319 {2, 2}, /* cost of storing MMX registers
320 in SImode and DImode */
321 2, /* cost of moving SSE register */
322 {2, 2, 8}, /* cost of loading SSE registers
323 in SImode, DImode and TImode */
324 {2, 2, 8}, /* cost of storing SSE registers
325 in SImode, DImode and TImode */
326 3, /* MMX or SSE register to integer */
327 32, /* size of prefetch block */
328 6, /* number of parallel prefetches */
330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
333 COSTS_N_INSNS (2), /* cost of FABS instruction. */
334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
339 struct processor_costs geode_cost = {
340 COSTS_N_INSNS (1), /* cost of an add instruction */
341 COSTS_N_INSNS (1), /* cost of a lea instruction */
342 COSTS_N_INSNS (2), /* variable shift costs */
343 COSTS_N_INSNS (1), /* constant shift costs */
344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
345 COSTS_N_INSNS (4), /* HI */
346 COSTS_N_INSNS (7), /* SI */
347 COSTS_N_INSNS (7), /* DI */
348 COSTS_N_INSNS (7)}, /* other */
349 0, /* cost of multiply per each bit set */
350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
351 COSTS_N_INSNS (23), /* HI */
352 COSTS_N_INSNS (39), /* SI */
353 COSTS_N_INSNS (39), /* DI */
354 COSTS_N_INSNS (39)}, /* other */
355 COSTS_N_INSNS (1), /* cost of movsx */
356 COSTS_N_INSNS (1), /* cost of movzx */
357 8, /* "large" insn */
359 1, /* cost for loading QImode using movzbl */
360 {1, 1, 1}, /* cost of loading integer registers
361 in QImode, HImode and SImode.
362 Relative to reg-reg move (2). */
363 {1, 1, 1}, /* cost of storing integer registers */
364 1, /* cost of reg,reg fld/fst */
365 {1, 1, 1}, /* cost of loading fp registers
366 in SFmode, DFmode and XFmode */
367 {4, 6, 6}, /* cost of storing fp registers
368 in SFmode, DFmode and XFmode */
370 1, /* cost of moving MMX register */
371 {1, 1}, /* cost of loading MMX registers
372 in SImode and DImode */
373 {1, 1}, /* cost of storing MMX registers
374 in SImode and DImode */
375 1, /* cost of moving SSE register */
376 {1, 1, 1}, /* cost of loading SSE registers
377 in SImode, DImode and TImode */
378 {1, 1, 1}, /* cost of storing SSE registers
379 in SImode, DImode and TImode */
380 1, /* MMX or SSE register to integer */
381 32, /* size of prefetch block */
382 1, /* number of parallel prefetches */
384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
385 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
387 COSTS_N_INSNS (1), /* cost of FABS instruction. */
388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
393 struct processor_costs k6_cost = {
394 COSTS_N_INSNS (1), /* cost of an add instruction */
395 COSTS_N_INSNS (2), /* cost of a lea instruction */
396 COSTS_N_INSNS (1), /* variable shift costs */
397 COSTS_N_INSNS (1), /* constant shift costs */
398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
399 COSTS_N_INSNS (3), /* HI */
400 COSTS_N_INSNS (3), /* SI */
401 COSTS_N_INSNS (3), /* DI */
402 COSTS_N_INSNS (3)}, /* other */
403 0, /* cost of multiply per each bit set */
404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
405 COSTS_N_INSNS (18), /* HI */
406 COSTS_N_INSNS (18), /* SI */
407 COSTS_N_INSNS (18), /* DI */
408 COSTS_N_INSNS (18)}, /* other */
409 COSTS_N_INSNS (2), /* cost of movsx */
410 COSTS_N_INSNS (2), /* cost of movzx */
411 8, /* "large" insn */
413 3, /* cost for loading QImode using movzbl */
414 {4, 5, 4}, /* cost of loading integer registers
415 in QImode, HImode and SImode.
416 Relative to reg-reg move (2). */
417 {2, 3, 2}, /* cost of storing integer registers */
418 4, /* cost of reg,reg fld/fst */
419 {6, 6, 6}, /* cost of loading fp registers
420 in SFmode, DFmode and XFmode */
421 {4, 4, 4}, /* cost of storing fp registers
422 in SFmode, DFmode and XFmode */
423 2, /* cost of moving MMX register */
424 {2, 2}, /* cost of loading MMX registers
425 in SImode and DImode */
426 {2, 2}, /* cost of storing MMX registers
427 in SImode and DImode */
428 2, /* cost of moving SSE register */
429 {2, 2, 8}, /* cost of loading SSE registers
430 in SImode, DImode and TImode */
431 {2, 2, 8}, /* cost of storing SSE registers
432 in SImode, DImode and TImode */
433 6, /* MMX or SSE register to integer */
434 32, /* size of prefetch block */
435 1, /* number of parallel prefetches */
437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
440 COSTS_N_INSNS (2), /* cost of FABS instruction. */
441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
446 struct processor_costs athlon_cost = {
447 COSTS_N_INSNS (1), /* cost of an add instruction */
448 COSTS_N_INSNS (2), /* cost of a lea instruction */
449 COSTS_N_INSNS (1), /* variable shift costs */
450 COSTS_N_INSNS (1), /* constant shift costs */
451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
452 COSTS_N_INSNS (5), /* HI */
453 COSTS_N_INSNS (5), /* SI */
454 COSTS_N_INSNS (5), /* DI */
455 COSTS_N_INSNS (5)}, /* other */
456 0, /* cost of multiply per each bit set */
457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
458 COSTS_N_INSNS (26), /* HI */
459 COSTS_N_INSNS (42), /* SI */
460 COSTS_N_INSNS (74), /* DI */
461 COSTS_N_INSNS (74)}, /* other */
462 COSTS_N_INSNS (1), /* cost of movsx */
463 COSTS_N_INSNS (1), /* cost of movzx */
464 8, /* "large" insn */
466 4, /* cost for loading QImode using movzbl */
467 {3, 4, 3}, /* cost of loading integer registers
468 in QImode, HImode and SImode.
469 Relative to reg-reg move (2). */
470 {3, 4, 3}, /* cost of storing integer registers */
471 4, /* cost of reg,reg fld/fst */
472 {4, 4, 12}, /* cost of loading fp registers
473 in SFmode, DFmode and XFmode */
474 {6, 6, 8}, /* cost of storing fp registers
475 in SFmode, DFmode and XFmode */
476 2, /* cost of moving MMX register */
477 {4, 4}, /* cost of loading MMX registers
478 in SImode and DImode */
479 {4, 4}, /* cost of storing MMX registers
480 in SImode and DImode */
481 2, /* cost of moving SSE register */
482 {4, 4, 6}, /* cost of loading SSE registers
483 in SImode, DImode and TImode */
484 {4, 4, 5}, /* cost of storing SSE registers
485 in SImode, DImode and TImode */
486 5, /* MMX or SSE register to integer */
487 64, /* size of prefetch block */
488 6, /* number of parallel prefetches */
490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
492 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
499 struct processor_costs k8_cost = {
500 COSTS_N_INSNS (1), /* cost of an add instruction */
501 COSTS_N_INSNS (2), /* cost of a lea instruction */
502 COSTS_N_INSNS (1), /* variable shift costs */
503 COSTS_N_INSNS (1), /* constant shift costs */
504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
505 COSTS_N_INSNS (4), /* HI */
506 COSTS_N_INSNS (3), /* SI */
507 COSTS_N_INSNS (4), /* DI */
508 COSTS_N_INSNS (5)}, /* other */
509 0, /* cost of multiply per each bit set */
510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
511 COSTS_N_INSNS (26), /* HI */
512 COSTS_N_INSNS (42), /* SI */
513 COSTS_N_INSNS (74), /* DI */
514 COSTS_N_INSNS (74)}, /* other */
515 COSTS_N_INSNS (1), /* cost of movsx */
516 COSTS_N_INSNS (1), /* cost of movzx */
517 8, /* "large" insn */
519 4, /* cost for loading QImode using movzbl */
520 {3, 4, 3}, /* cost of loading integer registers
521 in QImode, HImode and SImode.
522 Relative to reg-reg move (2). */
523 {3, 4, 3}, /* cost of storing integer registers */
524 4, /* cost of reg,reg fld/fst */
525 {4, 4, 12}, /* cost of loading fp registers
526 in SFmode, DFmode and XFmode */
527 {6, 6, 8}, /* cost of storing fp registers
528 in SFmode, DFmode and XFmode */
529 2, /* cost of moving MMX register */
530 {3, 3}, /* cost of loading MMX registers
531 in SImode and DImode */
532 {4, 4}, /* cost of storing MMX registers
533 in SImode and DImode */
534 2, /* cost of moving SSE register */
535 {4, 3, 6}, /* cost of loading SSE registers
536 in SImode, DImode and TImode */
537 {4, 4, 5}, /* cost of storing SSE registers
538 in SImode, DImode and TImode */
539 5, /* MMX or SSE register to integer */
540 64, /* size of prefetch block */
541 6, /* number of parallel prefetches */
543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
546 COSTS_N_INSNS (2), /* cost of FABS instruction. */
547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
552 struct processor_costs pentium4_cost = {
553 COSTS_N_INSNS (1), /* cost of an add instruction */
554 COSTS_N_INSNS (3), /* cost of a lea instruction */
555 COSTS_N_INSNS (4), /* variable shift costs */
556 COSTS_N_INSNS (4), /* constant shift costs */
557 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
558 COSTS_N_INSNS (15), /* HI */
559 COSTS_N_INSNS (15), /* SI */
560 COSTS_N_INSNS (15), /* DI */
561 COSTS_N_INSNS (15)}, /* other */
562 0, /* cost of multiply per each bit set */
563 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
564 COSTS_N_INSNS (56), /* HI */
565 COSTS_N_INSNS (56), /* SI */
566 COSTS_N_INSNS (56), /* DI */
567 COSTS_N_INSNS (56)}, /* other */
568 COSTS_N_INSNS (1), /* cost of movsx */
569 COSTS_N_INSNS (1), /* cost of movzx */
570 16, /* "large" insn */
572 2, /* cost for loading QImode using movzbl */
573 {4, 5, 4}, /* cost of loading integer registers
574 in QImode, HImode and SImode.
575 Relative to reg-reg move (2). */
576 {2, 3, 2}, /* cost of storing integer registers */
577 2, /* cost of reg,reg fld/fst */
578 {2, 2, 6}, /* cost of loading fp registers
579 in SFmode, DFmode and XFmode */
580 {4, 4, 6}, /* cost of storing fp registers
581 in SFmode, DFmode and XFmode */
582 2, /* cost of moving MMX register */
583 {2, 2}, /* cost of loading MMX registers
584 in SImode and DImode */
585 {2, 2}, /* cost of storing MMX registers
586 in SImode and DImode */
587 12, /* cost of moving SSE register */
588 {12, 12, 12}, /* cost of loading SSE registers
589 in SImode, DImode and TImode */
590 {2, 2, 8}, /* cost of storing SSE registers
591 in SImode, DImode and TImode */
592 10, /* MMX or SSE register to integer */
593 64, /* size of prefetch block */
594 6, /* number of parallel prefetches */
596 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
597 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
598 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
599 COSTS_N_INSNS (2), /* cost of FABS instruction. */
600 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
601 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
605 struct processor_costs nocona_cost = {
606 COSTS_N_INSNS (1), /* cost of an add instruction */
607 COSTS_N_INSNS (1), /* cost of a lea instruction */
608 COSTS_N_INSNS (1), /* variable shift costs */
609 COSTS_N_INSNS (1), /* constant shift costs */
610 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
611 COSTS_N_INSNS (10), /* HI */
612 COSTS_N_INSNS (10), /* SI */
613 COSTS_N_INSNS (10), /* DI */
614 COSTS_N_INSNS (10)}, /* other */
615 0, /* cost of multiply per each bit set */
616 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
617 COSTS_N_INSNS (66), /* HI */
618 COSTS_N_INSNS (66), /* SI */
619 COSTS_N_INSNS (66), /* DI */
620 COSTS_N_INSNS (66)}, /* other */
621 COSTS_N_INSNS (1), /* cost of movsx */
622 COSTS_N_INSNS (1), /* cost of movzx */
623 16, /* "large" insn */
625 4, /* cost for loading QImode using movzbl */
626 {4, 4, 4}, /* cost of loading integer registers
627 in QImode, HImode and SImode.
628 Relative to reg-reg move (2). */
629 {4, 4, 4}, /* cost of storing integer registers */
630 3, /* cost of reg,reg fld/fst */
631 {12, 12, 12}, /* cost of loading fp registers
632 in SFmode, DFmode and XFmode */
633 {4, 4, 4}, /* cost of storing fp registers
634 in SFmode, DFmode and XFmode */
635 6, /* cost of moving MMX register */
636 {12, 12}, /* cost of loading MMX registers
637 in SImode and DImode */
638 {12, 12}, /* cost of storing MMX registers
639 in SImode and DImode */
640 6, /* cost of moving SSE register */
641 {12, 12, 12}, /* cost of loading SSE registers
642 in SImode, DImode and TImode */
643 {12, 12, 12}, /* cost of storing SSE registers
644 in SImode, DImode and TImode */
645 8, /* MMX or SSE register to integer */
646 128, /* size of prefetch block */
647 8, /* number of parallel prefetches */
649 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
650 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
651 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
652 COSTS_N_INSNS (3), /* cost of FABS instruction. */
653 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
654 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
658 struct processor_costs core2_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (3), /* HI */
665 COSTS_N_INSNS (3), /* SI */
666 COSTS_N_INSNS (3), /* DI */
667 COSTS_N_INSNS (3)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (22), /* HI */
671 COSTS_N_INSNS (22), /* SI */
672 COSTS_N_INSNS (22), /* DI */
673 COSTS_N_INSNS (22)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
678 2, /* cost for loading QImode using movzbl */
679 {6, 6, 6}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {4, 4, 4}, /* cost of storing integer registers */
683 2, /* cost of reg,reg fld/fst */
684 {6, 6, 6}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {4, 4, 4}, /* cost of loading integer registers */
687 2, /* cost of moving MMX register */
688 {6, 6}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {6, 6, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 4}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 2, /* MMX or SSE register to integer */
698 128, /* size of prefetch block */
699 8, /* number of parallel prefetches */
701 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (1), /* cost of FABS instruction. */
705 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
709 /* Generic64 should produce code tuned for Nocona and K8. */
711 struct processor_costs generic64_cost = {
712 COSTS_N_INSNS (1), /* cost of an add instruction */
713 /* On all chips taken into consideration lea is 2 cycles and more. With
714 this cost however our current implementation of synth_mult results in
715 use of unnecessary temporary registers causing regression on several
716 SPECfp benchmarks. */
717 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
718 COSTS_N_INSNS (1), /* variable shift costs */
719 COSTS_N_INSNS (1), /* constant shift costs */
720 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
721 COSTS_N_INSNS (4), /* HI */
722 COSTS_N_INSNS (3), /* SI */
723 COSTS_N_INSNS (4), /* DI */
724 COSTS_N_INSNS (2)}, /* other */
725 0, /* cost of multiply per each bit set */
726 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
727 COSTS_N_INSNS (26), /* HI */
728 COSTS_N_INSNS (42), /* SI */
729 COSTS_N_INSNS (74), /* DI */
730 COSTS_N_INSNS (74)}, /* other */
731 COSTS_N_INSNS (1), /* cost of movsx */
732 COSTS_N_INSNS (1), /* cost of movzx */
733 8, /* "large" insn */
735 4, /* cost for loading QImode using movzbl */
736 {4, 4, 4}, /* cost of loading integer registers
737 in QImode, HImode and SImode.
738 Relative to reg-reg move (2). */
739 {4, 4, 4}, /* cost of storing integer registers */
740 4, /* cost of reg,reg fld/fst */
741 {12, 12, 12}, /* cost of loading fp registers
742 in SFmode, DFmode and XFmode */
743 {6, 6, 8}, /* cost of storing fp registers
744 in SFmode, DFmode and XFmode */
745 2, /* cost of moving MMX register */
746 {8, 8}, /* cost of loading MMX registers
747 in SImode and DImode */
748 {8, 8}, /* cost of storing MMX registers
749 in SImode and DImode */
750 2, /* cost of moving SSE register */
751 {8, 8, 8}, /* cost of loading SSE registers
752 in SImode, DImode and TImode */
753 {8, 8, 8}, /* cost of storing SSE registers
754 in SImode, DImode and TImode */
755 5, /* MMX or SSE register to integer */
756 64, /* size of prefetch block */
757 6, /* number of parallel prefetches */
758 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
759 is increased to perhaps more appropriate value of 5. */
761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
764 COSTS_N_INSNS (8), /* cost of FABS instruction. */
765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
769 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
771 struct processor_costs generic32_cost = {
772 COSTS_N_INSNS (1), /* cost of an add instruction */
773 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
774 COSTS_N_INSNS (1), /* variable shift costs */
775 COSTS_N_INSNS (1), /* constant shift costs */
776 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
777 COSTS_N_INSNS (4), /* HI */
778 COSTS_N_INSNS (3), /* SI */
779 COSTS_N_INSNS (4), /* DI */
780 COSTS_N_INSNS (2)}, /* other */
781 0, /* cost of multiply per each bit set */
782 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
783 COSTS_N_INSNS (26), /* HI */
784 COSTS_N_INSNS (42), /* SI */
785 COSTS_N_INSNS (74), /* DI */
786 COSTS_N_INSNS (74)}, /* other */
787 COSTS_N_INSNS (1), /* cost of movsx */
788 COSTS_N_INSNS (1), /* cost of movzx */
789 8, /* "large" insn */
791 4, /* cost for loading QImode using movzbl */
792 {4, 4, 4}, /* cost of loading integer registers
793 in QImode, HImode and SImode.
794 Relative to reg-reg move (2). */
795 {4, 4, 4}, /* cost of storing integer registers */
796 4, /* cost of reg,reg fld/fst */
797 {12, 12, 12}, /* cost of loading fp registers
798 in SFmode, DFmode and XFmode */
799 {6, 6, 8}, /* cost of storing fp registers
800 in SFmode, DFmode and XFmode */
801 2, /* cost of moving MMX register */
802 {8, 8}, /* cost of loading MMX registers
803 in SImode and DImode */
804 {8, 8}, /* cost of storing MMX registers
805 in SImode and DImode */
806 2, /* cost of moving SSE register */
807 {8, 8, 8}, /* cost of loading SSE registers
808 in SImode, DImode and TImode */
809 {8, 8, 8}, /* cost of storing SSE registers
810 in SImode, DImode and TImode */
811 5, /* MMX or SSE register to integer */
812 64, /* size of prefetch block */
813 6, /* number of parallel prefetches */
815 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
816 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
817 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
818 COSTS_N_INSNS (8), /* cost of FABS instruction. */
819 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
820 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
823 const struct processor_costs *ix86_cost = &pentium_cost;
825 /* Processor feature/optimization bitmasks. */
826 #define m_386 (1<<PROCESSOR_I386)
827 #define m_486 (1<<PROCESSOR_I486)
828 #define m_PENT (1<<PROCESSOR_PENTIUM)
829 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
830 #define m_GEODE (1<<PROCESSOR_GEODE)
831 #define m_K6_GEODE (m_K6 | m_GEODE)
832 #define m_K6 (1<<PROCESSOR_K6)
833 #define m_ATHLON (1<<PROCESSOR_ATHLON)
834 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
835 #define m_K8 (1<<PROCESSOR_K8)
836 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
837 #define m_NOCONA (1<<PROCESSOR_NOCONA)
838 #define m_CORE2 (1<<PROCESSOR_CORE2)
839 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
840 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
841 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
843 /* Generic instruction choice should be common subset of supported CPUs
844 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
846 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
847 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
848 generic because it is not working well with PPro base chips. */
849 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
850 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
851 const int x86_zero_extend_with_and = m_486 | m_PENT;
852 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
853 const int x86_double_with_add = ~m_386;
854 const int x86_use_bit_test = m_386;
855 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
856 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
857 const int x86_3dnow_a = m_ATHLON_K8;
858 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
859 /* Branch hints were put in P4 based on simulation result. But
860 after P4 was made, no performance benefit was observed with
861 branch hints. It also increases the code size. As the result,
862 icc never generates branch hints. */
863 const int x86_branch_hints = 0;
864 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
865 /* We probably ought to watch for partial register stalls on Generic32
866 compilation setting as well. However in current implementation the
867 partial register stalls are not eliminated very well - they can
868 be introduced via subregs synthesized by combine and can happen
869 in caller/callee saving sequences.
870 Because this option pays back little on PPro based chips and is in conflict
871 with partial reg. dependencies used by Athlon/P4 based chips, it is better
872 to leave it off for generic32 for now. */
873 const int x86_partial_reg_stall = m_PPRO;
874 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
875 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
876 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
877 const int x86_use_mov0 = m_K6;
878 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
879 const int x86_read_modify_write = ~m_PENT;
880 const int x86_read_modify = ~(m_PENT | m_PPRO);
881 const int x86_split_long_moves = m_PPRO;
882 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
883 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
884 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
885 const int x86_qimode_math = ~(0);
886 const int x86_promote_qi_regs = 0;
887 /* On PPro this flag is meant to avoid partial register stalls. Just like
888 the x86_partial_reg_stall this option might be considered for Generic32
889 if our scheme for avoiding partial stalls was more effective. */
890 const int x86_himode_math = ~(m_PPRO);
891 const int x86_promote_hi_regs = m_PPRO;
892 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
893 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
894 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
895 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
896 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
897 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
898 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
899 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
900 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
901 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
902 const int x86_shift1 = ~m_486;
903 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
904 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
905 that thread 128bit SSE registers as single units versus K8 based chips that
906 divide SSE registers to two 64bit halves.
907 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
908 to allow register renaming on 128bit SSE units, but usually results in one
909 extra microop on 64bit SSE units. Experimental results shows that disabling
910 this option on P4 brings over 20% SPECfp regression, while enabling it on
911 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
913 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
914 /* Set for machines where the type and dependencies are resolved on SSE
915 register parts instead of whole registers, so we may maintain just
916 lower part of scalar values in proper format leaving the upper part
918 const int x86_sse_split_regs = m_ATHLON_K8;
919 const int x86_sse_typeless_stores = m_ATHLON_K8;
920 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
921 const int x86_use_ffreep = m_ATHLON_K8;
922 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
923 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
925 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
926 integer data in xmm registers. Which results in pretty abysmal code. */
927 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
929 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
930 /* Some CPU cores are not able to predict more than 4 branch instructions in
931 the 16 byte window. */
932 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
933 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
934 const int x86_use_bt = m_ATHLON_K8;
935 /* Compare and exchange was added for 80486. */
936 const int x86_cmpxchg = ~m_386;
937 /* Compare and exchange 8 bytes was added for pentium. */
938 const int x86_cmpxchg8b = ~(m_386 | m_486);
939 /* Compare and exchange 16 bytes was added for nocona. */
940 const int x86_cmpxchg16b = m_NOCONA | m_CORE2;
941 /* Exchange and add was added for 80486. */
942 const int x86_xadd = ~m_386;
943 const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
945 /* In case the average insn count for single function invocation is
946 lower than this constant, emit fast (but longer) prologue and
948 #define FAST_PROLOGUE_INSN_COUNT 20
950 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
951 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
952 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
953 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
955 /* Array of the smallest class containing reg number REGNO, indexed by
956 REGNO. Used by REGNO_REG_CLASS in i386.h. */
958 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
961 AREG, DREG, CREG, BREG,
963 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
965 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
966 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
969 /* flags, fpsr, dirflag, frame */
970 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
971 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
973 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
975 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
976 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
977 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
981 /* The "default" register map used in 32bit mode. */
983 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
985 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
986 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
987 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
988 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
989 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
990 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
991 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
994 static int const x86_64_int_parameter_registers[6] =
996 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
997 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1000 static int const x86_64_int_return_registers[4] =
1002 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1005 /* The "default" register map used in 64bit mode. */
1006 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1008 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1009 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1010 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1011 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1012 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1013 8,9,10,11,12,13,14,15, /* extended integer registers */
1014 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1017 /* Define the register numbers to be used in Dwarf debugging information.
1018 The SVR4 reference port C compiler uses the following register numbers
1019 in its Dwarf output code:
1020 0 for %eax (gcc regno = 0)
1021 1 for %ecx (gcc regno = 2)
1022 2 for %edx (gcc regno = 1)
1023 3 for %ebx (gcc regno = 3)
1024 4 for %esp (gcc regno = 7)
1025 5 for %ebp (gcc regno = 6)
1026 6 for %esi (gcc regno = 4)
1027 7 for %edi (gcc regno = 5)
1028 The following three DWARF register numbers are never generated by
1029 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1030 believes these numbers have these meanings.
1031 8 for %eip (no gcc equivalent)
1032 9 for %eflags (gcc regno = 17)
1033 10 for %trapno (no gcc equivalent)
1034 It is not at all clear how we should number the FP stack registers
1035 for the x86 architecture. If the version of SDB on x86/svr4 were
1036 a bit less brain dead with respect to floating-point then we would
1037 have a precedent to follow with respect to DWARF register numbers
1038 for x86 FP registers, but the SDB on x86/svr4 is so completely
1039 broken with respect to FP registers that it is hardly worth thinking
1040 of it as something to strive for compatibility with.
1041 The version of x86/svr4 SDB I have at the moment does (partially)
1042 seem to believe that DWARF register number 11 is associated with
1043 the x86 register %st(0), but that's about all. Higher DWARF
1044 register numbers don't seem to be associated with anything in
1045 particular, and even for DWARF regno 11, SDB only seems to under-
1046 stand that it should say that a variable lives in %st(0) (when
1047 asked via an `=' command) if we said it was in DWARF regno 11,
1048 but SDB still prints garbage when asked for the value of the
1049 variable in question (via a `/' command).
1050 (Also note that the labels SDB prints for various FP stack regs
1051 when doing an `x' command are all wrong.)
1052 Note that these problems generally don't affect the native SVR4
1053 C compiler because it doesn't allow the use of -O with -g and
1054 because when it is *not* optimizing, it allocates a memory
1055 location for each floating-point variable, and the memory
1056 location is what gets described in the DWARF AT_location
1057 attribute for the variable in question.
1058 Regardless of the severe mental illness of the x86/svr4 SDB, we
1059 do something sensible here and we use the following DWARF
1060 register numbers. Note that these are all stack-top-relative
1062 11 for %st(0) (gcc regno = 8)
1063 12 for %st(1) (gcc regno = 9)
1064 13 for %st(2) (gcc regno = 10)
1065 14 for %st(3) (gcc regno = 11)
1066 15 for %st(4) (gcc regno = 12)
1067 16 for %st(5) (gcc regno = 13)
1068 17 for %st(6) (gcc regno = 14)
1069 18 for %st(7) (gcc regno = 15)
1071 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1073 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1074 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1075 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1076 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1077 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1078 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1079 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1082 /* Test and compare insns in i386.md store the information needed to
1083 generate branch and scc insns here. */
1085 rtx ix86_compare_op0 = NULL_RTX;
1086 rtx ix86_compare_op1 = NULL_RTX;
1087 rtx ix86_compare_emitted = NULL_RTX;
1089 /* Size of the register save area. */
1090 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1092 /* Define the structure for the machine field in struct function. */
1094 struct stack_local_entry GTY(())
1096 unsigned short mode;
1099 struct stack_local_entry *next;
1102 /* Structure describing stack frame layout.
1103 Stack grows downward:
1109 saved frame pointer if frame_pointer_needed
1110 <- HARD_FRAME_POINTER
1115 [va_arg registers] (
1116 > to_allocate <- FRAME_POINTER
1126 HOST_WIDE_INT frame;
1128 int outgoing_arguments_size;
1131 HOST_WIDE_INT to_allocate;
1132 /* The offsets relative to ARG_POINTER. */
1133 HOST_WIDE_INT frame_pointer_offset;
1134 HOST_WIDE_INT hard_frame_pointer_offset;
1135 HOST_WIDE_INT stack_pointer_offset;
1137 /* When save_regs_using_mov is set, emit prologue using
1138 move instead of push instructions. */
1139 bool save_regs_using_mov;
1142 /* Code model option. */
1143 enum cmodel ix86_cmodel;
1145 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1147 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1149 /* Which unit we are generating floating point math for. */
1150 enum fpmath_unit ix86_fpmath;
1152 /* Which cpu are we scheduling for. */
1153 enum processor_type ix86_tune;
1154 /* Which instruction set architecture to use. */
1155 enum processor_type ix86_arch;
1157 /* true if sse prefetch instruction is not NOOP. */
1158 int x86_prefetch_sse;
1160 /* ix86_regparm_string as a number */
1161 static int ix86_regparm;
1163 /* -mstackrealign option */
1164 extern int ix86_force_align_arg_pointer;
1165 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1167 /* Preferred alignment for stack boundary in bits. */
1168 unsigned int ix86_preferred_stack_boundary;
1170 /* Values 1-5: see jump.c */
1171 int ix86_branch_cost;
1173 /* Variables which are this size or smaller are put in the data/bss
1174 or ldata/lbss sections. */
1176 int ix86_section_threshold = 65536;
1178 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1179 char internal_label_prefix[16];
1180 int internal_label_prefix_len;
1182 static bool ix86_handle_option (size_t, const char *, int);
1183 static void output_pic_addr_const (FILE *, rtx, int);
1184 static void put_condition_code (enum rtx_code, enum machine_mode,
1186 static const char *get_some_local_dynamic_name (void);
1187 static int get_some_local_dynamic_name_1 (rtx *, void *);
1188 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1189 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1191 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1192 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1194 static rtx get_thread_pointer (int);
1195 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1196 static void get_pc_thunk_name (char [32], unsigned int);
1197 static rtx gen_push (rtx);
1198 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1199 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1200 static struct machine_function * ix86_init_machine_status (void);
1201 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1202 static int ix86_nsaved_regs (void);
1203 static void ix86_emit_save_regs (void);
1204 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1205 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1206 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1207 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1208 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1209 static rtx ix86_expand_aligntest (rtx, int);
1210 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1211 static int ix86_issue_rate (void);
1212 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1213 static int ia32_multipass_dfa_lookahead (void);
1214 static void ix86_init_mmx_sse_builtins (void);
1215 static rtx x86_this_parameter (tree);
1216 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1217 HOST_WIDE_INT, tree);
1218 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1219 static void x86_file_start (void);
1220 static void ix86_reorg (void);
1221 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1222 static tree ix86_build_builtin_va_list (void);
1223 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1225 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1226 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1227 static bool ix86_vector_mode_supported_p (enum machine_mode);
1229 static int ix86_address_cost (rtx);
1230 static bool ix86_cannot_force_const_mem (rtx);
1231 static rtx ix86_delegitimize_address (rtx);
1233 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1235 struct builtin_description;
1236 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1238 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1240 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1241 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1242 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1243 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1244 static rtx safe_vector_operand (rtx, enum machine_mode);
1245 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1246 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1247 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1248 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1249 static int ix86_fp_comparison_cost (enum rtx_code code);
1250 static unsigned int ix86_select_alt_pic_regnum (void);
1251 static int ix86_save_reg (unsigned int, int);
1252 static void ix86_compute_frame_layout (struct ix86_frame *);
1253 static int ix86_comp_type_attributes (tree, tree);
1254 static int ix86_function_regparm (tree, tree);
1255 const struct attribute_spec ix86_attribute_table[];
1256 static bool ix86_function_ok_for_sibcall (tree, tree);
1257 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1258 static int ix86_value_regno (enum machine_mode, tree, tree);
1259 static bool contains_128bit_aligned_vector_p (tree);
1260 static rtx ix86_struct_value_rtx (tree, int);
1261 static bool ix86_ms_bitfield_layout_p (tree);
1262 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1263 static int extended_reg_mentioned_1 (rtx *, void *);
1264 static bool ix86_rtx_costs (rtx, int, int, int *);
1265 static int min_insn_size (rtx);
1266 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1267 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1268 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1270 static void ix86_init_builtins (void);
1271 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1272 static const char *ix86_mangle_fundamental_type (tree);
1273 static tree ix86_stack_protect_fail (void);
1274 static rtx ix86_internal_arg_pointer (void);
1275 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1277 /* This function is only used on Solaris. */
1278 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1281 /* Register class used for passing given 64bit part of the argument.
1282 These represent classes as documented by the PS ABI, with the exception
1283 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1284 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1286 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1287 whenever possible (upper half does contain padding).
1289 enum x86_64_reg_class
1292 X86_64_INTEGER_CLASS,
1293 X86_64_INTEGERSI_CLASS,
1300 X86_64_COMPLEX_X87_CLASS,
1303 static const char * const x86_64_reg_class_name[] = {
1304 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1305 "sseup", "x87", "x87up", "cplx87", "no"
1308 #define MAX_CLASSES 4
1310 /* Table of constants used by fldpi, fldln2, etc.... */
1311 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1312 static bool ext_80387_constants_init = 0;
1313 static void init_ext_80387_constants (void);
1314 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1315 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1316 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1317 static section *x86_64_elf_select_section (tree decl, int reloc,
1318 unsigned HOST_WIDE_INT align)
1321 /* Initialize the GCC target structure. */
1322 #undef TARGET_ATTRIBUTE_TABLE
1323 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1324 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1325 # undef TARGET_MERGE_DECL_ATTRIBUTES
1326 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1329 #undef TARGET_COMP_TYPE_ATTRIBUTES
1330 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1332 #undef TARGET_INIT_BUILTINS
1333 #define TARGET_INIT_BUILTINS ix86_init_builtins
1334 #undef TARGET_EXPAND_BUILTIN
1335 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1337 #undef TARGET_ASM_FUNCTION_EPILOGUE
1338 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1340 #undef TARGET_ENCODE_SECTION_INFO
1341 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1342 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1344 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1347 #undef TARGET_ASM_OPEN_PAREN
1348 #define TARGET_ASM_OPEN_PAREN ""
1349 #undef TARGET_ASM_CLOSE_PAREN
1350 #define TARGET_ASM_CLOSE_PAREN ""
1352 #undef TARGET_ASM_ALIGNED_HI_OP
1353 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1354 #undef TARGET_ASM_ALIGNED_SI_OP
1355 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1357 #undef TARGET_ASM_ALIGNED_DI_OP
1358 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1361 #undef TARGET_ASM_UNALIGNED_HI_OP
1362 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1363 #undef TARGET_ASM_UNALIGNED_SI_OP
1364 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1365 #undef TARGET_ASM_UNALIGNED_DI_OP
1366 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1368 #undef TARGET_SCHED_ADJUST_COST
1369 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1370 #undef TARGET_SCHED_ISSUE_RATE
1371 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1372 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1373 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1374 ia32_multipass_dfa_lookahead
1376 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1377 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1380 #undef TARGET_HAVE_TLS
1381 #define TARGET_HAVE_TLS true
1383 #undef TARGET_CANNOT_FORCE_CONST_MEM
1384 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1385 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1386 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1388 #undef TARGET_DELEGITIMIZE_ADDRESS
1389 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1391 #undef TARGET_MS_BITFIELD_LAYOUT_P
1392 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1395 #undef TARGET_BINDS_LOCAL_P
1396 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1399 #undef TARGET_ASM_OUTPUT_MI_THUNK
1400 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1401 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1402 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1404 #undef TARGET_ASM_FILE_START
1405 #define TARGET_ASM_FILE_START x86_file_start
1407 #undef TARGET_DEFAULT_TARGET_FLAGS
1408 #define TARGET_DEFAULT_TARGET_FLAGS \
1410 | TARGET_64BIT_DEFAULT \
1411 | TARGET_SUBTARGET_DEFAULT \
1412 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1414 #undef TARGET_HANDLE_OPTION
1415 #define TARGET_HANDLE_OPTION ix86_handle_option
1417 #undef TARGET_RTX_COSTS
1418 #define TARGET_RTX_COSTS ix86_rtx_costs
1419 #undef TARGET_ADDRESS_COST
1420 #define TARGET_ADDRESS_COST ix86_address_cost
1422 #undef TARGET_FIXED_CONDITION_CODE_REGS
1423 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1424 #undef TARGET_CC_MODES_COMPATIBLE
1425 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1427 #undef TARGET_MACHINE_DEPENDENT_REORG
1428 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1430 #undef TARGET_BUILD_BUILTIN_VA_LIST
1431 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1433 #undef TARGET_MD_ASM_CLOBBERS
1434 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1436 #undef TARGET_PROMOTE_PROTOTYPES
1437 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1438 #undef TARGET_STRUCT_VALUE_RTX
1439 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1440 #undef TARGET_SETUP_INCOMING_VARARGS
1441 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1442 #undef TARGET_MUST_PASS_IN_STACK
1443 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1444 #undef TARGET_PASS_BY_REFERENCE
1445 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1446 #undef TARGET_INTERNAL_ARG_POINTER
1447 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1448 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1449 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1451 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1452 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1454 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1455 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1457 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1458 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1461 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1462 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1465 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1466 #undef TARGET_INSERT_ATTRIBUTES
1467 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1470 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1471 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1473 #undef TARGET_STACK_PROTECT_FAIL
1474 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1476 #undef TARGET_FUNCTION_VALUE
1477 #define TARGET_FUNCTION_VALUE ix86_function_value
1479 struct gcc_target targetm = TARGET_INITIALIZER;
1482 /* The svr4 ABI for the i386 says that records and unions are returned
1484 #ifndef DEFAULT_PCC_STRUCT_RETURN
1485 #define DEFAULT_PCC_STRUCT_RETURN 1
1488 /* Implement TARGET_HANDLE_OPTION. */
1491 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1498 target_flags &= ~MASK_3DNOW_A;
1499 target_flags_explicit |= MASK_3DNOW_A;
1506 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1507 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1514 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3);
1515 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3;
1522 target_flags &= ~(MASK_SSE3 | MASK_SSSE3);
1523 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3;
1530 target_flags &= ~MASK_SSSE3;
1531 target_flags_explicit |= MASK_SSSE3;
1540 /* Sometimes certain combinations of command options do not make
1541 sense on a particular target machine. You can define a macro
1542 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1543 defined, is executed once just after all the command options have
1546 Don't use this macro to turn on various extra optimizations for
1547 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1550 override_options (void)
1553 int ix86_tune_defaulted = 0;
1555 /* Comes from final.c -- no real reason to change it. */
1556 #define MAX_CODE_ALIGN 16
1560 const struct processor_costs *cost; /* Processor costs */
1561 const int target_enable; /* Target flags to enable. */
1562 const int target_disable; /* Target flags to disable. */
1563 const int align_loop; /* Default alignments. */
1564 const int align_loop_max_skip;
1565 const int align_jump;
1566 const int align_jump_max_skip;
1567 const int align_func;
1569 const processor_target_table[PROCESSOR_max] =
1571 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1572 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1573 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1574 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1575 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1576 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1577 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1578 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1579 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1580 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1581 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1582 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1583 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1586 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1589 const char *const name; /* processor name or nickname. */
1590 const enum processor_type processor;
1591 const enum pta_flags
1597 PTA_PREFETCH_SSE = 16,
1604 const processor_alias_table[] =
1606 {"i386", PROCESSOR_I386, 0},
1607 {"i486", PROCESSOR_I486, 0},
1608 {"i586", PROCESSOR_PENTIUM, 0},
1609 {"pentium", PROCESSOR_PENTIUM, 0},
1610 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1611 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1612 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1613 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1614 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1615 {"i686", PROCESSOR_PENTIUMPRO, 0},
1616 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1617 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1618 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1619 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1620 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1621 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1622 | PTA_MMX | PTA_PREFETCH_SSE},
1623 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1624 | PTA_MMX | PTA_PREFETCH_SSE},
1625 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1626 | PTA_MMX | PTA_PREFETCH_SSE},
1627 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1628 | PTA_MMX | PTA_PREFETCH_SSE},
1629 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1630 | PTA_64BIT | PTA_MMX
1631 | PTA_PREFETCH_SSE},
1632 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1634 {"k6", PROCESSOR_K6, PTA_MMX},
1635 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1636 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1637 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1639 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1640 | PTA_3DNOW | PTA_3DNOW_A},
1641 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1642 | PTA_3DNOW_A | PTA_SSE},
1643 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1644 | PTA_3DNOW_A | PTA_SSE},
1645 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1646 | PTA_3DNOW_A | PTA_SSE},
1647 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1648 | PTA_SSE | PTA_SSE2 },
1649 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1650 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1651 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1652 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1654 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1655 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1656 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1657 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1659 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1660 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1661 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1662 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1664 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1665 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1666 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1667 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1670 int const pta_size = ARRAY_SIZE (processor_alias_table);
1672 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1673 SUBTARGET_OVERRIDE_OPTIONS;
1676 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1677 SUBSUBTARGET_OVERRIDE_OPTIONS;
1680 /* -fPIC is the default for x86_64. */
1681 if (TARGET_MACHO && TARGET_64BIT)
1684 /* Set the default values for switches whose default depends on TARGET_64BIT
1685 in case they weren't overwritten by command line options. */
1688 /* Mach-O doesn't support omitting the frame pointer for now. */
1689 if (flag_omit_frame_pointer == 2)
1690 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1691 if (flag_asynchronous_unwind_tables == 2)
1692 flag_asynchronous_unwind_tables = 1;
1693 if (flag_pcc_struct_return == 2)
1694 flag_pcc_struct_return = 0;
1698 if (flag_omit_frame_pointer == 2)
1699 flag_omit_frame_pointer = 0;
1700 if (flag_asynchronous_unwind_tables == 2)
1701 flag_asynchronous_unwind_tables = 0;
1702 if (flag_pcc_struct_return == 2)
1703 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1706 /* Need to check -mtune=generic first. */
1707 if (ix86_tune_string)
1709 if (!strcmp (ix86_tune_string, "generic")
1710 || !strcmp (ix86_tune_string, "i686")
1711 /* As special support for cross compilers we read -mtune=native
1712 as -mtune=generic. With native compilers we won't see the
1713 -mtune=native, as it was changed by the driver. */
1714 || !strcmp (ix86_tune_string, "native"))
1717 ix86_tune_string = "generic64";
1719 ix86_tune_string = "generic32";
1721 else if (!strncmp (ix86_tune_string, "generic", 7))
1722 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1726 if (ix86_arch_string)
1727 ix86_tune_string = ix86_arch_string;
1728 if (!ix86_tune_string)
1730 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1731 ix86_tune_defaulted = 1;
1734 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1735 need to use a sensible tune option. */
1736 if (!strcmp (ix86_tune_string, "generic")
1737 || !strcmp (ix86_tune_string, "x86-64")
1738 || !strcmp (ix86_tune_string, "i686"))
1741 ix86_tune_string = "generic64";
1743 ix86_tune_string = "generic32";
1746 if (!strcmp (ix86_tune_string, "x86-64"))
1747 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1748 "-mtune=generic instead as appropriate.");
1750 if (!ix86_arch_string)
1751 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1752 if (!strcmp (ix86_arch_string, "generic"))
1753 error ("generic CPU can be used only for -mtune= switch");
1754 if (!strncmp (ix86_arch_string, "generic", 7))
1755 error ("bad value (%s) for -march= switch", ix86_arch_string);
1757 if (ix86_cmodel_string != 0)
1759 if (!strcmp (ix86_cmodel_string, "small"))
1760 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1761 else if (!strcmp (ix86_cmodel_string, "medium"))
1762 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1764 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1765 else if (!strcmp (ix86_cmodel_string, "32"))
1766 ix86_cmodel = CM_32;
1767 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1768 ix86_cmodel = CM_KERNEL;
1769 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1770 ix86_cmodel = CM_LARGE;
1772 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1776 ix86_cmodel = CM_32;
1778 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1780 if (ix86_asm_string != 0)
1783 && !strcmp (ix86_asm_string, "intel"))
1784 ix86_asm_dialect = ASM_INTEL;
1785 else if (!strcmp (ix86_asm_string, "att"))
1786 ix86_asm_dialect = ASM_ATT;
1788 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1790 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1791 error ("code model %qs not supported in the %s bit mode",
1792 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1793 if (ix86_cmodel == CM_LARGE)
1794 sorry ("code model %<large%> not supported yet");
1795 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1796 sorry ("%i-bit mode not compiled in",
1797 (target_flags & MASK_64BIT) ? 64 : 32);
1799 for (i = 0; i < pta_size; i++)
1800 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1802 ix86_arch = processor_alias_table[i].processor;
1803 /* Default cpu tuning to the architecture. */
1804 ix86_tune = ix86_arch;
1805 if (processor_alias_table[i].flags & PTA_MMX
1806 && !(target_flags_explicit & MASK_MMX))
1807 target_flags |= MASK_MMX;
1808 if (processor_alias_table[i].flags & PTA_3DNOW
1809 && !(target_flags_explicit & MASK_3DNOW))
1810 target_flags |= MASK_3DNOW;
1811 if (processor_alias_table[i].flags & PTA_3DNOW_A
1812 && !(target_flags_explicit & MASK_3DNOW_A))
1813 target_flags |= MASK_3DNOW_A;
1814 if (processor_alias_table[i].flags & PTA_SSE
1815 && !(target_flags_explicit & MASK_SSE))
1816 target_flags |= MASK_SSE;
1817 if (processor_alias_table[i].flags & PTA_SSE2
1818 && !(target_flags_explicit & MASK_SSE2))
1819 target_flags |= MASK_SSE2;
1820 if (processor_alias_table[i].flags & PTA_SSE3
1821 && !(target_flags_explicit & MASK_SSE3))
1822 target_flags |= MASK_SSE3;
1823 if (processor_alias_table[i].flags & PTA_SSSE3
1824 && !(target_flags_explicit & MASK_SSSE3))
1825 target_flags |= MASK_SSSE3;
1826 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1827 x86_prefetch_sse = true;
1828 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1829 error ("CPU you selected does not support x86-64 "
1835 error ("bad value (%s) for -march= switch", ix86_arch_string);
1837 for (i = 0; i < pta_size; i++)
1838 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1840 ix86_tune = processor_alias_table[i].processor;
1841 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1843 if (ix86_tune_defaulted)
1845 ix86_tune_string = "x86-64";
1846 for (i = 0; i < pta_size; i++)
1847 if (! strcmp (ix86_tune_string,
1848 processor_alias_table[i].name))
1850 ix86_tune = processor_alias_table[i].processor;
1853 error ("CPU you selected does not support x86-64 "
1856 /* Intel CPUs have always interpreted SSE prefetch instructions as
1857 NOPs; so, we can enable SSE prefetch instructions even when
1858 -mtune (rather than -march) points us to a processor that has them.
1859 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1860 higher processors. */
1861 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1862 x86_prefetch_sse = true;
1866 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1869 ix86_cost = &size_cost;
1871 ix86_cost = processor_target_table[ix86_tune].cost;
1872 target_flags |= processor_target_table[ix86_tune].target_enable;
1873 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1875 /* Arrange to set up i386_stack_locals for all functions. */
1876 init_machine_status = ix86_init_machine_status;
1878 /* Validate -mregparm= value. */
1879 if (ix86_regparm_string)
1881 i = atoi (ix86_regparm_string);
1882 if (i < 0 || i > REGPARM_MAX)
1883 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1889 ix86_regparm = REGPARM_MAX;
1891 /* If the user has provided any of the -malign-* options,
1892 warn and use that value only if -falign-* is not set.
1893 Remove this code in GCC 3.2 or later. */
1894 if (ix86_align_loops_string)
1896 warning (0, "-malign-loops is obsolete, use -falign-loops");
1897 if (align_loops == 0)
1899 i = atoi (ix86_align_loops_string);
1900 if (i < 0 || i > MAX_CODE_ALIGN)
1901 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1903 align_loops = 1 << i;
1907 if (ix86_align_jumps_string)
1909 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1910 if (align_jumps == 0)
1912 i = atoi (ix86_align_jumps_string);
1913 if (i < 0 || i > MAX_CODE_ALIGN)
1914 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1916 align_jumps = 1 << i;
1920 if (ix86_align_funcs_string)
1922 warning (0, "-malign-functions is obsolete, use -falign-functions");
1923 if (align_functions == 0)
1925 i = atoi (ix86_align_funcs_string);
1926 if (i < 0 || i > MAX_CODE_ALIGN)
1927 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1929 align_functions = 1 << i;
1933 /* Default align_* from the processor table. */
1934 if (align_loops == 0)
1936 align_loops = processor_target_table[ix86_tune].align_loop;
1937 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1939 if (align_jumps == 0)
1941 align_jumps = processor_target_table[ix86_tune].align_jump;
1942 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1944 if (align_functions == 0)
1946 align_functions = processor_target_table[ix86_tune].align_func;
1949 /* Validate -mbranch-cost= value, or provide default. */
1950 ix86_branch_cost = ix86_cost->branch_cost;
1951 if (ix86_branch_cost_string)
1953 i = atoi (ix86_branch_cost_string);
1955 error ("-mbranch-cost=%d is not between 0 and 5", i);
1957 ix86_branch_cost = i;
1959 if (ix86_section_threshold_string)
1961 i = atoi (ix86_section_threshold_string);
1963 error ("-mlarge-data-threshold=%d is negative", i);
1965 ix86_section_threshold = i;
1968 if (ix86_tls_dialect_string)
1970 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1971 ix86_tls_dialect = TLS_DIALECT_GNU;
1972 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1973 ix86_tls_dialect = TLS_DIALECT_GNU2;
1974 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1975 ix86_tls_dialect = TLS_DIALECT_SUN;
1977 error ("bad value (%s) for -mtls-dialect= switch",
1978 ix86_tls_dialect_string);
1981 /* Keep nonleaf frame pointers. */
1982 if (flag_omit_frame_pointer)
1983 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1984 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1985 flag_omit_frame_pointer = 1;
1987 /* If we're doing fast math, we don't care about comparison order
1988 wrt NaNs. This lets us use a shorter comparison sequence. */
1989 if (flag_finite_math_only)
1990 target_flags &= ~MASK_IEEE_FP;
1992 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1993 since the insns won't need emulation. */
1994 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1995 target_flags &= ~MASK_NO_FANCY_MATH_387;
1997 /* Likewise, if the target doesn't have a 387, or we've specified
1998 software floating point, don't use 387 inline intrinsics. */
2000 target_flags |= MASK_NO_FANCY_MATH_387;
2002 /* Turn on SSE3 builtins for -mssse3. */
2004 target_flags |= MASK_SSE3;
2006 /* Turn on SSE2 builtins for -msse3. */
2008 target_flags |= MASK_SSE2;
2010 /* Turn on SSE builtins for -msse2. */
2012 target_flags |= MASK_SSE;
2014 /* Turn on MMX builtins for -msse. */
2017 target_flags |= MASK_MMX & ~target_flags_explicit;
2018 x86_prefetch_sse = true;
2021 /* Turn on MMX builtins for 3Dnow. */
2023 target_flags |= MASK_MMX;
2027 if (TARGET_ALIGN_DOUBLE)
2028 error ("-malign-double makes no sense in the 64bit mode");
2030 error ("-mrtd calling convention not supported in the 64bit mode");
2032 /* Enable by default the SSE and MMX builtins. Do allow the user to
2033 explicitly disable any of these. In particular, disabling SSE and
2034 MMX for kernel code is extremely useful. */
2036 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2037 & ~target_flags_explicit);
2041 /* i386 ABI does not specify red zone. It still makes sense to use it
2042 when programmer takes care to stack from being destroyed. */
2043 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2044 target_flags |= MASK_NO_RED_ZONE;
2047 /* Validate -mpreferred-stack-boundary= value, or provide default.
2048 The default of 128 bits is for Pentium III's SSE __m128. We can't
2049 change it because of optimize_size. Otherwise, we can't mix object
2050 files compiled with -Os and -On. */
2051 ix86_preferred_stack_boundary = 128;
2052 if (ix86_preferred_stack_boundary_string)
2054 i = atoi (ix86_preferred_stack_boundary_string);
2055 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2056 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2057 TARGET_64BIT ? 4 : 2);
2059 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2062 /* Accept -msseregparm only if at least SSE support is enabled. */
2063 if (TARGET_SSEREGPARM
2065 error ("-msseregparm used without SSE enabled");
2067 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2069 if (ix86_fpmath_string != 0)
2071 if (! strcmp (ix86_fpmath_string, "387"))
2072 ix86_fpmath = FPMATH_387;
2073 else if (! strcmp (ix86_fpmath_string, "sse"))
2077 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2078 ix86_fpmath = FPMATH_387;
2081 ix86_fpmath = FPMATH_SSE;
2083 else if (! strcmp (ix86_fpmath_string, "387,sse")
2084 || ! strcmp (ix86_fpmath_string, "sse,387"))
2088 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2089 ix86_fpmath = FPMATH_387;
2091 else if (!TARGET_80387)
2093 warning (0, "387 instruction set disabled, using SSE arithmetics");
2094 ix86_fpmath = FPMATH_SSE;
2097 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2100 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2103 /* If the i387 is disabled, then do not return values in it. */
2105 target_flags &= ~MASK_FLOAT_RETURNS;
2107 if ((x86_accumulate_outgoing_args & TUNEMASK)
2108 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2110 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2112 /* ??? Unwind info is not correct around the CFG unless either a frame
2113 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2114 unwind info generation to be aware of the CFG and propagating states
2116 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2117 || flag_exceptions || flag_non_call_exceptions)
2118 && flag_omit_frame_pointer
2119 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2121 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2122 warning (0, "unwind tables currently require either a frame pointer "
2123 "or -maccumulate-outgoing-args for correctness");
2124 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2127 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2130 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2131 p = strchr (internal_label_prefix, 'X');
2132 internal_label_prefix_len = p - internal_label_prefix;
2136 /* When scheduling description is not available, disable scheduler pass
2137 so it won't slow down the compilation and make x87 code slower. */
2138 if (!TARGET_SCHEDULE)
2139 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2142 /* switch to the appropriate section for output of DECL.
2143 DECL is either a `VAR_DECL' node or a constant of some sort.
2144 RELOC indicates whether forming the initial value of DECL requires
2145 link-time relocations. */
2148 x86_64_elf_select_section (tree decl, int reloc,
2149 unsigned HOST_WIDE_INT align)
2151 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2152 && ix86_in_large_data_p (decl))
2154 const char *sname = NULL;
2155 unsigned int flags = SECTION_WRITE;
2156 switch (categorize_decl_for_section (decl, reloc))
2161 case SECCAT_DATA_REL:
2162 sname = ".ldata.rel";
2164 case SECCAT_DATA_REL_LOCAL:
2165 sname = ".ldata.rel.local";
2167 case SECCAT_DATA_REL_RO:
2168 sname = ".ldata.rel.ro";
2170 case SECCAT_DATA_REL_RO_LOCAL:
2171 sname = ".ldata.rel.ro.local";
2175 flags |= SECTION_BSS;
2178 case SECCAT_RODATA_MERGE_STR:
2179 case SECCAT_RODATA_MERGE_STR_INIT:
2180 case SECCAT_RODATA_MERGE_CONST:
2184 case SECCAT_SRODATA:
2191 /* We don't split these for medium model. Place them into
2192 default sections and hope for best. */
2197 /* We might get called with string constants, but get_named_section
2198 doesn't like them as they are not DECLs. Also, we need to set
2199 flags in that case. */
2201 return get_section (sname, flags, NULL);
2202 return get_named_section (decl, sname, reloc);
2205 return default_elf_select_section (decl, reloc, align);
2208 /* Build up a unique section name, expressed as a
2209 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2210 RELOC indicates whether the initial value of EXP requires
2211 link-time relocations. */
2214 x86_64_elf_unique_section (tree decl, int reloc)
2216 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2217 && ix86_in_large_data_p (decl))
2219 const char *prefix = NULL;
2220 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2221 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2223 switch (categorize_decl_for_section (decl, reloc))
2226 case SECCAT_DATA_REL:
2227 case SECCAT_DATA_REL_LOCAL:
2228 case SECCAT_DATA_REL_RO:
2229 case SECCAT_DATA_REL_RO_LOCAL:
2230 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2233 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2236 case SECCAT_RODATA_MERGE_STR:
2237 case SECCAT_RODATA_MERGE_STR_INIT:
2238 case SECCAT_RODATA_MERGE_CONST:
2239 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2241 case SECCAT_SRODATA:
2248 /* We don't split these for medium model. Place them into
2249 default sections and hope for best. */
2257 plen = strlen (prefix);
2259 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2260 name = targetm.strip_name_encoding (name);
2261 nlen = strlen (name);
2263 string = alloca (nlen + plen + 1);
2264 memcpy (string, prefix, plen);
2265 memcpy (string + plen, name, nlen + 1);
2267 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2271 default_unique_section (decl, reloc);
2274 #ifdef COMMON_ASM_OP
2275 /* This says how to output assembler code to declare an
2276 uninitialized external linkage data object.
2278 For medium model x86-64 we need to use .largecomm opcode for
2281 x86_elf_aligned_common (FILE *file,
2282 const char *name, unsigned HOST_WIDE_INT size,
2285 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2286 && size > (unsigned int)ix86_section_threshold)
2287 fprintf (file, ".largecomm\t");
2289 fprintf (file, "%s", COMMON_ASM_OP);
2290 assemble_name (file, name);
2291 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2292 size, align / BITS_PER_UNIT);
2295 /* Utility function for targets to use in implementing
2296 ASM_OUTPUT_ALIGNED_BSS. */
2299 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2300 const char *name, unsigned HOST_WIDE_INT size,
2303 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2304 && size > (unsigned int)ix86_section_threshold)
2305 switch_to_section (get_named_section (decl, ".lbss", 0));
2307 switch_to_section (bss_section);
2308 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2309 #ifdef ASM_DECLARE_OBJECT_NAME
2310 last_assemble_variable_decl = decl;
2311 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2313 /* Standard thing is just output label for the object. */
2314 ASM_OUTPUT_LABEL (file, name);
2315 #endif /* ASM_DECLARE_OBJECT_NAME */
2316 ASM_OUTPUT_SKIP (file, size ? size : 1);
2321 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2323 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2324 make the problem with not enough registers even worse. */
2325 #ifdef INSN_SCHEDULING
2327 flag_schedule_insns = 0;
2331 /* The Darwin libraries never set errno, so we might as well
2332 avoid calling them when that's the only reason we would. */
2333 flag_errno_math = 0;
2335 /* The default values of these switches depend on the TARGET_64BIT
2336 that is not known at this moment. Mark these values with 2 and
2337 let user the to override these. In case there is no command line option
2338 specifying them, we will set the defaults in override_options. */
2340 flag_omit_frame_pointer = 2;
2341 flag_pcc_struct_return = 2;
2342 flag_asynchronous_unwind_tables = 2;
2343 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2344 SUBTARGET_OPTIMIZATION_OPTIONS;
2348 /* Table of valid machine attributes. */
2349 const struct attribute_spec ix86_attribute_table[] =
2351 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2352 /* Stdcall attribute says callee is responsible for popping arguments
2353 if they are not variable. */
2354 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2355 /* Fastcall attribute says callee is responsible for popping arguments
2356 if they are not variable. */
2357 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2358 /* Cdecl attribute says the callee is a normal C declaration */
2359 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2360 /* Regparm attribute specifies how many integer arguments are to be
2361 passed in registers. */
2362 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2363 /* Sseregparm attribute says we are using x86_64 calling conventions
2364 for FP arguments. */
2365 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2366 /* force_align_arg_pointer says this function realigns the stack at entry. */
2367 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2368 false, true, true, ix86_handle_cconv_attribute },
2369 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2370 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2371 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2372 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2374 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2375 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2376 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2377 SUBTARGET_ATTRIBUTE_TABLE,
2379 { NULL, 0, 0, false, false, false, NULL }
2382 /* Decide whether we can make a sibling call to a function. DECL is the
2383 declaration of the function being targeted by the call and EXP is the
2384 CALL_EXPR representing the call. */
2387 ix86_function_ok_for_sibcall (tree decl, tree exp)
2392 /* If we are generating position-independent code, we cannot sibcall
2393 optimize any indirect call, or a direct call to a global function,
2394 as the PLT requires %ebx be live. */
2395 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2402 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2403 if (POINTER_TYPE_P (func))
2404 func = TREE_TYPE (func);
2407 /* Check that the return value locations are the same. Like
2408 if we are returning floats on the 80387 register stack, we cannot
2409 make a sibcall from a function that doesn't return a float to a
2410 function that does or, conversely, from a function that does return
2411 a float to a function that doesn't; the necessary stack adjustment
2412 would not be executed. This is also the place we notice
2413 differences in the return value ABI. Note that it is ok for one
2414 of the functions to have void return type as long as the return
2415 value of the other is passed in a register. */
2416 a = ix86_function_value (TREE_TYPE (exp), func, false);
2417 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2419 if (STACK_REG_P (a) || STACK_REG_P (b))
2421 if (!rtx_equal_p (a, b))
2424 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2426 else if (!rtx_equal_p (a, b))
2429 /* If this call is indirect, we'll need to be able to use a call-clobbered
2430 register for the address of the target function. Make sure that all
2431 such registers are not used for passing parameters. */
2432 if (!decl && !TARGET_64BIT)
2436 /* We're looking at the CALL_EXPR, we need the type of the function. */
2437 type = TREE_OPERAND (exp, 0); /* pointer expression */
2438 type = TREE_TYPE (type); /* pointer type */
2439 type = TREE_TYPE (type); /* function type */
2441 if (ix86_function_regparm (type, NULL) >= 3)
2443 /* ??? Need to count the actual number of registers to be used,
2444 not the possible number of registers. Fix later. */
2449 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2450 /* Dllimport'd functions are also called indirectly. */
2451 if (decl && DECL_DLLIMPORT_P (decl)
2452 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2456 /* If we forced aligned the stack, then sibcalling would unalign the
2457 stack, which may break the called function. */
2458 if (cfun->machine->force_align_arg_pointer)
2461 /* Otherwise okay. That also includes certain types of indirect calls. */
2465 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2466 calling convention attributes;
2467 arguments as in struct attribute_spec.handler. */
2470 ix86_handle_cconv_attribute (tree *node, tree name,
2472 int flags ATTRIBUTE_UNUSED,
2475 if (TREE_CODE (*node) != FUNCTION_TYPE
2476 && TREE_CODE (*node) != METHOD_TYPE
2477 && TREE_CODE (*node) != FIELD_DECL
2478 && TREE_CODE (*node) != TYPE_DECL)
2480 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2481 IDENTIFIER_POINTER (name));
2482 *no_add_attrs = true;
2486 /* Can combine regparm with all attributes but fastcall. */
2487 if (is_attribute_p ("regparm", name))
2491 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2493 error ("fastcall and regparm attributes are not compatible");
2496 cst = TREE_VALUE (args);
2497 if (TREE_CODE (cst) != INTEGER_CST)
2499 warning (OPT_Wattributes,
2500 "%qs attribute requires an integer constant argument",
2501 IDENTIFIER_POINTER (name));
2502 *no_add_attrs = true;
2504 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2506 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2507 IDENTIFIER_POINTER (name), REGPARM_MAX);
2508 *no_add_attrs = true;
2512 && lookup_attribute (ix86_force_align_arg_pointer_string,
2513 TYPE_ATTRIBUTES (*node))
2514 && compare_tree_int (cst, REGPARM_MAX-1))
2516 error ("%s functions limited to %d register parameters",
2517 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2525 warning (OPT_Wattributes, "%qs attribute ignored",
2526 IDENTIFIER_POINTER (name));
2527 *no_add_attrs = true;
2531 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2532 if (is_attribute_p ("fastcall", name))
2534 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2536 error ("fastcall and cdecl attributes are not compatible");
2538 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2540 error ("fastcall and stdcall attributes are not compatible");
2542 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2544 error ("fastcall and regparm attributes are not compatible");
2548 /* Can combine stdcall with fastcall (redundant), regparm and
2550 else if (is_attribute_p ("stdcall", name))
2552 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2554 error ("stdcall and cdecl attributes are not compatible");
2556 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2558 error ("stdcall and fastcall attributes are not compatible");
2562 /* Can combine cdecl with regparm and sseregparm. */
2563 else if (is_attribute_p ("cdecl", name))
2565 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2567 error ("stdcall and cdecl attributes are not compatible");
2569 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2571 error ("fastcall and cdecl attributes are not compatible");
2575 /* Can combine sseregparm with all attributes. */
2580 /* Return 0 if the attributes for two types are incompatible, 1 if they
2581 are compatible, and 2 if they are nearly compatible (which causes a
2582 warning to be generated). */
2585 ix86_comp_type_attributes (tree type1, tree type2)
2587 /* Check for mismatch of non-default calling convention. */
2588 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2590 if (TREE_CODE (type1) != FUNCTION_TYPE)
2593 /* Check for mismatched fastcall/regparm types. */
2594 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2595 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2596 || (ix86_function_regparm (type1, NULL)
2597 != ix86_function_regparm (type2, NULL)))
2600 /* Check for mismatched sseregparm types. */
2601 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2602 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2605 /* Check for mismatched return types (cdecl vs stdcall). */
2606 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2607 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2613 /* Return the regparm value for a function with the indicated TYPE and DECL.
2614 DECL may be NULL when calling function indirectly
2615 or considering a libcall. */
2618 ix86_function_regparm (tree type, tree decl)
2621 int regparm = ix86_regparm;
2622 bool user_convention = false;
2626 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2629 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2630 user_convention = true;
2633 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2636 user_convention = true;
2639 /* Use register calling convention for local functions when possible. */
2640 if (!TARGET_64BIT && !user_convention && decl
2641 && flag_unit_at_a_time && !profile_flag)
2643 struct cgraph_local_info *i = cgraph_local_info (decl);
2646 int local_regparm, globals = 0, regno;
2648 /* Make sure no regparm register is taken by a global register
2650 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2651 if (global_regs[local_regparm])
2653 /* We can't use regparm(3) for nested functions as these use
2654 static chain pointer in third argument. */
2655 if (local_regparm == 3
2656 && decl_function_context (decl)
2657 && !DECL_NO_STATIC_CHAIN (decl))
2659 /* If the function realigns its stackpointer, the
2660 prologue will clobber %ecx. If we've already
2661 generated code for the callee, the callee
2662 DECL_STRUCT_FUNCTION is gone, so we fall back to
2663 scanning the attributes for the self-realigning
2665 if ((DECL_STRUCT_FUNCTION (decl)
2666 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2667 || (!DECL_STRUCT_FUNCTION (decl)
2668 && lookup_attribute (ix86_force_align_arg_pointer_string,
2669 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2671 /* Each global register variable increases register preassure,
2672 so the more global reg vars there are, the smaller regparm
2673 optimization use, unless requested by the user explicitly. */
2674 for (regno = 0; regno < 6; regno++)
2675 if (global_regs[regno])
2678 = globals < local_regparm ? local_regparm - globals : 0;
2680 if (local_regparm > regparm)
2681 regparm = local_regparm;
2688 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2689 DFmode (2) arguments in SSE registers for a function with the
2690 indicated TYPE and DECL. DECL may be NULL when calling function
2691 indirectly or considering a libcall. Otherwise return 0. */
2694 ix86_function_sseregparm (tree type, tree decl)
2696 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2697 by the sseregparm attribute. */
2698 if (TARGET_SSEREGPARM
2700 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2705 error ("Calling %qD with attribute sseregparm without "
2706 "SSE/SSE2 enabled", decl);
2708 error ("Calling %qT with attribute sseregparm without "
2709 "SSE/SSE2 enabled", type);
2716 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2717 (and DFmode for SSE2) arguments in SSE registers,
2718 even for 32-bit targets. */
2719 if (!TARGET_64BIT && decl
2720 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2722 struct cgraph_local_info *i = cgraph_local_info (decl);
2724 return TARGET_SSE2 ? 2 : 1;
2730 /* Return true if EAX is live at the start of the function. Used by
2731 ix86_expand_prologue to determine if we need special help before
2732 calling allocate_stack_worker. */
2735 ix86_eax_live_at_start_p (void)
2737 /* Cheat. Don't bother working forward from ix86_function_regparm
2738 to the function type to whether an actual argument is located in
2739 eax. Instead just look at cfg info, which is still close enough
2740 to correct at this point. This gives false positives for broken
2741 functions that might use uninitialized data that happens to be
2742 allocated in eax, but who cares? */
2743 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2746 /* Value is the number of bytes of arguments automatically
2747 popped when returning from a subroutine call.
2748 FUNDECL is the declaration node of the function (as a tree),
2749 FUNTYPE is the data type of the function (as a tree),
2750 or for a library call it is an identifier node for the subroutine name.
2751 SIZE is the number of bytes of arguments passed on the stack.
2753 On the 80386, the RTD insn may be used to pop them if the number
2754 of args is fixed, but if the number is variable then the caller
2755 must pop them all. RTD can't be used for library calls now
2756 because the library is compiled with the Unix compiler.
2757 Use of RTD is a selectable option, since it is incompatible with
2758 standard Unix calling sequences. If the option is not selected,
2759 the caller must always pop the args.
2761 The attribute stdcall is equivalent to RTD on a per module basis. */
2764 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2766 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2768 /* Cdecl functions override -mrtd, and never pop the stack. */
2769 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2771 /* Stdcall and fastcall functions will pop the stack if not
2773 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2774 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2778 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2779 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2780 == void_type_node)))
2784 /* Lose any fake structure return argument if it is passed on the stack. */
2785 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2787 && !KEEP_AGGREGATE_RETURN_POINTER)
2789 int nregs = ix86_function_regparm (funtype, fundecl);
2792 return GET_MODE_SIZE (Pmode);
2798 /* Argument support functions. */
2800 /* Return true when register may be used to pass function parameters. */
2802 ix86_function_arg_regno_p (int regno)
2808 return (regno < REGPARM_MAX
2809 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2811 return (regno < REGPARM_MAX
2812 || (TARGET_MMX && MMX_REGNO_P (regno)
2813 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2814 || (TARGET_SSE && SSE_REGNO_P (regno)
2815 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2820 if (SSE_REGNO_P (regno) && TARGET_SSE)
2825 if (TARGET_SSE && SSE_REGNO_P (regno)
2826 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2829 /* RAX is used as hidden argument to va_arg functions. */
2832 for (i = 0; i < REGPARM_MAX; i++)
2833 if (regno == x86_64_int_parameter_registers[i])
2838 /* Return if we do not know how to pass TYPE solely in registers. */
2841 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2843 if (must_pass_in_stack_var_size_or_pad (mode, type))
2846 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2847 The layout_type routine is crafty and tries to trick us into passing
2848 currently unsupported vector types on the stack by using TImode. */
2849 return (!TARGET_64BIT && mode == TImode
2850 && type && TREE_CODE (type) != VECTOR_TYPE);
2853 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2854 for a call to a function whose data type is FNTYPE.
2855 For a library call, FNTYPE is 0. */
2858 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2859 tree fntype, /* tree ptr for function decl */
2860 rtx libname, /* SYMBOL_REF of library name or 0 */
2863 static CUMULATIVE_ARGS zero_cum;
2864 tree param, next_param;
2866 if (TARGET_DEBUG_ARG)
2868 fprintf (stderr, "\ninit_cumulative_args (");
2870 fprintf (stderr, "fntype code = %s, ret code = %s",
2871 tree_code_name[(int) TREE_CODE (fntype)],
2872 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2874 fprintf (stderr, "no fntype");
2877 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2882 /* Set up the number of registers to use for passing arguments. */
2883 cum->nregs = ix86_regparm;
2885 cum->sse_nregs = SSE_REGPARM_MAX;
2887 cum->mmx_nregs = MMX_REGPARM_MAX;
2888 cum->warn_sse = true;
2889 cum->warn_mmx = true;
2890 cum->maybe_vaarg = false;
2892 /* Use ecx and edx registers if function has fastcall attribute,
2893 else look for regparm information. */
2894 if (fntype && !TARGET_64BIT)
2896 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2902 cum->nregs = ix86_function_regparm (fntype, fndecl);
2905 /* Set up the number of SSE registers used for passing SFmode
2906 and DFmode arguments. Warn for mismatching ABI. */
2907 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2909 /* Determine if this function has variable arguments. This is
2910 indicated by the last argument being 'void_type_mode' if there
2911 are no variable arguments. If there are variable arguments, then
2912 we won't pass anything in registers in 32-bit mode. */
2914 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2916 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2917 param != 0; param = next_param)
2919 next_param = TREE_CHAIN (param);
2920 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2930 cum->float_in_sse = 0;
2932 cum->maybe_vaarg = true;
2936 if ((!fntype && !libname)
2937 || (fntype && !TYPE_ARG_TYPES (fntype)))
2938 cum->maybe_vaarg = true;
2940 if (TARGET_DEBUG_ARG)
2941 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2946 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2947 But in the case of vector types, it is some vector mode.
2949 When we have only some of our vector isa extensions enabled, then there
2950 are some modes for which vector_mode_supported_p is false. For these
2951 modes, the generic vector support in gcc will choose some non-vector mode
2952 in order to implement the type. By computing the natural mode, we'll
2953 select the proper ABI location for the operand and not depend on whatever
2954 the middle-end decides to do with these vector types. */
2956 static enum machine_mode
2957 type_natural_mode (tree type)
2959 enum machine_mode mode = TYPE_MODE (type);
2961 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2963 HOST_WIDE_INT size = int_size_in_bytes (type);
2964 if ((size == 8 || size == 16)
2965 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2966 && TYPE_VECTOR_SUBPARTS (type) > 1)
2968 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2970 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2971 mode = MIN_MODE_VECTOR_FLOAT;
2973 mode = MIN_MODE_VECTOR_INT;
2975 /* Get the mode which has this inner mode and number of units. */
2976 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2977 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2978 && GET_MODE_INNER (mode) == innermode)
2988 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2989 this may not agree with the mode that the type system has chosen for the
2990 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2991 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2994 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2999 if (orig_mode != BLKmode)
3000 tmp = gen_rtx_REG (orig_mode, regno);
3003 tmp = gen_rtx_REG (mode, regno);
3004 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3005 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3011 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3012 of this code is to classify each 8bytes of incoming argument by the register
3013 class and assign registers accordingly. */
3015 /* Return the union class of CLASS1 and CLASS2.
3016 See the x86-64 PS ABI for details. */
3018 static enum x86_64_reg_class
3019 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3021 /* Rule #1: If both classes are equal, this is the resulting class. */
3022 if (class1 == class2)
3025 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3027 if (class1 == X86_64_NO_CLASS)
3029 if (class2 == X86_64_NO_CLASS)
3032 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3033 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3034 return X86_64_MEMORY_CLASS;
3036 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3037 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3038 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3039 return X86_64_INTEGERSI_CLASS;
3040 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3041 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3042 return X86_64_INTEGER_CLASS;
3044 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3046 if (class1 == X86_64_X87_CLASS
3047 || class1 == X86_64_X87UP_CLASS
3048 || class1 == X86_64_COMPLEX_X87_CLASS
3049 || class2 == X86_64_X87_CLASS
3050 || class2 == X86_64_X87UP_CLASS
3051 || class2 == X86_64_COMPLEX_X87_CLASS)
3052 return X86_64_MEMORY_CLASS;
3054 /* Rule #6: Otherwise class SSE is used. */
3055 return X86_64_SSE_CLASS;
3058 /* Classify the argument of type TYPE and mode MODE.
3059 CLASSES will be filled by the register class used to pass each word
3060 of the operand. The number of words is returned. In case the parameter
3061 should be passed in memory, 0 is returned. As a special case for zero
3062 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3064 BIT_OFFSET is used internally for handling records and specifies offset
3065 of the offset in bits modulo 256 to avoid overflow cases.
3067 See the x86-64 PS ABI for details.
3071 classify_argument (enum machine_mode mode, tree type,
3072 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3074 HOST_WIDE_INT bytes =
3075 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3076 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3078 /* Variable sized entities are always passed/returned in memory. */
3082 if (mode != VOIDmode
3083 && targetm.calls.must_pass_in_stack (mode, type))
3086 if (type && AGGREGATE_TYPE_P (type))
3090 enum x86_64_reg_class subclasses[MAX_CLASSES];
3092 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3096 for (i = 0; i < words; i++)
3097 classes[i] = X86_64_NO_CLASS;
3099 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3100 signalize memory class, so handle it as special case. */
3103 classes[0] = X86_64_NO_CLASS;
3107 /* Classify each field of record and merge classes. */
3108 switch (TREE_CODE (type))
3111 /* For classes first merge in the field of the subclasses. */
3112 if (TYPE_BINFO (type))
3114 tree binfo, base_binfo;
3117 for (binfo = TYPE_BINFO (type), basenum = 0;
3118 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3121 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3122 tree type = BINFO_TYPE (base_binfo);
3124 num = classify_argument (TYPE_MODE (type),
3126 (offset + bit_offset) % 256);
3129 for (i = 0; i < num; i++)
3131 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3133 merge_classes (subclasses[i], classes[i + pos]);
3137 /* And now merge the fields of structure. */
3138 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3140 if (TREE_CODE (field) == FIELD_DECL)
3144 if (TREE_TYPE (field) == error_mark_node)
3147 /* Bitfields are always classified as integer. Handle them
3148 early, since later code would consider them to be
3149 misaligned integers. */
3150 if (DECL_BIT_FIELD (field))
3152 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3153 i < ((int_bit_position (field) + (bit_offset % 64))
3154 + tree_low_cst (DECL_SIZE (field), 0)
3157 merge_classes (X86_64_INTEGER_CLASS,
3162 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3163 TREE_TYPE (field), subclasses,
3164 (int_bit_position (field)
3165 + bit_offset) % 256);
3168 for (i = 0; i < num; i++)
3171 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3173 merge_classes (subclasses[i], classes[i + pos]);
3181 /* Arrays are handled as small records. */
3184 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3185 TREE_TYPE (type), subclasses, bit_offset);
3189 /* The partial classes are now full classes. */
3190 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3191 subclasses[0] = X86_64_SSE_CLASS;
3192 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3193 subclasses[0] = X86_64_INTEGER_CLASS;
3195 for (i = 0; i < words; i++)
3196 classes[i] = subclasses[i % num];
3201 case QUAL_UNION_TYPE:
3202 /* Unions are similar to RECORD_TYPE but offset is always 0.
3205 /* Unions are not derived. */
3206 gcc_assert (!TYPE_BINFO (type)
3207 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3208 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3210 if (TREE_CODE (field) == FIELD_DECL)
3214 if (TREE_TYPE (field) == error_mark_node)
3217 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3218 TREE_TYPE (field), subclasses,
3222 for (i = 0; i < num; i++)
3223 classes[i] = merge_classes (subclasses[i], classes[i]);
3232 /* Final merger cleanup. */
3233 for (i = 0; i < words; i++)
3235 /* If one class is MEMORY, everything should be passed in
3237 if (classes[i] == X86_64_MEMORY_CLASS)
3240 /* The X86_64_SSEUP_CLASS should be always preceded by
3241 X86_64_SSE_CLASS. */
3242 if (classes[i] == X86_64_SSEUP_CLASS
3243 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3244 classes[i] = X86_64_SSE_CLASS;
3246 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3247 if (classes[i] == X86_64_X87UP_CLASS
3248 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3249 classes[i] = X86_64_SSE_CLASS;
3254 /* Compute alignment needed. We align all types to natural boundaries with
3255 exception of XFmode that is aligned to 64bits. */
3256 if (mode != VOIDmode && mode != BLKmode)
3258 int mode_alignment = GET_MODE_BITSIZE (mode);
3261 mode_alignment = 128;
3262 else if (mode == XCmode)
3263 mode_alignment = 256;
3264 if (COMPLEX_MODE_P (mode))
3265 mode_alignment /= 2;
3266 /* Misaligned fields are always returned in memory. */
3267 if (bit_offset % mode_alignment)
3271 /* for V1xx modes, just use the base mode */
3272 if (VECTOR_MODE_P (mode)
3273 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3274 mode = GET_MODE_INNER (mode);
3276 /* Classification of atomic types. */
3281 classes[0] = X86_64_SSE_CLASS;
3284 classes[0] = X86_64_SSE_CLASS;
3285 classes[1] = X86_64_SSEUP_CLASS;
3294 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3295 classes[0] = X86_64_INTEGERSI_CLASS;
3297 classes[0] = X86_64_INTEGER_CLASS;
3301 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3306 if (!(bit_offset % 64))
3307 classes[0] = X86_64_SSESF_CLASS;
3309 classes[0] = X86_64_SSE_CLASS;
3312 classes[0] = X86_64_SSEDF_CLASS;
3315 classes[0] = X86_64_X87_CLASS;
3316 classes[1] = X86_64_X87UP_CLASS;
3319 classes[0] = X86_64_SSE_CLASS;
3320 classes[1] = X86_64_SSEUP_CLASS;
3323 classes[0] = X86_64_SSE_CLASS;
3326 classes[0] = X86_64_SSEDF_CLASS;
3327 classes[1] = X86_64_SSEDF_CLASS;
3330 classes[0] = X86_64_COMPLEX_X87_CLASS;
3333 /* This modes is larger than 16 bytes. */
3341 classes[0] = X86_64_SSE_CLASS;
3342 classes[1] = X86_64_SSEUP_CLASS;
3348 classes[0] = X86_64_SSE_CLASS;
3354 gcc_assert (VECTOR_MODE_P (mode));
3359 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3361 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3362 classes[0] = X86_64_INTEGERSI_CLASS;
3364 classes[0] = X86_64_INTEGER_CLASS;
3365 classes[1] = X86_64_INTEGER_CLASS;
3366 return 1 + (bytes > 8);
3370 /* Examine the argument and return set number of register required in each
3371 class. Return 0 iff parameter should be passed in memory. */
3373 examine_argument (enum machine_mode mode, tree type, int in_return,
3374 int *int_nregs, int *sse_nregs)
3376 enum x86_64_reg_class class[MAX_CLASSES];
3377 int n = classify_argument (mode, type, class, 0);
3383 for (n--; n >= 0; n--)
3386 case X86_64_INTEGER_CLASS:
3387 case X86_64_INTEGERSI_CLASS:
3390 case X86_64_SSE_CLASS:
3391 case X86_64_SSESF_CLASS:
3392 case X86_64_SSEDF_CLASS:
3395 case X86_64_NO_CLASS:
3396 case X86_64_SSEUP_CLASS:
3398 case X86_64_X87_CLASS:
3399 case X86_64_X87UP_CLASS:
3403 case X86_64_COMPLEX_X87_CLASS:
3404 return in_return ? 2 : 0;
3405 case X86_64_MEMORY_CLASS:
3411 /* Construct container for the argument used by GCC interface. See
3412 FUNCTION_ARG for the detailed description. */
3415 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3416 tree type, int in_return, int nintregs, int nsseregs,
3417 const int *intreg, int sse_regno)
3419 /* The following variables hold the static issued_error state. */
3420 static bool issued_sse_arg_error;
3421 static bool issued_sse_ret_error;
3422 static bool issued_x87_ret_error;
3424 enum machine_mode tmpmode;
3426 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3427 enum x86_64_reg_class class[MAX_CLASSES];
3431 int needed_sseregs, needed_intregs;
3432 rtx exp[MAX_CLASSES];
3435 n = classify_argument (mode, type, class, 0);
3436 if (TARGET_DEBUG_ARG)
3439 fprintf (stderr, "Memory class\n");
3442 fprintf (stderr, "Classes:");
3443 for (i = 0; i < n; i++)
3445 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3447 fprintf (stderr, "\n");
3452 if (!examine_argument (mode, type, in_return, &needed_intregs,
3455 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3458 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3459 some less clueful developer tries to use floating-point anyway. */
3460 if (needed_sseregs && !TARGET_SSE)
3464 if (!issued_sse_ret_error)
3466 error ("SSE register return with SSE disabled");
3467 issued_sse_ret_error = true;
3470 else if (!issued_sse_arg_error)
3472 error ("SSE register argument with SSE disabled");
3473 issued_sse_arg_error = true;
3478 /* Likewise, error if the ABI requires us to return values in the
3479 x87 registers and the user specified -mno-80387. */
3480 if (!TARGET_80387 && in_return)
3481 for (i = 0; i < n; i++)
3482 if (class[i] == X86_64_X87_CLASS
3483 || class[i] == X86_64_X87UP_CLASS
3484 || class[i] == X86_64_COMPLEX_X87_CLASS)
3486 if (!issued_x87_ret_error)
3488 error ("x87 register return with x87 disabled");
3489 issued_x87_ret_error = true;
3494 /* First construct simple cases. Avoid SCmode, since we want to use
3495 single register to pass this type. */
3496 if (n == 1 && mode != SCmode)
3499 case X86_64_INTEGER_CLASS:
3500 case X86_64_INTEGERSI_CLASS:
3501 return gen_rtx_REG (mode, intreg[0]);
3502 case X86_64_SSE_CLASS:
3503 case X86_64_SSESF_CLASS:
3504 case X86_64_SSEDF_CLASS:
3505 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3506 case X86_64_X87_CLASS:
3507 case X86_64_COMPLEX_X87_CLASS:
3508 return gen_rtx_REG (mode, FIRST_STACK_REG);
3509 case X86_64_NO_CLASS:
3510 /* Zero sized array, struct or class. */
3515 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3517 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3519 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3520 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3521 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3522 && class[1] == X86_64_INTEGER_CLASS
3523 && (mode == CDImode || mode == TImode || mode == TFmode)
3524 && intreg[0] + 1 == intreg[1])
3525 return gen_rtx_REG (mode, intreg[0]);
3527 /* Otherwise figure out the entries of the PARALLEL. */
3528 for (i = 0; i < n; i++)
3532 case X86_64_NO_CLASS:
3534 case X86_64_INTEGER_CLASS:
3535 case X86_64_INTEGERSI_CLASS:
3536 /* Merge TImodes on aligned occasions here too. */
3537 if (i * 8 + 8 > bytes)
3538 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3539 else if (class[i] == X86_64_INTEGERSI_CLASS)
3543 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3544 if (tmpmode == BLKmode)
3546 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3547 gen_rtx_REG (tmpmode, *intreg),
3551 case X86_64_SSESF_CLASS:
3552 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3553 gen_rtx_REG (SFmode,
3554 SSE_REGNO (sse_regno)),
3558 case X86_64_SSEDF_CLASS:
3559 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3560 gen_rtx_REG (DFmode,
3561 SSE_REGNO (sse_regno)),
3565 case X86_64_SSE_CLASS:
3566 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3570 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3571 gen_rtx_REG (tmpmode,
3572 SSE_REGNO (sse_regno)),
3574 if (tmpmode == TImode)
3583 /* Empty aligned struct, union or class. */
3587 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3588 for (i = 0; i < nexps; i++)
3589 XVECEXP (ret, 0, i) = exp [i];
3593 /* Update the data in CUM to advance over an argument
3594 of mode MODE and data type TYPE.
3595 (TYPE is null for libcalls where that information may not be available.) */
3598 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3599 tree type, int named)
3602 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3603 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3606 mode = type_natural_mode (type);
3608 if (TARGET_DEBUG_ARG)
3609 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3610 "mode=%s, named=%d)\n\n",
3611 words, cum->words, cum->nregs, cum->sse_nregs,
3612 GET_MODE_NAME (mode), named);
3616 int int_nregs, sse_nregs;
3617 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3618 cum->words += words;
3619 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3621 cum->nregs -= int_nregs;
3622 cum->sse_nregs -= sse_nregs;
3623 cum->regno += int_nregs;
3624 cum->sse_regno += sse_nregs;
3627 cum->words += words;
3645 cum->words += words;
3646 cum->nregs -= words;
3647 cum->regno += words;
3649 if (cum->nregs <= 0)
3657 if (cum->float_in_sse < 2)
3660 if (cum->float_in_sse < 1)
3671 if (!type || !AGGREGATE_TYPE_P (type))
3673 cum->sse_words += words;
3674 cum->sse_nregs -= 1;
3675 cum->sse_regno += 1;
3676 if (cum->sse_nregs <= 0)
3688 if (!type || !AGGREGATE_TYPE_P (type))
3690 cum->mmx_words += words;
3691 cum->mmx_nregs -= 1;
3692 cum->mmx_regno += 1;
3693 if (cum->mmx_nregs <= 0)
3704 /* Define where to put the arguments to a function.
3705 Value is zero to push the argument on the stack,
3706 or a hard register in which to store the argument.
3708 MODE is the argument's machine mode.
3709 TYPE is the data type of the argument (as a tree).
3710 This is null for libcalls where that information may
3712 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3713 the preceding args and about the function being called.
3714 NAMED is nonzero if this argument is a named parameter
3715 (otherwise it is an extra parameter matching an ellipsis). */
3718 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3719 tree type, int named)
3721 enum machine_mode mode = orig_mode;
3724 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3725 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3726 static bool warnedsse, warnedmmx;
3728 /* To simplify the code below, represent vector types with a vector mode
3729 even if MMX/SSE are not active. */
3730 if (type && TREE_CODE (type) == VECTOR_TYPE)
3731 mode = type_natural_mode (type);
3733 /* Handle a hidden AL argument containing number of registers for varargs
3734 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3736 if (mode == VOIDmode)
3739 return GEN_INT (cum->maybe_vaarg
3740 ? (cum->sse_nregs < 0
3748 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3750 &x86_64_int_parameter_registers [cum->regno],
3755 /* For now, pass fp/complex values on the stack. */
3767 if (words <= cum->nregs)
3769 int regno = cum->regno;
3771 /* Fastcall allocates the first two DWORD (SImode) or
3772 smaller arguments to ECX and EDX. */
3775 if (mode == BLKmode || mode == DImode)
3778 /* ECX not EAX is the first allocated register. */
3782 ret = gen_rtx_REG (mode, regno);
3786 if (cum->float_in_sse < 2)
3789 if (cum->float_in_sse < 1)
3799 if (!type || !AGGREGATE_TYPE_P (type))
3801 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3804 warning (0, "SSE vector argument without SSE enabled "
3808 ret = gen_reg_or_parallel (mode, orig_mode,
3809 cum->sse_regno + FIRST_SSE_REG);
3816 if (!type || !AGGREGATE_TYPE_P (type))
3818 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3821 warning (0, "MMX vector argument without MMX enabled "
3825 ret = gen_reg_or_parallel (mode, orig_mode,
3826 cum->mmx_regno + FIRST_MMX_REG);
3831 if (TARGET_DEBUG_ARG)
3834 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3835 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3838 print_simple_rtl (stderr, ret);
3840 fprintf (stderr, ", stack");
3842 fprintf (stderr, " )\n");
3848 /* A C expression that indicates when an argument must be passed by
3849 reference. If nonzero for an argument, a copy of that argument is
3850 made in memory and a pointer to the argument is passed instead of
3851 the argument itself. The pointer is passed in whatever way is
3852 appropriate for passing a pointer to that type. */
3855 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3856 enum machine_mode mode ATTRIBUTE_UNUSED,
3857 tree type, bool named ATTRIBUTE_UNUSED)
3862 if (type && int_size_in_bytes (type) == -1)
3864 if (TARGET_DEBUG_ARG)
3865 fprintf (stderr, "function_arg_pass_by_reference\n");
3872 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3873 ABI. Only called if TARGET_SSE. */
3875 contains_128bit_aligned_vector_p (tree type)
3877 enum machine_mode mode = TYPE_MODE (type);
3878 if (SSE_REG_MODE_P (mode)
3879 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3881 if (TYPE_ALIGN (type) < 128)
3884 if (AGGREGATE_TYPE_P (type))
3886 /* Walk the aggregates recursively. */
3887 switch (TREE_CODE (type))
3891 case QUAL_UNION_TYPE:
3895 if (TYPE_BINFO (type))
3897 tree binfo, base_binfo;
3900 for (binfo = TYPE_BINFO (type), i = 0;
3901 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3902 if (contains_128bit_aligned_vector_p
3903 (BINFO_TYPE (base_binfo)))
3906 /* And now merge the fields of structure. */
3907 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3909 if (TREE_CODE (field) == FIELD_DECL
3910 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3917 /* Just for use if some languages passes arrays by value. */
3918 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3929 /* Gives the alignment boundary, in bits, of an argument with the
3930 specified mode and type. */
3933 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3937 align = TYPE_ALIGN (type);
3939 align = GET_MODE_ALIGNMENT (mode);
3940 if (align < PARM_BOUNDARY)
3941 align = PARM_BOUNDARY;
3944 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3945 make an exception for SSE modes since these require 128bit
3948 The handling here differs from field_alignment. ICC aligns MMX
3949 arguments to 4 byte boundaries, while structure fields are aligned
3950 to 8 byte boundaries. */
3952 align = PARM_BOUNDARY;
3955 if (!SSE_REG_MODE_P (mode))
3956 align = PARM_BOUNDARY;
3960 if (!contains_128bit_aligned_vector_p (type))
3961 align = PARM_BOUNDARY;
3969 /* Return true if N is a possible register number of function value. */
3971 ix86_function_value_regno_p (int regno)
3977 return ((regno) == 0
3978 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3979 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3981 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3982 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3983 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3988 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3989 || (regno == FIRST_SSE_REG && TARGET_SSE))
3993 && (regno == FIRST_MMX_REG && TARGET_MMX))
4000 /* Define how to find the value returned by a function.
4001 VALTYPE is the data type of the value (as a tree).
4002 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4003 otherwise, FUNC is 0. */
4005 ix86_function_value (tree valtype, tree fntype_or_decl,
4006 bool outgoing ATTRIBUTE_UNUSED)
4008 enum machine_mode natmode = type_natural_mode (valtype);
4012 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4013 1, REGPARM_MAX, SSE_REGPARM_MAX,
4014 x86_64_int_return_registers, 0);
4015 /* For zero sized structures, construct_container return NULL, but we
4016 need to keep rest of compiler happy by returning meaningful value. */
4018 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4023 tree fn = NULL_TREE, fntype;
4025 && DECL_P (fntype_or_decl))
4026 fn = fntype_or_decl;
4027 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4028 return gen_rtx_REG (TYPE_MODE (valtype),
4029 ix86_value_regno (natmode, fn, fntype));
4033 /* Return true iff type is returned in memory. */
4035 ix86_return_in_memory (tree type)
4037 int needed_intregs, needed_sseregs, size;
4038 enum machine_mode mode = type_natural_mode (type);
4041 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4043 if (mode == BLKmode)
4046 size = int_size_in_bytes (type);
4048 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4051 if (VECTOR_MODE_P (mode) || mode == TImode)
4053 /* User-created vectors small enough to fit in EAX. */
4057 /* MMX/3dNow values are returned in MM0,
4058 except when it doesn't exits. */
4060 return (TARGET_MMX ? 0 : 1);
4062 /* SSE values are returned in XMM0, except when it doesn't exist. */
4064 return (TARGET_SSE ? 0 : 1);
4078 /* When returning SSE vector types, we have a choice of either
4079 (1) being abi incompatible with a -march switch, or
4080 (2) generating an error.
4081 Given no good solution, I think the safest thing is one warning.
4082 The user won't be able to use -Werror, but....
4084 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4085 called in response to actually generating a caller or callee that
4086 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4087 via aggregate_value_p for general type probing from tree-ssa. */
4090 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4092 static bool warnedsse, warnedmmx;
4096 /* Look at the return type of the function, not the function type. */
4097 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4099 if (!TARGET_SSE && !warnedsse)
4102 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4105 warning (0, "SSE vector return without SSE enabled "
4110 if (!TARGET_MMX && !warnedmmx)
4112 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4115 warning (0, "MMX vector return without MMX enabled "
4124 /* Define how to find the value returned by a library function
4125 assuming the value has mode MODE. */
4127 ix86_libcall_value (enum machine_mode mode)
4141 return gen_rtx_REG (mode, FIRST_SSE_REG);
4144 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4148 return gen_rtx_REG (mode, 0);
4152 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4155 /* Given a mode, return the register to use for a return value. */
4158 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4160 gcc_assert (!TARGET_64BIT);
4162 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4163 we normally prevent this case when mmx is not available. However
4164 some ABIs may require the result to be returned like DImode. */
4165 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4166 return TARGET_MMX ? FIRST_MMX_REG : 0;
4168 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4169 we prevent this case when sse is not available. However some ABIs
4170 may require the result to be returned like integer TImode. */
4171 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4172 return TARGET_SSE ? FIRST_SSE_REG : 0;
4174 /* Decimal floating point values can go in %eax, unlike other float modes. */
4175 if (DECIMAL_FLOAT_MODE_P (mode))
4178 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4179 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4182 /* Floating point return values in %st(0), except for local functions when
4183 SSE math is enabled or for functions with sseregparm attribute. */
4184 if ((func || fntype)
4185 && (mode == SFmode || mode == DFmode))
4187 int sse_level = ix86_function_sseregparm (fntype, func);
4188 if ((sse_level >= 1 && mode == SFmode)
4189 || (sse_level == 2 && mode == DFmode))
4190 return FIRST_SSE_REG;
4193 return FIRST_FLOAT_REG;
4196 /* Create the va_list data type. */
4199 ix86_build_builtin_va_list (void)
4201 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4203 /* For i386 we use plain pointer to argument area. */
4205 return build_pointer_type (char_type_node);
4207 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4208 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4210 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4211 unsigned_type_node);
4212 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4213 unsigned_type_node);
4214 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4216 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4219 va_list_gpr_counter_field = f_gpr;
4220 va_list_fpr_counter_field = f_fpr;
4222 DECL_FIELD_CONTEXT (f_gpr) = record;
4223 DECL_FIELD_CONTEXT (f_fpr) = record;
4224 DECL_FIELD_CONTEXT (f_ovf) = record;
4225 DECL_FIELD_CONTEXT (f_sav) = record;
4227 TREE_CHAIN (record) = type_decl;
4228 TYPE_NAME (record) = type_decl;
4229 TYPE_FIELDS (record) = f_gpr;
4230 TREE_CHAIN (f_gpr) = f_fpr;
4231 TREE_CHAIN (f_fpr) = f_ovf;
4232 TREE_CHAIN (f_ovf) = f_sav;
4234 layout_type (record);
4236 /* The correct type is an array type of one element. */
4237 return build_array_type (record, build_index_type (size_zero_node));
4240 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4243 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4244 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4247 CUMULATIVE_ARGS next_cum;
4248 rtx save_area = NULL_RTX, mem;
4261 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4264 /* Indicate to allocate space on the stack for varargs save area. */
4265 ix86_save_varrargs_registers = 1;
4267 cfun->stack_alignment_needed = 128;
4269 fntype = TREE_TYPE (current_function_decl);
4270 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4271 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4272 != void_type_node));
4274 /* For varargs, we do not want to skip the dummy va_dcl argument.
4275 For stdargs, we do want to skip the last named argument. */
4278 function_arg_advance (&next_cum, mode, type, 1);
4281 save_area = frame_pointer_rtx;
4283 set = get_varargs_alias_set ();
4285 for (i = next_cum.regno;
4287 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4290 mem = gen_rtx_MEM (Pmode,
4291 plus_constant (save_area, i * UNITS_PER_WORD));
4292 MEM_NOTRAP_P (mem) = 1;
4293 set_mem_alias_set (mem, set);
4294 emit_move_insn (mem, gen_rtx_REG (Pmode,
4295 x86_64_int_parameter_registers[i]));
4298 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4300 /* Now emit code to save SSE registers. The AX parameter contains number
4301 of SSE parameter registers used to call this function. We use
4302 sse_prologue_save insn template that produces computed jump across
4303 SSE saves. We need some preparation work to get this working. */
4305 label = gen_label_rtx ();
4306 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4308 /* Compute address to jump to :
4309 label - 5*eax + nnamed_sse_arguments*5 */
4310 tmp_reg = gen_reg_rtx (Pmode);
4311 nsse_reg = gen_reg_rtx (Pmode);
4312 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4313 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4314 gen_rtx_MULT (Pmode, nsse_reg,
4316 if (next_cum.sse_regno)
4319 gen_rtx_CONST (DImode,
4320 gen_rtx_PLUS (DImode,
4322 GEN_INT (next_cum.sse_regno * 4))));
4324 emit_move_insn (nsse_reg, label_ref);
4325 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4327 /* Compute address of memory block we save into. We always use pointer
4328 pointing 127 bytes after first byte to store - this is needed to keep
4329 instruction size limited by 4 bytes. */
4330 tmp_reg = gen_reg_rtx (Pmode);
4331 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4332 plus_constant (save_area,
4333 8 * REGPARM_MAX + 127)));
4334 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4335 MEM_NOTRAP_P (mem) = 1;
4336 set_mem_alias_set (mem, set);
4337 set_mem_align (mem, BITS_PER_WORD);
4339 /* And finally do the dirty job! */
4340 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4341 GEN_INT (next_cum.sse_regno), label));
4346 /* Implement va_start. */
4349 ix86_va_start (tree valist, rtx nextarg)
4351 HOST_WIDE_INT words, n_gpr, n_fpr;
4352 tree f_gpr, f_fpr, f_ovf, f_sav;
4353 tree gpr, fpr, ovf, sav, t;
4356 /* Only 64bit target needs something special. */
4359 std_expand_builtin_va_start (valist, nextarg);
4363 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4364 f_fpr = TREE_CHAIN (f_gpr);
4365 f_ovf = TREE_CHAIN (f_fpr);
4366 f_sav = TREE_CHAIN (f_ovf);
4368 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4369 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4370 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4371 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4372 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4374 /* Count number of gp and fp argument registers used. */
4375 words = current_function_args_info.words;
4376 n_gpr = current_function_args_info.regno;
4377 n_fpr = current_function_args_info.sse_regno;
4379 if (TARGET_DEBUG_ARG)
4380 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4381 (int) words, (int) n_gpr, (int) n_fpr);
4383 if (cfun->va_list_gpr_size)
4385 type = TREE_TYPE (gpr);
4386 t = build2 (MODIFY_EXPR, type, gpr,
4387 build_int_cst (type, n_gpr * 8));
4388 TREE_SIDE_EFFECTS (t) = 1;
4389 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4392 if (cfun->va_list_fpr_size)
4394 type = TREE_TYPE (fpr);
4395 t = build2 (MODIFY_EXPR, type, fpr,
4396 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4397 TREE_SIDE_EFFECTS (t) = 1;
4398 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4401 /* Find the overflow area. */
4402 type = TREE_TYPE (ovf);
4403 t = make_tree (type, virtual_incoming_args_rtx);
4405 t = build2 (PLUS_EXPR, type, t,
4406 build_int_cst (type, words * UNITS_PER_WORD));
4407 t = build2 (MODIFY_EXPR, type, ovf, t);
4408 TREE_SIDE_EFFECTS (t) = 1;
4409 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4411 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4413 /* Find the register save area.
4414 Prologue of the function save it right above stack frame. */
4415 type = TREE_TYPE (sav);
4416 t = make_tree (type, frame_pointer_rtx);
4417 t = build2 (MODIFY_EXPR, type, sav, t);
4418 TREE_SIDE_EFFECTS (t) = 1;
4419 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4423 /* Implement va_arg. */
4426 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4428 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4429 tree f_gpr, f_fpr, f_ovf, f_sav;
4430 tree gpr, fpr, ovf, sav, t;
4432 tree lab_false, lab_over = NULL_TREE;
4437 enum machine_mode nat_mode;
4439 /* Only 64bit target needs something special. */
4441 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4443 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4444 f_fpr = TREE_CHAIN (f_gpr);
4445 f_ovf = TREE_CHAIN (f_fpr);
4446 f_sav = TREE_CHAIN (f_ovf);
4448 valist = build_va_arg_indirect_ref (valist);
4449 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4450 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4451 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4452 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4454 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4456 type = build_pointer_type (type);
4457 size = int_size_in_bytes (type);
4458 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4460 nat_mode = type_natural_mode (type);
4461 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4462 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4464 /* Pull the value out of the saved registers. */
4466 addr = create_tmp_var (ptr_type_node, "addr");
4467 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4471 int needed_intregs, needed_sseregs;
4473 tree int_addr, sse_addr;
4475 lab_false = create_artificial_label ();
4476 lab_over = create_artificial_label ();
4478 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4480 need_temp = (!REG_P (container)
4481 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4482 || TYPE_ALIGN (type) > 128));
4484 /* In case we are passing structure, verify that it is consecutive block
4485 on the register save area. If not we need to do moves. */
4486 if (!need_temp && !REG_P (container))
4488 /* Verify that all registers are strictly consecutive */
4489 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4493 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4495 rtx slot = XVECEXP (container, 0, i);
4496 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4497 || INTVAL (XEXP (slot, 1)) != i * 16)
4505 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4507 rtx slot = XVECEXP (container, 0, i);
4508 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4509 || INTVAL (XEXP (slot, 1)) != i * 8)
4521 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4522 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4523 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4524 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4527 /* First ensure that we fit completely in registers. */
4530 t = build_int_cst (TREE_TYPE (gpr),
4531 (REGPARM_MAX - needed_intregs + 1) * 8);
4532 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4533 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4534 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4535 gimplify_and_add (t, pre_p);
4539 t = build_int_cst (TREE_TYPE (fpr),
4540 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4542 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4543 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4544 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4545 gimplify_and_add (t, pre_p);
4548 /* Compute index to start of area used for integer regs. */
4551 /* int_addr = gpr + sav; */
4552 t = fold_convert (ptr_type_node, gpr);
4553 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4554 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4555 gimplify_and_add (t, pre_p);
4559 /* sse_addr = fpr + sav; */
4560 t = fold_convert (ptr_type_node, fpr);
4561 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4562 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4563 gimplify_and_add (t, pre_p);
4568 tree temp = create_tmp_var (type, "va_arg_tmp");
4571 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4572 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4573 gimplify_and_add (t, pre_p);
4575 for (i = 0; i < XVECLEN (container, 0); i++)
4577 rtx slot = XVECEXP (container, 0, i);
4578 rtx reg = XEXP (slot, 0);
4579 enum machine_mode mode = GET_MODE (reg);
4580 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4581 tree addr_type = build_pointer_type (piece_type);
4584 tree dest_addr, dest;
4586 if (SSE_REGNO_P (REGNO (reg)))
4588 src_addr = sse_addr;
4589 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4593 src_addr = int_addr;
4594 src_offset = REGNO (reg) * 8;
4596 src_addr = fold_convert (addr_type, src_addr);
4597 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4598 size_int (src_offset)));
4599 src = build_va_arg_indirect_ref (src_addr);
4601 dest_addr = fold_convert (addr_type, addr);
4602 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4603 size_int (INTVAL (XEXP (slot, 1)))));
4604 dest = build_va_arg_indirect_ref (dest_addr);
4606 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4607 gimplify_and_add (t, pre_p);
4613 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4614 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4615 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4616 gimplify_and_add (t, pre_p);
4620 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4621 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4622 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4623 gimplify_and_add (t, pre_p);
4626 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4627 gimplify_and_add (t, pre_p);
4629 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4630 append_to_statement_list (t, pre_p);
4633 /* ... otherwise out of the overflow area. */
4635 /* Care for on-stack alignment if needed. */
4636 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4637 || integer_zerop (TYPE_SIZE (type)))
4641 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4642 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4643 build_int_cst (TREE_TYPE (ovf), align - 1));
4644 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4645 build_int_cst (TREE_TYPE (t), -align));
4647 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4649 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4650 gimplify_and_add (t2, pre_p);
4652 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4653 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4654 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4655 gimplify_and_add (t, pre_p);
4659 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4660 append_to_statement_list (t, pre_p);
4663 ptrtype = build_pointer_type (type);
4664 addr = fold_convert (ptrtype, addr);
4667 addr = build_va_arg_indirect_ref (addr);
4668 return build_va_arg_indirect_ref (addr);
4671 /* Return nonzero if OPNUM's MEM should be matched
4672 in movabs* patterns. */
4675 ix86_check_movabs (rtx insn, int opnum)
4679 set = PATTERN (insn);
4680 if (GET_CODE (set) == PARALLEL)
4681 set = XVECEXP (set, 0, 0);
4682 gcc_assert (GET_CODE (set) == SET);
4683 mem = XEXP (set, opnum);
4684 while (GET_CODE (mem) == SUBREG)
4685 mem = SUBREG_REG (mem);
4686 gcc_assert (GET_CODE (mem) == MEM);
4687 return (volatile_ok || !MEM_VOLATILE_P (mem));
4690 /* Initialize the table of extra 80387 mathematical constants. */
4693 init_ext_80387_constants (void)
4695 static const char * cst[5] =
4697 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4698 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4699 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4700 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4701 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4705 for (i = 0; i < 5; i++)
4707 real_from_string (&ext_80387_constants_table[i], cst[i]);
4708 /* Ensure each constant is rounded to XFmode precision. */
4709 real_convert (&ext_80387_constants_table[i],
4710 XFmode, &ext_80387_constants_table[i]);
4713 ext_80387_constants_init = 1;
4716 /* Return true if the constant is something that can be loaded with
4717 a special instruction. */
4720 standard_80387_constant_p (rtx x)
4722 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4725 if (x == CONST0_RTX (GET_MODE (x)))
4727 if (x == CONST1_RTX (GET_MODE (x)))
4730 /* For XFmode constants, try to find a special 80387 instruction when
4731 optimizing for size or on those CPUs that benefit from them. */
4732 if (GET_MODE (x) == XFmode
4733 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4738 if (! ext_80387_constants_init)
4739 init_ext_80387_constants ();
4741 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4742 for (i = 0; i < 5; i++)
4743 if (real_identical (&r, &ext_80387_constants_table[i]))
4750 /* Return the opcode of the special instruction to be used to load
4754 standard_80387_constant_opcode (rtx x)
4756 switch (standard_80387_constant_p (x))
4777 /* Return the CONST_DOUBLE representing the 80387 constant that is
4778 loaded by the specified special instruction. The argument IDX
4779 matches the return value from standard_80387_constant_p. */
4782 standard_80387_constant_rtx (int idx)
4786 if (! ext_80387_constants_init)
4787 init_ext_80387_constants ();
4803 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4807 /* Return 1 if mode is a valid mode for sse. */
4809 standard_sse_mode_p (enum machine_mode mode)
4826 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4829 standard_sse_constant_p (rtx x)
4831 enum machine_mode mode = GET_MODE (x);
4833 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4835 if (vector_all_ones_operand (x, mode)
4836 && standard_sse_mode_p (mode))
4837 return TARGET_SSE2 ? 2 : -1;
4842 /* Return the opcode of the special instruction to be used to load
4846 standard_sse_constant_opcode (rtx insn, rtx x)
4848 switch (standard_sse_constant_p (x))
4851 if (get_attr_mode (insn) == MODE_V4SF)
4852 return "xorps\t%0, %0";
4853 else if (get_attr_mode (insn) == MODE_V2DF)
4854 return "xorpd\t%0, %0";
4856 return "pxor\t%0, %0";
4858 return "pcmpeqd\t%0, %0";
4863 /* Returns 1 if OP contains a symbol reference */
4866 symbolic_reference_mentioned_p (rtx op)
4871 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4874 fmt = GET_RTX_FORMAT (GET_CODE (op));
4875 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4881 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4882 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4886 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4893 /* Return 1 if it is appropriate to emit `ret' instructions in the
4894 body of a function. Do this only if the epilogue is simple, needing a
4895 couple of insns. Prior to reloading, we can't tell how many registers
4896 must be saved, so return 0 then. Return 0 if there is no frame
4897 marker to de-allocate. */
4900 ix86_can_use_return_insn_p (void)
4902 struct ix86_frame frame;
4904 if (! reload_completed || frame_pointer_needed)
4907 /* Don't allow more than 32 pop, since that's all we can do
4908 with one instruction. */
4909 if (current_function_pops_args
4910 && current_function_args_size >= 32768)
4913 ix86_compute_frame_layout (&frame);
4914 return frame.to_allocate == 0 && frame.nregs == 0;
4917 /* Value should be nonzero if functions must have frame pointers.
4918 Zero means the frame pointer need not be set up (and parms may
4919 be accessed via the stack pointer) in functions that seem suitable. */
4922 ix86_frame_pointer_required (void)
4924 /* If we accessed previous frames, then the generated code expects
4925 to be able to access the saved ebp value in our frame. */
4926 if (cfun->machine->accesses_prev_frame)
4929 /* Several x86 os'es need a frame pointer for other reasons,
4930 usually pertaining to setjmp. */
4931 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4934 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4935 the frame pointer by default. Turn it back on now if we've not
4936 got a leaf function. */
4937 if (TARGET_OMIT_LEAF_FRAME_POINTER
4938 && (!current_function_is_leaf
4939 || ix86_current_function_calls_tls_descriptor))
4942 if (current_function_profile)
4948 /* Record that the current function accesses previous call frames. */
4951 ix86_setup_frame_addresses (void)
4953 cfun->machine->accesses_prev_frame = 1;
4956 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4957 # define USE_HIDDEN_LINKONCE 1
4959 # define USE_HIDDEN_LINKONCE 0
4962 static int pic_labels_used;
4964 /* Fills in the label name that should be used for a pc thunk for
4965 the given register. */
4968 get_pc_thunk_name (char name[32], unsigned int regno)
4970 gcc_assert (!TARGET_64BIT);
4972 if (USE_HIDDEN_LINKONCE)
4973 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4975 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4979 /* This function generates code for -fpic that loads %ebx with
4980 the return address of the caller and then returns. */
4983 ix86_file_end (void)
4988 for (regno = 0; regno < 8; ++regno)
4992 if (! ((pic_labels_used >> regno) & 1))
4995 get_pc_thunk_name (name, regno);
5000 switch_to_section (darwin_sections[text_coal_section]);
5001 fputs ("\t.weak_definition\t", asm_out_file);
5002 assemble_name (asm_out_file, name);
5003 fputs ("\n\t.private_extern\t", asm_out_file);
5004 assemble_name (asm_out_file, name);
5005 fputs ("\n", asm_out_file);
5006 ASM_OUTPUT_LABEL (asm_out_file, name);
5010 if (USE_HIDDEN_LINKONCE)
5014 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5016 TREE_PUBLIC (decl) = 1;
5017 TREE_STATIC (decl) = 1;
5018 DECL_ONE_ONLY (decl) = 1;
5020 (*targetm.asm_out.unique_section) (decl, 0);
5021 switch_to_section (get_named_section (decl, NULL, 0));
5023 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5024 fputs ("\t.hidden\t", asm_out_file);
5025 assemble_name (asm_out_file, name);
5026 fputc ('\n', asm_out_file);
5027 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5031 switch_to_section (text_section);
5032 ASM_OUTPUT_LABEL (asm_out_file, name);
5035 xops[0] = gen_rtx_REG (SImode, regno);
5036 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5037 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5038 output_asm_insn ("ret", xops);
5041 if (NEED_INDICATE_EXEC_STACK)
5042 file_end_indicate_exec_stack ();
5045 /* Emit code for the SET_GOT patterns. */
5048 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5053 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5055 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5057 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5060 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5062 output_asm_insn ("call\t%a2", xops);
5065 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5066 is what will be referenced by the Mach-O PIC subsystem. */
5068 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5071 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5072 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5075 output_asm_insn ("pop{l}\t%0", xops);
5080 get_pc_thunk_name (name, REGNO (dest));
5081 pic_labels_used |= 1 << REGNO (dest);
5083 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5084 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5085 output_asm_insn ("call\t%X2", xops);
5086 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5087 is what will be referenced by the Mach-O PIC subsystem. */
5090 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5092 targetm.asm_out.internal_label (asm_out_file, "L",
5093 CODE_LABEL_NUMBER (label));
5100 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5101 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5103 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5108 /* Generate an "push" pattern for input ARG. */
5113 return gen_rtx_SET (VOIDmode,
5115 gen_rtx_PRE_DEC (Pmode,
5116 stack_pointer_rtx)),
5120 /* Return >= 0 if there is an unused call-clobbered register available
5121 for the entire function. */
5124 ix86_select_alt_pic_regnum (void)
5126 if (current_function_is_leaf && !current_function_profile
5127 && !ix86_current_function_calls_tls_descriptor)
5130 for (i = 2; i >= 0; --i)
5131 if (!regs_ever_live[i])
5135 return INVALID_REGNUM;
5138 /* Return 1 if we need to save REGNO. */
5140 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5142 if (pic_offset_table_rtx
5143 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5144 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5145 || current_function_profile
5146 || current_function_calls_eh_return
5147 || current_function_uses_const_pool))
5149 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5154 if (current_function_calls_eh_return && maybe_eh_return)
5159 unsigned test = EH_RETURN_DATA_REGNO (i);
5160 if (test == INVALID_REGNUM)
5167 if (cfun->machine->force_align_arg_pointer
5168 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5171 return (regs_ever_live[regno]
5172 && !call_used_regs[regno]
5173 && !fixed_regs[regno]
5174 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5177 /* Return number of registers to be saved on the stack. */
5180 ix86_nsaved_regs (void)
5185 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5186 if (ix86_save_reg (regno, true))
5191 /* Return the offset between two registers, one to be eliminated, and the other
5192 its replacement, at the start of a routine. */
5195 ix86_initial_elimination_offset (int from, int to)
5197 struct ix86_frame frame;
5198 ix86_compute_frame_layout (&frame);
5200 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5201 return frame.hard_frame_pointer_offset;
5202 else if (from == FRAME_POINTER_REGNUM
5203 && to == HARD_FRAME_POINTER_REGNUM)
5204 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5207 gcc_assert (to == STACK_POINTER_REGNUM);
5209 if (from == ARG_POINTER_REGNUM)
5210 return frame.stack_pointer_offset;
5212 gcc_assert (from == FRAME_POINTER_REGNUM);
5213 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5217 /* Fill structure ix86_frame about frame of currently computed function. */
5220 ix86_compute_frame_layout (struct ix86_frame *frame)
5222 HOST_WIDE_INT total_size;
5223 unsigned int stack_alignment_needed;
5224 HOST_WIDE_INT offset;
5225 unsigned int preferred_alignment;
5226 HOST_WIDE_INT size = get_frame_size ();
5228 frame->nregs = ix86_nsaved_regs ();
5231 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5232 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5234 /* During reload iteration the amount of registers saved can change.
5235 Recompute the value as needed. Do not recompute when amount of registers
5236 didn't change as reload does multiple calls to the function and does not
5237 expect the decision to change within single iteration. */
5239 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5241 int count = frame->nregs;
5243 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5244 /* The fast prologue uses move instead of push to save registers. This
5245 is significantly longer, but also executes faster as modern hardware
5246 can execute the moves in parallel, but can't do that for push/pop.
5248 Be careful about choosing what prologue to emit: When function takes
5249 many instructions to execute we may use slow version as well as in
5250 case function is known to be outside hot spot (this is known with
5251 feedback only). Weight the size of function by number of registers
5252 to save as it is cheap to use one or two push instructions but very
5253 slow to use many of them. */
5255 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5256 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5257 || (flag_branch_probabilities
5258 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5259 cfun->machine->use_fast_prologue_epilogue = false;
5261 cfun->machine->use_fast_prologue_epilogue
5262 = !expensive_function_p (count);
5264 if (TARGET_PROLOGUE_USING_MOVE
5265 && cfun->machine->use_fast_prologue_epilogue)
5266 frame->save_regs_using_mov = true;
5268 frame->save_regs_using_mov = false;
5271 /* Skip return address and saved base pointer. */
5272 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5274 frame->hard_frame_pointer_offset = offset;
5276 /* Do some sanity checking of stack_alignment_needed and
5277 preferred_alignment, since i386 port is the only using those features
5278 that may break easily. */
5280 gcc_assert (!size || stack_alignment_needed);
5281 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5282 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5283 gcc_assert (stack_alignment_needed
5284 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5286 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5287 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5289 /* Register save area */
5290 offset += frame->nregs * UNITS_PER_WORD;
5293 if (ix86_save_varrargs_registers)
5295 offset += X86_64_VARARGS_SIZE;
5296 frame->va_arg_size = X86_64_VARARGS_SIZE;
5299 frame->va_arg_size = 0;
5301 /* Align start of frame for local function. */
5302 frame->padding1 = ((offset + stack_alignment_needed - 1)
5303 & -stack_alignment_needed) - offset;
5305 offset += frame->padding1;
5307 /* Frame pointer points here. */
5308 frame->frame_pointer_offset = offset;
5312 /* Add outgoing arguments area. Can be skipped if we eliminated
5313 all the function calls as dead code.
5314 Skipping is however impossible when function calls alloca. Alloca
5315 expander assumes that last current_function_outgoing_args_size
5316 of stack frame are unused. */
5317 if (ACCUMULATE_OUTGOING_ARGS
5318 && (!current_function_is_leaf || current_function_calls_alloca
5319 || ix86_current_function_calls_tls_descriptor))
5321 offset += current_function_outgoing_args_size;
5322 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5325 frame->outgoing_arguments_size = 0;
5327 /* Align stack boundary. Only needed if we're calling another function
5329 if (!current_function_is_leaf || current_function_calls_alloca
5330 || ix86_current_function_calls_tls_descriptor)
5331 frame->padding2 = ((offset + preferred_alignment - 1)
5332 & -preferred_alignment) - offset;
5334 frame->padding2 = 0;
5336 offset += frame->padding2;
5338 /* We've reached end of stack frame. */
5339 frame->stack_pointer_offset = offset;
5341 /* Size prologue needs to allocate. */
5342 frame->to_allocate =
5343 (size + frame->padding1 + frame->padding2
5344 + frame->outgoing_arguments_size + frame->va_arg_size);
5346 if ((!frame->to_allocate && frame->nregs <= 1)
5347 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5348 frame->save_regs_using_mov = false;
5350 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5351 && current_function_is_leaf
5352 && !ix86_current_function_calls_tls_descriptor)
5354 frame->red_zone_size = frame->to_allocate;
5355 if (frame->save_regs_using_mov)
5356 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5357 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5358 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5361 frame->red_zone_size = 0;
5362 frame->to_allocate -= frame->red_zone_size;
5363 frame->stack_pointer_offset -= frame->red_zone_size;
5365 fprintf (stderr, "nregs: %i\n", frame->nregs);
5366 fprintf (stderr, "size: %i\n", size);
5367 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5368 fprintf (stderr, "padding1: %i\n", frame->padding1);
5369 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5370 fprintf (stderr, "padding2: %i\n", frame->padding2);
5371 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5372 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5373 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5374 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5375 frame->hard_frame_pointer_offset);
5376 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5380 /* Emit code to save registers in the prologue. */
5383 ix86_emit_save_regs (void)
5388 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5389 if (ix86_save_reg (regno, true))
5391 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5392 RTX_FRAME_RELATED_P (insn) = 1;
5396 /* Emit code to save registers using MOV insns. First register
5397 is restored from POINTER + OFFSET. */
5399 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5404 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5405 if (ix86_save_reg (regno, true))
5407 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5409 gen_rtx_REG (Pmode, regno));
5410 RTX_FRAME_RELATED_P (insn) = 1;
5411 offset += UNITS_PER_WORD;
5415 /* Expand prologue or epilogue stack adjustment.
5416 The pattern exist to put a dependency on all ebp-based memory accesses.
5417 STYLE should be negative if instructions should be marked as frame related,
5418 zero if %r11 register is live and cannot be freely used and positive
5422 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5427 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5428 else if (x86_64_immediate_operand (offset, DImode))
5429 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5433 /* r11 is used by indirect sibcall return as well, set before the
5434 epilogue and used after the epilogue. ATM indirect sibcall
5435 shouldn't be used together with huge frame sizes in one
5436 function because of the frame_size check in sibcall.c. */
5438 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5439 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5441 RTX_FRAME_RELATED_P (insn) = 1;
5442 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5446 RTX_FRAME_RELATED_P (insn) = 1;
5449 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5452 ix86_internal_arg_pointer (void)
5454 bool has_force_align_arg_pointer =
5455 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5456 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5457 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5458 && DECL_NAME (current_function_decl)
5459 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5460 && DECL_FILE_SCOPE_P (current_function_decl))
5461 || ix86_force_align_arg_pointer
5462 || has_force_align_arg_pointer)
5464 /* Nested functions can't realign the stack due to a register
5466 if (DECL_CONTEXT (current_function_decl)
5467 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5469 if (ix86_force_align_arg_pointer)
5470 warning (0, "-mstackrealign ignored for nested functions");
5471 if (has_force_align_arg_pointer)
5472 error ("%s not supported for nested functions",
5473 ix86_force_align_arg_pointer_string);
5474 return virtual_incoming_args_rtx;
5476 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5477 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5480 return virtual_incoming_args_rtx;
5483 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5484 This is called from dwarf2out.c to emit call frame instructions
5485 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5487 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5489 rtx unspec = SET_SRC (pattern);
5490 gcc_assert (GET_CODE (unspec) == UNSPEC);
5494 case UNSPEC_REG_SAVE:
5495 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5496 SET_DEST (pattern));
5498 case UNSPEC_DEF_CFA:
5499 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5500 INTVAL (XVECEXP (unspec, 0, 0)));
5507 /* Expand the prologue into a bunch of separate insns. */
5510 ix86_expand_prologue (void)
5514 struct ix86_frame frame;
5515 HOST_WIDE_INT allocate;
5517 ix86_compute_frame_layout (&frame);
5519 if (cfun->machine->force_align_arg_pointer)
5523 /* Grab the argument pointer. */
5524 x = plus_constant (stack_pointer_rtx, 4);
5525 y = cfun->machine->force_align_arg_pointer;
5526 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5527 RTX_FRAME_RELATED_P (insn) = 1;
5529 /* The unwind info consists of two parts: install the fafp as the cfa,
5530 and record the fafp as the "save register" of the stack pointer.
5531 The later is there in order that the unwinder can see where it
5532 should restore the stack pointer across the and insn. */
5533 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5534 x = gen_rtx_SET (VOIDmode, y, x);
5535 RTX_FRAME_RELATED_P (x) = 1;
5536 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5538 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5539 RTX_FRAME_RELATED_P (y) = 1;
5540 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5541 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5542 REG_NOTES (insn) = x;
5544 /* Align the stack. */
5545 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5548 /* And here we cheat like madmen with the unwind info. We force the
5549 cfa register back to sp+4, which is exactly what it was at the
5550 start of the function. Re-pushing the return address results in
5551 the return at the same spot relative to the cfa, and thus is
5552 correct wrt the unwind info. */
5553 x = cfun->machine->force_align_arg_pointer;
5554 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5555 insn = emit_insn (gen_push (x));
5556 RTX_FRAME_RELATED_P (insn) = 1;
5559 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5560 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5561 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5562 REG_NOTES (insn) = x;
5565 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5566 slower on all targets. Also sdb doesn't like it. */
5568 if (frame_pointer_needed)
5570 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5571 RTX_FRAME_RELATED_P (insn) = 1;
5573 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5574 RTX_FRAME_RELATED_P (insn) = 1;
5577 allocate = frame.to_allocate;
5579 if (!frame.save_regs_using_mov)
5580 ix86_emit_save_regs ();
5582 allocate += frame.nregs * UNITS_PER_WORD;
5584 /* When using red zone we may start register saving before allocating
5585 the stack frame saving one cycle of the prologue. */
5586 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5587 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5588 : stack_pointer_rtx,
5589 -frame.nregs * UNITS_PER_WORD);
5593 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5594 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5595 GEN_INT (-allocate), -1);
5598 /* Only valid for Win32. */
5599 rtx eax = gen_rtx_REG (SImode, 0);
5600 bool eax_live = ix86_eax_live_at_start_p ();
5603 gcc_assert (!TARGET_64BIT);
5607 emit_insn (gen_push (eax));
5611 emit_move_insn (eax, GEN_INT (allocate));
5613 insn = emit_insn (gen_allocate_stack_worker (eax));
5614 RTX_FRAME_RELATED_P (insn) = 1;
5615 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5616 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5617 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5618 t, REG_NOTES (insn));
5622 if (frame_pointer_needed)
5623 t = plus_constant (hard_frame_pointer_rtx,
5626 - frame.nregs * UNITS_PER_WORD);
5628 t = plus_constant (stack_pointer_rtx, allocate);
5629 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5633 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5635 if (!frame_pointer_needed || !frame.to_allocate)
5636 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5638 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5639 -frame.nregs * UNITS_PER_WORD);
5642 pic_reg_used = false;
5643 if (pic_offset_table_rtx
5644 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5645 || current_function_profile))
5647 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5649 if (alt_pic_reg_used != INVALID_REGNUM)
5650 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5652 pic_reg_used = true;
5658 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5660 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5662 /* Even with accurate pre-reload life analysis, we can wind up
5663 deleting all references to the pic register after reload.
5664 Consider if cross-jumping unifies two sides of a branch
5665 controlled by a comparison vs the only read from a global.
5666 In which case, allow the set_got to be deleted, though we're
5667 too late to do anything about the ebx save in the prologue. */
5668 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5671 /* Prevent function calls from be scheduled before the call to mcount.
5672 In the pic_reg_used case, make sure that the got load isn't deleted. */
5673 if (current_function_profile)
5674 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5677 /* Emit code to restore saved registers using MOV insns. First register
5678 is restored from POINTER + OFFSET. */
5680 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5681 int maybe_eh_return)
5684 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5686 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5687 if (ix86_save_reg (regno, maybe_eh_return))
5689 /* Ensure that adjust_address won't be forced to produce pointer
5690 out of range allowed by x86-64 instruction set. */
5691 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5695 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5696 emit_move_insn (r11, GEN_INT (offset));
5697 emit_insn (gen_adddi3 (r11, r11, pointer));
5698 base_address = gen_rtx_MEM (Pmode, r11);
5701 emit_move_insn (gen_rtx_REG (Pmode, regno),
5702 adjust_address (base_address, Pmode, offset));
5703 offset += UNITS_PER_WORD;
5707 /* Restore function stack, frame, and registers. */
5710 ix86_expand_epilogue (int style)
5713 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5714 struct ix86_frame frame;
5715 HOST_WIDE_INT offset;
5717 ix86_compute_frame_layout (&frame);
5719 /* Calculate start of saved registers relative to ebp. Special care
5720 must be taken for the normal return case of a function using
5721 eh_return: the eax and edx registers are marked as saved, but not
5722 restored along this path. */
5723 offset = frame.nregs;
5724 if (current_function_calls_eh_return && style != 2)
5726 offset *= -UNITS_PER_WORD;
5728 /* If we're only restoring one register and sp is not valid then
5729 using a move instruction to restore the register since it's
5730 less work than reloading sp and popping the register.
5732 The default code result in stack adjustment using add/lea instruction,
5733 while this code results in LEAVE instruction (or discrete equivalent),
5734 so it is profitable in some other cases as well. Especially when there
5735 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5736 and there is exactly one register to pop. This heuristic may need some
5737 tuning in future. */
5738 if ((!sp_valid && frame.nregs <= 1)
5739 || (TARGET_EPILOGUE_USING_MOVE
5740 && cfun->machine->use_fast_prologue_epilogue
5741 && (frame.nregs > 1 || frame.to_allocate))
5742 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5743 || (frame_pointer_needed && TARGET_USE_LEAVE
5744 && cfun->machine->use_fast_prologue_epilogue
5745 && frame.nregs == 1)
5746 || current_function_calls_eh_return)
5748 /* Restore registers. We can use ebp or esp to address the memory
5749 locations. If both are available, default to ebp, since offsets
5750 are known to be small. Only exception is esp pointing directly to the
5751 end of block of saved registers, where we may simplify addressing
5754 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5755 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5756 frame.to_allocate, style == 2);
5758 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5759 offset, style == 2);
5761 /* eh_return epilogues need %ecx added to the stack pointer. */
5764 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5766 if (frame_pointer_needed)
5768 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5769 tmp = plus_constant (tmp, UNITS_PER_WORD);
5770 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5772 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5773 emit_move_insn (hard_frame_pointer_rtx, tmp);
5775 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5780 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5781 tmp = plus_constant (tmp, (frame.to_allocate
5782 + frame.nregs * UNITS_PER_WORD));
5783 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5786 else if (!frame_pointer_needed)
5787 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5788 GEN_INT (frame.to_allocate
5789 + frame.nregs * UNITS_PER_WORD),
5791 /* If not an i386, mov & pop is faster than "leave". */
5792 else if (TARGET_USE_LEAVE || optimize_size
5793 || !cfun->machine->use_fast_prologue_epilogue)
5794 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5797 pro_epilogue_adjust_stack (stack_pointer_rtx,
5798 hard_frame_pointer_rtx,
5801 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5803 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5808 /* First step is to deallocate the stack frame so that we can
5809 pop the registers. */
5812 gcc_assert (frame_pointer_needed);
5813 pro_epilogue_adjust_stack (stack_pointer_rtx,
5814 hard_frame_pointer_rtx,
5815 GEN_INT (offset), style);
5817 else if (frame.to_allocate)
5818 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5819 GEN_INT (frame.to_allocate), style);
5821 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5822 if (ix86_save_reg (regno, false))
5825 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5827 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5829 if (frame_pointer_needed)
5831 /* Leave results in shorter dependency chains on CPUs that are
5832 able to grok it fast. */
5833 if (TARGET_USE_LEAVE)
5834 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5835 else if (TARGET_64BIT)
5836 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5838 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5842 if (cfun->machine->force_align_arg_pointer)
5844 emit_insn (gen_addsi3 (stack_pointer_rtx,
5845 cfun->machine->force_align_arg_pointer,
5849 /* Sibcall epilogues don't want a return instruction. */
5853 if (current_function_pops_args && current_function_args_size)
5855 rtx popc = GEN_INT (current_function_pops_args);
5857 /* i386 can only pop 64K bytes. If asked to pop more, pop
5858 return address, do explicit add, and jump indirectly to the
5861 if (current_function_pops_args >= 65536)
5863 rtx ecx = gen_rtx_REG (SImode, 2);
5865 /* There is no "pascal" calling convention in 64bit ABI. */
5866 gcc_assert (!TARGET_64BIT);
5868 emit_insn (gen_popsi1 (ecx));
5869 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5870 emit_jump_insn (gen_return_indirect_internal (ecx));
5873 emit_jump_insn (gen_return_pop_internal (popc));
5876 emit_jump_insn (gen_return_internal ());
5879 /* Reset from the function's potential modifications. */
5882 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5883 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5885 if (pic_offset_table_rtx)
5886 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5888 /* Mach-O doesn't support labels at the end of objects, so if
5889 it looks like we might want one, insert a NOP. */
5891 rtx insn = get_last_insn ();
5894 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5895 insn = PREV_INSN (insn);
5899 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5900 fputs ("\tnop\n", file);
5906 /* Extract the parts of an RTL expression that is a valid memory address
5907 for an instruction. Return 0 if the structure of the address is
5908 grossly off. Return -1 if the address contains ASHIFT, so it is not
5909 strictly valid, but still used for computing length of lea instruction. */
5912 ix86_decompose_address (rtx addr, struct ix86_address *out)
5914 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5915 rtx base_reg, index_reg;
5916 HOST_WIDE_INT scale = 1;
5917 rtx scale_rtx = NULL_RTX;
5919 enum ix86_address_seg seg = SEG_DEFAULT;
5921 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5923 else if (GET_CODE (addr) == PLUS)
5933 addends[n++] = XEXP (op, 1);
5936 while (GET_CODE (op) == PLUS);
5941 for (i = n; i >= 0; --i)
5944 switch (GET_CODE (op))
5949 index = XEXP (op, 0);
5950 scale_rtx = XEXP (op, 1);
5954 if (XINT (op, 1) == UNSPEC_TP
5955 && TARGET_TLS_DIRECT_SEG_REFS
5956 && seg == SEG_DEFAULT)
5957 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5986 else if (GET_CODE (addr) == MULT)
5988 index = XEXP (addr, 0); /* index*scale */
5989 scale_rtx = XEXP (addr, 1);
5991 else if (GET_CODE (addr) == ASHIFT)
5995 /* We're called for lea too, which implements ashift on occasion. */
5996 index = XEXP (addr, 0);
5997 tmp = XEXP (addr, 1);
5998 if (GET_CODE (tmp) != CONST_INT)
6000 scale = INTVAL (tmp);
6001 if ((unsigned HOST_WIDE_INT) scale > 3)
6007 disp = addr; /* displacement */
6009 /* Extract the integral value of scale. */
6012 if (GET_CODE (scale_rtx) != CONST_INT)
6014 scale = INTVAL (scale_rtx);
6017 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6018 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6020 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6021 if (base_reg && index_reg && scale == 1
6022 && (index_reg == arg_pointer_rtx
6023 || index_reg == frame_pointer_rtx
6024 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6027 tmp = base, base = index, index = tmp;
6028 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6031 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6032 if ((base_reg == hard_frame_pointer_rtx
6033 || base_reg == frame_pointer_rtx
6034 || base_reg == arg_pointer_rtx) && !disp)
6037 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6038 Avoid this by transforming to [%esi+0]. */
6039 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6040 && base_reg && !index_reg && !disp
6042 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6045 /* Special case: encode reg+reg instead of reg*2. */
6046 if (!base && index && scale && scale == 2)
6047 base = index, base_reg = index_reg, scale = 1;
6049 /* Special case: scaling cannot be encoded without base or displacement. */
6050 if (!base && !disp && index && scale != 1)
6062 /* Return cost of the memory address x.
6063 For i386, it is better to use a complex address than let gcc copy
6064 the address into a reg and make a new pseudo. But not if the address
6065 requires to two regs - that would mean more pseudos with longer
6068 ix86_address_cost (rtx x)
6070 struct ix86_address parts;
6072 int ok = ix86_decompose_address (x, &parts);
6076 if (parts.base && GET_CODE (parts.base) == SUBREG)
6077 parts.base = SUBREG_REG (parts.base);
6078 if (parts.index && GET_CODE (parts.index) == SUBREG)
6079 parts.index = SUBREG_REG (parts.index);
6081 /* More complex memory references are better. */
6082 if (parts.disp && parts.disp != const0_rtx)
6084 if (parts.seg != SEG_DEFAULT)
6087 /* Attempt to minimize number of registers in the address. */
6089 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6091 && (!REG_P (parts.index)
6092 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6096 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6098 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6099 && parts.base != parts.index)
6102 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6103 since it's predecode logic can't detect the length of instructions
6104 and it degenerates to vector decoded. Increase cost of such
6105 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6106 to split such addresses or even refuse such addresses at all.
6108 Following addressing modes are affected:
6113 The first and last case may be avoidable by explicitly coding the zero in
6114 memory address, but I don't have AMD-K6 machine handy to check this
6118 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6119 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6120 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6126 /* If X is a machine specific address (i.e. a symbol or label being
6127 referenced as a displacement from the GOT implemented using an
6128 UNSPEC), then return the base term. Otherwise return X. */
6131 ix86_find_base_term (rtx x)
6137 if (GET_CODE (x) != CONST)
6140 if (GET_CODE (term) == PLUS
6141 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6142 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6143 term = XEXP (term, 0);
6144 if (GET_CODE (term) != UNSPEC
6145 || XINT (term, 1) != UNSPEC_GOTPCREL)
6148 term = XVECEXP (term, 0, 0);
6150 if (GET_CODE (term) != SYMBOL_REF
6151 && GET_CODE (term) != LABEL_REF)
6157 term = ix86_delegitimize_address (x);
6159 if (GET_CODE (term) != SYMBOL_REF
6160 && GET_CODE (term) != LABEL_REF)
6166 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6167 this is used for to form addresses to local data when -fPIC is in
6171 darwin_local_data_pic (rtx disp)
6173 if (GET_CODE (disp) == MINUS)
6175 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6176 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6177 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6179 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6180 if (! strcmp (sym_name, "<pic base>"))
6188 /* Determine if a given RTX is a valid constant. We already know this
6189 satisfies CONSTANT_P. */
6192 legitimate_constant_p (rtx x)
6194 switch (GET_CODE (x))
6199 if (GET_CODE (x) == PLUS)
6201 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6206 if (TARGET_MACHO && darwin_local_data_pic (x))
6209 /* Only some unspecs are valid as "constants". */
6210 if (GET_CODE (x) == UNSPEC)
6211 switch (XINT (x, 1))
6214 return TARGET_64BIT;
6217 x = XVECEXP (x, 0, 0);
6218 return (GET_CODE (x) == SYMBOL_REF
6219 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6221 x = XVECEXP (x, 0, 0);
6222 return (GET_CODE (x) == SYMBOL_REF
6223 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6228 /* We must have drilled down to a symbol. */
6229 if (GET_CODE (x) == LABEL_REF)
6231 if (GET_CODE (x) != SYMBOL_REF)
6236 /* TLS symbols are never valid. */
6237 if (SYMBOL_REF_TLS_MODEL (x))
6242 if (GET_MODE (x) == TImode
6243 && x != CONST0_RTX (TImode)
6249 if (x == CONST0_RTX (GET_MODE (x)))
6257 /* Otherwise we handle everything else in the move patterns. */
6261 /* Determine if it's legal to put X into the constant pool. This
6262 is not possible for the address of thread-local symbols, which
6263 is checked above. */
6266 ix86_cannot_force_const_mem (rtx x)
6268 /* We can always put integral constants and vectors in memory. */
6269 switch (GET_CODE (x))
6279 return !legitimate_constant_p (x);
6282 /* Determine if a given RTX is a valid constant address. */
6285 constant_address_p (rtx x)
6287 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6290 /* Nonzero if the constant value X is a legitimate general operand
6291 when generating PIC code. It is given that flag_pic is on and
6292 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6295 legitimate_pic_operand_p (rtx x)
6299 switch (GET_CODE (x))
6302 inner = XEXP (x, 0);
6303 if (GET_CODE (inner) == PLUS
6304 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6305 inner = XEXP (inner, 0);
6307 /* Only some unspecs are valid as "constants". */
6308 if (GET_CODE (inner) == UNSPEC)
6309 switch (XINT (inner, 1))
6312 return TARGET_64BIT;
6314 x = XVECEXP (inner, 0, 0);
6315 return (GET_CODE (x) == SYMBOL_REF
6316 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6324 return legitimate_pic_address_disp_p (x);
6331 /* Determine if a given CONST RTX is a valid memory displacement
6335 legitimate_pic_address_disp_p (rtx disp)
6339 /* In 64bit mode we can allow direct addresses of symbols and labels
6340 when they are not dynamic symbols. */
6343 rtx op0 = disp, op1;
6345 switch (GET_CODE (disp))
6351 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6353 op0 = XEXP (XEXP (disp, 0), 0);
6354 op1 = XEXP (XEXP (disp, 0), 1);
6355 if (GET_CODE (op1) != CONST_INT
6356 || INTVAL (op1) >= 16*1024*1024
6357 || INTVAL (op1) < -16*1024*1024)
6359 if (GET_CODE (op0) == LABEL_REF)
6361 if (GET_CODE (op0) != SYMBOL_REF)
6366 /* TLS references should always be enclosed in UNSPEC. */
6367 if (SYMBOL_REF_TLS_MODEL (op0))
6369 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6377 if (GET_CODE (disp) != CONST)
6379 disp = XEXP (disp, 0);
6383 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6384 of GOT tables. We should not need these anyway. */
6385 if (GET_CODE (disp) != UNSPEC
6386 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6387 && XINT (disp, 1) != UNSPEC_GOTOFF))
6390 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6391 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6397 if (GET_CODE (disp) == PLUS)
6399 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6401 disp = XEXP (disp, 0);
6405 if (TARGET_MACHO && darwin_local_data_pic (disp))
6408 if (GET_CODE (disp) != UNSPEC)
6411 switch (XINT (disp, 1))
6416 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6418 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6419 While ABI specify also 32bit relocation but we don't produce it in
6420 small PIC model at all. */
6421 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6422 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6424 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6426 case UNSPEC_GOTTPOFF:
6427 case UNSPEC_GOTNTPOFF:
6428 case UNSPEC_INDNTPOFF:
6431 disp = XVECEXP (disp, 0, 0);
6432 return (GET_CODE (disp) == SYMBOL_REF
6433 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6435 disp = XVECEXP (disp, 0, 0);
6436 return (GET_CODE (disp) == SYMBOL_REF
6437 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6439 disp = XVECEXP (disp, 0, 0);
6440 return (GET_CODE (disp) == SYMBOL_REF
6441 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6447 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6448 memory address for an instruction. The MODE argument is the machine mode
6449 for the MEM expression that wants to use this address.
6451 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6452 convert common non-canonical forms to canonical form so that they will
6456 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6458 struct ix86_address parts;
6459 rtx base, index, disp;
6460 HOST_WIDE_INT scale;
6461 const char *reason = NULL;
6462 rtx reason_rtx = NULL_RTX;
6464 if (TARGET_DEBUG_ADDR)
6467 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6468 GET_MODE_NAME (mode), strict);
6472 if (ix86_decompose_address (addr, &parts) <= 0)
6474 reason = "decomposition failed";
6479 index = parts.index;
6481 scale = parts.scale;
6483 /* Validate base register.
6485 Don't allow SUBREG's that span more than a word here. It can lead to spill
6486 failures when the base is one word out of a two word structure, which is
6487 represented internally as a DImode int. */
6496 else if (GET_CODE (base) == SUBREG
6497 && REG_P (SUBREG_REG (base))
6498 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6500 reg = SUBREG_REG (base);
6503 reason = "base is not a register";
6507 if (GET_MODE (base) != Pmode)
6509 reason = "base is not in Pmode";
6513 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6514 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6516 reason = "base is not valid";
6521 /* Validate index register.
6523 Don't allow SUBREG's that span more than a word here -- same as above. */
6532 else if (GET_CODE (index) == SUBREG
6533 && REG_P (SUBREG_REG (index))
6534 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6536 reg = SUBREG_REG (index);
6539 reason = "index is not a register";
6543 if (GET_MODE (index) != Pmode)
6545 reason = "index is not in Pmode";
6549 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6550 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6552 reason = "index is not valid";
6557 /* Validate scale factor. */
6560 reason_rtx = GEN_INT (scale);
6563 reason = "scale without index";
6567 if (scale != 2 && scale != 4 && scale != 8)
6569 reason = "scale is not a valid multiplier";
6574 /* Validate displacement. */
6579 if (GET_CODE (disp) == CONST
6580 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6581 switch (XINT (XEXP (disp, 0), 1))
6583 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6584 used. While ABI specify also 32bit relocations, we don't produce
6585 them at all and use IP relative instead. */
6588 gcc_assert (flag_pic);
6590 goto is_legitimate_pic;
6591 reason = "64bit address unspec";
6594 case UNSPEC_GOTPCREL:
6595 gcc_assert (flag_pic);
6596 goto is_legitimate_pic;
6598 case UNSPEC_GOTTPOFF:
6599 case UNSPEC_GOTNTPOFF:
6600 case UNSPEC_INDNTPOFF:
6606 reason = "invalid address unspec";
6610 else if (SYMBOLIC_CONST (disp)
6614 && MACHOPIC_INDIRECT
6615 && !machopic_operand_p (disp)
6621 if (TARGET_64BIT && (index || base))
6623 /* foo@dtpoff(%rX) is ok. */
6624 if (GET_CODE (disp) != CONST
6625 || GET_CODE (XEXP (disp, 0)) != PLUS
6626 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6627 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6628 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6629 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6631 reason = "non-constant pic memory reference";
6635 else if (! legitimate_pic_address_disp_p (disp))
6637 reason = "displacement is an invalid pic construct";
6641 /* This code used to verify that a symbolic pic displacement
6642 includes the pic_offset_table_rtx register.
6644 While this is good idea, unfortunately these constructs may
6645 be created by "adds using lea" optimization for incorrect
6654 This code is nonsensical, but results in addressing
6655 GOT table with pic_offset_table_rtx base. We can't
6656 just refuse it easily, since it gets matched by
6657 "addsi3" pattern, that later gets split to lea in the
6658 case output register differs from input. While this
6659 can be handled by separate addsi pattern for this case
6660 that never results in lea, this seems to be easier and
6661 correct fix for crash to disable this test. */
6663 else if (GET_CODE (disp) != LABEL_REF
6664 && GET_CODE (disp) != CONST_INT
6665 && (GET_CODE (disp) != CONST
6666 || !legitimate_constant_p (disp))
6667 && (GET_CODE (disp) != SYMBOL_REF
6668 || !legitimate_constant_p (disp)))
6670 reason = "displacement is not constant";
6673 else if (TARGET_64BIT
6674 && !x86_64_immediate_operand (disp, VOIDmode))
6676 reason = "displacement is out of range";
6681 /* Everything looks valid. */
6682 if (TARGET_DEBUG_ADDR)
6683 fprintf (stderr, "Success.\n");
6687 if (TARGET_DEBUG_ADDR)
6689 fprintf (stderr, "Error: %s\n", reason);
6690 debug_rtx (reason_rtx);
6695 /* Return a unique alias set for the GOT. */
6697 static HOST_WIDE_INT
6698 ix86_GOT_alias_set (void)
6700 static HOST_WIDE_INT set = -1;
6702 set = new_alias_set ();
6706 /* Return a legitimate reference for ORIG (an address) using the
6707 register REG. If REG is 0, a new pseudo is generated.
6709 There are two types of references that must be handled:
6711 1. Global data references must load the address from the GOT, via
6712 the PIC reg. An insn is emitted to do this load, and the reg is
6715 2. Static data references, constant pool addresses, and code labels
6716 compute the address as an offset from the GOT, whose base is in
6717 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6718 differentiate them from global data objects. The returned
6719 address is the PIC reg + an unspec constant.
6721 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6722 reg also appears in the address. */
6725 legitimize_pic_address (rtx orig, rtx reg)
6732 if (TARGET_MACHO && !TARGET_64BIT)
6735 reg = gen_reg_rtx (Pmode);
6736 /* Use the generic Mach-O PIC machinery. */
6737 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6741 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6743 else if (TARGET_64BIT
6744 && ix86_cmodel != CM_SMALL_PIC
6745 && local_symbolic_operand (addr, Pmode))
6748 /* This symbol may be referenced via a displacement from the PIC
6749 base address (@GOTOFF). */
6751 if (reload_in_progress)
6752 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6753 if (GET_CODE (addr) == CONST)
6754 addr = XEXP (addr, 0);
6755 if (GET_CODE (addr) == PLUS)
6757 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6758 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6761 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6762 new = gen_rtx_CONST (Pmode, new);
6764 tmpreg = gen_reg_rtx (Pmode);
6767 emit_move_insn (tmpreg, new);
6771 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6772 tmpreg, 1, OPTAB_DIRECT);
6775 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6777 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6779 /* This symbol may be referenced via a displacement from the PIC
6780 base address (@GOTOFF). */
6782 if (reload_in_progress)
6783 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6784 if (GET_CODE (addr) == CONST)
6785 addr = XEXP (addr, 0);
6786 if (GET_CODE (addr) == PLUS)
6788 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6789 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6792 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6793 new = gen_rtx_CONST (Pmode, new);
6794 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6798 emit_move_insn (reg, new);
6802 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6806 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6807 new = gen_rtx_CONST (Pmode, new);
6808 new = gen_const_mem (Pmode, new);
6809 set_mem_alias_set (new, ix86_GOT_alias_set ());
6812 reg = gen_reg_rtx (Pmode);
6813 /* Use directly gen_movsi, otherwise the address is loaded
6814 into register for CSE. We don't want to CSE this addresses,
6815 instead we CSE addresses from the GOT table, so skip this. */
6816 emit_insn (gen_movsi (reg, new));
6821 /* This symbol must be referenced via a load from the
6822 Global Offset Table (@GOT). */
6824 if (reload_in_progress)
6825 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6826 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6827 new = gen_rtx_CONST (Pmode, new);
6828 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6829 new = gen_const_mem (Pmode, new);
6830 set_mem_alias_set (new, ix86_GOT_alias_set ());
6833 reg = gen_reg_rtx (Pmode);
6834 emit_move_insn (reg, new);
6840 if (GET_CODE (addr) == CONST_INT
6841 && !x86_64_immediate_operand (addr, VOIDmode))
6845 emit_move_insn (reg, addr);
6849 new = force_reg (Pmode, addr);
6851 else if (GET_CODE (addr) == CONST)
6853 addr = XEXP (addr, 0);
6855 /* We must match stuff we generate before. Assume the only
6856 unspecs that can get here are ours. Not that we could do
6857 anything with them anyway.... */
6858 if (GET_CODE (addr) == UNSPEC
6859 || (GET_CODE (addr) == PLUS
6860 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6862 gcc_assert (GET_CODE (addr) == PLUS);
6864 if (GET_CODE (addr) == PLUS)
6866 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6868 /* Check first to see if this is a constant offset from a @GOTOFF
6869 symbol reference. */
6870 if (local_symbolic_operand (op0, Pmode)
6871 && GET_CODE (op1) == CONST_INT)
6875 if (reload_in_progress)
6876 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6877 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6879 new = gen_rtx_PLUS (Pmode, new, op1);
6880 new = gen_rtx_CONST (Pmode, new);
6881 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6885 emit_move_insn (reg, new);
6891 if (INTVAL (op1) < -16*1024*1024
6892 || INTVAL (op1) >= 16*1024*1024)
6894 if (!x86_64_immediate_operand (op1, Pmode))
6895 op1 = force_reg (Pmode, op1);
6896 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6902 base = legitimize_pic_address (XEXP (addr, 0), reg);
6903 new = legitimize_pic_address (XEXP (addr, 1),
6904 base == reg ? NULL_RTX : reg);
6906 if (GET_CODE (new) == CONST_INT)
6907 new = plus_constant (base, INTVAL (new));
6910 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6912 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6913 new = XEXP (new, 1);
6915 new = gen_rtx_PLUS (Pmode, base, new);
6923 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6926 get_thread_pointer (int to_reg)
6930 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6934 reg = gen_reg_rtx (Pmode);
6935 insn = gen_rtx_SET (VOIDmode, reg, tp);
6936 insn = emit_insn (insn);
6941 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6942 false if we expect this to be used for a memory address and true if
6943 we expect to load the address into a register. */
6946 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6948 rtx dest, base, off, pic, tp;
6953 case TLS_MODEL_GLOBAL_DYNAMIC:
6954 dest = gen_reg_rtx (Pmode);
6955 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6957 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6959 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6962 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6963 insns = get_insns ();
6966 emit_libcall_block (insns, dest, rax, x);
6968 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6969 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6971 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6973 if (TARGET_GNU2_TLS)
6975 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6977 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6981 case TLS_MODEL_LOCAL_DYNAMIC:
6982 base = gen_reg_rtx (Pmode);
6983 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6985 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6987 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6990 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6991 insns = get_insns ();
6994 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6995 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6996 emit_libcall_block (insns, base, rax, note);
6998 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6999 emit_insn (gen_tls_local_dynamic_base_64 (base));
7001 emit_insn (gen_tls_local_dynamic_base_32 (base));
7003 if (TARGET_GNU2_TLS)
7005 rtx x = ix86_tls_module_base ();
7007 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7008 gen_rtx_MINUS (Pmode, x, tp));
7011 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7012 off = gen_rtx_CONST (Pmode, off);
7014 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7016 if (TARGET_GNU2_TLS)
7018 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7020 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7025 case TLS_MODEL_INITIAL_EXEC:
7029 type = UNSPEC_GOTNTPOFF;
7033 if (reload_in_progress)
7034 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7035 pic = pic_offset_table_rtx;
7036 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7038 else if (!TARGET_ANY_GNU_TLS)
7040 pic = gen_reg_rtx (Pmode);
7041 emit_insn (gen_set_got (pic));
7042 type = UNSPEC_GOTTPOFF;
7047 type = UNSPEC_INDNTPOFF;
7050 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7051 off = gen_rtx_CONST (Pmode, off);
7053 off = gen_rtx_PLUS (Pmode, pic, off);
7054 off = gen_const_mem (Pmode, off);
7055 set_mem_alias_set (off, ix86_GOT_alias_set ());
7057 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7059 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7060 off = force_reg (Pmode, off);
7061 return gen_rtx_PLUS (Pmode, base, off);
7065 base = get_thread_pointer (true);
7066 dest = gen_reg_rtx (Pmode);
7067 emit_insn (gen_subsi3 (dest, base, off));
7071 case TLS_MODEL_LOCAL_EXEC:
7072 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7073 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7074 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7075 off = gen_rtx_CONST (Pmode, off);
7077 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7079 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7080 return gen_rtx_PLUS (Pmode, base, off);
7084 base = get_thread_pointer (true);
7085 dest = gen_reg_rtx (Pmode);
7086 emit_insn (gen_subsi3 (dest, base, off));
7097 /* Try machine-dependent ways of modifying an illegitimate address
7098 to be legitimate. If we find one, return the new, valid address.
7099 This macro is used in only one place: `memory_address' in explow.c.
7101 OLDX is the address as it was before break_out_memory_refs was called.
7102 In some cases it is useful to look at this to decide what needs to be done.
7104 MODE and WIN are passed so that this macro can use
7105 GO_IF_LEGITIMATE_ADDRESS.
7107 It is always safe for this macro to do nothing. It exists to recognize
7108 opportunities to optimize the output.
7110 For the 80386, we handle X+REG by loading X into a register R and
7111 using R+REG. R will go in a general reg and indexing will be used.
7112 However, if REG is a broken-out memory address or multiplication,
7113 nothing needs to be done because REG can certainly go in a general reg.
7115 When -fpic is used, special handling is needed for symbolic references.
7116 See comments by legitimize_pic_address in i386.c for details. */
7119 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7124 if (TARGET_DEBUG_ADDR)
7126 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7127 GET_MODE_NAME (mode));
7131 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7133 return legitimize_tls_address (x, log, false);
7134 if (GET_CODE (x) == CONST
7135 && GET_CODE (XEXP (x, 0)) == PLUS
7136 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7137 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7139 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7140 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7143 if (flag_pic && SYMBOLIC_CONST (x))
7144 return legitimize_pic_address (x, 0);
7146 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7147 if (GET_CODE (x) == ASHIFT
7148 && GET_CODE (XEXP (x, 1)) == CONST_INT
7149 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7152 log = INTVAL (XEXP (x, 1));
7153 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7154 GEN_INT (1 << log));
7157 if (GET_CODE (x) == PLUS)
7159 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7161 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7162 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7163 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7166 log = INTVAL (XEXP (XEXP (x, 0), 1));
7167 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7168 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7169 GEN_INT (1 << log));
7172 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7173 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7174 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7177 log = INTVAL (XEXP (XEXP (x, 1), 1));
7178 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7179 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7180 GEN_INT (1 << log));
7183 /* Put multiply first if it isn't already. */
7184 if (GET_CODE (XEXP (x, 1)) == MULT)
7186 rtx tmp = XEXP (x, 0);
7187 XEXP (x, 0) = XEXP (x, 1);
7192 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7193 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7194 created by virtual register instantiation, register elimination, and
7195 similar optimizations. */
7196 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7199 x = gen_rtx_PLUS (Pmode,
7200 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7201 XEXP (XEXP (x, 1), 0)),
7202 XEXP (XEXP (x, 1), 1));
7206 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7207 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7208 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7209 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7210 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7211 && CONSTANT_P (XEXP (x, 1)))
7214 rtx other = NULL_RTX;
7216 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7218 constant = XEXP (x, 1);
7219 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7221 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7223 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7224 other = XEXP (x, 1);
7232 x = gen_rtx_PLUS (Pmode,
7233 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7234 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7235 plus_constant (other, INTVAL (constant)));
7239 if (changed && legitimate_address_p (mode, x, FALSE))
7242 if (GET_CODE (XEXP (x, 0)) == MULT)
7245 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7248 if (GET_CODE (XEXP (x, 1)) == MULT)
7251 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7255 && GET_CODE (XEXP (x, 1)) == REG
7256 && GET_CODE (XEXP (x, 0)) == REG)
7259 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7262 x = legitimize_pic_address (x, 0);
7265 if (changed && legitimate_address_p (mode, x, FALSE))
7268 if (GET_CODE (XEXP (x, 0)) == REG)
7270 rtx temp = gen_reg_rtx (Pmode);
7271 rtx val = force_operand (XEXP (x, 1), temp);
7273 emit_move_insn (temp, val);
7279 else if (GET_CODE (XEXP (x, 1)) == REG)
7281 rtx temp = gen_reg_rtx (Pmode);
7282 rtx val = force_operand (XEXP (x, 0), temp);
7284 emit_move_insn (temp, val);
7294 /* Print an integer constant expression in assembler syntax. Addition
7295 and subtraction are the only arithmetic that may appear in these
7296 expressions. FILE is the stdio stream to write to, X is the rtx, and
7297 CODE is the operand print code from the output string. */
7300 output_pic_addr_const (FILE *file, rtx x, int code)
7304 switch (GET_CODE (x))
7307 gcc_assert (flag_pic);
7312 if (! TARGET_MACHO || TARGET_64BIT)
7313 output_addr_const (file, x);
7316 const char *name = XSTR (x, 0);
7318 /* Mark the decl as referenced so that cgraph will output the function. */
7319 if (SYMBOL_REF_DECL (x))
7320 mark_decl_referenced (SYMBOL_REF_DECL (x));
7323 if (MACHOPIC_INDIRECT
7324 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7325 name = machopic_indirection_name (x, /*stub_p=*/true);
7327 assemble_name (file, name);
7329 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7330 fputs ("@PLT", file);
7337 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7338 assemble_name (asm_out_file, buf);
7342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7346 /* This used to output parentheses around the expression,
7347 but that does not work on the 386 (either ATT or BSD assembler). */
7348 output_pic_addr_const (file, XEXP (x, 0), code);
7352 if (GET_MODE (x) == VOIDmode)
7354 /* We can use %d if the number is <32 bits and positive. */
7355 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7356 fprintf (file, "0x%lx%08lx",
7357 (unsigned long) CONST_DOUBLE_HIGH (x),
7358 (unsigned long) CONST_DOUBLE_LOW (x));
7360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7363 /* We can't handle floating point constants;
7364 PRINT_OPERAND must handle them. */
7365 output_operand_lossage ("floating constant misused");
7369 /* Some assemblers need integer constants to appear first. */
7370 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7372 output_pic_addr_const (file, XEXP (x, 0), code);
7374 output_pic_addr_const (file, XEXP (x, 1), code);
7378 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7379 output_pic_addr_const (file, XEXP (x, 1), code);
7381 output_pic_addr_const (file, XEXP (x, 0), code);
7387 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7388 output_pic_addr_const (file, XEXP (x, 0), code);
7390 output_pic_addr_const (file, XEXP (x, 1), code);
7392 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7396 gcc_assert (XVECLEN (x, 0) == 1);
7397 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7398 switch (XINT (x, 1))
7401 fputs ("@GOT", file);
7404 fputs ("@GOTOFF", file);
7406 case UNSPEC_GOTPCREL:
7407 fputs ("@GOTPCREL(%rip)", file);
7409 case UNSPEC_GOTTPOFF:
7410 /* FIXME: This might be @TPOFF in Sun ld too. */
7411 fputs ("@GOTTPOFF", file);
7414 fputs ("@TPOFF", file);
7418 fputs ("@TPOFF", file);
7420 fputs ("@NTPOFF", file);
7423 fputs ("@DTPOFF", file);
7425 case UNSPEC_GOTNTPOFF:
7427 fputs ("@GOTTPOFF(%rip)", file);
7429 fputs ("@GOTNTPOFF", file);
7431 case UNSPEC_INDNTPOFF:
7432 fputs ("@INDNTPOFF", file);
7435 output_operand_lossage ("invalid UNSPEC as operand");
7441 output_operand_lossage ("invalid expression as operand");
7445 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7446 We need to emit DTP-relative relocations. */
7449 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7451 fputs (ASM_LONG, file);
7452 output_addr_const (file, x);
7453 fputs ("@DTPOFF", file);
7459 fputs (", 0", file);
7466 /* In the name of slightly smaller debug output, and to cater to
7467 general assembler lossage, recognize PIC+GOTOFF and turn it back
7468 into a direct symbol reference.
7470 On Darwin, this is necessary to avoid a crash, because Darwin
7471 has a different PIC label for each routine but the DWARF debugging
7472 information is not associated with any particular routine, so it's
7473 necessary to remove references to the PIC label from RTL stored by
7474 the DWARF output code. */
7477 ix86_delegitimize_address (rtx orig_x)
7480 /* reg_addend is NULL or a multiple of some register. */
7481 rtx reg_addend = NULL_RTX;
7482 /* const_addend is NULL or a const_int. */
7483 rtx const_addend = NULL_RTX;
7484 /* This is the result, or NULL. */
7485 rtx result = NULL_RTX;
7487 if (GET_CODE (x) == MEM)
7492 if (GET_CODE (x) != CONST
7493 || GET_CODE (XEXP (x, 0)) != UNSPEC
7494 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7495 || GET_CODE (orig_x) != MEM)
7497 return XVECEXP (XEXP (x, 0), 0, 0);
7500 if (GET_CODE (x) != PLUS
7501 || GET_CODE (XEXP (x, 1)) != CONST)
7504 if (GET_CODE (XEXP (x, 0)) == REG
7505 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7506 /* %ebx + GOT/GOTOFF */
7508 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7510 /* %ebx + %reg * scale + GOT/GOTOFF */
7511 reg_addend = XEXP (x, 0);
7512 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7513 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7514 reg_addend = XEXP (reg_addend, 1);
7515 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7516 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7517 reg_addend = XEXP (reg_addend, 0);
7520 if (GET_CODE (reg_addend) != REG
7521 && GET_CODE (reg_addend) != MULT
7522 && GET_CODE (reg_addend) != ASHIFT)
7528 x = XEXP (XEXP (x, 1), 0);
7529 if (GET_CODE (x) == PLUS
7530 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7532 const_addend = XEXP (x, 1);
7536 if (GET_CODE (x) == UNSPEC
7537 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7538 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7539 result = XVECEXP (x, 0, 0);
7541 if (TARGET_MACHO && darwin_local_data_pic (x)
7542 && GET_CODE (orig_x) != MEM)
7543 result = XEXP (x, 0);
7549 result = gen_rtx_PLUS (Pmode, result, const_addend);
7551 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7556 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7561 if (mode == CCFPmode || mode == CCFPUmode)
7563 enum rtx_code second_code, bypass_code;
7564 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7565 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7566 code = ix86_fp_compare_code_to_integer (code);
7570 code = reverse_condition (code);
7581 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7585 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7586 Those same assemblers have the same but opposite lossage on cmov. */
7587 gcc_assert (mode == CCmode);
7588 suffix = fp ? "nbe" : "a";
7608 gcc_assert (mode == CCmode);
7630 gcc_assert (mode == CCmode);
7631 suffix = fp ? "nb" : "ae";
7634 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7638 gcc_assert (mode == CCmode);
7642 suffix = fp ? "u" : "p";
7645 suffix = fp ? "nu" : "np";
7650 fputs (suffix, file);
7653 /* Print the name of register X to FILE based on its machine mode and number.
7654 If CODE is 'w', pretend the mode is HImode.
7655 If CODE is 'b', pretend the mode is QImode.
7656 If CODE is 'k', pretend the mode is SImode.
7657 If CODE is 'q', pretend the mode is DImode.
7658 If CODE is 'h', pretend the reg is the 'high' byte register.
7659 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7662 print_reg (rtx x, int code, FILE *file)
7664 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7665 && REGNO (x) != FRAME_POINTER_REGNUM
7666 && REGNO (x) != FLAGS_REG
7667 && REGNO (x) != FPSR_REG);
7669 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7672 if (code == 'w' || MMX_REG_P (x))
7674 else if (code == 'b')
7676 else if (code == 'k')
7678 else if (code == 'q')
7680 else if (code == 'y')
7682 else if (code == 'h')
7685 code = GET_MODE_SIZE (GET_MODE (x));
7687 /* Irritatingly, AMD extended registers use different naming convention
7688 from the normal registers. */
7689 if (REX_INT_REG_P (x))
7691 gcc_assert (TARGET_64BIT);
7695 error ("extended registers have no high halves");
7698 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7701 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7704 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7707 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7710 error ("unsupported operand size for extended register");
7718 if (STACK_TOP_P (x))
7720 fputs ("st(0)", file);
7727 if (! ANY_FP_REG_P (x))
7728 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7733 fputs (hi_reg_name[REGNO (x)], file);
7736 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7738 fputs (qi_reg_name[REGNO (x)], file);
7741 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7743 fputs (qi_high_reg_name[REGNO (x)], file);
7750 /* Locate some local-dynamic symbol still in use by this function
7751 so that we can print its name in some tls_local_dynamic_base
7755 get_some_local_dynamic_name (void)
7759 if (cfun->machine->some_ld_name)
7760 return cfun->machine->some_ld_name;
7762 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7764 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7765 return cfun->machine->some_ld_name;
7771 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7775 if (GET_CODE (x) == SYMBOL_REF
7776 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7778 cfun->machine->some_ld_name = XSTR (x, 0);
7786 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7787 C -- print opcode suffix for set/cmov insn.
7788 c -- like C, but print reversed condition
7789 F,f -- likewise, but for floating-point.
7790 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7792 R -- print the prefix for register names.
7793 z -- print the opcode suffix for the size of the current operand.
7794 * -- print a star (in certain assembler syntax)
7795 A -- print an absolute memory reference.
7796 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7797 s -- print a shift double count, followed by the assemblers argument
7799 b -- print the QImode name of the register for the indicated operand.
7800 %b0 would print %al if operands[0] is reg 0.
7801 w -- likewise, print the HImode name of the register.
7802 k -- likewise, print the SImode name of the register.
7803 q -- likewise, print the DImode name of the register.
7804 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7805 y -- print "st(0)" instead of "st" as a register.
7806 D -- print condition for SSE cmp instruction.
7807 P -- if PIC, print an @PLT suffix.
7808 X -- don't print any sort of PIC '@' suffix for a symbol.
7809 & -- print some in-use local-dynamic symbol name.
7810 H -- print a memory address offset by 8; used for sse high-parts
7814 print_operand (FILE *file, rtx x, int code)
7821 if (ASSEMBLER_DIALECT == ASM_ATT)
7826 assemble_name (file, get_some_local_dynamic_name ());
7830 switch (ASSEMBLER_DIALECT)
7837 /* Intel syntax. For absolute addresses, registers should not
7838 be surrounded by braces. */
7839 if (GET_CODE (x) != REG)
7842 PRINT_OPERAND (file, x, 0);
7852 PRINT_OPERAND (file, x, 0);
7857 if (ASSEMBLER_DIALECT == ASM_ATT)
7862 if (ASSEMBLER_DIALECT == ASM_ATT)
7867 if (ASSEMBLER_DIALECT == ASM_ATT)
7872 if (ASSEMBLER_DIALECT == ASM_ATT)
7877 if (ASSEMBLER_DIALECT == ASM_ATT)
7882 if (ASSEMBLER_DIALECT == ASM_ATT)
7887 /* 387 opcodes don't get size suffixes if the operands are
7889 if (STACK_REG_P (x))
7892 /* Likewise if using Intel opcodes. */
7893 if (ASSEMBLER_DIALECT == ASM_INTEL)
7896 /* This is the size of op from size of operand. */
7897 switch (GET_MODE_SIZE (GET_MODE (x)))
7900 #ifdef HAVE_GAS_FILDS_FISTS
7906 if (GET_MODE (x) == SFmode)
7921 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7923 #ifdef GAS_MNEMONICS
7949 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7951 PRINT_OPERAND (file, x, 0);
7957 /* Little bit of braindamage here. The SSE compare instructions
7958 does use completely different names for the comparisons that the
7959 fp conditional moves. */
7960 switch (GET_CODE (x))
7975 fputs ("unord", file);
7979 fputs ("neq", file);
7983 fputs ("nlt", file);
7987 fputs ("nle", file);
7990 fputs ("ord", file);
7997 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7998 if (ASSEMBLER_DIALECT == ASM_ATT)
8000 switch (GET_MODE (x))
8002 case HImode: putc ('w', file); break;
8004 case SFmode: putc ('l', file); break;
8006 case DFmode: putc ('q', file); break;
8007 default: gcc_unreachable ();
8014 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8017 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8018 if (ASSEMBLER_DIALECT == ASM_ATT)
8021 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8024 /* Like above, but reverse condition */
8026 /* Check to see if argument to %c is really a constant
8027 and not a condition code which needs to be reversed. */
8028 if (!COMPARISON_P (x))
8030 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8033 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8036 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8037 if (ASSEMBLER_DIALECT == ASM_ATT)
8040 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8044 /* It doesn't actually matter what mode we use here, as we're
8045 only going to use this for printing. */
8046 x = adjust_address_nv (x, DImode, 8);
8053 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8056 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8059 int pred_val = INTVAL (XEXP (x, 0));
8061 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8062 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8064 int taken = pred_val > REG_BR_PROB_BASE / 2;
8065 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8067 /* Emit hints only in the case default branch prediction
8068 heuristics would fail. */
8069 if (taken != cputaken)
8071 /* We use 3e (DS) prefix for taken branches and
8072 2e (CS) prefix for not taken branches. */
8074 fputs ("ds ; ", file);
8076 fputs ("cs ; ", file);
8083 output_operand_lossage ("invalid operand code '%c'", code);
8087 if (GET_CODE (x) == REG)
8088 print_reg (x, code, file);
8090 else if (GET_CODE (x) == MEM)
8092 /* No `byte ptr' prefix for call instructions. */
8093 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8096 switch (GET_MODE_SIZE (GET_MODE (x)))
8098 case 1: size = "BYTE"; break;
8099 case 2: size = "WORD"; break;
8100 case 4: size = "DWORD"; break;
8101 case 8: size = "QWORD"; break;
8102 case 12: size = "XWORD"; break;
8103 case 16: size = "XMMWORD"; break;
8108 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8111 else if (code == 'w')
8113 else if (code == 'k')
8117 fputs (" PTR ", file);
8121 /* Avoid (%rip) for call operands. */
8122 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8123 && GET_CODE (x) != CONST_INT)
8124 output_addr_const (file, x);
8125 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8126 output_operand_lossage ("invalid constraints for operand");
8131 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8136 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8137 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8139 if (ASSEMBLER_DIALECT == ASM_ATT)
8141 fprintf (file, "0x%08lx", l);
8144 /* These float cases don't actually occur as immediate operands. */
8145 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8149 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8150 fprintf (file, "%s", dstr);
8153 else if (GET_CODE (x) == CONST_DOUBLE
8154 && GET_MODE (x) == XFmode)
8158 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8159 fprintf (file, "%s", dstr);
8164 /* We have patterns that allow zero sets of memory, for instance.
8165 In 64-bit mode, we should probably support all 8-byte vectors,
8166 since we can in fact encode that into an immediate. */
8167 if (GET_CODE (x) == CONST_VECTOR)
8169 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8175 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8177 if (ASSEMBLER_DIALECT == ASM_ATT)
8180 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8181 || GET_CODE (x) == LABEL_REF)
8183 if (ASSEMBLER_DIALECT == ASM_ATT)
8186 fputs ("OFFSET FLAT:", file);
8189 if (GET_CODE (x) == CONST_INT)
8190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8192 output_pic_addr_const (file, x, code);
8194 output_addr_const (file, x);
8198 /* Print a memory operand whose address is ADDR. */
8201 print_operand_address (FILE *file, rtx addr)
8203 struct ix86_address parts;
8204 rtx base, index, disp;
8206 int ok = ix86_decompose_address (addr, &parts);
8211 index = parts.index;
8213 scale = parts.scale;
8221 if (USER_LABEL_PREFIX[0] == 0)
8223 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8229 if (!base && !index)
8231 /* Displacement only requires special attention. */
8233 if (GET_CODE (disp) == CONST_INT)
8235 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8237 if (USER_LABEL_PREFIX[0] == 0)
8239 fputs ("ds:", file);
8241 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8244 output_pic_addr_const (file, disp, 0);
8246 output_addr_const (file, disp);
8248 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8251 if (GET_CODE (disp) == CONST
8252 && GET_CODE (XEXP (disp, 0)) == PLUS
8253 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8254 disp = XEXP (XEXP (disp, 0), 0);
8255 if (GET_CODE (disp) == LABEL_REF
8256 || (GET_CODE (disp) == SYMBOL_REF
8257 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8258 fputs ("(%rip)", file);
8263 if (ASSEMBLER_DIALECT == ASM_ATT)
8268 output_pic_addr_const (file, disp, 0);
8269 else if (GET_CODE (disp) == LABEL_REF)
8270 output_asm_label (disp);
8272 output_addr_const (file, disp);
8277 print_reg (base, 0, file);
8281 print_reg (index, 0, file);
8283 fprintf (file, ",%d", scale);
8289 rtx offset = NULL_RTX;
8293 /* Pull out the offset of a symbol; print any symbol itself. */
8294 if (GET_CODE (disp) == CONST
8295 && GET_CODE (XEXP (disp, 0)) == PLUS
8296 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8298 offset = XEXP (XEXP (disp, 0), 1);
8299 disp = gen_rtx_CONST (VOIDmode,
8300 XEXP (XEXP (disp, 0), 0));
8304 output_pic_addr_const (file, disp, 0);
8305 else if (GET_CODE (disp) == LABEL_REF)
8306 output_asm_label (disp);
8307 else if (GET_CODE (disp) == CONST_INT)
8310 output_addr_const (file, disp);
8316 print_reg (base, 0, file);
8319 if (INTVAL (offset) >= 0)
8321 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8332 print_reg (index, 0, file);
8334 fprintf (file, "*%d", scale);
8342 output_addr_const_extra (FILE *file, rtx x)
8346 if (GET_CODE (x) != UNSPEC)
8349 op = XVECEXP (x, 0, 0);
8350 switch (XINT (x, 1))
8352 case UNSPEC_GOTTPOFF:
8353 output_addr_const (file, op);
8354 /* FIXME: This might be @TPOFF in Sun ld. */
8355 fputs ("@GOTTPOFF", file);
8358 output_addr_const (file, op);
8359 fputs ("@TPOFF", file);
8362 output_addr_const (file, op);
8364 fputs ("@TPOFF", file);
8366 fputs ("@NTPOFF", file);
8369 output_addr_const (file, op);
8370 fputs ("@DTPOFF", file);
8372 case UNSPEC_GOTNTPOFF:
8373 output_addr_const (file, op);
8375 fputs ("@GOTTPOFF(%rip)", file);
8377 fputs ("@GOTNTPOFF", file);
8379 case UNSPEC_INDNTPOFF:
8380 output_addr_const (file, op);
8381 fputs ("@INDNTPOFF", file);
8391 /* Split one or more DImode RTL references into pairs of SImode
8392 references. The RTL can be REG, offsettable MEM, integer constant, or
8393 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8394 split and "num" is its length. lo_half and hi_half are output arrays
8395 that parallel "operands". */
8398 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8402 rtx op = operands[num];
8404 /* simplify_subreg refuse to split volatile memory addresses,
8405 but we still have to handle it. */
8406 if (GET_CODE (op) == MEM)
8408 lo_half[num] = adjust_address (op, SImode, 0);
8409 hi_half[num] = adjust_address (op, SImode, 4);
8413 lo_half[num] = simplify_gen_subreg (SImode, op,
8414 GET_MODE (op) == VOIDmode
8415 ? DImode : GET_MODE (op), 0);
8416 hi_half[num] = simplify_gen_subreg (SImode, op,
8417 GET_MODE (op) == VOIDmode
8418 ? DImode : GET_MODE (op), 4);
8422 /* Split one or more TImode RTL references into pairs of DImode
8423 references. The RTL can be REG, offsettable MEM, integer constant, or
8424 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8425 split and "num" is its length. lo_half and hi_half are output arrays
8426 that parallel "operands". */
8429 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8433 rtx op = operands[num];
8435 /* simplify_subreg refuse to split volatile memory addresses, but we
8436 still have to handle it. */
8437 if (GET_CODE (op) == MEM)
8439 lo_half[num] = adjust_address (op, DImode, 0);
8440 hi_half[num] = adjust_address (op, DImode, 8);
8444 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8445 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8450 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8451 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8452 is the expression of the binary operation. The output may either be
8453 emitted here, or returned to the caller, like all output_* functions.
8455 There is no guarantee that the operands are the same mode, as they
8456 might be within FLOAT or FLOAT_EXTEND expressions. */
8458 #ifndef SYSV386_COMPAT
8459 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8460 wants to fix the assemblers because that causes incompatibility
8461 with gcc. No-one wants to fix gcc because that causes
8462 incompatibility with assemblers... You can use the option of
8463 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8464 #define SYSV386_COMPAT 1
8468 output_387_binary_op (rtx insn, rtx *operands)
8470 static char buf[30];
8473 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8475 #ifdef ENABLE_CHECKING
8476 /* Even if we do not want to check the inputs, this documents input
8477 constraints. Which helps in understanding the following code. */
8478 if (STACK_REG_P (operands[0])
8479 && ((REG_P (operands[1])
8480 && REGNO (operands[0]) == REGNO (operands[1])
8481 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8482 || (REG_P (operands[2])
8483 && REGNO (operands[0]) == REGNO (operands[2])
8484 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8485 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8488 gcc_assert (is_sse);
8491 switch (GET_CODE (operands[3]))
8494 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8495 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8503 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8504 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8512 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8513 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8521 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8522 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8536 if (GET_MODE (operands[0]) == SFmode)
8537 strcat (buf, "ss\t{%2, %0|%0, %2}");
8539 strcat (buf, "sd\t{%2, %0|%0, %2}");
8544 switch (GET_CODE (operands[3]))
8548 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8550 rtx temp = operands[2];
8551 operands[2] = operands[1];
8555 /* know operands[0] == operands[1]. */
8557 if (GET_CODE (operands[2]) == MEM)
8563 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8565 if (STACK_TOP_P (operands[0]))
8566 /* How is it that we are storing to a dead operand[2]?
8567 Well, presumably operands[1] is dead too. We can't
8568 store the result to st(0) as st(0) gets popped on this
8569 instruction. Instead store to operands[2] (which I
8570 think has to be st(1)). st(1) will be popped later.
8571 gcc <= 2.8.1 didn't have this check and generated
8572 assembly code that the Unixware assembler rejected. */
8573 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8575 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8579 if (STACK_TOP_P (operands[0]))
8580 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8582 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8587 if (GET_CODE (operands[1]) == MEM)
8593 if (GET_CODE (operands[2]) == MEM)
8599 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8602 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8603 derived assemblers, confusingly reverse the direction of
8604 the operation for fsub{r} and fdiv{r} when the
8605 destination register is not st(0). The Intel assembler
8606 doesn't have this brain damage. Read !SYSV386_COMPAT to
8607 figure out what the hardware really does. */
8608 if (STACK_TOP_P (operands[0]))
8609 p = "{p\t%0, %2|rp\t%2, %0}";
8611 p = "{rp\t%2, %0|p\t%0, %2}";
8613 if (STACK_TOP_P (operands[0]))
8614 /* As above for fmul/fadd, we can't store to st(0). */
8615 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8617 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8622 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8625 if (STACK_TOP_P (operands[0]))
8626 p = "{rp\t%0, %1|p\t%1, %0}";
8628 p = "{p\t%1, %0|rp\t%0, %1}";
8630 if (STACK_TOP_P (operands[0]))
8631 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8633 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8638 if (STACK_TOP_P (operands[0]))
8640 if (STACK_TOP_P (operands[1]))
8641 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8643 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8646 else if (STACK_TOP_P (operands[1]))
8649 p = "{\t%1, %0|r\t%0, %1}";
8651 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8657 p = "{r\t%2, %0|\t%0, %2}";
8659 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8672 /* Return needed mode for entity in optimize_mode_switching pass. */
8675 ix86_mode_needed (int entity, rtx insn)
8677 enum attr_i387_cw mode;
8679 /* The mode UNINITIALIZED is used to store control word after a
8680 function call or ASM pattern. The mode ANY specify that function
8681 has no requirements on the control word and make no changes in the
8682 bits we are interested in. */
8685 || (NONJUMP_INSN_P (insn)
8686 && (asm_noperands (PATTERN (insn)) >= 0
8687 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8688 return I387_CW_UNINITIALIZED;
8690 if (recog_memoized (insn) < 0)
8693 mode = get_attr_i387_cw (insn);
8698 if (mode == I387_CW_TRUNC)
8703 if (mode == I387_CW_FLOOR)
8708 if (mode == I387_CW_CEIL)
8713 if (mode == I387_CW_MASK_PM)
8724 /* Output code to initialize control word copies used by trunc?f?i and
8725 rounding patterns. CURRENT_MODE is set to current control word,
8726 while NEW_MODE is set to new control word. */
8729 emit_i387_cw_initialization (int mode)
8731 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8736 rtx reg = gen_reg_rtx (HImode);
8738 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8739 emit_move_insn (reg, stored_mode);
8741 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8746 /* round toward zero (truncate) */
8747 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8748 slot = SLOT_CW_TRUNC;
8752 /* round down toward -oo */
8753 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8754 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8755 slot = SLOT_CW_FLOOR;
8759 /* round up toward +oo */
8760 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8761 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8762 slot = SLOT_CW_CEIL;
8765 case I387_CW_MASK_PM:
8766 /* mask precision exception for nearbyint() */
8767 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8768 slot = SLOT_CW_MASK_PM;
8780 /* round toward zero (truncate) */
8781 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8782 slot = SLOT_CW_TRUNC;
8786 /* round down toward -oo */
8787 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8788 slot = SLOT_CW_FLOOR;
8792 /* round up toward +oo */
8793 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8794 slot = SLOT_CW_CEIL;
8797 case I387_CW_MASK_PM:
8798 /* mask precision exception for nearbyint() */
8799 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8800 slot = SLOT_CW_MASK_PM;
8808 gcc_assert (slot < MAX_386_STACK_LOCALS);
8810 new_mode = assign_386_stack_local (HImode, slot);
8811 emit_move_insn (new_mode, reg);
8814 /* Output code for INSN to convert a float to a signed int. OPERANDS
8815 are the insn operands. The output may be [HSD]Imode and the input
8816 operand may be [SDX]Fmode. */
8819 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8821 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8822 int dimode_p = GET_MODE (operands[0]) == DImode;
8823 int round_mode = get_attr_i387_cw (insn);
8825 /* Jump through a hoop or two for DImode, since the hardware has no
8826 non-popping instruction. We used to do this a different way, but
8827 that was somewhat fragile and broke with post-reload splitters. */
8828 if ((dimode_p || fisttp) && !stack_top_dies)
8829 output_asm_insn ("fld\t%y1", operands);
8831 gcc_assert (STACK_TOP_P (operands[1]));
8832 gcc_assert (GET_CODE (operands[0]) == MEM);
8835 output_asm_insn ("fisttp%z0\t%0", operands);
8838 if (round_mode != I387_CW_ANY)
8839 output_asm_insn ("fldcw\t%3", operands);
8840 if (stack_top_dies || dimode_p)
8841 output_asm_insn ("fistp%z0\t%0", operands);
8843 output_asm_insn ("fist%z0\t%0", operands);
8844 if (round_mode != I387_CW_ANY)
8845 output_asm_insn ("fldcw\t%2", operands);
8851 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8852 have the values zero or one, indicates the ffreep insn's operand
8853 from the OPERANDS array. */
8856 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8858 if (TARGET_USE_FFREEP)
8859 #if HAVE_AS_IX86_FFREEP
8860 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8862 switch (REGNO (operands[opno]))
8864 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8865 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8866 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8867 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8868 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8869 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8870 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8871 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8875 return opno ? "fstp\t%y1" : "fstp\t%y0";
8879 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8880 should be used. UNORDERED_P is true when fucom should be used. */
8883 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8886 rtx cmp_op0, cmp_op1;
8887 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8891 cmp_op0 = operands[0];
8892 cmp_op1 = operands[1];
8896 cmp_op0 = operands[1];
8897 cmp_op1 = operands[2];
8902 if (GET_MODE (operands[0]) == SFmode)
8904 return "ucomiss\t{%1, %0|%0, %1}";
8906 return "comiss\t{%1, %0|%0, %1}";
8909 return "ucomisd\t{%1, %0|%0, %1}";
8911 return "comisd\t{%1, %0|%0, %1}";
8914 gcc_assert (STACK_TOP_P (cmp_op0));
8916 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8918 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8922 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8923 return output_387_ffreep (operands, 1);
8926 return "ftst\n\tfnstsw\t%0";
8929 if (STACK_REG_P (cmp_op1)
8931 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8932 && REGNO (cmp_op1) != FIRST_STACK_REG)
8934 /* If both the top of the 387 stack dies, and the other operand
8935 is also a stack register that dies, then this must be a
8936 `fcompp' float compare */
8940 /* There is no double popping fcomi variant. Fortunately,
8941 eflags is immune from the fstp's cc clobbering. */
8943 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8945 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8946 return output_387_ffreep (operands, 0);
8951 return "fucompp\n\tfnstsw\t%0";
8953 return "fcompp\n\tfnstsw\t%0";
8958 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8960 static const char * const alt[16] =
8962 "fcom%z2\t%y2\n\tfnstsw\t%0",
8963 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8964 "fucom%z2\t%y2\n\tfnstsw\t%0",
8965 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8967 "ficom%z2\t%y2\n\tfnstsw\t%0",
8968 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8972 "fcomi\t{%y1, %0|%0, %y1}",
8973 "fcomip\t{%y1, %0|%0, %y1}",
8974 "fucomi\t{%y1, %0|%0, %y1}",
8975 "fucomip\t{%y1, %0|%0, %y1}",
8986 mask = eflags_p << 3;
8987 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8988 mask |= unordered_p << 1;
8989 mask |= stack_top_dies;
8991 gcc_assert (mask < 16);
9000 ix86_output_addr_vec_elt (FILE *file, int value)
9002 const char *directive = ASM_LONG;
9006 directive = ASM_QUAD;
9008 gcc_assert (!TARGET_64BIT);
9011 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9015 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9018 fprintf (file, "%s%s%d-%s%d\n",
9019 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9020 else if (HAVE_AS_GOTOFF_IN_DATA)
9021 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9023 else if (TARGET_MACHO)
9025 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9026 machopic_output_function_base_name (file);
9027 fprintf(file, "\n");
9031 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9032 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9035 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9039 ix86_expand_clear (rtx dest)
9043 /* We play register width games, which are only valid after reload. */
9044 gcc_assert (reload_completed);
9046 /* Avoid HImode and its attendant prefix byte. */
9047 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9048 dest = gen_rtx_REG (SImode, REGNO (dest));
9050 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9052 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9053 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9055 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9056 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9062 /* X is an unchanging MEM. If it is a constant pool reference, return
9063 the constant pool rtx, else NULL. */
9066 maybe_get_pool_constant (rtx x)
9068 x = ix86_delegitimize_address (XEXP (x, 0));
9070 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9071 return get_pool_constant (x);
9077 ix86_expand_move (enum machine_mode mode, rtx operands[])
9079 int strict = (reload_in_progress || reload_completed);
9081 enum tls_model model;
9086 if (GET_CODE (op1) == SYMBOL_REF)
9088 model = SYMBOL_REF_TLS_MODEL (op1);
9091 op1 = legitimize_tls_address (op1, model, true);
9092 op1 = force_operand (op1, op0);
9097 else if (GET_CODE (op1) == CONST
9098 && GET_CODE (XEXP (op1, 0)) == PLUS
9099 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9101 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9104 rtx addend = XEXP (XEXP (op1, 0), 1);
9105 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9106 op1 = force_operand (op1, NULL);
9107 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9108 op0, 1, OPTAB_DIRECT);
9114 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9116 if (TARGET_MACHO && !TARGET_64BIT)
9121 rtx temp = ((reload_in_progress
9122 || ((op0 && GET_CODE (op0) == REG)
9124 ? op0 : gen_reg_rtx (Pmode));
9125 op1 = machopic_indirect_data_reference (op1, temp);
9126 op1 = machopic_legitimize_pic_address (op1, mode,
9127 temp == op1 ? 0 : temp);
9129 else if (MACHOPIC_INDIRECT)
9130 op1 = machopic_indirect_data_reference (op1, 0);
9137 if (GET_CODE (op0) == MEM)
9138 op1 = force_reg (Pmode, op1);
9140 op1 = legitimize_address (op1, op1, Pmode);
9145 if (GET_CODE (op0) == MEM
9146 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9147 || !push_operand (op0, mode))
9148 && GET_CODE (op1) == MEM)
9149 op1 = force_reg (mode, op1);
9151 if (push_operand (op0, mode)
9152 && ! general_no_elim_operand (op1, mode))
9153 op1 = copy_to_mode_reg (mode, op1);
9155 /* Force large constants in 64bit compilation into register
9156 to get them CSEed. */
9157 if (TARGET_64BIT && mode == DImode
9158 && immediate_operand (op1, mode)
9159 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9160 && !register_operand (op0, mode)
9161 && optimize && !reload_completed && !reload_in_progress)
9162 op1 = copy_to_mode_reg (mode, op1);
9164 if (FLOAT_MODE_P (mode))
9166 /* If we are loading a floating point constant to a register,
9167 force the value to memory now, since we'll get better code
9168 out the back end. */
9172 else if (GET_CODE (op1) == CONST_DOUBLE)
9174 op1 = validize_mem (force_const_mem (mode, op1));
9175 if (!register_operand (op0, mode))
9177 rtx temp = gen_reg_rtx (mode);
9178 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9179 emit_move_insn (op0, temp);
9186 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9190 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9192 rtx op0 = operands[0], op1 = operands[1];
9194 /* Force constants other than zero into memory. We do not know how
9195 the instructions used to build constants modify the upper 64 bits
9196 of the register, once we have that information we may be able
9197 to handle some of them more efficiently. */
9198 if ((reload_in_progress | reload_completed) == 0
9199 && register_operand (op0, mode)
9201 && standard_sse_constant_p (op1) <= 0)
9202 op1 = validize_mem (force_const_mem (mode, op1));
9204 /* Make operand1 a register if it isn't already. */
9206 && !register_operand (op0, mode)
9207 && !register_operand (op1, mode))
9209 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9213 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9216 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9217 straight to ix86_expand_vector_move. */
9220 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9229 /* If we're optimizing for size, movups is the smallest. */
9232 op0 = gen_lowpart (V4SFmode, op0);
9233 op1 = gen_lowpart (V4SFmode, op1);
9234 emit_insn (gen_sse_movups (op0, op1));
9238 /* ??? If we have typed data, then it would appear that using
9239 movdqu is the only way to get unaligned data loaded with
9241 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9243 op0 = gen_lowpart (V16QImode, op0);
9244 op1 = gen_lowpart (V16QImode, op1);
9245 emit_insn (gen_sse2_movdqu (op0, op1));
9249 if (TARGET_SSE2 && mode == V2DFmode)
9253 /* When SSE registers are split into halves, we can avoid
9254 writing to the top half twice. */
9255 if (TARGET_SSE_SPLIT_REGS)
9257 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9262 /* ??? Not sure about the best option for the Intel chips.
9263 The following would seem to satisfy; the register is
9264 entirely cleared, breaking the dependency chain. We
9265 then store to the upper half, with a dependency depth
9266 of one. A rumor has it that Intel recommends two movsd
9267 followed by an unpacklpd, but this is unconfirmed. And
9268 given that the dependency depth of the unpacklpd would
9269 still be one, I'm not sure why this would be better. */
9270 zero = CONST0_RTX (V2DFmode);
9273 m = adjust_address (op1, DFmode, 0);
9274 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9275 m = adjust_address (op1, DFmode, 8);
9276 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9280 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9281 emit_move_insn (op0, CONST0_RTX (mode));
9283 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9285 if (mode != V4SFmode)
9286 op0 = gen_lowpart (V4SFmode, op0);
9287 m = adjust_address (op1, V2SFmode, 0);
9288 emit_insn (gen_sse_loadlps (op0, op0, m));
9289 m = adjust_address (op1, V2SFmode, 8);
9290 emit_insn (gen_sse_loadhps (op0, op0, m));
9293 else if (MEM_P (op0))
9295 /* If we're optimizing for size, movups is the smallest. */
9298 op0 = gen_lowpart (V4SFmode, op0);
9299 op1 = gen_lowpart (V4SFmode, op1);
9300 emit_insn (gen_sse_movups (op0, op1));
9304 /* ??? Similar to above, only less clear because of quote
9305 typeless stores unquote. */
9306 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9307 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9309 op0 = gen_lowpart (V16QImode, op0);
9310 op1 = gen_lowpart (V16QImode, op1);
9311 emit_insn (gen_sse2_movdqu (op0, op1));
9315 if (TARGET_SSE2 && mode == V2DFmode)
9317 m = adjust_address (op0, DFmode, 0);
9318 emit_insn (gen_sse2_storelpd (m, op1));
9319 m = adjust_address (op0, DFmode, 8);
9320 emit_insn (gen_sse2_storehpd (m, op1));
9324 if (mode != V4SFmode)
9325 op1 = gen_lowpart (V4SFmode, op1);
9326 m = adjust_address (op0, V2SFmode, 0);
9327 emit_insn (gen_sse_storelps (m, op1));
9328 m = adjust_address (op0, V2SFmode, 8);
9329 emit_insn (gen_sse_storehps (m, op1));
9336 /* Expand a push in MODE. This is some mode for which we do not support
9337 proper push instructions, at least from the registers that we expect
9338 the value to live in. */
9341 ix86_expand_push (enum machine_mode mode, rtx x)
9345 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9346 GEN_INT (-GET_MODE_SIZE (mode)),
9347 stack_pointer_rtx, 1, OPTAB_DIRECT);
9348 if (tmp != stack_pointer_rtx)
9349 emit_move_insn (stack_pointer_rtx, tmp);
9351 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9352 emit_move_insn (tmp, x);
9355 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9356 destination to use for the operation. If different from the true
9357 destination in operands[0], a copy operation will be required. */
9360 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9363 int matching_memory;
9364 rtx src1, src2, dst;
9370 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9371 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9372 && (rtx_equal_p (dst, src2)
9373 || immediate_operand (src1, mode)))
9380 /* If the destination is memory, and we do not have matching source
9381 operands, do things in registers. */
9382 matching_memory = 0;
9383 if (GET_CODE (dst) == MEM)
9385 if (rtx_equal_p (dst, src1))
9386 matching_memory = 1;
9387 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9388 && rtx_equal_p (dst, src2))
9389 matching_memory = 2;
9391 dst = gen_reg_rtx (mode);
9394 /* Both source operands cannot be in memory. */
9395 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9397 if (matching_memory != 2)
9398 src2 = force_reg (mode, src2);
9400 src1 = force_reg (mode, src1);
9403 /* If the operation is not commutable, source 1 cannot be a constant
9404 or non-matching memory. */
9405 if ((CONSTANT_P (src1)
9406 || (!matching_memory && GET_CODE (src1) == MEM))
9407 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9408 src1 = force_reg (mode, src1);
9410 src1 = operands[1] = src1;
9411 src2 = operands[2] = src2;
9415 /* Similarly, but assume that the destination has already been
9419 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9420 enum machine_mode mode, rtx operands[])
9422 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9423 gcc_assert (dst == operands[0]);
9426 /* Attempt to expand a binary operator. Make the expansion closer to the
9427 actual machine, then just general_operand, which will allow 3 separate
9428 memory references (one output, two input) in a single insn. */
9431 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9434 rtx src1, src2, dst, op, clob;
9436 dst = ix86_fixup_binary_operands (code, mode, operands);
9440 /* Emit the instruction. */
9442 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9443 if (reload_in_progress)
9445 /* Reload doesn't know about the flags register, and doesn't know that
9446 it doesn't want to clobber it. We can only do this with PLUS. */
9447 gcc_assert (code == PLUS);
9452 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9453 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9456 /* Fix up the destination if needed. */
9457 if (dst != operands[0])
9458 emit_move_insn (operands[0], dst);
9461 /* Return TRUE or FALSE depending on whether the binary operator meets the
9462 appropriate constraints. */
9465 ix86_binary_operator_ok (enum rtx_code code,
9466 enum machine_mode mode ATTRIBUTE_UNUSED,
9469 /* Both source operands cannot be in memory. */
9470 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9472 /* If the operation is not commutable, source 1 cannot be a constant. */
9473 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9475 /* If the destination is memory, we must have a matching source operand. */
9476 if (GET_CODE (operands[0]) == MEM
9477 && ! (rtx_equal_p (operands[0], operands[1])
9478 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9479 && rtx_equal_p (operands[0], operands[2]))))
9481 /* If the operation is not commutable and the source 1 is memory, we must
9482 have a matching destination. */
9483 if (GET_CODE (operands[1]) == MEM
9484 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9485 && ! rtx_equal_p (operands[0], operands[1]))
9490 /* Attempt to expand a unary operator. Make the expansion closer to the
9491 actual machine, then just general_operand, which will allow 2 separate
9492 memory references (one output, one input) in a single insn. */
9495 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9498 int matching_memory;
9499 rtx src, dst, op, clob;
9504 /* If the destination is memory, and we do not have matching source
9505 operands, do things in registers. */
9506 matching_memory = 0;
9509 if (rtx_equal_p (dst, src))
9510 matching_memory = 1;
9512 dst = gen_reg_rtx (mode);
9515 /* When source operand is memory, destination must match. */
9516 if (MEM_P (src) && !matching_memory)
9517 src = force_reg (mode, src);
9519 /* Emit the instruction. */
9521 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9522 if (reload_in_progress || code == NOT)
9524 /* Reload doesn't know about the flags register, and doesn't know that
9525 it doesn't want to clobber it. */
9526 gcc_assert (code == NOT);
9531 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9532 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9535 /* Fix up the destination if needed. */
9536 if (dst != operands[0])
9537 emit_move_insn (operands[0], dst);
9540 /* Return TRUE or FALSE depending on whether the unary operator meets the
9541 appropriate constraints. */
9544 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9545 enum machine_mode mode ATTRIBUTE_UNUSED,
9546 rtx operands[2] ATTRIBUTE_UNUSED)
9548 /* If one of operands is memory, source and destination must match. */
9549 if ((GET_CODE (operands[0]) == MEM
9550 || GET_CODE (operands[1]) == MEM)
9551 && ! rtx_equal_p (operands[0], operands[1]))
9556 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9557 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9558 true, then replicate the mask for all elements of the vector register.
9559 If INVERT is true, then create a mask excluding the sign bit. */
9562 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9564 enum machine_mode vec_mode;
9565 HOST_WIDE_INT hi, lo;
9570 /* Find the sign bit, sign extended to 2*HWI. */
9572 lo = 0x80000000, hi = lo < 0;
9573 else if (HOST_BITS_PER_WIDE_INT >= 64)
9574 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9576 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9581 /* Force this value into the low part of a fp vector constant. */
9582 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9583 mask = gen_lowpart (mode, mask);
9588 v = gen_rtvec (4, mask, mask, mask, mask);
9590 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9591 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9592 vec_mode = V4SFmode;
9597 v = gen_rtvec (2, mask, mask);
9599 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9600 vec_mode = V2DFmode;
9603 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9606 /* Generate code for floating point ABS or NEG. */
9609 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9612 rtx mask, set, use, clob, dst, src;
9613 bool matching_memory;
9614 bool use_sse = false;
9615 bool vector_mode = VECTOR_MODE_P (mode);
9616 enum machine_mode elt_mode = mode;
9620 elt_mode = GET_MODE_INNER (mode);
9623 else if (TARGET_SSE_MATH)
9624 use_sse = SSE_FLOAT_MODE_P (mode);
9626 /* NEG and ABS performed with SSE use bitwise mask operations.
9627 Create the appropriate mask now. */
9629 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9636 /* If the destination is memory, and we don't have matching source
9637 operands or we're using the x87, do things in registers. */
9638 matching_memory = false;
9641 if (use_sse && rtx_equal_p (dst, src))
9642 matching_memory = true;
9644 dst = gen_reg_rtx (mode);
9646 if (MEM_P (src) && !matching_memory)
9647 src = force_reg (mode, src);
9651 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9652 set = gen_rtx_SET (VOIDmode, dst, set);
9657 set = gen_rtx_fmt_e (code, mode, src);
9658 set = gen_rtx_SET (VOIDmode, dst, set);
9661 use = gen_rtx_USE (VOIDmode, mask);
9662 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9663 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9664 gen_rtvec (3, set, use, clob)));
9670 if (dst != operands[0])
9671 emit_move_insn (operands[0], dst);
9674 /* Expand a copysign operation. Special case operand 0 being a constant. */
9677 ix86_expand_copysign (rtx operands[])
9679 enum machine_mode mode, vmode;
9680 rtx dest, op0, op1, mask, nmask;
9686 mode = GET_MODE (dest);
9687 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9689 if (GET_CODE (op0) == CONST_DOUBLE)
9693 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9694 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9696 if (op0 == CONST0_RTX (mode))
9697 op0 = CONST0_RTX (vmode);
9701 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9702 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9704 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9705 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9708 mask = ix86_build_signbit_mask (mode, 0, 0);
9711 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9713 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9717 nmask = ix86_build_signbit_mask (mode, 0, 1);
9718 mask = ix86_build_signbit_mask (mode, 0, 0);
9721 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9723 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9727 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9728 be a constant, and so has already been expanded into a vector constant. */
9731 ix86_split_copysign_const (rtx operands[])
9733 enum machine_mode mode, vmode;
9734 rtx dest, op0, op1, mask, x;
9741 mode = GET_MODE (dest);
9742 vmode = GET_MODE (mask);
9744 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9745 x = gen_rtx_AND (vmode, dest, mask);
9746 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9748 if (op0 != CONST0_RTX (vmode))
9750 x = gen_rtx_IOR (vmode, dest, op0);
9751 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9755 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9756 so we have to do two masks. */
9759 ix86_split_copysign_var (rtx operands[])
9761 enum machine_mode mode, vmode;
9762 rtx dest, scratch, op0, op1, mask, nmask, x;
9765 scratch = operands[1];
9768 nmask = operands[4];
9771 mode = GET_MODE (dest);
9772 vmode = GET_MODE (mask);
9774 if (rtx_equal_p (op0, op1))
9776 /* Shouldn't happen often (it's useless, obviously), but when it does
9777 we'd generate incorrect code if we continue below. */
9778 emit_move_insn (dest, op0);
9782 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9784 gcc_assert (REGNO (op1) == REGNO (scratch));
9786 x = gen_rtx_AND (vmode, scratch, mask);
9787 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9790 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9791 x = gen_rtx_NOT (vmode, dest);
9792 x = gen_rtx_AND (vmode, x, op0);
9793 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9797 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9799 x = gen_rtx_AND (vmode, scratch, mask);
9801 else /* alternative 2,4 */
9803 gcc_assert (REGNO (mask) == REGNO (scratch));
9804 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9805 x = gen_rtx_AND (vmode, scratch, op1);
9807 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9809 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9811 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9812 x = gen_rtx_AND (vmode, dest, nmask);
9814 else /* alternative 3,4 */
9816 gcc_assert (REGNO (nmask) == REGNO (dest));
9818 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9819 x = gen_rtx_AND (vmode, dest, op0);
9821 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9824 x = gen_rtx_IOR (vmode, dest, scratch);
9825 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9828 /* Return TRUE or FALSE depending on whether the first SET in INSN
9829 has source and destination with matching CC modes, and that the
9830 CC mode is at least as constrained as REQ_MODE. */
9833 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9836 enum machine_mode set_mode;
9838 set = PATTERN (insn);
9839 if (GET_CODE (set) == PARALLEL)
9840 set = XVECEXP (set, 0, 0);
9841 gcc_assert (GET_CODE (set) == SET);
9842 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9844 set_mode = GET_MODE (SET_DEST (set));
9848 if (req_mode != CCNOmode
9849 && (req_mode != CCmode
9850 || XEXP (SET_SRC (set), 1) != const0_rtx))
9854 if (req_mode == CCGCmode)
9858 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9862 if (req_mode == CCZmode)
9872 return (GET_MODE (SET_SRC (set)) == set_mode);
9875 /* Generate insn patterns to do an integer compare of OPERANDS. */
9878 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9880 enum machine_mode cmpmode;
9883 cmpmode = SELECT_CC_MODE (code, op0, op1);
9884 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9886 /* This is very simple, but making the interface the same as in the
9887 FP case makes the rest of the code easier. */
9888 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9889 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9891 /* Return the test that should be put into the flags user, i.e.
9892 the bcc, scc, or cmov instruction. */
9893 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9896 /* Figure out whether to use ordered or unordered fp comparisons.
9897 Return the appropriate mode to use. */
9900 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9902 /* ??? In order to make all comparisons reversible, we do all comparisons
9903 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9904 all forms trapping and nontrapping comparisons, we can make inequality
9905 comparisons trapping again, since it results in better code when using
9906 FCOM based compares. */
9907 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9911 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9913 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9914 return ix86_fp_compare_mode (code);
9917 /* Only zero flag is needed. */
9919 case NE: /* ZF!=0 */
9921 /* Codes needing carry flag. */
9922 case GEU: /* CF=0 */
9923 case GTU: /* CF=0 & ZF=0 */
9924 case LTU: /* CF=1 */
9925 case LEU: /* CF=1 | ZF=1 */
9927 /* Codes possibly doable only with sign flag when
9928 comparing against zero. */
9929 case GE: /* SF=OF or SF=0 */
9930 case LT: /* SF<>OF or SF=1 */
9931 if (op1 == const0_rtx)
9934 /* For other cases Carry flag is not required. */
9936 /* Codes doable only with sign flag when comparing
9937 against zero, but we miss jump instruction for it
9938 so we need to use relational tests against overflow
9939 that thus needs to be zero. */
9940 case GT: /* ZF=0 & SF=OF */
9941 case LE: /* ZF=1 | SF<>OF */
9942 if (op1 == const0_rtx)
9946 /* strcmp pattern do (use flags) and combine may ask us for proper
9955 /* Return the fixed registers used for condition codes. */
9958 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9965 /* If two condition code modes are compatible, return a condition code
9966 mode which is compatible with both. Otherwise, return
9969 static enum machine_mode
9970 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9975 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9978 if ((m1 == CCGCmode && m2 == CCGOCmode)
9979 || (m1 == CCGOCmode && m2 == CCGCmode))
10007 /* These are only compatible with themselves, which we already
10013 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10016 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10018 enum rtx_code swapped_code = swap_condition (code);
10019 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10020 || (ix86_fp_comparison_cost (swapped_code)
10021 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10024 /* Swap, force into registers, or otherwise massage the two operands
10025 to a fp comparison. The operands are updated in place; the new
10026 comparison code is returned. */
10028 static enum rtx_code
10029 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10031 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10032 rtx op0 = *pop0, op1 = *pop1;
10033 enum machine_mode op_mode = GET_MODE (op0);
10034 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10036 /* All of the unordered compare instructions only work on registers.
10037 The same is true of the fcomi compare instructions. The XFmode
10038 compare instructions require registers except when comparing
10039 against zero or when converting operand 1 from fixed point to
10043 && (fpcmp_mode == CCFPUmode
10044 || (op_mode == XFmode
10045 && ! (standard_80387_constant_p (op0) == 1
10046 || standard_80387_constant_p (op1) == 1)
10047 && GET_CODE (op1) != FLOAT)
10048 || ix86_use_fcomi_compare (code)))
10050 op0 = force_reg (op_mode, op0);
10051 op1 = force_reg (op_mode, op1);
10055 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10056 things around if they appear profitable, otherwise force op0
10057 into a register. */
10059 if (standard_80387_constant_p (op0) == 0
10060 || (GET_CODE (op0) == MEM
10061 && ! (standard_80387_constant_p (op1) == 0
10062 || GET_CODE (op1) == MEM)))
10065 tmp = op0, op0 = op1, op1 = tmp;
10066 code = swap_condition (code);
10069 if (GET_CODE (op0) != REG)
10070 op0 = force_reg (op_mode, op0);
10072 if (CONSTANT_P (op1))
10074 int tmp = standard_80387_constant_p (op1);
10076 op1 = validize_mem (force_const_mem (op_mode, op1));
10080 op1 = force_reg (op_mode, op1);
10083 op1 = force_reg (op_mode, op1);
10087 /* Try to rearrange the comparison to make it cheaper. */
10088 if (ix86_fp_comparison_cost (code)
10089 > ix86_fp_comparison_cost (swap_condition (code))
10090 && (GET_CODE (op1) == REG || !no_new_pseudos))
10093 tmp = op0, op0 = op1, op1 = tmp;
10094 code = swap_condition (code);
10095 if (GET_CODE (op0) != REG)
10096 op0 = force_reg (op_mode, op0);
10104 /* Convert comparison codes we use to represent FP comparison to integer
10105 code that will result in proper branch. Return UNKNOWN if no such code
10109 ix86_fp_compare_code_to_integer (enum rtx_code code)
10138 /* Split comparison code CODE into comparisons we can do using branch
10139 instructions. BYPASS_CODE is comparison code for branch that will
10140 branch around FIRST_CODE and SECOND_CODE. If some of branches
10141 is not required, set value to UNKNOWN.
10142 We never require more than two branches. */
10145 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10146 enum rtx_code *first_code,
10147 enum rtx_code *second_code)
10149 *first_code = code;
10150 *bypass_code = UNKNOWN;
10151 *second_code = UNKNOWN;
10153 /* The fcomi comparison sets flags as follows:
10163 case GT: /* GTU - CF=0 & ZF=0 */
10164 case GE: /* GEU - CF=0 */
10165 case ORDERED: /* PF=0 */
10166 case UNORDERED: /* PF=1 */
10167 case UNEQ: /* EQ - ZF=1 */
10168 case UNLT: /* LTU - CF=1 */
10169 case UNLE: /* LEU - CF=1 | ZF=1 */
10170 case LTGT: /* EQ - ZF=0 */
10172 case LT: /* LTU - CF=1 - fails on unordered */
10173 *first_code = UNLT;
10174 *bypass_code = UNORDERED;
10176 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10177 *first_code = UNLE;
10178 *bypass_code = UNORDERED;
10180 case EQ: /* EQ - ZF=1 - fails on unordered */
10181 *first_code = UNEQ;
10182 *bypass_code = UNORDERED;
10184 case NE: /* NE - ZF=0 - fails on unordered */
10185 *first_code = LTGT;
10186 *second_code = UNORDERED;
10188 case UNGE: /* GEU - CF=0 - fails on unordered */
10190 *second_code = UNORDERED;
10192 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10194 *second_code = UNORDERED;
10197 gcc_unreachable ();
10199 if (!TARGET_IEEE_FP)
10201 *second_code = UNKNOWN;
10202 *bypass_code = UNKNOWN;
10206 /* Return cost of comparison done fcom + arithmetics operations on AX.
10207 All following functions do use number of instructions as a cost metrics.
10208 In future this should be tweaked to compute bytes for optimize_size and
10209 take into account performance of various instructions on various CPUs. */
10211 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10213 if (!TARGET_IEEE_FP)
10215 /* The cost of code output by ix86_expand_fp_compare. */
10239 gcc_unreachable ();
10243 /* Return cost of comparison done using fcomi operation.
10244 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10246 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10248 enum rtx_code bypass_code, first_code, second_code;
10249 /* Return arbitrarily high cost when instruction is not supported - this
10250 prevents gcc from using it. */
10253 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10254 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10257 /* Return cost of comparison done using sahf operation.
10258 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10260 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10262 enum rtx_code bypass_code, first_code, second_code;
10263 /* Return arbitrarily high cost when instruction is not preferred - this
10264 avoids gcc from using it. */
10265 if (!TARGET_USE_SAHF && !optimize_size)
10267 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10268 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10271 /* Compute cost of the comparison done using any method.
10272 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10274 ix86_fp_comparison_cost (enum rtx_code code)
10276 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10279 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10280 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10282 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10283 if (min > sahf_cost)
10285 if (min > fcomi_cost)
10290 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10293 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10294 rtx *second_test, rtx *bypass_test)
10296 enum machine_mode fpcmp_mode, intcmp_mode;
10298 int cost = ix86_fp_comparison_cost (code);
10299 enum rtx_code bypass_code, first_code, second_code;
10301 fpcmp_mode = ix86_fp_compare_mode (code);
10302 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10305 *second_test = NULL_RTX;
10307 *bypass_test = NULL_RTX;
10309 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10311 /* Do fcomi/sahf based test when profitable. */
10312 if ((bypass_code == UNKNOWN || bypass_test)
10313 && (second_code == UNKNOWN || second_test)
10314 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10318 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10319 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10325 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10326 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10328 scratch = gen_reg_rtx (HImode);
10329 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10330 emit_insn (gen_x86_sahf_1 (scratch));
10333 /* The FP codes work out to act like unsigned. */
10334 intcmp_mode = fpcmp_mode;
10336 if (bypass_code != UNKNOWN)
10337 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10338 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10340 if (second_code != UNKNOWN)
10341 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10342 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10347 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10348 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10349 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10351 scratch = gen_reg_rtx (HImode);
10352 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10354 /* In the unordered case, we have to check C2 for NaN's, which
10355 doesn't happen to work out to anything nice combination-wise.
10356 So do some bit twiddling on the value we've got in AH to come
10357 up with an appropriate set of condition codes. */
10359 intcmp_mode = CCNOmode;
10364 if (code == GT || !TARGET_IEEE_FP)
10366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10371 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10372 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10373 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10374 intcmp_mode = CCmode;
10380 if (code == LT && TARGET_IEEE_FP)
10382 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10383 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10384 intcmp_mode = CCmode;
10389 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10395 if (code == GE || !TARGET_IEEE_FP)
10397 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10402 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10403 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10410 if (code == LE && TARGET_IEEE_FP)
10412 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10413 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10414 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10415 intcmp_mode = CCmode;
10420 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10426 if (code == EQ && TARGET_IEEE_FP)
10428 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10429 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10430 intcmp_mode = CCmode;
10435 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10442 if (code == NE && TARGET_IEEE_FP)
10444 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10445 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10451 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10457 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10461 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10466 gcc_unreachable ();
10470 /* Return the test that should be put into the flags user, i.e.
10471 the bcc, scc, or cmov instruction. */
10472 return gen_rtx_fmt_ee (code, VOIDmode,
10473 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10478 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10481 op0 = ix86_compare_op0;
10482 op1 = ix86_compare_op1;
10485 *second_test = NULL_RTX;
10487 *bypass_test = NULL_RTX;
10489 if (ix86_compare_emitted)
10491 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10492 ix86_compare_emitted = NULL_RTX;
10494 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10495 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10496 second_test, bypass_test);
10498 ret = ix86_expand_int_compare (code, op0, op1);
10503 /* Return true if the CODE will result in nontrivial jump sequence. */
10505 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10507 enum rtx_code bypass_code, first_code, second_code;
10510 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10511 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10515 ix86_expand_branch (enum rtx_code code, rtx label)
10519 /* If we have emitted a compare insn, go straight to simple.
10520 ix86_expand_compare won't emit anything if ix86_compare_emitted
10522 if (ix86_compare_emitted)
10525 switch (GET_MODE (ix86_compare_op0))
10531 tmp = ix86_expand_compare (code, NULL, NULL);
10532 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10533 gen_rtx_LABEL_REF (VOIDmode, label),
10535 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10544 enum rtx_code bypass_code, first_code, second_code;
10546 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10547 &ix86_compare_op1);
10549 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10551 /* Check whether we will use the natural sequence with one jump. If
10552 so, we can expand jump early. Otherwise delay expansion by
10553 creating compound insn to not confuse optimizers. */
10554 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10557 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10558 gen_rtx_LABEL_REF (VOIDmode, label),
10559 pc_rtx, NULL_RTX, NULL_RTX);
10563 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10564 ix86_compare_op0, ix86_compare_op1);
10565 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10566 gen_rtx_LABEL_REF (VOIDmode, label),
10568 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10570 use_fcomi = ix86_use_fcomi_compare (code);
10571 vec = rtvec_alloc (3 + !use_fcomi);
10572 RTVEC_ELT (vec, 0) = tmp;
10574 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10576 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10579 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10581 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10590 /* Expand DImode branch into multiple compare+branch. */
10592 rtx lo[2], hi[2], label2;
10593 enum rtx_code code1, code2, code3;
10594 enum machine_mode submode;
10596 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10598 tmp = ix86_compare_op0;
10599 ix86_compare_op0 = ix86_compare_op1;
10600 ix86_compare_op1 = tmp;
10601 code = swap_condition (code);
10603 if (GET_MODE (ix86_compare_op0) == DImode)
10605 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10606 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10611 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10612 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10616 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10617 avoid two branches. This costs one extra insn, so disable when
10618 optimizing for size. */
10620 if ((code == EQ || code == NE)
10622 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10627 if (hi[1] != const0_rtx)
10628 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10629 NULL_RTX, 0, OPTAB_WIDEN);
10632 if (lo[1] != const0_rtx)
10633 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10634 NULL_RTX, 0, OPTAB_WIDEN);
10636 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10637 NULL_RTX, 0, OPTAB_WIDEN);
10639 ix86_compare_op0 = tmp;
10640 ix86_compare_op1 = const0_rtx;
10641 ix86_expand_branch (code, label);
10645 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10646 op1 is a constant and the low word is zero, then we can just
10647 examine the high word. */
10649 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10652 case LT: case LTU: case GE: case GEU:
10653 ix86_compare_op0 = hi[0];
10654 ix86_compare_op1 = hi[1];
10655 ix86_expand_branch (code, label);
10661 /* Otherwise, we need two or three jumps. */
10663 label2 = gen_label_rtx ();
10666 code2 = swap_condition (code);
10667 code3 = unsigned_condition (code);
10671 case LT: case GT: case LTU: case GTU:
10674 case LE: code1 = LT; code2 = GT; break;
10675 case GE: code1 = GT; code2 = LT; break;
10676 case LEU: code1 = LTU; code2 = GTU; break;
10677 case GEU: code1 = GTU; code2 = LTU; break;
10679 case EQ: code1 = UNKNOWN; code2 = NE; break;
10680 case NE: code2 = UNKNOWN; break;
10683 gcc_unreachable ();
10688 * if (hi(a) < hi(b)) goto true;
10689 * if (hi(a) > hi(b)) goto false;
10690 * if (lo(a) < lo(b)) goto true;
10694 ix86_compare_op0 = hi[0];
10695 ix86_compare_op1 = hi[1];
10697 if (code1 != UNKNOWN)
10698 ix86_expand_branch (code1, label);
10699 if (code2 != UNKNOWN)
10700 ix86_expand_branch (code2, label2);
10702 ix86_compare_op0 = lo[0];
10703 ix86_compare_op1 = lo[1];
10704 ix86_expand_branch (code3, label);
10706 if (code2 != UNKNOWN)
10707 emit_label (label2);
10712 gcc_unreachable ();
10716 /* Split branch based on floating point condition. */
10718 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10719 rtx target1, rtx target2, rtx tmp, rtx pushed)
10721 rtx second, bypass;
10722 rtx label = NULL_RTX;
10724 int bypass_probability = -1, second_probability = -1, probability = -1;
10727 if (target2 != pc_rtx)
10730 code = reverse_condition_maybe_unordered (code);
10735 condition = ix86_expand_fp_compare (code, op1, op2,
10736 tmp, &second, &bypass);
10738 /* Remove pushed operand from stack. */
10740 ix86_free_from_memory (GET_MODE (pushed));
10742 if (split_branch_probability >= 0)
10744 /* Distribute the probabilities across the jumps.
10745 Assume the BYPASS and SECOND to be always test
10747 probability = split_branch_probability;
10749 /* Value of 1 is low enough to make no need for probability
10750 to be updated. Later we may run some experiments and see
10751 if unordered values are more frequent in practice. */
10753 bypass_probability = 1;
10755 second_probability = 1;
10757 if (bypass != NULL_RTX)
10759 label = gen_label_rtx ();
10760 i = emit_jump_insn (gen_rtx_SET
10762 gen_rtx_IF_THEN_ELSE (VOIDmode,
10764 gen_rtx_LABEL_REF (VOIDmode,
10767 if (bypass_probability >= 0)
10769 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10770 GEN_INT (bypass_probability),
10773 i = emit_jump_insn (gen_rtx_SET
10775 gen_rtx_IF_THEN_ELSE (VOIDmode,
10776 condition, target1, target2)));
10777 if (probability >= 0)
10779 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10780 GEN_INT (probability),
10782 if (second != NULL_RTX)
10784 i = emit_jump_insn (gen_rtx_SET
10786 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10788 if (second_probability >= 0)
10790 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10791 GEN_INT (second_probability),
10794 if (label != NULL_RTX)
10795 emit_label (label);
10799 ix86_expand_setcc (enum rtx_code code, rtx dest)
10801 rtx ret, tmp, tmpreg, equiv;
10802 rtx second_test, bypass_test;
10804 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10805 return 0; /* FAIL */
10807 gcc_assert (GET_MODE (dest) == QImode);
10809 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10810 PUT_MODE (ret, QImode);
10815 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10816 if (bypass_test || second_test)
10818 rtx test = second_test;
10820 rtx tmp2 = gen_reg_rtx (QImode);
10823 gcc_assert (!second_test);
10824 test = bypass_test;
10826 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10828 PUT_MODE (test, QImode);
10829 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10832 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10834 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10837 /* Attach a REG_EQUAL note describing the comparison result. */
10838 if (ix86_compare_op0 && ix86_compare_op1)
10840 equiv = simplify_gen_relational (code, QImode,
10841 GET_MODE (ix86_compare_op0),
10842 ix86_compare_op0, ix86_compare_op1);
10843 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10846 return 1; /* DONE */
10849 /* Expand comparison setting or clearing carry flag. Return true when
10850 successful and set pop for the operation. */
10852 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10854 enum machine_mode mode =
10855 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10857 /* Do not handle DImode compares that go through special path. Also we can't
10858 deal with FP compares yet. This is possible to add. */
10859 if (mode == (TARGET_64BIT ? TImode : DImode))
10861 if (FLOAT_MODE_P (mode))
10863 rtx second_test = NULL, bypass_test = NULL;
10864 rtx compare_op, compare_seq;
10866 /* Shortcut: following common codes never translate into carry flag compares. */
10867 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10868 || code == ORDERED || code == UNORDERED)
10871 /* These comparisons require zero flag; swap operands so they won't. */
10872 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10873 && !TARGET_IEEE_FP)
10878 code = swap_condition (code);
10881 /* Try to expand the comparison and verify that we end up with carry flag
10882 based comparison. This is fails to be true only when we decide to expand
10883 comparison using arithmetic that is not too common scenario. */
10885 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10886 &second_test, &bypass_test);
10887 compare_seq = get_insns ();
10890 if (second_test || bypass_test)
10892 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10893 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10894 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10896 code = GET_CODE (compare_op);
10897 if (code != LTU && code != GEU)
10899 emit_insn (compare_seq);
10903 if (!INTEGRAL_MODE_P (mode))
10911 /* Convert a==0 into (unsigned)a<1. */
10914 if (op1 != const0_rtx)
10917 code = (code == EQ ? LTU : GEU);
10920 /* Convert a>b into b<a or a>=b-1. */
10923 if (GET_CODE (op1) == CONST_INT)
10925 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10926 /* Bail out on overflow. We still can swap operands but that
10927 would force loading of the constant into register. */
10928 if (op1 == const0_rtx
10929 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10931 code = (code == GTU ? GEU : LTU);
10938 code = (code == GTU ? LTU : GEU);
10942 /* Convert a>=0 into (unsigned)a<0x80000000. */
10945 if (mode == DImode || op1 != const0_rtx)
10947 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10948 code = (code == LT ? GEU : LTU);
10952 if (mode == DImode || op1 != constm1_rtx)
10954 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10955 code = (code == LE ? GEU : LTU);
10961 /* Swapping operands may cause constant to appear as first operand. */
10962 if (!nonimmediate_operand (op0, VOIDmode))
10964 if (no_new_pseudos)
10966 op0 = force_reg (mode, op0);
10968 ix86_compare_op0 = op0;
10969 ix86_compare_op1 = op1;
10970 *pop = ix86_expand_compare (code, NULL, NULL);
10971 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10976 ix86_expand_int_movcc (rtx operands[])
10978 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10979 rtx compare_seq, compare_op;
10980 rtx second_test, bypass_test;
10981 enum machine_mode mode = GET_MODE (operands[0]);
10982 bool sign_bit_compare_p = false;;
10985 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10986 compare_seq = get_insns ();
10989 compare_code = GET_CODE (compare_op);
10991 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10992 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10993 sign_bit_compare_p = true;
10995 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10996 HImode insns, we'd be swallowed in word prefix ops. */
10998 if ((mode != HImode || TARGET_FAST_PREFIX)
10999 && (mode != (TARGET_64BIT ? TImode : DImode))
11000 && GET_CODE (operands[2]) == CONST_INT
11001 && GET_CODE (operands[3]) == CONST_INT)
11003 rtx out = operands[0];
11004 HOST_WIDE_INT ct = INTVAL (operands[2]);
11005 HOST_WIDE_INT cf = INTVAL (operands[3]);
11006 HOST_WIDE_INT diff;
11009 /* Sign bit compares are better done using shifts than we do by using
11011 if (sign_bit_compare_p
11012 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11013 ix86_compare_op1, &compare_op))
11015 /* Detect overlap between destination and compare sources. */
11018 if (!sign_bit_compare_p)
11020 bool fpcmp = false;
11022 compare_code = GET_CODE (compare_op);
11024 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11025 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11028 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11031 /* To simplify rest of code, restrict to the GEU case. */
11032 if (compare_code == LTU)
11034 HOST_WIDE_INT tmp = ct;
11037 compare_code = reverse_condition (compare_code);
11038 code = reverse_condition (code);
11043 PUT_CODE (compare_op,
11044 reverse_condition_maybe_unordered
11045 (GET_CODE (compare_op)));
11047 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11051 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11052 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11053 tmp = gen_reg_rtx (mode);
11055 if (mode == DImode)
11056 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11058 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11062 if (code == GT || code == GE)
11063 code = reverse_condition (code);
11066 HOST_WIDE_INT tmp = ct;
11071 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11072 ix86_compare_op1, VOIDmode, 0, -1);
11085 tmp = expand_simple_binop (mode, PLUS,
11087 copy_rtx (tmp), 1, OPTAB_DIRECT);
11098 tmp = expand_simple_binop (mode, IOR,
11100 copy_rtx (tmp), 1, OPTAB_DIRECT);
11102 else if (diff == -1 && ct)
11112 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11114 tmp = expand_simple_binop (mode, PLUS,
11115 copy_rtx (tmp), GEN_INT (cf),
11116 copy_rtx (tmp), 1, OPTAB_DIRECT);
11124 * andl cf - ct, dest
11134 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11137 tmp = expand_simple_binop (mode, AND,
11139 gen_int_mode (cf - ct, mode),
11140 copy_rtx (tmp), 1, OPTAB_DIRECT);
11142 tmp = expand_simple_binop (mode, PLUS,
11143 copy_rtx (tmp), GEN_INT (ct),
11144 copy_rtx (tmp), 1, OPTAB_DIRECT);
11147 if (!rtx_equal_p (tmp, out))
11148 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11150 return 1; /* DONE */
11156 tmp = ct, ct = cf, cf = tmp;
11158 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11160 /* We may be reversing unordered compare to normal compare, that
11161 is not valid in general (we may convert non-trapping condition
11162 to trapping one), however on i386 we currently emit all
11163 comparisons unordered. */
11164 compare_code = reverse_condition_maybe_unordered (compare_code);
11165 code = reverse_condition_maybe_unordered (code);
11169 compare_code = reverse_condition (compare_code);
11170 code = reverse_condition (code);
11174 compare_code = UNKNOWN;
11175 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11176 && GET_CODE (ix86_compare_op1) == CONST_INT)
11178 if (ix86_compare_op1 == const0_rtx
11179 && (code == LT || code == GE))
11180 compare_code = code;
11181 else if (ix86_compare_op1 == constm1_rtx)
11185 else if (code == GT)
11190 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11191 if (compare_code != UNKNOWN
11192 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11193 && (cf == -1 || ct == -1))
11195 /* If lea code below could be used, only optimize
11196 if it results in a 2 insn sequence. */
11198 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11199 || diff == 3 || diff == 5 || diff == 9)
11200 || (compare_code == LT && ct == -1)
11201 || (compare_code == GE && cf == -1))
11204 * notl op1 (if necessary)
11212 code = reverse_condition (code);
11215 out = emit_store_flag (out, code, ix86_compare_op0,
11216 ix86_compare_op1, VOIDmode, 0, -1);
11218 out = expand_simple_binop (mode, IOR,
11220 out, 1, OPTAB_DIRECT);
11221 if (out != operands[0])
11222 emit_move_insn (operands[0], out);
11224 return 1; /* DONE */
11229 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11230 || diff == 3 || diff == 5 || diff == 9)
11231 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11233 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11239 * lea cf(dest*(ct-cf)),dest
11243 * This also catches the degenerate setcc-only case.
11249 out = emit_store_flag (out, code, ix86_compare_op0,
11250 ix86_compare_op1, VOIDmode, 0, 1);
11253 /* On x86_64 the lea instruction operates on Pmode, so we need
11254 to get arithmetics done in proper mode to match. */
11256 tmp = copy_rtx (out);
11260 out1 = copy_rtx (out);
11261 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11265 tmp = gen_rtx_PLUS (mode, tmp, out1);
11271 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11274 if (!rtx_equal_p (tmp, out))
11277 out = force_operand (tmp, copy_rtx (out));
11279 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11281 if (!rtx_equal_p (out, operands[0]))
11282 emit_move_insn (operands[0], copy_rtx (out));
11284 return 1; /* DONE */
11288 * General case: Jumpful:
11289 * xorl dest,dest cmpl op1, op2
11290 * cmpl op1, op2 movl ct, dest
11291 * setcc dest jcc 1f
11292 * decl dest movl cf, dest
11293 * andl (cf-ct),dest 1:
11296 * Size 20. Size 14.
11298 * This is reasonably steep, but branch mispredict costs are
11299 * high on modern cpus, so consider failing only if optimizing
11303 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11304 && BRANCH_COST >= 2)
11310 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11311 /* We may be reversing unordered compare to normal compare,
11312 that is not valid in general (we may convert non-trapping
11313 condition to trapping one), however on i386 we currently
11314 emit all comparisons unordered. */
11315 code = reverse_condition_maybe_unordered (code);
11318 code = reverse_condition (code);
11319 if (compare_code != UNKNOWN)
11320 compare_code = reverse_condition (compare_code);
11324 if (compare_code != UNKNOWN)
11326 /* notl op1 (if needed)
11331 For x < 0 (resp. x <= -1) there will be no notl,
11332 so if possible swap the constants to get rid of the
11334 True/false will be -1/0 while code below (store flag
11335 followed by decrement) is 0/-1, so the constants need
11336 to be exchanged once more. */
11338 if (compare_code == GE || !cf)
11340 code = reverse_condition (code);
11345 HOST_WIDE_INT tmp = cf;
11350 out = emit_store_flag (out, code, ix86_compare_op0,
11351 ix86_compare_op1, VOIDmode, 0, -1);
11355 out = emit_store_flag (out, code, ix86_compare_op0,
11356 ix86_compare_op1, VOIDmode, 0, 1);
11358 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11359 copy_rtx (out), 1, OPTAB_DIRECT);
11362 out = expand_simple_binop (mode, AND, copy_rtx (out),
11363 gen_int_mode (cf - ct, mode),
11364 copy_rtx (out), 1, OPTAB_DIRECT);
11366 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11367 copy_rtx (out), 1, OPTAB_DIRECT);
11368 if (!rtx_equal_p (out, operands[0]))
11369 emit_move_insn (operands[0], copy_rtx (out));
11371 return 1; /* DONE */
11375 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11377 /* Try a few things more with specific constants and a variable. */
11380 rtx var, orig_out, out, tmp;
11382 if (BRANCH_COST <= 2)
11383 return 0; /* FAIL */
11385 /* If one of the two operands is an interesting constant, load a
11386 constant with the above and mask it in with a logical operation. */
11388 if (GET_CODE (operands[2]) == CONST_INT)
11391 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11392 operands[3] = constm1_rtx, op = and_optab;
11393 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11394 operands[3] = const0_rtx, op = ior_optab;
11396 return 0; /* FAIL */
11398 else if (GET_CODE (operands[3]) == CONST_INT)
11401 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11402 operands[2] = constm1_rtx, op = and_optab;
11403 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11404 operands[2] = const0_rtx, op = ior_optab;
11406 return 0; /* FAIL */
11409 return 0; /* FAIL */
11411 orig_out = operands[0];
11412 tmp = gen_reg_rtx (mode);
11415 /* Recurse to get the constant loaded. */
11416 if (ix86_expand_int_movcc (operands) == 0)
11417 return 0; /* FAIL */
11419 /* Mask in the interesting variable. */
11420 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11422 if (!rtx_equal_p (out, orig_out))
11423 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11425 return 1; /* DONE */
11429 * For comparison with above,
11439 if (! nonimmediate_operand (operands[2], mode))
11440 operands[2] = force_reg (mode, operands[2]);
11441 if (! nonimmediate_operand (operands[3], mode))
11442 operands[3] = force_reg (mode, operands[3]);
11444 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11446 rtx tmp = gen_reg_rtx (mode);
11447 emit_move_insn (tmp, operands[3]);
11450 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11452 rtx tmp = gen_reg_rtx (mode);
11453 emit_move_insn (tmp, operands[2]);
11457 if (! register_operand (operands[2], VOIDmode)
11459 || ! register_operand (operands[3], VOIDmode)))
11460 operands[2] = force_reg (mode, operands[2]);
11463 && ! register_operand (operands[3], VOIDmode))
11464 operands[3] = force_reg (mode, operands[3]);
11466 emit_insn (compare_seq);
11467 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11468 gen_rtx_IF_THEN_ELSE (mode,
11469 compare_op, operands[2],
11472 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11473 gen_rtx_IF_THEN_ELSE (mode,
11475 copy_rtx (operands[3]),
11476 copy_rtx (operands[0]))));
11478 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11479 gen_rtx_IF_THEN_ELSE (mode,
11481 copy_rtx (operands[2]),
11482 copy_rtx (operands[0]))));
11484 return 1; /* DONE */
11487 /* Swap, force into registers, or otherwise massage the two operands
11488 to an sse comparison with a mask result. Thus we differ a bit from
11489 ix86_prepare_fp_compare_args which expects to produce a flags result.
11491 The DEST operand exists to help determine whether to commute commutative
11492 operators. The POP0/POP1 operands are updated in place. The new
11493 comparison code is returned, or UNKNOWN if not implementable. */
11495 static enum rtx_code
11496 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11497 rtx *pop0, rtx *pop1)
11505 /* We have no LTGT as an operator. We could implement it with
11506 NE & ORDERED, but this requires an extra temporary. It's
11507 not clear that it's worth it. */
11514 /* These are supported directly. */
11521 /* For commutative operators, try to canonicalize the destination
11522 operand to be first in the comparison - this helps reload to
11523 avoid extra moves. */
11524 if (!dest || !rtx_equal_p (dest, *pop1))
11532 /* These are not supported directly. Swap the comparison operands
11533 to transform into something that is supported. */
11537 code = swap_condition (code);
11541 gcc_unreachable ();
11547 /* Detect conditional moves that exactly match min/max operational
11548 semantics. Note that this is IEEE safe, as long as we don't
11549 interchange the operands.
11551 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11552 and TRUE if the operation is successful and instructions are emitted. */
11555 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11556 rtx cmp_op1, rtx if_true, rtx if_false)
11558 enum machine_mode mode;
11564 else if (code == UNGE)
11567 if_true = if_false;
11573 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11575 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11580 mode = GET_MODE (dest);
11582 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11583 but MODE may be a vector mode and thus not appropriate. */
11584 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11586 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11589 if_true = force_reg (mode, if_true);
11590 v = gen_rtvec (2, if_true, if_false);
11591 tmp = gen_rtx_UNSPEC (mode, v, u);
11595 code = is_min ? SMIN : SMAX;
11596 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11599 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11603 /* Expand an sse vector comparison. Return the register with the result. */
11606 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11607 rtx op_true, rtx op_false)
11609 enum machine_mode mode = GET_MODE (dest);
11612 cmp_op0 = force_reg (mode, cmp_op0);
11613 if (!nonimmediate_operand (cmp_op1, mode))
11614 cmp_op1 = force_reg (mode, cmp_op1);
11617 || reg_overlap_mentioned_p (dest, op_true)
11618 || reg_overlap_mentioned_p (dest, op_false))
11619 dest = gen_reg_rtx (mode);
11621 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11622 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11627 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11628 operations. This is used for both scalar and vector conditional moves. */
11631 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11633 enum machine_mode mode = GET_MODE (dest);
11636 if (op_false == CONST0_RTX (mode))
11638 op_true = force_reg (mode, op_true);
11639 x = gen_rtx_AND (mode, cmp, op_true);
11640 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11642 else if (op_true == CONST0_RTX (mode))
11644 op_false = force_reg (mode, op_false);
11645 x = gen_rtx_NOT (mode, cmp);
11646 x = gen_rtx_AND (mode, x, op_false);
11647 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11651 op_true = force_reg (mode, op_true);
11652 op_false = force_reg (mode, op_false);
11654 t2 = gen_reg_rtx (mode);
11656 t3 = gen_reg_rtx (mode);
11660 x = gen_rtx_AND (mode, op_true, cmp);
11661 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11663 x = gen_rtx_NOT (mode, cmp);
11664 x = gen_rtx_AND (mode, x, op_false);
11665 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11667 x = gen_rtx_IOR (mode, t3, t2);
11668 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11672 /* Expand a floating-point conditional move. Return true if successful. */
11675 ix86_expand_fp_movcc (rtx operands[])
11677 enum machine_mode mode = GET_MODE (operands[0]);
11678 enum rtx_code code = GET_CODE (operands[1]);
11679 rtx tmp, compare_op, second_test, bypass_test;
11681 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11683 enum machine_mode cmode;
11685 /* Since we've no cmove for sse registers, don't force bad register
11686 allocation just to gain access to it. Deny movcc when the
11687 comparison mode doesn't match the move mode. */
11688 cmode = GET_MODE (ix86_compare_op0);
11689 if (cmode == VOIDmode)
11690 cmode = GET_MODE (ix86_compare_op1);
11694 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11696 &ix86_compare_op1);
11697 if (code == UNKNOWN)
11700 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11701 ix86_compare_op1, operands[2],
11705 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11706 ix86_compare_op1, operands[2], operands[3]);
11707 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11711 /* The floating point conditional move instructions don't directly
11712 support conditions resulting from a signed integer comparison. */
11714 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11716 /* The floating point conditional move instructions don't directly
11717 support signed integer comparisons. */
11719 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11721 gcc_assert (!second_test && !bypass_test);
11722 tmp = gen_reg_rtx (QImode);
11723 ix86_expand_setcc (code, tmp);
11725 ix86_compare_op0 = tmp;
11726 ix86_compare_op1 = const0_rtx;
11727 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11729 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11731 tmp = gen_reg_rtx (mode);
11732 emit_move_insn (tmp, operands[3]);
11735 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11737 tmp = gen_reg_rtx (mode);
11738 emit_move_insn (tmp, operands[2]);
11742 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11743 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11744 operands[2], operands[3])));
11746 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11747 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11748 operands[3], operands[0])));
11750 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11751 gen_rtx_IF_THEN_ELSE (mode, second_test,
11752 operands[2], operands[0])));
11757 /* Expand a floating-point vector conditional move; a vcond operation
11758 rather than a movcc operation. */
11761 ix86_expand_fp_vcond (rtx operands[])
11763 enum rtx_code code = GET_CODE (operands[3]);
11766 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11767 &operands[4], &operands[5]);
11768 if (code == UNKNOWN)
11771 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11772 operands[5], operands[1], operands[2]))
11775 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11776 operands[1], operands[2]);
11777 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11781 /* Expand a signed integral vector conditional move. */
11784 ix86_expand_int_vcond (rtx operands[])
11786 enum machine_mode mode = GET_MODE (operands[0]);
11787 enum rtx_code code = GET_CODE (operands[3]);
11788 bool negate = false;
11791 cop0 = operands[4];
11792 cop1 = operands[5];
11794 /* Canonicalize the comparison to EQ, GT, GTU. */
11805 code = reverse_condition (code);
11811 code = reverse_condition (code);
11817 code = swap_condition (code);
11818 x = cop0, cop0 = cop1, cop1 = x;
11822 gcc_unreachable ();
11825 /* Unsigned parallel compare is not supported by the hardware. Play some
11826 tricks to turn this into a signed comparison against 0. */
11829 cop0 = force_reg (mode, cop0);
11837 /* Perform a parallel modulo subtraction. */
11838 t1 = gen_reg_rtx (mode);
11839 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11841 /* Extract the original sign bit of op0. */
11842 mask = GEN_INT (-0x80000000);
11843 mask = gen_rtx_CONST_VECTOR (mode,
11844 gen_rtvec (4, mask, mask, mask, mask));
11845 mask = force_reg (mode, mask);
11846 t2 = gen_reg_rtx (mode);
11847 emit_insn (gen_andv4si3 (t2, cop0, mask));
11849 /* XOR it back into the result of the subtraction. This results
11850 in the sign bit set iff we saw unsigned underflow. */
11851 x = gen_reg_rtx (mode);
11852 emit_insn (gen_xorv4si3 (x, t1, t2));
11860 /* Perform a parallel unsigned saturating subtraction. */
11861 x = gen_reg_rtx (mode);
11862 emit_insn (gen_rtx_SET (VOIDmode, x,
11863 gen_rtx_US_MINUS (mode, cop0, cop1)));
11870 gcc_unreachable ();
11874 cop1 = CONST0_RTX (mode);
11877 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11878 operands[1+negate], operands[2-negate]);
11880 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11881 operands[2-negate]);
11885 /* Expand conditional increment or decrement using adb/sbb instructions.
11886 The default case using setcc followed by the conditional move can be
11887 done by generic code. */
11889 ix86_expand_int_addcc (rtx operands[])
11891 enum rtx_code code = GET_CODE (operands[1]);
11893 rtx val = const0_rtx;
11894 bool fpcmp = false;
11895 enum machine_mode mode = GET_MODE (operands[0]);
11897 if (operands[3] != const1_rtx
11898 && operands[3] != constm1_rtx)
11900 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11901 ix86_compare_op1, &compare_op))
11903 code = GET_CODE (compare_op);
11905 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11906 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11909 code = ix86_fp_compare_code_to_integer (code);
11916 PUT_CODE (compare_op,
11917 reverse_condition_maybe_unordered
11918 (GET_CODE (compare_op)));
11920 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11922 PUT_MODE (compare_op, mode);
11924 /* Construct either adc or sbb insn. */
11925 if ((code == LTU) == (operands[3] == constm1_rtx))
11927 switch (GET_MODE (operands[0]))
11930 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11933 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11936 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11939 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11942 gcc_unreachable ();
11947 switch (GET_MODE (operands[0]))
11950 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11953 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11956 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11959 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11962 gcc_unreachable ();
11965 return 1; /* DONE */
11969 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11970 works for floating pointer parameters and nonoffsetable memories.
11971 For pushes, it returns just stack offsets; the values will be saved
11972 in the right order. Maximally three parts are generated. */
11975 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11980 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11982 size = (GET_MODE_SIZE (mode) + 4) / 8;
11984 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11985 gcc_assert (size >= 2 && size <= 3);
11987 /* Optimize constant pool reference to immediates. This is used by fp
11988 moves, that force all constants to memory to allow combining. */
11989 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11991 rtx tmp = maybe_get_pool_constant (operand);
11996 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11998 /* The only non-offsetable memories we handle are pushes. */
11999 int ok = push_operand (operand, VOIDmode);
12003 operand = copy_rtx (operand);
12004 PUT_MODE (operand, Pmode);
12005 parts[0] = parts[1] = parts[2] = operand;
12009 if (GET_CODE (operand) == CONST_VECTOR)
12011 enum machine_mode imode = int_mode_for_mode (mode);
12012 /* Caution: if we looked through a constant pool memory above,
12013 the operand may actually have a different mode now. That's
12014 ok, since we want to pun this all the way back to an integer. */
12015 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12016 gcc_assert (operand != NULL);
12022 if (mode == DImode)
12023 split_di (&operand, 1, &parts[0], &parts[1]);
12026 if (REG_P (operand))
12028 gcc_assert (reload_completed);
12029 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12030 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12032 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12034 else if (offsettable_memref_p (operand))
12036 operand = adjust_address (operand, SImode, 0);
12037 parts[0] = operand;
12038 parts[1] = adjust_address (operand, SImode, 4);
12040 parts[2] = adjust_address (operand, SImode, 8);
12042 else if (GET_CODE (operand) == CONST_DOUBLE)
12047 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12051 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12052 parts[2] = gen_int_mode (l[2], SImode);
12055 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12058 gcc_unreachable ();
12060 parts[1] = gen_int_mode (l[1], SImode);
12061 parts[0] = gen_int_mode (l[0], SImode);
12064 gcc_unreachable ();
12069 if (mode == TImode)
12070 split_ti (&operand, 1, &parts[0], &parts[1]);
12071 if (mode == XFmode || mode == TFmode)
12073 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12074 if (REG_P (operand))
12076 gcc_assert (reload_completed);
12077 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12078 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12080 else if (offsettable_memref_p (operand))
12082 operand = adjust_address (operand, DImode, 0);
12083 parts[0] = operand;
12084 parts[1] = adjust_address (operand, upper_mode, 8);
12086 else if (GET_CODE (operand) == CONST_DOUBLE)
12091 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12092 real_to_target (l, &r, mode);
12094 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12095 if (HOST_BITS_PER_WIDE_INT >= 64)
12098 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12099 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12102 parts[0] = immed_double_const (l[0], l[1], DImode);
12104 if (upper_mode == SImode)
12105 parts[1] = gen_int_mode (l[2], SImode);
12106 else if (HOST_BITS_PER_WIDE_INT >= 64)
12109 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12110 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12113 parts[1] = immed_double_const (l[2], l[3], DImode);
12116 gcc_unreachable ();
12123 /* Emit insns to perform a move or push of DI, DF, and XF values.
12124 Return false when normal moves are needed; true when all required
12125 insns have been emitted. Operands 2-4 contain the input values
12126 int the correct order; operands 5-7 contain the output values. */
12129 ix86_split_long_move (rtx operands[])
12134 int collisions = 0;
12135 enum machine_mode mode = GET_MODE (operands[0]);
12137 /* The DFmode expanders may ask us to move double.
12138 For 64bit target this is single move. By hiding the fact
12139 here we simplify i386.md splitters. */
12140 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12142 /* Optimize constant pool reference to immediates. This is used by
12143 fp moves, that force all constants to memory to allow combining. */
12145 if (GET_CODE (operands[1]) == MEM
12146 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12147 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12148 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12149 if (push_operand (operands[0], VOIDmode))
12151 operands[0] = copy_rtx (operands[0]);
12152 PUT_MODE (operands[0], Pmode);
12155 operands[0] = gen_lowpart (DImode, operands[0]);
12156 operands[1] = gen_lowpart (DImode, operands[1]);
12157 emit_move_insn (operands[0], operands[1]);
12161 /* The only non-offsettable memory we handle is push. */
12162 if (push_operand (operands[0], VOIDmode))
12165 gcc_assert (GET_CODE (operands[0]) != MEM
12166 || offsettable_memref_p (operands[0]));
12168 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12169 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12171 /* When emitting push, take care for source operands on the stack. */
12172 if (push && GET_CODE (operands[1]) == MEM
12173 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12176 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12177 XEXP (part[1][2], 0));
12178 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12179 XEXP (part[1][1], 0));
12182 /* We need to do copy in the right order in case an address register
12183 of the source overlaps the destination. */
12184 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12186 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12188 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12191 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12194 /* Collision in the middle part can be handled by reordering. */
12195 if (collisions == 1 && nparts == 3
12196 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12199 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12200 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12203 /* If there are more collisions, we can't handle it by reordering.
12204 Do an lea to the last part and use only one colliding move. */
12205 else if (collisions > 1)
12211 base = part[0][nparts - 1];
12213 /* Handle the case when the last part isn't valid for lea.
12214 Happens in 64-bit mode storing the 12-byte XFmode. */
12215 if (GET_MODE (base) != Pmode)
12216 base = gen_rtx_REG (Pmode, REGNO (base));
12218 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12219 part[1][0] = replace_equiv_address (part[1][0], base);
12220 part[1][1] = replace_equiv_address (part[1][1],
12221 plus_constant (base, UNITS_PER_WORD));
12223 part[1][2] = replace_equiv_address (part[1][2],
12224 plus_constant (base, 8));
12234 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12235 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12236 emit_move_insn (part[0][2], part[1][2]);
12241 /* In 64bit mode we don't have 32bit push available. In case this is
12242 register, it is OK - we will just use larger counterpart. We also
12243 retype memory - these comes from attempt to avoid REX prefix on
12244 moving of second half of TFmode value. */
12245 if (GET_MODE (part[1][1]) == SImode)
12247 switch (GET_CODE (part[1][1]))
12250 part[1][1] = adjust_address (part[1][1], DImode, 0);
12254 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12258 gcc_unreachable ();
12261 if (GET_MODE (part[1][0]) == SImode)
12262 part[1][0] = part[1][1];
12265 emit_move_insn (part[0][1], part[1][1]);
12266 emit_move_insn (part[0][0], part[1][0]);
12270 /* Choose correct order to not overwrite the source before it is copied. */
12271 if ((REG_P (part[0][0])
12272 && REG_P (part[1][1])
12273 && (REGNO (part[0][0]) == REGNO (part[1][1])
12275 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12277 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12281 operands[2] = part[0][2];
12282 operands[3] = part[0][1];
12283 operands[4] = part[0][0];
12284 operands[5] = part[1][2];
12285 operands[6] = part[1][1];
12286 operands[7] = part[1][0];
12290 operands[2] = part[0][1];
12291 operands[3] = part[0][0];
12292 operands[5] = part[1][1];
12293 operands[6] = part[1][0];
12300 operands[2] = part[0][0];
12301 operands[3] = part[0][1];
12302 operands[4] = part[0][2];
12303 operands[5] = part[1][0];
12304 operands[6] = part[1][1];
12305 operands[7] = part[1][2];
12309 operands[2] = part[0][0];
12310 operands[3] = part[0][1];
12311 operands[5] = part[1][0];
12312 operands[6] = part[1][1];
12316 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12319 if (GET_CODE (operands[5]) == CONST_INT
12320 && operands[5] != const0_rtx
12321 && REG_P (operands[2]))
12323 if (GET_CODE (operands[6]) == CONST_INT
12324 && INTVAL (operands[6]) == INTVAL (operands[5]))
12325 operands[6] = operands[2];
12328 && GET_CODE (operands[7]) == CONST_INT
12329 && INTVAL (operands[7]) == INTVAL (operands[5]))
12330 operands[7] = operands[2];
12334 && GET_CODE (operands[6]) == CONST_INT
12335 && operands[6] != const0_rtx
12336 && REG_P (operands[3])
12337 && GET_CODE (operands[7]) == CONST_INT
12338 && INTVAL (operands[7]) == INTVAL (operands[6]))
12339 operands[7] = operands[3];
12342 emit_move_insn (operands[2], operands[5]);
12343 emit_move_insn (operands[3], operands[6]);
12345 emit_move_insn (operands[4], operands[7]);
12350 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12351 left shift by a constant, either using a single shift or
12352 a sequence of add instructions. */
12355 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12359 emit_insn ((mode == DImode
12361 : gen_adddi3) (operand, operand, operand));
12363 else if (!optimize_size
12364 && count * ix86_cost->add <= ix86_cost->shift_const)
12367 for (i=0; i<count; i++)
12369 emit_insn ((mode == DImode
12371 : gen_adddi3) (operand, operand, operand));
12375 emit_insn ((mode == DImode
12377 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12381 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12383 rtx low[2], high[2];
12385 const int single_width = mode == DImode ? 32 : 64;
12387 if (GET_CODE (operands[2]) == CONST_INT)
12389 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12390 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12392 if (count >= single_width)
12394 emit_move_insn (high[0], low[1]);
12395 emit_move_insn (low[0], const0_rtx);
12397 if (count > single_width)
12398 ix86_expand_ashl_const (high[0], count - single_width, mode);
12402 if (!rtx_equal_p (operands[0], operands[1]))
12403 emit_move_insn (operands[0], operands[1]);
12404 emit_insn ((mode == DImode
12406 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12407 ix86_expand_ashl_const (low[0], count, mode);
12412 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12414 if (operands[1] == const1_rtx)
12416 /* Assuming we've chosen a QImode capable registers, then 1 << N
12417 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12418 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12420 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12422 ix86_expand_clear (low[0]);
12423 ix86_expand_clear (high[0]);
12424 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12426 d = gen_lowpart (QImode, low[0]);
12427 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12428 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12429 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12431 d = gen_lowpart (QImode, high[0]);
12432 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12433 s = gen_rtx_NE (QImode, flags, const0_rtx);
12434 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12437 /* Otherwise, we can get the same results by manually performing
12438 a bit extract operation on bit 5/6, and then performing the two
12439 shifts. The two methods of getting 0/1 into low/high are exactly
12440 the same size. Avoiding the shift in the bit extract case helps
12441 pentium4 a bit; no one else seems to care much either way. */
12446 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12447 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12449 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12450 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12452 emit_insn ((mode == DImode
12454 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12455 emit_insn ((mode == DImode
12457 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12458 emit_move_insn (low[0], high[0]);
12459 emit_insn ((mode == DImode
12461 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12464 emit_insn ((mode == DImode
12466 : gen_ashldi3) (low[0], low[0], operands[2]));
12467 emit_insn ((mode == DImode
12469 : gen_ashldi3) (high[0], high[0], operands[2]));
12473 if (operands[1] == constm1_rtx)
12475 /* For -1 << N, we can avoid the shld instruction, because we
12476 know that we're shifting 0...31/63 ones into a -1. */
12477 emit_move_insn (low[0], constm1_rtx);
12479 emit_move_insn (high[0], low[0]);
12481 emit_move_insn (high[0], constm1_rtx);
12485 if (!rtx_equal_p (operands[0], operands[1]))
12486 emit_move_insn (operands[0], operands[1]);
12488 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12489 emit_insn ((mode == DImode
12491 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12494 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12496 if (TARGET_CMOVE && scratch)
12498 ix86_expand_clear (scratch);
12499 emit_insn ((mode == DImode
12500 ? gen_x86_shift_adj_1
12501 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12504 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12508 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12510 rtx low[2], high[2];
12512 const int single_width = mode == DImode ? 32 : 64;
12514 if (GET_CODE (operands[2]) == CONST_INT)
12516 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12517 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12519 if (count == single_width * 2 - 1)
12521 emit_move_insn (high[0], high[1]);
12522 emit_insn ((mode == DImode
12524 : gen_ashrdi3) (high[0], high[0],
12525 GEN_INT (single_width - 1)));
12526 emit_move_insn (low[0], high[0]);
12529 else if (count >= single_width)
12531 emit_move_insn (low[0], high[1]);
12532 emit_move_insn (high[0], low[0]);
12533 emit_insn ((mode == DImode
12535 : gen_ashrdi3) (high[0], high[0],
12536 GEN_INT (single_width - 1)));
12537 if (count > single_width)
12538 emit_insn ((mode == DImode
12540 : gen_ashrdi3) (low[0], low[0],
12541 GEN_INT (count - single_width)));
12545 if (!rtx_equal_p (operands[0], operands[1]))
12546 emit_move_insn (operands[0], operands[1]);
12547 emit_insn ((mode == DImode
12549 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12550 emit_insn ((mode == DImode
12552 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12557 if (!rtx_equal_p (operands[0], operands[1]))
12558 emit_move_insn (operands[0], operands[1]);
12560 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12562 emit_insn ((mode == DImode
12564 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12565 emit_insn ((mode == DImode
12567 : gen_ashrdi3) (high[0], high[0], operands[2]));
12569 if (TARGET_CMOVE && scratch)
12571 emit_move_insn (scratch, high[0]);
12572 emit_insn ((mode == DImode
12574 : gen_ashrdi3) (scratch, scratch,
12575 GEN_INT (single_width - 1)));
12576 emit_insn ((mode == DImode
12577 ? gen_x86_shift_adj_1
12578 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12582 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12587 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12589 rtx low[2], high[2];
12591 const int single_width = mode == DImode ? 32 : 64;
12593 if (GET_CODE (operands[2]) == CONST_INT)
12595 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12596 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12598 if (count >= single_width)
12600 emit_move_insn (low[0], high[1]);
12601 ix86_expand_clear (high[0]);
12603 if (count > single_width)
12604 emit_insn ((mode == DImode
12606 : gen_lshrdi3) (low[0], low[0],
12607 GEN_INT (count - single_width)));
12611 if (!rtx_equal_p (operands[0], operands[1]))
12612 emit_move_insn (operands[0], operands[1]);
12613 emit_insn ((mode == DImode
12615 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12616 emit_insn ((mode == DImode
12618 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12623 if (!rtx_equal_p (operands[0], operands[1]))
12624 emit_move_insn (operands[0], operands[1]);
12626 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12628 emit_insn ((mode == DImode
12630 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12631 emit_insn ((mode == DImode
12633 : gen_lshrdi3) (high[0], high[0], operands[2]));
12635 /* Heh. By reversing the arguments, we can reuse this pattern. */
12636 if (TARGET_CMOVE && scratch)
12638 ix86_expand_clear (scratch);
12639 emit_insn ((mode == DImode
12640 ? gen_x86_shift_adj_1
12641 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12645 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12649 /* Helper function for the string operations below. Dest VARIABLE whether
12650 it is aligned to VALUE bytes. If true, jump to the label. */
12652 ix86_expand_aligntest (rtx variable, int value)
12654 rtx label = gen_label_rtx ();
12655 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12656 if (GET_MODE (variable) == DImode)
12657 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12659 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12660 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12665 /* Adjust COUNTER by the VALUE. */
12667 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12669 if (GET_MODE (countreg) == DImode)
12670 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12672 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12675 /* Zero extend possibly SImode EXP to Pmode register. */
12677 ix86_zero_extend_to_Pmode (rtx exp)
12680 if (GET_MODE (exp) == VOIDmode)
12681 return force_reg (Pmode, exp);
12682 if (GET_MODE (exp) == Pmode)
12683 return copy_to_mode_reg (Pmode, exp);
12684 r = gen_reg_rtx (Pmode);
12685 emit_insn (gen_zero_extendsidi2 (r, exp));
12689 /* Expand string move (memcpy) operation. Use i386 string operations when
12690 profitable. expand_clrmem contains similar code. */
12692 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12694 rtx srcreg, destreg, countreg, srcexp, destexp;
12695 enum machine_mode counter_mode;
12696 HOST_WIDE_INT align = 0;
12697 unsigned HOST_WIDE_INT count = 0;
12699 if (GET_CODE (align_exp) == CONST_INT)
12700 align = INTVAL (align_exp);
12702 /* Can't use any of this if the user has appropriated esi or edi. */
12703 if (global_regs[4] || global_regs[5])
12706 /* This simple hack avoids all inlining code and simplifies code below. */
12707 if (!TARGET_ALIGN_STRINGOPS)
12710 if (GET_CODE (count_exp) == CONST_INT)
12712 count = INTVAL (count_exp);
12713 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12717 /* Figure out proper mode for counter. For 32bits it is always SImode,
12718 for 64bits use SImode when possible, otherwise DImode.
12719 Set count to number of bytes copied when known at compile time. */
12721 || GET_MODE (count_exp) == SImode
12722 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12723 counter_mode = SImode;
12725 counter_mode = DImode;
12727 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12729 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12730 if (destreg != XEXP (dst, 0))
12731 dst = replace_equiv_address_nv (dst, destreg);
12732 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12733 if (srcreg != XEXP (src, 0))
12734 src = replace_equiv_address_nv (src, srcreg);
12736 /* When optimizing for size emit simple rep ; movsb instruction for
12737 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12738 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12739 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12740 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12741 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12742 known to be zero or not. The rep; movsb sequence causes higher
12743 register pressure though, so take that into account. */
12745 if ((!optimize || optimize_size)
12750 || (count & 3) + count / 4 > 6))))
12752 emit_insn (gen_cld ());
12753 countreg = ix86_zero_extend_to_Pmode (count_exp);
12754 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12755 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12756 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12760 /* For constant aligned (or small unaligned) copies use rep movsl
12761 followed by code copying the rest. For PentiumPro ensure 8 byte
12762 alignment to allow rep movsl acceleration. */
12764 else if (count != 0
12766 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12767 || optimize_size || count < (unsigned int) 64))
12769 unsigned HOST_WIDE_INT offset = 0;
12770 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12771 rtx srcmem, dstmem;
12773 emit_insn (gen_cld ());
12774 if (count & ~(size - 1))
12776 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12778 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12780 while (offset < (count & ~(size - 1)))
12782 srcmem = adjust_automodify_address_nv (src, movs_mode,
12784 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12786 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12792 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12793 & (TARGET_64BIT ? -1 : 0x3fffffff));
12794 countreg = copy_to_mode_reg (counter_mode, countreg);
12795 countreg = ix86_zero_extend_to_Pmode (countreg);
12797 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12798 GEN_INT (size == 4 ? 2 : 3));
12799 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12800 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12802 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12803 countreg, destexp, srcexp));
12804 offset = count & ~(size - 1);
12807 if (size == 8 && (count & 0x04))
12809 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12811 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12813 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12818 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12820 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12822 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12827 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12829 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12831 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12834 /* The generic code based on the glibc implementation:
12835 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12836 allowing accelerated copying there)
12837 - copy the data using rep movsl
12838 - copy the rest. */
12843 rtx srcmem, dstmem;
12844 int desired_alignment = (TARGET_PENTIUMPRO
12845 && (count == 0 || count >= (unsigned int) 260)
12846 ? 8 : UNITS_PER_WORD);
12847 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12848 dst = change_address (dst, BLKmode, destreg);
12849 src = change_address (src, BLKmode, srcreg);
12851 /* In case we don't know anything about the alignment, default to
12852 library version, since it is usually equally fast and result in
12855 Also emit call when we know that the count is large and call overhead
12856 will not be important. */
12857 if (!TARGET_INLINE_ALL_STRINGOPS
12858 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12861 if (TARGET_SINGLE_STRINGOP)
12862 emit_insn (gen_cld ());
12864 countreg2 = gen_reg_rtx (Pmode);
12865 countreg = copy_to_mode_reg (counter_mode, count_exp);
12867 /* We don't use loops to align destination and to copy parts smaller
12868 than 4 bytes, because gcc is able to optimize such code better (in
12869 the case the destination or the count really is aligned, gcc is often
12870 able to predict the branches) and also it is friendlier to the
12871 hardware branch prediction.
12873 Using loops is beneficial for generic case, because we can
12874 handle small counts using the loops. Many CPUs (such as Athlon)
12875 have large REP prefix setup costs.
12877 This is quite costly. Maybe we can revisit this decision later or
12878 add some customizability to this code. */
12880 if (count == 0 && align < desired_alignment)
12882 label = gen_label_rtx ();
12883 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12884 LEU, 0, counter_mode, 1, label);
12888 rtx label = ix86_expand_aligntest (destreg, 1);
12889 srcmem = change_address (src, QImode, srcreg);
12890 dstmem = change_address (dst, QImode, destreg);
12891 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12892 ix86_adjust_counter (countreg, 1);
12893 emit_label (label);
12894 LABEL_NUSES (label) = 1;
12898 rtx label = ix86_expand_aligntest (destreg, 2);
12899 srcmem = change_address (src, HImode, srcreg);
12900 dstmem = change_address (dst, HImode, destreg);
12901 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12902 ix86_adjust_counter (countreg, 2);
12903 emit_label (label);
12904 LABEL_NUSES (label) = 1;
12906 if (align <= 4 && desired_alignment > 4)
12908 rtx label = ix86_expand_aligntest (destreg, 4);
12909 srcmem = change_address (src, SImode, srcreg);
12910 dstmem = change_address (dst, SImode, destreg);
12911 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12912 ix86_adjust_counter (countreg, 4);
12913 emit_label (label);
12914 LABEL_NUSES (label) = 1;
12917 if (label && desired_alignment > 4 && !TARGET_64BIT)
12919 emit_label (label);
12920 LABEL_NUSES (label) = 1;
12923 if (!TARGET_SINGLE_STRINGOP)
12924 emit_insn (gen_cld ());
12927 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12929 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12933 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12934 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12936 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12937 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12938 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12939 countreg2, destexp, srcexp));
12943 emit_label (label);
12944 LABEL_NUSES (label) = 1;
12946 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12948 srcmem = change_address (src, SImode, srcreg);
12949 dstmem = change_address (dst, SImode, destreg);
12950 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12952 if ((align <= 4 || count == 0) && TARGET_64BIT)
12954 rtx label = ix86_expand_aligntest (countreg, 4);
12955 srcmem = change_address (src, SImode, srcreg);
12956 dstmem = change_address (dst, SImode, destreg);
12957 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12958 emit_label (label);
12959 LABEL_NUSES (label) = 1;
12961 if (align > 2 && count != 0 && (count & 2))
12963 srcmem = change_address (src, HImode, srcreg);
12964 dstmem = change_address (dst, HImode, destreg);
12965 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12967 if (align <= 2 || count == 0)
12969 rtx label = ix86_expand_aligntest (countreg, 2);
12970 srcmem = change_address (src, HImode, srcreg);
12971 dstmem = change_address (dst, HImode, destreg);
12972 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12973 emit_label (label);
12974 LABEL_NUSES (label) = 1;
12976 if (align > 1 && count != 0 && (count & 1))
12978 srcmem = change_address (src, QImode, srcreg);
12979 dstmem = change_address (dst, QImode, destreg);
12980 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12982 if (align <= 1 || count == 0)
12984 rtx label = ix86_expand_aligntest (countreg, 1);
12985 srcmem = change_address (src, QImode, srcreg);
12986 dstmem = change_address (dst, QImode, destreg);
12987 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12988 emit_label (label);
12989 LABEL_NUSES (label) = 1;
12996 /* Expand string clear operation (bzero). Use i386 string operations when
12997 profitable. expand_movmem contains similar code. */
12999 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13001 rtx destreg, zeroreg, countreg, destexp;
13002 enum machine_mode counter_mode;
13003 HOST_WIDE_INT align = 0;
13004 unsigned HOST_WIDE_INT count = 0;
13006 if (GET_CODE (align_exp) == CONST_INT)
13007 align = INTVAL (align_exp);
13009 /* Can't use any of this if the user has appropriated esi. */
13010 if (global_regs[4])
13013 /* This simple hack avoids all inlining code and simplifies code below. */
13014 if (!TARGET_ALIGN_STRINGOPS)
13017 if (GET_CODE (count_exp) == CONST_INT)
13019 count = INTVAL (count_exp);
13020 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13023 /* Figure out proper mode for counter. For 32bits it is always SImode,
13024 for 64bits use SImode when possible, otherwise DImode.
13025 Set count to number of bytes copied when known at compile time. */
13027 || GET_MODE (count_exp) == SImode
13028 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13029 counter_mode = SImode;
13031 counter_mode = DImode;
13033 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13034 if (destreg != XEXP (dst, 0))
13035 dst = replace_equiv_address_nv (dst, destreg);
13038 /* When optimizing for size emit simple rep ; movsb instruction for
13039 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13040 sequence is 7 bytes long, so if optimizing for size and count is
13041 small enough that some stosl, stosw and stosb instructions without
13042 rep are shorter, fall back into the next if. */
13044 if ((!optimize || optimize_size)
13047 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13049 emit_insn (gen_cld ());
13051 countreg = ix86_zero_extend_to_Pmode (count_exp);
13052 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13053 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13054 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13056 else if (count != 0
13058 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13059 || optimize_size || count < (unsigned int) 64))
13061 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13062 unsigned HOST_WIDE_INT offset = 0;
13064 emit_insn (gen_cld ());
13066 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13067 if (count & ~(size - 1))
13069 unsigned HOST_WIDE_INT repcount;
13070 unsigned int max_nonrep;
13072 repcount = count >> (size == 4 ? 2 : 3);
13074 repcount &= 0x3fffffff;
13076 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13077 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13078 bytes. In both cases the latter seems to be faster for small
13080 max_nonrep = size == 4 ? 7 : 4;
13081 if (!optimize_size)
13084 case PROCESSOR_PENTIUM4:
13085 case PROCESSOR_NOCONA:
13092 if (repcount <= max_nonrep)
13093 while (repcount-- > 0)
13095 rtx mem = adjust_automodify_address_nv (dst,
13096 GET_MODE (zeroreg),
13098 emit_insn (gen_strset (destreg, mem, zeroreg));
13103 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13104 countreg = ix86_zero_extend_to_Pmode (countreg);
13105 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13106 GEN_INT (size == 4 ? 2 : 3));
13107 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13108 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13110 offset = count & ~(size - 1);
13113 if (size == 8 && (count & 0x04))
13115 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13117 emit_insn (gen_strset (destreg, mem,
13118 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13123 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13125 emit_insn (gen_strset (destreg, mem,
13126 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13131 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13133 emit_insn (gen_strset (destreg, mem,
13134 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13141 /* Compute desired alignment of the string operation. */
13142 int desired_alignment = (TARGET_PENTIUMPRO
13143 && (count == 0 || count >= (unsigned int) 260)
13144 ? 8 : UNITS_PER_WORD);
13146 /* In case we don't know anything about the alignment, default to
13147 library version, since it is usually equally fast and result in
13150 Also emit call when we know that the count is large and call overhead
13151 will not be important. */
13152 if (!TARGET_INLINE_ALL_STRINGOPS
13153 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13156 if (TARGET_SINGLE_STRINGOP)
13157 emit_insn (gen_cld ());
13159 countreg2 = gen_reg_rtx (Pmode);
13160 countreg = copy_to_mode_reg (counter_mode, count_exp);
13161 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13162 /* Get rid of MEM_OFFSET, it won't be accurate. */
13163 dst = change_address (dst, BLKmode, destreg);
13165 if (count == 0 && align < desired_alignment)
13167 label = gen_label_rtx ();
13168 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13169 LEU, 0, counter_mode, 1, label);
13173 rtx label = ix86_expand_aligntest (destreg, 1);
13174 emit_insn (gen_strset (destreg, dst,
13175 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13176 ix86_adjust_counter (countreg, 1);
13177 emit_label (label);
13178 LABEL_NUSES (label) = 1;
13182 rtx label = ix86_expand_aligntest (destreg, 2);
13183 emit_insn (gen_strset (destreg, dst,
13184 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13185 ix86_adjust_counter (countreg, 2);
13186 emit_label (label);
13187 LABEL_NUSES (label) = 1;
13189 if (align <= 4 && desired_alignment > 4)
13191 rtx label = ix86_expand_aligntest (destreg, 4);
13192 emit_insn (gen_strset (destreg, dst,
13194 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13196 ix86_adjust_counter (countreg, 4);
13197 emit_label (label);
13198 LABEL_NUSES (label) = 1;
13201 if (label && desired_alignment > 4 && !TARGET_64BIT)
13203 emit_label (label);
13204 LABEL_NUSES (label) = 1;
13208 if (!TARGET_SINGLE_STRINGOP)
13209 emit_insn (gen_cld ());
13212 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13214 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13218 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13219 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13221 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13222 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13226 emit_label (label);
13227 LABEL_NUSES (label) = 1;
13230 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13231 emit_insn (gen_strset (destreg, dst,
13232 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13233 if (TARGET_64BIT && (align <= 4 || count == 0))
13235 rtx label = ix86_expand_aligntest (countreg, 4);
13236 emit_insn (gen_strset (destreg, dst,
13237 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13238 emit_label (label);
13239 LABEL_NUSES (label) = 1;
13241 if (align > 2 && count != 0 && (count & 2))
13242 emit_insn (gen_strset (destreg, dst,
13243 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13244 if (align <= 2 || count == 0)
13246 rtx label = ix86_expand_aligntest (countreg, 2);
13247 emit_insn (gen_strset (destreg, dst,
13248 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13249 emit_label (label);
13250 LABEL_NUSES (label) = 1;
13252 if (align > 1 && count != 0 && (count & 1))
13253 emit_insn (gen_strset (destreg, dst,
13254 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13255 if (align <= 1 || count == 0)
13257 rtx label = ix86_expand_aligntest (countreg, 1);
13258 emit_insn (gen_strset (destreg, dst,
13259 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13260 emit_label (label);
13261 LABEL_NUSES (label) = 1;
13267 /* Expand strlen. */
13269 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13271 rtx addr, scratch1, scratch2, scratch3, scratch4;
13273 /* The generic case of strlen expander is long. Avoid it's
13274 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13276 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13277 && !TARGET_INLINE_ALL_STRINGOPS
13279 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13282 addr = force_reg (Pmode, XEXP (src, 0));
13283 scratch1 = gen_reg_rtx (Pmode);
13285 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13288 /* Well it seems that some optimizer does not combine a call like
13289 foo(strlen(bar), strlen(bar));
13290 when the move and the subtraction is done here. It does calculate
13291 the length just once when these instructions are done inside of
13292 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13293 often used and I use one fewer register for the lifetime of
13294 output_strlen_unroll() this is better. */
13296 emit_move_insn (out, addr);
13298 ix86_expand_strlensi_unroll_1 (out, src, align);
13300 /* strlensi_unroll_1 returns the address of the zero at the end of
13301 the string, like memchr(), so compute the length by subtracting
13302 the start address. */
13304 emit_insn (gen_subdi3 (out, out, addr));
13306 emit_insn (gen_subsi3 (out, out, addr));
13311 scratch2 = gen_reg_rtx (Pmode);
13312 scratch3 = gen_reg_rtx (Pmode);
13313 scratch4 = force_reg (Pmode, constm1_rtx);
13315 emit_move_insn (scratch3, addr);
13316 eoschar = force_reg (QImode, eoschar);
13318 emit_insn (gen_cld ());
13319 src = replace_equiv_address_nv (src, scratch3);
13321 /* If .md starts supporting :P, this can be done in .md. */
13322 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13323 scratch4), UNSPEC_SCAS);
13324 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13327 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13328 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13332 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13333 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13339 /* Expand the appropriate insns for doing strlen if not just doing
13342 out = result, initialized with the start address
13343 align_rtx = alignment of the address.
13344 scratch = scratch register, initialized with the startaddress when
13345 not aligned, otherwise undefined
13347 This is just the body. It needs the initializations mentioned above and
13348 some address computing at the end. These things are done in i386.md. */
13351 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13355 rtx align_2_label = NULL_RTX;
13356 rtx align_3_label = NULL_RTX;
13357 rtx align_4_label = gen_label_rtx ();
13358 rtx end_0_label = gen_label_rtx ();
13360 rtx tmpreg = gen_reg_rtx (SImode);
13361 rtx scratch = gen_reg_rtx (SImode);
13365 if (GET_CODE (align_rtx) == CONST_INT)
13366 align = INTVAL (align_rtx);
13368 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13370 /* Is there a known alignment and is it less than 4? */
13373 rtx scratch1 = gen_reg_rtx (Pmode);
13374 emit_move_insn (scratch1, out);
13375 /* Is there a known alignment and is it not 2? */
13378 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13379 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13381 /* Leave just the 3 lower bits. */
13382 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13383 NULL_RTX, 0, OPTAB_WIDEN);
13385 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13386 Pmode, 1, align_4_label);
13387 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13388 Pmode, 1, align_2_label);
13389 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13390 Pmode, 1, align_3_label);
13394 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13395 check if is aligned to 4 - byte. */
13397 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13398 NULL_RTX, 0, OPTAB_WIDEN);
13400 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13401 Pmode, 1, align_4_label);
13404 mem = change_address (src, QImode, out);
13406 /* Now compare the bytes. */
13408 /* Compare the first n unaligned byte on a byte per byte basis. */
13409 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13410 QImode, 1, end_0_label);
13412 /* Increment the address. */
13414 emit_insn (gen_adddi3 (out, out, const1_rtx));
13416 emit_insn (gen_addsi3 (out, out, const1_rtx));
13418 /* Not needed with an alignment of 2 */
13421 emit_label (align_2_label);
13423 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13427 emit_insn (gen_adddi3 (out, out, const1_rtx));
13429 emit_insn (gen_addsi3 (out, out, const1_rtx));
13431 emit_label (align_3_label);
13434 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13438 emit_insn (gen_adddi3 (out, out, const1_rtx));
13440 emit_insn (gen_addsi3 (out, out, const1_rtx));
13443 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13444 align this loop. It gives only huge programs, but does not help to
13446 emit_label (align_4_label);
13448 mem = change_address (src, SImode, out);
13449 emit_move_insn (scratch, mem);
13451 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13453 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13455 /* This formula yields a nonzero result iff one of the bytes is zero.
13456 This saves three branches inside loop and many cycles. */
13458 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13459 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13460 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13461 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13462 gen_int_mode (0x80808080, SImode)));
13463 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13468 rtx reg = gen_reg_rtx (SImode);
13469 rtx reg2 = gen_reg_rtx (Pmode);
13470 emit_move_insn (reg, tmpreg);
13471 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13473 /* If zero is not in the first two bytes, move two bytes forward. */
13474 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13475 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13476 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13477 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13478 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13481 /* Emit lea manually to avoid clobbering of flags. */
13482 emit_insn (gen_rtx_SET (SImode, reg2,
13483 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13485 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13486 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13487 emit_insn (gen_rtx_SET (VOIDmode, out,
13488 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13495 rtx end_2_label = gen_label_rtx ();
13496 /* Is zero in the first two bytes? */
13498 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13499 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13500 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13501 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13502 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13504 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13505 JUMP_LABEL (tmp) = end_2_label;
13507 /* Not in the first two. Move two bytes forward. */
13508 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13510 emit_insn (gen_adddi3 (out, out, const2_rtx));
13512 emit_insn (gen_addsi3 (out, out, const2_rtx));
13514 emit_label (end_2_label);
13518 /* Avoid branch in fixing the byte. */
13519 tmpreg = gen_lowpart (QImode, tmpreg);
13520 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13521 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13523 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13525 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13527 emit_label (end_0_label);
13531 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13532 rtx callarg2 ATTRIBUTE_UNUSED,
13533 rtx pop, int sibcall)
13535 rtx use = NULL, call;
13537 if (pop == const0_rtx)
13539 gcc_assert (!TARGET_64BIT || !pop);
13541 if (TARGET_MACHO && !TARGET_64BIT)
13544 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13545 fnaddr = machopic_indirect_call_target (fnaddr);
13550 /* Static functions and indirect calls don't need the pic register. */
13551 if (! TARGET_64BIT && flag_pic
13552 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13553 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13554 use_reg (&use, pic_offset_table_rtx);
13557 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13559 rtx al = gen_rtx_REG (QImode, 0);
13560 emit_move_insn (al, callarg2);
13561 use_reg (&use, al);
13564 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13566 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13567 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13569 if (sibcall && TARGET_64BIT
13570 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13573 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13574 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13575 emit_move_insn (fnaddr, addr);
13576 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13579 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13581 call = gen_rtx_SET (VOIDmode, retval, call);
13584 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13585 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13586 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13589 call = emit_call_insn (call);
13591 CALL_INSN_FUNCTION_USAGE (call) = use;
13595 /* Clear stack slot assignments remembered from previous functions.
13596 This is called from INIT_EXPANDERS once before RTL is emitted for each
13599 static struct machine_function *
13600 ix86_init_machine_status (void)
13602 struct machine_function *f;
13604 f = ggc_alloc_cleared (sizeof (struct machine_function));
13605 f->use_fast_prologue_epilogue_nregs = -1;
13606 f->tls_descriptor_call_expanded_p = 0;
13611 /* Return a MEM corresponding to a stack slot with mode MODE.
13612 Allocate a new slot if necessary.
13614 The RTL for a function can have several slots available: N is
13615 which slot to use. */
13618 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13620 struct stack_local_entry *s;
13622 gcc_assert (n < MAX_386_STACK_LOCALS);
13624 /* Virtual slot is valid only before vregs are instantiated. */
13625 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13627 for (s = ix86_stack_locals; s; s = s->next)
13628 if (s->mode == mode && s->n == n)
13631 s = (struct stack_local_entry *)
13632 ggc_alloc (sizeof (struct stack_local_entry));
13635 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13637 s->next = ix86_stack_locals;
13638 ix86_stack_locals = s;
13642 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13644 static GTY(()) rtx ix86_tls_symbol;
13646 ix86_tls_get_addr (void)
13649 if (!ix86_tls_symbol)
13651 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13652 (TARGET_ANY_GNU_TLS
13654 ? "___tls_get_addr"
13655 : "__tls_get_addr");
13658 return ix86_tls_symbol;
13661 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13663 static GTY(()) rtx ix86_tls_module_base_symbol;
13665 ix86_tls_module_base (void)
13668 if (!ix86_tls_module_base_symbol)
13670 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13671 "_TLS_MODULE_BASE_");
13672 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13673 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13676 return ix86_tls_module_base_symbol;
13679 /* Calculate the length of the memory address in the instruction
13680 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13683 memory_address_length (rtx addr)
13685 struct ix86_address parts;
13686 rtx base, index, disp;
13690 if (GET_CODE (addr) == PRE_DEC
13691 || GET_CODE (addr) == POST_INC
13692 || GET_CODE (addr) == PRE_MODIFY
13693 || GET_CODE (addr) == POST_MODIFY)
13696 ok = ix86_decompose_address (addr, &parts);
13699 if (parts.base && GET_CODE (parts.base) == SUBREG)
13700 parts.base = SUBREG_REG (parts.base);
13701 if (parts.index && GET_CODE (parts.index) == SUBREG)
13702 parts.index = SUBREG_REG (parts.index);
13705 index = parts.index;
13710 - esp as the base always wants an index,
13711 - ebp as the base always wants a displacement. */
13713 /* Register Indirect. */
13714 if (base && !index && !disp)
13716 /* esp (for its index) and ebp (for its displacement) need
13717 the two-byte modrm form. */
13718 if (addr == stack_pointer_rtx
13719 || addr == arg_pointer_rtx
13720 || addr == frame_pointer_rtx
13721 || addr == hard_frame_pointer_rtx)
13725 /* Direct Addressing. */
13726 else if (disp && !base && !index)
13731 /* Find the length of the displacement constant. */
13734 if (base && satisfies_constraint_K (disp))
13739 /* ebp always wants a displacement. */
13740 else if (base == hard_frame_pointer_rtx)
13743 /* An index requires the two-byte modrm form.... */
13745 /* ...like esp, which always wants an index. */
13746 || base == stack_pointer_rtx
13747 || base == arg_pointer_rtx
13748 || base == frame_pointer_rtx)
13755 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13756 is set, expect that insn have 8bit immediate alternative. */
13758 ix86_attr_length_immediate_default (rtx insn, int shortform)
13762 extract_insn_cached (insn);
13763 for (i = recog_data.n_operands - 1; i >= 0; --i)
13764 if (CONSTANT_P (recog_data.operand[i]))
13767 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13771 switch (get_attr_mode (insn))
13782 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13787 fatal_insn ("unknown insn mode", insn);
13793 /* Compute default value for "length_address" attribute. */
13795 ix86_attr_length_address_default (rtx insn)
13799 if (get_attr_type (insn) == TYPE_LEA)
13801 rtx set = PATTERN (insn);
13803 if (GET_CODE (set) == PARALLEL)
13804 set = XVECEXP (set, 0, 0);
13806 gcc_assert (GET_CODE (set) == SET);
13808 return memory_address_length (SET_SRC (set));
13811 extract_insn_cached (insn);
13812 for (i = recog_data.n_operands - 1; i >= 0; --i)
13813 if (GET_CODE (recog_data.operand[i]) == MEM)
13815 return memory_address_length (XEXP (recog_data.operand[i], 0));
13821 /* Return the maximum number of instructions a cpu can issue. */
13824 ix86_issue_rate (void)
13828 case PROCESSOR_PENTIUM:
13832 case PROCESSOR_PENTIUMPRO:
13833 case PROCESSOR_PENTIUM4:
13834 case PROCESSOR_ATHLON:
13836 case PROCESSOR_NOCONA:
13837 case PROCESSOR_GENERIC32:
13838 case PROCESSOR_GENERIC64:
13841 case PROCESSOR_CORE2:
13849 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13850 by DEP_INSN and nothing set by DEP_INSN. */
13853 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13857 /* Simplify the test for uninteresting insns. */
13858 if (insn_type != TYPE_SETCC
13859 && insn_type != TYPE_ICMOV
13860 && insn_type != TYPE_FCMOV
13861 && insn_type != TYPE_IBR)
13864 if ((set = single_set (dep_insn)) != 0)
13866 set = SET_DEST (set);
13869 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13870 && XVECLEN (PATTERN (dep_insn), 0) == 2
13871 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13872 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13874 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13875 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13880 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13883 /* This test is true if the dependent insn reads the flags but
13884 not any other potentially set register. */
13885 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13888 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13894 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13895 address with operands set by DEP_INSN. */
13898 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13902 if (insn_type == TYPE_LEA
13905 addr = PATTERN (insn);
13907 if (GET_CODE (addr) == PARALLEL)
13908 addr = XVECEXP (addr, 0, 0);
13910 gcc_assert (GET_CODE (addr) == SET);
13912 addr = SET_SRC (addr);
13917 extract_insn_cached (insn);
13918 for (i = recog_data.n_operands - 1; i >= 0; --i)
13919 if (GET_CODE (recog_data.operand[i]) == MEM)
13921 addr = XEXP (recog_data.operand[i], 0);
13928 return modified_in_p (addr, dep_insn);
13932 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13934 enum attr_type insn_type, dep_insn_type;
13935 enum attr_memory memory;
13937 int dep_insn_code_number;
13939 /* Anti and output dependencies have zero cost on all CPUs. */
13940 if (REG_NOTE_KIND (link) != 0)
13943 dep_insn_code_number = recog_memoized (dep_insn);
13945 /* If we can't recognize the insns, we can't really do anything. */
13946 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13949 insn_type = get_attr_type (insn);
13950 dep_insn_type = get_attr_type (dep_insn);
13954 case PROCESSOR_PENTIUM:
13955 /* Address Generation Interlock adds a cycle of latency. */
13956 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13959 /* ??? Compares pair with jump/setcc. */
13960 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13963 /* Floating point stores require value to be ready one cycle earlier. */
13964 if (insn_type == TYPE_FMOV
13965 && get_attr_memory (insn) == MEMORY_STORE
13966 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13970 case PROCESSOR_PENTIUMPRO:
13971 memory = get_attr_memory (insn);
13973 /* INT->FP conversion is expensive. */
13974 if (get_attr_fp_int_src (dep_insn))
13977 /* There is one cycle extra latency between an FP op and a store. */
13978 if (insn_type == TYPE_FMOV
13979 && (set = single_set (dep_insn)) != NULL_RTX
13980 && (set2 = single_set (insn)) != NULL_RTX
13981 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13982 && GET_CODE (SET_DEST (set2)) == MEM)
13985 /* Show ability of reorder buffer to hide latency of load by executing
13986 in parallel with previous instruction in case
13987 previous instruction is not needed to compute the address. */
13988 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13989 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13991 /* Claim moves to take one cycle, as core can issue one load
13992 at time and the next load can start cycle later. */
13993 if (dep_insn_type == TYPE_IMOV
13994 || dep_insn_type == TYPE_FMOV)
14002 memory = get_attr_memory (insn);
14004 /* The esp dependency is resolved before the instruction is really
14006 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14007 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14010 /* INT->FP conversion is expensive. */
14011 if (get_attr_fp_int_src (dep_insn))
14014 /* Show ability of reorder buffer to hide latency of load by executing
14015 in parallel with previous instruction in case
14016 previous instruction is not needed to compute the address. */
14017 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14018 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14020 /* Claim moves to take one cycle, as core can issue one load
14021 at time and the next load can start cycle later. */
14022 if (dep_insn_type == TYPE_IMOV
14023 || dep_insn_type == TYPE_FMOV)
14032 case PROCESSOR_ATHLON:
14034 case PROCESSOR_GENERIC32:
14035 case PROCESSOR_GENERIC64:
14036 memory = get_attr_memory (insn);
14038 /* Show ability of reorder buffer to hide latency of load by executing
14039 in parallel with previous instruction in case
14040 previous instruction is not needed to compute the address. */
14041 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14042 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14044 enum attr_unit unit = get_attr_unit (insn);
14047 /* Because of the difference between the length of integer and
14048 floating unit pipeline preparation stages, the memory operands
14049 for floating point are cheaper.
14051 ??? For Athlon it the difference is most probably 2. */
14052 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14055 loadcost = TARGET_ATHLON ? 2 : 0;
14057 if (cost >= loadcost)
14070 /* How many alternative schedules to try. This should be as wide as the
14071 scheduling freedom in the DFA, but no wider. Making this value too
14072 large results extra work for the scheduler. */
14075 ia32_multipass_dfa_lookahead (void)
14077 if (ix86_tune == PROCESSOR_PENTIUM)
14080 if (ix86_tune == PROCESSOR_PENTIUMPRO
14081 || ix86_tune == PROCESSOR_K6)
14089 /* Compute the alignment given to a constant that is being placed in memory.
14090 EXP is the constant and ALIGN is the alignment that the object would
14092 The value of this function is used instead of that alignment to align
14096 ix86_constant_alignment (tree exp, int align)
14098 if (TREE_CODE (exp) == REAL_CST)
14100 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14102 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14105 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14106 && !TARGET_NO_ALIGN_LONG_STRINGS
14107 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14108 return BITS_PER_WORD;
14113 /* Compute the alignment for a static variable.
14114 TYPE is the data type, and ALIGN is the alignment that
14115 the object would ordinarily have. The value of this function is used
14116 instead of that alignment to align the object. */
14119 ix86_data_alignment (tree type, int align)
14121 int max_align = optimize_size ? BITS_PER_WORD : 256;
14123 if (AGGREGATE_TYPE_P (type)
14124 && TYPE_SIZE (type)
14125 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14126 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14127 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14128 && align < max_align)
14131 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14132 to 16byte boundary. */
14135 if (AGGREGATE_TYPE_P (type)
14136 && TYPE_SIZE (type)
14137 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14138 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14139 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14143 if (TREE_CODE (type) == ARRAY_TYPE)
14145 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14147 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14150 else if (TREE_CODE (type) == COMPLEX_TYPE)
14153 if (TYPE_MODE (type) == DCmode && align < 64)
14155 if (TYPE_MODE (type) == XCmode && align < 128)
14158 else if ((TREE_CODE (type) == RECORD_TYPE
14159 || TREE_CODE (type) == UNION_TYPE
14160 || TREE_CODE (type) == QUAL_UNION_TYPE)
14161 && TYPE_FIELDS (type))
14163 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14165 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14168 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14169 || TREE_CODE (type) == INTEGER_TYPE)
14171 if (TYPE_MODE (type) == DFmode && align < 64)
14173 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14180 /* Compute the alignment for a local variable.
14181 TYPE is the data type, and ALIGN is the alignment that
14182 the object would ordinarily have. The value of this macro is used
14183 instead of that alignment to align the object. */
14186 ix86_local_alignment (tree type, int align)
14188 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14189 to 16byte boundary. */
14192 if (AGGREGATE_TYPE_P (type)
14193 && TYPE_SIZE (type)
14194 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14195 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14196 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14199 if (TREE_CODE (type) == ARRAY_TYPE)
14201 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14203 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14206 else if (TREE_CODE (type) == COMPLEX_TYPE)
14208 if (TYPE_MODE (type) == DCmode && align < 64)
14210 if (TYPE_MODE (type) == XCmode && align < 128)
14213 else if ((TREE_CODE (type) == RECORD_TYPE
14214 || TREE_CODE (type) == UNION_TYPE
14215 || TREE_CODE (type) == QUAL_UNION_TYPE)
14216 && TYPE_FIELDS (type))
14218 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14220 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14223 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14224 || TREE_CODE (type) == INTEGER_TYPE)
14227 if (TYPE_MODE (type) == DFmode && align < 64)
14229 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14235 /* Emit RTL insns to initialize the variable parts of a trampoline.
14236 FNADDR is an RTX for the address of the function's pure code.
14237 CXT is an RTX for the static chain value for the function. */
14239 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14243 /* Compute offset from the end of the jmp to the target function. */
14244 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14245 plus_constant (tramp, 10),
14246 NULL_RTX, 1, OPTAB_DIRECT);
14247 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14248 gen_int_mode (0xb9, QImode));
14249 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14250 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14251 gen_int_mode (0xe9, QImode));
14252 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14257 /* Try to load address using shorter movl instead of movabs.
14258 We may want to support movq for kernel mode, but kernel does not use
14259 trampolines at the moment. */
14260 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14262 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14263 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14264 gen_int_mode (0xbb41, HImode));
14265 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14266 gen_lowpart (SImode, fnaddr));
14271 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14272 gen_int_mode (0xbb49, HImode));
14273 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14277 /* Load static chain using movabs to r10. */
14278 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14279 gen_int_mode (0xba49, HImode));
14280 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14283 /* Jump to the r11 */
14284 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14285 gen_int_mode (0xff49, HImode));
14286 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14287 gen_int_mode (0xe3, QImode));
14289 gcc_assert (offset <= TRAMPOLINE_SIZE);
14292 #ifdef ENABLE_EXECUTE_STACK
14293 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14294 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14298 /* Codes for all the SSE/MMX builtins. */
14301 IX86_BUILTIN_ADDPS,
14302 IX86_BUILTIN_ADDSS,
14303 IX86_BUILTIN_DIVPS,
14304 IX86_BUILTIN_DIVSS,
14305 IX86_BUILTIN_MULPS,
14306 IX86_BUILTIN_MULSS,
14307 IX86_BUILTIN_SUBPS,
14308 IX86_BUILTIN_SUBSS,
14310 IX86_BUILTIN_CMPEQPS,
14311 IX86_BUILTIN_CMPLTPS,
14312 IX86_BUILTIN_CMPLEPS,
14313 IX86_BUILTIN_CMPGTPS,
14314 IX86_BUILTIN_CMPGEPS,
14315 IX86_BUILTIN_CMPNEQPS,
14316 IX86_BUILTIN_CMPNLTPS,
14317 IX86_BUILTIN_CMPNLEPS,
14318 IX86_BUILTIN_CMPNGTPS,
14319 IX86_BUILTIN_CMPNGEPS,
14320 IX86_BUILTIN_CMPORDPS,
14321 IX86_BUILTIN_CMPUNORDPS,
14322 IX86_BUILTIN_CMPEQSS,
14323 IX86_BUILTIN_CMPLTSS,
14324 IX86_BUILTIN_CMPLESS,
14325 IX86_BUILTIN_CMPNEQSS,
14326 IX86_BUILTIN_CMPNLTSS,
14327 IX86_BUILTIN_CMPNLESS,
14328 IX86_BUILTIN_CMPNGTSS,
14329 IX86_BUILTIN_CMPNGESS,
14330 IX86_BUILTIN_CMPORDSS,
14331 IX86_BUILTIN_CMPUNORDSS,
14333 IX86_BUILTIN_COMIEQSS,
14334 IX86_BUILTIN_COMILTSS,
14335 IX86_BUILTIN_COMILESS,
14336 IX86_BUILTIN_COMIGTSS,
14337 IX86_BUILTIN_COMIGESS,
14338 IX86_BUILTIN_COMINEQSS,
14339 IX86_BUILTIN_UCOMIEQSS,
14340 IX86_BUILTIN_UCOMILTSS,
14341 IX86_BUILTIN_UCOMILESS,
14342 IX86_BUILTIN_UCOMIGTSS,
14343 IX86_BUILTIN_UCOMIGESS,
14344 IX86_BUILTIN_UCOMINEQSS,
14346 IX86_BUILTIN_CVTPI2PS,
14347 IX86_BUILTIN_CVTPS2PI,
14348 IX86_BUILTIN_CVTSI2SS,
14349 IX86_BUILTIN_CVTSI642SS,
14350 IX86_BUILTIN_CVTSS2SI,
14351 IX86_BUILTIN_CVTSS2SI64,
14352 IX86_BUILTIN_CVTTPS2PI,
14353 IX86_BUILTIN_CVTTSS2SI,
14354 IX86_BUILTIN_CVTTSS2SI64,
14356 IX86_BUILTIN_MAXPS,
14357 IX86_BUILTIN_MAXSS,
14358 IX86_BUILTIN_MINPS,
14359 IX86_BUILTIN_MINSS,
14361 IX86_BUILTIN_LOADUPS,
14362 IX86_BUILTIN_STOREUPS,
14363 IX86_BUILTIN_MOVSS,
14365 IX86_BUILTIN_MOVHLPS,
14366 IX86_BUILTIN_MOVLHPS,
14367 IX86_BUILTIN_LOADHPS,
14368 IX86_BUILTIN_LOADLPS,
14369 IX86_BUILTIN_STOREHPS,
14370 IX86_BUILTIN_STORELPS,
14372 IX86_BUILTIN_MASKMOVQ,
14373 IX86_BUILTIN_MOVMSKPS,
14374 IX86_BUILTIN_PMOVMSKB,
14376 IX86_BUILTIN_MOVNTPS,
14377 IX86_BUILTIN_MOVNTQ,
14379 IX86_BUILTIN_LOADDQU,
14380 IX86_BUILTIN_STOREDQU,
14382 IX86_BUILTIN_PACKSSWB,
14383 IX86_BUILTIN_PACKSSDW,
14384 IX86_BUILTIN_PACKUSWB,
14386 IX86_BUILTIN_PADDB,
14387 IX86_BUILTIN_PADDW,
14388 IX86_BUILTIN_PADDD,
14389 IX86_BUILTIN_PADDQ,
14390 IX86_BUILTIN_PADDSB,
14391 IX86_BUILTIN_PADDSW,
14392 IX86_BUILTIN_PADDUSB,
14393 IX86_BUILTIN_PADDUSW,
14394 IX86_BUILTIN_PSUBB,
14395 IX86_BUILTIN_PSUBW,
14396 IX86_BUILTIN_PSUBD,
14397 IX86_BUILTIN_PSUBQ,
14398 IX86_BUILTIN_PSUBSB,
14399 IX86_BUILTIN_PSUBSW,
14400 IX86_BUILTIN_PSUBUSB,
14401 IX86_BUILTIN_PSUBUSW,
14404 IX86_BUILTIN_PANDN,
14408 IX86_BUILTIN_PAVGB,
14409 IX86_BUILTIN_PAVGW,
14411 IX86_BUILTIN_PCMPEQB,
14412 IX86_BUILTIN_PCMPEQW,
14413 IX86_BUILTIN_PCMPEQD,
14414 IX86_BUILTIN_PCMPGTB,
14415 IX86_BUILTIN_PCMPGTW,
14416 IX86_BUILTIN_PCMPGTD,
14418 IX86_BUILTIN_PMADDWD,
14420 IX86_BUILTIN_PMAXSW,
14421 IX86_BUILTIN_PMAXUB,
14422 IX86_BUILTIN_PMINSW,
14423 IX86_BUILTIN_PMINUB,
14425 IX86_BUILTIN_PMULHUW,
14426 IX86_BUILTIN_PMULHW,
14427 IX86_BUILTIN_PMULLW,
14429 IX86_BUILTIN_PSADBW,
14430 IX86_BUILTIN_PSHUFW,
14432 IX86_BUILTIN_PSLLW,
14433 IX86_BUILTIN_PSLLD,
14434 IX86_BUILTIN_PSLLQ,
14435 IX86_BUILTIN_PSRAW,
14436 IX86_BUILTIN_PSRAD,
14437 IX86_BUILTIN_PSRLW,
14438 IX86_BUILTIN_PSRLD,
14439 IX86_BUILTIN_PSRLQ,
14440 IX86_BUILTIN_PSLLWI,
14441 IX86_BUILTIN_PSLLDI,
14442 IX86_BUILTIN_PSLLQI,
14443 IX86_BUILTIN_PSRAWI,
14444 IX86_BUILTIN_PSRADI,
14445 IX86_BUILTIN_PSRLWI,
14446 IX86_BUILTIN_PSRLDI,
14447 IX86_BUILTIN_PSRLQI,
14449 IX86_BUILTIN_PUNPCKHBW,
14450 IX86_BUILTIN_PUNPCKHWD,
14451 IX86_BUILTIN_PUNPCKHDQ,
14452 IX86_BUILTIN_PUNPCKLBW,
14453 IX86_BUILTIN_PUNPCKLWD,
14454 IX86_BUILTIN_PUNPCKLDQ,
14456 IX86_BUILTIN_SHUFPS,
14458 IX86_BUILTIN_RCPPS,
14459 IX86_BUILTIN_RCPSS,
14460 IX86_BUILTIN_RSQRTPS,
14461 IX86_BUILTIN_RSQRTSS,
14462 IX86_BUILTIN_SQRTPS,
14463 IX86_BUILTIN_SQRTSS,
14465 IX86_BUILTIN_UNPCKHPS,
14466 IX86_BUILTIN_UNPCKLPS,
14468 IX86_BUILTIN_ANDPS,
14469 IX86_BUILTIN_ANDNPS,
14471 IX86_BUILTIN_XORPS,
14474 IX86_BUILTIN_LDMXCSR,
14475 IX86_BUILTIN_STMXCSR,
14476 IX86_BUILTIN_SFENCE,
14478 /* 3DNow! Original */
14479 IX86_BUILTIN_FEMMS,
14480 IX86_BUILTIN_PAVGUSB,
14481 IX86_BUILTIN_PF2ID,
14482 IX86_BUILTIN_PFACC,
14483 IX86_BUILTIN_PFADD,
14484 IX86_BUILTIN_PFCMPEQ,
14485 IX86_BUILTIN_PFCMPGE,
14486 IX86_BUILTIN_PFCMPGT,
14487 IX86_BUILTIN_PFMAX,
14488 IX86_BUILTIN_PFMIN,
14489 IX86_BUILTIN_PFMUL,
14490 IX86_BUILTIN_PFRCP,
14491 IX86_BUILTIN_PFRCPIT1,
14492 IX86_BUILTIN_PFRCPIT2,
14493 IX86_BUILTIN_PFRSQIT1,
14494 IX86_BUILTIN_PFRSQRT,
14495 IX86_BUILTIN_PFSUB,
14496 IX86_BUILTIN_PFSUBR,
14497 IX86_BUILTIN_PI2FD,
14498 IX86_BUILTIN_PMULHRW,
14500 /* 3DNow! Athlon Extensions */
14501 IX86_BUILTIN_PF2IW,
14502 IX86_BUILTIN_PFNACC,
14503 IX86_BUILTIN_PFPNACC,
14504 IX86_BUILTIN_PI2FW,
14505 IX86_BUILTIN_PSWAPDSI,
14506 IX86_BUILTIN_PSWAPDSF,
14509 IX86_BUILTIN_ADDPD,
14510 IX86_BUILTIN_ADDSD,
14511 IX86_BUILTIN_DIVPD,
14512 IX86_BUILTIN_DIVSD,
14513 IX86_BUILTIN_MULPD,
14514 IX86_BUILTIN_MULSD,
14515 IX86_BUILTIN_SUBPD,
14516 IX86_BUILTIN_SUBSD,
14518 IX86_BUILTIN_CMPEQPD,
14519 IX86_BUILTIN_CMPLTPD,
14520 IX86_BUILTIN_CMPLEPD,
14521 IX86_BUILTIN_CMPGTPD,
14522 IX86_BUILTIN_CMPGEPD,
14523 IX86_BUILTIN_CMPNEQPD,
14524 IX86_BUILTIN_CMPNLTPD,
14525 IX86_BUILTIN_CMPNLEPD,
14526 IX86_BUILTIN_CMPNGTPD,
14527 IX86_BUILTIN_CMPNGEPD,
14528 IX86_BUILTIN_CMPORDPD,
14529 IX86_BUILTIN_CMPUNORDPD,
14530 IX86_BUILTIN_CMPNEPD,
14531 IX86_BUILTIN_CMPEQSD,
14532 IX86_BUILTIN_CMPLTSD,
14533 IX86_BUILTIN_CMPLESD,
14534 IX86_BUILTIN_CMPNEQSD,
14535 IX86_BUILTIN_CMPNLTSD,
14536 IX86_BUILTIN_CMPNLESD,
14537 IX86_BUILTIN_CMPORDSD,
14538 IX86_BUILTIN_CMPUNORDSD,
14539 IX86_BUILTIN_CMPNESD,
14541 IX86_BUILTIN_COMIEQSD,
14542 IX86_BUILTIN_COMILTSD,
14543 IX86_BUILTIN_COMILESD,
14544 IX86_BUILTIN_COMIGTSD,
14545 IX86_BUILTIN_COMIGESD,
14546 IX86_BUILTIN_COMINEQSD,
14547 IX86_BUILTIN_UCOMIEQSD,
14548 IX86_BUILTIN_UCOMILTSD,
14549 IX86_BUILTIN_UCOMILESD,
14550 IX86_BUILTIN_UCOMIGTSD,
14551 IX86_BUILTIN_UCOMIGESD,
14552 IX86_BUILTIN_UCOMINEQSD,
14554 IX86_BUILTIN_MAXPD,
14555 IX86_BUILTIN_MAXSD,
14556 IX86_BUILTIN_MINPD,
14557 IX86_BUILTIN_MINSD,
14559 IX86_BUILTIN_ANDPD,
14560 IX86_BUILTIN_ANDNPD,
14562 IX86_BUILTIN_XORPD,
14564 IX86_BUILTIN_SQRTPD,
14565 IX86_BUILTIN_SQRTSD,
14567 IX86_BUILTIN_UNPCKHPD,
14568 IX86_BUILTIN_UNPCKLPD,
14570 IX86_BUILTIN_SHUFPD,
14572 IX86_BUILTIN_LOADUPD,
14573 IX86_BUILTIN_STOREUPD,
14574 IX86_BUILTIN_MOVSD,
14576 IX86_BUILTIN_LOADHPD,
14577 IX86_BUILTIN_LOADLPD,
14579 IX86_BUILTIN_CVTDQ2PD,
14580 IX86_BUILTIN_CVTDQ2PS,
14582 IX86_BUILTIN_CVTPD2DQ,
14583 IX86_BUILTIN_CVTPD2PI,
14584 IX86_BUILTIN_CVTPD2PS,
14585 IX86_BUILTIN_CVTTPD2DQ,
14586 IX86_BUILTIN_CVTTPD2PI,
14588 IX86_BUILTIN_CVTPI2PD,
14589 IX86_BUILTIN_CVTSI2SD,
14590 IX86_BUILTIN_CVTSI642SD,
14592 IX86_BUILTIN_CVTSD2SI,
14593 IX86_BUILTIN_CVTSD2SI64,
14594 IX86_BUILTIN_CVTSD2SS,
14595 IX86_BUILTIN_CVTSS2SD,
14596 IX86_BUILTIN_CVTTSD2SI,
14597 IX86_BUILTIN_CVTTSD2SI64,
14599 IX86_BUILTIN_CVTPS2DQ,
14600 IX86_BUILTIN_CVTPS2PD,
14601 IX86_BUILTIN_CVTTPS2DQ,
14603 IX86_BUILTIN_MOVNTI,
14604 IX86_BUILTIN_MOVNTPD,
14605 IX86_BUILTIN_MOVNTDQ,
14608 IX86_BUILTIN_MASKMOVDQU,
14609 IX86_BUILTIN_MOVMSKPD,
14610 IX86_BUILTIN_PMOVMSKB128,
14612 IX86_BUILTIN_PACKSSWB128,
14613 IX86_BUILTIN_PACKSSDW128,
14614 IX86_BUILTIN_PACKUSWB128,
14616 IX86_BUILTIN_PADDB128,
14617 IX86_BUILTIN_PADDW128,
14618 IX86_BUILTIN_PADDD128,
14619 IX86_BUILTIN_PADDQ128,
14620 IX86_BUILTIN_PADDSB128,
14621 IX86_BUILTIN_PADDSW128,
14622 IX86_BUILTIN_PADDUSB128,
14623 IX86_BUILTIN_PADDUSW128,
14624 IX86_BUILTIN_PSUBB128,
14625 IX86_BUILTIN_PSUBW128,
14626 IX86_BUILTIN_PSUBD128,
14627 IX86_BUILTIN_PSUBQ128,
14628 IX86_BUILTIN_PSUBSB128,
14629 IX86_BUILTIN_PSUBSW128,
14630 IX86_BUILTIN_PSUBUSB128,
14631 IX86_BUILTIN_PSUBUSW128,
14633 IX86_BUILTIN_PAND128,
14634 IX86_BUILTIN_PANDN128,
14635 IX86_BUILTIN_POR128,
14636 IX86_BUILTIN_PXOR128,
14638 IX86_BUILTIN_PAVGB128,
14639 IX86_BUILTIN_PAVGW128,
14641 IX86_BUILTIN_PCMPEQB128,
14642 IX86_BUILTIN_PCMPEQW128,
14643 IX86_BUILTIN_PCMPEQD128,
14644 IX86_BUILTIN_PCMPGTB128,
14645 IX86_BUILTIN_PCMPGTW128,
14646 IX86_BUILTIN_PCMPGTD128,
14648 IX86_BUILTIN_PMADDWD128,
14650 IX86_BUILTIN_PMAXSW128,
14651 IX86_BUILTIN_PMAXUB128,
14652 IX86_BUILTIN_PMINSW128,
14653 IX86_BUILTIN_PMINUB128,
14655 IX86_BUILTIN_PMULUDQ,
14656 IX86_BUILTIN_PMULUDQ128,
14657 IX86_BUILTIN_PMULHUW128,
14658 IX86_BUILTIN_PMULHW128,
14659 IX86_BUILTIN_PMULLW128,
14661 IX86_BUILTIN_PSADBW128,
14662 IX86_BUILTIN_PSHUFHW,
14663 IX86_BUILTIN_PSHUFLW,
14664 IX86_BUILTIN_PSHUFD,
14666 IX86_BUILTIN_PSLLW128,
14667 IX86_BUILTIN_PSLLD128,
14668 IX86_BUILTIN_PSLLQ128,
14669 IX86_BUILTIN_PSRAW128,
14670 IX86_BUILTIN_PSRAD128,
14671 IX86_BUILTIN_PSRLW128,
14672 IX86_BUILTIN_PSRLD128,
14673 IX86_BUILTIN_PSRLQ128,
14674 IX86_BUILTIN_PSLLDQI128,
14675 IX86_BUILTIN_PSLLWI128,
14676 IX86_BUILTIN_PSLLDI128,
14677 IX86_BUILTIN_PSLLQI128,
14678 IX86_BUILTIN_PSRAWI128,
14679 IX86_BUILTIN_PSRADI128,
14680 IX86_BUILTIN_PSRLDQI128,
14681 IX86_BUILTIN_PSRLWI128,
14682 IX86_BUILTIN_PSRLDI128,
14683 IX86_BUILTIN_PSRLQI128,
14685 IX86_BUILTIN_PUNPCKHBW128,
14686 IX86_BUILTIN_PUNPCKHWD128,
14687 IX86_BUILTIN_PUNPCKHDQ128,
14688 IX86_BUILTIN_PUNPCKHQDQ128,
14689 IX86_BUILTIN_PUNPCKLBW128,
14690 IX86_BUILTIN_PUNPCKLWD128,
14691 IX86_BUILTIN_PUNPCKLDQ128,
14692 IX86_BUILTIN_PUNPCKLQDQ128,
14694 IX86_BUILTIN_CLFLUSH,
14695 IX86_BUILTIN_MFENCE,
14696 IX86_BUILTIN_LFENCE,
14698 /* Prescott New Instructions. */
14699 IX86_BUILTIN_ADDSUBPS,
14700 IX86_BUILTIN_HADDPS,
14701 IX86_BUILTIN_HSUBPS,
14702 IX86_BUILTIN_MOVSHDUP,
14703 IX86_BUILTIN_MOVSLDUP,
14704 IX86_BUILTIN_ADDSUBPD,
14705 IX86_BUILTIN_HADDPD,
14706 IX86_BUILTIN_HSUBPD,
14707 IX86_BUILTIN_LDDQU,
14709 IX86_BUILTIN_MONITOR,
14710 IX86_BUILTIN_MWAIT,
14713 IX86_BUILTIN_PHADDW,
14714 IX86_BUILTIN_PHADDD,
14715 IX86_BUILTIN_PHADDSW,
14716 IX86_BUILTIN_PHSUBW,
14717 IX86_BUILTIN_PHSUBD,
14718 IX86_BUILTIN_PHSUBSW,
14719 IX86_BUILTIN_PMADDUBSW,
14720 IX86_BUILTIN_PMULHRSW,
14721 IX86_BUILTIN_PSHUFB,
14722 IX86_BUILTIN_PSIGNB,
14723 IX86_BUILTIN_PSIGNW,
14724 IX86_BUILTIN_PSIGND,
14725 IX86_BUILTIN_PALIGNR,
14726 IX86_BUILTIN_PABSB,
14727 IX86_BUILTIN_PABSW,
14728 IX86_BUILTIN_PABSD,
14730 IX86_BUILTIN_PHADDW128,
14731 IX86_BUILTIN_PHADDD128,
14732 IX86_BUILTIN_PHADDSW128,
14733 IX86_BUILTIN_PHSUBW128,
14734 IX86_BUILTIN_PHSUBD128,
14735 IX86_BUILTIN_PHSUBSW128,
14736 IX86_BUILTIN_PMADDUBSW128,
14737 IX86_BUILTIN_PMULHRSW128,
14738 IX86_BUILTIN_PSHUFB128,
14739 IX86_BUILTIN_PSIGNB128,
14740 IX86_BUILTIN_PSIGNW128,
14741 IX86_BUILTIN_PSIGND128,
14742 IX86_BUILTIN_PALIGNR128,
14743 IX86_BUILTIN_PABSB128,
14744 IX86_BUILTIN_PABSW128,
14745 IX86_BUILTIN_PABSD128,
14747 IX86_BUILTIN_VEC_INIT_V2SI,
14748 IX86_BUILTIN_VEC_INIT_V4HI,
14749 IX86_BUILTIN_VEC_INIT_V8QI,
14750 IX86_BUILTIN_VEC_EXT_V2DF,
14751 IX86_BUILTIN_VEC_EXT_V2DI,
14752 IX86_BUILTIN_VEC_EXT_V4SF,
14753 IX86_BUILTIN_VEC_EXT_V4SI,
14754 IX86_BUILTIN_VEC_EXT_V8HI,
14755 IX86_BUILTIN_VEC_EXT_V16QI,
14756 IX86_BUILTIN_VEC_EXT_V2SI,
14757 IX86_BUILTIN_VEC_EXT_V4HI,
14758 IX86_BUILTIN_VEC_SET_V8HI,
14759 IX86_BUILTIN_VEC_SET_V4HI,
14764 #define def_builtin(MASK, NAME, TYPE, CODE) \
14766 if ((MASK) & target_flags \
14767 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14768 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14769 NULL, NULL_TREE); \
14772 /* Bits for builtin_description.flag. */
14774 /* Set when we don't support the comparison natively, and should
14775 swap_comparison in order to support it. */
14776 #define BUILTIN_DESC_SWAP_OPERANDS 1
14778 struct builtin_description
14780 const unsigned int mask;
14781 const enum insn_code icode;
14782 const char *const name;
14783 const enum ix86_builtins code;
14784 const enum rtx_code comparison;
14785 const unsigned int flag;
14788 static const struct builtin_description bdesc_comi[] =
14790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14792 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14793 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14794 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14795 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14798 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14799 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14800 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14801 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14806 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14807 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14810 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14811 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14812 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14813 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14816 static const struct builtin_description bdesc_2arg[] =
14819 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14820 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14821 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14822 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14823 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14824 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14825 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14826 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14828 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14829 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14830 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14831 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14832 BUILTIN_DESC_SWAP_OPERANDS },
14833 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14834 BUILTIN_DESC_SWAP_OPERANDS },
14835 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14836 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14837 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14838 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14839 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14840 BUILTIN_DESC_SWAP_OPERANDS },
14841 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14842 BUILTIN_DESC_SWAP_OPERANDS },
14843 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14844 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14845 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14846 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14847 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14848 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14849 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14850 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14851 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14852 BUILTIN_DESC_SWAP_OPERANDS },
14853 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14854 BUILTIN_DESC_SWAP_OPERANDS },
14855 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14857 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14858 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14859 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14860 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14862 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14863 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14864 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14865 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14867 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14868 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14869 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14870 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14871 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14874 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14875 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14876 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14878 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14879 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14880 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14881 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14883 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14884 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14885 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14886 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14887 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14888 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14889 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14890 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14892 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14893 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14894 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14896 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14897 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14898 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14899 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14904 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14905 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14906 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14907 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14908 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14909 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14911 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14912 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14913 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14916 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14917 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14918 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14919 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14920 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14921 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14924 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14925 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14926 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14928 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14929 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14930 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14932 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14933 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14934 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14935 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14936 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14937 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14939 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14940 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14941 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14942 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14943 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14944 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14946 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14947 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14948 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14949 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14951 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14952 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14955 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14956 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14957 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14958 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14959 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14960 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14961 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14962 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14964 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14965 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14966 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14967 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14968 BUILTIN_DESC_SWAP_OPERANDS },
14969 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14970 BUILTIN_DESC_SWAP_OPERANDS },
14971 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14972 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14973 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14974 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14975 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14976 BUILTIN_DESC_SWAP_OPERANDS },
14977 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14978 BUILTIN_DESC_SWAP_OPERANDS },
14979 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14980 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14981 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14982 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14983 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14984 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14985 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14986 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14987 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14989 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14990 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14991 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14992 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14994 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14995 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14996 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14997 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14999 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15000 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15001 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15004 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15005 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15006 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15007 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15008 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15009 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15010 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15011 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15013 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15014 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15015 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15016 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15017 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15018 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15019 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15020 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15022 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15023 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15025 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15026 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15027 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15028 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15030 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15031 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15033 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15036 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15037 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15038 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15040 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15041 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15042 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15043 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15045 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15046 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15047 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15048 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15049 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15050 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15051 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15052 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15054 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15055 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15056 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15058 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15059 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15061 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15062 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15064 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15065 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15066 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15068 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15069 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15070 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15072 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15073 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15075 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15077 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15078 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15079 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15080 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15083 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15084 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15085 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15086 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15087 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15088 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15091 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15092 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15093 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15094 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15095 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15096 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15097 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15098 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15099 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15100 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15101 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15102 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15103 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15104 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15105 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15106 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15107 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15108 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15109 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15110 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15111 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15112 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15113 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15114 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15117 static const struct builtin_description bdesc_1arg[] =
15119 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15120 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15122 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15123 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15124 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15126 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15127 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15128 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15129 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15130 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15131 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15133 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15134 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15136 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15138 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15139 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15141 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15142 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15143 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15144 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15145 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15147 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15149 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15150 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15151 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15152 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15154 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15155 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15156 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15159 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15160 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15163 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15164 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15165 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15166 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15167 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15168 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15172 ix86_init_builtins (void)
15175 ix86_init_mmx_sse_builtins ();
15178 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15179 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15182 ix86_init_mmx_sse_builtins (void)
15184 const struct builtin_description * d;
15187 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15188 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15189 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15190 tree V2DI_type_node
15191 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15192 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15193 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15194 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15195 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15196 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15197 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15199 tree pchar_type_node = build_pointer_type (char_type_node);
15200 tree pcchar_type_node = build_pointer_type (
15201 build_type_variant (char_type_node, 1, 0));
15202 tree pfloat_type_node = build_pointer_type (float_type_node);
15203 tree pcfloat_type_node = build_pointer_type (
15204 build_type_variant (float_type_node, 1, 0));
15205 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15206 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15207 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15210 tree int_ftype_v4sf_v4sf
15211 = build_function_type_list (integer_type_node,
15212 V4SF_type_node, V4SF_type_node, NULL_TREE);
15213 tree v4si_ftype_v4sf_v4sf
15214 = build_function_type_list (V4SI_type_node,
15215 V4SF_type_node, V4SF_type_node, NULL_TREE);
15216 /* MMX/SSE/integer conversions. */
15217 tree int_ftype_v4sf
15218 = build_function_type_list (integer_type_node,
15219 V4SF_type_node, NULL_TREE);
15220 tree int64_ftype_v4sf
15221 = build_function_type_list (long_long_integer_type_node,
15222 V4SF_type_node, NULL_TREE);
15223 tree int_ftype_v8qi
15224 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15225 tree v4sf_ftype_v4sf_int
15226 = build_function_type_list (V4SF_type_node,
15227 V4SF_type_node, integer_type_node, NULL_TREE);
15228 tree v4sf_ftype_v4sf_int64
15229 = build_function_type_list (V4SF_type_node,
15230 V4SF_type_node, long_long_integer_type_node,
15232 tree v4sf_ftype_v4sf_v2si
15233 = build_function_type_list (V4SF_type_node,
15234 V4SF_type_node, V2SI_type_node, NULL_TREE);
15236 /* Miscellaneous. */
15237 tree v8qi_ftype_v4hi_v4hi
15238 = build_function_type_list (V8QI_type_node,
15239 V4HI_type_node, V4HI_type_node, NULL_TREE);
15240 tree v4hi_ftype_v2si_v2si
15241 = build_function_type_list (V4HI_type_node,
15242 V2SI_type_node, V2SI_type_node, NULL_TREE);
15243 tree v4sf_ftype_v4sf_v4sf_int
15244 = build_function_type_list (V4SF_type_node,
15245 V4SF_type_node, V4SF_type_node,
15246 integer_type_node, NULL_TREE);
15247 tree v2si_ftype_v4hi_v4hi
15248 = build_function_type_list (V2SI_type_node,
15249 V4HI_type_node, V4HI_type_node, NULL_TREE);
15250 tree v4hi_ftype_v4hi_int
15251 = build_function_type_list (V4HI_type_node,
15252 V4HI_type_node, integer_type_node, NULL_TREE);
15253 tree v4hi_ftype_v4hi_di
15254 = build_function_type_list (V4HI_type_node,
15255 V4HI_type_node, long_long_unsigned_type_node,
15257 tree v2si_ftype_v2si_di
15258 = build_function_type_list (V2SI_type_node,
15259 V2SI_type_node, long_long_unsigned_type_node,
15261 tree void_ftype_void
15262 = build_function_type (void_type_node, void_list_node);
15263 tree void_ftype_unsigned
15264 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15265 tree void_ftype_unsigned_unsigned
15266 = build_function_type_list (void_type_node, unsigned_type_node,
15267 unsigned_type_node, NULL_TREE);
15268 tree void_ftype_pcvoid_unsigned_unsigned
15269 = build_function_type_list (void_type_node, const_ptr_type_node,
15270 unsigned_type_node, unsigned_type_node,
15272 tree unsigned_ftype_void
15273 = build_function_type (unsigned_type_node, void_list_node);
15274 tree v2si_ftype_v4sf
15275 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15276 /* Loads/stores. */
15277 tree void_ftype_v8qi_v8qi_pchar
15278 = build_function_type_list (void_type_node,
15279 V8QI_type_node, V8QI_type_node,
15280 pchar_type_node, NULL_TREE);
15281 tree v4sf_ftype_pcfloat
15282 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15283 /* @@@ the type is bogus */
15284 tree v4sf_ftype_v4sf_pv2si
15285 = build_function_type_list (V4SF_type_node,
15286 V4SF_type_node, pv2si_type_node, NULL_TREE);
15287 tree void_ftype_pv2si_v4sf
15288 = build_function_type_list (void_type_node,
15289 pv2si_type_node, V4SF_type_node, NULL_TREE);
15290 tree void_ftype_pfloat_v4sf
15291 = build_function_type_list (void_type_node,
15292 pfloat_type_node, V4SF_type_node, NULL_TREE);
15293 tree void_ftype_pdi_di
15294 = build_function_type_list (void_type_node,
15295 pdi_type_node, long_long_unsigned_type_node,
15297 tree void_ftype_pv2di_v2di
15298 = build_function_type_list (void_type_node,
15299 pv2di_type_node, V2DI_type_node, NULL_TREE);
15300 /* Normal vector unops. */
15301 tree v4sf_ftype_v4sf
15302 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15303 tree v16qi_ftype_v16qi
15304 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15305 tree v8hi_ftype_v8hi
15306 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15307 tree v4si_ftype_v4si
15308 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15309 tree v8qi_ftype_v8qi
15310 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15311 tree v4hi_ftype_v4hi
15312 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15314 /* Normal vector binops. */
15315 tree v4sf_ftype_v4sf_v4sf
15316 = build_function_type_list (V4SF_type_node,
15317 V4SF_type_node, V4SF_type_node, NULL_TREE);
15318 tree v8qi_ftype_v8qi_v8qi
15319 = build_function_type_list (V8QI_type_node,
15320 V8QI_type_node, V8QI_type_node, NULL_TREE);
15321 tree v4hi_ftype_v4hi_v4hi
15322 = build_function_type_list (V4HI_type_node,
15323 V4HI_type_node, V4HI_type_node, NULL_TREE);
15324 tree v2si_ftype_v2si_v2si
15325 = build_function_type_list (V2SI_type_node,
15326 V2SI_type_node, V2SI_type_node, NULL_TREE);
15327 tree di_ftype_di_di
15328 = build_function_type_list (long_long_unsigned_type_node,
15329 long_long_unsigned_type_node,
15330 long_long_unsigned_type_node, NULL_TREE);
15332 tree di_ftype_di_di_int
15333 = build_function_type_list (long_long_unsigned_type_node,
15334 long_long_unsigned_type_node,
15335 long_long_unsigned_type_node,
15336 integer_type_node, NULL_TREE);
15338 tree v2si_ftype_v2sf
15339 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15340 tree v2sf_ftype_v2si
15341 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15342 tree v2si_ftype_v2si
15343 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15344 tree v2sf_ftype_v2sf
15345 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15346 tree v2sf_ftype_v2sf_v2sf
15347 = build_function_type_list (V2SF_type_node,
15348 V2SF_type_node, V2SF_type_node, NULL_TREE);
15349 tree v2si_ftype_v2sf_v2sf
15350 = build_function_type_list (V2SI_type_node,
15351 V2SF_type_node, V2SF_type_node, NULL_TREE);
15352 tree pint_type_node = build_pointer_type (integer_type_node);
15353 tree pdouble_type_node = build_pointer_type (double_type_node);
15354 tree pcdouble_type_node = build_pointer_type (
15355 build_type_variant (double_type_node, 1, 0));
15356 tree int_ftype_v2df_v2df
15357 = build_function_type_list (integer_type_node,
15358 V2DF_type_node, V2DF_type_node, NULL_TREE);
15360 tree void_ftype_pcvoid
15361 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15362 tree v4sf_ftype_v4si
15363 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15364 tree v4si_ftype_v4sf
15365 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15366 tree v2df_ftype_v4si
15367 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15368 tree v4si_ftype_v2df
15369 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15370 tree v2si_ftype_v2df
15371 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15372 tree v4sf_ftype_v2df
15373 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15374 tree v2df_ftype_v2si
15375 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15376 tree v2df_ftype_v4sf
15377 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15378 tree int_ftype_v2df
15379 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15380 tree int64_ftype_v2df
15381 = build_function_type_list (long_long_integer_type_node,
15382 V2DF_type_node, NULL_TREE);
15383 tree v2df_ftype_v2df_int
15384 = build_function_type_list (V2DF_type_node,
15385 V2DF_type_node, integer_type_node, NULL_TREE);
15386 tree v2df_ftype_v2df_int64
15387 = build_function_type_list (V2DF_type_node,
15388 V2DF_type_node, long_long_integer_type_node,
15390 tree v4sf_ftype_v4sf_v2df
15391 = build_function_type_list (V4SF_type_node,
15392 V4SF_type_node, V2DF_type_node, NULL_TREE);
15393 tree v2df_ftype_v2df_v4sf
15394 = build_function_type_list (V2DF_type_node,
15395 V2DF_type_node, V4SF_type_node, NULL_TREE);
15396 tree v2df_ftype_v2df_v2df_int
15397 = build_function_type_list (V2DF_type_node,
15398 V2DF_type_node, V2DF_type_node,
15401 tree v2df_ftype_v2df_pcdouble
15402 = build_function_type_list (V2DF_type_node,
15403 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15404 tree void_ftype_pdouble_v2df
15405 = build_function_type_list (void_type_node,
15406 pdouble_type_node, V2DF_type_node, NULL_TREE);
15407 tree void_ftype_pint_int
15408 = build_function_type_list (void_type_node,
15409 pint_type_node, integer_type_node, NULL_TREE);
15410 tree void_ftype_v16qi_v16qi_pchar
15411 = build_function_type_list (void_type_node,
15412 V16QI_type_node, V16QI_type_node,
15413 pchar_type_node, NULL_TREE);
15414 tree v2df_ftype_pcdouble
15415 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15416 tree v2df_ftype_v2df_v2df
15417 = build_function_type_list (V2DF_type_node,
15418 V2DF_type_node, V2DF_type_node, NULL_TREE);
15419 tree v16qi_ftype_v16qi_v16qi
15420 = build_function_type_list (V16QI_type_node,
15421 V16QI_type_node, V16QI_type_node, NULL_TREE);
15422 tree v8hi_ftype_v8hi_v8hi
15423 = build_function_type_list (V8HI_type_node,
15424 V8HI_type_node, V8HI_type_node, NULL_TREE);
15425 tree v4si_ftype_v4si_v4si
15426 = build_function_type_list (V4SI_type_node,
15427 V4SI_type_node, V4SI_type_node, NULL_TREE);
15428 tree v2di_ftype_v2di_v2di
15429 = build_function_type_list (V2DI_type_node,
15430 V2DI_type_node, V2DI_type_node, NULL_TREE);
15431 tree v2di_ftype_v2df_v2df
15432 = build_function_type_list (V2DI_type_node,
15433 V2DF_type_node, V2DF_type_node, NULL_TREE);
15434 tree v2df_ftype_v2df
15435 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15436 tree v2di_ftype_v2di_int
15437 = build_function_type_list (V2DI_type_node,
15438 V2DI_type_node, integer_type_node, NULL_TREE);
15439 tree v2di_ftype_v2di_v2di_int
15440 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15441 V2DI_type_node, integer_type_node, NULL_TREE);
15442 tree v4si_ftype_v4si_int
15443 = build_function_type_list (V4SI_type_node,
15444 V4SI_type_node, integer_type_node, NULL_TREE);
15445 tree v8hi_ftype_v8hi_int
15446 = build_function_type_list (V8HI_type_node,
15447 V8HI_type_node, integer_type_node, NULL_TREE);
15448 tree v4si_ftype_v8hi_v8hi
15449 = build_function_type_list (V4SI_type_node,
15450 V8HI_type_node, V8HI_type_node, NULL_TREE);
15451 tree di_ftype_v8qi_v8qi
15452 = build_function_type_list (long_long_unsigned_type_node,
15453 V8QI_type_node, V8QI_type_node, NULL_TREE);
15454 tree di_ftype_v2si_v2si
15455 = build_function_type_list (long_long_unsigned_type_node,
15456 V2SI_type_node, V2SI_type_node, NULL_TREE);
15457 tree v2di_ftype_v16qi_v16qi
15458 = build_function_type_list (V2DI_type_node,
15459 V16QI_type_node, V16QI_type_node, NULL_TREE);
15460 tree v2di_ftype_v4si_v4si
15461 = build_function_type_list (V2DI_type_node,
15462 V4SI_type_node, V4SI_type_node, NULL_TREE);
15463 tree int_ftype_v16qi
15464 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15465 tree v16qi_ftype_pcchar
15466 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15467 tree void_ftype_pchar_v16qi
15468 = build_function_type_list (void_type_node,
15469 pchar_type_node, V16QI_type_node, NULL_TREE);
15472 tree float128_type;
15475 /* The __float80 type. */
15476 if (TYPE_MODE (long_double_type_node) == XFmode)
15477 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15481 /* The __float80 type. */
15482 float80_type = make_node (REAL_TYPE);
15483 TYPE_PRECISION (float80_type) = 80;
15484 layout_type (float80_type);
15485 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15490 float128_type = make_node (REAL_TYPE);
15491 TYPE_PRECISION (float128_type) = 128;
15492 layout_type (float128_type);
15493 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15496 /* Add all builtins that are more or less simple operations on two
15498 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15500 /* Use one of the operands; the target can have a different mode for
15501 mask-generating compares. */
15502 enum machine_mode mode;
15507 mode = insn_data[d->icode].operand[1].mode;
15512 type = v16qi_ftype_v16qi_v16qi;
15515 type = v8hi_ftype_v8hi_v8hi;
15518 type = v4si_ftype_v4si_v4si;
15521 type = v2di_ftype_v2di_v2di;
15524 type = v2df_ftype_v2df_v2df;
15527 type = v4sf_ftype_v4sf_v4sf;
15530 type = v8qi_ftype_v8qi_v8qi;
15533 type = v4hi_ftype_v4hi_v4hi;
15536 type = v2si_ftype_v2si_v2si;
15539 type = di_ftype_di_di;
15543 gcc_unreachable ();
15546 /* Override for comparisons. */
15547 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15548 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15549 type = v4si_ftype_v4sf_v4sf;
15551 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15552 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15553 type = v2di_ftype_v2df_v2df;
15555 def_builtin (d->mask, d->name, type, d->code);
15558 /* Add all builtins that are more or less simple operations on 1 operand. */
15559 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15561 enum machine_mode mode;
15566 mode = insn_data[d->icode].operand[1].mode;
15571 type = v16qi_ftype_v16qi;
15574 type = v8hi_ftype_v8hi;
15577 type = v4si_ftype_v4si;
15580 type = v2df_ftype_v2df;
15583 type = v4sf_ftype_v4sf;
15586 type = v8qi_ftype_v8qi;
15589 type = v4hi_ftype_v4hi;
15592 type = v2si_ftype_v2si;
15599 def_builtin (d->mask, d->name, type, d->code);
15602 /* Add the remaining MMX insns with somewhat more complicated types. */
15603 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15604 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15605 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15606 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15608 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15609 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15610 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15612 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15613 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15615 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15616 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15618 /* comi/ucomi insns. */
15619 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15620 if (d->mask == MASK_SSE2)
15621 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15623 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15625 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15626 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15627 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15629 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15630 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15631 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15632 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15633 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15634 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15635 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15636 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15637 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15638 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15639 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15641 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15643 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15644 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15646 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15647 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15648 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15649 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15651 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15652 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15653 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15654 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15656 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15658 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15660 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15661 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15662 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15663 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15664 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15665 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15667 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15669 /* Original 3DNow! */
15670 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15671 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15672 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15673 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15674 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15675 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15676 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15677 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15678 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15679 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15680 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15681 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15682 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15683 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15684 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15685 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15686 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15687 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15688 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15689 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15691 /* 3DNow! extension as used in the Athlon CPU. */
15692 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15693 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15694 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15695 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15696 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15697 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15700 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15702 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15703 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15705 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15706 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15708 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15709 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15710 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15711 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15712 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15714 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15715 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15716 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15717 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15719 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15720 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15722 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15724 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15725 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15727 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15728 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15729 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15730 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15731 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15733 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15735 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15736 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15737 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15738 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15740 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15741 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15742 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15744 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15745 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15746 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15747 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15749 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15750 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15751 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15753 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15754 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15756 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15757 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15759 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15760 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15761 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15763 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15764 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15765 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15767 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15768 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15770 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15771 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15772 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15773 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15775 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15776 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15777 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15778 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15780 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15781 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15783 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15785 /* Prescott New Instructions. */
15786 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15787 void_ftype_pcvoid_unsigned_unsigned,
15788 IX86_BUILTIN_MONITOR);
15789 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15790 void_ftype_unsigned_unsigned,
15791 IX86_BUILTIN_MWAIT);
15792 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15794 IX86_BUILTIN_MOVSHDUP);
15795 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15797 IX86_BUILTIN_MOVSLDUP);
15798 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15799 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15802 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15803 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15804 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15805 IX86_BUILTIN_PALIGNR);
15807 /* Access to the vec_init patterns. */
15808 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15809 integer_type_node, NULL_TREE);
15810 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15811 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15813 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15814 short_integer_type_node,
15815 short_integer_type_node,
15816 short_integer_type_node, NULL_TREE);
15817 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15818 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15820 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15821 char_type_node, char_type_node,
15822 char_type_node, char_type_node,
15823 char_type_node, char_type_node,
15824 char_type_node, NULL_TREE);
15825 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15826 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15828 /* Access to the vec_extract patterns. */
15829 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15830 integer_type_node, NULL_TREE);
15831 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15832 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15834 ftype = build_function_type_list (long_long_integer_type_node,
15835 V2DI_type_node, integer_type_node,
15837 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15838 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15840 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15841 integer_type_node, NULL_TREE);
15842 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15843 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15845 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15846 integer_type_node, NULL_TREE);
15847 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15848 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15850 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15851 integer_type_node, NULL_TREE);
15852 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15853 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15855 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15856 integer_type_node, NULL_TREE);
15857 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15858 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15860 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15861 integer_type_node, NULL_TREE);
15862 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15863 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15865 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15866 integer_type_node, NULL_TREE);
15867 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15869 /* Access to the vec_set patterns. */
15870 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15872 integer_type_node, NULL_TREE);
15873 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15874 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15876 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15878 integer_type_node, NULL_TREE);
15879 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15880 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15883 /* Errors in the source file can cause expand_expr to return const0_rtx
15884 where we expect a vector. To avoid crashing, use one of the vector
15885 clear instructions. */
15887 safe_vector_operand (rtx x, enum machine_mode mode)
15889 if (x == const0_rtx)
15890 x = CONST0_RTX (mode);
15894 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15897 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15900 tree arg0 = TREE_VALUE (arglist);
15901 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15902 rtx op0 = expand_normal (arg0);
15903 rtx op1 = expand_normal (arg1);
15904 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15905 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15906 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15908 if (VECTOR_MODE_P (mode0))
15909 op0 = safe_vector_operand (op0, mode0);
15910 if (VECTOR_MODE_P (mode1))
15911 op1 = safe_vector_operand (op1, mode1);
15913 if (optimize || !target
15914 || GET_MODE (target) != tmode
15915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15916 target = gen_reg_rtx (tmode);
15918 if (GET_MODE (op1) == SImode && mode1 == TImode)
15920 rtx x = gen_reg_rtx (V4SImode);
15921 emit_insn (gen_sse2_loadd (x, op1));
15922 op1 = gen_lowpart (TImode, x);
15925 /* The insn must want input operands in the same modes as the
15927 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15928 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15930 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15931 op0 = copy_to_mode_reg (mode0, op0);
15932 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15933 op1 = copy_to_mode_reg (mode1, op1);
15935 /* ??? Using ix86_fixup_binary_operands is problematic when
15936 we've got mismatched modes. Fake it. */
15942 if (tmode == mode0 && tmode == mode1)
15944 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15948 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15950 op0 = force_reg (mode0, op0);
15951 op1 = force_reg (mode1, op1);
15952 target = gen_reg_rtx (tmode);
15955 pat = GEN_FCN (icode) (target, op0, op1);
15962 /* Subroutine of ix86_expand_builtin to take care of stores. */
15965 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15968 tree arg0 = TREE_VALUE (arglist);
15969 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15970 rtx op0 = expand_normal (arg0);
15971 rtx op1 = expand_normal (arg1);
15972 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15973 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15975 if (VECTOR_MODE_P (mode1))
15976 op1 = safe_vector_operand (op1, mode1);
15978 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15979 op1 = copy_to_mode_reg (mode1, op1);
15981 pat = GEN_FCN (icode) (op0, op1);
15987 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15990 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15991 rtx target, int do_load)
15994 tree arg0 = TREE_VALUE (arglist);
15995 rtx op0 = expand_normal (arg0);
15996 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15997 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15999 if (optimize || !target
16000 || GET_MODE (target) != tmode
16001 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16002 target = gen_reg_rtx (tmode);
16004 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16007 if (VECTOR_MODE_P (mode0))
16008 op0 = safe_vector_operand (op0, mode0);
16010 if ((optimize && !register_operand (op0, mode0))
16011 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16012 op0 = copy_to_mode_reg (mode0, op0);
16015 pat = GEN_FCN (icode) (target, op0);
16022 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16023 sqrtss, rsqrtss, rcpss. */
16026 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16029 tree arg0 = TREE_VALUE (arglist);
16030 rtx op1, op0 = expand_normal (arg0);
16031 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16032 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16034 if (optimize || !target
16035 || GET_MODE (target) != tmode
16036 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16037 target = gen_reg_rtx (tmode);
16039 if (VECTOR_MODE_P (mode0))
16040 op0 = safe_vector_operand (op0, mode0);
16042 if ((optimize && !register_operand (op0, mode0))
16043 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16044 op0 = copy_to_mode_reg (mode0, op0);
16047 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16048 op1 = copy_to_mode_reg (mode0, op1);
16050 pat = GEN_FCN (icode) (target, op0, op1);
16057 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16060 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16064 tree arg0 = TREE_VALUE (arglist);
16065 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16066 rtx op0 = expand_normal (arg0);
16067 rtx op1 = expand_normal (arg1);
16069 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16070 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16071 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16072 enum rtx_code comparison = d->comparison;
16074 if (VECTOR_MODE_P (mode0))
16075 op0 = safe_vector_operand (op0, mode0);
16076 if (VECTOR_MODE_P (mode1))
16077 op1 = safe_vector_operand (op1, mode1);
16079 /* Swap operands if we have a comparison that isn't available in
16081 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16083 rtx tmp = gen_reg_rtx (mode1);
16084 emit_move_insn (tmp, op1);
16089 if (optimize || !target
16090 || GET_MODE (target) != tmode
16091 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16092 target = gen_reg_rtx (tmode);
16094 if ((optimize && !register_operand (op0, mode0))
16095 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16096 op0 = copy_to_mode_reg (mode0, op0);
16097 if ((optimize && !register_operand (op1, mode1))
16098 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16099 op1 = copy_to_mode_reg (mode1, op1);
16101 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16102 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16109 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16112 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16116 tree arg0 = TREE_VALUE (arglist);
16117 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16118 rtx op0 = expand_normal (arg0);
16119 rtx op1 = expand_normal (arg1);
16121 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16122 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16123 enum rtx_code comparison = d->comparison;
16125 if (VECTOR_MODE_P (mode0))
16126 op0 = safe_vector_operand (op0, mode0);
16127 if (VECTOR_MODE_P (mode1))
16128 op1 = safe_vector_operand (op1, mode1);
16130 /* Swap operands if we have a comparison that isn't available in
16132 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16139 target = gen_reg_rtx (SImode);
16140 emit_move_insn (target, const0_rtx);
16141 target = gen_rtx_SUBREG (QImode, target, 0);
16143 if ((optimize && !register_operand (op0, mode0))
16144 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16145 op0 = copy_to_mode_reg (mode0, op0);
16146 if ((optimize && !register_operand (op1, mode1))
16147 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16148 op1 = copy_to_mode_reg (mode1, op1);
16150 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16151 pat = GEN_FCN (d->icode) (op0, op1);
16155 emit_insn (gen_rtx_SET (VOIDmode,
16156 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16157 gen_rtx_fmt_ee (comparison, QImode,
16161 return SUBREG_REG (target);
16164 /* Return the integer constant in ARG. Constrain it to be in the range
16165 of the subparts of VEC_TYPE; issue an error if not. */
16168 get_element_number (tree vec_type, tree arg)
16170 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16172 if (!host_integerp (arg, 1)
16173 || (elt = tree_low_cst (arg, 1), elt > max))
16175 error ("selector must be an integer constant in the range 0..%wi", max);
16182 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16183 ix86_expand_vector_init. We DO have language-level syntax for this, in
16184 the form of (type){ init-list }. Except that since we can't place emms
16185 instructions from inside the compiler, we can't allow the use of MMX
16186 registers unless the user explicitly asks for it. So we do *not* define
16187 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16188 we have builtins invoked by mmintrin.h that gives us license to emit
16189 these sorts of instructions. */
16192 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16194 enum machine_mode tmode = TYPE_MODE (type);
16195 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16196 int i, n_elt = GET_MODE_NUNITS (tmode);
16197 rtvec v = rtvec_alloc (n_elt);
16199 gcc_assert (VECTOR_MODE_P (tmode));
16201 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16203 rtx x = expand_normal (TREE_VALUE (arglist));
16204 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16207 gcc_assert (arglist == NULL);
16209 if (!target || !register_operand (target, tmode))
16210 target = gen_reg_rtx (tmode);
16212 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16216 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16217 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16218 had a language-level syntax for referencing vector elements. */
16221 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16223 enum machine_mode tmode, mode0;
16228 arg0 = TREE_VALUE (arglist);
16229 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16231 op0 = expand_normal (arg0);
16232 elt = get_element_number (TREE_TYPE (arg0), arg1);
16234 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16235 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16236 gcc_assert (VECTOR_MODE_P (mode0));
16238 op0 = force_reg (mode0, op0);
16240 if (optimize || !target || !register_operand (target, tmode))
16241 target = gen_reg_rtx (tmode);
16243 ix86_expand_vector_extract (true, target, op0, elt);
16248 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16249 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16250 a language-level syntax for referencing vector elements. */
16253 ix86_expand_vec_set_builtin (tree arglist)
16255 enum machine_mode tmode, mode1;
16256 tree arg0, arg1, arg2;
16258 rtx op0, op1, target;
16260 arg0 = TREE_VALUE (arglist);
16261 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16262 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16264 tmode = TYPE_MODE (TREE_TYPE (arg0));
16265 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16266 gcc_assert (VECTOR_MODE_P (tmode));
16268 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16269 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16270 elt = get_element_number (TREE_TYPE (arg0), arg2);
16272 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16273 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16275 op0 = force_reg (tmode, op0);
16276 op1 = force_reg (mode1, op1);
16278 /* OP0 is the source of these builtin functions and shouldn't be
16279 modified. Create a copy, use it and return it as target. */
16280 target = gen_reg_rtx (tmode);
16281 emit_move_insn (target, op0);
16282 ix86_expand_vector_set (true, target, op1, elt);
16287 /* Expand an expression EXP that calls a built-in function,
16288 with result going to TARGET if that's convenient
16289 (and in mode MODE if that's convenient).
16290 SUBTARGET may be used as the target for computing one of EXP's operands.
16291 IGNORE is nonzero if the value is to be ignored. */
16294 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16295 enum machine_mode mode ATTRIBUTE_UNUSED,
16296 int ignore ATTRIBUTE_UNUSED)
16298 const struct builtin_description *d;
16300 enum insn_code icode;
16301 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16302 tree arglist = TREE_OPERAND (exp, 1);
16303 tree arg0, arg1, arg2;
16304 rtx op0, op1, op2, pat;
16305 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16306 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16310 case IX86_BUILTIN_EMMS:
16311 emit_insn (gen_mmx_emms ());
16314 case IX86_BUILTIN_SFENCE:
16315 emit_insn (gen_sse_sfence ());
16318 case IX86_BUILTIN_MASKMOVQ:
16319 case IX86_BUILTIN_MASKMOVDQU:
16320 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16321 ? CODE_FOR_mmx_maskmovq
16322 : CODE_FOR_sse2_maskmovdqu);
16323 /* Note the arg order is different from the operand order. */
16324 arg1 = TREE_VALUE (arglist);
16325 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16326 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16327 op0 = expand_normal (arg0);
16328 op1 = expand_normal (arg1);
16329 op2 = expand_normal (arg2);
16330 mode0 = insn_data[icode].operand[0].mode;
16331 mode1 = insn_data[icode].operand[1].mode;
16332 mode2 = insn_data[icode].operand[2].mode;
16334 op0 = force_reg (Pmode, op0);
16335 op0 = gen_rtx_MEM (mode1, op0);
16337 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16338 op0 = copy_to_mode_reg (mode0, op0);
16339 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16340 op1 = copy_to_mode_reg (mode1, op1);
16341 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16342 op2 = copy_to_mode_reg (mode2, op2);
16343 pat = GEN_FCN (icode) (op0, op1, op2);
16349 case IX86_BUILTIN_SQRTSS:
16350 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16351 case IX86_BUILTIN_RSQRTSS:
16352 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16353 case IX86_BUILTIN_RCPSS:
16354 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16356 case IX86_BUILTIN_LOADUPS:
16357 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16359 case IX86_BUILTIN_STOREUPS:
16360 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16362 case IX86_BUILTIN_LOADHPS:
16363 case IX86_BUILTIN_LOADLPS:
16364 case IX86_BUILTIN_LOADHPD:
16365 case IX86_BUILTIN_LOADLPD:
16366 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16367 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16368 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16369 : CODE_FOR_sse2_loadlpd);
16370 arg0 = TREE_VALUE (arglist);
16371 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16372 op0 = expand_normal (arg0);
16373 op1 = expand_normal (arg1);
16374 tmode = insn_data[icode].operand[0].mode;
16375 mode0 = insn_data[icode].operand[1].mode;
16376 mode1 = insn_data[icode].operand[2].mode;
16378 op0 = force_reg (mode0, op0);
16379 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16380 if (optimize || target == 0
16381 || GET_MODE (target) != tmode
16382 || !register_operand (target, tmode))
16383 target = gen_reg_rtx (tmode);
16384 pat = GEN_FCN (icode) (target, op0, op1);
16390 case IX86_BUILTIN_STOREHPS:
16391 case IX86_BUILTIN_STORELPS:
16392 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16393 : CODE_FOR_sse_storelps);
16394 arg0 = TREE_VALUE (arglist);
16395 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16396 op0 = expand_normal (arg0);
16397 op1 = expand_normal (arg1);
16398 mode0 = insn_data[icode].operand[0].mode;
16399 mode1 = insn_data[icode].operand[1].mode;
16401 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16402 op1 = force_reg (mode1, op1);
16404 pat = GEN_FCN (icode) (op0, op1);
16410 case IX86_BUILTIN_MOVNTPS:
16411 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16412 case IX86_BUILTIN_MOVNTQ:
16413 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16415 case IX86_BUILTIN_LDMXCSR:
16416 op0 = expand_normal (TREE_VALUE (arglist));
16417 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16418 emit_move_insn (target, op0);
16419 emit_insn (gen_sse_ldmxcsr (target));
16422 case IX86_BUILTIN_STMXCSR:
16423 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16424 emit_insn (gen_sse_stmxcsr (target));
16425 return copy_to_mode_reg (SImode, target);
16427 case IX86_BUILTIN_SHUFPS:
16428 case IX86_BUILTIN_SHUFPD:
16429 icode = (fcode == IX86_BUILTIN_SHUFPS
16430 ? CODE_FOR_sse_shufps
16431 : CODE_FOR_sse2_shufpd);
16432 arg0 = TREE_VALUE (arglist);
16433 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16434 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16435 op0 = expand_normal (arg0);
16436 op1 = expand_normal (arg1);
16437 op2 = expand_normal (arg2);
16438 tmode = insn_data[icode].operand[0].mode;
16439 mode0 = insn_data[icode].operand[1].mode;
16440 mode1 = insn_data[icode].operand[2].mode;
16441 mode2 = insn_data[icode].operand[3].mode;
16443 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16444 op0 = copy_to_mode_reg (mode0, op0);
16445 if ((optimize && !register_operand (op1, mode1))
16446 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16447 op1 = copy_to_mode_reg (mode1, op1);
16448 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16450 /* @@@ better error message */
16451 error ("mask must be an immediate");
16452 return gen_reg_rtx (tmode);
16454 if (optimize || target == 0
16455 || GET_MODE (target) != tmode
16456 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16457 target = gen_reg_rtx (tmode);
16458 pat = GEN_FCN (icode) (target, op0, op1, op2);
16464 case IX86_BUILTIN_PSHUFW:
16465 case IX86_BUILTIN_PSHUFD:
16466 case IX86_BUILTIN_PSHUFHW:
16467 case IX86_BUILTIN_PSHUFLW:
16468 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16469 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16470 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16471 : CODE_FOR_mmx_pshufw);
16472 arg0 = TREE_VALUE (arglist);
16473 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16474 op0 = expand_normal (arg0);
16475 op1 = expand_normal (arg1);
16476 tmode = insn_data[icode].operand[0].mode;
16477 mode1 = insn_data[icode].operand[1].mode;
16478 mode2 = insn_data[icode].operand[2].mode;
16480 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16481 op0 = copy_to_mode_reg (mode1, op0);
16482 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16484 /* @@@ better error message */
16485 error ("mask must be an immediate");
16489 || GET_MODE (target) != tmode
16490 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16491 target = gen_reg_rtx (tmode);
16492 pat = GEN_FCN (icode) (target, op0, op1);
16498 case IX86_BUILTIN_PSLLWI128:
16499 icode = CODE_FOR_ashlv8hi3;
16501 case IX86_BUILTIN_PSLLDI128:
16502 icode = CODE_FOR_ashlv4si3;
16504 case IX86_BUILTIN_PSLLQI128:
16505 icode = CODE_FOR_ashlv2di3;
16507 case IX86_BUILTIN_PSRAWI128:
16508 icode = CODE_FOR_ashrv8hi3;
16510 case IX86_BUILTIN_PSRADI128:
16511 icode = CODE_FOR_ashrv4si3;
16513 case IX86_BUILTIN_PSRLWI128:
16514 icode = CODE_FOR_lshrv8hi3;
16516 case IX86_BUILTIN_PSRLDI128:
16517 icode = CODE_FOR_lshrv4si3;
16519 case IX86_BUILTIN_PSRLQI128:
16520 icode = CODE_FOR_lshrv2di3;
16523 arg0 = TREE_VALUE (arglist);
16524 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16525 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16526 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16528 if (GET_CODE (op1) != CONST_INT)
16530 error ("shift must be an immediate");
16533 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16534 op1 = GEN_INT (255);
16536 tmode = insn_data[icode].operand[0].mode;
16537 mode1 = insn_data[icode].operand[1].mode;
16538 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16539 op0 = copy_to_reg (op0);
16541 target = gen_reg_rtx (tmode);
16542 pat = GEN_FCN (icode) (target, op0, op1);
16548 case IX86_BUILTIN_PSLLW128:
16549 icode = CODE_FOR_ashlv8hi3;
16551 case IX86_BUILTIN_PSLLD128:
16552 icode = CODE_FOR_ashlv4si3;
16554 case IX86_BUILTIN_PSLLQ128:
16555 icode = CODE_FOR_ashlv2di3;
16557 case IX86_BUILTIN_PSRAW128:
16558 icode = CODE_FOR_ashrv8hi3;
16560 case IX86_BUILTIN_PSRAD128:
16561 icode = CODE_FOR_ashrv4si3;
16563 case IX86_BUILTIN_PSRLW128:
16564 icode = CODE_FOR_lshrv8hi3;
16566 case IX86_BUILTIN_PSRLD128:
16567 icode = CODE_FOR_lshrv4si3;
16569 case IX86_BUILTIN_PSRLQ128:
16570 icode = CODE_FOR_lshrv2di3;
16573 arg0 = TREE_VALUE (arglist);
16574 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16575 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16576 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16578 tmode = insn_data[icode].operand[0].mode;
16579 mode1 = insn_data[icode].operand[1].mode;
16581 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16582 op0 = copy_to_reg (op0);
16584 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16585 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16586 op1 = copy_to_reg (op1);
16588 target = gen_reg_rtx (tmode);
16589 pat = GEN_FCN (icode) (target, op0, op1);
16595 case IX86_BUILTIN_PSLLDQI128:
16596 case IX86_BUILTIN_PSRLDQI128:
16597 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16598 : CODE_FOR_sse2_lshrti3);
16599 arg0 = TREE_VALUE (arglist);
16600 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16601 op0 = expand_normal (arg0);
16602 op1 = expand_normal (arg1);
16603 tmode = insn_data[icode].operand[0].mode;
16604 mode1 = insn_data[icode].operand[1].mode;
16605 mode2 = insn_data[icode].operand[2].mode;
16607 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16609 op0 = copy_to_reg (op0);
16610 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16612 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16614 error ("shift must be an immediate");
16617 target = gen_reg_rtx (V2DImode);
16618 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16625 case IX86_BUILTIN_FEMMS:
16626 emit_insn (gen_mmx_femms ());
16629 case IX86_BUILTIN_PAVGUSB:
16630 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16632 case IX86_BUILTIN_PF2ID:
16633 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16635 case IX86_BUILTIN_PFACC:
16636 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16638 case IX86_BUILTIN_PFADD:
16639 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16641 case IX86_BUILTIN_PFCMPEQ:
16642 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16644 case IX86_BUILTIN_PFCMPGE:
16645 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16647 case IX86_BUILTIN_PFCMPGT:
16648 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16650 case IX86_BUILTIN_PFMAX:
16651 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16653 case IX86_BUILTIN_PFMIN:
16654 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16656 case IX86_BUILTIN_PFMUL:
16657 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16659 case IX86_BUILTIN_PFRCP:
16660 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16662 case IX86_BUILTIN_PFRCPIT1:
16663 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16665 case IX86_BUILTIN_PFRCPIT2:
16666 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16668 case IX86_BUILTIN_PFRSQIT1:
16669 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16671 case IX86_BUILTIN_PFRSQRT:
16672 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16674 case IX86_BUILTIN_PFSUB:
16675 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16677 case IX86_BUILTIN_PFSUBR:
16678 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16680 case IX86_BUILTIN_PI2FD:
16681 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16683 case IX86_BUILTIN_PMULHRW:
16684 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16686 case IX86_BUILTIN_PF2IW:
16687 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16689 case IX86_BUILTIN_PFNACC:
16690 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16692 case IX86_BUILTIN_PFPNACC:
16693 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16695 case IX86_BUILTIN_PI2FW:
16696 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16698 case IX86_BUILTIN_PSWAPDSI:
16699 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16701 case IX86_BUILTIN_PSWAPDSF:
16702 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16704 case IX86_BUILTIN_SQRTSD:
16705 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16706 case IX86_BUILTIN_LOADUPD:
16707 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16708 case IX86_BUILTIN_STOREUPD:
16709 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16711 case IX86_BUILTIN_MFENCE:
16712 emit_insn (gen_sse2_mfence ());
16714 case IX86_BUILTIN_LFENCE:
16715 emit_insn (gen_sse2_lfence ());
16718 case IX86_BUILTIN_CLFLUSH:
16719 arg0 = TREE_VALUE (arglist);
16720 op0 = expand_normal (arg0);
16721 icode = CODE_FOR_sse2_clflush;
16722 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16723 op0 = copy_to_mode_reg (Pmode, op0);
16725 emit_insn (gen_sse2_clflush (op0));
16728 case IX86_BUILTIN_MOVNTPD:
16729 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16730 case IX86_BUILTIN_MOVNTDQ:
16731 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16732 case IX86_BUILTIN_MOVNTI:
16733 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16735 case IX86_BUILTIN_LOADDQU:
16736 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16737 case IX86_BUILTIN_STOREDQU:
16738 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16740 case IX86_BUILTIN_MONITOR:
16741 arg0 = TREE_VALUE (arglist);
16742 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16743 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16744 op0 = expand_normal (arg0);
16745 op1 = expand_normal (arg1);
16746 op2 = expand_normal (arg2);
16748 op0 = copy_to_mode_reg (Pmode, op0);
16750 op1 = copy_to_mode_reg (SImode, op1);
16752 op2 = copy_to_mode_reg (SImode, op2);
16754 emit_insn (gen_sse3_monitor (op0, op1, op2));
16756 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16759 case IX86_BUILTIN_MWAIT:
16760 arg0 = TREE_VALUE (arglist);
16761 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16762 op0 = expand_normal (arg0);
16763 op1 = expand_normal (arg1);
16765 op0 = copy_to_mode_reg (SImode, op0);
16767 op1 = copy_to_mode_reg (SImode, op1);
16768 emit_insn (gen_sse3_mwait (op0, op1));
16771 case IX86_BUILTIN_LDDQU:
16772 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16775 case IX86_BUILTIN_PALIGNR:
16776 case IX86_BUILTIN_PALIGNR128:
16777 if (fcode == IX86_BUILTIN_PALIGNR)
16779 icode = CODE_FOR_ssse3_palignrdi;
16784 icode = CODE_FOR_ssse3_palignrti;
16787 arg0 = TREE_VALUE (arglist);
16788 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16789 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16790 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16791 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16792 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16793 tmode = insn_data[icode].operand[0].mode;
16794 mode1 = insn_data[icode].operand[1].mode;
16795 mode2 = insn_data[icode].operand[2].mode;
16796 mode3 = insn_data[icode].operand[3].mode;
16798 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16800 op0 = copy_to_reg (op0);
16801 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16803 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16805 op1 = copy_to_reg (op1);
16806 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16808 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16810 error ("shift must be an immediate");
16813 target = gen_reg_rtx (mode);
16814 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16821 case IX86_BUILTIN_VEC_INIT_V2SI:
16822 case IX86_BUILTIN_VEC_INIT_V4HI:
16823 case IX86_BUILTIN_VEC_INIT_V8QI:
16824 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16826 case IX86_BUILTIN_VEC_EXT_V2DF:
16827 case IX86_BUILTIN_VEC_EXT_V2DI:
16828 case IX86_BUILTIN_VEC_EXT_V4SF:
16829 case IX86_BUILTIN_VEC_EXT_V4SI:
16830 case IX86_BUILTIN_VEC_EXT_V8HI:
16831 case IX86_BUILTIN_VEC_EXT_V16QI:
16832 case IX86_BUILTIN_VEC_EXT_V2SI:
16833 case IX86_BUILTIN_VEC_EXT_V4HI:
16834 return ix86_expand_vec_ext_builtin (arglist, target);
16836 case IX86_BUILTIN_VEC_SET_V8HI:
16837 case IX86_BUILTIN_VEC_SET_V4HI:
16838 return ix86_expand_vec_set_builtin (arglist);
16844 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16845 if (d->code == fcode)
16847 /* Compares are treated specially. */
16848 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16849 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16850 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16851 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16852 return ix86_expand_sse_compare (d, arglist, target);
16854 return ix86_expand_binop_builtin (d->icode, arglist, target);
16857 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16858 if (d->code == fcode)
16859 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16861 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16862 if (d->code == fcode)
16863 return ix86_expand_sse_comi (d, arglist, target);
16865 gcc_unreachable ();
16868 /* Store OPERAND to the memory after reload is completed. This means
16869 that we can't easily use assign_stack_local. */
16871 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16875 gcc_assert (reload_completed);
16876 if (TARGET_RED_ZONE)
16878 result = gen_rtx_MEM (mode,
16879 gen_rtx_PLUS (Pmode,
16881 GEN_INT (-RED_ZONE_SIZE)));
16882 emit_move_insn (result, operand);
16884 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16890 operand = gen_lowpart (DImode, operand);
16894 gen_rtx_SET (VOIDmode,
16895 gen_rtx_MEM (DImode,
16896 gen_rtx_PRE_DEC (DImode,
16897 stack_pointer_rtx)),
16901 gcc_unreachable ();
16903 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16912 split_di (&operand, 1, operands, operands + 1);
16914 gen_rtx_SET (VOIDmode,
16915 gen_rtx_MEM (SImode,
16916 gen_rtx_PRE_DEC (Pmode,
16917 stack_pointer_rtx)),
16920 gen_rtx_SET (VOIDmode,
16921 gen_rtx_MEM (SImode,
16922 gen_rtx_PRE_DEC (Pmode,
16923 stack_pointer_rtx)),
16928 /* Store HImodes as SImodes. */
16929 operand = gen_lowpart (SImode, operand);
16933 gen_rtx_SET (VOIDmode,
16934 gen_rtx_MEM (GET_MODE (operand),
16935 gen_rtx_PRE_DEC (SImode,
16936 stack_pointer_rtx)),
16940 gcc_unreachable ();
16942 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16947 /* Free operand from the memory. */
16949 ix86_free_from_memory (enum machine_mode mode)
16951 if (!TARGET_RED_ZONE)
16955 if (mode == DImode || TARGET_64BIT)
16959 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16960 to pop or add instruction if registers are available. */
16961 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16962 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16967 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16968 QImode must go into class Q_REGS.
16969 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16970 movdf to do mem-to-mem moves through integer regs. */
16972 ix86_preferred_reload_class (rtx x, enum reg_class class)
16974 enum machine_mode mode = GET_MODE (x);
16976 /* We're only allowed to return a subclass of CLASS. Many of the
16977 following checks fail for NO_REGS, so eliminate that early. */
16978 if (class == NO_REGS)
16981 /* All classes can load zeros. */
16982 if (x == CONST0_RTX (mode))
16985 /* Force constants into memory if we are loading a (nonzero) constant into
16986 an MMX or SSE register. This is because there are no MMX/SSE instructions
16987 to load from a constant. */
16989 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16992 /* Prefer SSE regs only, if we can use them for math. */
16993 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16994 return SSE_CLASS_P (class) ? class : NO_REGS;
16996 /* Floating-point constants need more complex checks. */
16997 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16999 /* General regs can load everything. */
17000 if (reg_class_subset_p (class, GENERAL_REGS))
17003 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17004 zero above. We only want to wind up preferring 80387 registers if
17005 we plan on doing computation with them. */
17007 && standard_80387_constant_p (x))
17009 /* Limit class to non-sse. */
17010 if (class == FLOAT_SSE_REGS)
17012 if (class == FP_TOP_SSE_REGS)
17014 if (class == FP_SECOND_SSE_REGS)
17015 return FP_SECOND_REG;
17016 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17023 /* Generally when we see PLUS here, it's the function invariant
17024 (plus soft-fp const_int). Which can only be computed into general
17026 if (GET_CODE (x) == PLUS)
17027 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17029 /* QImode constants are easy to load, but non-constant QImode data
17030 must go into Q_REGS. */
17031 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17033 if (reg_class_subset_p (class, Q_REGS))
17035 if (reg_class_subset_p (Q_REGS, class))
17043 /* Discourage putting floating-point values in SSE registers unless
17044 SSE math is being used, and likewise for the 387 registers. */
17046 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17048 enum machine_mode mode = GET_MODE (x);
17050 /* Restrict the output reload class to the register bank that we are doing
17051 math on. If we would like not to return a subset of CLASS, reject this
17052 alternative: if reload cannot do this, it will still use its choice. */
17053 mode = GET_MODE (x);
17054 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17055 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17057 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17059 if (class == FP_TOP_SSE_REGS)
17061 else if (class == FP_SECOND_SSE_REGS)
17062 return FP_SECOND_REG;
17064 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17070 /* If we are copying between general and FP registers, we need a memory
17071 location. The same is true for SSE and MMX registers.
17073 The macro can't work reliably when one of the CLASSES is class containing
17074 registers from multiple units (SSE, MMX, integer). We avoid this by never
17075 combining those units in single alternative in the machine description.
17076 Ensure that this constraint holds to avoid unexpected surprises.
17078 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17079 enforce these sanity checks. */
17082 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17083 enum machine_mode mode, int strict)
17085 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17086 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17087 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17088 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17089 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17090 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17092 gcc_assert (!strict);
17096 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17099 /* ??? This is a lie. We do have moves between mmx/general, and for
17100 mmx/sse2. But by saying we need secondary memory we discourage the
17101 register allocator from using the mmx registers unless needed. */
17102 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17105 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17107 /* SSE1 doesn't have any direct moves from other classes. */
17111 /* If the target says that inter-unit moves are more expensive
17112 than moving through memory, then don't generate them. */
17113 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17116 /* Between SSE and general, we have moves no larger than word size. */
17117 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17120 /* ??? For the cost of one register reformat penalty, we could use
17121 the same instructions to move SFmode and DFmode data, but the
17122 relevant move patterns don't support those alternatives. */
17123 if (mode == SFmode || mode == DFmode)
17130 /* Return true if the registers in CLASS cannot represent the change from
17131 modes FROM to TO. */
17134 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17135 enum reg_class class)
17140 /* x87 registers can't do subreg at all, as all values are reformatted
17141 to extended precision. */
17142 if (MAYBE_FLOAT_CLASS_P (class))
17145 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17147 /* Vector registers do not support QI or HImode loads. If we don't
17148 disallow a change to these modes, reload will assume it's ok to
17149 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17150 the vec_dupv4hi pattern. */
17151 if (GET_MODE_SIZE (from) < 4)
17154 /* Vector registers do not support subreg with nonzero offsets, which
17155 are otherwise valid for integer registers. Since we can't see
17156 whether we have a nonzero offset from here, prohibit all
17157 nonparadoxical subregs changing size. */
17158 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17165 /* Return the cost of moving data from a register in class CLASS1 to
17166 one in class CLASS2.
17168 It is not required that the cost always equal 2 when FROM is the same as TO;
17169 on some machines it is expensive to move between registers if they are not
17170 general registers. */
17173 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17174 enum reg_class class2)
17176 /* In case we require secondary memory, compute cost of the store followed
17177 by load. In order to avoid bad register allocation choices, we need
17178 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17180 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17184 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17185 MEMORY_MOVE_COST (mode, class1, 1));
17186 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17187 MEMORY_MOVE_COST (mode, class2, 1));
17189 /* In case of copying from general_purpose_register we may emit multiple
17190 stores followed by single load causing memory size mismatch stall.
17191 Count this as arbitrarily high cost of 20. */
17192 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17195 /* In the case of FP/MMX moves, the registers actually overlap, and we
17196 have to switch modes in order to treat them differently. */
17197 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17198 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17204 /* Moves between SSE/MMX and integer unit are expensive. */
17205 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17206 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17207 return ix86_cost->mmxsse_to_integer;
17208 if (MAYBE_FLOAT_CLASS_P (class1))
17209 return ix86_cost->fp_move;
17210 if (MAYBE_SSE_CLASS_P (class1))
17211 return ix86_cost->sse_move;
17212 if (MAYBE_MMX_CLASS_P (class1))
17213 return ix86_cost->mmx_move;
17217 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17220 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17222 /* Flags and only flags can only hold CCmode values. */
17223 if (CC_REGNO_P (regno))
17224 return GET_MODE_CLASS (mode) == MODE_CC;
17225 if (GET_MODE_CLASS (mode) == MODE_CC
17226 || GET_MODE_CLASS (mode) == MODE_RANDOM
17227 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17229 if (FP_REGNO_P (regno))
17230 return VALID_FP_MODE_P (mode);
17231 if (SSE_REGNO_P (regno))
17233 /* We implement the move patterns for all vector modes into and
17234 out of SSE registers, even when no operation instructions
17236 return (VALID_SSE_REG_MODE (mode)
17237 || VALID_SSE2_REG_MODE (mode)
17238 || VALID_MMX_REG_MODE (mode)
17239 || VALID_MMX_REG_MODE_3DNOW (mode));
17241 if (MMX_REGNO_P (regno))
17243 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17244 so if the register is available at all, then we can move data of
17245 the given mode into or out of it. */
17246 return (VALID_MMX_REG_MODE (mode)
17247 || VALID_MMX_REG_MODE_3DNOW (mode));
17250 if (mode == QImode)
17252 /* Take care for QImode values - they can be in non-QI regs,
17253 but then they do cause partial register stalls. */
17254 if (regno < 4 || TARGET_64BIT)
17256 if (!TARGET_PARTIAL_REG_STALL)
17258 return reload_in_progress || reload_completed;
17260 /* We handle both integer and floats in the general purpose registers. */
17261 else if (VALID_INT_MODE_P (mode))
17263 else if (VALID_FP_MODE_P (mode))
17265 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17266 on to use that value in smaller contexts, this can easily force a
17267 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17268 supporting DImode, allow it. */
17269 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17275 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17276 tieable integer mode. */
17279 ix86_tieable_integer_mode_p (enum machine_mode mode)
17288 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17291 return TARGET_64BIT;
17298 /* Return true if MODE1 is accessible in a register that can hold MODE2
17299 without copying. That is, all register classes that can hold MODE2
17300 can also hold MODE1. */
17303 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17305 if (mode1 == mode2)
17308 if (ix86_tieable_integer_mode_p (mode1)
17309 && ix86_tieable_integer_mode_p (mode2))
17312 /* MODE2 being XFmode implies fp stack or general regs, which means we
17313 can tie any smaller floating point modes to it. Note that we do not
17314 tie this with TFmode. */
17315 if (mode2 == XFmode)
17316 return mode1 == SFmode || mode1 == DFmode;
17318 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17319 that we can tie it with SFmode. */
17320 if (mode2 == DFmode)
17321 return mode1 == SFmode;
17323 /* If MODE2 is only appropriate for an SSE register, then tie with
17324 any other mode acceptable to SSE registers. */
17325 if (GET_MODE_SIZE (mode2) >= 8
17326 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17327 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17329 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17330 with any other mode acceptable to MMX registers. */
17331 if (GET_MODE_SIZE (mode2) == 8
17332 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17333 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17338 /* Return the cost of moving data of mode M between a
17339 register and memory. A value of 2 is the default; this cost is
17340 relative to those in `REGISTER_MOVE_COST'.
17342 If moving between registers and memory is more expensive than
17343 between two registers, you should define this macro to express the
17346 Model also increased moving costs of QImode registers in non
17350 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17352 if (FLOAT_CLASS_P (class))
17369 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17371 if (SSE_CLASS_P (class))
17374 switch (GET_MODE_SIZE (mode))
17388 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17390 if (MMX_CLASS_P (class))
17393 switch (GET_MODE_SIZE (mode))
17404 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17406 switch (GET_MODE_SIZE (mode))
17410 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17411 : ix86_cost->movzbl_load);
17413 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17414 : ix86_cost->int_store[0] + 4);
17417 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17419 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17420 if (mode == TFmode)
17422 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17423 * (((int) GET_MODE_SIZE (mode)
17424 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17428 /* Compute a (partial) cost for rtx X. Return true if the complete
17429 cost has been computed, and false if subexpressions should be
17430 scanned. In either case, *TOTAL contains the cost result. */
17433 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17435 enum machine_mode mode = GET_MODE (x);
17443 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17445 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17447 else if (flag_pic && SYMBOLIC_CONST (x)
17449 || (!GET_CODE (x) != LABEL_REF
17450 && (GET_CODE (x) != SYMBOL_REF
17451 || !SYMBOL_REF_LOCAL_P (x)))))
17458 if (mode == VOIDmode)
17461 switch (standard_80387_constant_p (x))
17466 default: /* Other constants */
17471 /* Start with (MEM (SYMBOL_REF)), since that's where
17472 it'll probably end up. Add a penalty for size. */
17473 *total = (COSTS_N_INSNS (1)
17474 + (flag_pic != 0 && !TARGET_64BIT)
17475 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17481 /* The zero extensions is often completely free on x86_64, so make
17482 it as cheap as possible. */
17483 if (TARGET_64BIT && mode == DImode
17484 && GET_MODE (XEXP (x, 0)) == SImode)
17486 else if (TARGET_ZERO_EXTEND_WITH_AND)
17487 *total = ix86_cost->add;
17489 *total = ix86_cost->movzx;
17493 *total = ix86_cost->movsx;
17497 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17498 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17500 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17503 *total = ix86_cost->add;
17506 if ((value == 2 || value == 3)
17507 && ix86_cost->lea <= ix86_cost->shift_const)
17509 *total = ix86_cost->lea;
17519 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17521 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17523 if (INTVAL (XEXP (x, 1)) > 32)
17524 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17526 *total = ix86_cost->shift_const * 2;
17530 if (GET_CODE (XEXP (x, 1)) == AND)
17531 *total = ix86_cost->shift_var * 2;
17533 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17538 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17539 *total = ix86_cost->shift_const;
17541 *total = ix86_cost->shift_var;
17546 if (FLOAT_MODE_P (mode))
17548 *total = ix86_cost->fmul;
17553 rtx op0 = XEXP (x, 0);
17554 rtx op1 = XEXP (x, 1);
17556 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17558 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17559 for (nbits = 0; value != 0; value &= value - 1)
17563 /* This is arbitrary. */
17566 /* Compute costs correctly for widening multiplication. */
17567 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17568 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17569 == GET_MODE_SIZE (mode))
17571 int is_mulwiden = 0;
17572 enum machine_mode inner_mode = GET_MODE (op0);
17574 if (GET_CODE (op0) == GET_CODE (op1))
17575 is_mulwiden = 1, op1 = XEXP (op1, 0);
17576 else if (GET_CODE (op1) == CONST_INT)
17578 if (GET_CODE (op0) == SIGN_EXTEND)
17579 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17582 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17586 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17589 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17590 + nbits * ix86_cost->mult_bit
17591 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17600 if (FLOAT_MODE_P (mode))
17601 *total = ix86_cost->fdiv;
17603 *total = ix86_cost->divide[MODE_INDEX (mode)];
17607 if (FLOAT_MODE_P (mode))
17608 *total = ix86_cost->fadd;
17609 else if (GET_MODE_CLASS (mode) == MODE_INT
17610 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17612 if (GET_CODE (XEXP (x, 0)) == PLUS
17613 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17614 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17615 && CONSTANT_P (XEXP (x, 1)))
17617 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17618 if (val == 2 || val == 4 || val == 8)
17620 *total = ix86_cost->lea;
17621 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17622 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17624 *total += rtx_cost (XEXP (x, 1), outer_code);
17628 else if (GET_CODE (XEXP (x, 0)) == MULT
17629 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17631 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17632 if (val == 2 || val == 4 || val == 8)
17634 *total = ix86_cost->lea;
17635 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17636 *total += rtx_cost (XEXP (x, 1), outer_code);
17640 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17642 *total = ix86_cost->lea;
17643 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17644 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17645 *total += rtx_cost (XEXP (x, 1), outer_code);
17652 if (FLOAT_MODE_P (mode))
17654 *total = ix86_cost->fadd;
17662 if (!TARGET_64BIT && mode == DImode)
17664 *total = (ix86_cost->add * 2
17665 + (rtx_cost (XEXP (x, 0), outer_code)
17666 << (GET_MODE (XEXP (x, 0)) != DImode))
17667 + (rtx_cost (XEXP (x, 1), outer_code)
17668 << (GET_MODE (XEXP (x, 1)) != DImode)));
17674 if (FLOAT_MODE_P (mode))
17676 *total = ix86_cost->fchs;
17682 if (!TARGET_64BIT && mode == DImode)
17683 *total = ix86_cost->add * 2;
17685 *total = ix86_cost->add;
17689 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17690 && XEXP (XEXP (x, 0), 1) == const1_rtx
17691 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17692 && XEXP (x, 1) == const0_rtx)
17694 /* This kind of construct is implemented using test[bwl].
17695 Treat it as if we had an AND. */
17696 *total = (ix86_cost->add
17697 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17698 + rtx_cost (const1_rtx, outer_code));
17704 if (!TARGET_SSE_MATH
17706 || (mode == DFmode && !TARGET_SSE2))
17707 /* For standard 80387 constants, raise the cost to prevent
17708 compress_float_constant() to generate load from memory. */
17709 switch (standard_80387_constant_p (XEXP (x, 0)))
17719 *total = (x86_ext_80387_constants & TUNEMASK
17726 if (FLOAT_MODE_P (mode))
17727 *total = ix86_cost->fabs;
17731 if (FLOAT_MODE_P (mode))
17732 *total = ix86_cost->fsqrt;
17736 if (XINT (x, 1) == UNSPEC_TP)
17747 static int current_machopic_label_num;
17749 /* Given a symbol name and its associated stub, write out the
17750 definition of the stub. */
17753 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17755 unsigned int length;
17756 char *binder_name, *symbol_name, lazy_ptr_name[32];
17757 int label = ++current_machopic_label_num;
17759 /* For 64-bit we shouldn't get here. */
17760 gcc_assert (!TARGET_64BIT);
17762 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17763 symb = (*targetm.strip_name_encoding) (symb);
17765 length = strlen (stub);
17766 binder_name = alloca (length + 32);
17767 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17769 length = strlen (symb);
17770 symbol_name = alloca (length + 32);
17771 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17773 sprintf (lazy_ptr_name, "L%d$lz", label);
17776 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17778 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17780 fprintf (file, "%s:\n", stub);
17781 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17785 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17786 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17787 fprintf (file, "\tjmp\t*%%edx\n");
17790 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17792 fprintf (file, "%s:\n", binder_name);
17796 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17797 fprintf (file, "\tpushl\t%%eax\n");
17800 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17802 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17804 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17805 fprintf (file, "%s:\n", lazy_ptr_name);
17806 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17807 fprintf (file, "\t.long %s\n", binder_name);
17811 darwin_x86_file_end (void)
17813 darwin_file_end ();
17816 #endif /* TARGET_MACHO */
17818 /* Order the registers for register allocator. */
17821 x86_order_regs_for_local_alloc (void)
17826 /* First allocate the local general purpose registers. */
17827 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17828 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17829 reg_alloc_order [pos++] = i;
17831 /* Global general purpose registers. */
17832 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17833 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17834 reg_alloc_order [pos++] = i;
17836 /* x87 registers come first in case we are doing FP math
17838 if (!TARGET_SSE_MATH)
17839 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17840 reg_alloc_order [pos++] = i;
17842 /* SSE registers. */
17843 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17844 reg_alloc_order [pos++] = i;
17845 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17846 reg_alloc_order [pos++] = i;
17848 /* x87 registers. */
17849 if (TARGET_SSE_MATH)
17850 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17851 reg_alloc_order [pos++] = i;
17853 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17854 reg_alloc_order [pos++] = i;
17856 /* Initialize the rest of array as we do not allocate some registers
17858 while (pos < FIRST_PSEUDO_REGISTER)
17859 reg_alloc_order [pos++] = 0;
17862 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17863 struct attribute_spec.handler. */
17865 ix86_handle_struct_attribute (tree *node, tree name,
17866 tree args ATTRIBUTE_UNUSED,
17867 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17870 if (DECL_P (*node))
17872 if (TREE_CODE (*node) == TYPE_DECL)
17873 type = &TREE_TYPE (*node);
17878 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17879 || TREE_CODE (*type) == UNION_TYPE)))
17881 warning (OPT_Wattributes, "%qs attribute ignored",
17882 IDENTIFIER_POINTER (name));
17883 *no_add_attrs = true;
17886 else if ((is_attribute_p ("ms_struct", name)
17887 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17888 || ((is_attribute_p ("gcc_struct", name)
17889 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17891 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17892 IDENTIFIER_POINTER (name));
17893 *no_add_attrs = true;
17900 ix86_ms_bitfield_layout_p (tree record_type)
17902 return (TARGET_MS_BITFIELD_LAYOUT &&
17903 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17904 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17907 /* Returns an expression indicating where the this parameter is
17908 located on entry to the FUNCTION. */
17911 x86_this_parameter (tree function)
17913 tree type = TREE_TYPE (function);
17917 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17918 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17921 if (ix86_function_regparm (type, function) > 0)
17925 parm = TYPE_ARG_TYPES (type);
17926 /* Figure out whether or not the function has a variable number of
17928 for (; parm; parm = TREE_CHAIN (parm))
17929 if (TREE_VALUE (parm) == void_type_node)
17931 /* If not, the this parameter is in the first argument. */
17935 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17937 return gen_rtx_REG (SImode, regno);
17941 if (aggregate_value_p (TREE_TYPE (type), type))
17942 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17944 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17947 /* Determine whether x86_output_mi_thunk can succeed. */
17950 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17951 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17952 HOST_WIDE_INT vcall_offset, tree function)
17954 /* 64-bit can handle anything. */
17958 /* For 32-bit, everything's fine if we have one free register. */
17959 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17962 /* Need a free register for vcall_offset. */
17966 /* Need a free register for GOT references. */
17967 if (flag_pic && !(*targetm.binds_local_p) (function))
17970 /* Otherwise ok. */
17974 /* Output the assembler code for a thunk function. THUNK_DECL is the
17975 declaration for the thunk function itself, FUNCTION is the decl for
17976 the target function. DELTA is an immediate constant offset to be
17977 added to THIS. If VCALL_OFFSET is nonzero, the word at
17978 *(*this + vcall_offset) should be added to THIS. */
17981 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17982 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17983 HOST_WIDE_INT vcall_offset, tree function)
17986 rtx this = x86_this_parameter (function);
17989 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17990 pull it in now and let DELTA benefit. */
17993 else if (vcall_offset)
17995 /* Put the this parameter into %eax. */
17997 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17998 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18001 this_reg = NULL_RTX;
18003 /* Adjust the this parameter by a fixed constant. */
18006 xops[0] = GEN_INT (delta);
18007 xops[1] = this_reg ? this_reg : this;
18010 if (!x86_64_general_operand (xops[0], DImode))
18012 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18014 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18018 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18021 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18024 /* Adjust the this parameter by a value stored in the vtable. */
18028 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18031 int tmp_regno = 2 /* ECX */;
18032 if (lookup_attribute ("fastcall",
18033 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18034 tmp_regno = 0 /* EAX */;
18035 tmp = gen_rtx_REG (SImode, tmp_regno);
18038 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18041 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18043 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18045 /* Adjust the this parameter. */
18046 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18047 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18049 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18050 xops[0] = GEN_INT (vcall_offset);
18052 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18053 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18055 xops[1] = this_reg;
18057 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18059 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18062 /* If necessary, drop THIS back to its stack slot. */
18063 if (this_reg && this_reg != this)
18065 xops[0] = this_reg;
18067 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18070 xops[0] = XEXP (DECL_RTL (function), 0);
18073 if (!flag_pic || (*targetm.binds_local_p) (function))
18074 output_asm_insn ("jmp\t%P0", xops);
18077 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18078 tmp = gen_rtx_CONST (Pmode, tmp);
18079 tmp = gen_rtx_MEM (QImode, tmp);
18081 output_asm_insn ("jmp\t%A0", xops);
18086 if (!flag_pic || (*targetm.binds_local_p) (function))
18087 output_asm_insn ("jmp\t%P0", xops);
18092 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18093 tmp = (gen_rtx_SYMBOL_REF
18095 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18096 tmp = gen_rtx_MEM (QImode, tmp);
18098 output_asm_insn ("jmp\t%0", xops);
18101 #endif /* TARGET_MACHO */
18103 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18104 output_set_got (tmp, NULL_RTX);
18107 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18108 output_asm_insn ("jmp\t{*}%1", xops);
18114 x86_file_start (void)
18116 default_file_start ();
18118 darwin_file_start ();
18120 if (X86_FILE_START_VERSION_DIRECTIVE)
18121 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18122 if (X86_FILE_START_FLTUSED)
18123 fputs ("\t.global\t__fltused\n", asm_out_file);
18124 if (ix86_asm_dialect == ASM_INTEL)
18125 fputs ("\t.intel_syntax\n", asm_out_file);
18129 x86_field_alignment (tree field, int computed)
18131 enum machine_mode mode;
18132 tree type = TREE_TYPE (field);
18134 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18136 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18137 ? get_inner_array_type (type) : type);
18138 if (mode == DFmode || mode == DCmode
18139 || GET_MODE_CLASS (mode) == MODE_INT
18140 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18141 return MIN (32, computed);
18145 /* Output assembler code to FILE to increment profiler label # LABELNO
18146 for profiling a function entry. */
18148 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18153 #ifndef NO_PROFILE_COUNTERS
18154 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18156 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18160 #ifndef NO_PROFILE_COUNTERS
18161 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18163 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18167 #ifndef NO_PROFILE_COUNTERS
18168 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18169 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18171 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18175 #ifndef NO_PROFILE_COUNTERS
18176 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18177 PROFILE_COUNT_REGISTER);
18179 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18183 /* We don't have exact information about the insn sizes, but we may assume
18184 quite safely that we are informed about all 1 byte insns and memory
18185 address sizes. This is enough to eliminate unnecessary padding in
18189 min_insn_size (rtx insn)
18193 if (!INSN_P (insn) || !active_insn_p (insn))
18196 /* Discard alignments we've emit and jump instructions. */
18197 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18198 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18200 if (GET_CODE (insn) == JUMP_INSN
18201 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18202 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18205 /* Important case - calls are always 5 bytes.
18206 It is common to have many calls in the row. */
18207 if (GET_CODE (insn) == CALL_INSN
18208 && symbolic_reference_mentioned_p (PATTERN (insn))
18209 && !SIBLING_CALL_P (insn))
18211 if (get_attr_length (insn) <= 1)
18214 /* For normal instructions we may rely on the sizes of addresses
18215 and the presence of symbol to require 4 bytes of encoding.
18216 This is not the case for jumps where references are PC relative. */
18217 if (GET_CODE (insn) != JUMP_INSN)
18219 l = get_attr_length_address (insn);
18220 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18229 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18233 ix86_avoid_jump_misspredicts (void)
18235 rtx insn, start = get_insns ();
18236 int nbytes = 0, njumps = 0;
18239 /* Look for all minimal intervals of instructions containing 4 jumps.
18240 The intervals are bounded by START and INSN. NBYTES is the total
18241 size of instructions in the interval including INSN and not including
18242 START. When the NBYTES is smaller than 16 bytes, it is possible
18243 that the end of START and INSN ends up in the same 16byte page.
18245 The smallest offset in the page INSN can start is the case where START
18246 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
18247 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
18249 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18252 nbytes += min_insn_size (insn);
18254 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18255 INSN_UID (insn), min_insn_size (insn));
18256 if ((GET_CODE (insn) == JUMP_INSN
18257 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18258 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18259 || GET_CODE (insn) == CALL_INSN)
18266 start = NEXT_INSN (start);
18267 if ((GET_CODE (start) == JUMP_INSN
18268 && GET_CODE (PATTERN (start)) != ADDR_VEC
18269 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18270 || GET_CODE (start) == CALL_INSN)
18271 njumps--, isjump = 1;
18274 nbytes -= min_insn_size (start);
18276 gcc_assert (njumps >= 0);
18278 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18279 INSN_UID (start), INSN_UID (insn), nbytes);
18281 if (njumps == 3 && isjump && nbytes < 16)
18283 int padsize = 15 - nbytes + min_insn_size (insn);
18286 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18287 INSN_UID (insn), padsize);
18288 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18293 /* AMD Athlon works faster
18294 when RET is not destination of conditional jump or directly preceded
18295 by other jump instruction. We avoid the penalty by inserting NOP just
18296 before the RET instructions in such cases. */
18298 ix86_pad_returns (void)
18303 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18305 basic_block bb = e->src;
18306 rtx ret = BB_END (bb);
18308 bool replace = false;
18310 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18311 || !maybe_hot_bb_p (bb))
18313 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18314 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18316 if (prev && GET_CODE (prev) == CODE_LABEL)
18321 FOR_EACH_EDGE (e, ei, bb->preds)
18322 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18323 && !(e->flags & EDGE_FALLTHRU))
18328 prev = prev_active_insn (ret);
18330 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18331 || GET_CODE (prev) == CALL_INSN))
18333 /* Empty functions get branch mispredict even when the jump destination
18334 is not visible to us. */
18335 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18340 emit_insn_before (gen_return_internal_long (), ret);
18346 /* Implement machine specific optimizations. We implement padding of returns
18347 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18351 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18352 ix86_pad_returns ();
18353 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18354 ix86_avoid_jump_misspredicts ();
18357 /* Return nonzero when QImode register that must be represented via REX prefix
18360 x86_extended_QIreg_mentioned_p (rtx insn)
18363 extract_insn_cached (insn);
18364 for (i = 0; i < recog_data.n_operands; i++)
18365 if (REG_P (recog_data.operand[i])
18366 && REGNO (recog_data.operand[i]) >= 4)
18371 /* Return nonzero when P points to register encoded via REX prefix.
18372 Called via for_each_rtx. */
18374 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18376 unsigned int regno;
18379 regno = REGNO (*p);
18380 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18383 /* Return true when INSN mentions register that must be encoded using REX
18386 x86_extended_reg_mentioned_p (rtx insn)
18388 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18391 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18392 optabs would emit if we didn't have TFmode patterns. */
18395 x86_emit_floatuns (rtx operands[2])
18397 rtx neglab, donelab, i0, i1, f0, in, out;
18398 enum machine_mode mode, inmode;
18400 inmode = GET_MODE (operands[1]);
18401 gcc_assert (inmode == SImode || inmode == DImode);
18404 in = force_reg (inmode, operands[1]);
18405 mode = GET_MODE (out);
18406 neglab = gen_label_rtx ();
18407 donelab = gen_label_rtx ();
18408 i1 = gen_reg_rtx (Pmode);
18409 f0 = gen_reg_rtx (mode);
18411 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18413 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18414 emit_jump_insn (gen_jump (donelab));
18417 emit_label (neglab);
18419 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18420 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18421 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18422 expand_float (f0, i0, 0);
18423 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18425 emit_label (donelab);
18428 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18429 with all elements equal to VAR. Return true if successful. */
18432 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18433 rtx target, rtx val)
18435 enum machine_mode smode, wsmode, wvmode;
18450 val = force_reg (GET_MODE_INNER (mode), val);
18451 x = gen_rtx_VEC_DUPLICATE (mode, val);
18452 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18458 if (TARGET_SSE || TARGET_3DNOW_A)
18460 val = gen_lowpart (SImode, val);
18461 x = gen_rtx_TRUNCATE (HImode, val);
18462 x = gen_rtx_VEC_DUPLICATE (mode, x);
18463 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18485 /* Extend HImode to SImode using a paradoxical SUBREG. */
18486 tmp1 = gen_reg_rtx (SImode);
18487 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18488 /* Insert the SImode value as low element of V4SImode vector. */
18489 tmp2 = gen_reg_rtx (V4SImode);
18490 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18491 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18492 CONST0_RTX (V4SImode),
18494 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18495 /* Cast the V4SImode vector back to a V8HImode vector. */
18496 tmp1 = gen_reg_rtx (V8HImode);
18497 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18498 /* Duplicate the low short through the whole low SImode word. */
18499 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18500 /* Cast the V8HImode vector back to a V4SImode vector. */
18501 tmp2 = gen_reg_rtx (V4SImode);
18502 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18503 /* Replicate the low element of the V4SImode vector. */
18504 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18505 /* Cast the V2SImode back to V8HImode, and store in target. */
18506 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18517 /* Extend QImode to SImode using a paradoxical SUBREG. */
18518 tmp1 = gen_reg_rtx (SImode);
18519 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18520 /* Insert the SImode value as low element of V4SImode vector. */
18521 tmp2 = gen_reg_rtx (V4SImode);
18522 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18523 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18524 CONST0_RTX (V4SImode),
18526 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18527 /* Cast the V4SImode vector back to a V16QImode vector. */
18528 tmp1 = gen_reg_rtx (V16QImode);
18529 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18530 /* Duplicate the low byte through the whole low SImode word. */
18531 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18532 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18533 /* Cast the V16QImode vector back to a V4SImode vector. */
18534 tmp2 = gen_reg_rtx (V4SImode);
18535 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18536 /* Replicate the low element of the V4SImode vector. */
18537 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18538 /* Cast the V2SImode back to V16QImode, and store in target. */
18539 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18547 /* Replicate the value once into the next wider mode and recurse. */
18548 val = convert_modes (wsmode, smode, val, true);
18549 x = expand_simple_binop (wsmode, ASHIFT, val,
18550 GEN_INT (GET_MODE_BITSIZE (smode)),
18551 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18552 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18554 x = gen_reg_rtx (wvmode);
18555 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18556 gcc_unreachable ();
18557 emit_move_insn (target, gen_lowpart (mode, x));
18565 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18566 whose ONE_VAR element is VAR, and other elements are zero. Return true
18570 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18571 rtx target, rtx var, int one_var)
18573 enum machine_mode vsimode;
18589 var = force_reg (GET_MODE_INNER (mode), var);
18590 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18591 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18596 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18597 new_target = gen_reg_rtx (mode);
18599 new_target = target;
18600 var = force_reg (GET_MODE_INNER (mode), var);
18601 x = gen_rtx_VEC_DUPLICATE (mode, var);
18602 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18603 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18606 /* We need to shuffle the value to the correct position, so
18607 create a new pseudo to store the intermediate result. */
18609 /* With SSE2, we can use the integer shuffle insns. */
18610 if (mode != V4SFmode && TARGET_SSE2)
18612 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18614 GEN_INT (one_var == 1 ? 0 : 1),
18615 GEN_INT (one_var == 2 ? 0 : 1),
18616 GEN_INT (one_var == 3 ? 0 : 1)));
18617 if (target != new_target)
18618 emit_move_insn (target, new_target);
18622 /* Otherwise convert the intermediate result to V4SFmode and
18623 use the SSE1 shuffle instructions. */
18624 if (mode != V4SFmode)
18626 tmp = gen_reg_rtx (V4SFmode);
18627 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18632 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18634 GEN_INT (one_var == 1 ? 0 : 1),
18635 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18636 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18638 if (mode != V4SFmode)
18639 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18640 else if (tmp != target)
18641 emit_move_insn (target, tmp);
18643 else if (target != new_target)
18644 emit_move_insn (target, new_target);
18649 vsimode = V4SImode;
18655 vsimode = V2SImode;
18661 /* Zero extend the variable element to SImode and recurse. */
18662 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18664 x = gen_reg_rtx (vsimode);
18665 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18667 gcc_unreachable ();
18669 emit_move_insn (target, gen_lowpart (mode, x));
18677 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18678 consisting of the values in VALS. It is known that all elements
18679 except ONE_VAR are constants. Return true if successful. */
18682 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18683 rtx target, rtx vals, int one_var)
18685 rtx var = XVECEXP (vals, 0, one_var);
18686 enum machine_mode wmode;
18689 const_vec = copy_rtx (vals);
18690 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18691 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18699 /* For the two element vectors, it's just as easy to use
18700 the general case. */
18716 /* There's no way to set one QImode entry easily. Combine
18717 the variable value with its adjacent constant value, and
18718 promote to an HImode set. */
18719 x = XVECEXP (vals, 0, one_var ^ 1);
18722 var = convert_modes (HImode, QImode, var, true);
18723 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18724 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18725 x = GEN_INT (INTVAL (x) & 0xff);
18729 var = convert_modes (HImode, QImode, var, true);
18730 x = gen_int_mode (INTVAL (x) << 8, HImode);
18732 if (x != const0_rtx)
18733 var = expand_simple_binop (HImode, IOR, var, x, var,
18734 1, OPTAB_LIB_WIDEN);
18736 x = gen_reg_rtx (wmode);
18737 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18738 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18740 emit_move_insn (target, gen_lowpart (mode, x));
18747 emit_move_insn (target, const_vec);
18748 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18752 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18753 all values variable, and none identical. */
18756 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18757 rtx target, rtx vals)
18759 enum machine_mode half_mode = GET_MODE_INNER (mode);
18760 rtx op0 = NULL, op1 = NULL;
18761 bool use_vec_concat = false;
18767 if (!mmx_ok && !TARGET_SSE)
18773 /* For the two element vectors, we always implement VEC_CONCAT. */
18774 op0 = XVECEXP (vals, 0, 0);
18775 op1 = XVECEXP (vals, 0, 1);
18776 use_vec_concat = true;
18780 half_mode = V2SFmode;
18783 half_mode = V2SImode;
18789 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18790 Recurse to load the two halves. */
18792 op0 = gen_reg_rtx (half_mode);
18793 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18794 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18796 op1 = gen_reg_rtx (half_mode);
18797 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18798 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18800 use_vec_concat = true;
18811 gcc_unreachable ();
18814 if (use_vec_concat)
18816 if (!register_operand (op0, half_mode))
18817 op0 = force_reg (half_mode, op0);
18818 if (!register_operand (op1, half_mode))
18819 op1 = force_reg (half_mode, op1);
18821 emit_insn (gen_rtx_SET (VOIDmode, target,
18822 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18826 int i, j, n_elts, n_words, n_elt_per_word;
18827 enum machine_mode inner_mode;
18828 rtx words[4], shift;
18830 inner_mode = GET_MODE_INNER (mode);
18831 n_elts = GET_MODE_NUNITS (mode);
18832 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18833 n_elt_per_word = n_elts / n_words;
18834 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18836 for (i = 0; i < n_words; ++i)
18838 rtx word = NULL_RTX;
18840 for (j = 0; j < n_elt_per_word; ++j)
18842 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18843 elt = convert_modes (word_mode, inner_mode, elt, true);
18849 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18850 word, 1, OPTAB_LIB_WIDEN);
18851 word = expand_simple_binop (word_mode, IOR, word, elt,
18852 word, 1, OPTAB_LIB_WIDEN);
18860 emit_move_insn (target, gen_lowpart (mode, words[0]));
18861 else if (n_words == 2)
18863 rtx tmp = gen_reg_rtx (mode);
18864 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18865 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18866 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18867 emit_move_insn (target, tmp);
18869 else if (n_words == 4)
18871 rtx tmp = gen_reg_rtx (V4SImode);
18872 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18873 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18874 emit_move_insn (target, gen_lowpart (mode, tmp));
18877 gcc_unreachable ();
18881 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18882 instructions unless MMX_OK is true. */
18885 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18887 enum machine_mode mode = GET_MODE (target);
18888 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18889 int n_elts = GET_MODE_NUNITS (mode);
18890 int n_var = 0, one_var = -1;
18891 bool all_same = true, all_const_zero = true;
18895 for (i = 0; i < n_elts; ++i)
18897 x = XVECEXP (vals, 0, i);
18898 if (!CONSTANT_P (x))
18899 n_var++, one_var = i;
18900 else if (x != CONST0_RTX (inner_mode))
18901 all_const_zero = false;
18902 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18906 /* Constants are best loaded from the constant pool. */
18909 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18913 /* If all values are identical, broadcast the value. */
18915 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18916 XVECEXP (vals, 0, 0)))
18919 /* Values where only one field is non-constant are best loaded from
18920 the pool and overwritten via move later. */
18924 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18925 XVECEXP (vals, 0, one_var),
18929 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18933 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18937 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18939 enum machine_mode mode = GET_MODE (target);
18940 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18941 bool use_vec_merge = false;
18950 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18951 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18953 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18955 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18956 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18966 /* For the two element vectors, we implement a VEC_CONCAT with
18967 the extraction of the other element. */
18969 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18970 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18973 op0 = val, op1 = tmp;
18975 op0 = tmp, op1 = val;
18977 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18978 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18986 use_vec_merge = true;
18990 /* tmp = target = A B C D */
18991 tmp = copy_to_reg (target);
18992 /* target = A A B B */
18993 emit_insn (gen_sse_unpcklps (target, target, target));
18994 /* target = X A B B */
18995 ix86_expand_vector_set (false, target, val, 0);
18996 /* target = A X C D */
18997 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18998 GEN_INT (1), GEN_INT (0),
18999 GEN_INT (2+4), GEN_INT (3+4)));
19003 /* tmp = target = A B C D */
19004 tmp = copy_to_reg (target);
19005 /* tmp = X B C D */
19006 ix86_expand_vector_set (false, tmp, val, 0);
19007 /* target = A B X D */
19008 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19009 GEN_INT (0), GEN_INT (1),
19010 GEN_INT (0+4), GEN_INT (3+4)));
19014 /* tmp = target = A B C D */
19015 tmp = copy_to_reg (target);
19016 /* tmp = X B C D */
19017 ix86_expand_vector_set (false, tmp, val, 0);
19018 /* target = A B X D */
19019 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19020 GEN_INT (0), GEN_INT (1),
19021 GEN_INT (2+4), GEN_INT (0+4)));
19025 gcc_unreachable ();
19030 /* Element 0 handled by vec_merge below. */
19033 use_vec_merge = true;
19039 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19040 store into element 0, then shuffle them back. */
19044 order[0] = GEN_INT (elt);
19045 order[1] = const1_rtx;
19046 order[2] = const2_rtx;
19047 order[3] = GEN_INT (3);
19048 order[elt] = const0_rtx;
19050 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19051 order[1], order[2], order[3]));
19053 ix86_expand_vector_set (false, target, val, 0);
19055 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19056 order[1], order[2], order[3]));
19060 /* For SSE1, we have to reuse the V4SF code. */
19061 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19062 gen_lowpart (SFmode, val), elt);
19067 use_vec_merge = TARGET_SSE2;
19070 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19081 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19082 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19083 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19087 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19089 emit_move_insn (mem, target);
19091 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19092 emit_move_insn (tmp, val);
19094 emit_move_insn (target, mem);
19099 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19101 enum machine_mode mode = GET_MODE (vec);
19102 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19103 bool use_vec_extr = false;
19116 use_vec_extr = true;
19128 tmp = gen_reg_rtx (mode);
19129 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19130 GEN_INT (elt), GEN_INT (elt),
19131 GEN_INT (elt+4), GEN_INT (elt+4)));
19135 tmp = gen_reg_rtx (mode);
19136 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19140 gcc_unreachable ();
19143 use_vec_extr = true;
19158 tmp = gen_reg_rtx (mode);
19159 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19160 GEN_INT (elt), GEN_INT (elt),
19161 GEN_INT (elt), GEN_INT (elt)));
19165 tmp = gen_reg_rtx (mode);
19166 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19170 gcc_unreachable ();
19173 use_vec_extr = true;
19178 /* For SSE1, we have to reuse the V4SF code. */
19179 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19180 gen_lowpart (V4SFmode, vec), elt);
19186 use_vec_extr = TARGET_SSE2;
19189 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19194 /* ??? Could extract the appropriate HImode element and shift. */
19201 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19202 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19204 /* Let the rtl optimizers know about the zero extension performed. */
19205 if (inner_mode == HImode)
19207 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19208 target = gen_lowpart (SImode, target);
19211 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19215 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19217 emit_move_insn (mem, vec);
19219 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19220 emit_move_insn (target, tmp);
19224 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19225 pattern to reduce; DEST is the destination; IN is the input vector. */
19228 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19230 rtx tmp1, tmp2, tmp3;
19232 tmp1 = gen_reg_rtx (V4SFmode);
19233 tmp2 = gen_reg_rtx (V4SFmode);
19234 tmp3 = gen_reg_rtx (V4SFmode);
19236 emit_insn (gen_sse_movhlps (tmp1, in, in));
19237 emit_insn (fn (tmp2, tmp1, in));
19239 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19240 GEN_INT (1), GEN_INT (1),
19241 GEN_INT (1+4), GEN_INT (1+4)));
19242 emit_insn (fn (dest, tmp2, tmp3));
19245 /* Target hook for scalar_mode_supported_p. */
19247 ix86_scalar_mode_supported_p (enum machine_mode mode)
19249 if (DECIMAL_FLOAT_MODE_P (mode))
19252 return default_scalar_mode_supported_p (mode);
19255 /* Implements target hook vector_mode_supported_p. */
19257 ix86_vector_mode_supported_p (enum machine_mode mode)
19259 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19261 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19263 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19265 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19270 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19272 We do this in the new i386 backend to maintain source compatibility
19273 with the old cc0-based compiler. */
19276 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19277 tree inputs ATTRIBUTE_UNUSED,
19280 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19282 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19284 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19289 /* Return true if this goes in small data/bss. */
19292 ix86_in_large_data_p (tree exp)
19294 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19297 /* Functions are never large data. */
19298 if (TREE_CODE (exp) == FUNCTION_DECL)
19301 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19303 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19304 if (strcmp (section, ".ldata") == 0
19305 || strcmp (section, ".lbss") == 0)
19311 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19313 /* If this is an incomplete type with size 0, then we can't put it
19314 in data because it might be too big when completed. */
19315 if (!size || size > ix86_section_threshold)
19322 ix86_encode_section_info (tree decl, rtx rtl, int first)
19324 default_encode_section_info (decl, rtl, first);
19326 if (TREE_CODE (decl) == VAR_DECL
19327 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19328 && ix86_in_large_data_p (decl))
19329 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19332 /* Worker function for REVERSE_CONDITION. */
19335 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19337 return (mode != CCFPmode && mode != CCFPUmode
19338 ? reverse_condition (code)
19339 : reverse_condition_maybe_unordered (code));
19342 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19346 output_387_reg_move (rtx insn, rtx *operands)
19348 if (REG_P (operands[1])
19349 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19351 if (REGNO (operands[0]) == FIRST_STACK_REG)
19352 return output_387_ffreep (operands, 0);
19353 return "fstp\t%y0";
19355 if (STACK_TOP_P (operands[0]))
19356 return "fld%z1\t%y1";
19360 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19361 FP status register is set. */
19364 ix86_emit_fp_unordered_jump (rtx label)
19366 rtx reg = gen_reg_rtx (HImode);
19369 emit_insn (gen_x86_fnstsw_1 (reg));
19371 if (TARGET_USE_SAHF)
19373 emit_insn (gen_x86_sahf_1 (reg));
19375 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19376 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19380 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19382 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19383 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19386 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19387 gen_rtx_LABEL_REF (VOIDmode, label),
19389 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19390 emit_jump_insn (temp);
19393 /* Output code to perform a log1p XFmode calculation. */
19395 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19397 rtx label1 = gen_label_rtx ();
19398 rtx label2 = gen_label_rtx ();
19400 rtx tmp = gen_reg_rtx (XFmode);
19401 rtx tmp2 = gen_reg_rtx (XFmode);
19403 emit_insn (gen_absxf2 (tmp, op1));
19404 emit_insn (gen_cmpxf (tmp,
19405 CONST_DOUBLE_FROM_REAL_VALUE (
19406 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19408 emit_jump_insn (gen_bge (label1));
19410 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19411 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19412 emit_jump (label2);
19414 emit_label (label1);
19415 emit_move_insn (tmp, CONST1_RTX (XFmode));
19416 emit_insn (gen_addxf3 (tmp, op1, tmp));
19417 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19418 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19420 emit_label (label2);
19423 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19426 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19429 /* With Binutils 2.15, the "@unwind" marker must be specified on
19430 every occurrence of the ".eh_frame" section, not just the first
19433 && strcmp (name, ".eh_frame") == 0)
19435 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19436 flags & SECTION_WRITE ? "aw" : "a");
19439 default_elf_asm_named_section (name, flags, decl);
19442 /* Return the mangling of TYPE if it is an extended fundamental type. */
19444 static const char *
19445 ix86_mangle_fundamental_type (tree type)
19447 switch (TYPE_MODE (type))
19450 /* __float128 is "g". */
19453 /* "long double" or __float80 is "e". */
19460 /* For 32-bit code we can save PIC register setup by using
19461 __stack_chk_fail_local hidden function instead of calling
19462 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19463 register, so it is better to call __stack_chk_fail directly. */
19466 ix86_stack_protect_fail (void)
19468 return TARGET_64BIT
19469 ? default_external_stack_protect_fail ()
19470 : default_hidden_stack_protect_fail ();
19473 /* Select a format to encode pointers in exception handling data. CODE
19474 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19475 true if the symbol may be affected by dynamic relocations.
19477 ??? All x86 object file formats are capable of representing this.
19478 After all, the relocation needed is the same as for the call insn.
19479 Whether or not a particular assembler allows us to enter such, I
19480 guess we'll have to see. */
19482 asm_preferred_eh_data_format (int code, int global)
19486 int type = DW_EH_PE_sdata8;
19488 || ix86_cmodel == CM_SMALL_PIC
19489 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19490 type = DW_EH_PE_sdata4;
19491 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19493 if (ix86_cmodel == CM_SMALL
19494 || (ix86_cmodel == CM_MEDIUM && code))
19495 return DW_EH_PE_udata4;
19496 return DW_EH_PE_absptr;
19499 #include "gt-i386.h"