1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
26 #include "coretypes.h"
32 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
46 #include "basic-block.h"
49 #include "target-def.h"
50 #include "langhooks.h"
52 #include "tree-gimple.h"
54 #include "tm-constrs.h"
56 #ifndef CHECK_STACK_LIMIT
57 #define CHECK_STACK_LIMIT (-1)
60 /* Return index of given mode in mult and division cost tables. */
61 #define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
68 /* Processor costs (relative to an add) */
69 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70 #define COSTS_N_BYTES(N) ((N) * 2)
73 struct processor_costs size_cost = { /* costs for tuning for size */
74 COSTS_N_BYTES (2), /* cost of an add instruction */
75 COSTS_N_BYTES (3), /* cost of a lea instruction */
76 COSTS_N_BYTES (2), /* variable shift costs */
77 COSTS_N_BYTES (3), /* constant shift costs */
78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
79 COSTS_N_BYTES (3), /* HI */
80 COSTS_N_BYTES (3), /* SI */
81 COSTS_N_BYTES (3), /* DI */
82 COSTS_N_BYTES (5)}, /* other */
83 0, /* cost of multiply per each bit set */
84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 COSTS_N_BYTES (3), /* cost of movsx */
90 COSTS_N_BYTES (3), /* cost of movzx */
93 2, /* cost for loading QImode using movzbl */
94 {2, 2, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 2, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {2, 2, 2}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {2, 2, 2}, /* cost of storing fp registers
102 in SFmode, DFmode and XFmode */
103 3, /* cost of moving MMX register */
104 {3, 3}, /* cost of loading MMX registers
105 in SImode and DImode */
106 {3, 3}, /* cost of storing MMX registers
107 in SImode and DImode */
108 3, /* cost of moving SSE register */
109 {3, 3, 3}, /* cost of loading SSE registers
110 in SImode, DImode and TImode */
111 {3, 3, 3}, /* cost of storing SSE registers
112 in SImode, DImode and TImode */
113 3, /* MMX or SSE register to integer */
114 0, /* size of prefetch block */
115 0, /* number of parallel prefetches */
117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
120 COSTS_N_BYTES (2), /* cost of FABS instruction. */
121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
125 /* Processor costs (relative to an add) */
127 struct processor_costs i386_cost = { /* 386 specific costs */
128 COSTS_N_INSNS (1), /* cost of an add instruction */
129 COSTS_N_INSNS (1), /* cost of a lea instruction */
130 COSTS_N_INSNS (3), /* variable shift costs */
131 COSTS_N_INSNS (2), /* constant shift costs */
132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
133 COSTS_N_INSNS (6), /* HI */
134 COSTS_N_INSNS (6), /* SI */
135 COSTS_N_INSNS (6), /* DI */
136 COSTS_N_INSNS (6)}, /* other */
137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
139 COSTS_N_INSNS (23), /* HI */
140 COSTS_N_INSNS (23), /* SI */
141 COSTS_N_INSNS (23), /* DI */
142 COSTS_N_INSNS (23)}, /* other */
143 COSTS_N_INSNS (3), /* cost of movsx */
144 COSTS_N_INSNS (2), /* cost of movzx */
145 15, /* "large" insn */
147 4, /* cost for loading QImode using movzbl */
148 {2, 4, 2}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 4, 2}, /* cost of storing integer registers */
152 2, /* cost of reg,reg fld/fst */
153 {8, 8, 8}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {8, 8, 8}, /* cost of storing fp registers
156 in SFmode, DFmode and XFmode */
157 2, /* cost of moving MMX register */
158 {4, 8}, /* cost of loading MMX registers
159 in SImode and DImode */
160 {4, 8}, /* cost of storing MMX registers
161 in SImode and DImode */
162 2, /* cost of moving SSE register */
163 {4, 8, 16}, /* cost of loading SSE registers
164 in SImode, DImode and TImode */
165 {4, 8, 16}, /* cost of storing SSE registers
166 in SImode, DImode and TImode */
167 3, /* MMX or SSE register to integer */
168 0, /* size of prefetch block */
169 0, /* number of parallel prefetches */
171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
174 COSTS_N_INSNS (22), /* cost of FABS instruction. */
175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
180 struct processor_costs i486_cost = { /* 486 specific costs */
181 COSTS_N_INSNS (1), /* cost of an add instruction */
182 COSTS_N_INSNS (1), /* cost of a lea instruction */
183 COSTS_N_INSNS (3), /* variable shift costs */
184 COSTS_N_INSNS (2), /* constant shift costs */
185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
186 COSTS_N_INSNS (12), /* HI */
187 COSTS_N_INSNS (12), /* SI */
188 COSTS_N_INSNS (12), /* DI */
189 COSTS_N_INSNS (12)}, /* other */
190 1, /* cost of multiply per each bit set */
191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
192 COSTS_N_INSNS (40), /* HI */
193 COSTS_N_INSNS (40), /* SI */
194 COSTS_N_INSNS (40), /* DI */
195 COSTS_N_INSNS (40)}, /* other */
196 COSTS_N_INSNS (3), /* cost of movsx */
197 COSTS_N_INSNS (2), /* cost of movzx */
198 15, /* "large" insn */
200 4, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {8, 8, 8}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {8, 8, 8}, /* cost of storing fp registers
209 in SFmode, DFmode and XFmode */
210 2, /* cost of moving MMX register */
211 {4, 8}, /* cost of loading MMX registers
212 in SImode and DImode */
213 {4, 8}, /* cost of storing MMX registers
214 in SImode and DImode */
215 2, /* cost of moving SSE register */
216 {4, 8, 16}, /* cost of loading SSE registers
217 in SImode, DImode and TImode */
218 {4, 8, 16}, /* cost of storing SSE registers
219 in SImode, DImode and TImode */
220 3, /* MMX or SSE register to integer */
221 0, /* size of prefetch block */
222 0, /* number of parallel prefetches */
224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
227 COSTS_N_INSNS (3), /* cost of FABS instruction. */
228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
233 struct processor_costs pentium_cost = {
234 COSTS_N_INSNS (1), /* cost of an add instruction */
235 COSTS_N_INSNS (1), /* cost of a lea instruction */
236 COSTS_N_INSNS (4), /* variable shift costs */
237 COSTS_N_INSNS (1), /* constant shift costs */
238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
239 COSTS_N_INSNS (11), /* HI */
240 COSTS_N_INSNS (11), /* SI */
241 COSTS_N_INSNS (11), /* DI */
242 COSTS_N_INSNS (11)}, /* other */
243 0, /* cost of multiply per each bit set */
244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
245 COSTS_N_INSNS (25), /* HI */
246 COSTS_N_INSNS (25), /* SI */
247 COSTS_N_INSNS (25), /* DI */
248 COSTS_N_INSNS (25)}, /* other */
249 COSTS_N_INSNS (3), /* cost of movsx */
250 COSTS_N_INSNS (2), /* cost of movzx */
251 8, /* "large" insn */
253 6, /* cost for loading QImode using movzbl */
254 {2, 4, 2}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 4, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
262 in SFmode, DFmode and XFmode */
263 8, /* cost of moving MMX register */
264 {8, 8}, /* cost of loading MMX registers
265 in SImode and DImode */
266 {8, 8}, /* cost of storing MMX registers
267 in SImode and DImode */
268 2, /* cost of moving SSE register */
269 {4, 8, 16}, /* cost of loading SSE registers
270 in SImode, DImode and TImode */
271 {4, 8, 16}, /* cost of storing SSE registers
272 in SImode, DImode and TImode */
273 3, /* MMX or SSE register to integer */
274 0, /* size of prefetch block */
275 0, /* number of parallel prefetches */
277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
278 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
280 COSTS_N_INSNS (1), /* cost of FABS instruction. */
281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
286 struct processor_costs pentiumpro_cost = {
287 COSTS_N_INSNS (1), /* cost of an add instruction */
288 COSTS_N_INSNS (1), /* cost of a lea instruction */
289 COSTS_N_INSNS (1), /* variable shift costs */
290 COSTS_N_INSNS (1), /* constant shift costs */
291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
292 COSTS_N_INSNS (4), /* HI */
293 COSTS_N_INSNS (4), /* SI */
294 COSTS_N_INSNS (4), /* DI */
295 COSTS_N_INSNS (4)}, /* other */
296 0, /* cost of multiply per each bit set */
297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
298 COSTS_N_INSNS (17), /* HI */
299 COSTS_N_INSNS (17), /* SI */
300 COSTS_N_INSNS (17), /* DI */
301 COSTS_N_INSNS (17)}, /* other */
302 COSTS_N_INSNS (1), /* cost of movsx */
303 COSTS_N_INSNS (1), /* cost of movzx */
304 8, /* "large" insn */
306 2, /* cost for loading QImode using movzbl */
307 {4, 4, 4}, /* cost of loading integer registers
308 in QImode, HImode and SImode.
309 Relative to reg-reg move (2). */
310 {2, 2, 2}, /* cost of storing integer registers */
311 2, /* cost of reg,reg fld/fst */
312 {2, 2, 6}, /* cost of loading fp registers
313 in SFmode, DFmode and XFmode */
314 {4, 4, 6}, /* cost of storing fp registers
315 in SFmode, DFmode and XFmode */
316 2, /* cost of moving MMX register */
317 {2, 2}, /* cost of loading MMX registers
318 in SImode and DImode */
319 {2, 2}, /* cost of storing MMX registers
320 in SImode and DImode */
321 2, /* cost of moving SSE register */
322 {2, 2, 8}, /* cost of loading SSE registers
323 in SImode, DImode and TImode */
324 {2, 2, 8}, /* cost of storing SSE registers
325 in SImode, DImode and TImode */
326 3, /* MMX or SSE register to integer */
327 32, /* size of prefetch block */
328 6, /* number of parallel prefetches */
330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
333 COSTS_N_INSNS (2), /* cost of FABS instruction. */
334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
339 struct processor_costs geode_cost = {
340 COSTS_N_INSNS (1), /* cost of an add instruction */
341 COSTS_N_INSNS (1), /* cost of a lea instruction */
342 COSTS_N_INSNS (2), /* variable shift costs */
343 COSTS_N_INSNS (1), /* constant shift costs */
344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
345 COSTS_N_INSNS (4), /* HI */
346 COSTS_N_INSNS (7), /* SI */
347 COSTS_N_INSNS (7), /* DI */
348 COSTS_N_INSNS (7)}, /* other */
349 0, /* cost of multiply per each bit set */
350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
351 COSTS_N_INSNS (23), /* HI */
352 COSTS_N_INSNS (39), /* SI */
353 COSTS_N_INSNS (39), /* DI */
354 COSTS_N_INSNS (39)}, /* other */
355 COSTS_N_INSNS (1), /* cost of movsx */
356 COSTS_N_INSNS (1), /* cost of movzx */
357 8, /* "large" insn */
359 1, /* cost for loading QImode using movzbl */
360 {1, 1, 1}, /* cost of loading integer registers
361 in QImode, HImode and SImode.
362 Relative to reg-reg move (2). */
363 {1, 1, 1}, /* cost of storing integer registers */
364 1, /* cost of reg,reg fld/fst */
365 {1, 1, 1}, /* cost of loading fp registers
366 in SFmode, DFmode and XFmode */
367 {4, 6, 6}, /* cost of storing fp registers
368 in SFmode, DFmode and XFmode */
370 1, /* cost of moving MMX register */
371 {1, 1}, /* cost of loading MMX registers
372 in SImode and DImode */
373 {1, 1}, /* cost of storing MMX registers
374 in SImode and DImode */
375 1, /* cost of moving SSE register */
376 {1, 1, 1}, /* cost of loading SSE registers
377 in SImode, DImode and TImode */
378 {1, 1, 1}, /* cost of storing SSE registers
379 in SImode, DImode and TImode */
380 1, /* MMX or SSE register to integer */
381 32, /* size of prefetch block */
382 1, /* number of parallel prefetches */
384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
385 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
387 COSTS_N_INSNS (1), /* cost of FABS instruction. */
388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
393 struct processor_costs k6_cost = {
394 COSTS_N_INSNS (1), /* cost of an add instruction */
395 COSTS_N_INSNS (2), /* cost of a lea instruction */
396 COSTS_N_INSNS (1), /* variable shift costs */
397 COSTS_N_INSNS (1), /* constant shift costs */
398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
399 COSTS_N_INSNS (3), /* HI */
400 COSTS_N_INSNS (3), /* SI */
401 COSTS_N_INSNS (3), /* DI */
402 COSTS_N_INSNS (3)}, /* other */
403 0, /* cost of multiply per each bit set */
404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
405 COSTS_N_INSNS (18), /* HI */
406 COSTS_N_INSNS (18), /* SI */
407 COSTS_N_INSNS (18), /* DI */
408 COSTS_N_INSNS (18)}, /* other */
409 COSTS_N_INSNS (2), /* cost of movsx */
410 COSTS_N_INSNS (2), /* cost of movzx */
411 8, /* "large" insn */
413 3, /* cost for loading QImode using movzbl */
414 {4, 5, 4}, /* cost of loading integer registers
415 in QImode, HImode and SImode.
416 Relative to reg-reg move (2). */
417 {2, 3, 2}, /* cost of storing integer registers */
418 4, /* cost of reg,reg fld/fst */
419 {6, 6, 6}, /* cost of loading fp registers
420 in SFmode, DFmode and XFmode */
421 {4, 4, 4}, /* cost of storing fp registers
422 in SFmode, DFmode and XFmode */
423 2, /* cost of moving MMX register */
424 {2, 2}, /* cost of loading MMX registers
425 in SImode and DImode */
426 {2, 2}, /* cost of storing MMX registers
427 in SImode and DImode */
428 2, /* cost of moving SSE register */
429 {2, 2, 8}, /* cost of loading SSE registers
430 in SImode, DImode and TImode */
431 {2, 2, 8}, /* cost of storing SSE registers
432 in SImode, DImode and TImode */
433 6, /* MMX or SSE register to integer */
434 32, /* size of prefetch block */
435 1, /* number of parallel prefetches */
437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
440 COSTS_N_INSNS (2), /* cost of FABS instruction. */
441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
446 struct processor_costs athlon_cost = {
447 COSTS_N_INSNS (1), /* cost of an add instruction */
448 COSTS_N_INSNS (2), /* cost of a lea instruction */
449 COSTS_N_INSNS (1), /* variable shift costs */
450 COSTS_N_INSNS (1), /* constant shift costs */
451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
452 COSTS_N_INSNS (5), /* HI */
453 COSTS_N_INSNS (5), /* SI */
454 COSTS_N_INSNS (5), /* DI */
455 COSTS_N_INSNS (5)}, /* other */
456 0, /* cost of multiply per each bit set */
457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
458 COSTS_N_INSNS (26), /* HI */
459 COSTS_N_INSNS (42), /* SI */
460 COSTS_N_INSNS (74), /* DI */
461 COSTS_N_INSNS (74)}, /* other */
462 COSTS_N_INSNS (1), /* cost of movsx */
463 COSTS_N_INSNS (1), /* cost of movzx */
464 8, /* "large" insn */
466 4, /* cost for loading QImode using movzbl */
467 {3, 4, 3}, /* cost of loading integer registers
468 in QImode, HImode and SImode.
469 Relative to reg-reg move (2). */
470 {3, 4, 3}, /* cost of storing integer registers */
471 4, /* cost of reg,reg fld/fst */
472 {4, 4, 12}, /* cost of loading fp registers
473 in SFmode, DFmode and XFmode */
474 {6, 6, 8}, /* cost of storing fp registers
475 in SFmode, DFmode and XFmode */
476 2, /* cost of moving MMX register */
477 {4, 4}, /* cost of loading MMX registers
478 in SImode and DImode */
479 {4, 4}, /* cost of storing MMX registers
480 in SImode and DImode */
481 2, /* cost of moving SSE register */
482 {4, 4, 6}, /* cost of loading SSE registers
483 in SImode, DImode and TImode */
484 {4, 4, 5}, /* cost of storing SSE registers
485 in SImode, DImode and TImode */
486 5, /* MMX or SSE register to integer */
487 64, /* size of prefetch block */
488 6, /* number of parallel prefetches */
490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
492 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
499 struct processor_costs k8_cost = {
500 COSTS_N_INSNS (1), /* cost of an add instruction */
501 COSTS_N_INSNS (2), /* cost of a lea instruction */
502 COSTS_N_INSNS (1), /* variable shift costs */
503 COSTS_N_INSNS (1), /* constant shift costs */
504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
505 COSTS_N_INSNS (4), /* HI */
506 COSTS_N_INSNS (3), /* SI */
507 COSTS_N_INSNS (4), /* DI */
508 COSTS_N_INSNS (5)}, /* other */
509 0, /* cost of multiply per each bit set */
510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
511 COSTS_N_INSNS (26), /* HI */
512 COSTS_N_INSNS (42), /* SI */
513 COSTS_N_INSNS (74), /* DI */
514 COSTS_N_INSNS (74)}, /* other */
515 COSTS_N_INSNS (1), /* cost of movsx */
516 COSTS_N_INSNS (1), /* cost of movzx */
517 8, /* "large" insn */
519 4, /* cost for loading QImode using movzbl */
520 {3, 4, 3}, /* cost of loading integer registers
521 in QImode, HImode and SImode.
522 Relative to reg-reg move (2). */
523 {3, 4, 3}, /* cost of storing integer registers */
524 4, /* cost of reg,reg fld/fst */
525 {4, 4, 12}, /* cost of loading fp registers
526 in SFmode, DFmode and XFmode */
527 {6, 6, 8}, /* cost of storing fp registers
528 in SFmode, DFmode and XFmode */
529 2, /* cost of moving MMX register */
530 {3, 3}, /* cost of loading MMX registers
531 in SImode and DImode */
532 {4, 4}, /* cost of storing MMX registers
533 in SImode and DImode */
534 2, /* cost of moving SSE register */
535 {4, 3, 6}, /* cost of loading SSE registers
536 in SImode, DImode and TImode */
537 {4, 4, 5}, /* cost of storing SSE registers
538 in SImode, DImode and TImode */
539 5, /* MMX or SSE register to integer */
540 64, /* size of prefetch block */
541 6, /* number of parallel prefetches */
543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
546 COSTS_N_INSNS (2), /* cost of FABS instruction. */
547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
551 struct processor_costs amdfam10_cost = {
552 COSTS_N_INSNS (1), /* cost of an add instruction */
553 COSTS_N_INSNS (2), /* cost of a lea instruction */
554 COSTS_N_INSNS (1), /* variable shift costs */
555 COSTS_N_INSNS (1), /* constant shift costs */
556 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
557 COSTS_N_INSNS (4), /* HI */
558 COSTS_N_INSNS (3), /* SI */
559 COSTS_N_INSNS (4), /* DI */
560 COSTS_N_INSNS (5)}, /* other */
561 0, /* cost of multiply per each bit set */
562 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
563 COSTS_N_INSNS (35), /* HI */
564 COSTS_N_INSNS (51), /* SI */
565 COSTS_N_INSNS (83), /* DI */
566 COSTS_N_INSNS (83)}, /* other */
567 COSTS_N_INSNS (1), /* cost of movsx */
568 COSTS_N_INSNS (1), /* cost of movzx */
569 8, /* "large" insn */
571 4, /* cost for loading QImode using movzbl */
572 {3, 4, 3}, /* cost of loading integer registers
573 in QImode, HImode and SImode.
574 Relative to reg-reg move (2). */
575 {3, 4, 3}, /* cost of storing integer registers */
576 4, /* cost of reg,reg fld/fst */
577 {4, 4, 12}, /* cost of loading fp registers
578 in SFmode, DFmode and XFmode */
579 {6, 6, 8}, /* cost of storing fp registers
580 in SFmode, DFmode and XFmode */
581 2, /* cost of moving MMX register */
582 {3, 3}, /* cost of loading MMX registers
583 in SImode and DImode */
584 {4, 4}, /* cost of storing MMX registers
585 in SImode and DImode */
586 2, /* cost of moving SSE register */
587 {4, 4, 3}, /* cost of loading SSE registers
588 in SImode, DImode and TImode */
589 {4, 4, 5}, /* cost of storing SSE registers
590 in SImode, DImode and TImode */
591 3, /* MMX or SSE register to integer */
593 MOVD reg64, xmmreg Double FSTORE 4
594 MOVD reg32, xmmreg Double FSTORE 4
596 MOVD reg64, xmmreg Double FADD 3
598 MOVD reg32, xmmreg Double FADD 3
600 64, /* size of prefetch block */
601 /* New AMD processors never drop prefetches; if they cannot be performed
602 immediately, they are queued. We set number of simultaneous prefetches
603 to a large constant to reflect this (it probably is not a good idea not
604 to limit number of prefetches at all, as their execution also takes some
606 100, /* number of parallel prefetches */
608 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
609 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
610 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
611 COSTS_N_INSNS (2), /* cost of FABS instruction. */
612 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
613 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
617 struct processor_costs pentium4_cost = {
618 COSTS_N_INSNS (1), /* cost of an add instruction */
619 COSTS_N_INSNS (3), /* cost of a lea instruction */
620 COSTS_N_INSNS (4), /* variable shift costs */
621 COSTS_N_INSNS (4), /* constant shift costs */
622 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
623 COSTS_N_INSNS (15), /* HI */
624 COSTS_N_INSNS (15), /* SI */
625 COSTS_N_INSNS (15), /* DI */
626 COSTS_N_INSNS (15)}, /* other */
627 0, /* cost of multiply per each bit set */
628 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
629 COSTS_N_INSNS (56), /* HI */
630 COSTS_N_INSNS (56), /* SI */
631 COSTS_N_INSNS (56), /* DI */
632 COSTS_N_INSNS (56)}, /* other */
633 COSTS_N_INSNS (1), /* cost of movsx */
634 COSTS_N_INSNS (1), /* cost of movzx */
635 16, /* "large" insn */
637 2, /* cost for loading QImode using movzbl */
638 {4, 5, 4}, /* cost of loading integer registers
639 in QImode, HImode and SImode.
640 Relative to reg-reg move (2). */
641 {2, 3, 2}, /* cost of storing integer registers */
642 2, /* cost of reg,reg fld/fst */
643 {2, 2, 6}, /* cost of loading fp registers
644 in SFmode, DFmode and XFmode */
645 {4, 4, 6}, /* cost of storing fp registers
646 in SFmode, DFmode and XFmode */
647 2, /* cost of moving MMX register */
648 {2, 2}, /* cost of loading MMX registers
649 in SImode and DImode */
650 {2, 2}, /* cost of storing MMX registers
651 in SImode and DImode */
652 12, /* cost of moving SSE register */
653 {12, 12, 12}, /* cost of loading SSE registers
654 in SImode, DImode and TImode */
655 {2, 2, 8}, /* cost of storing SSE registers
656 in SImode, DImode and TImode */
657 10, /* MMX or SSE register to integer */
658 64, /* size of prefetch block */
659 6, /* number of parallel prefetches */
661 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
662 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
663 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
664 COSTS_N_INSNS (2), /* cost of FABS instruction. */
665 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
666 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
670 struct processor_costs nocona_cost = {
671 COSTS_N_INSNS (1), /* cost of an add instruction */
672 COSTS_N_INSNS (1), /* cost of a lea instruction */
673 COSTS_N_INSNS (1), /* variable shift costs */
674 COSTS_N_INSNS (1), /* constant shift costs */
675 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
676 COSTS_N_INSNS (10), /* HI */
677 COSTS_N_INSNS (10), /* SI */
678 COSTS_N_INSNS (10), /* DI */
679 COSTS_N_INSNS (10)}, /* other */
680 0, /* cost of multiply per each bit set */
681 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
682 COSTS_N_INSNS (66), /* HI */
683 COSTS_N_INSNS (66), /* SI */
684 COSTS_N_INSNS (66), /* DI */
685 COSTS_N_INSNS (66)}, /* other */
686 COSTS_N_INSNS (1), /* cost of movsx */
687 COSTS_N_INSNS (1), /* cost of movzx */
688 16, /* "large" insn */
690 4, /* cost for loading QImode using movzbl */
691 {4, 4, 4}, /* cost of loading integer registers
692 in QImode, HImode and SImode.
693 Relative to reg-reg move (2). */
694 {4, 4, 4}, /* cost of storing integer registers */
695 3, /* cost of reg,reg fld/fst */
696 {12, 12, 12}, /* cost of loading fp registers
697 in SFmode, DFmode and XFmode */
698 {4, 4, 4}, /* cost of storing fp registers
699 in SFmode, DFmode and XFmode */
700 6, /* cost of moving MMX register */
701 {12, 12}, /* cost of loading MMX registers
702 in SImode and DImode */
703 {12, 12}, /* cost of storing MMX registers
704 in SImode and DImode */
705 6, /* cost of moving SSE register */
706 {12, 12, 12}, /* cost of loading SSE registers
707 in SImode, DImode and TImode */
708 {12, 12, 12}, /* cost of storing SSE registers
709 in SImode, DImode and TImode */
710 8, /* MMX or SSE register to integer */
711 128, /* size of prefetch block */
712 8, /* number of parallel prefetches */
714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
715 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
716 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
717 COSTS_N_INSNS (3), /* cost of FABS instruction. */
718 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
719 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
723 struct processor_costs core2_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (3), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (3), /* DI */
732 COSTS_N_INSNS (3)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (22), /* HI */
736 COSTS_N_INSNS (22), /* SI */
737 COSTS_N_INSNS (22), /* DI */
738 COSTS_N_INSNS (22)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
743 2, /* cost for loading QImode using movzbl */
744 {6, 6, 6}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 2, /* cost of reg,reg fld/fst */
749 {6, 6, 6}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {4, 4, 4}, /* cost of loading integer registers */
752 2, /* cost of moving MMX register */
753 {6, 6}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {6, 6, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 4}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 2, /* MMX or SSE register to integer */
763 128, /* size of prefetch block */
764 8, /* number of parallel prefetches */
766 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
767 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
768 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
769 COSTS_N_INSNS (1), /* cost of FABS instruction. */
770 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
771 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
774 /* Generic64 should produce code tuned for Nocona and K8. */
776 struct processor_costs generic64_cost = {
777 COSTS_N_INSNS (1), /* cost of an add instruction */
778 /* On all chips taken into consideration lea is 2 cycles and more. With
779 this cost however our current implementation of synth_mult results in
780 use of unnecessary temporary registers causing regression on several
781 SPECfp benchmarks. */
782 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
783 COSTS_N_INSNS (1), /* variable shift costs */
784 COSTS_N_INSNS (1), /* constant shift costs */
785 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
786 COSTS_N_INSNS (4), /* HI */
787 COSTS_N_INSNS (3), /* SI */
788 COSTS_N_INSNS (4), /* DI */
789 COSTS_N_INSNS (2)}, /* other */
790 0, /* cost of multiply per each bit set */
791 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
792 COSTS_N_INSNS (26), /* HI */
793 COSTS_N_INSNS (42), /* SI */
794 COSTS_N_INSNS (74), /* DI */
795 COSTS_N_INSNS (74)}, /* other */
796 COSTS_N_INSNS (1), /* cost of movsx */
797 COSTS_N_INSNS (1), /* cost of movzx */
798 8, /* "large" insn */
800 4, /* cost for loading QImode using movzbl */
801 {4, 4, 4}, /* cost of loading integer registers
802 in QImode, HImode and SImode.
803 Relative to reg-reg move (2). */
804 {4, 4, 4}, /* cost of storing integer registers */
805 4, /* cost of reg,reg fld/fst */
806 {12, 12, 12}, /* cost of loading fp registers
807 in SFmode, DFmode and XFmode */
808 {6, 6, 8}, /* cost of storing fp registers
809 in SFmode, DFmode and XFmode */
810 2, /* cost of moving MMX register */
811 {8, 8}, /* cost of loading MMX registers
812 in SImode and DImode */
813 {8, 8}, /* cost of storing MMX registers
814 in SImode and DImode */
815 2, /* cost of moving SSE register */
816 {8, 8, 8}, /* cost of loading SSE registers
817 in SImode, DImode and TImode */
818 {8, 8, 8}, /* cost of storing SSE registers
819 in SImode, DImode and TImode */
820 5, /* MMX or SSE register to integer */
821 64, /* size of prefetch block */
822 6, /* number of parallel prefetches */
823 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
824 is increased to perhaps more appropriate value of 5. */
826 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
827 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
828 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
829 COSTS_N_INSNS (8), /* cost of FABS instruction. */
830 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
831 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
834 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
836 struct processor_costs generic32_cost = {
837 COSTS_N_INSNS (1), /* cost of an add instruction */
838 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
839 COSTS_N_INSNS (1), /* variable shift costs */
840 COSTS_N_INSNS (1), /* constant shift costs */
841 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
842 COSTS_N_INSNS (4), /* HI */
843 COSTS_N_INSNS (3), /* SI */
844 COSTS_N_INSNS (4), /* DI */
845 COSTS_N_INSNS (2)}, /* other */
846 0, /* cost of multiply per each bit set */
847 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
848 COSTS_N_INSNS (26), /* HI */
849 COSTS_N_INSNS (42), /* SI */
850 COSTS_N_INSNS (74), /* DI */
851 COSTS_N_INSNS (74)}, /* other */
852 COSTS_N_INSNS (1), /* cost of movsx */
853 COSTS_N_INSNS (1), /* cost of movzx */
854 8, /* "large" insn */
856 4, /* cost for loading QImode using movzbl */
857 {4, 4, 4}, /* cost of loading integer registers
858 in QImode, HImode and SImode.
859 Relative to reg-reg move (2). */
860 {4, 4, 4}, /* cost of storing integer registers */
861 4, /* cost of reg,reg fld/fst */
862 {12, 12, 12}, /* cost of loading fp registers
863 in SFmode, DFmode and XFmode */
864 {6, 6, 8}, /* cost of storing fp registers
865 in SFmode, DFmode and XFmode */
866 2, /* cost of moving MMX register */
867 {8, 8}, /* cost of loading MMX registers
868 in SImode and DImode */
869 {8, 8}, /* cost of storing MMX registers
870 in SImode and DImode */
871 2, /* cost of moving SSE register */
872 {8, 8, 8}, /* cost of loading SSE registers
873 in SImode, DImode and TImode */
874 {8, 8, 8}, /* cost of storing SSE registers
875 in SImode, DImode and TImode */
876 5, /* MMX or SSE register to integer */
877 64, /* size of prefetch block */
878 6, /* number of parallel prefetches */
880 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
881 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
882 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
883 COSTS_N_INSNS (8), /* cost of FABS instruction. */
884 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
885 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
888 const struct processor_costs *ix86_cost = &pentium_cost;
890 /* Processor feature/optimization bitmasks. */
891 #define m_386 (1<<PROCESSOR_I386)
892 #define m_486 (1<<PROCESSOR_I486)
893 #define m_PENT (1<<PROCESSOR_PENTIUM)
894 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
895 #define m_GEODE (1<<PROCESSOR_GEODE)
896 #define m_K6_GEODE (m_K6 | m_GEODE)
897 #define m_K6 (1<<PROCESSOR_K6)
898 #define m_ATHLON (1<<PROCESSOR_ATHLON)
899 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
900 #define m_K8 (1<<PROCESSOR_K8)
901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
903 #define m_NOCONA (1<<PROCESSOR_NOCONA)
904 #define m_CORE2 (1<<PROCESSOR_CORE2)
905 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
906 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
907 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
908 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
910 /* Generic instruction choice should be common subset of supported CPUs
911 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
913 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
914 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
915 generic because it is not working well with PPro base chips. */
916 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
918 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
919 | m_NOCONA | m_CORE2 | m_GENERIC;
920 const int x86_zero_extend_with_and = m_486 | m_PENT;
921 /* Enable to zero extend integer registers to avoid partial dependencies */
922 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
923 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
924 const int x86_double_with_add = ~m_386;
925 const int x86_use_bit_test = m_386;
926 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
927 | m_K6 | m_CORE2 | m_GENERIC;
928 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
930 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
931 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
932 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
933 /* Branch hints were put in P4 based on simulation result. But
934 after P4 was made, no performance benefit was observed with
935 branch hints. It also increases the code size. As the result,
936 icc never generates branch hints. */
937 const int x86_branch_hints = 0;
938 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
939 /*m_GENERIC | m_ATHLON_K8 ? */
940 /* We probably ought to watch for partial register stalls on Generic32
941 compilation setting as well. However in current implementation the
942 partial register stalls are not eliminated very well - they can
943 be introduced via subregs synthesized by combine and can happen
944 in caller/callee saving sequences.
945 Because this option pays back little on PPro based chips and is in conflict
946 with partial reg. dependencies used by Athlon/P4 based chips, it is better
947 to leave it off for generic32 for now. */
948 const int x86_partial_reg_stall = m_PPRO;
949 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
950 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
951 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
952 | m_CORE2 | m_GENERIC);
953 const int x86_use_mov0 = m_K6;
954 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
955 const int x86_read_modify_write = ~m_PENT;
956 const int x86_read_modify = ~(m_PENT | m_PPRO);
957 const int x86_split_long_moves = m_PPRO;
958 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
959 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
961 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
962 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
963 const int x86_qimode_math = ~(0);
964 const int x86_promote_qi_regs = 0;
965 /* On PPro this flag is meant to avoid partial register stalls. Just like
966 the x86_partial_reg_stall this option might be considered for Generic32
967 if our scheme for avoiding partial stalls was more effective. */
968 const int x86_himode_math = ~(m_PPRO);
969 const int x86_promote_hi_regs = m_PPRO;
970 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
971 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
972 | m_CORE2 | m_GENERIC;
973 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
974 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
976 | m_CORE2 | m_GENERIC;
977 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
978 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979 /* Enable if integer moves are preferred for DFmode copies */
980 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
981 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
982 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
983 | m_CORE2 | m_GENERIC;
984 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
985 | m_CORE2 | m_GENERIC;
986 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
987 for outgoing arguments will be computed and placed into the variable
988 `current_function_outgoing_args_size'. No space will be pushed onto the stack
989 for each call; instead, the function prologue should increase the stack frame
990 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
992 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
993 | m_NOCONA | m_PPRO | m_CORE2
995 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
996 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
997 const int x86_shift1 = ~m_486;
998 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
999 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1000 | m_NOCONA | m_CORE2 | m_GENERIC;
1001 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
1002 that thread 128bit SSE registers as single units versus K8 based chips that
1003 divide SSE registers to two 64bit halves.
1004 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
1005 to allow register renaming on 128bit SSE units, but usually results in one
1006 extra microop on 64bit SSE units. Experimental results shows that disabling
1007 this option on P4 brings over 20% SPECfp regression, while enabling it on
1008 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
1010 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1011 | m_GENERIC | m_AMDFAM10;
1012 /* Set for machines where the type and dependencies are resolved on SSE
1013 register parts instead of whole registers, so we may maintain just
1014 lower part of scalar values in proper format leaving the upper part
1016 const int x86_sse_split_regs = m_ATHLON_K8;
1017 /* Code generation for scalar reg-reg moves of single and double precision data:
1018 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1022 if (x86_sse_partial_reg_dependency == true)
1027 Code generation for scalar loads of double precision data:
1028 if (x86_sse_split_regs == true)
1029 movlpd mem, reg (gas syntax)
1033 Code generation for unaligned packed loads of single precision data
1034 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1035 if (x86_sse_unaligned_move_optimal)
1038 if (x86_sse_partial_reg_dependency == true)
1050 Code generation for unaligned packed loads of double precision data
1051 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1052 if (x86_sse_unaligned_move_optimal)
1055 if (x86_sse_split_regs == true)
1066 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1067 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1068 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1069 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1070 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
1071 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1073 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1074 integer data in xmm registers. Which results in pretty abysmal code. */
1075 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1077 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1078 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1079 /* Some CPU cores are not able to predict more than 4 branch instructions in
1080 the 16 byte window. */
1081 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1082 | m_NOCONA | m_CORE2 | m_GENERIC;
1083 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1084 | m_CORE2 | m_GENERIC;
1085 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1086 /* Compare and exchange was added for 80486. */
1087 const int x86_cmpxchg = ~m_386;
1088 /* Compare and exchange 8 bytes was added for pentium. */
1089 const int x86_cmpxchg8b = ~(m_386 | m_486);
1090 /* Exchange and add was added for 80486. */
1091 const int x86_xadd = ~m_386;
1092 /* Byteswap was added for 80486. */
1093 const int x86_bswap = ~m_386;
1094 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1096 /* In case the average insn count for single function invocation is
1097 lower than this constant, emit fast (but longer) prologue and
1099 #define FAST_PROLOGUE_INSN_COUNT 20
1101 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1102 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1103 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1104 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1106 /* Array of the smallest class containing reg number REGNO, indexed by
1107 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1109 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1111 /* ax, dx, cx, bx */
1112 AREG, DREG, CREG, BREG,
1113 /* si, di, bp, sp */
1114 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1116 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1117 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1120 /* flags, fpsr, dirflag, frame */
1121 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1122 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1124 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1126 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1127 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1128 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1132 /* The "default" register map used in 32bit mode. */
1134 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1145 static int const x86_64_int_parameter_registers[6] =
1147 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1148 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1151 static int const x86_64_int_return_registers[4] =
1153 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1156 /* The "default" register map used in 64bit mode. */
1157 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1159 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1160 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1162 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1163 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1164 8,9,10,11,12,13,14,15, /* extended integer registers */
1165 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1168 /* Define the register numbers to be used in Dwarf debugging information.
1169 The SVR4 reference port C compiler uses the following register numbers
1170 in its Dwarf output code:
1171 0 for %eax (gcc regno = 0)
1172 1 for %ecx (gcc regno = 2)
1173 2 for %edx (gcc regno = 1)
1174 3 for %ebx (gcc regno = 3)
1175 4 for %esp (gcc regno = 7)
1176 5 for %ebp (gcc regno = 6)
1177 6 for %esi (gcc regno = 4)
1178 7 for %edi (gcc regno = 5)
1179 The following three DWARF register numbers are never generated by
1180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1181 believes these numbers have these meanings.
1182 8 for %eip (no gcc equivalent)
1183 9 for %eflags (gcc regno = 17)
1184 10 for %trapno (no gcc equivalent)
1185 It is not at all clear how we should number the FP stack registers
1186 for the x86 architecture. If the version of SDB on x86/svr4 were
1187 a bit less brain dead with respect to floating-point then we would
1188 have a precedent to follow with respect to DWARF register numbers
1189 for x86 FP registers, but the SDB on x86/svr4 is so completely
1190 broken with respect to FP registers that it is hardly worth thinking
1191 of it as something to strive for compatibility with.
1192 The version of x86/svr4 SDB I have at the moment does (partially)
1193 seem to believe that DWARF register number 11 is associated with
1194 the x86 register %st(0), but that's about all. Higher DWARF
1195 register numbers don't seem to be associated with anything in
1196 particular, and even for DWARF regno 11, SDB only seems to under-
1197 stand that it should say that a variable lives in %st(0) (when
1198 asked via an `=' command) if we said it was in DWARF regno 11,
1199 but SDB still prints garbage when asked for the value of the
1200 variable in question (via a `/' command).
1201 (Also note that the labels SDB prints for various FP stack regs
1202 when doing an `x' command are all wrong.)
1203 Note that these problems generally don't affect the native SVR4
1204 C compiler because it doesn't allow the use of -O with -g and
1205 because when it is *not* optimizing, it allocates a memory
1206 location for each floating-point variable, and the memory
1207 location is what gets described in the DWARF AT_location
1208 attribute for the variable in question.
1209 Regardless of the severe mental illness of the x86/svr4 SDB, we
1210 do something sensible here and we use the following DWARF
1211 register numbers. Note that these are all stack-top-relative
1213 11 for %st(0) (gcc regno = 8)
1214 12 for %st(1) (gcc regno = 9)
1215 13 for %st(2) (gcc regno = 10)
1216 14 for %st(3) (gcc regno = 11)
1217 15 for %st(4) (gcc regno = 12)
1218 16 for %st(5) (gcc regno = 13)
1219 17 for %st(6) (gcc regno = 14)
1220 18 for %st(7) (gcc regno = 15)
1222 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1233 /* Test and compare insns in i386.md store the information needed to
1234 generate branch and scc insns here. */
1236 rtx ix86_compare_op0 = NULL_RTX;
1237 rtx ix86_compare_op1 = NULL_RTX;
1238 rtx ix86_compare_emitted = NULL_RTX;
1240 /* Size of the register save area. */
1241 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1243 /* Define the structure for the machine field in struct function. */
1245 struct stack_local_entry GTY(())
1247 unsigned short mode;
1250 struct stack_local_entry *next;
1253 /* Structure describing stack frame layout.
1254 Stack grows downward:
1260 saved frame pointer if frame_pointer_needed
1261 <- HARD_FRAME_POINTER
1266 [va_arg registers] (
1267 > to_allocate <- FRAME_POINTER
1277 HOST_WIDE_INT frame;
1279 int outgoing_arguments_size;
1282 HOST_WIDE_INT to_allocate;
1283 /* The offsets relative to ARG_POINTER. */
1284 HOST_WIDE_INT frame_pointer_offset;
1285 HOST_WIDE_INT hard_frame_pointer_offset;
1286 HOST_WIDE_INT stack_pointer_offset;
1288 /* When save_regs_using_mov is set, emit prologue using
1289 move instead of push instructions. */
1290 bool save_regs_using_mov;
1293 /* Code model option. */
1294 enum cmodel ix86_cmodel;
1296 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1298 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1300 /* Which unit we are generating floating point math for. */
1301 enum fpmath_unit ix86_fpmath;
1303 /* Which cpu are we scheduling for. */
1304 enum processor_type ix86_tune;
1305 /* Which instruction set architecture to use. */
1306 enum processor_type ix86_arch;
1308 /* true if sse prefetch instruction is not NOOP. */
1309 int x86_prefetch_sse;
1311 /* true if cmpxchg16b is supported. */
1314 /* ix86_regparm_string as a number */
1315 static int ix86_regparm;
1317 /* -mstackrealign option */
1318 extern int ix86_force_align_arg_pointer;
1319 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1321 /* Preferred alignment for stack boundary in bits. */
1322 unsigned int ix86_preferred_stack_boundary;
1324 /* Values 1-5: see jump.c */
1325 int ix86_branch_cost;
1327 /* Variables which are this size or smaller are put in the data/bss
1328 or ldata/lbss sections. */
1330 int ix86_section_threshold = 65536;
1332 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1333 char internal_label_prefix[16];
1334 int internal_label_prefix_len;
1336 static bool ix86_handle_option (size_t, const char *, int);
1337 static void output_pic_addr_const (FILE *, rtx, int);
1338 static void put_condition_code (enum rtx_code, enum machine_mode,
1340 static const char *get_some_local_dynamic_name (void);
1341 static int get_some_local_dynamic_name_1 (rtx *, void *);
1342 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1343 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1345 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1346 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1348 static rtx get_thread_pointer (int);
1349 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1350 static void get_pc_thunk_name (char [32], unsigned int);
1351 static rtx gen_push (rtx);
1352 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1353 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1354 static struct machine_function * ix86_init_machine_status (void);
1355 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1356 static int ix86_nsaved_regs (void);
1357 static void ix86_emit_save_regs (void);
1358 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1359 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1360 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1361 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1362 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1363 static rtx ix86_expand_aligntest (rtx, int);
1364 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1365 static int ix86_issue_rate (void);
1366 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1367 static int ia32_multipass_dfa_lookahead (void);
1368 static void ix86_init_mmx_sse_builtins (void);
1369 static rtx x86_this_parameter (tree);
1370 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1371 HOST_WIDE_INT, tree);
1372 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1373 static void x86_file_start (void);
1374 static void ix86_reorg (void);
1375 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1376 static tree ix86_build_builtin_va_list (void);
1377 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1379 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1380 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1381 static bool ix86_vector_mode_supported_p (enum machine_mode);
1383 static int ix86_address_cost (rtx);
1384 static bool ix86_cannot_force_const_mem (rtx);
1385 static rtx ix86_delegitimize_address (rtx);
1387 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1389 struct builtin_description;
1390 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1392 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1394 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1395 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1396 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1397 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1398 static rtx safe_vector_operand (rtx, enum machine_mode);
1399 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1400 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1401 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1402 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1403 static int ix86_fp_comparison_cost (enum rtx_code code);
1404 static unsigned int ix86_select_alt_pic_regnum (void);
1405 static int ix86_save_reg (unsigned int, int);
1406 static void ix86_compute_frame_layout (struct ix86_frame *);
1407 static int ix86_comp_type_attributes (tree, tree);
1408 static int ix86_function_regparm (tree, tree);
1409 const struct attribute_spec ix86_attribute_table[];
1410 static bool ix86_function_ok_for_sibcall (tree, tree);
1411 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1412 static int ix86_value_regno (enum machine_mode, tree, tree);
1413 static bool contains_128bit_aligned_vector_p (tree);
1414 static rtx ix86_struct_value_rtx (tree, int);
1415 static bool ix86_ms_bitfield_layout_p (tree);
1416 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1417 static int extended_reg_mentioned_1 (rtx *, void *);
1418 static bool ix86_rtx_costs (rtx, int, int, int *);
1419 static int min_insn_size (rtx);
1420 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1421 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1422 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1424 static void ix86_init_builtins (void);
1425 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1426 static const char *ix86_mangle_fundamental_type (tree);
1427 static tree ix86_stack_protect_fail (void);
1428 static rtx ix86_internal_arg_pointer (void);
1429 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1431 /* This function is only used on Solaris. */
1432 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1435 /* Register class used for passing given 64bit part of the argument.
1436 These represent classes as documented by the PS ABI, with the exception
1437 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1438 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1440 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1441 whenever possible (upper half does contain padding).
1443 enum x86_64_reg_class
1446 X86_64_INTEGER_CLASS,
1447 X86_64_INTEGERSI_CLASS,
1454 X86_64_COMPLEX_X87_CLASS,
1457 static const char * const x86_64_reg_class_name[] = {
1458 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1459 "sseup", "x87", "x87up", "cplx87", "no"
1462 #define MAX_CLASSES 4
1464 /* Table of constants used by fldpi, fldln2, etc.... */
1465 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1466 static bool ext_80387_constants_init = 0;
1467 static void init_ext_80387_constants (void);
1468 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1469 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1470 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1471 static section *x86_64_elf_select_section (tree decl, int reloc,
1472 unsigned HOST_WIDE_INT align)
1475 /* Initialize the GCC target structure. */
1476 #undef TARGET_ATTRIBUTE_TABLE
1477 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1478 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1479 # undef TARGET_MERGE_DECL_ATTRIBUTES
1480 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1483 #undef TARGET_COMP_TYPE_ATTRIBUTES
1484 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1486 #undef TARGET_INIT_BUILTINS
1487 #define TARGET_INIT_BUILTINS ix86_init_builtins
1488 #undef TARGET_EXPAND_BUILTIN
1489 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1491 #undef TARGET_ASM_FUNCTION_EPILOGUE
1492 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1494 #undef TARGET_ENCODE_SECTION_INFO
1495 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1496 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1498 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1501 #undef TARGET_ASM_OPEN_PAREN
1502 #define TARGET_ASM_OPEN_PAREN ""
1503 #undef TARGET_ASM_CLOSE_PAREN
1504 #define TARGET_ASM_CLOSE_PAREN ""
1506 #undef TARGET_ASM_ALIGNED_HI_OP
1507 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1508 #undef TARGET_ASM_ALIGNED_SI_OP
1509 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1511 #undef TARGET_ASM_ALIGNED_DI_OP
1512 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1515 #undef TARGET_ASM_UNALIGNED_HI_OP
1516 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1517 #undef TARGET_ASM_UNALIGNED_SI_OP
1518 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1519 #undef TARGET_ASM_UNALIGNED_DI_OP
1520 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1522 #undef TARGET_SCHED_ADJUST_COST
1523 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1524 #undef TARGET_SCHED_ISSUE_RATE
1525 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1526 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1527 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1528 ia32_multipass_dfa_lookahead
1530 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1531 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1534 #undef TARGET_HAVE_TLS
1535 #define TARGET_HAVE_TLS true
1537 #undef TARGET_CANNOT_FORCE_CONST_MEM
1538 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1539 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1540 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1542 #undef TARGET_DELEGITIMIZE_ADDRESS
1543 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1545 #undef TARGET_MS_BITFIELD_LAYOUT_P
1546 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1549 #undef TARGET_BINDS_LOCAL_P
1550 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1553 #undef TARGET_ASM_OUTPUT_MI_THUNK
1554 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1555 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1556 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1558 #undef TARGET_ASM_FILE_START
1559 #define TARGET_ASM_FILE_START x86_file_start
1561 #undef TARGET_DEFAULT_TARGET_FLAGS
1562 #define TARGET_DEFAULT_TARGET_FLAGS \
1564 | TARGET_64BIT_DEFAULT \
1565 | TARGET_SUBTARGET_DEFAULT \
1566 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1568 #undef TARGET_HANDLE_OPTION
1569 #define TARGET_HANDLE_OPTION ix86_handle_option
1571 #undef TARGET_RTX_COSTS
1572 #define TARGET_RTX_COSTS ix86_rtx_costs
1573 #undef TARGET_ADDRESS_COST
1574 #define TARGET_ADDRESS_COST ix86_address_cost
1576 #undef TARGET_FIXED_CONDITION_CODE_REGS
1577 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1578 #undef TARGET_CC_MODES_COMPATIBLE
1579 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1581 #undef TARGET_MACHINE_DEPENDENT_REORG
1582 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1584 #undef TARGET_BUILD_BUILTIN_VA_LIST
1585 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1587 #undef TARGET_MD_ASM_CLOBBERS
1588 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1590 #undef TARGET_PROMOTE_PROTOTYPES
1591 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1592 #undef TARGET_STRUCT_VALUE_RTX
1593 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1594 #undef TARGET_SETUP_INCOMING_VARARGS
1595 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1596 #undef TARGET_MUST_PASS_IN_STACK
1597 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1598 #undef TARGET_PASS_BY_REFERENCE
1599 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1600 #undef TARGET_INTERNAL_ARG_POINTER
1601 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1602 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1603 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1605 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1606 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1608 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1609 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1611 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1612 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1615 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1616 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1619 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1620 #undef TARGET_INSERT_ATTRIBUTES
1621 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1624 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1625 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1627 #undef TARGET_STACK_PROTECT_FAIL
1628 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1630 #undef TARGET_FUNCTION_VALUE
1631 #define TARGET_FUNCTION_VALUE ix86_function_value
1633 struct gcc_target targetm = TARGET_INITIALIZER;
1636 /* The svr4 ABI for the i386 says that records and unions are returned
1638 #ifndef DEFAULT_PCC_STRUCT_RETURN
1639 #define DEFAULT_PCC_STRUCT_RETURN 1
1642 /* Implement TARGET_HANDLE_OPTION. */
1645 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1652 target_flags &= ~MASK_3DNOW_A;
1653 target_flags_explicit |= MASK_3DNOW_A;
1660 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1661 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1668 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1669 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1676 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1677 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1684 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1685 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1692 target_flags &= ~MASK_AES;
1693 target_flags_explicit |= MASK_AES;
1702 /* Sometimes certain combinations of command options do not make
1703 sense on a particular target machine. You can define a macro
1704 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1705 defined, is executed once just after all the command options have
1708 Don't use this macro to turn on various extra optimizations for
1709 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1712 override_options (void)
1715 int ix86_tune_defaulted = 0;
1717 /* Comes from final.c -- no real reason to change it. */
1718 #define MAX_CODE_ALIGN 16
1722 const struct processor_costs *cost; /* Processor costs */
1723 const int target_enable; /* Target flags to enable. */
1724 const int target_disable; /* Target flags to disable. */
1725 const int align_loop; /* Default alignments. */
1726 const int align_loop_max_skip;
1727 const int align_jump;
1728 const int align_jump_max_skip;
1729 const int align_func;
1731 const processor_target_table[PROCESSOR_max] =
1733 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1734 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1735 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1736 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1737 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1738 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1739 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1740 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1741 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1742 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1743 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1744 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1745 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1746 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1749 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1752 const char *const name; /* processor name or nickname. */
1753 const enum processor_type processor;
1754 const enum pta_flags
1760 PTA_PREFETCH_SSE = 16,
1771 const processor_alias_table[] =
1773 {"i386", PROCESSOR_I386, 0},
1774 {"i486", PROCESSOR_I486, 0},
1775 {"i586", PROCESSOR_PENTIUM, 0},
1776 {"pentium", PROCESSOR_PENTIUM, 0},
1777 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1778 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1779 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1780 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1781 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1782 {"i686", PROCESSOR_PENTIUMPRO, 0},
1783 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1784 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1785 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1786 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1787 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1788 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1789 | PTA_MMX | PTA_PREFETCH_SSE},
1790 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1791 | PTA_MMX | PTA_PREFETCH_SSE},
1792 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1793 | PTA_MMX | PTA_PREFETCH_SSE},
1794 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1795 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1796 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1797 | PTA_64BIT | PTA_MMX
1798 | PTA_PREFETCH_SSE | PTA_CX16},
1799 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1801 {"k6", PROCESSOR_K6, PTA_MMX},
1802 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1803 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1804 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1806 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1807 | PTA_3DNOW | PTA_3DNOW_A},
1808 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1809 | PTA_3DNOW_A | PTA_SSE},
1810 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1811 | PTA_3DNOW_A | PTA_SSE},
1812 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1813 | PTA_3DNOW_A | PTA_SSE},
1814 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1815 | PTA_SSE | PTA_SSE2 },
1816 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1818 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1819 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1821 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1823 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1824 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1826 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1827 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1828 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1829 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1831 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1832 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1833 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1834 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1835 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1836 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1837 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1838 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1839 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1840 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1841 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1842 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1845 int const pta_size = ARRAY_SIZE (processor_alias_table);
1847 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1848 SUBTARGET_OVERRIDE_OPTIONS;
1851 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1852 SUBSUBTARGET_OVERRIDE_OPTIONS;
1855 /* -fPIC is the default for x86_64. */
1856 if (TARGET_MACHO && TARGET_64BIT)
1859 /* Set the default values for switches whose default depends on TARGET_64BIT
1860 in case they weren't overwritten by command line options. */
1863 /* Mach-O doesn't support omitting the frame pointer for now. */
1864 if (flag_omit_frame_pointer == 2)
1865 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1866 if (flag_asynchronous_unwind_tables == 2)
1867 flag_asynchronous_unwind_tables = 1;
1868 if (flag_pcc_struct_return == 2)
1869 flag_pcc_struct_return = 0;
1873 if (flag_omit_frame_pointer == 2)
1874 flag_omit_frame_pointer = 0;
1875 if (flag_asynchronous_unwind_tables == 2)
1876 flag_asynchronous_unwind_tables = 0;
1877 if (flag_pcc_struct_return == 2)
1878 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1881 /* Need to check -mtune=generic first. */
1882 if (ix86_tune_string)
1884 if (!strcmp (ix86_tune_string, "generic")
1885 || !strcmp (ix86_tune_string, "i686")
1886 /* As special support for cross compilers we read -mtune=native
1887 as -mtune=generic. With native compilers we won't see the
1888 -mtune=native, as it was changed by the driver. */
1889 || !strcmp (ix86_tune_string, "native"))
1892 ix86_tune_string = "generic64";
1894 ix86_tune_string = "generic32";
1896 else if (!strncmp (ix86_tune_string, "generic", 7))
1897 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1901 if (ix86_arch_string)
1902 ix86_tune_string = ix86_arch_string;
1903 if (!ix86_tune_string)
1905 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1906 ix86_tune_defaulted = 1;
1909 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1910 need to use a sensible tune option. */
1911 if (!strcmp (ix86_tune_string, "generic")
1912 || !strcmp (ix86_tune_string, "x86-64")
1913 || !strcmp (ix86_tune_string, "i686"))
1916 ix86_tune_string = "generic64";
1918 ix86_tune_string = "generic32";
1921 if (!strcmp (ix86_tune_string, "x86-64"))
1922 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1923 "-mtune=generic instead as appropriate.");
1925 if (!ix86_arch_string)
1926 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1927 if (!strcmp (ix86_arch_string, "generic"))
1928 error ("generic CPU can be used only for -mtune= switch");
1929 if (!strncmp (ix86_arch_string, "generic", 7))
1930 error ("bad value (%s) for -march= switch", ix86_arch_string);
1932 if (ix86_cmodel_string != 0)
1934 if (!strcmp (ix86_cmodel_string, "small"))
1935 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1936 else if (!strcmp (ix86_cmodel_string, "medium"))
1937 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1939 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1940 else if (!strcmp (ix86_cmodel_string, "32"))
1941 ix86_cmodel = CM_32;
1942 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1943 ix86_cmodel = CM_KERNEL;
1944 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1945 ix86_cmodel = CM_LARGE;
1947 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1951 ix86_cmodel = CM_32;
1953 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1955 if (ix86_asm_string != 0)
1958 && !strcmp (ix86_asm_string, "intel"))
1959 ix86_asm_dialect = ASM_INTEL;
1960 else if (!strcmp (ix86_asm_string, "att"))
1961 ix86_asm_dialect = ASM_ATT;
1963 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1965 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1966 error ("code model %qs not supported in the %s bit mode",
1967 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1968 if (ix86_cmodel == CM_LARGE)
1969 sorry ("code model %<large%> not supported yet");
1970 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1971 sorry ("%i-bit mode not compiled in",
1972 (target_flags & MASK_64BIT) ? 64 : 32);
1974 for (i = 0; i < pta_size; i++)
1975 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1977 ix86_arch = processor_alias_table[i].processor;
1978 /* Default cpu tuning to the architecture. */
1979 ix86_tune = ix86_arch;
1980 if (processor_alias_table[i].flags & PTA_MMX
1981 && !(target_flags_explicit & MASK_MMX))
1982 target_flags |= MASK_MMX;
1983 if (processor_alias_table[i].flags & PTA_3DNOW
1984 && !(target_flags_explicit & MASK_3DNOW))
1985 target_flags |= MASK_3DNOW;
1986 if (processor_alias_table[i].flags & PTA_3DNOW_A
1987 && !(target_flags_explicit & MASK_3DNOW_A))
1988 target_flags |= MASK_3DNOW_A;
1989 if (processor_alias_table[i].flags & PTA_SSE
1990 && !(target_flags_explicit & MASK_SSE))
1991 target_flags |= MASK_SSE;
1992 if (processor_alias_table[i].flags & PTA_SSE2
1993 && !(target_flags_explicit & MASK_SSE2))
1994 target_flags |= MASK_SSE2;
1995 if (processor_alias_table[i].flags & PTA_SSE3
1996 && !(target_flags_explicit & MASK_SSE3))
1997 target_flags |= MASK_SSE3;
1998 if (processor_alias_table[i].flags & PTA_SSSE3
1999 && !(target_flags_explicit & MASK_SSSE3))
2000 target_flags |= MASK_SSSE3;
2001 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2002 x86_prefetch_sse = true;
2003 if (processor_alias_table[i].flags & PTA_CX16)
2004 x86_cmpxchg16b = true;
2005 if (processor_alias_table[i].flags & PTA_POPCNT
2006 && !(target_flags_explicit & MASK_POPCNT))
2007 target_flags |= MASK_POPCNT;
2008 if (processor_alias_table[i].flags & PTA_ABM
2009 && !(target_flags_explicit & MASK_ABM))
2010 target_flags |= MASK_ABM;
2011 if (processor_alias_table[i].flags & PTA_SSE4A
2012 && !(target_flags_explicit & MASK_SSE4A))
2013 target_flags |= MASK_SSE4A;
2014 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2015 error ("CPU you selected does not support x86-64 "
2021 error ("bad value (%s) for -march= switch", ix86_arch_string);
2023 for (i = 0; i < pta_size; i++)
2024 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2026 ix86_tune = processor_alias_table[i].processor;
2027 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2029 if (ix86_tune_defaulted)
2031 ix86_tune_string = "x86-64";
2032 for (i = 0; i < pta_size; i++)
2033 if (! strcmp (ix86_tune_string,
2034 processor_alias_table[i].name))
2036 ix86_tune = processor_alias_table[i].processor;
2039 error ("CPU you selected does not support x86-64 "
2042 /* Intel CPUs have always interpreted SSE prefetch instructions as
2043 NOPs; so, we can enable SSE prefetch instructions even when
2044 -mtune (rather than -march) points us to a processor that has them.
2045 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2046 higher processors. */
2047 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2048 x86_prefetch_sse = true;
2052 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2055 ix86_cost = &size_cost;
2057 ix86_cost = processor_target_table[ix86_tune].cost;
2058 target_flags |= processor_target_table[ix86_tune].target_enable;
2059 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2061 /* Arrange to set up i386_stack_locals for all functions. */
2062 init_machine_status = ix86_init_machine_status;
2064 /* Validate -mregparm= value. */
2065 if (ix86_regparm_string)
2067 i = atoi (ix86_regparm_string);
2068 if (i < 0 || i > REGPARM_MAX)
2069 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2075 ix86_regparm = REGPARM_MAX;
2077 /* If the user has provided any of the -malign-* options,
2078 warn and use that value only if -falign-* is not set.
2079 Remove this code in GCC 3.2 or later. */
2080 if (ix86_align_loops_string)
2082 warning (0, "-malign-loops is obsolete, use -falign-loops");
2083 if (align_loops == 0)
2085 i = atoi (ix86_align_loops_string);
2086 if (i < 0 || i > MAX_CODE_ALIGN)
2087 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2089 align_loops = 1 << i;
2093 if (ix86_align_jumps_string)
2095 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2096 if (align_jumps == 0)
2098 i = atoi (ix86_align_jumps_string);
2099 if (i < 0 || i > MAX_CODE_ALIGN)
2100 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2102 align_jumps = 1 << i;
2106 if (ix86_align_funcs_string)
2108 warning (0, "-malign-functions is obsolete, use -falign-functions");
2109 if (align_functions == 0)
2111 i = atoi (ix86_align_funcs_string);
2112 if (i < 0 || i > MAX_CODE_ALIGN)
2113 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2115 align_functions = 1 << i;
2119 /* Default align_* from the processor table. */
2120 if (align_loops == 0)
2122 align_loops = processor_target_table[ix86_tune].align_loop;
2123 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2125 if (align_jumps == 0)
2127 align_jumps = processor_target_table[ix86_tune].align_jump;
2128 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2130 if (align_functions == 0)
2132 align_functions = processor_target_table[ix86_tune].align_func;
2135 /* Validate -mbranch-cost= value, or provide default. */
2136 ix86_branch_cost = ix86_cost->branch_cost;
2137 if (ix86_branch_cost_string)
2139 i = atoi (ix86_branch_cost_string);
2141 error ("-mbranch-cost=%d is not between 0 and 5", i);
2143 ix86_branch_cost = i;
2145 if (ix86_section_threshold_string)
2147 i = atoi (ix86_section_threshold_string);
2149 error ("-mlarge-data-threshold=%d is negative", i);
2151 ix86_section_threshold = i;
2154 if (ix86_tls_dialect_string)
2156 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2157 ix86_tls_dialect = TLS_DIALECT_GNU;
2158 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2159 ix86_tls_dialect = TLS_DIALECT_GNU2;
2160 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2161 ix86_tls_dialect = TLS_DIALECT_SUN;
2163 error ("bad value (%s) for -mtls-dialect= switch",
2164 ix86_tls_dialect_string);
2167 /* Keep nonleaf frame pointers. */
2168 if (flag_omit_frame_pointer)
2169 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2170 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2171 flag_omit_frame_pointer = 1;
2173 /* If we're doing fast math, we don't care about comparison order
2174 wrt NaNs. This lets us use a shorter comparison sequence. */
2175 if (flag_finite_math_only)
2176 target_flags &= ~MASK_IEEE_FP;
2178 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2179 since the insns won't need emulation. */
2180 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2181 target_flags &= ~MASK_NO_FANCY_MATH_387;
2183 /* Likewise, if the target doesn't have a 387, or we've specified
2184 software floating point, don't use 387 inline intrinsics. */
2186 target_flags |= MASK_NO_FANCY_MATH_387;
2188 /* Turn on SSE3 builtins for -mssse3. */
2190 target_flags |= MASK_SSE3;
2192 /* Turn on SSE3 builtins for -msse4a. */
2194 target_flags |= MASK_SSE3;
2196 /* Turn on SSE2 builtins for -msse3. */
2198 target_flags |= MASK_SSE2;
2200 /* Turn on SSE2 builtins for -maes. */
2202 target_flags |= MASK_SSE2;
2204 /* Turn on SSE builtins for -msse2. */
2206 target_flags |= MASK_SSE;
2208 /* Turn on MMX builtins for -msse. */
2211 target_flags |= MASK_MMX & ~target_flags_explicit;
2212 x86_prefetch_sse = true;
2215 /* Turn on MMX builtins for 3Dnow. */
2217 target_flags |= MASK_MMX;
2219 /* Turn on POPCNT builtins for -mabm. */
2221 target_flags |= MASK_POPCNT;
2225 if (TARGET_ALIGN_DOUBLE)
2226 error ("-malign-double makes no sense in the 64bit mode");
2228 error ("-mrtd calling convention not supported in the 64bit mode");
2230 /* Enable by default the SSE and MMX builtins. Do allow the user to
2231 explicitly disable any of these. In particular, disabling SSE and
2232 MMX for kernel code is extremely useful. */
2234 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2235 & ~target_flags_explicit);
2239 /* i386 ABI does not specify red zone. It still makes sense to use it
2240 when programmer takes care to stack from being destroyed. */
2241 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2242 target_flags |= MASK_NO_RED_ZONE;
2245 /* Validate -mpreferred-stack-boundary= value, or provide default.
2246 The default of 128 bits is for Pentium III's SSE __m128. We can't
2247 change it because of optimize_size. Otherwise, we can't mix object
2248 files compiled with -Os and -On. */
2249 ix86_preferred_stack_boundary = 128;
2250 if (ix86_preferred_stack_boundary_string)
2252 i = atoi (ix86_preferred_stack_boundary_string);
2253 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2254 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2255 TARGET_64BIT ? 4 : 2);
2257 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2260 /* Accept -msseregparm only if at least SSE support is enabled. */
2261 if (TARGET_SSEREGPARM
2263 error ("-msseregparm used without SSE enabled");
2265 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2267 if (ix86_fpmath_string != 0)
2269 if (! strcmp (ix86_fpmath_string, "387"))
2270 ix86_fpmath = FPMATH_387;
2271 else if (! strcmp (ix86_fpmath_string, "sse"))
2275 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2276 ix86_fpmath = FPMATH_387;
2279 ix86_fpmath = FPMATH_SSE;
2281 else if (! strcmp (ix86_fpmath_string, "387,sse")
2282 || ! strcmp (ix86_fpmath_string, "sse,387"))
2286 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2287 ix86_fpmath = FPMATH_387;
2289 else if (!TARGET_80387)
2291 warning (0, "387 instruction set disabled, using SSE arithmetics");
2292 ix86_fpmath = FPMATH_SSE;
2295 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2298 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2301 /* If the i387 is disabled, then do not return values in it. */
2303 target_flags &= ~MASK_FLOAT_RETURNS;
2305 if ((x86_accumulate_outgoing_args & TUNEMASK)
2306 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2308 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2310 /* ??? Unwind info is not correct around the CFG unless either a frame
2311 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2312 unwind info generation to be aware of the CFG and propagating states
2314 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2315 || flag_exceptions || flag_non_call_exceptions)
2316 && flag_omit_frame_pointer
2317 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2319 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2320 warning (0, "unwind tables currently require either a frame pointer "
2321 "or -maccumulate-outgoing-args for correctness");
2322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2325 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2328 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2329 p = strchr (internal_label_prefix, 'X');
2330 internal_label_prefix_len = p - internal_label_prefix;
2334 /* When scheduling description is not available, disable scheduler pass
2335 so it won't slow down the compilation and make x87 code slower. */
2336 if (!TARGET_SCHEDULE)
2337 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2340 /* switch to the appropriate section for output of DECL.
2341 DECL is either a `VAR_DECL' node or a constant of some sort.
2342 RELOC indicates whether forming the initial value of DECL requires
2343 link-time relocations. */
2346 x86_64_elf_select_section (tree decl, int reloc,
2347 unsigned HOST_WIDE_INT align)
2349 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2350 && ix86_in_large_data_p (decl))
2352 const char *sname = NULL;
2353 unsigned int flags = SECTION_WRITE;
2354 switch (categorize_decl_for_section (decl, reloc))
2359 case SECCAT_DATA_REL:
2360 sname = ".ldata.rel";
2362 case SECCAT_DATA_REL_LOCAL:
2363 sname = ".ldata.rel.local";
2365 case SECCAT_DATA_REL_RO:
2366 sname = ".ldata.rel.ro";
2368 case SECCAT_DATA_REL_RO_LOCAL:
2369 sname = ".ldata.rel.ro.local";
2373 flags |= SECTION_BSS;
2376 case SECCAT_RODATA_MERGE_STR:
2377 case SECCAT_RODATA_MERGE_STR_INIT:
2378 case SECCAT_RODATA_MERGE_CONST:
2382 case SECCAT_SRODATA:
2389 /* We don't split these for medium model. Place them into
2390 default sections and hope for best. */
2395 /* We might get called with string constants, but get_named_section
2396 doesn't like them as they are not DECLs. Also, we need to set
2397 flags in that case. */
2399 return get_section (sname, flags, NULL);
2400 return get_named_section (decl, sname, reloc);
2403 return default_elf_select_section (decl, reloc, align);
2406 /* Build up a unique section name, expressed as a
2407 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2408 RELOC indicates whether the initial value of EXP requires
2409 link-time relocations. */
2412 x86_64_elf_unique_section (tree decl, int reloc)
2414 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2415 && ix86_in_large_data_p (decl))
2417 const char *prefix = NULL;
2418 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2419 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2421 switch (categorize_decl_for_section (decl, reloc))
2424 case SECCAT_DATA_REL:
2425 case SECCAT_DATA_REL_LOCAL:
2426 case SECCAT_DATA_REL_RO:
2427 case SECCAT_DATA_REL_RO_LOCAL:
2428 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2431 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2434 case SECCAT_RODATA_MERGE_STR:
2435 case SECCAT_RODATA_MERGE_STR_INIT:
2436 case SECCAT_RODATA_MERGE_CONST:
2437 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2439 case SECCAT_SRODATA:
2446 /* We don't split these for medium model. Place them into
2447 default sections and hope for best. */
2455 plen = strlen (prefix);
2457 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2458 name = targetm.strip_name_encoding (name);
2459 nlen = strlen (name);
2461 string = alloca (nlen + plen + 1);
2462 memcpy (string, prefix, plen);
2463 memcpy (string + plen, name, nlen + 1);
2465 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2469 default_unique_section (decl, reloc);
2472 #ifdef COMMON_ASM_OP
2473 /* This says how to output assembler code to declare an
2474 uninitialized external linkage data object.
2476 For medium model x86-64 we need to use .largecomm opcode for
2479 x86_elf_aligned_common (FILE *file,
2480 const char *name, unsigned HOST_WIDE_INT size,
2483 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2484 && size > (unsigned int)ix86_section_threshold)
2485 fprintf (file, ".largecomm\t");
2487 fprintf (file, "%s", COMMON_ASM_OP);
2488 assemble_name (file, name);
2489 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2490 size, align / BITS_PER_UNIT);
2493 /* Utility function for targets to use in implementing
2494 ASM_OUTPUT_ALIGNED_BSS. */
2497 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2498 const char *name, unsigned HOST_WIDE_INT size,
2501 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2502 && size > (unsigned int)ix86_section_threshold)
2503 switch_to_section (get_named_section (decl, ".lbss", 0));
2505 switch_to_section (bss_section);
2506 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2507 #ifdef ASM_DECLARE_OBJECT_NAME
2508 last_assemble_variable_decl = decl;
2509 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2511 /* Standard thing is just output label for the object. */
2512 ASM_OUTPUT_LABEL (file, name);
2513 #endif /* ASM_DECLARE_OBJECT_NAME */
2514 ASM_OUTPUT_SKIP (file, size ? size : 1);
2519 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2521 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2522 make the problem with not enough registers even worse. */
2523 #ifdef INSN_SCHEDULING
2525 flag_schedule_insns = 0;
2529 /* The Darwin libraries never set errno, so we might as well
2530 avoid calling them when that's the only reason we would. */
2531 flag_errno_math = 0;
2533 /* The default values of these switches depend on the TARGET_64BIT
2534 that is not known at this moment. Mark these values with 2 and
2535 let user the to override these. In case there is no command line option
2536 specifying them, we will set the defaults in override_options. */
2538 flag_omit_frame_pointer = 2;
2539 flag_pcc_struct_return = 2;
2540 flag_asynchronous_unwind_tables = 2;
2541 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2542 SUBTARGET_OPTIMIZATION_OPTIONS;
2546 /* Table of valid machine attributes. */
2547 const struct attribute_spec ix86_attribute_table[] =
2549 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2550 /* Stdcall attribute says callee is responsible for popping arguments
2551 if they are not variable. */
2552 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2553 /* Fastcall attribute says callee is responsible for popping arguments
2554 if they are not variable. */
2555 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2556 /* Cdecl attribute says the callee is a normal C declaration */
2557 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2558 /* Regparm attribute specifies how many integer arguments are to be
2559 passed in registers. */
2560 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2561 /* Sseregparm attribute says we are using x86_64 calling conventions
2562 for FP arguments. */
2563 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2564 /* force_align_arg_pointer says this function realigns the stack at entry. */
2565 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2566 false, true, true, ix86_handle_cconv_attribute },
2567 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2568 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2569 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2570 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2572 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2573 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2574 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2575 SUBTARGET_ATTRIBUTE_TABLE,
2577 { NULL, 0, 0, false, false, false, NULL }
2580 /* Decide whether we can make a sibling call to a function. DECL is the
2581 declaration of the function being targeted by the call and EXP is the
2582 CALL_EXPR representing the call. */
2585 ix86_function_ok_for_sibcall (tree decl, tree exp)
2590 /* If we are generating position-independent code, we cannot sibcall
2591 optimize any indirect call, or a direct call to a global function,
2592 as the PLT requires %ebx be live. */
2593 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2600 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2601 if (POINTER_TYPE_P (func))
2602 func = TREE_TYPE (func);
2605 /* Check that the return value locations are the same. Like
2606 if we are returning floats on the 80387 register stack, we cannot
2607 make a sibcall from a function that doesn't return a float to a
2608 function that does or, conversely, from a function that does return
2609 a float to a function that doesn't; the necessary stack adjustment
2610 would not be executed. This is also the place we notice
2611 differences in the return value ABI. Note that it is ok for one
2612 of the functions to have void return type as long as the return
2613 value of the other is passed in a register. */
2614 a = ix86_function_value (TREE_TYPE (exp), func, false);
2615 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2617 if (STACK_REG_P (a) || STACK_REG_P (b))
2619 if (!rtx_equal_p (a, b))
2622 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2624 else if (!rtx_equal_p (a, b))
2627 /* If this call is indirect, we'll need to be able to use a call-clobbered
2628 register for the address of the target function. Make sure that all
2629 such registers are not used for passing parameters. */
2630 if (!decl && !TARGET_64BIT)
2634 /* We're looking at the CALL_EXPR, we need the type of the function. */
2635 type = TREE_OPERAND (exp, 0); /* pointer expression */
2636 type = TREE_TYPE (type); /* pointer type */
2637 type = TREE_TYPE (type); /* function type */
2639 if (ix86_function_regparm (type, NULL) >= 3)
2641 /* ??? Need to count the actual number of registers to be used,
2642 not the possible number of registers. Fix later. */
2647 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2648 /* Dllimport'd functions are also called indirectly. */
2649 if (decl && DECL_DLLIMPORT_P (decl)
2650 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2654 /* If we forced aligned the stack, then sibcalling would unalign the
2655 stack, which may break the called function. */
2656 if (cfun->machine->force_align_arg_pointer)
2659 /* Otherwise okay. That also includes certain types of indirect calls. */
2663 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2664 calling convention attributes;
2665 arguments as in struct attribute_spec.handler. */
2668 ix86_handle_cconv_attribute (tree *node, tree name,
2670 int flags ATTRIBUTE_UNUSED,
2673 if (TREE_CODE (*node) != FUNCTION_TYPE
2674 && TREE_CODE (*node) != METHOD_TYPE
2675 && TREE_CODE (*node) != FIELD_DECL
2676 && TREE_CODE (*node) != TYPE_DECL)
2678 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2679 IDENTIFIER_POINTER (name));
2680 *no_add_attrs = true;
2684 /* Can combine regparm with all attributes but fastcall. */
2685 if (is_attribute_p ("regparm", name))
2689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2691 error ("fastcall and regparm attributes are not compatible");
2694 cst = TREE_VALUE (args);
2695 if (TREE_CODE (cst) != INTEGER_CST)
2697 warning (OPT_Wattributes,
2698 "%qs attribute requires an integer constant argument",
2699 IDENTIFIER_POINTER (name));
2700 *no_add_attrs = true;
2702 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2704 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2705 IDENTIFIER_POINTER (name), REGPARM_MAX);
2706 *no_add_attrs = true;
2710 && lookup_attribute (ix86_force_align_arg_pointer_string,
2711 TYPE_ATTRIBUTES (*node))
2712 && compare_tree_int (cst, REGPARM_MAX-1))
2714 error ("%s functions limited to %d register parameters",
2715 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2723 warning (OPT_Wattributes, "%qs attribute ignored",
2724 IDENTIFIER_POINTER (name));
2725 *no_add_attrs = true;
2729 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2730 if (is_attribute_p ("fastcall", name))
2732 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2734 error ("fastcall and cdecl attributes are not compatible");
2736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2738 error ("fastcall and stdcall attributes are not compatible");
2740 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2742 error ("fastcall and regparm attributes are not compatible");
2746 /* Can combine stdcall with fastcall (redundant), regparm and
2748 else if (is_attribute_p ("stdcall", name))
2750 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2752 error ("stdcall and cdecl attributes are not compatible");
2754 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2756 error ("stdcall and fastcall attributes are not compatible");
2760 /* Can combine cdecl with regparm and sseregparm. */
2761 else if (is_attribute_p ("cdecl", name))
2763 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2765 error ("stdcall and cdecl attributes are not compatible");
2767 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2769 error ("fastcall and cdecl attributes are not compatible");
2773 /* Can combine sseregparm with all attributes. */
2778 /* Return 0 if the attributes for two types are incompatible, 1 if they
2779 are compatible, and 2 if they are nearly compatible (which causes a
2780 warning to be generated). */
2783 ix86_comp_type_attributes (tree type1, tree type2)
2785 /* Check for mismatch of non-default calling convention. */
2786 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2788 if (TREE_CODE (type1) != FUNCTION_TYPE)
2791 /* Check for mismatched fastcall/regparm types. */
2792 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2793 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2794 || (ix86_function_regparm (type1, NULL)
2795 != ix86_function_regparm (type2, NULL)))
2798 /* Check for mismatched sseregparm types. */
2799 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2800 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2803 /* Check for mismatched return types (cdecl vs stdcall). */
2804 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2805 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2811 /* Return the regparm value for a function with the indicated TYPE and DECL.
2812 DECL may be NULL when calling function indirectly
2813 or considering a libcall. */
2816 ix86_function_regparm (tree type, tree decl)
2819 int regparm = ix86_regparm;
2820 bool user_convention = false;
2824 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2827 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2828 user_convention = true;
2831 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2834 user_convention = true;
2837 /* Use register calling convention for local functions when possible. */
2838 if (!TARGET_64BIT && !user_convention && decl
2839 && flag_unit_at_a_time && !profile_flag)
2841 struct cgraph_local_info *i = cgraph_local_info (decl);
2844 int local_regparm, globals = 0, regno;
2846 /* Make sure no regparm register is taken by a global register
2848 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2849 if (global_regs[local_regparm])
2851 /* We can't use regparm(3) for nested functions as these use
2852 static chain pointer in third argument. */
2853 if (local_regparm == 3
2854 && decl_function_context (decl)
2855 && !DECL_NO_STATIC_CHAIN (decl))
2857 /* If the function realigns its stackpointer, the
2858 prologue will clobber %ecx. If we've already
2859 generated code for the callee, the callee
2860 DECL_STRUCT_FUNCTION is gone, so we fall back to
2861 scanning the attributes for the self-realigning
2863 if ((DECL_STRUCT_FUNCTION (decl)
2864 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2865 || (!DECL_STRUCT_FUNCTION (decl)
2866 && lookup_attribute (ix86_force_align_arg_pointer_string,
2867 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2869 /* Each global register variable increases register preassure,
2870 so the more global reg vars there are, the smaller regparm
2871 optimization use, unless requested by the user explicitly. */
2872 for (regno = 0; regno < 6; regno++)
2873 if (global_regs[regno])
2876 = globals < local_regparm ? local_regparm - globals : 0;
2878 if (local_regparm > regparm)
2879 regparm = local_regparm;
2886 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2887 DFmode (2) arguments in SSE registers for a function with the
2888 indicated TYPE and DECL. DECL may be NULL when calling function
2889 indirectly or considering a libcall. Otherwise return 0. */
2892 ix86_function_sseregparm (tree type, tree decl)
2894 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2895 by the sseregparm attribute. */
2896 if (TARGET_SSEREGPARM
2898 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2903 error ("Calling %qD with attribute sseregparm without "
2904 "SSE/SSE2 enabled", decl);
2906 error ("Calling %qT with attribute sseregparm without "
2907 "SSE/SSE2 enabled", type);
2914 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2915 (and DFmode for SSE2) arguments in SSE registers,
2916 even for 32-bit targets. */
2917 if (!TARGET_64BIT && decl
2918 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2920 struct cgraph_local_info *i = cgraph_local_info (decl);
2922 return TARGET_SSE2 ? 2 : 1;
2928 /* Return true if EAX is live at the start of the function. Used by
2929 ix86_expand_prologue to determine if we need special help before
2930 calling allocate_stack_worker. */
2933 ix86_eax_live_at_start_p (void)
2935 /* Cheat. Don't bother working forward from ix86_function_regparm
2936 to the function type to whether an actual argument is located in
2937 eax. Instead just look at cfg info, which is still close enough
2938 to correct at this point. This gives false positives for broken
2939 functions that might use uninitialized data that happens to be
2940 allocated in eax, but who cares? */
2941 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2944 /* Value is the number of bytes of arguments automatically
2945 popped when returning from a subroutine call.
2946 FUNDECL is the declaration node of the function (as a tree),
2947 FUNTYPE is the data type of the function (as a tree),
2948 or for a library call it is an identifier node for the subroutine name.
2949 SIZE is the number of bytes of arguments passed on the stack.
2951 On the 80386, the RTD insn may be used to pop them if the number
2952 of args is fixed, but if the number is variable then the caller
2953 must pop them all. RTD can't be used for library calls now
2954 because the library is compiled with the Unix compiler.
2955 Use of RTD is a selectable option, since it is incompatible with
2956 standard Unix calling sequences. If the option is not selected,
2957 the caller must always pop the args.
2959 The attribute stdcall is equivalent to RTD on a per module basis. */
2962 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2964 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2966 /* Cdecl functions override -mrtd, and never pop the stack. */
2967 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2969 /* Stdcall and fastcall functions will pop the stack if not
2971 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2972 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2976 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2977 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2978 == void_type_node)))
2982 /* Lose any fake structure return argument if it is passed on the stack. */
2983 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2985 && !KEEP_AGGREGATE_RETURN_POINTER)
2987 int nregs = ix86_function_regparm (funtype, fundecl);
2990 return GET_MODE_SIZE (Pmode);
2996 /* Argument support functions. */
2998 /* Return true when register may be used to pass function parameters. */
3000 ix86_function_arg_regno_p (int regno)
3006 return (regno < REGPARM_MAX
3007 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3009 return (regno < REGPARM_MAX
3010 || (TARGET_MMX && MMX_REGNO_P (regno)
3011 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3012 || (TARGET_SSE && SSE_REGNO_P (regno)
3013 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3018 if (SSE_REGNO_P (regno) && TARGET_SSE)
3023 if (TARGET_SSE && SSE_REGNO_P (regno)
3024 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3027 /* RAX is used as hidden argument to va_arg functions. */
3030 for (i = 0; i < REGPARM_MAX; i++)
3031 if (regno == x86_64_int_parameter_registers[i])
3036 /* Return if we do not know how to pass TYPE solely in registers. */
3039 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3041 if (must_pass_in_stack_var_size_or_pad (mode, type))
3044 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3045 The layout_type routine is crafty and tries to trick us into passing
3046 currently unsupported vector types on the stack by using TImode. */
3047 return (!TARGET_64BIT && mode == TImode
3048 && type && TREE_CODE (type) != VECTOR_TYPE);
3051 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3052 for a call to a function whose data type is FNTYPE.
3053 For a library call, FNTYPE is 0. */
3056 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3057 tree fntype, /* tree ptr for function decl */
3058 rtx libname, /* SYMBOL_REF of library name or 0 */
3061 static CUMULATIVE_ARGS zero_cum;
3062 tree param, next_param;
3064 if (TARGET_DEBUG_ARG)
3066 fprintf (stderr, "\ninit_cumulative_args (");
3068 fprintf (stderr, "fntype code = %s, ret code = %s",
3069 tree_code_name[(int) TREE_CODE (fntype)],
3070 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3072 fprintf (stderr, "no fntype");
3075 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3080 /* Set up the number of registers to use for passing arguments. */
3081 cum->nregs = ix86_regparm;
3083 cum->sse_nregs = SSE_REGPARM_MAX;
3085 cum->mmx_nregs = MMX_REGPARM_MAX;
3086 cum->warn_sse = true;
3087 cum->warn_mmx = true;
3088 cum->maybe_vaarg = false;
3090 /* Use ecx and edx registers if function has fastcall attribute,
3091 else look for regparm information. */
3092 if (fntype && !TARGET_64BIT)
3094 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3100 cum->nregs = ix86_function_regparm (fntype, fndecl);
3103 /* Set up the number of SSE registers used for passing SFmode
3104 and DFmode arguments. Warn for mismatching ABI. */
3105 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3107 /* Determine if this function has variable arguments. This is
3108 indicated by the last argument being 'void_type_mode' if there
3109 are no variable arguments. If there are variable arguments, then
3110 we won't pass anything in registers in 32-bit mode. */
3112 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3114 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3115 param != 0; param = next_param)
3117 next_param = TREE_CHAIN (param);
3118 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3128 cum->float_in_sse = 0;
3130 cum->maybe_vaarg = true;
3134 if ((!fntype && !libname)
3135 || (fntype && !TYPE_ARG_TYPES (fntype)))
3136 cum->maybe_vaarg = true;
3138 if (TARGET_DEBUG_ARG)
3139 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3144 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3145 But in the case of vector types, it is some vector mode.
3147 When we have only some of our vector isa extensions enabled, then there
3148 are some modes for which vector_mode_supported_p is false. For these
3149 modes, the generic vector support in gcc will choose some non-vector mode
3150 in order to implement the type. By computing the natural mode, we'll
3151 select the proper ABI location for the operand and not depend on whatever
3152 the middle-end decides to do with these vector types. */
3154 static enum machine_mode
3155 type_natural_mode (tree type)
3157 enum machine_mode mode = TYPE_MODE (type);
3159 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3161 HOST_WIDE_INT size = int_size_in_bytes (type);
3162 if ((size == 8 || size == 16)
3163 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3164 && TYPE_VECTOR_SUBPARTS (type) > 1)
3166 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3168 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3169 mode = MIN_MODE_VECTOR_FLOAT;
3171 mode = MIN_MODE_VECTOR_INT;
3173 /* Get the mode which has this inner mode and number of units. */
3174 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3175 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3176 && GET_MODE_INNER (mode) == innermode)
3186 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3187 this may not agree with the mode that the type system has chosen for the
3188 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3189 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3192 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3197 if (orig_mode != BLKmode)
3198 tmp = gen_rtx_REG (orig_mode, regno);
3201 tmp = gen_rtx_REG (mode, regno);
3202 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3203 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3209 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3210 of this code is to classify each 8bytes of incoming argument by the register
3211 class and assign registers accordingly. */
3213 /* Return the union class of CLASS1 and CLASS2.
3214 See the x86-64 PS ABI for details. */
3216 static enum x86_64_reg_class
3217 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3219 /* Rule #1: If both classes are equal, this is the resulting class. */
3220 if (class1 == class2)
3223 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3225 if (class1 == X86_64_NO_CLASS)
3227 if (class2 == X86_64_NO_CLASS)
3230 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3231 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3232 return X86_64_MEMORY_CLASS;
3234 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3235 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3236 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3237 return X86_64_INTEGERSI_CLASS;
3238 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3239 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3240 return X86_64_INTEGER_CLASS;
3242 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3244 if (class1 == X86_64_X87_CLASS
3245 || class1 == X86_64_X87UP_CLASS
3246 || class1 == X86_64_COMPLEX_X87_CLASS
3247 || class2 == X86_64_X87_CLASS
3248 || class2 == X86_64_X87UP_CLASS
3249 || class2 == X86_64_COMPLEX_X87_CLASS)
3250 return X86_64_MEMORY_CLASS;
3252 /* Rule #6: Otherwise class SSE is used. */
3253 return X86_64_SSE_CLASS;
3256 /* Classify the argument of type TYPE and mode MODE.
3257 CLASSES will be filled by the register class used to pass each word
3258 of the operand. The number of words is returned. In case the parameter
3259 should be passed in memory, 0 is returned. As a special case for zero
3260 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3262 BIT_OFFSET is used internally for handling records and specifies offset
3263 of the offset in bits modulo 256 to avoid overflow cases.
3265 See the x86-64 PS ABI for details.
3269 classify_argument (enum machine_mode mode, tree type,
3270 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3272 HOST_WIDE_INT bytes =
3273 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3274 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3276 /* Variable sized entities are always passed/returned in memory. */
3280 if (mode != VOIDmode
3281 && targetm.calls.must_pass_in_stack (mode, type))
3284 if (type && AGGREGATE_TYPE_P (type))
3288 enum x86_64_reg_class subclasses[MAX_CLASSES];
3290 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3294 for (i = 0; i < words; i++)
3295 classes[i] = X86_64_NO_CLASS;
3297 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3298 signalize memory class, so handle it as special case. */
3301 classes[0] = X86_64_NO_CLASS;
3305 /* Classify each field of record and merge classes. */
3306 switch (TREE_CODE (type))
3309 /* For classes first merge in the field of the subclasses. */
3310 if (TYPE_BINFO (type))
3312 tree binfo, base_binfo;
3315 for (binfo = TYPE_BINFO (type), basenum = 0;
3316 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3319 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3320 tree type = BINFO_TYPE (base_binfo);
3322 num = classify_argument (TYPE_MODE (type),
3324 (offset + bit_offset) % 256);
3327 for (i = 0; i < num; i++)
3329 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3331 merge_classes (subclasses[i], classes[i + pos]);
3335 /* And now merge the fields of structure. */
3336 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3338 if (TREE_CODE (field) == FIELD_DECL)
3342 if (TREE_TYPE (field) == error_mark_node)
3345 /* Bitfields are always classified as integer. Handle them
3346 early, since later code would consider them to be
3347 misaligned integers. */
3348 if (DECL_BIT_FIELD (field))
3350 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3351 i < ((int_bit_position (field) + (bit_offset % 64))
3352 + tree_low_cst (DECL_SIZE (field), 0)
3355 merge_classes (X86_64_INTEGER_CLASS,
3360 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3361 TREE_TYPE (field), subclasses,
3362 (int_bit_position (field)
3363 + bit_offset) % 256);
3366 for (i = 0; i < num; i++)
3369 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3371 merge_classes (subclasses[i], classes[i + pos]);
3379 /* Arrays are handled as small records. */
3382 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3383 TREE_TYPE (type), subclasses, bit_offset);
3387 /* The partial classes are now full classes. */
3388 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3389 subclasses[0] = X86_64_SSE_CLASS;
3390 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3391 subclasses[0] = X86_64_INTEGER_CLASS;
3393 for (i = 0; i < words; i++)
3394 classes[i] = subclasses[i % num];
3399 case QUAL_UNION_TYPE:
3400 /* Unions are similar to RECORD_TYPE but offset is always 0.
3403 /* Unions are not derived. */
3404 gcc_assert (!TYPE_BINFO (type)
3405 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3406 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3408 if (TREE_CODE (field) == FIELD_DECL)
3412 if (TREE_TYPE (field) == error_mark_node)
3415 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3416 TREE_TYPE (field), subclasses,
3420 for (i = 0; i < num; i++)
3421 classes[i] = merge_classes (subclasses[i], classes[i]);
3430 /* Final merger cleanup. */
3431 for (i = 0; i < words; i++)
3433 /* If one class is MEMORY, everything should be passed in
3435 if (classes[i] == X86_64_MEMORY_CLASS)
3438 /* The X86_64_SSEUP_CLASS should be always preceded by
3439 X86_64_SSE_CLASS. */
3440 if (classes[i] == X86_64_SSEUP_CLASS
3441 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3442 classes[i] = X86_64_SSE_CLASS;
3444 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3445 if (classes[i] == X86_64_X87UP_CLASS
3446 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3447 classes[i] = X86_64_SSE_CLASS;
3452 /* Compute alignment needed. We align all types to natural boundaries with
3453 exception of XFmode that is aligned to 64bits. */
3454 if (mode != VOIDmode && mode != BLKmode)
3456 int mode_alignment = GET_MODE_BITSIZE (mode);
3459 mode_alignment = 128;
3460 else if (mode == XCmode)
3461 mode_alignment = 256;
3462 if (COMPLEX_MODE_P (mode))
3463 mode_alignment /= 2;
3464 /* Misaligned fields are always returned in memory. */
3465 if (bit_offset % mode_alignment)
3469 /* for V1xx modes, just use the base mode */
3470 if (VECTOR_MODE_P (mode)
3471 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3472 mode = GET_MODE_INNER (mode);
3474 /* Classification of atomic types. */
3479 classes[0] = X86_64_SSE_CLASS;
3482 classes[0] = X86_64_SSE_CLASS;
3483 classes[1] = X86_64_SSEUP_CLASS;
3492 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3493 classes[0] = X86_64_INTEGERSI_CLASS;
3495 classes[0] = X86_64_INTEGER_CLASS;
3499 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3504 if (!(bit_offset % 64))
3505 classes[0] = X86_64_SSESF_CLASS;
3507 classes[0] = X86_64_SSE_CLASS;
3510 classes[0] = X86_64_SSEDF_CLASS;
3513 classes[0] = X86_64_X87_CLASS;
3514 classes[1] = X86_64_X87UP_CLASS;
3517 classes[0] = X86_64_SSE_CLASS;
3518 classes[1] = X86_64_SSEUP_CLASS;
3521 classes[0] = X86_64_SSE_CLASS;
3524 classes[0] = X86_64_SSEDF_CLASS;
3525 classes[1] = X86_64_SSEDF_CLASS;
3528 classes[0] = X86_64_COMPLEX_X87_CLASS;
3531 /* This modes is larger than 16 bytes. */
3539 classes[0] = X86_64_SSE_CLASS;
3540 classes[1] = X86_64_SSEUP_CLASS;
3546 classes[0] = X86_64_SSE_CLASS;
3552 gcc_assert (VECTOR_MODE_P (mode));
3557 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3559 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3560 classes[0] = X86_64_INTEGERSI_CLASS;
3562 classes[0] = X86_64_INTEGER_CLASS;
3563 classes[1] = X86_64_INTEGER_CLASS;
3564 return 1 + (bytes > 8);
3568 /* Examine the argument and return set number of register required in each
3569 class. Return 0 iff parameter should be passed in memory. */
3571 examine_argument (enum machine_mode mode, tree type, int in_return,
3572 int *int_nregs, int *sse_nregs)
3574 enum x86_64_reg_class class[MAX_CLASSES];
3575 int n = classify_argument (mode, type, class, 0);
3581 for (n--; n >= 0; n--)
3584 case X86_64_INTEGER_CLASS:
3585 case X86_64_INTEGERSI_CLASS:
3588 case X86_64_SSE_CLASS:
3589 case X86_64_SSESF_CLASS:
3590 case X86_64_SSEDF_CLASS:
3593 case X86_64_NO_CLASS:
3594 case X86_64_SSEUP_CLASS:
3596 case X86_64_X87_CLASS:
3597 case X86_64_X87UP_CLASS:
3601 case X86_64_COMPLEX_X87_CLASS:
3602 return in_return ? 2 : 0;
3603 case X86_64_MEMORY_CLASS:
3609 /* Construct container for the argument used by GCC interface. See
3610 FUNCTION_ARG for the detailed description. */
3613 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3614 tree type, int in_return, int nintregs, int nsseregs,
3615 const int *intreg, int sse_regno)
3617 /* The following variables hold the static issued_error state. */
3618 static bool issued_sse_arg_error;
3619 static bool issued_sse_ret_error;
3620 static bool issued_x87_ret_error;
3622 enum machine_mode tmpmode;
3624 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3625 enum x86_64_reg_class class[MAX_CLASSES];
3629 int needed_sseregs, needed_intregs;
3630 rtx exp[MAX_CLASSES];
3633 n = classify_argument (mode, type, class, 0);
3634 if (TARGET_DEBUG_ARG)
3637 fprintf (stderr, "Memory class\n");
3640 fprintf (stderr, "Classes:");
3641 for (i = 0; i < n; i++)
3643 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3645 fprintf (stderr, "\n");
3650 if (!examine_argument (mode, type, in_return, &needed_intregs,
3653 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3656 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3657 some less clueful developer tries to use floating-point anyway. */
3658 if (needed_sseregs && !TARGET_SSE)
3662 if (!issued_sse_ret_error)
3664 error ("SSE register return with SSE disabled");
3665 issued_sse_ret_error = true;
3668 else if (!issued_sse_arg_error)
3670 error ("SSE register argument with SSE disabled");
3671 issued_sse_arg_error = true;
3676 /* Likewise, error if the ABI requires us to return values in the
3677 x87 registers and the user specified -mno-80387. */
3678 if (!TARGET_80387 && in_return)
3679 for (i = 0; i < n; i++)
3680 if (class[i] == X86_64_X87_CLASS
3681 || class[i] == X86_64_X87UP_CLASS
3682 || class[i] == X86_64_COMPLEX_X87_CLASS)
3684 if (!issued_x87_ret_error)
3686 error ("x87 register return with x87 disabled");
3687 issued_x87_ret_error = true;
3692 /* First construct simple cases. Avoid SCmode, since we want to use
3693 single register to pass this type. */
3694 if (n == 1 && mode != SCmode)
3697 case X86_64_INTEGER_CLASS:
3698 case X86_64_INTEGERSI_CLASS:
3699 return gen_rtx_REG (mode, intreg[0]);
3700 case X86_64_SSE_CLASS:
3701 case X86_64_SSESF_CLASS:
3702 case X86_64_SSEDF_CLASS:
3703 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3704 case X86_64_X87_CLASS:
3705 case X86_64_COMPLEX_X87_CLASS:
3706 return gen_rtx_REG (mode, FIRST_STACK_REG);
3707 case X86_64_NO_CLASS:
3708 /* Zero sized array, struct or class. */
3713 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3715 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3717 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3718 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3719 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3720 && class[1] == X86_64_INTEGER_CLASS
3721 && (mode == CDImode || mode == TImode || mode == TFmode)
3722 && intreg[0] + 1 == intreg[1])
3723 return gen_rtx_REG (mode, intreg[0]);
3725 /* Otherwise figure out the entries of the PARALLEL. */
3726 for (i = 0; i < n; i++)
3730 case X86_64_NO_CLASS:
3732 case X86_64_INTEGER_CLASS:
3733 case X86_64_INTEGERSI_CLASS:
3734 /* Merge TImodes on aligned occasions here too. */
3735 if (i * 8 + 8 > bytes)
3736 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3737 else if (class[i] == X86_64_INTEGERSI_CLASS)
3741 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3742 if (tmpmode == BLKmode)
3744 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3745 gen_rtx_REG (tmpmode, *intreg),
3749 case X86_64_SSESF_CLASS:
3750 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3751 gen_rtx_REG (SFmode,
3752 SSE_REGNO (sse_regno)),
3756 case X86_64_SSEDF_CLASS:
3757 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3758 gen_rtx_REG (DFmode,
3759 SSE_REGNO (sse_regno)),
3763 case X86_64_SSE_CLASS:
3764 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3768 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3769 gen_rtx_REG (tmpmode,
3770 SSE_REGNO (sse_regno)),
3772 if (tmpmode == TImode)
3781 /* Empty aligned struct, union or class. */
3785 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3786 for (i = 0; i < nexps; i++)
3787 XVECEXP (ret, 0, i) = exp [i];
3791 /* Update the data in CUM to advance over an argument
3792 of mode MODE and data type TYPE.
3793 (TYPE is null for libcalls where that information may not be available.) */
3796 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3797 tree type, int named)
3800 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3801 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3804 mode = type_natural_mode (type);
3806 if (TARGET_DEBUG_ARG)
3807 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3808 "mode=%s, named=%d)\n\n",
3809 words, cum->words, cum->nregs, cum->sse_nregs,
3810 GET_MODE_NAME (mode), named);
3814 int int_nregs, sse_nregs;
3815 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3816 cum->words += words;
3817 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3819 cum->nregs -= int_nregs;
3820 cum->sse_nregs -= sse_nregs;
3821 cum->regno += int_nregs;
3822 cum->sse_regno += sse_nregs;
3825 cum->words += words;
3843 cum->words += words;
3844 cum->nregs -= words;
3845 cum->regno += words;
3847 if (cum->nregs <= 0)
3855 if (cum->float_in_sse < 2)
3858 if (cum->float_in_sse < 1)
3869 if (!type || !AGGREGATE_TYPE_P (type))
3871 cum->sse_words += words;
3872 cum->sse_nregs -= 1;
3873 cum->sse_regno += 1;
3874 if (cum->sse_nregs <= 0)
3886 if (!type || !AGGREGATE_TYPE_P (type))
3888 cum->mmx_words += words;
3889 cum->mmx_nregs -= 1;
3890 cum->mmx_regno += 1;
3891 if (cum->mmx_nregs <= 0)
3902 /* Define where to put the arguments to a function.
3903 Value is zero to push the argument on the stack,
3904 or a hard register in which to store the argument.
3906 MODE is the argument's machine mode.
3907 TYPE is the data type of the argument (as a tree).
3908 This is null for libcalls where that information may
3910 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3911 the preceding args and about the function being called.
3912 NAMED is nonzero if this argument is a named parameter
3913 (otherwise it is an extra parameter matching an ellipsis). */
3916 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3917 tree type, int named)
3919 enum machine_mode mode = orig_mode;
3922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3923 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3924 static bool warnedsse, warnedmmx;
3926 /* To simplify the code below, represent vector types with a vector mode
3927 even if MMX/SSE are not active. */
3928 if (type && TREE_CODE (type) == VECTOR_TYPE)
3929 mode = type_natural_mode (type);
3931 /* Handle a hidden AL argument containing number of registers for varargs
3932 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3934 if (mode == VOIDmode)
3937 return GEN_INT (cum->maybe_vaarg
3938 ? (cum->sse_nregs < 0
3946 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3948 &x86_64_int_parameter_registers [cum->regno],
3953 /* For now, pass fp/complex values on the stack. */
3965 if (words <= cum->nregs)
3967 int regno = cum->regno;
3969 /* Fastcall allocates the first two DWORD (SImode) or
3970 smaller arguments to ECX and EDX. */
3973 if (mode == BLKmode || mode == DImode)
3976 /* ECX not EAX is the first allocated register. */
3980 ret = gen_rtx_REG (mode, regno);
3984 if (cum->float_in_sse < 2)
3987 if (cum->float_in_sse < 1)
3997 if (!type || !AGGREGATE_TYPE_P (type))
3999 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4002 warning (0, "SSE vector argument without SSE enabled "
4006 ret = gen_reg_or_parallel (mode, orig_mode,
4007 cum->sse_regno + FIRST_SSE_REG);
4014 if (!type || !AGGREGATE_TYPE_P (type))
4016 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4019 warning (0, "MMX vector argument without MMX enabled "
4023 ret = gen_reg_or_parallel (mode, orig_mode,
4024 cum->mmx_regno + FIRST_MMX_REG);
4029 if (TARGET_DEBUG_ARG)
4032 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4033 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4036 print_simple_rtl (stderr, ret);
4038 fprintf (stderr, ", stack");
4040 fprintf (stderr, " )\n");
4046 /* A C expression that indicates when an argument must be passed by
4047 reference. If nonzero for an argument, a copy of that argument is
4048 made in memory and a pointer to the argument is passed instead of
4049 the argument itself. The pointer is passed in whatever way is
4050 appropriate for passing a pointer to that type. */
4053 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4054 enum machine_mode mode ATTRIBUTE_UNUSED,
4055 tree type, bool named ATTRIBUTE_UNUSED)
4060 if (type && int_size_in_bytes (type) == -1)
4062 if (TARGET_DEBUG_ARG)
4063 fprintf (stderr, "function_arg_pass_by_reference\n");
4070 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4071 ABI. Only called if TARGET_SSE. */
4073 contains_128bit_aligned_vector_p (tree type)
4075 enum machine_mode mode = TYPE_MODE (type);
4076 if (SSE_REG_MODE_P (mode)
4077 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4079 if (TYPE_ALIGN (type) < 128)
4082 if (AGGREGATE_TYPE_P (type))
4084 /* Walk the aggregates recursively. */
4085 switch (TREE_CODE (type))
4089 case QUAL_UNION_TYPE:
4093 if (TYPE_BINFO (type))
4095 tree binfo, base_binfo;
4098 for (binfo = TYPE_BINFO (type), i = 0;
4099 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4100 if (contains_128bit_aligned_vector_p
4101 (BINFO_TYPE (base_binfo)))
4104 /* And now merge the fields of structure. */
4105 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4107 if (TREE_CODE (field) == FIELD_DECL
4108 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4115 /* Just for use if some languages passes arrays by value. */
4116 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4127 /* Gives the alignment boundary, in bits, of an argument with the
4128 specified mode and type. */
4131 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4135 align = TYPE_ALIGN (type);
4137 align = GET_MODE_ALIGNMENT (mode);
4138 if (align < PARM_BOUNDARY)
4139 align = PARM_BOUNDARY;
4142 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4143 make an exception for SSE modes since these require 128bit
4146 The handling here differs from field_alignment. ICC aligns MMX
4147 arguments to 4 byte boundaries, while structure fields are aligned
4148 to 8 byte boundaries. */
4150 align = PARM_BOUNDARY;
4153 if (!SSE_REG_MODE_P (mode))
4154 align = PARM_BOUNDARY;
4158 if (!contains_128bit_aligned_vector_p (type))
4159 align = PARM_BOUNDARY;
4167 /* Return true if N is a possible register number of function value. */
4169 ix86_function_value_regno_p (int regno)
4175 return ((regno) == 0
4176 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4177 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4179 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4180 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4181 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4186 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4187 || (regno == FIRST_SSE_REG && TARGET_SSE))
4191 && (regno == FIRST_MMX_REG && TARGET_MMX))
4198 /* Define how to find the value returned by a function.
4199 VALTYPE is the data type of the value (as a tree).
4200 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4201 otherwise, FUNC is 0. */
4203 ix86_function_value (tree valtype, tree fntype_or_decl,
4204 bool outgoing ATTRIBUTE_UNUSED)
4206 enum machine_mode natmode = type_natural_mode (valtype);
4210 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4211 1, REGPARM_MAX, SSE_REGPARM_MAX,
4212 x86_64_int_return_registers, 0);
4213 /* For zero sized structures, construct_container return NULL, but we
4214 need to keep rest of compiler happy by returning meaningful value. */
4216 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4221 tree fn = NULL_TREE, fntype;
4223 && DECL_P (fntype_or_decl))
4224 fn = fntype_or_decl;
4225 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4226 return gen_rtx_REG (TYPE_MODE (valtype),
4227 ix86_value_regno (natmode, fn, fntype));
4231 /* Return true iff type is returned in memory. */
4233 ix86_return_in_memory (tree type)
4235 int needed_intregs, needed_sseregs, size;
4236 enum machine_mode mode = type_natural_mode (type);
4239 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4241 if (mode == BLKmode)
4244 size = int_size_in_bytes (type);
4246 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4249 if (VECTOR_MODE_P (mode) || mode == TImode)
4251 /* User-created vectors small enough to fit in EAX. */
4255 /* MMX/3dNow values are returned in MM0,
4256 except when it doesn't exits. */
4258 return (TARGET_MMX ? 0 : 1);
4260 /* SSE values are returned in XMM0, except when it doesn't exist. */
4262 return (TARGET_SSE ? 0 : 1);
4276 /* When returning SSE vector types, we have a choice of either
4277 (1) being abi incompatible with a -march switch, or
4278 (2) generating an error.
4279 Given no good solution, I think the safest thing is one warning.
4280 The user won't be able to use -Werror, but....
4282 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4283 called in response to actually generating a caller or callee that
4284 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4285 via aggregate_value_p for general type probing from tree-ssa. */
4288 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4290 static bool warnedsse, warnedmmx;
4294 /* Look at the return type of the function, not the function type. */
4295 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4297 if (!TARGET_SSE && !warnedsse)
4300 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4303 warning (0, "SSE vector return without SSE enabled "
4308 if (!TARGET_MMX && !warnedmmx)
4310 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4313 warning (0, "MMX vector return without MMX enabled "
4322 /* Define how to find the value returned by a library function
4323 assuming the value has mode MODE. */
4325 ix86_libcall_value (enum machine_mode mode)
4339 return gen_rtx_REG (mode, FIRST_SSE_REG);
4342 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4346 return gen_rtx_REG (mode, 0);
4350 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4353 /* Given a mode, return the register to use for a return value. */
4356 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4358 gcc_assert (!TARGET_64BIT);
4360 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4361 we normally prevent this case when mmx is not available. However
4362 some ABIs may require the result to be returned like DImode. */
4363 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4364 return TARGET_MMX ? FIRST_MMX_REG : 0;
4366 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4367 we prevent this case when sse is not available. However some ABIs
4368 may require the result to be returned like integer TImode. */
4369 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4370 return TARGET_SSE ? FIRST_SSE_REG : 0;
4372 /* Decimal floating point values can go in %eax, unlike other float modes. */
4373 if (DECIMAL_FLOAT_MODE_P (mode))
4376 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4377 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4380 /* Floating point return values in %st(0), except for local functions when
4381 SSE math is enabled or for functions with sseregparm attribute. */
4382 if ((func || fntype)
4383 && (mode == SFmode || mode == DFmode))
4385 int sse_level = ix86_function_sseregparm (fntype, func);
4386 if ((sse_level >= 1 && mode == SFmode)
4387 || (sse_level == 2 && mode == DFmode))
4388 return FIRST_SSE_REG;
4391 return FIRST_FLOAT_REG;
4394 /* Create the va_list data type. */
4397 ix86_build_builtin_va_list (void)
4399 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4401 /* For i386 we use plain pointer to argument area. */
4403 return build_pointer_type (char_type_node);
4405 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4406 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4408 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4409 unsigned_type_node);
4410 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4411 unsigned_type_node);
4412 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4414 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4417 va_list_gpr_counter_field = f_gpr;
4418 va_list_fpr_counter_field = f_fpr;
4420 DECL_FIELD_CONTEXT (f_gpr) = record;
4421 DECL_FIELD_CONTEXT (f_fpr) = record;
4422 DECL_FIELD_CONTEXT (f_ovf) = record;
4423 DECL_FIELD_CONTEXT (f_sav) = record;
4425 TREE_CHAIN (record) = type_decl;
4426 TYPE_NAME (record) = type_decl;
4427 TYPE_FIELDS (record) = f_gpr;
4428 TREE_CHAIN (f_gpr) = f_fpr;
4429 TREE_CHAIN (f_fpr) = f_ovf;
4430 TREE_CHAIN (f_ovf) = f_sav;
4432 layout_type (record);
4434 /* The correct type is an array type of one element. */
4435 return build_array_type (record, build_index_type (size_zero_node));
4438 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4441 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4442 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4445 CUMULATIVE_ARGS next_cum;
4446 rtx save_area = NULL_RTX, mem;
4459 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4462 /* Indicate to allocate space on the stack for varargs save area. */
4463 ix86_save_varrargs_registers = 1;
4465 cfun->stack_alignment_needed = 128;
4467 fntype = TREE_TYPE (current_function_decl);
4468 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4469 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4470 != void_type_node));
4472 /* For varargs, we do not want to skip the dummy va_dcl argument.
4473 For stdargs, we do want to skip the last named argument. */
4476 function_arg_advance (&next_cum, mode, type, 1);
4479 save_area = frame_pointer_rtx;
4481 set = get_varargs_alias_set ();
4483 for (i = next_cum.regno;
4485 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4488 mem = gen_rtx_MEM (Pmode,
4489 plus_constant (save_area, i * UNITS_PER_WORD));
4490 MEM_NOTRAP_P (mem) = 1;
4491 set_mem_alias_set (mem, set);
4492 emit_move_insn (mem, gen_rtx_REG (Pmode,
4493 x86_64_int_parameter_registers[i]));
4496 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4498 /* Now emit code to save SSE registers. The AX parameter contains number
4499 of SSE parameter registers used to call this function. We use
4500 sse_prologue_save insn template that produces computed jump across
4501 SSE saves. We need some preparation work to get this working. */
4503 label = gen_label_rtx ();
4504 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4506 /* Compute address to jump to :
4507 label - 5*eax + nnamed_sse_arguments*5 */
4508 tmp_reg = gen_reg_rtx (Pmode);
4509 nsse_reg = gen_reg_rtx (Pmode);
4510 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4511 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4512 gen_rtx_MULT (Pmode, nsse_reg,
4514 if (next_cum.sse_regno)
4517 gen_rtx_CONST (DImode,
4518 gen_rtx_PLUS (DImode,
4520 GEN_INT (next_cum.sse_regno * 4))));
4522 emit_move_insn (nsse_reg, label_ref);
4523 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4525 /* Compute address of memory block we save into. We always use pointer
4526 pointing 127 bytes after first byte to store - this is needed to keep
4527 instruction size limited by 4 bytes. */
4528 tmp_reg = gen_reg_rtx (Pmode);
4529 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4530 plus_constant (save_area,
4531 8 * REGPARM_MAX + 127)));
4532 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4533 MEM_NOTRAP_P (mem) = 1;
4534 set_mem_alias_set (mem, set);
4535 set_mem_align (mem, BITS_PER_WORD);
4537 /* And finally do the dirty job! */
4538 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4539 GEN_INT (next_cum.sse_regno), label));
4544 /* Implement va_start. */
4547 ix86_va_start (tree valist, rtx nextarg)
4549 HOST_WIDE_INT words, n_gpr, n_fpr;
4550 tree f_gpr, f_fpr, f_ovf, f_sav;
4551 tree gpr, fpr, ovf, sav, t;
4554 /* Only 64bit target needs something special. */
4557 std_expand_builtin_va_start (valist, nextarg);
4561 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4562 f_fpr = TREE_CHAIN (f_gpr);
4563 f_ovf = TREE_CHAIN (f_fpr);
4564 f_sav = TREE_CHAIN (f_ovf);
4566 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4567 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4568 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4569 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4570 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4572 /* Count number of gp and fp argument registers used. */
4573 words = current_function_args_info.words;
4574 n_gpr = current_function_args_info.regno;
4575 n_fpr = current_function_args_info.sse_regno;
4577 if (TARGET_DEBUG_ARG)
4578 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4579 (int) words, (int) n_gpr, (int) n_fpr);
4581 if (cfun->va_list_gpr_size)
4583 type = TREE_TYPE (gpr);
4584 t = build2 (MODIFY_EXPR, type, gpr,
4585 build_int_cst (type, n_gpr * 8));
4586 TREE_SIDE_EFFECTS (t) = 1;
4587 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4590 if (cfun->va_list_fpr_size)
4592 type = TREE_TYPE (fpr);
4593 t = build2 (MODIFY_EXPR, type, fpr,
4594 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4595 TREE_SIDE_EFFECTS (t) = 1;
4596 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4599 /* Find the overflow area. */
4600 type = TREE_TYPE (ovf);
4601 t = make_tree (type, virtual_incoming_args_rtx);
4603 t = build2 (PLUS_EXPR, type, t,
4604 build_int_cst (type, words * UNITS_PER_WORD));
4605 t = build2 (MODIFY_EXPR, type, ovf, t);
4606 TREE_SIDE_EFFECTS (t) = 1;
4607 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4609 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4611 /* Find the register save area.
4612 Prologue of the function save it right above stack frame. */
4613 type = TREE_TYPE (sav);
4614 t = make_tree (type, frame_pointer_rtx);
4615 t = build2 (MODIFY_EXPR, type, sav, t);
4616 TREE_SIDE_EFFECTS (t) = 1;
4617 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4621 /* Implement va_arg. */
4624 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4626 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4627 tree f_gpr, f_fpr, f_ovf, f_sav;
4628 tree gpr, fpr, ovf, sav, t;
4630 tree lab_false, lab_over = NULL_TREE;
4635 enum machine_mode nat_mode;
4637 /* Only 64bit target needs something special. */
4639 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4641 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4642 f_fpr = TREE_CHAIN (f_gpr);
4643 f_ovf = TREE_CHAIN (f_fpr);
4644 f_sav = TREE_CHAIN (f_ovf);
4646 valist = build_va_arg_indirect_ref (valist);
4647 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4648 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4649 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4650 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4652 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4654 type = build_pointer_type (type);
4655 size = int_size_in_bytes (type);
4656 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4658 nat_mode = type_natural_mode (type);
4659 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4660 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4662 /* Pull the value out of the saved registers. */
4664 addr = create_tmp_var (ptr_type_node, "addr");
4665 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4669 int needed_intregs, needed_sseregs;
4671 tree int_addr, sse_addr;
4673 lab_false = create_artificial_label ();
4674 lab_over = create_artificial_label ();
4676 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4678 need_temp = (!REG_P (container)
4679 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4680 || TYPE_ALIGN (type) > 128));
4682 /* In case we are passing structure, verify that it is consecutive block
4683 on the register save area. If not we need to do moves. */
4684 if (!need_temp && !REG_P (container))
4686 /* Verify that all registers are strictly consecutive */
4687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4691 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4693 rtx slot = XVECEXP (container, 0, i);
4694 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4695 || INTVAL (XEXP (slot, 1)) != i * 16)
4703 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4705 rtx slot = XVECEXP (container, 0, i);
4706 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4707 || INTVAL (XEXP (slot, 1)) != i * 8)
4719 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4720 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4721 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4722 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4725 /* First ensure that we fit completely in registers. */
4728 t = build_int_cst (TREE_TYPE (gpr),
4729 (REGPARM_MAX - needed_intregs + 1) * 8);
4730 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4731 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4732 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4733 gimplify_and_add (t, pre_p);
4737 t = build_int_cst (TREE_TYPE (fpr),
4738 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4740 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4741 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4742 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4743 gimplify_and_add (t, pre_p);
4746 /* Compute index to start of area used for integer regs. */
4749 /* int_addr = gpr + sav; */
4750 t = fold_convert (ptr_type_node, gpr);
4751 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4752 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4753 gimplify_and_add (t, pre_p);
4757 /* sse_addr = fpr + sav; */
4758 t = fold_convert (ptr_type_node, fpr);
4759 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4760 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4761 gimplify_and_add (t, pre_p);
4766 tree temp = create_tmp_var (type, "va_arg_tmp");
4769 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4770 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4771 gimplify_and_add (t, pre_p);
4773 for (i = 0; i < XVECLEN (container, 0); i++)
4775 rtx slot = XVECEXP (container, 0, i);
4776 rtx reg = XEXP (slot, 0);
4777 enum machine_mode mode = GET_MODE (reg);
4778 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4779 tree addr_type = build_pointer_type (piece_type);
4782 tree dest_addr, dest;
4784 if (SSE_REGNO_P (REGNO (reg)))
4786 src_addr = sse_addr;
4787 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4791 src_addr = int_addr;
4792 src_offset = REGNO (reg) * 8;
4794 src_addr = fold_convert (addr_type, src_addr);
4795 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4796 size_int (src_offset)));
4797 src = build_va_arg_indirect_ref (src_addr);
4799 dest_addr = fold_convert (addr_type, addr);
4800 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4801 size_int (INTVAL (XEXP (slot, 1)))));
4802 dest = build_va_arg_indirect_ref (dest_addr);
4804 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4805 gimplify_and_add (t, pre_p);
4811 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4812 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4813 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4814 gimplify_and_add (t, pre_p);
4818 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4819 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4820 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4821 gimplify_and_add (t, pre_p);
4824 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4825 gimplify_and_add (t, pre_p);
4827 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4828 append_to_statement_list (t, pre_p);
4831 /* ... otherwise out of the overflow area. */
4833 /* Care for on-stack alignment if needed. */
4834 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4835 || integer_zerop (TYPE_SIZE (type)))
4839 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4840 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4841 build_int_cst (TREE_TYPE (ovf), align - 1));
4842 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4843 build_int_cst (TREE_TYPE (t), -align));
4845 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4847 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4848 gimplify_and_add (t2, pre_p);
4850 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4851 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4852 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4853 gimplify_and_add (t, pre_p);
4857 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4858 append_to_statement_list (t, pre_p);
4861 ptrtype = build_pointer_type (type);
4862 addr = fold_convert (ptrtype, addr);
4865 addr = build_va_arg_indirect_ref (addr);
4866 return build_va_arg_indirect_ref (addr);
4869 /* Return nonzero if OPNUM's MEM should be matched
4870 in movabs* patterns. */
4873 ix86_check_movabs (rtx insn, int opnum)
4877 set = PATTERN (insn);
4878 if (GET_CODE (set) == PARALLEL)
4879 set = XVECEXP (set, 0, 0);
4880 gcc_assert (GET_CODE (set) == SET);
4881 mem = XEXP (set, opnum);
4882 while (GET_CODE (mem) == SUBREG)
4883 mem = SUBREG_REG (mem);
4884 gcc_assert (GET_CODE (mem) == MEM);
4885 return (volatile_ok || !MEM_VOLATILE_P (mem));
4888 /* Initialize the table of extra 80387 mathematical constants. */
4891 init_ext_80387_constants (void)
4893 static const char * cst[5] =
4895 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4896 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4897 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4898 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4899 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4903 for (i = 0; i < 5; i++)
4905 real_from_string (&ext_80387_constants_table[i], cst[i]);
4906 /* Ensure each constant is rounded to XFmode precision. */
4907 real_convert (&ext_80387_constants_table[i],
4908 XFmode, &ext_80387_constants_table[i]);
4911 ext_80387_constants_init = 1;
4914 /* Return true if the constant is something that can be loaded with
4915 a special instruction. */
4918 standard_80387_constant_p (rtx x)
4920 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4923 if (x == CONST0_RTX (GET_MODE (x)))
4925 if (x == CONST1_RTX (GET_MODE (x)))
4928 /* For XFmode constants, try to find a special 80387 instruction when
4929 optimizing for size or on those CPUs that benefit from them. */
4930 if (GET_MODE (x) == XFmode
4931 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4936 if (! ext_80387_constants_init)
4937 init_ext_80387_constants ();
4939 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4940 for (i = 0; i < 5; i++)
4941 if (real_identical (&r, &ext_80387_constants_table[i]))
4948 /* Return the opcode of the special instruction to be used to load
4952 standard_80387_constant_opcode (rtx x)
4954 switch (standard_80387_constant_p (x))
4975 /* Return the CONST_DOUBLE representing the 80387 constant that is
4976 loaded by the specified special instruction. The argument IDX
4977 matches the return value from standard_80387_constant_p. */
4980 standard_80387_constant_rtx (int idx)
4984 if (! ext_80387_constants_init)
4985 init_ext_80387_constants ();
5001 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5005 /* Return 1 if mode is a valid mode for sse. */
5007 standard_sse_mode_p (enum machine_mode mode)
5024 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5027 standard_sse_constant_p (rtx x)
5029 enum machine_mode mode = GET_MODE (x);
5031 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5033 if (vector_all_ones_operand (x, mode)
5034 && standard_sse_mode_p (mode))
5035 return TARGET_SSE2 ? 2 : -1;
5040 /* Return the opcode of the special instruction to be used to load
5044 standard_sse_constant_opcode (rtx insn, rtx x)
5046 switch (standard_sse_constant_p (x))
5049 if (get_attr_mode (insn) == MODE_V4SF)
5050 return "xorps\t%0, %0";
5051 else if (get_attr_mode (insn) == MODE_V2DF)
5052 return "xorpd\t%0, %0";
5054 return "pxor\t%0, %0";
5056 return "pcmpeqd\t%0, %0";
5061 /* Returns 1 if OP contains a symbol reference */
5064 symbolic_reference_mentioned_p (rtx op)
5069 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5072 fmt = GET_RTX_FORMAT (GET_CODE (op));
5073 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5079 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5080 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5084 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5091 /* Return 1 if it is appropriate to emit `ret' instructions in the
5092 body of a function. Do this only if the epilogue is simple, needing a
5093 couple of insns. Prior to reloading, we can't tell how many registers
5094 must be saved, so return 0 then. Return 0 if there is no frame
5095 marker to de-allocate. */
5098 ix86_can_use_return_insn_p (void)
5100 struct ix86_frame frame;
5102 if (! reload_completed || frame_pointer_needed)
5105 /* Don't allow more than 32 pop, since that's all we can do
5106 with one instruction. */
5107 if (current_function_pops_args
5108 && current_function_args_size >= 32768)
5111 ix86_compute_frame_layout (&frame);
5112 return frame.to_allocate == 0 && frame.nregs == 0;
5115 /* Value should be nonzero if functions must have frame pointers.
5116 Zero means the frame pointer need not be set up (and parms may
5117 be accessed via the stack pointer) in functions that seem suitable. */
5120 ix86_frame_pointer_required (void)
5122 /* If we accessed previous frames, then the generated code expects
5123 to be able to access the saved ebp value in our frame. */
5124 if (cfun->machine->accesses_prev_frame)
5127 /* Several x86 os'es need a frame pointer for other reasons,
5128 usually pertaining to setjmp. */
5129 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5132 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5133 the frame pointer by default. Turn it back on now if we've not
5134 got a leaf function. */
5135 if (TARGET_OMIT_LEAF_FRAME_POINTER
5136 && (!current_function_is_leaf
5137 || ix86_current_function_calls_tls_descriptor))
5140 if (current_function_profile)
5146 /* Record that the current function accesses previous call frames. */
5149 ix86_setup_frame_addresses (void)
5151 cfun->machine->accesses_prev_frame = 1;
5154 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5155 # define USE_HIDDEN_LINKONCE 1
5157 # define USE_HIDDEN_LINKONCE 0
5160 static int pic_labels_used;
5162 /* Fills in the label name that should be used for a pc thunk for
5163 the given register. */
5166 get_pc_thunk_name (char name[32], unsigned int regno)
5168 gcc_assert (!TARGET_64BIT);
5170 if (USE_HIDDEN_LINKONCE)
5171 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5173 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5177 /* This function generates code for -fpic that loads %ebx with
5178 the return address of the caller and then returns. */
5181 ix86_file_end (void)
5186 for (regno = 0; regno < 8; ++regno)
5190 if (! ((pic_labels_used >> regno) & 1))
5193 get_pc_thunk_name (name, regno);
5198 switch_to_section (darwin_sections[text_coal_section]);
5199 fputs ("\t.weak_definition\t", asm_out_file);
5200 assemble_name (asm_out_file, name);
5201 fputs ("\n\t.private_extern\t", asm_out_file);
5202 assemble_name (asm_out_file, name);
5203 fputs ("\n", asm_out_file);
5204 ASM_OUTPUT_LABEL (asm_out_file, name);
5208 if (USE_HIDDEN_LINKONCE)
5212 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5214 TREE_PUBLIC (decl) = 1;
5215 TREE_STATIC (decl) = 1;
5216 DECL_ONE_ONLY (decl) = 1;
5218 (*targetm.asm_out.unique_section) (decl, 0);
5219 switch_to_section (get_named_section (decl, NULL, 0));
5221 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5222 fputs ("\t.hidden\t", asm_out_file);
5223 assemble_name (asm_out_file, name);
5224 fputc ('\n', asm_out_file);
5225 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5229 switch_to_section (text_section);
5230 ASM_OUTPUT_LABEL (asm_out_file, name);
5233 xops[0] = gen_rtx_REG (SImode, regno);
5234 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5235 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5236 output_asm_insn ("ret", xops);
5239 if (NEED_INDICATE_EXEC_STACK)
5240 file_end_indicate_exec_stack ();
5243 /* Emit code for the SET_GOT patterns. */
5246 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5251 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5253 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5255 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5258 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5260 output_asm_insn ("call\t%a2", xops);
5263 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5264 is what will be referenced by the Mach-O PIC subsystem. */
5266 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5269 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5270 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5273 output_asm_insn ("pop{l}\t%0", xops);
5278 get_pc_thunk_name (name, REGNO (dest));
5279 pic_labels_used |= 1 << REGNO (dest);
5281 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5282 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5283 output_asm_insn ("call\t%X2", xops);
5284 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5285 is what will be referenced by the Mach-O PIC subsystem. */
5288 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5290 targetm.asm_out.internal_label (asm_out_file, "L",
5291 CODE_LABEL_NUMBER (label));
5298 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5299 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5301 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5306 /* Generate an "push" pattern for input ARG. */
5311 return gen_rtx_SET (VOIDmode,
5313 gen_rtx_PRE_DEC (Pmode,
5314 stack_pointer_rtx)),
5318 /* Return >= 0 if there is an unused call-clobbered register available
5319 for the entire function. */
5322 ix86_select_alt_pic_regnum (void)
5324 if (current_function_is_leaf && !current_function_profile
5325 && !ix86_current_function_calls_tls_descriptor)
5328 for (i = 2; i >= 0; --i)
5329 if (!regs_ever_live[i])
5333 return INVALID_REGNUM;
5336 /* Return 1 if we need to save REGNO. */
5338 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5340 if (pic_offset_table_rtx
5341 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5342 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5343 || current_function_profile
5344 || current_function_calls_eh_return
5345 || current_function_uses_const_pool))
5347 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5352 if (current_function_calls_eh_return && maybe_eh_return)
5357 unsigned test = EH_RETURN_DATA_REGNO (i);
5358 if (test == INVALID_REGNUM)
5365 if (cfun->machine->force_align_arg_pointer
5366 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5369 return (regs_ever_live[regno]
5370 && !call_used_regs[regno]
5371 && !fixed_regs[regno]
5372 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5375 /* Return number of registers to be saved on the stack. */
5378 ix86_nsaved_regs (void)
5383 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5384 if (ix86_save_reg (regno, true))
5389 /* Return the offset between two registers, one to be eliminated, and the other
5390 its replacement, at the start of a routine. */
5393 ix86_initial_elimination_offset (int from, int to)
5395 struct ix86_frame frame;
5396 ix86_compute_frame_layout (&frame);
5398 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5399 return frame.hard_frame_pointer_offset;
5400 else if (from == FRAME_POINTER_REGNUM
5401 && to == HARD_FRAME_POINTER_REGNUM)
5402 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5405 gcc_assert (to == STACK_POINTER_REGNUM);
5407 if (from == ARG_POINTER_REGNUM)
5408 return frame.stack_pointer_offset;
5410 gcc_assert (from == FRAME_POINTER_REGNUM);
5411 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5415 /* Fill structure ix86_frame about frame of currently computed function. */
5418 ix86_compute_frame_layout (struct ix86_frame *frame)
5420 HOST_WIDE_INT total_size;
5421 unsigned int stack_alignment_needed;
5422 HOST_WIDE_INT offset;
5423 unsigned int preferred_alignment;
5424 HOST_WIDE_INT size = get_frame_size ();
5426 frame->nregs = ix86_nsaved_regs ();
5429 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5430 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5432 /* During reload iteration the amount of registers saved can change.
5433 Recompute the value as needed. Do not recompute when amount of registers
5434 didn't change as reload does multiple calls to the function and does not
5435 expect the decision to change within single iteration. */
5437 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5439 int count = frame->nregs;
5441 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5442 /* The fast prologue uses move instead of push to save registers. This
5443 is significantly longer, but also executes faster as modern hardware
5444 can execute the moves in parallel, but can't do that for push/pop.
5446 Be careful about choosing what prologue to emit: When function takes
5447 many instructions to execute we may use slow version as well as in
5448 case function is known to be outside hot spot (this is known with
5449 feedback only). Weight the size of function by number of registers
5450 to save as it is cheap to use one or two push instructions but very
5451 slow to use many of them. */
5453 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5454 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5455 || (flag_branch_probabilities
5456 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5457 cfun->machine->use_fast_prologue_epilogue = false;
5459 cfun->machine->use_fast_prologue_epilogue
5460 = !expensive_function_p (count);
5462 if (TARGET_PROLOGUE_USING_MOVE
5463 && cfun->machine->use_fast_prologue_epilogue)
5464 frame->save_regs_using_mov = true;
5466 frame->save_regs_using_mov = false;
5469 /* Skip return address and saved base pointer. */
5470 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5472 frame->hard_frame_pointer_offset = offset;
5474 /* Do some sanity checking of stack_alignment_needed and
5475 preferred_alignment, since i386 port is the only using those features
5476 that may break easily. */
5478 gcc_assert (!size || stack_alignment_needed);
5479 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5480 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5481 gcc_assert (stack_alignment_needed
5482 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5484 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5485 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5487 /* Register save area */
5488 offset += frame->nregs * UNITS_PER_WORD;
5491 if (ix86_save_varrargs_registers)
5493 offset += X86_64_VARARGS_SIZE;
5494 frame->va_arg_size = X86_64_VARARGS_SIZE;
5497 frame->va_arg_size = 0;
5499 /* Align start of frame for local function. */
5500 frame->padding1 = ((offset + stack_alignment_needed - 1)
5501 & -stack_alignment_needed) - offset;
5503 offset += frame->padding1;
5505 /* Frame pointer points here. */
5506 frame->frame_pointer_offset = offset;
5510 /* Add outgoing arguments area. Can be skipped if we eliminated
5511 all the function calls as dead code.
5512 Skipping is however impossible when function calls alloca. Alloca
5513 expander assumes that last current_function_outgoing_args_size
5514 of stack frame are unused. */
5515 if (ACCUMULATE_OUTGOING_ARGS
5516 && (!current_function_is_leaf || current_function_calls_alloca
5517 || ix86_current_function_calls_tls_descriptor))
5519 offset += current_function_outgoing_args_size;
5520 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5523 frame->outgoing_arguments_size = 0;
5525 /* Align stack boundary. Only needed if we're calling another function
5527 if (!current_function_is_leaf || current_function_calls_alloca
5528 || ix86_current_function_calls_tls_descriptor)
5529 frame->padding2 = ((offset + preferred_alignment - 1)
5530 & -preferred_alignment) - offset;
5532 frame->padding2 = 0;
5534 offset += frame->padding2;
5536 /* We've reached end of stack frame. */
5537 frame->stack_pointer_offset = offset;
5539 /* Size prologue needs to allocate. */
5540 frame->to_allocate =
5541 (size + frame->padding1 + frame->padding2
5542 + frame->outgoing_arguments_size + frame->va_arg_size);
5544 if ((!frame->to_allocate && frame->nregs <= 1)
5545 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5546 frame->save_regs_using_mov = false;
5548 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5549 && current_function_is_leaf
5550 && !ix86_current_function_calls_tls_descriptor)
5552 frame->red_zone_size = frame->to_allocate;
5553 if (frame->save_regs_using_mov)
5554 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5555 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5556 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5559 frame->red_zone_size = 0;
5560 frame->to_allocate -= frame->red_zone_size;
5561 frame->stack_pointer_offset -= frame->red_zone_size;
5563 fprintf (stderr, "nregs: %i\n", frame->nregs);
5564 fprintf (stderr, "size: %i\n", size);
5565 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5566 fprintf (stderr, "padding1: %i\n", frame->padding1);
5567 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5568 fprintf (stderr, "padding2: %i\n", frame->padding2);
5569 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5570 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5571 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5572 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5573 frame->hard_frame_pointer_offset);
5574 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5578 /* Emit code to save registers in the prologue. */
5581 ix86_emit_save_regs (void)
5586 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5587 if (ix86_save_reg (regno, true))
5589 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5590 RTX_FRAME_RELATED_P (insn) = 1;
5594 /* Emit code to save registers using MOV insns. First register
5595 is restored from POINTER + OFFSET. */
5597 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5602 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5603 if (ix86_save_reg (regno, true))
5605 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5607 gen_rtx_REG (Pmode, regno));
5608 RTX_FRAME_RELATED_P (insn) = 1;
5609 offset += UNITS_PER_WORD;
5613 /* Expand prologue or epilogue stack adjustment.
5614 The pattern exist to put a dependency on all ebp-based memory accesses.
5615 STYLE should be negative if instructions should be marked as frame related,
5616 zero if %r11 register is live and cannot be freely used and positive
5620 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5625 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5626 else if (x86_64_immediate_operand (offset, DImode))
5627 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5631 /* r11 is used by indirect sibcall return as well, set before the
5632 epilogue and used after the epilogue. ATM indirect sibcall
5633 shouldn't be used together with huge frame sizes in one
5634 function because of the frame_size check in sibcall.c. */
5636 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5637 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5639 RTX_FRAME_RELATED_P (insn) = 1;
5640 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5644 RTX_FRAME_RELATED_P (insn) = 1;
5647 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5650 ix86_internal_arg_pointer (void)
5652 bool has_force_align_arg_pointer =
5653 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5654 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5655 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5656 && DECL_NAME (current_function_decl)
5657 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5658 && DECL_FILE_SCOPE_P (current_function_decl))
5659 || ix86_force_align_arg_pointer
5660 || has_force_align_arg_pointer)
5662 /* Nested functions can't realign the stack due to a register
5664 if (DECL_CONTEXT (current_function_decl)
5665 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5667 if (ix86_force_align_arg_pointer)
5668 warning (0, "-mstackrealign ignored for nested functions");
5669 if (has_force_align_arg_pointer)
5670 error ("%s not supported for nested functions",
5671 ix86_force_align_arg_pointer_string);
5672 return virtual_incoming_args_rtx;
5674 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5675 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5678 return virtual_incoming_args_rtx;
5681 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5682 This is called from dwarf2out.c to emit call frame instructions
5683 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5685 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5687 rtx unspec = SET_SRC (pattern);
5688 gcc_assert (GET_CODE (unspec) == UNSPEC);
5692 case UNSPEC_REG_SAVE:
5693 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5694 SET_DEST (pattern));
5696 case UNSPEC_DEF_CFA:
5697 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5698 INTVAL (XVECEXP (unspec, 0, 0)));
5705 /* Expand the prologue into a bunch of separate insns. */
5708 ix86_expand_prologue (void)
5712 struct ix86_frame frame;
5713 HOST_WIDE_INT allocate;
5715 ix86_compute_frame_layout (&frame);
5717 if (cfun->machine->force_align_arg_pointer)
5721 /* Grab the argument pointer. */
5722 x = plus_constant (stack_pointer_rtx, 4);
5723 y = cfun->machine->force_align_arg_pointer;
5724 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5725 RTX_FRAME_RELATED_P (insn) = 1;
5727 /* The unwind info consists of two parts: install the fafp as the cfa,
5728 and record the fafp as the "save register" of the stack pointer.
5729 The later is there in order that the unwinder can see where it
5730 should restore the stack pointer across the and insn. */
5731 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5732 x = gen_rtx_SET (VOIDmode, y, x);
5733 RTX_FRAME_RELATED_P (x) = 1;
5734 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5736 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5737 RTX_FRAME_RELATED_P (y) = 1;
5738 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5739 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5740 REG_NOTES (insn) = x;
5742 /* Align the stack. */
5743 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5746 /* And here we cheat like madmen with the unwind info. We force the
5747 cfa register back to sp+4, which is exactly what it was at the
5748 start of the function. Re-pushing the return address results in
5749 the return at the same spot relative to the cfa, and thus is
5750 correct wrt the unwind info. */
5751 x = cfun->machine->force_align_arg_pointer;
5752 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5753 insn = emit_insn (gen_push (x));
5754 RTX_FRAME_RELATED_P (insn) = 1;
5757 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5758 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5759 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5760 REG_NOTES (insn) = x;
5763 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5764 slower on all targets. Also sdb doesn't like it. */
5766 if (frame_pointer_needed)
5768 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5769 RTX_FRAME_RELATED_P (insn) = 1;
5771 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5772 RTX_FRAME_RELATED_P (insn) = 1;
5775 allocate = frame.to_allocate;
5777 if (!frame.save_regs_using_mov)
5778 ix86_emit_save_regs ();
5780 allocate += frame.nregs * UNITS_PER_WORD;
5782 /* When using red zone we may start register saving before allocating
5783 the stack frame saving one cycle of the prologue. */
5784 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5785 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5786 : stack_pointer_rtx,
5787 -frame.nregs * UNITS_PER_WORD);
5791 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5792 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5793 GEN_INT (-allocate), -1);
5796 /* Only valid for Win32. */
5797 rtx eax = gen_rtx_REG (SImode, 0);
5798 bool eax_live = ix86_eax_live_at_start_p ();
5801 gcc_assert (!TARGET_64BIT);
5805 emit_insn (gen_push (eax));
5809 emit_move_insn (eax, GEN_INT (allocate));
5811 insn = emit_insn (gen_allocate_stack_worker (eax));
5812 RTX_FRAME_RELATED_P (insn) = 1;
5813 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5814 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5815 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5816 t, REG_NOTES (insn));
5820 if (frame_pointer_needed)
5821 t = plus_constant (hard_frame_pointer_rtx,
5824 - frame.nregs * UNITS_PER_WORD);
5826 t = plus_constant (stack_pointer_rtx, allocate);
5827 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5831 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5833 if (!frame_pointer_needed || !frame.to_allocate)
5834 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5836 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5837 -frame.nregs * UNITS_PER_WORD);
5840 pic_reg_used = false;
5841 if (pic_offset_table_rtx
5842 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5843 || current_function_profile))
5845 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5847 if (alt_pic_reg_used != INVALID_REGNUM)
5848 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5850 pic_reg_used = true;
5856 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5858 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5860 /* Even with accurate pre-reload life analysis, we can wind up
5861 deleting all references to the pic register after reload.
5862 Consider if cross-jumping unifies two sides of a branch
5863 controlled by a comparison vs the only read from a global.
5864 In which case, allow the set_got to be deleted, though we're
5865 too late to do anything about the ebx save in the prologue. */
5866 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5869 /* Prevent function calls from be scheduled before the call to mcount.
5870 In the pic_reg_used case, make sure that the got load isn't deleted. */
5871 if (current_function_profile)
5872 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5875 /* Emit code to restore saved registers using MOV insns. First register
5876 is restored from POINTER + OFFSET. */
5878 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5879 int maybe_eh_return)
5882 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5885 if (ix86_save_reg (regno, maybe_eh_return))
5887 /* Ensure that adjust_address won't be forced to produce pointer
5888 out of range allowed by x86-64 instruction set. */
5889 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5893 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5894 emit_move_insn (r11, GEN_INT (offset));
5895 emit_insn (gen_adddi3 (r11, r11, pointer));
5896 base_address = gen_rtx_MEM (Pmode, r11);
5899 emit_move_insn (gen_rtx_REG (Pmode, regno),
5900 adjust_address (base_address, Pmode, offset));
5901 offset += UNITS_PER_WORD;
5905 /* Restore function stack, frame, and registers. */
5908 ix86_expand_epilogue (int style)
5911 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5912 struct ix86_frame frame;
5913 HOST_WIDE_INT offset;
5915 ix86_compute_frame_layout (&frame);
5917 /* Calculate start of saved registers relative to ebp. Special care
5918 must be taken for the normal return case of a function using
5919 eh_return: the eax and edx registers are marked as saved, but not
5920 restored along this path. */
5921 offset = frame.nregs;
5922 if (current_function_calls_eh_return && style != 2)
5924 offset *= -UNITS_PER_WORD;
5926 /* If we're only restoring one register and sp is not valid then
5927 using a move instruction to restore the register since it's
5928 less work than reloading sp and popping the register.
5930 The default code result in stack adjustment using add/lea instruction,
5931 while this code results in LEAVE instruction (or discrete equivalent),
5932 so it is profitable in some other cases as well. Especially when there
5933 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5934 and there is exactly one register to pop. This heuristic may need some
5935 tuning in future. */
5936 if ((!sp_valid && frame.nregs <= 1)
5937 || (TARGET_EPILOGUE_USING_MOVE
5938 && cfun->machine->use_fast_prologue_epilogue
5939 && (frame.nregs > 1 || frame.to_allocate))
5940 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5941 || (frame_pointer_needed && TARGET_USE_LEAVE
5942 && cfun->machine->use_fast_prologue_epilogue
5943 && frame.nregs == 1)
5944 || current_function_calls_eh_return)
5946 /* Restore registers. We can use ebp or esp to address the memory
5947 locations. If both are available, default to ebp, since offsets
5948 are known to be small. Only exception is esp pointing directly to the
5949 end of block of saved registers, where we may simplify addressing
5952 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5953 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5954 frame.to_allocate, style == 2);
5956 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5957 offset, style == 2);
5959 /* eh_return epilogues need %ecx added to the stack pointer. */
5962 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5964 if (frame_pointer_needed)
5966 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5967 tmp = plus_constant (tmp, UNITS_PER_WORD);
5968 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5970 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5971 emit_move_insn (hard_frame_pointer_rtx, tmp);
5973 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5978 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5979 tmp = plus_constant (tmp, (frame.to_allocate
5980 + frame.nregs * UNITS_PER_WORD));
5981 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5984 else if (!frame_pointer_needed)
5985 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5986 GEN_INT (frame.to_allocate
5987 + frame.nregs * UNITS_PER_WORD),
5989 /* If not an i386, mov & pop is faster than "leave". */
5990 else if (TARGET_USE_LEAVE || optimize_size
5991 || !cfun->machine->use_fast_prologue_epilogue)
5992 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5995 pro_epilogue_adjust_stack (stack_pointer_rtx,
5996 hard_frame_pointer_rtx,
5999 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6001 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6006 /* First step is to deallocate the stack frame so that we can
6007 pop the registers. */
6010 gcc_assert (frame_pointer_needed);
6011 pro_epilogue_adjust_stack (stack_pointer_rtx,
6012 hard_frame_pointer_rtx,
6013 GEN_INT (offset), style);
6015 else if (frame.to_allocate)
6016 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6017 GEN_INT (frame.to_allocate), style);
6019 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6020 if (ix86_save_reg (regno, false))
6023 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6025 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6027 if (frame_pointer_needed)
6029 /* Leave results in shorter dependency chains on CPUs that are
6030 able to grok it fast. */
6031 if (TARGET_USE_LEAVE)
6032 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6033 else if (TARGET_64BIT)
6034 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6036 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6040 if (cfun->machine->force_align_arg_pointer)
6042 emit_insn (gen_addsi3 (stack_pointer_rtx,
6043 cfun->machine->force_align_arg_pointer,
6047 /* Sibcall epilogues don't want a return instruction. */
6051 if (current_function_pops_args && current_function_args_size)
6053 rtx popc = GEN_INT (current_function_pops_args);
6055 /* i386 can only pop 64K bytes. If asked to pop more, pop
6056 return address, do explicit add, and jump indirectly to the
6059 if (current_function_pops_args >= 65536)
6061 rtx ecx = gen_rtx_REG (SImode, 2);
6063 /* There is no "pascal" calling convention in 64bit ABI. */
6064 gcc_assert (!TARGET_64BIT);
6066 emit_insn (gen_popsi1 (ecx));
6067 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6068 emit_jump_insn (gen_return_indirect_internal (ecx));
6071 emit_jump_insn (gen_return_pop_internal (popc));
6074 emit_jump_insn (gen_return_internal ());
6077 /* Reset from the function's potential modifications. */
6080 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6081 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6083 if (pic_offset_table_rtx)
6084 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6086 /* Mach-O doesn't support labels at the end of objects, so if
6087 it looks like we might want one, insert a NOP. */
6089 rtx insn = get_last_insn ();
6092 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6093 insn = PREV_INSN (insn);
6097 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6098 fputs ("\tnop\n", file);
6104 /* Extract the parts of an RTL expression that is a valid memory address
6105 for an instruction. Return 0 if the structure of the address is
6106 grossly off. Return -1 if the address contains ASHIFT, so it is not
6107 strictly valid, but still used for computing length of lea instruction. */
6110 ix86_decompose_address (rtx addr, struct ix86_address *out)
6112 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6113 rtx base_reg, index_reg;
6114 HOST_WIDE_INT scale = 1;
6115 rtx scale_rtx = NULL_RTX;
6117 enum ix86_address_seg seg = SEG_DEFAULT;
6119 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6121 else if (GET_CODE (addr) == PLUS)
6131 addends[n++] = XEXP (op, 1);
6134 while (GET_CODE (op) == PLUS);
6139 for (i = n; i >= 0; --i)
6142 switch (GET_CODE (op))
6147 index = XEXP (op, 0);
6148 scale_rtx = XEXP (op, 1);
6152 if (XINT (op, 1) == UNSPEC_TP
6153 && TARGET_TLS_DIRECT_SEG_REFS
6154 && seg == SEG_DEFAULT)
6155 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6184 else if (GET_CODE (addr) == MULT)
6186 index = XEXP (addr, 0); /* index*scale */
6187 scale_rtx = XEXP (addr, 1);
6189 else if (GET_CODE (addr) == ASHIFT)
6193 /* We're called for lea too, which implements ashift on occasion. */
6194 index = XEXP (addr, 0);
6195 tmp = XEXP (addr, 1);
6196 if (GET_CODE (tmp) != CONST_INT)
6198 scale = INTVAL (tmp);
6199 if ((unsigned HOST_WIDE_INT) scale > 3)
6205 disp = addr; /* displacement */
6207 /* Extract the integral value of scale. */
6210 if (GET_CODE (scale_rtx) != CONST_INT)
6212 scale = INTVAL (scale_rtx);
6215 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6216 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6218 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6219 if (base_reg && index_reg && scale == 1
6220 && (index_reg == arg_pointer_rtx
6221 || index_reg == frame_pointer_rtx
6222 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6225 tmp = base, base = index, index = tmp;
6226 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6229 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6230 if ((base_reg == hard_frame_pointer_rtx
6231 || base_reg == frame_pointer_rtx
6232 || base_reg == arg_pointer_rtx) && !disp)
6235 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6236 Avoid this by transforming to [%esi+0]. */
6237 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6238 && base_reg && !index_reg && !disp
6240 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6243 /* Special case: encode reg+reg instead of reg*2. */
6244 if (!base && index && scale && scale == 2)
6245 base = index, base_reg = index_reg, scale = 1;
6247 /* Special case: scaling cannot be encoded without base or displacement. */
6248 if (!base && !disp && index && scale != 1)
6260 /* Return cost of the memory address x.
6261 For i386, it is better to use a complex address than let gcc copy
6262 the address into a reg and make a new pseudo. But not if the address
6263 requires to two regs - that would mean more pseudos with longer
6266 ix86_address_cost (rtx x)
6268 struct ix86_address parts;
6270 int ok = ix86_decompose_address (x, &parts);
6274 if (parts.base && GET_CODE (parts.base) == SUBREG)
6275 parts.base = SUBREG_REG (parts.base);
6276 if (parts.index && GET_CODE (parts.index) == SUBREG)
6277 parts.index = SUBREG_REG (parts.index);
6279 /* More complex memory references are better. */
6280 if (parts.disp && parts.disp != const0_rtx)
6282 if (parts.seg != SEG_DEFAULT)
6285 /* Attempt to minimize number of registers in the address. */
6287 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6289 && (!REG_P (parts.index)
6290 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6294 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6296 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6297 && parts.base != parts.index)
6300 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6301 since it's predecode logic can't detect the length of instructions
6302 and it degenerates to vector decoded. Increase cost of such
6303 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6304 to split such addresses or even refuse such addresses at all.
6306 Following addressing modes are affected:
6311 The first and last case may be avoidable by explicitly coding the zero in
6312 memory address, but I don't have AMD-K6 machine handy to check this
6316 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6317 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6318 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6324 /* If X is a machine specific address (i.e. a symbol or label being
6325 referenced as a displacement from the GOT implemented using an
6326 UNSPEC), then return the base term. Otherwise return X. */
6329 ix86_find_base_term (rtx x)
6335 if (GET_CODE (x) != CONST)
6338 if (GET_CODE (term) == PLUS
6339 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6340 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6341 term = XEXP (term, 0);
6342 if (GET_CODE (term) != UNSPEC
6343 || XINT (term, 1) != UNSPEC_GOTPCREL)
6346 term = XVECEXP (term, 0, 0);
6348 if (GET_CODE (term) != SYMBOL_REF
6349 && GET_CODE (term) != LABEL_REF)
6355 term = ix86_delegitimize_address (x);
6357 if (GET_CODE (term) != SYMBOL_REF
6358 && GET_CODE (term) != LABEL_REF)
6364 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6365 this is used for to form addresses to local data when -fPIC is in
6369 darwin_local_data_pic (rtx disp)
6371 if (GET_CODE (disp) == MINUS)
6373 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6374 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6375 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6377 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6378 if (! strcmp (sym_name, "<pic base>"))
6386 /* Determine if a given RTX is a valid constant. We already know this
6387 satisfies CONSTANT_P. */
6390 legitimate_constant_p (rtx x)
6392 switch (GET_CODE (x))
6397 if (GET_CODE (x) == PLUS)
6399 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6404 if (TARGET_MACHO && darwin_local_data_pic (x))
6407 /* Only some unspecs are valid as "constants". */
6408 if (GET_CODE (x) == UNSPEC)
6409 switch (XINT (x, 1))
6412 return TARGET_64BIT;
6415 x = XVECEXP (x, 0, 0);
6416 return (GET_CODE (x) == SYMBOL_REF
6417 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6419 x = XVECEXP (x, 0, 0);
6420 return (GET_CODE (x) == SYMBOL_REF
6421 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6426 /* We must have drilled down to a symbol. */
6427 if (GET_CODE (x) == LABEL_REF)
6429 if (GET_CODE (x) != SYMBOL_REF)
6434 /* TLS symbols are never valid. */
6435 if (SYMBOL_REF_TLS_MODEL (x))
6440 if (GET_MODE (x) == TImode
6441 && x != CONST0_RTX (TImode)
6447 if (x == CONST0_RTX (GET_MODE (x)))
6455 /* Otherwise we handle everything else in the move patterns. */
6459 /* Determine if it's legal to put X into the constant pool. This
6460 is not possible for the address of thread-local symbols, which
6461 is checked above. */
6464 ix86_cannot_force_const_mem (rtx x)
6466 /* We can always put integral constants and vectors in memory. */
6467 switch (GET_CODE (x))
6477 return !legitimate_constant_p (x);
6480 /* Determine if a given RTX is a valid constant address. */
6483 constant_address_p (rtx x)
6485 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6488 /* Nonzero if the constant value X is a legitimate general operand
6489 when generating PIC code. It is given that flag_pic is on and
6490 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6493 legitimate_pic_operand_p (rtx x)
6497 switch (GET_CODE (x))
6500 inner = XEXP (x, 0);
6501 if (GET_CODE (inner) == PLUS
6502 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6503 inner = XEXP (inner, 0);
6505 /* Only some unspecs are valid as "constants". */
6506 if (GET_CODE (inner) == UNSPEC)
6507 switch (XINT (inner, 1))
6510 return TARGET_64BIT;
6512 x = XVECEXP (inner, 0, 0);
6513 return (GET_CODE (x) == SYMBOL_REF
6514 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6522 return legitimate_pic_address_disp_p (x);
6529 /* Determine if a given CONST RTX is a valid memory displacement
6533 legitimate_pic_address_disp_p (rtx disp)
6537 /* In 64bit mode we can allow direct addresses of symbols and labels
6538 when they are not dynamic symbols. */
6541 rtx op0 = disp, op1;
6543 switch (GET_CODE (disp))
6549 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6551 op0 = XEXP (XEXP (disp, 0), 0);
6552 op1 = XEXP (XEXP (disp, 0), 1);
6553 if (GET_CODE (op1) != CONST_INT
6554 || INTVAL (op1) >= 16*1024*1024
6555 || INTVAL (op1) < -16*1024*1024)
6557 if (GET_CODE (op0) == LABEL_REF)
6559 if (GET_CODE (op0) != SYMBOL_REF)
6564 /* TLS references should always be enclosed in UNSPEC. */
6565 if (SYMBOL_REF_TLS_MODEL (op0))
6567 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6575 if (GET_CODE (disp) != CONST)
6577 disp = XEXP (disp, 0);
6581 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6582 of GOT tables. We should not need these anyway. */
6583 if (GET_CODE (disp) != UNSPEC
6584 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6585 && XINT (disp, 1) != UNSPEC_GOTOFF))
6588 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6589 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6595 if (GET_CODE (disp) == PLUS)
6597 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6599 disp = XEXP (disp, 0);
6603 if (TARGET_MACHO && darwin_local_data_pic (disp))
6606 if (GET_CODE (disp) != UNSPEC)
6609 switch (XINT (disp, 1))
6614 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6616 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6617 While ABI specify also 32bit relocation but we don't produce it in
6618 small PIC model at all. */
6619 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6620 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6622 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6624 case UNSPEC_GOTTPOFF:
6625 case UNSPEC_GOTNTPOFF:
6626 case UNSPEC_INDNTPOFF:
6629 disp = XVECEXP (disp, 0, 0);
6630 return (GET_CODE (disp) == SYMBOL_REF
6631 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6633 disp = XVECEXP (disp, 0, 0);
6634 return (GET_CODE (disp) == SYMBOL_REF
6635 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6637 disp = XVECEXP (disp, 0, 0);
6638 return (GET_CODE (disp) == SYMBOL_REF
6639 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6645 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6646 memory address for an instruction. The MODE argument is the machine mode
6647 for the MEM expression that wants to use this address.
6649 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6650 convert common non-canonical forms to canonical form so that they will
6654 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6656 struct ix86_address parts;
6657 rtx base, index, disp;
6658 HOST_WIDE_INT scale;
6659 const char *reason = NULL;
6660 rtx reason_rtx = NULL_RTX;
6662 if (TARGET_DEBUG_ADDR)
6665 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6666 GET_MODE_NAME (mode), strict);
6670 if (ix86_decompose_address (addr, &parts) <= 0)
6672 reason = "decomposition failed";
6677 index = parts.index;
6679 scale = parts.scale;
6681 /* Validate base register.
6683 Don't allow SUBREG's that span more than a word here. It can lead to spill
6684 failures when the base is one word out of a two word structure, which is
6685 represented internally as a DImode int. */
6694 else if (GET_CODE (base) == SUBREG
6695 && REG_P (SUBREG_REG (base))
6696 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6698 reg = SUBREG_REG (base);
6701 reason = "base is not a register";
6705 if (GET_MODE (base) != Pmode)
6707 reason = "base is not in Pmode";
6711 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6712 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6714 reason = "base is not valid";
6719 /* Validate index register.
6721 Don't allow SUBREG's that span more than a word here -- same as above. */
6730 else if (GET_CODE (index) == SUBREG
6731 && REG_P (SUBREG_REG (index))
6732 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6734 reg = SUBREG_REG (index);
6737 reason = "index is not a register";
6741 if (GET_MODE (index) != Pmode)
6743 reason = "index is not in Pmode";
6747 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6748 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6750 reason = "index is not valid";
6755 /* Validate scale factor. */
6758 reason_rtx = GEN_INT (scale);
6761 reason = "scale without index";
6765 if (scale != 2 && scale != 4 && scale != 8)
6767 reason = "scale is not a valid multiplier";
6772 /* Validate displacement. */
6777 if (GET_CODE (disp) == CONST
6778 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6779 switch (XINT (XEXP (disp, 0), 1))
6781 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6782 used. While ABI specify also 32bit relocations, we don't produce
6783 them at all and use IP relative instead. */
6786 gcc_assert (flag_pic);
6788 goto is_legitimate_pic;
6789 reason = "64bit address unspec";
6792 case UNSPEC_GOTPCREL:
6793 gcc_assert (flag_pic);
6794 goto is_legitimate_pic;
6796 case UNSPEC_GOTTPOFF:
6797 case UNSPEC_GOTNTPOFF:
6798 case UNSPEC_INDNTPOFF:
6804 reason = "invalid address unspec";
6808 else if (SYMBOLIC_CONST (disp)
6812 && MACHOPIC_INDIRECT
6813 && !machopic_operand_p (disp)
6819 if (TARGET_64BIT && (index || base))
6821 /* foo@dtpoff(%rX) is ok. */
6822 if (GET_CODE (disp) != CONST
6823 || GET_CODE (XEXP (disp, 0)) != PLUS
6824 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6825 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6826 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6827 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6829 reason = "non-constant pic memory reference";
6833 else if (! legitimate_pic_address_disp_p (disp))
6835 reason = "displacement is an invalid pic construct";
6839 /* This code used to verify that a symbolic pic displacement
6840 includes the pic_offset_table_rtx register.
6842 While this is good idea, unfortunately these constructs may
6843 be created by "adds using lea" optimization for incorrect
6852 This code is nonsensical, but results in addressing
6853 GOT table with pic_offset_table_rtx base. We can't
6854 just refuse it easily, since it gets matched by
6855 "addsi3" pattern, that later gets split to lea in the
6856 case output register differs from input. While this
6857 can be handled by separate addsi pattern for this case
6858 that never results in lea, this seems to be easier and
6859 correct fix for crash to disable this test. */
6861 else if (GET_CODE (disp) != LABEL_REF
6862 && GET_CODE (disp) != CONST_INT
6863 && (GET_CODE (disp) != CONST
6864 || !legitimate_constant_p (disp))
6865 && (GET_CODE (disp) != SYMBOL_REF
6866 || !legitimate_constant_p (disp)))
6868 reason = "displacement is not constant";
6871 else if (TARGET_64BIT
6872 && !x86_64_immediate_operand (disp, VOIDmode))
6874 reason = "displacement is out of range";
6879 /* Everything looks valid. */
6880 if (TARGET_DEBUG_ADDR)
6881 fprintf (stderr, "Success.\n");
6885 if (TARGET_DEBUG_ADDR)
6887 fprintf (stderr, "Error: %s\n", reason);
6888 debug_rtx (reason_rtx);
6893 /* Return a unique alias set for the GOT. */
6895 static HOST_WIDE_INT
6896 ix86_GOT_alias_set (void)
6898 static HOST_WIDE_INT set = -1;
6900 set = new_alias_set ();
6904 /* Return a legitimate reference for ORIG (an address) using the
6905 register REG. If REG is 0, a new pseudo is generated.
6907 There are two types of references that must be handled:
6909 1. Global data references must load the address from the GOT, via
6910 the PIC reg. An insn is emitted to do this load, and the reg is
6913 2. Static data references, constant pool addresses, and code labels
6914 compute the address as an offset from the GOT, whose base is in
6915 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6916 differentiate them from global data objects. The returned
6917 address is the PIC reg + an unspec constant.
6919 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6920 reg also appears in the address. */
6923 legitimize_pic_address (rtx orig, rtx reg)
6930 if (TARGET_MACHO && !TARGET_64BIT)
6933 reg = gen_reg_rtx (Pmode);
6934 /* Use the generic Mach-O PIC machinery. */
6935 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6939 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6941 else if (TARGET_64BIT
6942 && ix86_cmodel != CM_SMALL_PIC
6943 && local_symbolic_operand (addr, Pmode))
6946 /* This symbol may be referenced via a displacement from the PIC
6947 base address (@GOTOFF). */
6949 if (reload_in_progress)
6950 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6951 if (GET_CODE (addr) == CONST)
6952 addr = XEXP (addr, 0);
6953 if (GET_CODE (addr) == PLUS)
6955 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6956 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6959 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6960 new = gen_rtx_CONST (Pmode, new);
6962 tmpreg = gen_reg_rtx (Pmode);
6965 emit_move_insn (tmpreg, new);
6969 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6970 tmpreg, 1, OPTAB_DIRECT);
6973 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6975 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6977 /* This symbol may be referenced via a displacement from the PIC
6978 base address (@GOTOFF). */
6980 if (reload_in_progress)
6981 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6982 if (GET_CODE (addr) == CONST)
6983 addr = XEXP (addr, 0);
6984 if (GET_CODE (addr) == PLUS)
6986 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6987 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6990 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6991 new = gen_rtx_CONST (Pmode, new);
6992 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6996 emit_move_insn (reg, new);
7000 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7004 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7005 new = gen_rtx_CONST (Pmode, new);
7006 new = gen_const_mem (Pmode, new);
7007 set_mem_alias_set (new, ix86_GOT_alias_set ());
7010 reg = gen_reg_rtx (Pmode);
7011 /* Use directly gen_movsi, otherwise the address is loaded
7012 into register for CSE. We don't want to CSE this addresses,
7013 instead we CSE addresses from the GOT table, so skip this. */
7014 emit_insn (gen_movsi (reg, new));
7019 /* This symbol must be referenced via a load from the
7020 Global Offset Table (@GOT). */
7022 if (reload_in_progress)
7023 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7024 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7025 new = gen_rtx_CONST (Pmode, new);
7026 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7027 new = gen_const_mem (Pmode, new);
7028 set_mem_alias_set (new, ix86_GOT_alias_set ());
7031 reg = gen_reg_rtx (Pmode);
7032 emit_move_insn (reg, new);
7038 if (GET_CODE (addr) == CONST_INT
7039 && !x86_64_immediate_operand (addr, VOIDmode))
7043 emit_move_insn (reg, addr);
7047 new = force_reg (Pmode, addr);
7049 else if (GET_CODE (addr) == CONST)
7051 addr = XEXP (addr, 0);
7053 /* We must match stuff we generate before. Assume the only
7054 unspecs that can get here are ours. Not that we could do
7055 anything with them anyway.... */
7056 if (GET_CODE (addr) == UNSPEC
7057 || (GET_CODE (addr) == PLUS
7058 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7060 gcc_assert (GET_CODE (addr) == PLUS);
7062 if (GET_CODE (addr) == PLUS)
7064 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7066 /* Check first to see if this is a constant offset from a @GOTOFF
7067 symbol reference. */
7068 if (local_symbolic_operand (op0, Pmode)
7069 && GET_CODE (op1) == CONST_INT)
7073 if (reload_in_progress)
7074 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7075 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7077 new = gen_rtx_PLUS (Pmode, new, op1);
7078 new = gen_rtx_CONST (Pmode, new);
7079 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7083 emit_move_insn (reg, new);
7089 if (INTVAL (op1) < -16*1024*1024
7090 || INTVAL (op1) >= 16*1024*1024)
7092 if (!x86_64_immediate_operand (op1, Pmode))
7093 op1 = force_reg (Pmode, op1);
7094 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7100 base = legitimize_pic_address (XEXP (addr, 0), reg);
7101 new = legitimize_pic_address (XEXP (addr, 1),
7102 base == reg ? NULL_RTX : reg);
7104 if (GET_CODE (new) == CONST_INT)
7105 new = plus_constant (base, INTVAL (new));
7108 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7110 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7111 new = XEXP (new, 1);
7113 new = gen_rtx_PLUS (Pmode, base, new);
7121 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7124 get_thread_pointer (int to_reg)
7128 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7132 reg = gen_reg_rtx (Pmode);
7133 insn = gen_rtx_SET (VOIDmode, reg, tp);
7134 insn = emit_insn (insn);
7139 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7140 false if we expect this to be used for a memory address and true if
7141 we expect to load the address into a register. */
7144 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7146 rtx dest, base, off, pic, tp;
7151 case TLS_MODEL_GLOBAL_DYNAMIC:
7152 dest = gen_reg_rtx (Pmode);
7153 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7155 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7157 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7160 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7161 insns = get_insns ();
7164 emit_libcall_block (insns, dest, rax, x);
7166 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7167 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7169 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7171 if (TARGET_GNU2_TLS)
7173 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7175 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7179 case TLS_MODEL_LOCAL_DYNAMIC:
7180 base = gen_reg_rtx (Pmode);
7181 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7183 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7185 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7188 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7189 insns = get_insns ();
7192 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7193 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7194 emit_libcall_block (insns, base, rax, note);
7196 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7197 emit_insn (gen_tls_local_dynamic_base_64 (base));
7199 emit_insn (gen_tls_local_dynamic_base_32 (base));
7201 if (TARGET_GNU2_TLS)
7203 rtx x = ix86_tls_module_base ();
7205 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7206 gen_rtx_MINUS (Pmode, x, tp));
7209 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7210 off = gen_rtx_CONST (Pmode, off);
7212 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7214 if (TARGET_GNU2_TLS)
7216 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7218 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7223 case TLS_MODEL_INITIAL_EXEC:
7227 type = UNSPEC_GOTNTPOFF;
7231 if (reload_in_progress)
7232 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7233 pic = pic_offset_table_rtx;
7234 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7236 else if (!TARGET_ANY_GNU_TLS)
7238 pic = gen_reg_rtx (Pmode);
7239 emit_insn (gen_set_got (pic));
7240 type = UNSPEC_GOTTPOFF;
7245 type = UNSPEC_INDNTPOFF;
7248 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7249 off = gen_rtx_CONST (Pmode, off);
7251 off = gen_rtx_PLUS (Pmode, pic, off);
7252 off = gen_const_mem (Pmode, off);
7253 set_mem_alias_set (off, ix86_GOT_alias_set ());
7255 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7257 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7258 off = force_reg (Pmode, off);
7259 return gen_rtx_PLUS (Pmode, base, off);
7263 base = get_thread_pointer (true);
7264 dest = gen_reg_rtx (Pmode);
7265 emit_insn (gen_subsi3 (dest, base, off));
7269 case TLS_MODEL_LOCAL_EXEC:
7270 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7271 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7272 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7273 off = gen_rtx_CONST (Pmode, off);
7275 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7277 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7278 return gen_rtx_PLUS (Pmode, base, off);
7282 base = get_thread_pointer (true);
7283 dest = gen_reg_rtx (Pmode);
7284 emit_insn (gen_subsi3 (dest, base, off));
7295 /* Try machine-dependent ways of modifying an illegitimate address
7296 to be legitimate. If we find one, return the new, valid address.
7297 This macro is used in only one place: `memory_address' in explow.c.
7299 OLDX is the address as it was before break_out_memory_refs was called.
7300 In some cases it is useful to look at this to decide what needs to be done.
7302 MODE and WIN are passed so that this macro can use
7303 GO_IF_LEGITIMATE_ADDRESS.
7305 It is always safe for this macro to do nothing. It exists to recognize
7306 opportunities to optimize the output.
7308 For the 80386, we handle X+REG by loading X into a register R and
7309 using R+REG. R will go in a general reg and indexing will be used.
7310 However, if REG is a broken-out memory address or multiplication,
7311 nothing needs to be done because REG can certainly go in a general reg.
7313 When -fpic is used, special handling is needed for symbolic references.
7314 See comments by legitimize_pic_address in i386.c for details. */
7317 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7322 if (TARGET_DEBUG_ADDR)
7324 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7325 GET_MODE_NAME (mode));
7329 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7331 return legitimize_tls_address (x, log, false);
7332 if (GET_CODE (x) == CONST
7333 && GET_CODE (XEXP (x, 0)) == PLUS
7334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7335 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7337 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7338 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7341 if (flag_pic && SYMBOLIC_CONST (x))
7342 return legitimize_pic_address (x, 0);
7344 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7345 if (GET_CODE (x) == ASHIFT
7346 && GET_CODE (XEXP (x, 1)) == CONST_INT
7347 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7350 log = INTVAL (XEXP (x, 1));
7351 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7352 GEN_INT (1 << log));
7355 if (GET_CODE (x) == PLUS)
7357 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7359 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7360 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7361 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7364 log = INTVAL (XEXP (XEXP (x, 0), 1));
7365 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7366 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7367 GEN_INT (1 << log));
7370 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7371 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7372 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7375 log = INTVAL (XEXP (XEXP (x, 1), 1));
7376 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7377 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7378 GEN_INT (1 << log));
7381 /* Put multiply first if it isn't already. */
7382 if (GET_CODE (XEXP (x, 1)) == MULT)
7384 rtx tmp = XEXP (x, 0);
7385 XEXP (x, 0) = XEXP (x, 1);
7390 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7391 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7392 created by virtual register instantiation, register elimination, and
7393 similar optimizations. */
7394 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7397 x = gen_rtx_PLUS (Pmode,
7398 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7399 XEXP (XEXP (x, 1), 0)),
7400 XEXP (XEXP (x, 1), 1));
7404 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7405 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7406 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7408 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7409 && CONSTANT_P (XEXP (x, 1)))
7412 rtx other = NULL_RTX;
7414 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7416 constant = XEXP (x, 1);
7417 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7419 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7421 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7422 other = XEXP (x, 1);
7430 x = gen_rtx_PLUS (Pmode,
7431 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7432 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7433 plus_constant (other, INTVAL (constant)));
7437 if (changed && legitimate_address_p (mode, x, FALSE))
7440 if (GET_CODE (XEXP (x, 0)) == MULT)
7443 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7446 if (GET_CODE (XEXP (x, 1)) == MULT)
7449 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7453 && GET_CODE (XEXP (x, 1)) == REG
7454 && GET_CODE (XEXP (x, 0)) == REG)
7457 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7460 x = legitimize_pic_address (x, 0);
7463 if (changed && legitimate_address_p (mode, x, FALSE))
7466 if (GET_CODE (XEXP (x, 0)) == REG)
7468 rtx temp = gen_reg_rtx (Pmode);
7469 rtx val = force_operand (XEXP (x, 1), temp);
7471 emit_move_insn (temp, val);
7477 else if (GET_CODE (XEXP (x, 1)) == REG)
7479 rtx temp = gen_reg_rtx (Pmode);
7480 rtx val = force_operand (XEXP (x, 0), temp);
7482 emit_move_insn (temp, val);
7492 /* Print an integer constant expression in assembler syntax. Addition
7493 and subtraction are the only arithmetic that may appear in these
7494 expressions. FILE is the stdio stream to write to, X is the rtx, and
7495 CODE is the operand print code from the output string. */
7498 output_pic_addr_const (FILE *file, rtx x, int code)
7502 switch (GET_CODE (x))
7505 gcc_assert (flag_pic);
7510 if (! TARGET_MACHO || TARGET_64BIT)
7511 output_addr_const (file, x);
7514 const char *name = XSTR (x, 0);
7516 /* Mark the decl as referenced so that cgraph will output the function. */
7517 if (SYMBOL_REF_DECL (x))
7518 mark_decl_referenced (SYMBOL_REF_DECL (x));
7521 if (MACHOPIC_INDIRECT
7522 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7523 name = machopic_indirection_name (x, /*stub_p=*/true);
7525 assemble_name (file, name);
7527 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7528 fputs ("@PLT", file);
7535 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7536 assemble_name (asm_out_file, buf);
7540 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7544 /* This used to output parentheses around the expression,
7545 but that does not work on the 386 (either ATT or BSD assembler). */
7546 output_pic_addr_const (file, XEXP (x, 0), code);
7550 if (GET_MODE (x) == VOIDmode)
7552 /* We can use %d if the number is <32 bits and positive. */
7553 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7554 fprintf (file, "0x%lx%08lx",
7555 (unsigned long) CONST_DOUBLE_HIGH (x),
7556 (unsigned long) CONST_DOUBLE_LOW (x));
7558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7561 /* We can't handle floating point constants;
7562 PRINT_OPERAND must handle them. */
7563 output_operand_lossage ("floating constant misused");
7567 /* Some assemblers need integer constants to appear first. */
7568 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7570 output_pic_addr_const (file, XEXP (x, 0), code);
7572 output_pic_addr_const (file, XEXP (x, 1), code);
7576 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7577 output_pic_addr_const (file, XEXP (x, 1), code);
7579 output_pic_addr_const (file, XEXP (x, 0), code);
7585 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7586 output_pic_addr_const (file, XEXP (x, 0), code);
7588 output_pic_addr_const (file, XEXP (x, 1), code);
7590 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7594 gcc_assert (XVECLEN (x, 0) == 1);
7595 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7596 switch (XINT (x, 1))
7599 fputs ("@GOT", file);
7602 fputs ("@GOTOFF", file);
7604 case UNSPEC_GOTPCREL:
7605 fputs ("@GOTPCREL(%rip)", file);
7607 case UNSPEC_GOTTPOFF:
7608 /* FIXME: This might be @TPOFF in Sun ld too. */
7609 fputs ("@GOTTPOFF", file);
7612 fputs ("@TPOFF", file);
7616 fputs ("@TPOFF", file);
7618 fputs ("@NTPOFF", file);
7621 fputs ("@DTPOFF", file);
7623 case UNSPEC_GOTNTPOFF:
7625 fputs ("@GOTTPOFF(%rip)", file);
7627 fputs ("@GOTNTPOFF", file);
7629 case UNSPEC_INDNTPOFF:
7630 fputs ("@INDNTPOFF", file);
7633 output_operand_lossage ("invalid UNSPEC as operand");
7639 output_operand_lossage ("invalid expression as operand");
7643 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7644 We need to emit DTP-relative relocations. */
7647 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7649 fputs (ASM_LONG, file);
7650 output_addr_const (file, x);
7651 fputs ("@DTPOFF", file);
7657 fputs (", 0", file);
7664 /* In the name of slightly smaller debug output, and to cater to
7665 general assembler lossage, recognize PIC+GOTOFF and turn it back
7666 into a direct symbol reference.
7668 On Darwin, this is necessary to avoid a crash, because Darwin
7669 has a different PIC label for each routine but the DWARF debugging
7670 information is not associated with any particular routine, so it's
7671 necessary to remove references to the PIC label from RTL stored by
7672 the DWARF output code. */
7675 ix86_delegitimize_address (rtx orig_x)
7678 /* reg_addend is NULL or a multiple of some register. */
7679 rtx reg_addend = NULL_RTX;
7680 /* const_addend is NULL or a const_int. */
7681 rtx const_addend = NULL_RTX;
7682 /* This is the result, or NULL. */
7683 rtx result = NULL_RTX;
7685 if (GET_CODE (x) == MEM)
7690 if (GET_CODE (x) != CONST
7691 || GET_CODE (XEXP (x, 0)) != UNSPEC
7692 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7693 || GET_CODE (orig_x) != MEM)
7695 return XVECEXP (XEXP (x, 0), 0, 0);
7698 if (GET_CODE (x) != PLUS
7699 || GET_CODE (XEXP (x, 1)) != CONST)
7702 if (GET_CODE (XEXP (x, 0)) == REG
7703 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7704 /* %ebx + GOT/GOTOFF */
7706 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7708 /* %ebx + %reg * scale + GOT/GOTOFF */
7709 reg_addend = XEXP (x, 0);
7710 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7711 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7712 reg_addend = XEXP (reg_addend, 1);
7713 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7714 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7715 reg_addend = XEXP (reg_addend, 0);
7718 if (GET_CODE (reg_addend) != REG
7719 && GET_CODE (reg_addend) != MULT
7720 && GET_CODE (reg_addend) != ASHIFT)
7726 x = XEXP (XEXP (x, 1), 0);
7727 if (GET_CODE (x) == PLUS
7728 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7730 const_addend = XEXP (x, 1);
7734 if (GET_CODE (x) == UNSPEC
7735 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7736 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7737 result = XVECEXP (x, 0, 0);
7739 if (TARGET_MACHO && darwin_local_data_pic (x)
7740 && GET_CODE (orig_x) != MEM)
7741 result = XEXP (x, 0);
7747 result = gen_rtx_PLUS (Pmode, result, const_addend);
7749 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7754 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7759 if (mode == CCFPmode || mode == CCFPUmode)
7761 enum rtx_code second_code, bypass_code;
7762 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7763 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7764 code = ix86_fp_compare_code_to_integer (code);
7768 code = reverse_condition (code);
7779 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7783 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7784 Those same assemblers have the same but opposite lossage on cmov. */
7785 gcc_assert (mode == CCmode);
7786 suffix = fp ? "nbe" : "a";
7806 gcc_assert (mode == CCmode);
7828 gcc_assert (mode == CCmode);
7829 suffix = fp ? "nb" : "ae";
7832 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7836 gcc_assert (mode == CCmode);
7840 suffix = fp ? "u" : "p";
7843 suffix = fp ? "nu" : "np";
7848 fputs (suffix, file);
7851 /* Print the name of register X to FILE based on its machine mode and number.
7852 If CODE is 'w', pretend the mode is HImode.
7853 If CODE is 'b', pretend the mode is QImode.
7854 If CODE is 'k', pretend the mode is SImode.
7855 If CODE is 'q', pretend the mode is DImode.
7856 If CODE is 'h', pretend the reg is the 'high' byte register.
7857 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7860 print_reg (rtx x, int code, FILE *file)
7862 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7863 && REGNO (x) != FRAME_POINTER_REGNUM
7864 && REGNO (x) != FLAGS_REG
7865 && REGNO (x) != FPSR_REG);
7867 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7870 if (code == 'w' || MMX_REG_P (x))
7872 else if (code == 'b')
7874 else if (code == 'k')
7876 else if (code == 'q')
7878 else if (code == 'y')
7880 else if (code == 'h')
7883 code = GET_MODE_SIZE (GET_MODE (x));
7885 /* Irritatingly, AMD extended registers use different naming convention
7886 from the normal registers. */
7887 if (REX_INT_REG_P (x))
7889 gcc_assert (TARGET_64BIT);
7893 error ("extended registers have no high halves");
7896 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7899 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7902 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7905 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7908 error ("unsupported operand size for extended register");
7916 if (STACK_TOP_P (x))
7918 fputs ("st(0)", file);
7925 if (! ANY_FP_REG_P (x))
7926 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7931 fputs (hi_reg_name[REGNO (x)], file);
7934 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7936 fputs (qi_reg_name[REGNO (x)], file);
7939 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7941 fputs (qi_high_reg_name[REGNO (x)], file);
7948 /* Locate some local-dynamic symbol still in use by this function
7949 so that we can print its name in some tls_local_dynamic_base
7953 get_some_local_dynamic_name (void)
7957 if (cfun->machine->some_ld_name)
7958 return cfun->machine->some_ld_name;
7960 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7962 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7963 return cfun->machine->some_ld_name;
7969 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7973 if (GET_CODE (x) == SYMBOL_REF
7974 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7976 cfun->machine->some_ld_name = XSTR (x, 0);
7984 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7985 C -- print opcode suffix for set/cmov insn.
7986 c -- like C, but print reversed condition
7987 F,f -- likewise, but for floating-point.
7988 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7990 R -- print the prefix for register names.
7991 z -- print the opcode suffix for the size of the current operand.
7992 * -- print a star (in certain assembler syntax)
7993 A -- print an absolute memory reference.
7994 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7995 s -- print a shift double count, followed by the assemblers argument
7997 b -- print the QImode name of the register for the indicated operand.
7998 %b0 would print %al if operands[0] is reg 0.
7999 w -- likewise, print the HImode name of the register.
8000 k -- likewise, print the SImode name of the register.
8001 q -- likewise, print the DImode name of the register.
8002 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8003 y -- print "st(0)" instead of "st" as a register.
8004 D -- print condition for SSE cmp instruction.
8005 P -- if PIC, print an @PLT suffix.
8006 X -- don't print any sort of PIC '@' suffix for a symbol.
8007 & -- print some in-use local-dynamic symbol name.
8008 H -- print a memory address offset by 8; used for sse high-parts
8012 print_operand (FILE *file, rtx x, int code)
8019 if (ASSEMBLER_DIALECT == ASM_ATT)
8024 assemble_name (file, get_some_local_dynamic_name ());
8028 switch (ASSEMBLER_DIALECT)
8035 /* Intel syntax. For absolute addresses, registers should not
8036 be surrounded by braces. */
8037 if (GET_CODE (x) != REG)
8040 PRINT_OPERAND (file, x, 0);
8050 PRINT_OPERAND (file, x, 0);
8055 if (ASSEMBLER_DIALECT == ASM_ATT)
8060 if (ASSEMBLER_DIALECT == ASM_ATT)
8065 if (ASSEMBLER_DIALECT == ASM_ATT)
8070 if (ASSEMBLER_DIALECT == ASM_ATT)
8075 if (ASSEMBLER_DIALECT == ASM_ATT)
8080 if (ASSEMBLER_DIALECT == ASM_ATT)
8085 /* 387 opcodes don't get size suffixes if the operands are
8087 if (STACK_REG_P (x))
8090 /* Likewise if using Intel opcodes. */
8091 if (ASSEMBLER_DIALECT == ASM_INTEL)
8094 /* This is the size of op from size of operand. */
8095 switch (GET_MODE_SIZE (GET_MODE (x)))
8098 #ifdef HAVE_GAS_FILDS_FISTS
8104 if (GET_MODE (x) == SFmode)
8119 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8121 #ifdef GAS_MNEMONICS
8147 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8149 PRINT_OPERAND (file, x, 0);
8155 /* Little bit of braindamage here. The SSE compare instructions
8156 does use completely different names for the comparisons that the
8157 fp conditional moves. */
8158 switch (GET_CODE (x))
8173 fputs ("unord", file);
8177 fputs ("neq", file);
8181 fputs ("nlt", file);
8185 fputs ("nle", file);
8188 fputs ("ord", file);
8195 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8196 if (ASSEMBLER_DIALECT == ASM_ATT)
8198 switch (GET_MODE (x))
8200 case HImode: putc ('w', file); break;
8202 case SFmode: putc ('l', file); break;
8204 case DFmode: putc ('q', file); break;
8205 default: gcc_unreachable ();
8212 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8215 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8216 if (ASSEMBLER_DIALECT == ASM_ATT)
8219 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8222 /* Like above, but reverse condition */
8224 /* Check to see if argument to %c is really a constant
8225 and not a condition code which needs to be reversed. */
8226 if (!COMPARISON_P (x))
8228 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8231 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8234 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8235 if (ASSEMBLER_DIALECT == ASM_ATT)
8238 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8242 /* It doesn't actually matter what mode we use here, as we're
8243 only going to use this for printing. */
8244 x = adjust_address_nv (x, DImode, 8);
8251 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8254 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8257 int pred_val = INTVAL (XEXP (x, 0));
8259 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8260 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8262 int taken = pred_val > REG_BR_PROB_BASE / 2;
8263 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8265 /* Emit hints only in the case default branch prediction
8266 heuristics would fail. */
8267 if (taken != cputaken)
8269 /* We use 3e (DS) prefix for taken branches and
8270 2e (CS) prefix for not taken branches. */
8272 fputs ("ds ; ", file);
8274 fputs ("cs ; ", file);
8281 output_operand_lossage ("invalid operand code '%c'", code);
8285 if (GET_CODE (x) == REG)
8286 print_reg (x, code, file);
8288 else if (GET_CODE (x) == MEM)
8290 /* No `byte ptr' prefix for call instructions. */
8291 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8294 switch (GET_MODE_SIZE (GET_MODE (x)))
8296 case 1: size = "BYTE"; break;
8297 case 2: size = "WORD"; break;
8298 case 4: size = "DWORD"; break;
8299 case 8: size = "QWORD"; break;
8300 case 12: size = "XWORD"; break;
8301 case 16: size = "XMMWORD"; break;
8306 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8309 else if (code == 'w')
8311 else if (code == 'k')
8315 fputs (" PTR ", file);
8319 /* Avoid (%rip) for call operands. */
8320 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8321 && GET_CODE (x) != CONST_INT)
8322 output_addr_const (file, x);
8323 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8324 output_operand_lossage ("invalid constraints for operand");
8329 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8334 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8335 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8337 if (ASSEMBLER_DIALECT == ASM_ATT)
8339 fprintf (file, "0x%08lx", l);
8342 /* These float cases don't actually occur as immediate operands. */
8343 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8347 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8348 fprintf (file, "%s", dstr);
8351 else if (GET_CODE (x) == CONST_DOUBLE
8352 && GET_MODE (x) == XFmode)
8356 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8357 fprintf (file, "%s", dstr);
8362 /* We have patterns that allow zero sets of memory, for instance.
8363 In 64-bit mode, we should probably support all 8-byte vectors,
8364 since we can in fact encode that into an immediate. */
8365 if (GET_CODE (x) == CONST_VECTOR)
8367 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8373 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8375 if (ASSEMBLER_DIALECT == ASM_ATT)
8378 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8379 || GET_CODE (x) == LABEL_REF)
8381 if (ASSEMBLER_DIALECT == ASM_ATT)
8384 fputs ("OFFSET FLAT:", file);
8387 if (GET_CODE (x) == CONST_INT)
8388 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8390 output_pic_addr_const (file, x, code);
8392 output_addr_const (file, x);
8396 /* Print a memory operand whose address is ADDR. */
8399 print_operand_address (FILE *file, rtx addr)
8401 struct ix86_address parts;
8402 rtx base, index, disp;
8404 int ok = ix86_decompose_address (addr, &parts);
8409 index = parts.index;
8411 scale = parts.scale;
8419 if (USER_LABEL_PREFIX[0] == 0)
8421 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8427 if (!base && !index)
8429 /* Displacement only requires special attention. */
8431 if (GET_CODE (disp) == CONST_INT)
8433 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8435 if (USER_LABEL_PREFIX[0] == 0)
8437 fputs ("ds:", file);
8439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8442 output_pic_addr_const (file, disp, 0);
8444 output_addr_const (file, disp);
8446 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8449 if (GET_CODE (disp) == CONST
8450 && GET_CODE (XEXP (disp, 0)) == PLUS
8451 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8452 disp = XEXP (XEXP (disp, 0), 0);
8453 if (GET_CODE (disp) == LABEL_REF
8454 || (GET_CODE (disp) == SYMBOL_REF
8455 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8456 fputs ("(%rip)", file);
8461 if (ASSEMBLER_DIALECT == ASM_ATT)
8466 output_pic_addr_const (file, disp, 0);
8467 else if (GET_CODE (disp) == LABEL_REF)
8468 output_asm_label (disp);
8470 output_addr_const (file, disp);
8475 print_reg (base, 0, file);
8479 print_reg (index, 0, file);
8481 fprintf (file, ",%d", scale);
8487 rtx offset = NULL_RTX;
8491 /* Pull out the offset of a symbol; print any symbol itself. */
8492 if (GET_CODE (disp) == CONST
8493 && GET_CODE (XEXP (disp, 0)) == PLUS
8494 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8496 offset = XEXP (XEXP (disp, 0), 1);
8497 disp = gen_rtx_CONST (VOIDmode,
8498 XEXP (XEXP (disp, 0), 0));
8502 output_pic_addr_const (file, disp, 0);
8503 else if (GET_CODE (disp) == LABEL_REF)
8504 output_asm_label (disp);
8505 else if (GET_CODE (disp) == CONST_INT)
8508 output_addr_const (file, disp);
8514 print_reg (base, 0, file);
8517 if (INTVAL (offset) >= 0)
8519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8523 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8530 print_reg (index, 0, file);
8532 fprintf (file, "*%d", scale);
8540 output_addr_const_extra (FILE *file, rtx x)
8544 if (GET_CODE (x) != UNSPEC)
8547 op = XVECEXP (x, 0, 0);
8548 switch (XINT (x, 1))
8550 case UNSPEC_GOTTPOFF:
8551 output_addr_const (file, op);
8552 /* FIXME: This might be @TPOFF in Sun ld. */
8553 fputs ("@GOTTPOFF", file);
8556 output_addr_const (file, op);
8557 fputs ("@TPOFF", file);
8560 output_addr_const (file, op);
8562 fputs ("@TPOFF", file);
8564 fputs ("@NTPOFF", file);
8567 output_addr_const (file, op);
8568 fputs ("@DTPOFF", file);
8570 case UNSPEC_GOTNTPOFF:
8571 output_addr_const (file, op);
8573 fputs ("@GOTTPOFF(%rip)", file);
8575 fputs ("@GOTNTPOFF", file);
8577 case UNSPEC_INDNTPOFF:
8578 output_addr_const (file, op);
8579 fputs ("@INDNTPOFF", file);
8589 /* Split one or more DImode RTL references into pairs of SImode
8590 references. The RTL can be REG, offsettable MEM, integer constant, or
8591 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8592 split and "num" is its length. lo_half and hi_half are output arrays
8593 that parallel "operands". */
8596 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8600 rtx op = operands[num];
8602 /* simplify_subreg refuse to split volatile memory addresses,
8603 but we still have to handle it. */
8604 if (GET_CODE (op) == MEM)
8606 lo_half[num] = adjust_address (op, SImode, 0);
8607 hi_half[num] = adjust_address (op, SImode, 4);
8611 lo_half[num] = simplify_gen_subreg (SImode, op,
8612 GET_MODE (op) == VOIDmode
8613 ? DImode : GET_MODE (op), 0);
8614 hi_half[num] = simplify_gen_subreg (SImode, op,
8615 GET_MODE (op) == VOIDmode
8616 ? DImode : GET_MODE (op), 4);
8620 /* Split one or more TImode RTL references into pairs of DImode
8621 references. The RTL can be REG, offsettable MEM, integer constant, or
8622 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8623 split and "num" is its length. lo_half and hi_half are output arrays
8624 that parallel "operands". */
8627 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8631 rtx op = operands[num];
8633 /* simplify_subreg refuse to split volatile memory addresses, but we
8634 still have to handle it. */
8635 if (GET_CODE (op) == MEM)
8637 lo_half[num] = adjust_address (op, DImode, 0);
8638 hi_half[num] = adjust_address (op, DImode, 8);
8642 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8643 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8648 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8649 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8650 is the expression of the binary operation. The output may either be
8651 emitted here, or returned to the caller, like all output_* functions.
8653 There is no guarantee that the operands are the same mode, as they
8654 might be within FLOAT or FLOAT_EXTEND expressions. */
8656 #ifndef SYSV386_COMPAT
8657 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8658 wants to fix the assemblers because that causes incompatibility
8659 with gcc. No-one wants to fix gcc because that causes
8660 incompatibility with assemblers... You can use the option of
8661 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8662 #define SYSV386_COMPAT 1
8666 output_387_binary_op (rtx insn, rtx *operands)
8668 static char buf[30];
8671 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8673 #ifdef ENABLE_CHECKING
8674 /* Even if we do not want to check the inputs, this documents input
8675 constraints. Which helps in understanding the following code. */
8676 if (STACK_REG_P (operands[0])
8677 && ((REG_P (operands[1])
8678 && REGNO (operands[0]) == REGNO (operands[1])
8679 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8680 || (REG_P (operands[2])
8681 && REGNO (operands[0]) == REGNO (operands[2])
8682 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8683 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8686 gcc_assert (is_sse);
8689 switch (GET_CODE (operands[3]))
8692 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8693 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8701 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8702 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8710 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8711 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8719 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8720 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8734 if (GET_MODE (operands[0]) == SFmode)
8735 strcat (buf, "ss\t{%2, %0|%0, %2}");
8737 strcat (buf, "sd\t{%2, %0|%0, %2}");
8742 switch (GET_CODE (operands[3]))
8746 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8748 rtx temp = operands[2];
8749 operands[2] = operands[1];
8753 /* know operands[0] == operands[1]. */
8755 if (GET_CODE (operands[2]) == MEM)
8761 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8763 if (STACK_TOP_P (operands[0]))
8764 /* How is it that we are storing to a dead operand[2]?
8765 Well, presumably operands[1] is dead too. We can't
8766 store the result to st(0) as st(0) gets popped on this
8767 instruction. Instead store to operands[2] (which I
8768 think has to be st(1)). st(1) will be popped later.
8769 gcc <= 2.8.1 didn't have this check and generated
8770 assembly code that the Unixware assembler rejected. */
8771 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8773 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8777 if (STACK_TOP_P (operands[0]))
8778 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8780 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8785 if (GET_CODE (operands[1]) == MEM)
8791 if (GET_CODE (operands[2]) == MEM)
8797 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8800 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8801 derived assemblers, confusingly reverse the direction of
8802 the operation for fsub{r} and fdiv{r} when the
8803 destination register is not st(0). The Intel assembler
8804 doesn't have this brain damage. Read !SYSV386_COMPAT to
8805 figure out what the hardware really does. */
8806 if (STACK_TOP_P (operands[0]))
8807 p = "{p\t%0, %2|rp\t%2, %0}";
8809 p = "{rp\t%2, %0|p\t%0, %2}";
8811 if (STACK_TOP_P (operands[0]))
8812 /* As above for fmul/fadd, we can't store to st(0). */
8813 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8815 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8820 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8823 if (STACK_TOP_P (operands[0]))
8824 p = "{rp\t%0, %1|p\t%1, %0}";
8826 p = "{p\t%1, %0|rp\t%0, %1}";
8828 if (STACK_TOP_P (operands[0]))
8829 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8831 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8836 if (STACK_TOP_P (operands[0]))
8838 if (STACK_TOP_P (operands[1]))
8839 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8841 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8844 else if (STACK_TOP_P (operands[1]))
8847 p = "{\t%1, %0|r\t%0, %1}";
8849 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8855 p = "{r\t%2, %0|\t%0, %2}";
8857 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8870 /* Return needed mode for entity in optimize_mode_switching pass. */
8873 ix86_mode_needed (int entity, rtx insn)
8875 enum attr_i387_cw mode;
8877 /* The mode UNINITIALIZED is used to store control word after a
8878 function call or ASM pattern. The mode ANY specify that function
8879 has no requirements on the control word and make no changes in the
8880 bits we are interested in. */
8883 || (NONJUMP_INSN_P (insn)
8884 && (asm_noperands (PATTERN (insn)) >= 0
8885 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8886 return I387_CW_UNINITIALIZED;
8888 if (recog_memoized (insn) < 0)
8891 mode = get_attr_i387_cw (insn);
8896 if (mode == I387_CW_TRUNC)
8901 if (mode == I387_CW_FLOOR)
8906 if (mode == I387_CW_CEIL)
8911 if (mode == I387_CW_MASK_PM)
8922 /* Output code to initialize control word copies used by trunc?f?i and
8923 rounding patterns. CURRENT_MODE is set to current control word,
8924 while NEW_MODE is set to new control word. */
8927 emit_i387_cw_initialization (int mode)
8929 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8934 rtx reg = gen_reg_rtx (HImode);
8936 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8937 emit_move_insn (reg, stored_mode);
8939 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8944 /* round toward zero (truncate) */
8945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8946 slot = SLOT_CW_TRUNC;
8950 /* round down toward -oo */
8951 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8952 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8953 slot = SLOT_CW_FLOOR;
8957 /* round up toward +oo */
8958 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8959 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8960 slot = SLOT_CW_CEIL;
8963 case I387_CW_MASK_PM:
8964 /* mask precision exception for nearbyint() */
8965 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8966 slot = SLOT_CW_MASK_PM;
8978 /* round toward zero (truncate) */
8979 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8980 slot = SLOT_CW_TRUNC;
8984 /* round down toward -oo */
8985 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8986 slot = SLOT_CW_FLOOR;
8990 /* round up toward +oo */
8991 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8992 slot = SLOT_CW_CEIL;
8995 case I387_CW_MASK_PM:
8996 /* mask precision exception for nearbyint() */
8997 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8998 slot = SLOT_CW_MASK_PM;
9006 gcc_assert (slot < MAX_386_STACK_LOCALS);
9008 new_mode = assign_386_stack_local (HImode, slot);
9009 emit_move_insn (new_mode, reg);
9012 /* Output code for INSN to convert a float to a signed int. OPERANDS
9013 are the insn operands. The output may be [HSD]Imode and the input
9014 operand may be [SDX]Fmode. */
9017 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9019 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9020 int dimode_p = GET_MODE (operands[0]) == DImode;
9021 int round_mode = get_attr_i387_cw (insn);
9023 /* Jump through a hoop or two for DImode, since the hardware has no
9024 non-popping instruction. We used to do this a different way, but
9025 that was somewhat fragile and broke with post-reload splitters. */
9026 if ((dimode_p || fisttp) && !stack_top_dies)
9027 output_asm_insn ("fld\t%y1", operands);
9029 gcc_assert (STACK_TOP_P (operands[1]));
9030 gcc_assert (GET_CODE (operands[0]) == MEM);
9033 output_asm_insn ("fisttp%z0\t%0", operands);
9036 if (round_mode != I387_CW_ANY)
9037 output_asm_insn ("fldcw\t%3", operands);
9038 if (stack_top_dies || dimode_p)
9039 output_asm_insn ("fistp%z0\t%0", operands);
9041 output_asm_insn ("fist%z0\t%0", operands);
9042 if (round_mode != I387_CW_ANY)
9043 output_asm_insn ("fldcw\t%2", operands);
9049 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9050 have the values zero or one, indicates the ffreep insn's operand
9051 from the OPERANDS array. */
9054 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9056 if (TARGET_USE_FFREEP)
9057 #if HAVE_AS_IX86_FFREEP
9058 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9060 switch (REGNO (operands[opno]))
9062 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9063 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9064 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9065 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9066 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9067 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9068 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9069 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9073 return opno ? "fstp\t%y1" : "fstp\t%y0";
9077 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9078 should be used. UNORDERED_P is true when fucom should be used. */
9081 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9084 rtx cmp_op0, cmp_op1;
9085 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9089 cmp_op0 = operands[0];
9090 cmp_op1 = operands[1];
9094 cmp_op0 = operands[1];
9095 cmp_op1 = operands[2];
9100 if (GET_MODE (operands[0]) == SFmode)
9102 return "ucomiss\t{%1, %0|%0, %1}";
9104 return "comiss\t{%1, %0|%0, %1}";
9107 return "ucomisd\t{%1, %0|%0, %1}";
9109 return "comisd\t{%1, %0|%0, %1}";
9112 gcc_assert (STACK_TOP_P (cmp_op0));
9114 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9116 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9120 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9121 return output_387_ffreep (operands, 1);
9124 return "ftst\n\tfnstsw\t%0";
9127 if (STACK_REG_P (cmp_op1)
9129 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9130 && REGNO (cmp_op1) != FIRST_STACK_REG)
9132 /* If both the top of the 387 stack dies, and the other operand
9133 is also a stack register that dies, then this must be a
9134 `fcompp' float compare */
9138 /* There is no double popping fcomi variant. Fortunately,
9139 eflags is immune from the fstp's cc clobbering. */
9141 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9143 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9144 return output_387_ffreep (operands, 0);
9149 return "fucompp\n\tfnstsw\t%0";
9151 return "fcompp\n\tfnstsw\t%0";
9156 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9158 static const char * const alt[16] =
9160 "fcom%z2\t%y2\n\tfnstsw\t%0",
9161 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9162 "fucom%z2\t%y2\n\tfnstsw\t%0",
9163 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9165 "ficom%z2\t%y2\n\tfnstsw\t%0",
9166 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9170 "fcomi\t{%y1, %0|%0, %y1}",
9171 "fcomip\t{%y1, %0|%0, %y1}",
9172 "fucomi\t{%y1, %0|%0, %y1}",
9173 "fucomip\t{%y1, %0|%0, %y1}",
9184 mask = eflags_p << 3;
9185 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9186 mask |= unordered_p << 1;
9187 mask |= stack_top_dies;
9189 gcc_assert (mask < 16);
9198 ix86_output_addr_vec_elt (FILE *file, int value)
9200 const char *directive = ASM_LONG;
9204 directive = ASM_QUAD;
9206 gcc_assert (!TARGET_64BIT);
9209 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9213 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9216 fprintf (file, "%s%s%d-%s%d\n",
9217 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9218 else if (HAVE_AS_GOTOFF_IN_DATA)
9219 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9221 else if (TARGET_MACHO)
9223 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9224 machopic_output_function_base_name (file);
9225 fprintf(file, "\n");
9229 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9230 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9233 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9237 ix86_expand_clear (rtx dest)
9241 /* We play register width games, which are only valid after reload. */
9242 gcc_assert (reload_completed);
9244 /* Avoid HImode and its attendant prefix byte. */
9245 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9246 dest = gen_rtx_REG (SImode, REGNO (dest));
9248 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9250 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9251 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9253 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9254 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9260 /* X is an unchanging MEM. If it is a constant pool reference, return
9261 the constant pool rtx, else NULL. */
9264 maybe_get_pool_constant (rtx x)
9266 x = ix86_delegitimize_address (XEXP (x, 0));
9268 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9269 return get_pool_constant (x);
9275 ix86_expand_move (enum machine_mode mode, rtx operands[])
9277 int strict = (reload_in_progress || reload_completed);
9279 enum tls_model model;
9284 if (GET_CODE (op1) == SYMBOL_REF)
9286 model = SYMBOL_REF_TLS_MODEL (op1);
9289 op1 = legitimize_tls_address (op1, model, true);
9290 op1 = force_operand (op1, op0);
9295 else if (GET_CODE (op1) == CONST
9296 && GET_CODE (XEXP (op1, 0)) == PLUS
9297 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9299 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9302 rtx addend = XEXP (XEXP (op1, 0), 1);
9303 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9304 op1 = force_operand (op1, NULL);
9305 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9306 op0, 1, OPTAB_DIRECT);
9312 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9314 if (TARGET_MACHO && !TARGET_64BIT)
9319 rtx temp = ((reload_in_progress
9320 || ((op0 && GET_CODE (op0) == REG)
9322 ? op0 : gen_reg_rtx (Pmode));
9323 op1 = machopic_indirect_data_reference (op1, temp);
9324 op1 = machopic_legitimize_pic_address (op1, mode,
9325 temp == op1 ? 0 : temp);
9327 else if (MACHOPIC_INDIRECT)
9328 op1 = machopic_indirect_data_reference (op1, 0);
9335 if (GET_CODE (op0) == MEM)
9336 op1 = force_reg (Pmode, op1);
9338 op1 = legitimize_address (op1, op1, Pmode);
9343 if (GET_CODE (op0) == MEM
9344 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9345 || !push_operand (op0, mode))
9346 && GET_CODE (op1) == MEM)
9347 op1 = force_reg (mode, op1);
9349 if (push_operand (op0, mode)
9350 && ! general_no_elim_operand (op1, mode))
9351 op1 = copy_to_mode_reg (mode, op1);
9353 /* Force large constants in 64bit compilation into register
9354 to get them CSEed. */
9355 if (TARGET_64BIT && mode == DImode
9356 && immediate_operand (op1, mode)
9357 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9358 && !register_operand (op0, mode)
9359 && optimize && !reload_completed && !reload_in_progress)
9360 op1 = copy_to_mode_reg (mode, op1);
9362 if (FLOAT_MODE_P (mode))
9364 /* If we are loading a floating point constant to a register,
9365 force the value to memory now, since we'll get better code
9366 out the back end. */
9370 else if (GET_CODE (op1) == CONST_DOUBLE)
9372 op1 = validize_mem (force_const_mem (mode, op1));
9373 if (!register_operand (op0, mode))
9375 rtx temp = gen_reg_rtx (mode);
9376 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9377 emit_move_insn (op0, temp);
9384 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9388 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9390 rtx op0 = operands[0], op1 = operands[1];
9392 /* Force constants other than zero into memory. We do not know how
9393 the instructions used to build constants modify the upper 64 bits
9394 of the register, once we have that information we may be able
9395 to handle some of them more efficiently. */
9396 if ((reload_in_progress | reload_completed) == 0
9397 && register_operand (op0, mode)
9399 && standard_sse_constant_p (op1) <= 0)
9400 op1 = validize_mem (force_const_mem (mode, op1));
9402 /* Make operand1 a register if it isn't already. */
9404 && !register_operand (op0, mode)
9405 && !register_operand (op1, mode))
9407 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9411 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9414 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9415 straight to ix86_expand_vector_move. */
9418 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9427 /* If we're optimizing for size, movups is the smallest. */
9430 op0 = gen_lowpart (V4SFmode, op0);
9431 op1 = gen_lowpart (V4SFmode, op1);
9432 emit_insn (gen_sse_movups (op0, op1));
9436 /* ??? If we have typed data, then it would appear that using
9437 movdqu is the only way to get unaligned data loaded with
9439 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9441 op0 = gen_lowpart (V16QImode, op0);
9442 op1 = gen_lowpart (V16QImode, op1);
9443 emit_insn (gen_sse2_movdqu (op0, op1));
9447 if (TARGET_SSE2 && mode == V2DFmode)
9451 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9453 op0 = gen_lowpart (V2DFmode, op0);
9454 op1 = gen_lowpart (V2DFmode, op1);
9455 emit_insn (gen_sse2_movupd (op0, op1));
9459 /* When SSE registers are split into halves, we can avoid
9460 writing to the top half twice. */
9461 if (TARGET_SSE_SPLIT_REGS)
9463 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9468 /* ??? Not sure about the best option for the Intel chips.
9469 The following would seem to satisfy; the register is
9470 entirely cleared, breaking the dependency chain. We
9471 then store to the upper half, with a dependency depth
9472 of one. A rumor has it that Intel recommends two movsd
9473 followed by an unpacklpd, but this is unconfirmed. And
9474 given that the dependency depth of the unpacklpd would
9475 still be one, I'm not sure why this would be better. */
9476 zero = CONST0_RTX (V2DFmode);
9479 m = adjust_address (op1, DFmode, 0);
9480 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9481 m = adjust_address (op1, DFmode, 8);
9482 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9486 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9488 op0 = gen_lowpart (V4SFmode, op0);
9489 op1 = gen_lowpart (V4SFmode, op1);
9490 emit_insn (gen_sse_movups (op0, op1));
9494 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9495 emit_move_insn (op0, CONST0_RTX (mode));
9497 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9499 if (mode != V4SFmode)
9500 op0 = gen_lowpart (V4SFmode, op0);
9501 m = adjust_address (op1, V2SFmode, 0);
9502 emit_insn (gen_sse_loadlps (op0, op0, m));
9503 m = adjust_address (op1, V2SFmode, 8);
9504 emit_insn (gen_sse_loadhps (op0, op0, m));
9507 else if (MEM_P (op0))
9509 /* If we're optimizing for size, movups is the smallest. */
9512 op0 = gen_lowpart (V4SFmode, op0);
9513 op1 = gen_lowpart (V4SFmode, op1);
9514 emit_insn (gen_sse_movups (op0, op1));
9518 /* ??? Similar to above, only less clear because of quote
9519 typeless stores unquote. */
9520 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9521 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9523 op0 = gen_lowpart (V16QImode, op0);
9524 op1 = gen_lowpart (V16QImode, op1);
9525 emit_insn (gen_sse2_movdqu (op0, op1));
9529 if (TARGET_SSE2 && mode == V2DFmode)
9531 m = adjust_address (op0, DFmode, 0);
9532 emit_insn (gen_sse2_storelpd (m, op1));
9533 m = adjust_address (op0, DFmode, 8);
9534 emit_insn (gen_sse2_storehpd (m, op1));
9538 if (mode != V4SFmode)
9539 op1 = gen_lowpart (V4SFmode, op1);
9540 m = adjust_address (op0, V2SFmode, 0);
9541 emit_insn (gen_sse_storelps (m, op1));
9542 m = adjust_address (op0, V2SFmode, 8);
9543 emit_insn (gen_sse_storehps (m, op1));
9550 /* Expand a push in MODE. This is some mode for which we do not support
9551 proper push instructions, at least from the registers that we expect
9552 the value to live in. */
9555 ix86_expand_push (enum machine_mode mode, rtx x)
9559 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9560 GEN_INT (-GET_MODE_SIZE (mode)),
9561 stack_pointer_rtx, 1, OPTAB_DIRECT);
9562 if (tmp != stack_pointer_rtx)
9563 emit_move_insn (stack_pointer_rtx, tmp);
9565 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9566 emit_move_insn (tmp, x);
9569 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9570 destination to use for the operation. If different from the true
9571 destination in operands[0], a copy operation will be required. */
9574 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9577 int matching_memory;
9578 rtx src1, src2, dst;
9584 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9585 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9586 && (rtx_equal_p (dst, src2)
9587 || immediate_operand (src1, mode)))
9594 /* If the destination is memory, and we do not have matching source
9595 operands, do things in registers. */
9596 matching_memory = 0;
9597 if (GET_CODE (dst) == MEM)
9599 if (rtx_equal_p (dst, src1))
9600 matching_memory = 1;
9601 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9602 && rtx_equal_p (dst, src2))
9603 matching_memory = 2;
9605 dst = gen_reg_rtx (mode);
9608 /* Both source operands cannot be in memory. */
9609 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9611 if (matching_memory != 2)
9612 src2 = force_reg (mode, src2);
9614 src1 = force_reg (mode, src1);
9617 /* If the operation is not commutable, source 1 cannot be a constant
9618 or non-matching memory. */
9619 if ((CONSTANT_P (src1)
9620 || (!matching_memory && GET_CODE (src1) == MEM))
9621 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9622 src1 = force_reg (mode, src1);
9624 src1 = operands[1] = src1;
9625 src2 = operands[2] = src2;
9629 /* Similarly, but assume that the destination has already been
9633 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9634 enum machine_mode mode, rtx operands[])
9636 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9637 gcc_assert (dst == operands[0]);
9640 /* Attempt to expand a binary operator. Make the expansion closer to the
9641 actual machine, then just general_operand, which will allow 3 separate
9642 memory references (one output, two input) in a single insn. */
9645 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9648 rtx src1, src2, dst, op, clob;
9650 dst = ix86_fixup_binary_operands (code, mode, operands);
9654 /* Emit the instruction. */
9656 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9657 if (reload_in_progress)
9659 /* Reload doesn't know about the flags register, and doesn't know that
9660 it doesn't want to clobber it. We can only do this with PLUS. */
9661 gcc_assert (code == PLUS);
9666 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9667 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9670 /* Fix up the destination if needed. */
9671 if (dst != operands[0])
9672 emit_move_insn (operands[0], dst);
9675 /* Return TRUE or FALSE depending on whether the binary operator meets the
9676 appropriate constraints. */
9679 ix86_binary_operator_ok (enum rtx_code code,
9680 enum machine_mode mode ATTRIBUTE_UNUSED,
9683 /* Both source operands cannot be in memory. */
9684 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9686 /* If the operation is not commutable, source 1 cannot be a constant. */
9687 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9689 /* If the destination is memory, we must have a matching source operand. */
9690 if (GET_CODE (operands[0]) == MEM
9691 && ! (rtx_equal_p (operands[0], operands[1])
9692 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9693 && rtx_equal_p (operands[0], operands[2]))))
9695 /* If the operation is not commutable and the source 1 is memory, we must
9696 have a matching destination. */
9697 if (GET_CODE (operands[1]) == MEM
9698 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9699 && ! rtx_equal_p (operands[0], operands[1]))
9704 /* Attempt to expand a unary operator. Make the expansion closer to the
9705 actual machine, then just general_operand, which will allow 2 separate
9706 memory references (one output, one input) in a single insn. */
9709 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9712 int matching_memory;
9713 rtx src, dst, op, clob;
9718 /* If the destination is memory, and we do not have matching source
9719 operands, do things in registers. */
9720 matching_memory = 0;
9723 if (rtx_equal_p (dst, src))
9724 matching_memory = 1;
9726 dst = gen_reg_rtx (mode);
9729 /* When source operand is memory, destination must match. */
9730 if (MEM_P (src) && !matching_memory)
9731 src = force_reg (mode, src);
9733 /* Emit the instruction. */
9735 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9736 if (reload_in_progress || code == NOT)
9738 /* Reload doesn't know about the flags register, and doesn't know that
9739 it doesn't want to clobber it. */
9740 gcc_assert (code == NOT);
9745 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9746 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9749 /* Fix up the destination if needed. */
9750 if (dst != operands[0])
9751 emit_move_insn (operands[0], dst);
9754 /* Return TRUE or FALSE depending on whether the unary operator meets the
9755 appropriate constraints. */
9758 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9759 enum machine_mode mode ATTRIBUTE_UNUSED,
9760 rtx operands[2] ATTRIBUTE_UNUSED)
9762 /* If one of operands is memory, source and destination must match. */
9763 if ((GET_CODE (operands[0]) == MEM
9764 || GET_CODE (operands[1]) == MEM)
9765 && ! rtx_equal_p (operands[0], operands[1]))
9770 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9771 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9772 true, then replicate the mask for all elements of the vector register.
9773 If INVERT is true, then create a mask excluding the sign bit. */
9776 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9778 enum machine_mode vec_mode;
9779 HOST_WIDE_INT hi, lo;
9784 /* Find the sign bit, sign extended to 2*HWI. */
9786 lo = 0x80000000, hi = lo < 0;
9787 else if (HOST_BITS_PER_WIDE_INT >= 64)
9788 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9790 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9795 /* Force this value into the low part of a fp vector constant. */
9796 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9797 mask = gen_lowpart (mode, mask);
9802 v = gen_rtvec (4, mask, mask, mask, mask);
9804 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9805 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9806 vec_mode = V4SFmode;
9811 v = gen_rtvec (2, mask, mask);
9813 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9814 vec_mode = V2DFmode;
9817 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9820 /* Generate code for floating point ABS or NEG. */
9823 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9826 rtx mask, set, use, clob, dst, src;
9827 bool matching_memory;
9828 bool use_sse = false;
9829 bool vector_mode = VECTOR_MODE_P (mode);
9830 enum machine_mode elt_mode = mode;
9834 elt_mode = GET_MODE_INNER (mode);
9837 else if (TARGET_SSE_MATH)
9838 use_sse = SSE_FLOAT_MODE_P (mode);
9840 /* NEG and ABS performed with SSE use bitwise mask operations.
9841 Create the appropriate mask now. */
9843 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9850 /* If the destination is memory, and we don't have matching source
9851 operands or we're using the x87, do things in registers. */
9852 matching_memory = false;
9855 if (use_sse && rtx_equal_p (dst, src))
9856 matching_memory = true;
9858 dst = gen_reg_rtx (mode);
9860 if (MEM_P (src) && !matching_memory)
9861 src = force_reg (mode, src);
9865 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9866 set = gen_rtx_SET (VOIDmode, dst, set);
9871 set = gen_rtx_fmt_e (code, mode, src);
9872 set = gen_rtx_SET (VOIDmode, dst, set);
9875 use = gen_rtx_USE (VOIDmode, mask);
9876 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9877 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9878 gen_rtvec (3, set, use, clob)));
9884 if (dst != operands[0])
9885 emit_move_insn (operands[0], dst);
9888 /* Expand a copysign operation. Special case operand 0 being a constant. */
9891 ix86_expand_copysign (rtx operands[])
9893 enum machine_mode mode, vmode;
9894 rtx dest, op0, op1, mask, nmask;
9900 mode = GET_MODE (dest);
9901 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9903 if (GET_CODE (op0) == CONST_DOUBLE)
9907 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9908 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9910 if (op0 == CONST0_RTX (mode))
9911 op0 = CONST0_RTX (vmode);
9915 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9916 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9918 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9919 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9922 mask = ix86_build_signbit_mask (mode, 0, 0);
9925 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9927 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9931 nmask = ix86_build_signbit_mask (mode, 0, 1);
9932 mask = ix86_build_signbit_mask (mode, 0, 0);
9935 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9937 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9941 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9942 be a constant, and so has already been expanded into a vector constant. */
9945 ix86_split_copysign_const (rtx operands[])
9947 enum machine_mode mode, vmode;
9948 rtx dest, op0, op1, mask, x;
9955 mode = GET_MODE (dest);
9956 vmode = GET_MODE (mask);
9958 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9959 x = gen_rtx_AND (vmode, dest, mask);
9960 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9962 if (op0 != CONST0_RTX (vmode))
9964 x = gen_rtx_IOR (vmode, dest, op0);
9965 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9969 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9970 so we have to do two masks. */
9973 ix86_split_copysign_var (rtx operands[])
9975 enum machine_mode mode, vmode;
9976 rtx dest, scratch, op0, op1, mask, nmask, x;
9979 scratch = operands[1];
9982 nmask = operands[4];
9985 mode = GET_MODE (dest);
9986 vmode = GET_MODE (mask);
9988 if (rtx_equal_p (op0, op1))
9990 /* Shouldn't happen often (it's useless, obviously), but when it does
9991 we'd generate incorrect code if we continue below. */
9992 emit_move_insn (dest, op0);
9996 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9998 gcc_assert (REGNO (op1) == REGNO (scratch));
10000 x = gen_rtx_AND (vmode, scratch, mask);
10001 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10004 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10005 x = gen_rtx_NOT (vmode, dest);
10006 x = gen_rtx_AND (vmode, x, op0);
10007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10011 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10013 x = gen_rtx_AND (vmode, scratch, mask);
10015 else /* alternative 2,4 */
10017 gcc_assert (REGNO (mask) == REGNO (scratch));
10018 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10019 x = gen_rtx_AND (vmode, scratch, op1);
10021 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10023 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10025 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10026 x = gen_rtx_AND (vmode, dest, nmask);
10028 else /* alternative 3,4 */
10030 gcc_assert (REGNO (nmask) == REGNO (dest));
10032 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10033 x = gen_rtx_AND (vmode, dest, op0);
10035 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10038 x = gen_rtx_IOR (vmode, dest, scratch);
10039 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10042 /* Return TRUE or FALSE depending on whether the first SET in INSN
10043 has source and destination with matching CC modes, and that the
10044 CC mode is at least as constrained as REQ_MODE. */
10047 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10050 enum machine_mode set_mode;
10052 set = PATTERN (insn);
10053 if (GET_CODE (set) == PARALLEL)
10054 set = XVECEXP (set, 0, 0);
10055 gcc_assert (GET_CODE (set) == SET);
10056 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10058 set_mode = GET_MODE (SET_DEST (set));
10062 if (req_mode != CCNOmode
10063 && (req_mode != CCmode
10064 || XEXP (SET_SRC (set), 1) != const0_rtx))
10068 if (req_mode == CCGCmode)
10072 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10076 if (req_mode == CCZmode)
10083 gcc_unreachable ();
10086 return (GET_MODE (SET_SRC (set)) == set_mode);
10089 /* Generate insn patterns to do an integer compare of OPERANDS. */
10092 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10094 enum machine_mode cmpmode;
10097 cmpmode = SELECT_CC_MODE (code, op0, op1);
10098 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10100 /* This is very simple, but making the interface the same as in the
10101 FP case makes the rest of the code easier. */
10102 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10103 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10105 /* Return the test that should be put into the flags user, i.e.
10106 the bcc, scc, or cmov instruction. */
10107 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10110 /* Figure out whether to use ordered or unordered fp comparisons.
10111 Return the appropriate mode to use. */
10114 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10116 /* ??? In order to make all comparisons reversible, we do all comparisons
10117 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10118 all forms trapping and nontrapping comparisons, we can make inequality
10119 comparisons trapping again, since it results in better code when using
10120 FCOM based compares. */
10121 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10125 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10127 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10128 return ix86_fp_compare_mode (code);
10131 /* Only zero flag is needed. */
10132 case EQ: /* ZF=0 */
10133 case NE: /* ZF!=0 */
10135 /* Codes needing carry flag. */
10136 case GEU: /* CF=0 */
10137 case GTU: /* CF=0 & ZF=0 */
10138 case LTU: /* CF=1 */
10139 case LEU: /* CF=1 | ZF=1 */
10141 /* Codes possibly doable only with sign flag when
10142 comparing against zero. */
10143 case GE: /* SF=OF or SF=0 */
10144 case LT: /* SF<>OF or SF=1 */
10145 if (op1 == const0_rtx)
10148 /* For other cases Carry flag is not required. */
10150 /* Codes doable only with sign flag when comparing
10151 against zero, but we miss jump instruction for it
10152 so we need to use relational tests against overflow
10153 that thus needs to be zero. */
10154 case GT: /* ZF=0 & SF=OF */
10155 case LE: /* ZF=1 | SF<>OF */
10156 if (op1 == const0_rtx)
10160 /* strcmp pattern do (use flags) and combine may ask us for proper
10165 gcc_unreachable ();
10169 /* Return the fixed registers used for condition codes. */
10172 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10179 /* If two condition code modes are compatible, return a condition code
10180 mode which is compatible with both. Otherwise, return
10183 static enum machine_mode
10184 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10189 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10192 if ((m1 == CCGCmode && m2 == CCGOCmode)
10193 || (m1 == CCGOCmode && m2 == CCGCmode))
10199 gcc_unreachable ();
10221 /* These are only compatible with themselves, which we already
10227 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10230 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10232 enum rtx_code swapped_code = swap_condition (code);
10233 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10234 || (ix86_fp_comparison_cost (swapped_code)
10235 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10238 /* Swap, force into registers, or otherwise massage the two operands
10239 to a fp comparison. The operands are updated in place; the new
10240 comparison code is returned. */
10242 static enum rtx_code
10243 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10245 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10246 rtx op0 = *pop0, op1 = *pop1;
10247 enum machine_mode op_mode = GET_MODE (op0);
10248 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10250 /* All of the unordered compare instructions only work on registers.
10251 The same is true of the fcomi compare instructions. The XFmode
10252 compare instructions require registers except when comparing
10253 against zero or when converting operand 1 from fixed point to
10257 && (fpcmp_mode == CCFPUmode
10258 || (op_mode == XFmode
10259 && ! (standard_80387_constant_p (op0) == 1
10260 || standard_80387_constant_p (op1) == 1)
10261 && GET_CODE (op1) != FLOAT)
10262 || ix86_use_fcomi_compare (code)))
10264 op0 = force_reg (op_mode, op0);
10265 op1 = force_reg (op_mode, op1);
10269 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10270 things around if they appear profitable, otherwise force op0
10271 into a register. */
10273 if (standard_80387_constant_p (op0) == 0
10274 || (GET_CODE (op0) == MEM
10275 && ! (standard_80387_constant_p (op1) == 0
10276 || GET_CODE (op1) == MEM)))
10279 tmp = op0, op0 = op1, op1 = tmp;
10280 code = swap_condition (code);
10283 if (GET_CODE (op0) != REG)
10284 op0 = force_reg (op_mode, op0);
10286 if (CONSTANT_P (op1))
10288 int tmp = standard_80387_constant_p (op1);
10290 op1 = validize_mem (force_const_mem (op_mode, op1));
10294 op1 = force_reg (op_mode, op1);
10297 op1 = force_reg (op_mode, op1);
10301 /* Try to rearrange the comparison to make it cheaper. */
10302 if (ix86_fp_comparison_cost (code)
10303 > ix86_fp_comparison_cost (swap_condition (code))
10304 && (GET_CODE (op1) == REG || !no_new_pseudos))
10307 tmp = op0, op0 = op1, op1 = tmp;
10308 code = swap_condition (code);
10309 if (GET_CODE (op0) != REG)
10310 op0 = force_reg (op_mode, op0);
10318 /* Convert comparison codes we use to represent FP comparison to integer
10319 code that will result in proper branch. Return UNKNOWN if no such code
10323 ix86_fp_compare_code_to_integer (enum rtx_code code)
10352 /* Split comparison code CODE into comparisons we can do using branch
10353 instructions. BYPASS_CODE is comparison code for branch that will
10354 branch around FIRST_CODE and SECOND_CODE. If some of branches
10355 is not required, set value to UNKNOWN.
10356 We never require more than two branches. */
10359 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10360 enum rtx_code *first_code,
10361 enum rtx_code *second_code)
10363 *first_code = code;
10364 *bypass_code = UNKNOWN;
10365 *second_code = UNKNOWN;
10367 /* The fcomi comparison sets flags as follows:
10377 case GT: /* GTU - CF=0 & ZF=0 */
10378 case GE: /* GEU - CF=0 */
10379 case ORDERED: /* PF=0 */
10380 case UNORDERED: /* PF=1 */
10381 case UNEQ: /* EQ - ZF=1 */
10382 case UNLT: /* LTU - CF=1 */
10383 case UNLE: /* LEU - CF=1 | ZF=1 */
10384 case LTGT: /* EQ - ZF=0 */
10386 case LT: /* LTU - CF=1 - fails on unordered */
10387 *first_code = UNLT;
10388 *bypass_code = UNORDERED;
10390 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10391 *first_code = UNLE;
10392 *bypass_code = UNORDERED;
10394 case EQ: /* EQ - ZF=1 - fails on unordered */
10395 *first_code = UNEQ;
10396 *bypass_code = UNORDERED;
10398 case NE: /* NE - ZF=0 - fails on unordered */
10399 *first_code = LTGT;
10400 *second_code = UNORDERED;
10402 case UNGE: /* GEU - CF=0 - fails on unordered */
10404 *second_code = UNORDERED;
10406 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10408 *second_code = UNORDERED;
10411 gcc_unreachable ();
10413 if (!TARGET_IEEE_FP)
10415 *second_code = UNKNOWN;
10416 *bypass_code = UNKNOWN;
10420 /* Return cost of comparison done fcom + arithmetics operations on AX.
10421 All following functions do use number of instructions as a cost metrics.
10422 In future this should be tweaked to compute bytes for optimize_size and
10423 take into account performance of various instructions on various CPUs. */
10425 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10427 if (!TARGET_IEEE_FP)
10429 /* The cost of code output by ix86_expand_fp_compare. */
10453 gcc_unreachable ();
10457 /* Return cost of comparison done using fcomi operation.
10458 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10460 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10462 enum rtx_code bypass_code, first_code, second_code;
10463 /* Return arbitrarily high cost when instruction is not supported - this
10464 prevents gcc from using it. */
10467 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10468 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10471 /* Return cost of comparison done using sahf operation.
10472 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10474 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10476 enum rtx_code bypass_code, first_code, second_code;
10477 /* Return arbitrarily high cost when instruction is not preferred - this
10478 avoids gcc from using it. */
10479 if (!TARGET_USE_SAHF && !optimize_size)
10481 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10482 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10485 /* Compute cost of the comparison done using any method.
10486 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10488 ix86_fp_comparison_cost (enum rtx_code code)
10490 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10493 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10494 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10496 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10497 if (min > sahf_cost)
10499 if (min > fcomi_cost)
10504 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10507 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10508 rtx *second_test, rtx *bypass_test)
10510 enum machine_mode fpcmp_mode, intcmp_mode;
10512 int cost = ix86_fp_comparison_cost (code);
10513 enum rtx_code bypass_code, first_code, second_code;
10515 fpcmp_mode = ix86_fp_compare_mode (code);
10516 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10519 *second_test = NULL_RTX;
10521 *bypass_test = NULL_RTX;
10523 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10525 /* Do fcomi/sahf based test when profitable. */
10526 if ((bypass_code == UNKNOWN || bypass_test)
10527 && (second_code == UNKNOWN || second_test)
10528 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10532 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10533 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10539 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10540 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10542 scratch = gen_reg_rtx (HImode);
10543 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10544 emit_insn (gen_x86_sahf_1 (scratch));
10547 /* The FP codes work out to act like unsigned. */
10548 intcmp_mode = fpcmp_mode;
10550 if (bypass_code != UNKNOWN)
10551 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10552 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10554 if (second_code != UNKNOWN)
10555 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10556 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10561 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10562 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10563 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10565 scratch = gen_reg_rtx (HImode);
10566 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10568 /* In the unordered case, we have to check C2 for NaN's, which
10569 doesn't happen to work out to anything nice combination-wise.
10570 So do some bit twiddling on the value we've got in AH to come
10571 up with an appropriate set of condition codes. */
10573 intcmp_mode = CCNOmode;
10578 if (code == GT || !TARGET_IEEE_FP)
10580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10585 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10586 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10587 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10588 intcmp_mode = CCmode;
10594 if (code == LT && TARGET_IEEE_FP)
10596 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10597 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10598 intcmp_mode = CCmode;
10603 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10609 if (code == GE || !TARGET_IEEE_FP)
10611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10616 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10617 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10624 if (code == LE && TARGET_IEEE_FP)
10626 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10627 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10628 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10629 intcmp_mode = CCmode;
10634 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10640 if (code == EQ && TARGET_IEEE_FP)
10642 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10643 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10644 intcmp_mode = CCmode;
10649 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10656 if (code == NE && TARGET_IEEE_FP)
10658 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10659 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10665 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10671 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10675 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10680 gcc_unreachable ();
10684 /* Return the test that should be put into the flags user, i.e.
10685 the bcc, scc, or cmov instruction. */
10686 return gen_rtx_fmt_ee (code, VOIDmode,
10687 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10692 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10695 op0 = ix86_compare_op0;
10696 op1 = ix86_compare_op1;
10699 *second_test = NULL_RTX;
10701 *bypass_test = NULL_RTX;
10703 if (ix86_compare_emitted)
10705 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10706 ix86_compare_emitted = NULL_RTX;
10708 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10709 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10710 second_test, bypass_test);
10712 ret = ix86_expand_int_compare (code, op0, op1);
10717 /* Return true if the CODE will result in nontrivial jump sequence. */
10719 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10721 enum rtx_code bypass_code, first_code, second_code;
10724 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10725 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10729 ix86_expand_branch (enum rtx_code code, rtx label)
10733 /* If we have emitted a compare insn, go straight to simple.
10734 ix86_expand_compare won't emit anything if ix86_compare_emitted
10736 if (ix86_compare_emitted)
10739 switch (GET_MODE (ix86_compare_op0))
10745 tmp = ix86_expand_compare (code, NULL, NULL);
10746 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10747 gen_rtx_LABEL_REF (VOIDmode, label),
10749 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10758 enum rtx_code bypass_code, first_code, second_code;
10760 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10761 &ix86_compare_op1);
10763 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10765 /* Check whether we will use the natural sequence with one jump. If
10766 so, we can expand jump early. Otherwise delay expansion by
10767 creating compound insn to not confuse optimizers. */
10768 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10771 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10772 gen_rtx_LABEL_REF (VOIDmode, label),
10773 pc_rtx, NULL_RTX, NULL_RTX);
10777 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10778 ix86_compare_op0, ix86_compare_op1);
10779 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10780 gen_rtx_LABEL_REF (VOIDmode, label),
10782 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10784 use_fcomi = ix86_use_fcomi_compare (code);
10785 vec = rtvec_alloc (3 + !use_fcomi);
10786 RTVEC_ELT (vec, 0) = tmp;
10788 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10790 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10793 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10795 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10804 /* Expand DImode branch into multiple compare+branch. */
10806 rtx lo[2], hi[2], label2;
10807 enum rtx_code code1, code2, code3;
10808 enum machine_mode submode;
10810 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10812 tmp = ix86_compare_op0;
10813 ix86_compare_op0 = ix86_compare_op1;
10814 ix86_compare_op1 = tmp;
10815 code = swap_condition (code);
10817 if (GET_MODE (ix86_compare_op0) == DImode)
10819 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10820 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10825 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10826 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10830 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10831 avoid two branches. This costs one extra insn, so disable when
10832 optimizing for size. */
10834 if ((code == EQ || code == NE)
10836 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10841 if (hi[1] != const0_rtx)
10842 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10843 NULL_RTX, 0, OPTAB_WIDEN);
10846 if (lo[1] != const0_rtx)
10847 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10848 NULL_RTX, 0, OPTAB_WIDEN);
10850 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10851 NULL_RTX, 0, OPTAB_WIDEN);
10853 ix86_compare_op0 = tmp;
10854 ix86_compare_op1 = const0_rtx;
10855 ix86_expand_branch (code, label);
10859 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10860 op1 is a constant and the low word is zero, then we can just
10861 examine the high word. */
10863 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10866 case LT: case LTU: case GE: case GEU:
10867 ix86_compare_op0 = hi[0];
10868 ix86_compare_op1 = hi[1];
10869 ix86_expand_branch (code, label);
10875 /* Otherwise, we need two or three jumps. */
10877 label2 = gen_label_rtx ();
10880 code2 = swap_condition (code);
10881 code3 = unsigned_condition (code);
10885 case LT: case GT: case LTU: case GTU:
10888 case LE: code1 = LT; code2 = GT; break;
10889 case GE: code1 = GT; code2 = LT; break;
10890 case LEU: code1 = LTU; code2 = GTU; break;
10891 case GEU: code1 = GTU; code2 = LTU; break;
10893 case EQ: code1 = UNKNOWN; code2 = NE; break;
10894 case NE: code2 = UNKNOWN; break;
10897 gcc_unreachable ();
10902 * if (hi(a) < hi(b)) goto true;
10903 * if (hi(a) > hi(b)) goto false;
10904 * if (lo(a) < lo(b)) goto true;
10908 ix86_compare_op0 = hi[0];
10909 ix86_compare_op1 = hi[1];
10911 if (code1 != UNKNOWN)
10912 ix86_expand_branch (code1, label);
10913 if (code2 != UNKNOWN)
10914 ix86_expand_branch (code2, label2);
10916 ix86_compare_op0 = lo[0];
10917 ix86_compare_op1 = lo[1];
10918 ix86_expand_branch (code3, label);
10920 if (code2 != UNKNOWN)
10921 emit_label (label2);
10926 gcc_unreachable ();
10930 /* Split branch based on floating point condition. */
10932 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10933 rtx target1, rtx target2, rtx tmp, rtx pushed)
10935 rtx second, bypass;
10936 rtx label = NULL_RTX;
10938 int bypass_probability = -1, second_probability = -1, probability = -1;
10941 if (target2 != pc_rtx)
10944 code = reverse_condition_maybe_unordered (code);
10949 condition = ix86_expand_fp_compare (code, op1, op2,
10950 tmp, &second, &bypass);
10952 /* Remove pushed operand from stack. */
10954 ix86_free_from_memory (GET_MODE (pushed));
10956 if (split_branch_probability >= 0)
10958 /* Distribute the probabilities across the jumps.
10959 Assume the BYPASS and SECOND to be always test
10961 probability = split_branch_probability;
10963 /* Value of 1 is low enough to make no need for probability
10964 to be updated. Later we may run some experiments and see
10965 if unordered values are more frequent in practice. */
10967 bypass_probability = 1;
10969 second_probability = 1;
10971 if (bypass != NULL_RTX)
10973 label = gen_label_rtx ();
10974 i = emit_jump_insn (gen_rtx_SET
10976 gen_rtx_IF_THEN_ELSE (VOIDmode,
10978 gen_rtx_LABEL_REF (VOIDmode,
10981 if (bypass_probability >= 0)
10983 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10984 GEN_INT (bypass_probability),
10987 i = emit_jump_insn (gen_rtx_SET
10989 gen_rtx_IF_THEN_ELSE (VOIDmode,
10990 condition, target1, target2)));
10991 if (probability >= 0)
10993 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10994 GEN_INT (probability),
10996 if (second != NULL_RTX)
10998 i = emit_jump_insn (gen_rtx_SET
11000 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11002 if (second_probability >= 0)
11004 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11005 GEN_INT (second_probability),
11008 if (label != NULL_RTX)
11009 emit_label (label);
11013 ix86_expand_setcc (enum rtx_code code, rtx dest)
11015 rtx ret, tmp, tmpreg, equiv;
11016 rtx second_test, bypass_test;
11018 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11019 return 0; /* FAIL */
11021 gcc_assert (GET_MODE (dest) == QImode);
11023 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11024 PUT_MODE (ret, QImode);
11029 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11030 if (bypass_test || second_test)
11032 rtx test = second_test;
11034 rtx tmp2 = gen_reg_rtx (QImode);
11037 gcc_assert (!second_test);
11038 test = bypass_test;
11040 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11042 PUT_MODE (test, QImode);
11043 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11046 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11048 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11051 /* Attach a REG_EQUAL note describing the comparison result. */
11052 if (ix86_compare_op0 && ix86_compare_op1)
11054 equiv = simplify_gen_relational (code, QImode,
11055 GET_MODE (ix86_compare_op0),
11056 ix86_compare_op0, ix86_compare_op1);
11057 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11060 return 1; /* DONE */
11063 /* Expand comparison setting or clearing carry flag. Return true when
11064 successful and set pop for the operation. */
11066 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11068 enum machine_mode mode =
11069 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11071 /* Do not handle DImode compares that go through special path. Also we can't
11072 deal with FP compares yet. This is possible to add. */
11073 if (mode == (TARGET_64BIT ? TImode : DImode))
11075 if (FLOAT_MODE_P (mode))
11077 rtx second_test = NULL, bypass_test = NULL;
11078 rtx compare_op, compare_seq;
11080 /* Shortcut: following common codes never translate into carry flag compares. */
11081 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11082 || code == ORDERED || code == UNORDERED)
11085 /* These comparisons require zero flag; swap operands so they won't. */
11086 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11087 && !TARGET_IEEE_FP)
11092 code = swap_condition (code);
11095 /* Try to expand the comparison and verify that we end up with carry flag
11096 based comparison. This is fails to be true only when we decide to expand
11097 comparison using arithmetic that is not too common scenario. */
11099 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11100 &second_test, &bypass_test);
11101 compare_seq = get_insns ();
11104 if (second_test || bypass_test)
11106 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11107 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11108 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11110 code = GET_CODE (compare_op);
11111 if (code != LTU && code != GEU)
11113 emit_insn (compare_seq);
11117 if (!INTEGRAL_MODE_P (mode))
11125 /* Convert a==0 into (unsigned)a<1. */
11128 if (op1 != const0_rtx)
11131 code = (code == EQ ? LTU : GEU);
11134 /* Convert a>b into b<a or a>=b-1. */
11137 if (GET_CODE (op1) == CONST_INT)
11139 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11140 /* Bail out on overflow. We still can swap operands but that
11141 would force loading of the constant into register. */
11142 if (op1 == const0_rtx
11143 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11145 code = (code == GTU ? GEU : LTU);
11152 code = (code == GTU ? LTU : GEU);
11156 /* Convert a>=0 into (unsigned)a<0x80000000. */
11159 if (mode == DImode || op1 != const0_rtx)
11161 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11162 code = (code == LT ? GEU : LTU);
11166 if (mode == DImode || op1 != constm1_rtx)
11168 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11169 code = (code == LE ? GEU : LTU);
11175 /* Swapping operands may cause constant to appear as first operand. */
11176 if (!nonimmediate_operand (op0, VOIDmode))
11178 if (no_new_pseudos)
11180 op0 = force_reg (mode, op0);
11182 ix86_compare_op0 = op0;
11183 ix86_compare_op1 = op1;
11184 *pop = ix86_expand_compare (code, NULL, NULL);
11185 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11190 ix86_expand_int_movcc (rtx operands[])
11192 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11193 rtx compare_seq, compare_op;
11194 rtx second_test, bypass_test;
11195 enum machine_mode mode = GET_MODE (operands[0]);
11196 bool sign_bit_compare_p = false;;
11199 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11200 compare_seq = get_insns ();
11203 compare_code = GET_CODE (compare_op);
11205 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11206 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11207 sign_bit_compare_p = true;
11209 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11210 HImode insns, we'd be swallowed in word prefix ops. */
11212 if ((mode != HImode || TARGET_FAST_PREFIX)
11213 && (mode != (TARGET_64BIT ? TImode : DImode))
11214 && GET_CODE (operands[2]) == CONST_INT
11215 && GET_CODE (operands[3]) == CONST_INT)
11217 rtx out = operands[0];
11218 HOST_WIDE_INT ct = INTVAL (operands[2]);
11219 HOST_WIDE_INT cf = INTVAL (operands[3]);
11220 HOST_WIDE_INT diff;
11223 /* Sign bit compares are better done using shifts than we do by using
11225 if (sign_bit_compare_p
11226 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11227 ix86_compare_op1, &compare_op))
11229 /* Detect overlap between destination and compare sources. */
11232 if (!sign_bit_compare_p)
11234 bool fpcmp = false;
11236 compare_code = GET_CODE (compare_op);
11238 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11239 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11242 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11245 /* To simplify rest of code, restrict to the GEU case. */
11246 if (compare_code == LTU)
11248 HOST_WIDE_INT tmp = ct;
11251 compare_code = reverse_condition (compare_code);
11252 code = reverse_condition (code);
11257 PUT_CODE (compare_op,
11258 reverse_condition_maybe_unordered
11259 (GET_CODE (compare_op)));
11261 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11265 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11266 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11267 tmp = gen_reg_rtx (mode);
11269 if (mode == DImode)
11270 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11272 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11276 if (code == GT || code == GE)
11277 code = reverse_condition (code);
11280 HOST_WIDE_INT tmp = ct;
11285 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11286 ix86_compare_op1, VOIDmode, 0, -1);
11299 tmp = expand_simple_binop (mode, PLUS,
11301 copy_rtx (tmp), 1, OPTAB_DIRECT);
11312 tmp = expand_simple_binop (mode, IOR,
11314 copy_rtx (tmp), 1, OPTAB_DIRECT);
11316 else if (diff == -1 && ct)
11326 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11328 tmp = expand_simple_binop (mode, PLUS,
11329 copy_rtx (tmp), GEN_INT (cf),
11330 copy_rtx (tmp), 1, OPTAB_DIRECT);
11338 * andl cf - ct, dest
11348 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11351 tmp = expand_simple_binop (mode, AND,
11353 gen_int_mode (cf - ct, mode),
11354 copy_rtx (tmp), 1, OPTAB_DIRECT);
11356 tmp = expand_simple_binop (mode, PLUS,
11357 copy_rtx (tmp), GEN_INT (ct),
11358 copy_rtx (tmp), 1, OPTAB_DIRECT);
11361 if (!rtx_equal_p (tmp, out))
11362 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11364 return 1; /* DONE */
11370 tmp = ct, ct = cf, cf = tmp;
11372 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11374 /* We may be reversing unordered compare to normal compare, that
11375 is not valid in general (we may convert non-trapping condition
11376 to trapping one), however on i386 we currently emit all
11377 comparisons unordered. */
11378 compare_code = reverse_condition_maybe_unordered (compare_code);
11379 code = reverse_condition_maybe_unordered (code);
11383 compare_code = reverse_condition (compare_code);
11384 code = reverse_condition (code);
11388 compare_code = UNKNOWN;
11389 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11390 && GET_CODE (ix86_compare_op1) == CONST_INT)
11392 if (ix86_compare_op1 == const0_rtx
11393 && (code == LT || code == GE))
11394 compare_code = code;
11395 else if (ix86_compare_op1 == constm1_rtx)
11399 else if (code == GT)
11404 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11405 if (compare_code != UNKNOWN
11406 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11407 && (cf == -1 || ct == -1))
11409 /* If lea code below could be used, only optimize
11410 if it results in a 2 insn sequence. */
11412 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11413 || diff == 3 || diff == 5 || diff == 9)
11414 || (compare_code == LT && ct == -1)
11415 || (compare_code == GE && cf == -1))
11418 * notl op1 (if necessary)
11426 code = reverse_condition (code);
11429 out = emit_store_flag (out, code, ix86_compare_op0,
11430 ix86_compare_op1, VOIDmode, 0, -1);
11432 out = expand_simple_binop (mode, IOR,
11434 out, 1, OPTAB_DIRECT);
11435 if (out != operands[0])
11436 emit_move_insn (operands[0], out);
11438 return 1; /* DONE */
11443 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11444 || diff == 3 || diff == 5 || diff == 9)
11445 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11447 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11453 * lea cf(dest*(ct-cf)),dest
11457 * This also catches the degenerate setcc-only case.
11463 out = emit_store_flag (out, code, ix86_compare_op0,
11464 ix86_compare_op1, VOIDmode, 0, 1);
11467 /* On x86_64 the lea instruction operates on Pmode, so we need
11468 to get arithmetics done in proper mode to match. */
11470 tmp = copy_rtx (out);
11474 out1 = copy_rtx (out);
11475 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11479 tmp = gen_rtx_PLUS (mode, tmp, out1);
11485 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11488 if (!rtx_equal_p (tmp, out))
11491 out = force_operand (tmp, copy_rtx (out));
11493 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11495 if (!rtx_equal_p (out, operands[0]))
11496 emit_move_insn (operands[0], copy_rtx (out));
11498 return 1; /* DONE */
11502 * General case: Jumpful:
11503 * xorl dest,dest cmpl op1, op2
11504 * cmpl op1, op2 movl ct, dest
11505 * setcc dest jcc 1f
11506 * decl dest movl cf, dest
11507 * andl (cf-ct),dest 1:
11510 * Size 20. Size 14.
11512 * This is reasonably steep, but branch mispredict costs are
11513 * high on modern cpus, so consider failing only if optimizing
11517 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11518 && BRANCH_COST >= 2)
11524 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11525 /* We may be reversing unordered compare to normal compare,
11526 that is not valid in general (we may convert non-trapping
11527 condition to trapping one), however on i386 we currently
11528 emit all comparisons unordered. */
11529 code = reverse_condition_maybe_unordered (code);
11532 code = reverse_condition (code);
11533 if (compare_code != UNKNOWN)
11534 compare_code = reverse_condition (compare_code);
11538 if (compare_code != UNKNOWN)
11540 /* notl op1 (if needed)
11545 For x < 0 (resp. x <= -1) there will be no notl,
11546 so if possible swap the constants to get rid of the
11548 True/false will be -1/0 while code below (store flag
11549 followed by decrement) is 0/-1, so the constants need
11550 to be exchanged once more. */
11552 if (compare_code == GE || !cf)
11554 code = reverse_condition (code);
11559 HOST_WIDE_INT tmp = cf;
11564 out = emit_store_flag (out, code, ix86_compare_op0,
11565 ix86_compare_op1, VOIDmode, 0, -1);
11569 out = emit_store_flag (out, code, ix86_compare_op0,
11570 ix86_compare_op1, VOIDmode, 0, 1);
11572 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11573 copy_rtx (out), 1, OPTAB_DIRECT);
11576 out = expand_simple_binop (mode, AND, copy_rtx (out),
11577 gen_int_mode (cf - ct, mode),
11578 copy_rtx (out), 1, OPTAB_DIRECT);
11580 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11581 copy_rtx (out), 1, OPTAB_DIRECT);
11582 if (!rtx_equal_p (out, operands[0]))
11583 emit_move_insn (operands[0], copy_rtx (out));
11585 return 1; /* DONE */
11589 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11591 /* Try a few things more with specific constants and a variable. */
11594 rtx var, orig_out, out, tmp;
11596 if (BRANCH_COST <= 2)
11597 return 0; /* FAIL */
11599 /* If one of the two operands is an interesting constant, load a
11600 constant with the above and mask it in with a logical operation. */
11602 if (GET_CODE (operands[2]) == CONST_INT)
11605 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11606 operands[3] = constm1_rtx, op = and_optab;
11607 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11608 operands[3] = const0_rtx, op = ior_optab;
11610 return 0; /* FAIL */
11612 else if (GET_CODE (operands[3]) == CONST_INT)
11615 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11616 operands[2] = constm1_rtx, op = and_optab;
11617 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11618 operands[2] = const0_rtx, op = ior_optab;
11620 return 0; /* FAIL */
11623 return 0; /* FAIL */
11625 orig_out = operands[0];
11626 tmp = gen_reg_rtx (mode);
11629 /* Recurse to get the constant loaded. */
11630 if (ix86_expand_int_movcc (operands) == 0)
11631 return 0; /* FAIL */
11633 /* Mask in the interesting variable. */
11634 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11636 if (!rtx_equal_p (out, orig_out))
11637 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11639 return 1; /* DONE */
11643 * For comparison with above,
11653 if (! nonimmediate_operand (operands[2], mode))
11654 operands[2] = force_reg (mode, operands[2]);
11655 if (! nonimmediate_operand (operands[3], mode))
11656 operands[3] = force_reg (mode, operands[3]);
11658 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11660 rtx tmp = gen_reg_rtx (mode);
11661 emit_move_insn (tmp, operands[3]);
11664 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11666 rtx tmp = gen_reg_rtx (mode);
11667 emit_move_insn (tmp, operands[2]);
11671 if (! register_operand (operands[2], VOIDmode)
11673 || ! register_operand (operands[3], VOIDmode)))
11674 operands[2] = force_reg (mode, operands[2]);
11677 && ! register_operand (operands[3], VOIDmode))
11678 operands[3] = force_reg (mode, operands[3]);
11680 emit_insn (compare_seq);
11681 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11682 gen_rtx_IF_THEN_ELSE (mode,
11683 compare_op, operands[2],
11686 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11687 gen_rtx_IF_THEN_ELSE (mode,
11689 copy_rtx (operands[3]),
11690 copy_rtx (operands[0]))));
11692 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11693 gen_rtx_IF_THEN_ELSE (mode,
11695 copy_rtx (operands[2]),
11696 copy_rtx (operands[0]))));
11698 return 1; /* DONE */
11701 /* Swap, force into registers, or otherwise massage the two operands
11702 to an sse comparison with a mask result. Thus we differ a bit from
11703 ix86_prepare_fp_compare_args which expects to produce a flags result.
11705 The DEST operand exists to help determine whether to commute commutative
11706 operators. The POP0/POP1 operands are updated in place. The new
11707 comparison code is returned, or UNKNOWN if not implementable. */
11709 static enum rtx_code
11710 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11711 rtx *pop0, rtx *pop1)
11719 /* We have no LTGT as an operator. We could implement it with
11720 NE & ORDERED, but this requires an extra temporary. It's
11721 not clear that it's worth it. */
11728 /* These are supported directly. */
11735 /* For commutative operators, try to canonicalize the destination
11736 operand to be first in the comparison - this helps reload to
11737 avoid extra moves. */
11738 if (!dest || !rtx_equal_p (dest, *pop1))
11746 /* These are not supported directly. Swap the comparison operands
11747 to transform into something that is supported. */
11751 code = swap_condition (code);
11755 gcc_unreachable ();
11761 /* Detect conditional moves that exactly match min/max operational
11762 semantics. Note that this is IEEE safe, as long as we don't
11763 interchange the operands.
11765 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11766 and TRUE if the operation is successful and instructions are emitted. */
11769 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11770 rtx cmp_op1, rtx if_true, rtx if_false)
11772 enum machine_mode mode;
11778 else if (code == UNGE)
11781 if_true = if_false;
11787 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11789 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11794 mode = GET_MODE (dest);
11796 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11797 but MODE may be a vector mode and thus not appropriate. */
11798 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11800 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11803 if_true = force_reg (mode, if_true);
11804 v = gen_rtvec (2, if_true, if_false);
11805 tmp = gen_rtx_UNSPEC (mode, v, u);
11809 code = is_min ? SMIN : SMAX;
11810 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11813 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11817 /* Expand an sse vector comparison. Return the register with the result. */
11820 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11821 rtx op_true, rtx op_false)
11823 enum machine_mode mode = GET_MODE (dest);
11826 cmp_op0 = force_reg (mode, cmp_op0);
11827 if (!nonimmediate_operand (cmp_op1, mode))
11828 cmp_op1 = force_reg (mode, cmp_op1);
11831 || reg_overlap_mentioned_p (dest, op_true)
11832 || reg_overlap_mentioned_p (dest, op_false))
11833 dest = gen_reg_rtx (mode);
11835 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11836 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11841 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11842 operations. This is used for both scalar and vector conditional moves. */
11845 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11847 enum machine_mode mode = GET_MODE (dest);
11850 if (op_false == CONST0_RTX (mode))
11852 op_true = force_reg (mode, op_true);
11853 x = gen_rtx_AND (mode, cmp, op_true);
11854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11856 else if (op_true == CONST0_RTX (mode))
11858 op_false = force_reg (mode, op_false);
11859 x = gen_rtx_NOT (mode, cmp);
11860 x = gen_rtx_AND (mode, x, op_false);
11861 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11865 op_true = force_reg (mode, op_true);
11866 op_false = force_reg (mode, op_false);
11868 t2 = gen_reg_rtx (mode);
11870 t3 = gen_reg_rtx (mode);
11874 x = gen_rtx_AND (mode, op_true, cmp);
11875 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11877 x = gen_rtx_NOT (mode, cmp);
11878 x = gen_rtx_AND (mode, x, op_false);
11879 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11881 x = gen_rtx_IOR (mode, t3, t2);
11882 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11886 /* Expand a floating-point conditional move. Return true if successful. */
11889 ix86_expand_fp_movcc (rtx operands[])
11891 enum machine_mode mode = GET_MODE (operands[0]);
11892 enum rtx_code code = GET_CODE (operands[1]);
11893 rtx tmp, compare_op, second_test, bypass_test;
11895 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11897 enum machine_mode cmode;
11899 /* Since we've no cmove for sse registers, don't force bad register
11900 allocation just to gain access to it. Deny movcc when the
11901 comparison mode doesn't match the move mode. */
11902 cmode = GET_MODE (ix86_compare_op0);
11903 if (cmode == VOIDmode)
11904 cmode = GET_MODE (ix86_compare_op1);
11908 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11910 &ix86_compare_op1);
11911 if (code == UNKNOWN)
11914 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11915 ix86_compare_op1, operands[2],
11919 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11920 ix86_compare_op1, operands[2], operands[3]);
11921 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11925 /* The floating point conditional move instructions don't directly
11926 support conditions resulting from a signed integer comparison. */
11928 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11930 /* The floating point conditional move instructions don't directly
11931 support signed integer comparisons. */
11933 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11935 gcc_assert (!second_test && !bypass_test);
11936 tmp = gen_reg_rtx (QImode);
11937 ix86_expand_setcc (code, tmp);
11939 ix86_compare_op0 = tmp;
11940 ix86_compare_op1 = const0_rtx;
11941 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11943 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11945 tmp = gen_reg_rtx (mode);
11946 emit_move_insn (tmp, operands[3]);
11949 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11951 tmp = gen_reg_rtx (mode);
11952 emit_move_insn (tmp, operands[2]);
11956 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11957 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11958 operands[2], operands[3])));
11960 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11961 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11962 operands[3], operands[0])));
11964 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11965 gen_rtx_IF_THEN_ELSE (mode, second_test,
11966 operands[2], operands[0])));
11971 /* Expand a floating-point vector conditional move; a vcond operation
11972 rather than a movcc operation. */
11975 ix86_expand_fp_vcond (rtx operands[])
11977 enum rtx_code code = GET_CODE (operands[3]);
11980 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11981 &operands[4], &operands[5]);
11982 if (code == UNKNOWN)
11985 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11986 operands[5], operands[1], operands[2]))
11989 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11990 operands[1], operands[2]);
11991 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11995 /* Expand a signed integral vector conditional move. */
11998 ix86_expand_int_vcond (rtx operands[])
12000 enum machine_mode mode = GET_MODE (operands[0]);
12001 enum rtx_code code = GET_CODE (operands[3]);
12002 bool negate = false;
12005 cop0 = operands[4];
12006 cop1 = operands[5];
12008 /* Canonicalize the comparison to EQ, GT, GTU. */
12019 code = reverse_condition (code);
12025 code = reverse_condition (code);
12031 code = swap_condition (code);
12032 x = cop0, cop0 = cop1, cop1 = x;
12036 gcc_unreachable ();
12039 /* Unsigned parallel compare is not supported by the hardware. Play some
12040 tricks to turn this into a signed comparison against 0. */
12043 cop0 = force_reg (mode, cop0);
12051 /* Perform a parallel modulo subtraction. */
12052 t1 = gen_reg_rtx (mode);
12053 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12055 /* Extract the original sign bit of op0. */
12056 mask = GEN_INT (-0x80000000);
12057 mask = gen_rtx_CONST_VECTOR (mode,
12058 gen_rtvec (4, mask, mask, mask, mask));
12059 mask = force_reg (mode, mask);
12060 t2 = gen_reg_rtx (mode);
12061 emit_insn (gen_andv4si3 (t2, cop0, mask));
12063 /* XOR it back into the result of the subtraction. This results
12064 in the sign bit set iff we saw unsigned underflow. */
12065 x = gen_reg_rtx (mode);
12066 emit_insn (gen_xorv4si3 (x, t1, t2));
12074 /* Perform a parallel unsigned saturating subtraction. */
12075 x = gen_reg_rtx (mode);
12076 emit_insn (gen_rtx_SET (VOIDmode, x,
12077 gen_rtx_US_MINUS (mode, cop0, cop1)));
12084 gcc_unreachable ();
12088 cop1 = CONST0_RTX (mode);
12091 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12092 operands[1+negate], operands[2-negate]);
12094 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12095 operands[2-negate]);
12099 /* Expand conditional increment or decrement using adb/sbb instructions.
12100 The default case using setcc followed by the conditional move can be
12101 done by generic code. */
12103 ix86_expand_int_addcc (rtx operands[])
12105 enum rtx_code code = GET_CODE (operands[1]);
12107 rtx val = const0_rtx;
12108 bool fpcmp = false;
12109 enum machine_mode mode = GET_MODE (operands[0]);
12111 if (operands[3] != const1_rtx
12112 && operands[3] != constm1_rtx)
12114 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12115 ix86_compare_op1, &compare_op))
12117 code = GET_CODE (compare_op);
12119 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12120 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12123 code = ix86_fp_compare_code_to_integer (code);
12130 PUT_CODE (compare_op,
12131 reverse_condition_maybe_unordered
12132 (GET_CODE (compare_op)));
12134 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12136 PUT_MODE (compare_op, mode);
12138 /* Construct either adc or sbb insn. */
12139 if ((code == LTU) == (operands[3] == constm1_rtx))
12141 switch (GET_MODE (operands[0]))
12144 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12147 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12150 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12153 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12156 gcc_unreachable ();
12161 switch (GET_MODE (operands[0]))
12164 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12167 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12170 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12173 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12176 gcc_unreachable ();
12179 return 1; /* DONE */
12183 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12184 works for floating pointer parameters and nonoffsetable memories.
12185 For pushes, it returns just stack offsets; the values will be saved
12186 in the right order. Maximally three parts are generated. */
12189 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12194 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12196 size = (GET_MODE_SIZE (mode) + 4) / 8;
12198 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12199 gcc_assert (size >= 2 && size <= 3);
12201 /* Optimize constant pool reference to immediates. This is used by fp
12202 moves, that force all constants to memory to allow combining. */
12203 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12205 rtx tmp = maybe_get_pool_constant (operand);
12210 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12212 /* The only non-offsetable memories we handle are pushes. */
12213 int ok = push_operand (operand, VOIDmode);
12217 operand = copy_rtx (operand);
12218 PUT_MODE (operand, Pmode);
12219 parts[0] = parts[1] = parts[2] = operand;
12223 if (GET_CODE (operand) == CONST_VECTOR)
12225 enum machine_mode imode = int_mode_for_mode (mode);
12226 /* Caution: if we looked through a constant pool memory above,
12227 the operand may actually have a different mode now. That's
12228 ok, since we want to pun this all the way back to an integer. */
12229 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12230 gcc_assert (operand != NULL);
12236 if (mode == DImode)
12237 split_di (&operand, 1, &parts[0], &parts[1]);
12240 if (REG_P (operand))
12242 gcc_assert (reload_completed);
12243 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12244 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12246 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12248 else if (offsettable_memref_p (operand))
12250 operand = adjust_address (operand, SImode, 0);
12251 parts[0] = operand;
12252 parts[1] = adjust_address (operand, SImode, 4);
12254 parts[2] = adjust_address (operand, SImode, 8);
12256 else if (GET_CODE (operand) == CONST_DOUBLE)
12261 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12265 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12266 parts[2] = gen_int_mode (l[2], SImode);
12269 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12272 gcc_unreachable ();
12274 parts[1] = gen_int_mode (l[1], SImode);
12275 parts[0] = gen_int_mode (l[0], SImode);
12278 gcc_unreachable ();
12283 if (mode == TImode)
12284 split_ti (&operand, 1, &parts[0], &parts[1]);
12285 if (mode == XFmode || mode == TFmode)
12287 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12288 if (REG_P (operand))
12290 gcc_assert (reload_completed);
12291 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12292 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12294 else if (offsettable_memref_p (operand))
12296 operand = adjust_address (operand, DImode, 0);
12297 parts[0] = operand;
12298 parts[1] = adjust_address (operand, upper_mode, 8);
12300 else if (GET_CODE (operand) == CONST_DOUBLE)
12305 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12306 real_to_target (l, &r, mode);
12308 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12309 if (HOST_BITS_PER_WIDE_INT >= 64)
12312 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12313 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12316 parts[0] = immed_double_const (l[0], l[1], DImode);
12318 if (upper_mode == SImode)
12319 parts[1] = gen_int_mode (l[2], SImode);
12320 else if (HOST_BITS_PER_WIDE_INT >= 64)
12323 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12324 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12327 parts[1] = immed_double_const (l[2], l[3], DImode);
12330 gcc_unreachable ();
12337 /* Emit insns to perform a move or push of DI, DF, and XF values.
12338 Return false when normal moves are needed; true when all required
12339 insns have been emitted. Operands 2-4 contain the input values
12340 int the correct order; operands 5-7 contain the output values. */
12343 ix86_split_long_move (rtx operands[])
12348 int collisions = 0;
12349 enum machine_mode mode = GET_MODE (operands[0]);
12351 /* The DFmode expanders may ask us to move double.
12352 For 64bit target this is single move. By hiding the fact
12353 here we simplify i386.md splitters. */
12354 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12356 /* Optimize constant pool reference to immediates. This is used by
12357 fp moves, that force all constants to memory to allow combining. */
12359 if (GET_CODE (operands[1]) == MEM
12360 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12361 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12362 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12363 if (push_operand (operands[0], VOIDmode))
12365 operands[0] = copy_rtx (operands[0]);
12366 PUT_MODE (operands[0], Pmode);
12369 operands[0] = gen_lowpart (DImode, operands[0]);
12370 operands[1] = gen_lowpart (DImode, operands[1]);
12371 emit_move_insn (operands[0], operands[1]);
12375 /* The only non-offsettable memory we handle is push. */
12376 if (push_operand (operands[0], VOIDmode))
12379 gcc_assert (GET_CODE (operands[0]) != MEM
12380 || offsettable_memref_p (operands[0]));
12382 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12383 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12385 /* When emitting push, take care for source operands on the stack. */
12386 if (push && GET_CODE (operands[1]) == MEM
12387 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12390 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12391 XEXP (part[1][2], 0));
12392 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12393 XEXP (part[1][1], 0));
12396 /* We need to do copy in the right order in case an address register
12397 of the source overlaps the destination. */
12398 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12400 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12402 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12405 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12408 /* Collision in the middle part can be handled by reordering. */
12409 if (collisions == 1 && nparts == 3
12410 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12413 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12414 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12417 /* If there are more collisions, we can't handle it by reordering.
12418 Do an lea to the last part and use only one colliding move. */
12419 else if (collisions > 1)
12425 base = part[0][nparts - 1];
12427 /* Handle the case when the last part isn't valid for lea.
12428 Happens in 64-bit mode storing the 12-byte XFmode. */
12429 if (GET_MODE (base) != Pmode)
12430 base = gen_rtx_REG (Pmode, REGNO (base));
12432 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12433 part[1][0] = replace_equiv_address (part[1][0], base);
12434 part[1][1] = replace_equiv_address (part[1][1],
12435 plus_constant (base, UNITS_PER_WORD));
12437 part[1][2] = replace_equiv_address (part[1][2],
12438 plus_constant (base, 8));
12448 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12449 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12450 emit_move_insn (part[0][2], part[1][2]);
12455 /* In 64bit mode we don't have 32bit push available. In case this is
12456 register, it is OK - we will just use larger counterpart. We also
12457 retype memory - these comes from attempt to avoid REX prefix on
12458 moving of second half of TFmode value. */
12459 if (GET_MODE (part[1][1]) == SImode)
12461 switch (GET_CODE (part[1][1]))
12464 part[1][1] = adjust_address (part[1][1], DImode, 0);
12468 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12472 gcc_unreachable ();
12475 if (GET_MODE (part[1][0]) == SImode)
12476 part[1][0] = part[1][1];
12479 emit_move_insn (part[0][1], part[1][1]);
12480 emit_move_insn (part[0][0], part[1][0]);
12484 /* Choose correct order to not overwrite the source before it is copied. */
12485 if ((REG_P (part[0][0])
12486 && REG_P (part[1][1])
12487 && (REGNO (part[0][0]) == REGNO (part[1][1])
12489 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12491 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12495 operands[2] = part[0][2];
12496 operands[3] = part[0][1];
12497 operands[4] = part[0][0];
12498 operands[5] = part[1][2];
12499 operands[6] = part[1][1];
12500 operands[7] = part[1][0];
12504 operands[2] = part[0][1];
12505 operands[3] = part[0][0];
12506 operands[5] = part[1][1];
12507 operands[6] = part[1][0];
12514 operands[2] = part[0][0];
12515 operands[3] = part[0][1];
12516 operands[4] = part[0][2];
12517 operands[5] = part[1][0];
12518 operands[6] = part[1][1];
12519 operands[7] = part[1][2];
12523 operands[2] = part[0][0];
12524 operands[3] = part[0][1];
12525 operands[5] = part[1][0];
12526 operands[6] = part[1][1];
12530 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12533 if (GET_CODE (operands[5]) == CONST_INT
12534 && operands[5] != const0_rtx
12535 && REG_P (operands[2]))
12537 if (GET_CODE (operands[6]) == CONST_INT
12538 && INTVAL (operands[6]) == INTVAL (operands[5]))
12539 operands[6] = operands[2];
12542 && GET_CODE (operands[7]) == CONST_INT
12543 && INTVAL (operands[7]) == INTVAL (operands[5]))
12544 operands[7] = operands[2];
12548 && GET_CODE (operands[6]) == CONST_INT
12549 && operands[6] != const0_rtx
12550 && REG_P (operands[3])
12551 && GET_CODE (operands[7]) == CONST_INT
12552 && INTVAL (operands[7]) == INTVAL (operands[6]))
12553 operands[7] = operands[3];
12556 emit_move_insn (operands[2], operands[5]);
12557 emit_move_insn (operands[3], operands[6]);
12559 emit_move_insn (operands[4], operands[7]);
12564 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12565 left shift by a constant, either using a single shift or
12566 a sequence of add instructions. */
12569 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12573 emit_insn ((mode == DImode
12575 : gen_adddi3) (operand, operand, operand));
12577 else if (!optimize_size
12578 && count * ix86_cost->add <= ix86_cost->shift_const)
12581 for (i=0; i<count; i++)
12583 emit_insn ((mode == DImode
12585 : gen_adddi3) (operand, operand, operand));
12589 emit_insn ((mode == DImode
12591 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12595 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12597 rtx low[2], high[2];
12599 const int single_width = mode == DImode ? 32 : 64;
12601 if (GET_CODE (operands[2]) == CONST_INT)
12603 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12604 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12606 if (count >= single_width)
12608 emit_move_insn (high[0], low[1]);
12609 emit_move_insn (low[0], const0_rtx);
12611 if (count > single_width)
12612 ix86_expand_ashl_const (high[0], count - single_width, mode);
12616 if (!rtx_equal_p (operands[0], operands[1]))
12617 emit_move_insn (operands[0], operands[1]);
12618 emit_insn ((mode == DImode
12620 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12621 ix86_expand_ashl_const (low[0], count, mode);
12626 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12628 if (operands[1] == const1_rtx)
12630 /* Assuming we've chosen a QImode capable registers, then 1 << N
12631 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12632 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12634 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12636 ix86_expand_clear (low[0]);
12637 ix86_expand_clear (high[0]);
12638 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12640 d = gen_lowpart (QImode, low[0]);
12641 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12642 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12643 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12645 d = gen_lowpart (QImode, high[0]);
12646 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12647 s = gen_rtx_NE (QImode, flags, const0_rtx);
12648 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12651 /* Otherwise, we can get the same results by manually performing
12652 a bit extract operation on bit 5/6, and then performing the two
12653 shifts. The two methods of getting 0/1 into low/high are exactly
12654 the same size. Avoiding the shift in the bit extract case helps
12655 pentium4 a bit; no one else seems to care much either way. */
12660 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12661 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12663 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12664 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12666 emit_insn ((mode == DImode
12668 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12669 emit_insn ((mode == DImode
12671 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12672 emit_move_insn (low[0], high[0]);
12673 emit_insn ((mode == DImode
12675 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12678 emit_insn ((mode == DImode
12680 : gen_ashldi3) (low[0], low[0], operands[2]));
12681 emit_insn ((mode == DImode
12683 : gen_ashldi3) (high[0], high[0], operands[2]));
12687 if (operands[1] == constm1_rtx)
12689 /* For -1 << N, we can avoid the shld instruction, because we
12690 know that we're shifting 0...31/63 ones into a -1. */
12691 emit_move_insn (low[0], constm1_rtx);
12693 emit_move_insn (high[0], low[0]);
12695 emit_move_insn (high[0], constm1_rtx);
12699 if (!rtx_equal_p (operands[0], operands[1]))
12700 emit_move_insn (operands[0], operands[1]);
12702 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12703 emit_insn ((mode == DImode
12705 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12708 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12710 if (TARGET_CMOVE && scratch)
12712 ix86_expand_clear (scratch);
12713 emit_insn ((mode == DImode
12714 ? gen_x86_shift_adj_1
12715 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12718 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12722 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12724 rtx low[2], high[2];
12726 const int single_width = mode == DImode ? 32 : 64;
12728 if (GET_CODE (operands[2]) == CONST_INT)
12730 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12731 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12733 if (count == single_width * 2 - 1)
12735 emit_move_insn (high[0], high[1]);
12736 emit_insn ((mode == DImode
12738 : gen_ashrdi3) (high[0], high[0],
12739 GEN_INT (single_width - 1)));
12740 emit_move_insn (low[0], high[0]);
12743 else if (count >= single_width)
12745 emit_move_insn (low[0], high[1]);
12746 emit_move_insn (high[0], low[0]);
12747 emit_insn ((mode == DImode
12749 : gen_ashrdi3) (high[0], high[0],
12750 GEN_INT (single_width - 1)));
12751 if (count > single_width)
12752 emit_insn ((mode == DImode
12754 : gen_ashrdi3) (low[0], low[0],
12755 GEN_INT (count - single_width)));
12759 if (!rtx_equal_p (operands[0], operands[1]))
12760 emit_move_insn (operands[0], operands[1]);
12761 emit_insn ((mode == DImode
12763 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12764 emit_insn ((mode == DImode
12766 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12771 if (!rtx_equal_p (operands[0], operands[1]))
12772 emit_move_insn (operands[0], operands[1]);
12774 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12776 emit_insn ((mode == DImode
12778 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12779 emit_insn ((mode == DImode
12781 : gen_ashrdi3) (high[0], high[0], operands[2]));
12783 if (TARGET_CMOVE && scratch)
12785 emit_move_insn (scratch, high[0]);
12786 emit_insn ((mode == DImode
12788 : gen_ashrdi3) (scratch, scratch,
12789 GEN_INT (single_width - 1)));
12790 emit_insn ((mode == DImode
12791 ? gen_x86_shift_adj_1
12792 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12796 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12801 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12803 rtx low[2], high[2];
12805 const int single_width = mode == DImode ? 32 : 64;
12807 if (GET_CODE (operands[2]) == CONST_INT)
12809 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12810 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12812 if (count >= single_width)
12814 emit_move_insn (low[0], high[1]);
12815 ix86_expand_clear (high[0]);
12817 if (count > single_width)
12818 emit_insn ((mode == DImode
12820 : gen_lshrdi3) (low[0], low[0],
12821 GEN_INT (count - single_width)));
12825 if (!rtx_equal_p (operands[0], operands[1]))
12826 emit_move_insn (operands[0], operands[1]);
12827 emit_insn ((mode == DImode
12829 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12830 emit_insn ((mode == DImode
12832 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12837 if (!rtx_equal_p (operands[0], operands[1]))
12838 emit_move_insn (operands[0], operands[1]);
12840 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12842 emit_insn ((mode == DImode
12844 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12845 emit_insn ((mode == DImode
12847 : gen_lshrdi3) (high[0], high[0], operands[2]));
12849 /* Heh. By reversing the arguments, we can reuse this pattern. */
12850 if (TARGET_CMOVE && scratch)
12852 ix86_expand_clear (scratch);
12853 emit_insn ((mode == DImode
12854 ? gen_x86_shift_adj_1
12855 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12859 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12863 /* Helper function for the string operations below. Dest VARIABLE whether
12864 it is aligned to VALUE bytes. If true, jump to the label. */
12866 ix86_expand_aligntest (rtx variable, int value)
12868 rtx label = gen_label_rtx ();
12869 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12870 if (GET_MODE (variable) == DImode)
12871 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12873 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12874 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12879 /* Adjust COUNTER by the VALUE. */
12881 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12883 if (GET_MODE (countreg) == DImode)
12884 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12886 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12889 /* Zero extend possibly SImode EXP to Pmode register. */
12891 ix86_zero_extend_to_Pmode (rtx exp)
12894 if (GET_MODE (exp) == VOIDmode)
12895 return force_reg (Pmode, exp);
12896 if (GET_MODE (exp) == Pmode)
12897 return copy_to_mode_reg (Pmode, exp);
12898 r = gen_reg_rtx (Pmode);
12899 emit_insn (gen_zero_extendsidi2 (r, exp));
12903 /* Expand string move (memcpy) operation. Use i386 string operations when
12904 profitable. expand_clrmem contains similar code. */
12906 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12908 rtx srcreg, destreg, countreg, srcexp, destexp;
12909 enum machine_mode counter_mode;
12910 HOST_WIDE_INT align = 0;
12911 unsigned HOST_WIDE_INT count = 0;
12913 if (GET_CODE (align_exp) == CONST_INT)
12914 align = INTVAL (align_exp);
12916 /* Can't use any of this if the user has appropriated esi or edi. */
12917 if (global_regs[4] || global_regs[5])
12920 /* This simple hack avoids all inlining code and simplifies code below. */
12921 if (!TARGET_ALIGN_STRINGOPS)
12924 if (GET_CODE (count_exp) == CONST_INT)
12926 count = INTVAL (count_exp);
12927 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12931 /* Figure out proper mode for counter. For 32bits it is always SImode,
12932 for 64bits use SImode when possible, otherwise DImode.
12933 Set count to number of bytes copied when known at compile time. */
12935 || GET_MODE (count_exp) == SImode
12936 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12937 counter_mode = SImode;
12939 counter_mode = DImode;
12941 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12943 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12944 if (destreg != XEXP (dst, 0))
12945 dst = replace_equiv_address_nv (dst, destreg);
12946 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12947 if (srcreg != XEXP (src, 0))
12948 src = replace_equiv_address_nv (src, srcreg);
12950 /* When optimizing for size emit simple rep ; movsb instruction for
12951 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12952 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12953 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12954 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12955 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12956 known to be zero or not. The rep; movsb sequence causes higher
12957 register pressure though, so take that into account. */
12959 if ((!optimize || optimize_size)
12964 || (count & 3) + count / 4 > 6))))
12966 emit_insn (gen_cld ());
12967 countreg = ix86_zero_extend_to_Pmode (count_exp);
12968 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12969 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12970 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12974 /* For constant aligned (or small unaligned) copies use rep movsl
12975 followed by code copying the rest. For PentiumPro ensure 8 byte
12976 alignment to allow rep movsl acceleration. */
12978 else if (count != 0
12980 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12981 || optimize_size || count < (unsigned int) 64))
12983 unsigned HOST_WIDE_INT offset = 0;
12984 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12985 rtx srcmem, dstmem;
12987 emit_insn (gen_cld ());
12988 if (count & ~(size - 1))
12990 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12992 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12994 while (offset < (count & ~(size - 1)))
12996 srcmem = adjust_automodify_address_nv (src, movs_mode,
12998 dstmem = adjust_automodify_address_nv (dst, movs_mode,
13000 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13006 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
13007 & (TARGET_64BIT ? -1 : 0x3fffffff));
13008 countreg = copy_to_mode_reg (counter_mode, countreg);
13009 countreg = ix86_zero_extend_to_Pmode (countreg);
13011 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13012 GEN_INT (size == 4 ? 2 : 3));
13013 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13014 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13016 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13017 countreg, destexp, srcexp));
13018 offset = count & ~(size - 1);
13021 if (size == 8 && (count & 0x04))
13023 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13025 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13027 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13032 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13034 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13036 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13041 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13043 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13045 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13048 /* The generic code based on the glibc implementation:
13049 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13050 allowing accelerated copying there)
13051 - copy the data using rep movsl
13052 - copy the rest. */
13057 rtx srcmem, dstmem;
13058 int desired_alignment = (TARGET_PENTIUMPRO
13059 && (count == 0 || count >= (unsigned int) 260)
13060 ? 8 : UNITS_PER_WORD);
13061 /* Get rid of MEM_OFFSETs, they won't be accurate. */
13062 dst = change_address (dst, BLKmode, destreg);
13063 src = change_address (src, BLKmode, srcreg);
13065 /* In case we don't know anything about the alignment, default to
13066 library version, since it is usually equally fast and result in
13069 Also emit call when we know that the count is large and call overhead
13070 will not be important. */
13071 if (!TARGET_INLINE_ALL_STRINGOPS
13072 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13075 if (TARGET_SINGLE_STRINGOP)
13076 emit_insn (gen_cld ());
13078 countreg2 = gen_reg_rtx (Pmode);
13079 countreg = copy_to_mode_reg (counter_mode, count_exp);
13081 /* We don't use loops to align destination and to copy parts smaller
13082 than 4 bytes, because gcc is able to optimize such code better (in
13083 the case the destination or the count really is aligned, gcc is often
13084 able to predict the branches) and also it is friendlier to the
13085 hardware branch prediction.
13087 Using loops is beneficial for generic case, because we can
13088 handle small counts using the loops. Many CPUs (such as Athlon)
13089 have large REP prefix setup costs.
13091 This is quite costly. Maybe we can revisit this decision later or
13092 add some customizability to this code. */
13094 if (count == 0 && align < desired_alignment)
13096 label = gen_label_rtx ();
13097 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13098 LEU, 0, counter_mode, 1, label);
13102 rtx label = ix86_expand_aligntest (destreg, 1);
13103 srcmem = change_address (src, QImode, srcreg);
13104 dstmem = change_address (dst, QImode, destreg);
13105 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13106 ix86_adjust_counter (countreg, 1);
13107 emit_label (label);
13108 LABEL_NUSES (label) = 1;
13112 rtx label = ix86_expand_aligntest (destreg, 2);
13113 srcmem = change_address (src, HImode, srcreg);
13114 dstmem = change_address (dst, HImode, destreg);
13115 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13116 ix86_adjust_counter (countreg, 2);
13117 emit_label (label);
13118 LABEL_NUSES (label) = 1;
13120 if (align <= 4 && desired_alignment > 4)
13122 rtx label = ix86_expand_aligntest (destreg, 4);
13123 srcmem = change_address (src, SImode, srcreg);
13124 dstmem = change_address (dst, SImode, destreg);
13125 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13126 ix86_adjust_counter (countreg, 4);
13127 emit_label (label);
13128 LABEL_NUSES (label) = 1;
13131 if (label && desired_alignment > 4 && !TARGET_64BIT)
13133 emit_label (label);
13134 LABEL_NUSES (label) = 1;
13137 if (!TARGET_SINGLE_STRINGOP)
13138 emit_insn (gen_cld ());
13141 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13143 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13147 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13148 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13150 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13151 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13152 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13153 countreg2, destexp, srcexp));
13157 emit_label (label);
13158 LABEL_NUSES (label) = 1;
13160 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13162 srcmem = change_address (src, SImode, srcreg);
13163 dstmem = change_address (dst, SImode, destreg);
13164 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13166 if ((align <= 4 || count == 0) && TARGET_64BIT)
13168 rtx label = ix86_expand_aligntest (countreg, 4);
13169 srcmem = change_address (src, SImode, srcreg);
13170 dstmem = change_address (dst, SImode, destreg);
13171 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13172 emit_label (label);
13173 LABEL_NUSES (label) = 1;
13175 if (align > 2 && count != 0 && (count & 2))
13177 srcmem = change_address (src, HImode, srcreg);
13178 dstmem = change_address (dst, HImode, destreg);
13179 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13181 if (align <= 2 || count == 0)
13183 rtx label = ix86_expand_aligntest (countreg, 2);
13184 srcmem = change_address (src, HImode, srcreg);
13185 dstmem = change_address (dst, HImode, destreg);
13186 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13187 emit_label (label);
13188 LABEL_NUSES (label) = 1;
13190 if (align > 1 && count != 0 && (count & 1))
13192 srcmem = change_address (src, QImode, srcreg);
13193 dstmem = change_address (dst, QImode, destreg);
13194 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13196 if (align <= 1 || count == 0)
13198 rtx label = ix86_expand_aligntest (countreg, 1);
13199 srcmem = change_address (src, QImode, srcreg);
13200 dstmem = change_address (dst, QImode, destreg);
13201 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13202 emit_label (label);
13203 LABEL_NUSES (label) = 1;
13210 /* Expand string clear operation (bzero). Use i386 string operations when
13211 profitable. expand_movmem contains similar code. */
13213 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13215 rtx destreg, zeroreg, countreg, destexp;
13216 enum machine_mode counter_mode;
13217 HOST_WIDE_INT align = 0;
13218 unsigned HOST_WIDE_INT count = 0;
13220 if (GET_CODE (align_exp) == CONST_INT)
13221 align = INTVAL (align_exp);
13223 /* Can't use any of this if the user has appropriated esi. */
13224 if (global_regs[4])
13227 /* This simple hack avoids all inlining code and simplifies code below. */
13228 if (!TARGET_ALIGN_STRINGOPS)
13231 if (GET_CODE (count_exp) == CONST_INT)
13233 count = INTVAL (count_exp);
13234 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13237 /* Figure out proper mode for counter. For 32bits it is always SImode,
13238 for 64bits use SImode when possible, otherwise DImode.
13239 Set count to number of bytes copied when known at compile time. */
13241 || GET_MODE (count_exp) == SImode
13242 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13243 counter_mode = SImode;
13245 counter_mode = DImode;
13247 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13248 if (destreg != XEXP (dst, 0))
13249 dst = replace_equiv_address_nv (dst, destreg);
13252 /* When optimizing for size emit simple rep ; movsb instruction for
13253 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13254 sequence is 7 bytes long, so if optimizing for size and count is
13255 small enough that some stosl, stosw and stosb instructions without
13256 rep are shorter, fall back into the next if. */
13258 if ((!optimize || optimize_size)
13261 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13263 emit_insn (gen_cld ());
13265 countreg = ix86_zero_extend_to_Pmode (count_exp);
13266 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13267 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13268 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13270 else if (count != 0
13272 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13273 || optimize_size || count < (unsigned int) 64))
13275 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13276 unsigned HOST_WIDE_INT offset = 0;
13278 emit_insn (gen_cld ());
13280 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13281 if (count & ~(size - 1))
13283 unsigned HOST_WIDE_INT repcount;
13284 unsigned int max_nonrep;
13286 repcount = count >> (size == 4 ? 2 : 3);
13288 repcount &= 0x3fffffff;
13290 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13291 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13292 bytes. In both cases the latter seems to be faster for small
13294 max_nonrep = size == 4 ? 7 : 4;
13295 if (!optimize_size)
13298 case PROCESSOR_PENTIUM4:
13299 case PROCESSOR_NOCONA:
13306 if (repcount <= max_nonrep)
13307 while (repcount-- > 0)
13309 rtx mem = adjust_automodify_address_nv (dst,
13310 GET_MODE (zeroreg),
13312 emit_insn (gen_strset (destreg, mem, zeroreg));
13317 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13318 countreg = ix86_zero_extend_to_Pmode (countreg);
13319 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13320 GEN_INT (size == 4 ? 2 : 3));
13321 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13322 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13324 offset = count & ~(size - 1);
13327 if (size == 8 && (count & 0x04))
13329 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13331 emit_insn (gen_strset (destreg, mem,
13332 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13337 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13339 emit_insn (gen_strset (destreg, mem,
13340 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13345 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13347 emit_insn (gen_strset (destreg, mem,
13348 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13355 /* Compute desired alignment of the string operation. */
13356 int desired_alignment = (TARGET_PENTIUMPRO
13357 && (count == 0 || count >= (unsigned int) 260)
13358 ? 8 : UNITS_PER_WORD);
13360 /* In case we don't know anything about the alignment, default to
13361 library version, since it is usually equally fast and result in
13364 Also emit call when we know that the count is large and call overhead
13365 will not be important. */
13366 if (!TARGET_INLINE_ALL_STRINGOPS
13367 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13370 if (TARGET_SINGLE_STRINGOP)
13371 emit_insn (gen_cld ());
13373 countreg2 = gen_reg_rtx (Pmode);
13374 countreg = copy_to_mode_reg (counter_mode, count_exp);
13375 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13376 /* Get rid of MEM_OFFSET, it won't be accurate. */
13377 dst = change_address (dst, BLKmode, destreg);
13379 if (count == 0 && align < desired_alignment)
13381 label = gen_label_rtx ();
13382 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13383 LEU, 0, counter_mode, 1, label);
13387 rtx label = ix86_expand_aligntest (destreg, 1);
13388 emit_insn (gen_strset (destreg, dst,
13389 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13390 ix86_adjust_counter (countreg, 1);
13391 emit_label (label);
13392 LABEL_NUSES (label) = 1;
13396 rtx label = ix86_expand_aligntest (destreg, 2);
13397 emit_insn (gen_strset (destreg, dst,
13398 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13399 ix86_adjust_counter (countreg, 2);
13400 emit_label (label);
13401 LABEL_NUSES (label) = 1;
13403 if (align <= 4 && desired_alignment > 4)
13405 rtx label = ix86_expand_aligntest (destreg, 4);
13406 emit_insn (gen_strset (destreg, dst,
13408 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13410 ix86_adjust_counter (countreg, 4);
13411 emit_label (label);
13412 LABEL_NUSES (label) = 1;
13415 if (label && desired_alignment > 4 && !TARGET_64BIT)
13417 emit_label (label);
13418 LABEL_NUSES (label) = 1;
13422 if (!TARGET_SINGLE_STRINGOP)
13423 emit_insn (gen_cld ());
13426 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13428 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13432 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13433 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13435 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13436 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13440 emit_label (label);
13441 LABEL_NUSES (label) = 1;
13444 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13445 emit_insn (gen_strset (destreg, dst,
13446 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13447 if (TARGET_64BIT && (align <= 4 || count == 0))
13449 rtx label = ix86_expand_aligntest (countreg, 4);
13450 emit_insn (gen_strset (destreg, dst,
13451 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13452 emit_label (label);
13453 LABEL_NUSES (label) = 1;
13455 if (align > 2 && count != 0 && (count & 2))
13456 emit_insn (gen_strset (destreg, dst,
13457 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13458 if (align <= 2 || count == 0)
13460 rtx label = ix86_expand_aligntest (countreg, 2);
13461 emit_insn (gen_strset (destreg, dst,
13462 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13463 emit_label (label);
13464 LABEL_NUSES (label) = 1;
13466 if (align > 1 && count != 0 && (count & 1))
13467 emit_insn (gen_strset (destreg, dst,
13468 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13469 if (align <= 1 || count == 0)
13471 rtx label = ix86_expand_aligntest (countreg, 1);
13472 emit_insn (gen_strset (destreg, dst,
13473 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13474 emit_label (label);
13475 LABEL_NUSES (label) = 1;
13481 /* Expand strlen. */
13483 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13485 rtx addr, scratch1, scratch2, scratch3, scratch4;
13487 /* The generic case of strlen expander is long. Avoid it's
13488 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13490 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13491 && !TARGET_INLINE_ALL_STRINGOPS
13493 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13496 addr = force_reg (Pmode, XEXP (src, 0));
13497 scratch1 = gen_reg_rtx (Pmode);
13499 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13502 /* Well it seems that some optimizer does not combine a call like
13503 foo(strlen(bar), strlen(bar));
13504 when the move and the subtraction is done here. It does calculate
13505 the length just once when these instructions are done inside of
13506 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13507 often used and I use one fewer register for the lifetime of
13508 output_strlen_unroll() this is better. */
13510 emit_move_insn (out, addr);
13512 ix86_expand_strlensi_unroll_1 (out, src, align);
13514 /* strlensi_unroll_1 returns the address of the zero at the end of
13515 the string, like memchr(), so compute the length by subtracting
13516 the start address. */
13518 emit_insn (gen_subdi3 (out, out, addr));
13520 emit_insn (gen_subsi3 (out, out, addr));
13525 scratch2 = gen_reg_rtx (Pmode);
13526 scratch3 = gen_reg_rtx (Pmode);
13527 scratch4 = force_reg (Pmode, constm1_rtx);
13529 emit_move_insn (scratch3, addr);
13530 eoschar = force_reg (QImode, eoschar);
13532 emit_insn (gen_cld ());
13533 src = replace_equiv_address_nv (src, scratch3);
13535 /* If .md starts supporting :P, this can be done in .md. */
13536 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13537 scratch4), UNSPEC_SCAS);
13538 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13541 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13542 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13546 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13547 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13553 /* Expand the appropriate insns for doing strlen if not just doing
13556 out = result, initialized with the start address
13557 align_rtx = alignment of the address.
13558 scratch = scratch register, initialized with the startaddress when
13559 not aligned, otherwise undefined
13561 This is just the body. It needs the initializations mentioned above and
13562 some address computing at the end. These things are done in i386.md. */
13565 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13569 rtx align_2_label = NULL_RTX;
13570 rtx align_3_label = NULL_RTX;
13571 rtx align_4_label = gen_label_rtx ();
13572 rtx end_0_label = gen_label_rtx ();
13574 rtx tmpreg = gen_reg_rtx (SImode);
13575 rtx scratch = gen_reg_rtx (SImode);
13579 if (GET_CODE (align_rtx) == CONST_INT)
13580 align = INTVAL (align_rtx);
13582 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13584 /* Is there a known alignment and is it less than 4? */
13587 rtx scratch1 = gen_reg_rtx (Pmode);
13588 emit_move_insn (scratch1, out);
13589 /* Is there a known alignment and is it not 2? */
13592 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13593 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13595 /* Leave just the 3 lower bits. */
13596 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13597 NULL_RTX, 0, OPTAB_WIDEN);
13599 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13600 Pmode, 1, align_4_label);
13601 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13602 Pmode, 1, align_2_label);
13603 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13604 Pmode, 1, align_3_label);
13608 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13609 check if is aligned to 4 - byte. */
13611 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13612 NULL_RTX, 0, OPTAB_WIDEN);
13614 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13615 Pmode, 1, align_4_label);
13618 mem = change_address (src, QImode, out);
13620 /* Now compare the bytes. */
13622 /* Compare the first n unaligned byte on a byte per byte basis. */
13623 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13624 QImode, 1, end_0_label);
13626 /* Increment the address. */
13628 emit_insn (gen_adddi3 (out, out, const1_rtx));
13630 emit_insn (gen_addsi3 (out, out, const1_rtx));
13632 /* Not needed with an alignment of 2 */
13635 emit_label (align_2_label);
13637 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13641 emit_insn (gen_adddi3 (out, out, const1_rtx));
13643 emit_insn (gen_addsi3 (out, out, const1_rtx));
13645 emit_label (align_3_label);
13648 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13652 emit_insn (gen_adddi3 (out, out, const1_rtx));
13654 emit_insn (gen_addsi3 (out, out, const1_rtx));
13657 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13658 align this loop. It gives only huge programs, but does not help to
13660 emit_label (align_4_label);
13662 mem = change_address (src, SImode, out);
13663 emit_move_insn (scratch, mem);
13665 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13667 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13669 /* This formula yields a nonzero result iff one of the bytes is zero.
13670 This saves three branches inside loop and many cycles. */
13672 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13673 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13674 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13675 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13676 gen_int_mode (0x80808080, SImode)));
13677 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13682 rtx reg = gen_reg_rtx (SImode);
13683 rtx reg2 = gen_reg_rtx (Pmode);
13684 emit_move_insn (reg, tmpreg);
13685 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13687 /* If zero is not in the first two bytes, move two bytes forward. */
13688 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13689 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13690 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13691 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13692 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13695 /* Emit lea manually to avoid clobbering of flags. */
13696 emit_insn (gen_rtx_SET (SImode, reg2,
13697 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13699 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13700 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13701 emit_insn (gen_rtx_SET (VOIDmode, out,
13702 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13709 rtx end_2_label = gen_label_rtx ();
13710 /* Is zero in the first two bytes? */
13712 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13713 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13714 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13715 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13716 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13718 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13719 JUMP_LABEL (tmp) = end_2_label;
13721 /* Not in the first two. Move two bytes forward. */
13722 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13724 emit_insn (gen_adddi3 (out, out, const2_rtx));
13726 emit_insn (gen_addsi3 (out, out, const2_rtx));
13728 emit_label (end_2_label);
13732 /* Avoid branch in fixing the byte. */
13733 tmpreg = gen_lowpart (QImode, tmpreg);
13734 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13735 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13737 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13739 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13741 emit_label (end_0_label);
13745 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13746 rtx callarg2 ATTRIBUTE_UNUSED,
13747 rtx pop, int sibcall)
13749 rtx use = NULL, call;
13751 if (pop == const0_rtx)
13753 gcc_assert (!TARGET_64BIT || !pop);
13755 if (TARGET_MACHO && !TARGET_64BIT)
13758 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13759 fnaddr = machopic_indirect_call_target (fnaddr);
13764 /* Static functions and indirect calls don't need the pic register. */
13765 if (! TARGET_64BIT && flag_pic
13766 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13767 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13768 use_reg (&use, pic_offset_table_rtx);
13771 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13773 rtx al = gen_rtx_REG (QImode, 0);
13774 emit_move_insn (al, callarg2);
13775 use_reg (&use, al);
13778 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13780 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13781 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13783 if (sibcall && TARGET_64BIT
13784 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13787 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13788 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13789 emit_move_insn (fnaddr, addr);
13790 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13793 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13795 call = gen_rtx_SET (VOIDmode, retval, call);
13798 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13799 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13800 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13803 call = emit_call_insn (call);
13805 CALL_INSN_FUNCTION_USAGE (call) = use;
13809 /* Clear stack slot assignments remembered from previous functions.
13810 This is called from INIT_EXPANDERS once before RTL is emitted for each
13813 static struct machine_function *
13814 ix86_init_machine_status (void)
13816 struct machine_function *f;
13818 f = ggc_alloc_cleared (sizeof (struct machine_function));
13819 f->use_fast_prologue_epilogue_nregs = -1;
13820 f->tls_descriptor_call_expanded_p = 0;
13825 /* Return a MEM corresponding to a stack slot with mode MODE.
13826 Allocate a new slot if necessary.
13828 The RTL for a function can have several slots available: N is
13829 which slot to use. */
13832 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13834 struct stack_local_entry *s;
13836 gcc_assert (n < MAX_386_STACK_LOCALS);
13838 /* Virtual slot is valid only before vregs are instantiated. */
13839 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13841 for (s = ix86_stack_locals; s; s = s->next)
13842 if (s->mode == mode && s->n == n)
13845 s = (struct stack_local_entry *)
13846 ggc_alloc (sizeof (struct stack_local_entry));
13849 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13851 s->next = ix86_stack_locals;
13852 ix86_stack_locals = s;
13856 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13858 static GTY(()) rtx ix86_tls_symbol;
13860 ix86_tls_get_addr (void)
13863 if (!ix86_tls_symbol)
13865 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13866 (TARGET_ANY_GNU_TLS
13868 ? "___tls_get_addr"
13869 : "__tls_get_addr");
13872 return ix86_tls_symbol;
13875 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13877 static GTY(()) rtx ix86_tls_module_base_symbol;
13879 ix86_tls_module_base (void)
13882 if (!ix86_tls_module_base_symbol)
13884 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13885 "_TLS_MODULE_BASE_");
13886 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13887 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13890 return ix86_tls_module_base_symbol;
13893 /* Calculate the length of the memory address in the instruction
13894 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13897 memory_address_length (rtx addr)
13899 struct ix86_address parts;
13900 rtx base, index, disp;
13904 if (GET_CODE (addr) == PRE_DEC
13905 || GET_CODE (addr) == POST_INC
13906 || GET_CODE (addr) == PRE_MODIFY
13907 || GET_CODE (addr) == POST_MODIFY)
13910 ok = ix86_decompose_address (addr, &parts);
13913 if (parts.base && GET_CODE (parts.base) == SUBREG)
13914 parts.base = SUBREG_REG (parts.base);
13915 if (parts.index && GET_CODE (parts.index) == SUBREG)
13916 parts.index = SUBREG_REG (parts.index);
13919 index = parts.index;
13924 - esp as the base always wants an index,
13925 - ebp as the base always wants a displacement. */
13927 /* Register Indirect. */
13928 if (base && !index && !disp)
13930 /* esp (for its index) and ebp (for its displacement) need
13931 the two-byte modrm form. */
13932 if (addr == stack_pointer_rtx
13933 || addr == arg_pointer_rtx
13934 || addr == frame_pointer_rtx
13935 || addr == hard_frame_pointer_rtx)
13939 /* Direct Addressing. */
13940 else if (disp && !base && !index)
13945 /* Find the length of the displacement constant. */
13948 if (base && satisfies_constraint_K (disp))
13953 /* ebp always wants a displacement. */
13954 else if (base == hard_frame_pointer_rtx)
13957 /* An index requires the two-byte modrm form.... */
13959 /* ...like esp, which always wants an index. */
13960 || base == stack_pointer_rtx
13961 || base == arg_pointer_rtx
13962 || base == frame_pointer_rtx)
13969 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13970 is set, expect that insn have 8bit immediate alternative. */
13972 ix86_attr_length_immediate_default (rtx insn, int shortform)
13976 extract_insn_cached (insn);
13977 for (i = recog_data.n_operands - 1; i >= 0; --i)
13978 if (CONSTANT_P (recog_data.operand[i]))
13981 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13985 switch (get_attr_mode (insn))
13996 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
14001 fatal_insn ("unknown insn mode", insn);
14007 /* Compute default value for "length_address" attribute. */
14009 ix86_attr_length_address_default (rtx insn)
14013 if (get_attr_type (insn) == TYPE_LEA)
14015 rtx set = PATTERN (insn);
14017 if (GET_CODE (set) == PARALLEL)
14018 set = XVECEXP (set, 0, 0);
14020 gcc_assert (GET_CODE (set) == SET);
14022 return memory_address_length (SET_SRC (set));
14025 extract_insn_cached (insn);
14026 for (i = recog_data.n_operands - 1; i >= 0; --i)
14027 if (GET_CODE (recog_data.operand[i]) == MEM)
14029 return memory_address_length (XEXP (recog_data.operand[i], 0));
14035 /* Return the maximum number of instructions a cpu can issue. */
14038 ix86_issue_rate (void)
14042 case PROCESSOR_PENTIUM:
14046 case PROCESSOR_PENTIUMPRO:
14047 case PROCESSOR_PENTIUM4:
14048 case PROCESSOR_ATHLON:
14050 case PROCESSOR_AMDFAM10:
14051 case PROCESSOR_NOCONA:
14052 case PROCESSOR_GENERIC32:
14053 case PROCESSOR_GENERIC64:
14056 case PROCESSOR_CORE2:
14064 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14065 by DEP_INSN and nothing set by DEP_INSN. */
14068 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14072 /* Simplify the test for uninteresting insns. */
14073 if (insn_type != TYPE_SETCC
14074 && insn_type != TYPE_ICMOV
14075 && insn_type != TYPE_FCMOV
14076 && insn_type != TYPE_IBR)
14079 if ((set = single_set (dep_insn)) != 0)
14081 set = SET_DEST (set);
14084 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14085 && XVECLEN (PATTERN (dep_insn), 0) == 2
14086 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14087 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14089 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14090 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14095 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14098 /* This test is true if the dependent insn reads the flags but
14099 not any other potentially set register. */
14100 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14103 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14109 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14110 address with operands set by DEP_INSN. */
14113 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14117 if (insn_type == TYPE_LEA
14120 addr = PATTERN (insn);
14122 if (GET_CODE (addr) == PARALLEL)
14123 addr = XVECEXP (addr, 0, 0);
14125 gcc_assert (GET_CODE (addr) == SET);
14127 addr = SET_SRC (addr);
14132 extract_insn_cached (insn);
14133 for (i = recog_data.n_operands - 1; i >= 0; --i)
14134 if (GET_CODE (recog_data.operand[i]) == MEM)
14136 addr = XEXP (recog_data.operand[i], 0);
14143 return modified_in_p (addr, dep_insn);
14147 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14149 enum attr_type insn_type, dep_insn_type;
14150 enum attr_memory memory;
14152 int dep_insn_code_number;
14154 /* Anti and output dependencies have zero cost on all CPUs. */
14155 if (REG_NOTE_KIND (link) != 0)
14158 dep_insn_code_number = recog_memoized (dep_insn);
14160 /* If we can't recognize the insns, we can't really do anything. */
14161 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14164 insn_type = get_attr_type (insn);
14165 dep_insn_type = get_attr_type (dep_insn);
14169 case PROCESSOR_PENTIUM:
14170 /* Address Generation Interlock adds a cycle of latency. */
14171 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14174 /* ??? Compares pair with jump/setcc. */
14175 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14178 /* Floating point stores require value to be ready one cycle earlier. */
14179 if (insn_type == TYPE_FMOV
14180 && get_attr_memory (insn) == MEMORY_STORE
14181 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14185 case PROCESSOR_PENTIUMPRO:
14186 memory = get_attr_memory (insn);
14188 /* INT->FP conversion is expensive. */
14189 if (get_attr_fp_int_src (dep_insn))
14192 /* There is one cycle extra latency between an FP op and a store. */
14193 if (insn_type == TYPE_FMOV
14194 && (set = single_set (dep_insn)) != NULL_RTX
14195 && (set2 = single_set (insn)) != NULL_RTX
14196 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14197 && GET_CODE (SET_DEST (set2)) == MEM)
14200 /* Show ability of reorder buffer to hide latency of load by executing
14201 in parallel with previous instruction in case
14202 previous instruction is not needed to compute the address. */
14203 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14204 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14206 /* Claim moves to take one cycle, as core can issue one load
14207 at time and the next load can start cycle later. */
14208 if (dep_insn_type == TYPE_IMOV
14209 || dep_insn_type == TYPE_FMOV)
14217 memory = get_attr_memory (insn);
14219 /* The esp dependency is resolved before the instruction is really
14221 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14222 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14225 /* INT->FP conversion is expensive. */
14226 if (get_attr_fp_int_src (dep_insn))
14229 /* Show ability of reorder buffer to hide latency of load by executing
14230 in parallel with previous instruction in case
14231 previous instruction is not needed to compute the address. */
14232 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14233 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14235 /* Claim moves to take one cycle, as core can issue one load
14236 at time and the next load can start cycle later. */
14237 if (dep_insn_type == TYPE_IMOV
14238 || dep_insn_type == TYPE_FMOV)
14247 case PROCESSOR_ATHLON:
14249 case PROCESSOR_AMDFAM10:
14250 case PROCESSOR_GENERIC32:
14251 case PROCESSOR_GENERIC64:
14252 memory = get_attr_memory (insn);
14254 /* Show ability of reorder buffer to hide latency of load by executing
14255 in parallel with previous instruction in case
14256 previous instruction is not needed to compute the address. */
14257 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14258 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14260 enum attr_unit unit = get_attr_unit (insn);
14263 /* Because of the difference between the length of integer and
14264 floating unit pipeline preparation stages, the memory operands
14265 for floating point are cheaper.
14267 ??? For Athlon it the difference is most probably 2. */
14268 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14271 loadcost = TARGET_ATHLON ? 2 : 0;
14273 if (cost >= loadcost)
14286 /* How many alternative schedules to try. This should be as wide as the
14287 scheduling freedom in the DFA, but no wider. Making this value too
14288 large results extra work for the scheduler. */
14291 ia32_multipass_dfa_lookahead (void)
14293 if (ix86_tune == PROCESSOR_PENTIUM)
14296 if (ix86_tune == PROCESSOR_PENTIUMPRO
14297 || ix86_tune == PROCESSOR_K6)
14305 /* Compute the alignment given to a constant that is being placed in memory.
14306 EXP is the constant and ALIGN is the alignment that the object would
14308 The value of this function is used instead of that alignment to align
14312 ix86_constant_alignment (tree exp, int align)
14314 if (TREE_CODE (exp) == REAL_CST)
14316 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14318 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14321 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14322 && !TARGET_NO_ALIGN_LONG_STRINGS
14323 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14324 return BITS_PER_WORD;
14329 /* Compute the alignment for a static variable.
14330 TYPE is the data type, and ALIGN is the alignment that
14331 the object would ordinarily have. The value of this function is used
14332 instead of that alignment to align the object. */
14335 ix86_data_alignment (tree type, int align)
14337 int max_align = optimize_size ? BITS_PER_WORD : 256;
14339 if (AGGREGATE_TYPE_P (type)
14340 && TYPE_SIZE (type)
14341 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14342 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14343 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14344 && align < max_align)
14347 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14348 to 16byte boundary. */
14351 if (AGGREGATE_TYPE_P (type)
14352 && TYPE_SIZE (type)
14353 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14354 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14355 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14359 if (TREE_CODE (type) == ARRAY_TYPE)
14361 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14363 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14366 else if (TREE_CODE (type) == COMPLEX_TYPE)
14369 if (TYPE_MODE (type) == DCmode && align < 64)
14371 if (TYPE_MODE (type) == XCmode && align < 128)
14374 else if ((TREE_CODE (type) == RECORD_TYPE
14375 || TREE_CODE (type) == UNION_TYPE
14376 || TREE_CODE (type) == QUAL_UNION_TYPE)
14377 && TYPE_FIELDS (type))
14379 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14381 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14384 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14385 || TREE_CODE (type) == INTEGER_TYPE)
14387 if (TYPE_MODE (type) == DFmode && align < 64)
14389 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14396 /* Compute the alignment for a local variable.
14397 TYPE is the data type, and ALIGN is the alignment that
14398 the object would ordinarily have. The value of this macro is used
14399 instead of that alignment to align the object. */
14402 ix86_local_alignment (tree type, int align)
14404 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14405 to 16byte boundary. */
14408 if (AGGREGATE_TYPE_P (type)
14409 && TYPE_SIZE (type)
14410 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14411 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14412 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14415 if (TREE_CODE (type) == ARRAY_TYPE)
14417 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14419 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14422 else if (TREE_CODE (type) == COMPLEX_TYPE)
14424 if (TYPE_MODE (type) == DCmode && align < 64)
14426 if (TYPE_MODE (type) == XCmode && align < 128)
14429 else if ((TREE_CODE (type) == RECORD_TYPE
14430 || TREE_CODE (type) == UNION_TYPE
14431 || TREE_CODE (type) == QUAL_UNION_TYPE)
14432 && TYPE_FIELDS (type))
14434 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14436 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14439 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14440 || TREE_CODE (type) == INTEGER_TYPE)
14443 if (TYPE_MODE (type) == DFmode && align < 64)
14445 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14451 /* Emit RTL insns to initialize the variable parts of a trampoline.
14452 FNADDR is an RTX for the address of the function's pure code.
14453 CXT is an RTX for the static chain value for the function. */
14455 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14459 /* Compute offset from the end of the jmp to the target function. */
14460 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14461 plus_constant (tramp, 10),
14462 NULL_RTX, 1, OPTAB_DIRECT);
14463 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14464 gen_int_mode (0xb9, QImode));
14465 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14466 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14467 gen_int_mode (0xe9, QImode));
14468 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14473 /* Try to load address using shorter movl instead of movabs.
14474 We may want to support movq for kernel mode, but kernel does not use
14475 trampolines at the moment. */
14476 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14478 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14479 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14480 gen_int_mode (0xbb41, HImode));
14481 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14482 gen_lowpart (SImode, fnaddr));
14487 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14488 gen_int_mode (0xbb49, HImode));
14489 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14493 /* Load static chain using movabs to r10. */
14494 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14495 gen_int_mode (0xba49, HImode));
14496 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14499 /* Jump to the r11 */
14500 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14501 gen_int_mode (0xff49, HImode));
14502 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14503 gen_int_mode (0xe3, QImode));
14505 gcc_assert (offset <= TRAMPOLINE_SIZE);
14508 #ifdef ENABLE_EXECUTE_STACK
14509 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14510 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14514 /* Codes for all the SSE/MMX builtins. */
14517 IX86_BUILTIN_ADDPS,
14518 IX86_BUILTIN_ADDSS,
14519 IX86_BUILTIN_DIVPS,
14520 IX86_BUILTIN_DIVSS,
14521 IX86_BUILTIN_MULPS,
14522 IX86_BUILTIN_MULSS,
14523 IX86_BUILTIN_SUBPS,
14524 IX86_BUILTIN_SUBSS,
14526 IX86_BUILTIN_CMPEQPS,
14527 IX86_BUILTIN_CMPLTPS,
14528 IX86_BUILTIN_CMPLEPS,
14529 IX86_BUILTIN_CMPGTPS,
14530 IX86_BUILTIN_CMPGEPS,
14531 IX86_BUILTIN_CMPNEQPS,
14532 IX86_BUILTIN_CMPNLTPS,
14533 IX86_BUILTIN_CMPNLEPS,
14534 IX86_BUILTIN_CMPNGTPS,
14535 IX86_BUILTIN_CMPNGEPS,
14536 IX86_BUILTIN_CMPORDPS,
14537 IX86_BUILTIN_CMPUNORDPS,
14538 IX86_BUILTIN_CMPEQSS,
14539 IX86_BUILTIN_CMPLTSS,
14540 IX86_BUILTIN_CMPLESS,
14541 IX86_BUILTIN_CMPNEQSS,
14542 IX86_BUILTIN_CMPNLTSS,
14543 IX86_BUILTIN_CMPNLESS,
14544 IX86_BUILTIN_CMPNGTSS,
14545 IX86_BUILTIN_CMPNGESS,
14546 IX86_BUILTIN_CMPORDSS,
14547 IX86_BUILTIN_CMPUNORDSS,
14549 IX86_BUILTIN_COMIEQSS,
14550 IX86_BUILTIN_COMILTSS,
14551 IX86_BUILTIN_COMILESS,
14552 IX86_BUILTIN_COMIGTSS,
14553 IX86_BUILTIN_COMIGESS,
14554 IX86_BUILTIN_COMINEQSS,
14555 IX86_BUILTIN_UCOMIEQSS,
14556 IX86_BUILTIN_UCOMILTSS,
14557 IX86_BUILTIN_UCOMILESS,
14558 IX86_BUILTIN_UCOMIGTSS,
14559 IX86_BUILTIN_UCOMIGESS,
14560 IX86_BUILTIN_UCOMINEQSS,
14562 IX86_BUILTIN_CVTPI2PS,
14563 IX86_BUILTIN_CVTPS2PI,
14564 IX86_BUILTIN_CVTSI2SS,
14565 IX86_BUILTIN_CVTSI642SS,
14566 IX86_BUILTIN_CVTSS2SI,
14567 IX86_BUILTIN_CVTSS2SI64,
14568 IX86_BUILTIN_CVTTPS2PI,
14569 IX86_BUILTIN_CVTTSS2SI,
14570 IX86_BUILTIN_CVTTSS2SI64,
14572 IX86_BUILTIN_MAXPS,
14573 IX86_BUILTIN_MAXSS,
14574 IX86_BUILTIN_MINPS,
14575 IX86_BUILTIN_MINSS,
14577 IX86_BUILTIN_LOADUPS,
14578 IX86_BUILTIN_STOREUPS,
14579 IX86_BUILTIN_MOVSS,
14581 IX86_BUILTIN_MOVHLPS,
14582 IX86_BUILTIN_MOVLHPS,
14583 IX86_BUILTIN_LOADHPS,
14584 IX86_BUILTIN_LOADLPS,
14585 IX86_BUILTIN_STOREHPS,
14586 IX86_BUILTIN_STORELPS,
14588 IX86_BUILTIN_MASKMOVQ,
14589 IX86_BUILTIN_MOVMSKPS,
14590 IX86_BUILTIN_PMOVMSKB,
14592 IX86_BUILTIN_MOVNTPS,
14593 IX86_BUILTIN_MOVNTQ,
14595 IX86_BUILTIN_LOADDQU,
14596 IX86_BUILTIN_STOREDQU,
14598 IX86_BUILTIN_PACKSSWB,
14599 IX86_BUILTIN_PACKSSDW,
14600 IX86_BUILTIN_PACKUSWB,
14602 IX86_BUILTIN_PADDB,
14603 IX86_BUILTIN_PADDW,
14604 IX86_BUILTIN_PADDD,
14605 IX86_BUILTIN_PADDQ,
14606 IX86_BUILTIN_PADDSB,
14607 IX86_BUILTIN_PADDSW,
14608 IX86_BUILTIN_PADDUSB,
14609 IX86_BUILTIN_PADDUSW,
14610 IX86_BUILTIN_PSUBB,
14611 IX86_BUILTIN_PSUBW,
14612 IX86_BUILTIN_PSUBD,
14613 IX86_BUILTIN_PSUBQ,
14614 IX86_BUILTIN_PSUBSB,
14615 IX86_BUILTIN_PSUBSW,
14616 IX86_BUILTIN_PSUBUSB,
14617 IX86_BUILTIN_PSUBUSW,
14620 IX86_BUILTIN_PANDN,
14624 IX86_BUILTIN_PAVGB,
14625 IX86_BUILTIN_PAVGW,
14627 IX86_BUILTIN_PCMPEQB,
14628 IX86_BUILTIN_PCMPEQW,
14629 IX86_BUILTIN_PCMPEQD,
14630 IX86_BUILTIN_PCMPGTB,
14631 IX86_BUILTIN_PCMPGTW,
14632 IX86_BUILTIN_PCMPGTD,
14634 IX86_BUILTIN_PMADDWD,
14636 IX86_BUILTIN_PMAXSW,
14637 IX86_BUILTIN_PMAXUB,
14638 IX86_BUILTIN_PMINSW,
14639 IX86_BUILTIN_PMINUB,
14641 IX86_BUILTIN_PMULHUW,
14642 IX86_BUILTIN_PMULHW,
14643 IX86_BUILTIN_PMULLW,
14645 IX86_BUILTIN_PSADBW,
14646 IX86_BUILTIN_PSHUFW,
14648 IX86_BUILTIN_PSLLW,
14649 IX86_BUILTIN_PSLLD,
14650 IX86_BUILTIN_PSLLQ,
14651 IX86_BUILTIN_PSRAW,
14652 IX86_BUILTIN_PSRAD,
14653 IX86_BUILTIN_PSRLW,
14654 IX86_BUILTIN_PSRLD,
14655 IX86_BUILTIN_PSRLQ,
14656 IX86_BUILTIN_PSLLWI,
14657 IX86_BUILTIN_PSLLDI,
14658 IX86_BUILTIN_PSLLQI,
14659 IX86_BUILTIN_PSRAWI,
14660 IX86_BUILTIN_PSRADI,
14661 IX86_BUILTIN_PSRLWI,
14662 IX86_BUILTIN_PSRLDI,
14663 IX86_BUILTIN_PSRLQI,
14665 IX86_BUILTIN_PUNPCKHBW,
14666 IX86_BUILTIN_PUNPCKHWD,
14667 IX86_BUILTIN_PUNPCKHDQ,
14668 IX86_BUILTIN_PUNPCKLBW,
14669 IX86_BUILTIN_PUNPCKLWD,
14670 IX86_BUILTIN_PUNPCKLDQ,
14672 IX86_BUILTIN_SHUFPS,
14674 IX86_BUILTIN_RCPPS,
14675 IX86_BUILTIN_RCPSS,
14676 IX86_BUILTIN_RSQRTPS,
14677 IX86_BUILTIN_RSQRTSS,
14678 IX86_BUILTIN_SQRTPS,
14679 IX86_BUILTIN_SQRTSS,
14681 IX86_BUILTIN_UNPCKHPS,
14682 IX86_BUILTIN_UNPCKLPS,
14684 IX86_BUILTIN_ANDPS,
14685 IX86_BUILTIN_ANDNPS,
14687 IX86_BUILTIN_XORPS,
14690 IX86_BUILTIN_LDMXCSR,
14691 IX86_BUILTIN_STMXCSR,
14692 IX86_BUILTIN_SFENCE,
14694 /* 3DNow! Original */
14695 IX86_BUILTIN_FEMMS,
14696 IX86_BUILTIN_PAVGUSB,
14697 IX86_BUILTIN_PF2ID,
14698 IX86_BUILTIN_PFACC,
14699 IX86_BUILTIN_PFADD,
14700 IX86_BUILTIN_PFCMPEQ,
14701 IX86_BUILTIN_PFCMPGE,
14702 IX86_BUILTIN_PFCMPGT,
14703 IX86_BUILTIN_PFMAX,
14704 IX86_BUILTIN_PFMIN,
14705 IX86_BUILTIN_PFMUL,
14706 IX86_BUILTIN_PFRCP,
14707 IX86_BUILTIN_PFRCPIT1,
14708 IX86_BUILTIN_PFRCPIT2,
14709 IX86_BUILTIN_PFRSQIT1,
14710 IX86_BUILTIN_PFRSQRT,
14711 IX86_BUILTIN_PFSUB,
14712 IX86_BUILTIN_PFSUBR,
14713 IX86_BUILTIN_PI2FD,
14714 IX86_BUILTIN_PMULHRW,
14716 /* 3DNow! Athlon Extensions */
14717 IX86_BUILTIN_PF2IW,
14718 IX86_BUILTIN_PFNACC,
14719 IX86_BUILTIN_PFPNACC,
14720 IX86_BUILTIN_PI2FW,
14721 IX86_BUILTIN_PSWAPDSI,
14722 IX86_BUILTIN_PSWAPDSF,
14725 IX86_BUILTIN_ADDPD,
14726 IX86_BUILTIN_ADDSD,
14727 IX86_BUILTIN_DIVPD,
14728 IX86_BUILTIN_DIVSD,
14729 IX86_BUILTIN_MULPD,
14730 IX86_BUILTIN_MULSD,
14731 IX86_BUILTIN_SUBPD,
14732 IX86_BUILTIN_SUBSD,
14734 IX86_BUILTIN_CMPEQPD,
14735 IX86_BUILTIN_CMPLTPD,
14736 IX86_BUILTIN_CMPLEPD,
14737 IX86_BUILTIN_CMPGTPD,
14738 IX86_BUILTIN_CMPGEPD,
14739 IX86_BUILTIN_CMPNEQPD,
14740 IX86_BUILTIN_CMPNLTPD,
14741 IX86_BUILTIN_CMPNLEPD,
14742 IX86_BUILTIN_CMPNGTPD,
14743 IX86_BUILTIN_CMPNGEPD,
14744 IX86_BUILTIN_CMPORDPD,
14745 IX86_BUILTIN_CMPUNORDPD,
14746 IX86_BUILTIN_CMPNEPD,
14747 IX86_BUILTIN_CMPEQSD,
14748 IX86_BUILTIN_CMPLTSD,
14749 IX86_BUILTIN_CMPLESD,
14750 IX86_BUILTIN_CMPNEQSD,
14751 IX86_BUILTIN_CMPNLTSD,
14752 IX86_BUILTIN_CMPNLESD,
14753 IX86_BUILTIN_CMPORDSD,
14754 IX86_BUILTIN_CMPUNORDSD,
14755 IX86_BUILTIN_CMPNESD,
14757 IX86_BUILTIN_COMIEQSD,
14758 IX86_BUILTIN_COMILTSD,
14759 IX86_BUILTIN_COMILESD,
14760 IX86_BUILTIN_COMIGTSD,
14761 IX86_BUILTIN_COMIGESD,
14762 IX86_BUILTIN_COMINEQSD,
14763 IX86_BUILTIN_UCOMIEQSD,
14764 IX86_BUILTIN_UCOMILTSD,
14765 IX86_BUILTIN_UCOMILESD,
14766 IX86_BUILTIN_UCOMIGTSD,
14767 IX86_BUILTIN_UCOMIGESD,
14768 IX86_BUILTIN_UCOMINEQSD,
14770 IX86_BUILTIN_MAXPD,
14771 IX86_BUILTIN_MAXSD,
14772 IX86_BUILTIN_MINPD,
14773 IX86_BUILTIN_MINSD,
14775 IX86_BUILTIN_ANDPD,
14776 IX86_BUILTIN_ANDNPD,
14778 IX86_BUILTIN_XORPD,
14780 IX86_BUILTIN_SQRTPD,
14781 IX86_BUILTIN_SQRTSD,
14783 IX86_BUILTIN_UNPCKHPD,
14784 IX86_BUILTIN_UNPCKLPD,
14786 IX86_BUILTIN_SHUFPD,
14788 IX86_BUILTIN_LOADUPD,
14789 IX86_BUILTIN_STOREUPD,
14790 IX86_BUILTIN_MOVSD,
14792 IX86_BUILTIN_LOADHPD,
14793 IX86_BUILTIN_LOADLPD,
14795 IX86_BUILTIN_CVTDQ2PD,
14796 IX86_BUILTIN_CVTDQ2PS,
14798 IX86_BUILTIN_CVTPD2DQ,
14799 IX86_BUILTIN_CVTPD2PI,
14800 IX86_BUILTIN_CVTPD2PS,
14801 IX86_BUILTIN_CVTTPD2DQ,
14802 IX86_BUILTIN_CVTTPD2PI,
14804 IX86_BUILTIN_CVTPI2PD,
14805 IX86_BUILTIN_CVTSI2SD,
14806 IX86_BUILTIN_CVTSI642SD,
14808 IX86_BUILTIN_CVTSD2SI,
14809 IX86_BUILTIN_CVTSD2SI64,
14810 IX86_BUILTIN_CVTSD2SS,
14811 IX86_BUILTIN_CVTSS2SD,
14812 IX86_BUILTIN_CVTTSD2SI,
14813 IX86_BUILTIN_CVTTSD2SI64,
14815 IX86_BUILTIN_CVTPS2DQ,
14816 IX86_BUILTIN_CVTPS2PD,
14817 IX86_BUILTIN_CVTTPS2DQ,
14819 IX86_BUILTIN_MOVNTI,
14820 IX86_BUILTIN_MOVNTPD,
14821 IX86_BUILTIN_MOVNTDQ,
14824 IX86_BUILTIN_MASKMOVDQU,
14825 IX86_BUILTIN_MOVMSKPD,
14826 IX86_BUILTIN_PMOVMSKB128,
14828 IX86_BUILTIN_PACKSSWB128,
14829 IX86_BUILTIN_PACKSSDW128,
14830 IX86_BUILTIN_PACKUSWB128,
14832 IX86_BUILTIN_PADDB128,
14833 IX86_BUILTIN_PADDW128,
14834 IX86_BUILTIN_PADDD128,
14835 IX86_BUILTIN_PADDQ128,
14836 IX86_BUILTIN_PADDSB128,
14837 IX86_BUILTIN_PADDSW128,
14838 IX86_BUILTIN_PADDUSB128,
14839 IX86_BUILTIN_PADDUSW128,
14840 IX86_BUILTIN_PSUBB128,
14841 IX86_BUILTIN_PSUBW128,
14842 IX86_BUILTIN_PSUBD128,
14843 IX86_BUILTIN_PSUBQ128,
14844 IX86_BUILTIN_PSUBSB128,
14845 IX86_BUILTIN_PSUBSW128,
14846 IX86_BUILTIN_PSUBUSB128,
14847 IX86_BUILTIN_PSUBUSW128,
14849 IX86_BUILTIN_PAND128,
14850 IX86_BUILTIN_PANDN128,
14851 IX86_BUILTIN_POR128,
14852 IX86_BUILTIN_PXOR128,
14854 IX86_BUILTIN_PAVGB128,
14855 IX86_BUILTIN_PAVGW128,
14857 IX86_BUILTIN_PCMPEQB128,
14858 IX86_BUILTIN_PCMPEQW128,
14859 IX86_BUILTIN_PCMPEQD128,
14860 IX86_BUILTIN_PCMPGTB128,
14861 IX86_BUILTIN_PCMPGTW128,
14862 IX86_BUILTIN_PCMPGTD128,
14864 IX86_BUILTIN_PMADDWD128,
14866 IX86_BUILTIN_PMAXSW128,
14867 IX86_BUILTIN_PMAXUB128,
14868 IX86_BUILTIN_PMINSW128,
14869 IX86_BUILTIN_PMINUB128,
14871 IX86_BUILTIN_PMULUDQ,
14872 IX86_BUILTIN_PMULUDQ128,
14873 IX86_BUILTIN_PMULHUW128,
14874 IX86_BUILTIN_PMULHW128,
14875 IX86_BUILTIN_PMULLW128,
14877 IX86_BUILTIN_PSADBW128,
14878 IX86_BUILTIN_PSHUFHW,
14879 IX86_BUILTIN_PSHUFLW,
14880 IX86_BUILTIN_PSHUFD,
14882 IX86_BUILTIN_PSLLW128,
14883 IX86_BUILTIN_PSLLD128,
14884 IX86_BUILTIN_PSLLQ128,
14885 IX86_BUILTIN_PSRAW128,
14886 IX86_BUILTIN_PSRAD128,
14887 IX86_BUILTIN_PSRLW128,
14888 IX86_BUILTIN_PSRLD128,
14889 IX86_BUILTIN_PSRLQ128,
14890 IX86_BUILTIN_PSLLDQI128,
14891 IX86_BUILTIN_PSLLWI128,
14892 IX86_BUILTIN_PSLLDI128,
14893 IX86_BUILTIN_PSLLQI128,
14894 IX86_BUILTIN_PSRAWI128,
14895 IX86_BUILTIN_PSRADI128,
14896 IX86_BUILTIN_PSRLDQI128,
14897 IX86_BUILTIN_PSRLWI128,
14898 IX86_BUILTIN_PSRLDI128,
14899 IX86_BUILTIN_PSRLQI128,
14901 IX86_BUILTIN_PUNPCKHBW128,
14902 IX86_BUILTIN_PUNPCKHWD128,
14903 IX86_BUILTIN_PUNPCKHDQ128,
14904 IX86_BUILTIN_PUNPCKHQDQ128,
14905 IX86_BUILTIN_PUNPCKLBW128,
14906 IX86_BUILTIN_PUNPCKLWD128,
14907 IX86_BUILTIN_PUNPCKLDQ128,
14908 IX86_BUILTIN_PUNPCKLQDQ128,
14910 IX86_BUILTIN_CLFLUSH,
14911 IX86_BUILTIN_MFENCE,
14912 IX86_BUILTIN_LFENCE,
14914 /* Prescott New Instructions. */
14915 IX86_BUILTIN_ADDSUBPS,
14916 IX86_BUILTIN_HADDPS,
14917 IX86_BUILTIN_HSUBPS,
14918 IX86_BUILTIN_MOVSHDUP,
14919 IX86_BUILTIN_MOVSLDUP,
14920 IX86_BUILTIN_ADDSUBPD,
14921 IX86_BUILTIN_HADDPD,
14922 IX86_BUILTIN_HSUBPD,
14923 IX86_BUILTIN_LDDQU,
14925 IX86_BUILTIN_MONITOR,
14926 IX86_BUILTIN_MWAIT,
14929 IX86_BUILTIN_PHADDW,
14930 IX86_BUILTIN_PHADDD,
14931 IX86_BUILTIN_PHADDSW,
14932 IX86_BUILTIN_PHSUBW,
14933 IX86_BUILTIN_PHSUBD,
14934 IX86_BUILTIN_PHSUBSW,
14935 IX86_BUILTIN_PMADDUBSW,
14936 IX86_BUILTIN_PMULHRSW,
14937 IX86_BUILTIN_PSHUFB,
14938 IX86_BUILTIN_PSIGNB,
14939 IX86_BUILTIN_PSIGNW,
14940 IX86_BUILTIN_PSIGND,
14941 IX86_BUILTIN_PALIGNR,
14942 IX86_BUILTIN_PABSB,
14943 IX86_BUILTIN_PABSW,
14944 IX86_BUILTIN_PABSD,
14946 IX86_BUILTIN_PHADDW128,
14947 IX86_BUILTIN_PHADDD128,
14948 IX86_BUILTIN_PHADDSW128,
14949 IX86_BUILTIN_PHSUBW128,
14950 IX86_BUILTIN_PHSUBD128,
14951 IX86_BUILTIN_PHSUBSW128,
14952 IX86_BUILTIN_PMADDUBSW128,
14953 IX86_BUILTIN_PMULHRSW128,
14954 IX86_BUILTIN_PSHUFB128,
14955 IX86_BUILTIN_PSIGNB128,
14956 IX86_BUILTIN_PSIGNW128,
14957 IX86_BUILTIN_PSIGND128,
14958 IX86_BUILTIN_PALIGNR128,
14959 IX86_BUILTIN_PABSB128,
14960 IX86_BUILTIN_PABSW128,
14961 IX86_BUILTIN_PABSD128,
14963 /* AMDFAM10 - SSE4A New Instructions. */
14964 IX86_BUILTIN_MOVNTSD,
14965 IX86_BUILTIN_MOVNTSS,
14966 IX86_BUILTIN_EXTRQI,
14967 IX86_BUILTIN_EXTRQ,
14968 IX86_BUILTIN_INSERTQI,
14969 IX86_BUILTIN_INSERTQ,
14971 IX86_BUILTIN_VEC_INIT_V2SI,
14972 IX86_BUILTIN_VEC_INIT_V4HI,
14973 IX86_BUILTIN_VEC_INIT_V8QI,
14974 IX86_BUILTIN_VEC_EXT_V2DF,
14975 IX86_BUILTIN_VEC_EXT_V2DI,
14976 IX86_BUILTIN_VEC_EXT_V4SF,
14977 IX86_BUILTIN_VEC_EXT_V4SI,
14978 IX86_BUILTIN_VEC_EXT_V8HI,
14979 IX86_BUILTIN_VEC_EXT_V16QI,
14980 IX86_BUILTIN_VEC_EXT_V2SI,
14981 IX86_BUILTIN_VEC_EXT_V4HI,
14982 IX86_BUILTIN_VEC_SET_V8HI,
14983 IX86_BUILTIN_VEC_SET_V4HI,
14988 #define def_builtin(MASK, NAME, TYPE, CODE) \
14990 if ((MASK) & target_flags \
14991 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14992 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14993 NULL, NULL_TREE); \
14996 /* Bits for builtin_description.flag. */
14998 /* Set when we don't support the comparison natively, and should
14999 swap_comparison in order to support it. */
15000 #define BUILTIN_DESC_SWAP_OPERANDS 1
15002 struct builtin_description
15004 const unsigned int mask;
15005 const enum insn_code icode;
15006 const char *const name;
15007 const enum ix86_builtins code;
15008 const enum rtx_code comparison;
15009 const unsigned int flag;
15012 static const struct builtin_description bdesc_comi[] =
15014 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15015 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15016 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15017 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15018 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15019 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15020 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15021 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15022 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15023 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15024 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15025 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15026 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15027 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15028 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15029 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15030 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15031 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15032 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15033 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15036 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15037 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15040 static const struct builtin_description bdesc_2arg[] =
15043 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15044 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15045 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15046 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15047 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15048 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15049 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15050 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15052 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15054 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15055 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15056 BUILTIN_DESC_SWAP_OPERANDS },
15057 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15058 BUILTIN_DESC_SWAP_OPERANDS },
15059 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15060 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15061 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15062 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15063 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15064 BUILTIN_DESC_SWAP_OPERANDS },
15065 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15066 BUILTIN_DESC_SWAP_OPERANDS },
15067 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15068 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15069 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15070 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15071 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15072 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15073 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15074 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15075 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15076 BUILTIN_DESC_SWAP_OPERANDS },
15077 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15078 BUILTIN_DESC_SWAP_OPERANDS },
15079 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15081 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15082 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15083 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15084 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15086 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15087 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15088 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15089 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15091 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15092 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15093 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15094 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15095 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15098 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15099 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15100 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15101 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15102 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15103 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15104 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15105 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15107 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15108 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15109 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15110 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15111 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15112 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15113 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15114 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15116 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15117 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15118 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15120 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15121 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15122 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15123 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15125 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15126 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15128 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15129 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15130 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15131 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15132 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15133 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15135 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15136 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15137 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15138 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15140 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15141 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15142 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15143 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15144 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15145 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15148 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15149 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15150 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15152 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15153 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15154 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15156 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15157 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15158 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15159 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15160 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15161 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15163 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15164 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15165 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15166 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15167 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15168 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15170 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15171 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15172 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15173 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15175 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15176 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15179 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15180 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15181 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15182 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15183 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15184 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15185 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15186 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15188 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15190 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15191 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15192 BUILTIN_DESC_SWAP_OPERANDS },
15193 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15194 BUILTIN_DESC_SWAP_OPERANDS },
15195 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15196 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15197 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15198 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15199 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15200 BUILTIN_DESC_SWAP_OPERANDS },
15201 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15202 BUILTIN_DESC_SWAP_OPERANDS },
15203 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15204 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15205 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15206 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15207 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15208 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15209 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15210 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15211 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15213 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15214 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15215 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15216 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15218 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15219 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15220 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15221 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15223 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15224 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15225 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15228 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15229 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15230 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15231 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15232 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15233 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15234 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15235 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15237 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15238 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15239 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15240 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15241 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15242 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15243 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15244 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15246 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15247 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15249 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15250 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15251 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15252 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15254 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15255 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15257 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15258 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15259 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15260 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15261 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15262 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15264 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15265 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15266 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15267 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15269 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15270 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15271 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15272 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15273 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15274 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15275 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15276 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15278 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15279 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15280 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15282 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15283 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15285 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15286 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15288 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15289 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15290 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15292 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15293 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15294 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15296 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15297 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15299 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15301 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15302 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15303 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15304 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15307 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15308 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15309 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15310 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15311 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15312 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15315 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15316 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15317 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15318 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15319 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15320 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15321 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15322 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15323 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15324 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15325 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15326 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15327 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15328 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15329 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15330 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15331 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15332 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15333 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15334 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15335 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15336 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15337 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15338 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15341 static const struct builtin_description bdesc_1arg[] =
15343 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15344 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15346 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15347 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15348 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15350 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15351 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15352 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15353 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15354 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15355 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15357 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15358 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15360 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15362 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15363 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15365 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15366 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15367 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15368 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15369 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15371 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15373 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15374 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15375 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15376 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15378 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15379 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15380 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15383 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15384 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15387 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15388 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15389 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15390 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15391 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15392 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15396 ix86_init_builtins (void)
15399 ix86_init_mmx_sse_builtins ();
15402 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15403 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15406 ix86_init_mmx_sse_builtins (void)
15408 const struct builtin_description * d;
15411 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
15412 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15413 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15414 tree V2DI_type_node
15415 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15416 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15417 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15418 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15419 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15420 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
15421 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15423 tree pchar_type_node = build_pointer_type (char_type_node);
15424 tree pcchar_type_node = build_pointer_type (
15425 build_type_variant (char_type_node, 1, 0));
15426 tree pfloat_type_node = build_pointer_type (float_type_node);
15427 tree pcfloat_type_node = build_pointer_type (
15428 build_type_variant (float_type_node, 1, 0));
15429 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15430 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15431 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15434 tree int_ftype_v4sf_v4sf
15435 = build_function_type_list (integer_type_node,
15436 V4SF_type_node, V4SF_type_node, NULL_TREE);
15437 tree v4si_ftype_v4sf_v4sf
15438 = build_function_type_list (V4SI_type_node,
15439 V4SF_type_node, V4SF_type_node, NULL_TREE);
15440 /* MMX/SSE/integer conversions. */
15441 tree int_ftype_v4sf
15442 = build_function_type_list (integer_type_node,
15443 V4SF_type_node, NULL_TREE);
15444 tree int64_ftype_v4sf
15445 = build_function_type_list (long_long_integer_type_node,
15446 V4SF_type_node, NULL_TREE);
15447 tree int_ftype_v8qi
15448 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15449 tree v4sf_ftype_v4sf_int
15450 = build_function_type_list (V4SF_type_node,
15451 V4SF_type_node, integer_type_node, NULL_TREE);
15452 tree v4sf_ftype_v4sf_int64
15453 = build_function_type_list (V4SF_type_node,
15454 V4SF_type_node, long_long_integer_type_node,
15456 tree v4sf_ftype_v4sf_v2si
15457 = build_function_type_list (V4SF_type_node,
15458 V4SF_type_node, V2SI_type_node, NULL_TREE);
15460 /* Miscellaneous. */
15461 tree v8qi_ftype_v4hi_v4hi
15462 = build_function_type_list (V8QI_type_node,
15463 V4HI_type_node, V4HI_type_node, NULL_TREE);
15464 tree v4hi_ftype_v2si_v2si
15465 = build_function_type_list (V4HI_type_node,
15466 V2SI_type_node, V2SI_type_node, NULL_TREE);
15467 tree v4sf_ftype_v4sf_v4sf_int
15468 = build_function_type_list (V4SF_type_node,
15469 V4SF_type_node, V4SF_type_node,
15470 integer_type_node, NULL_TREE);
15471 tree v2si_ftype_v4hi_v4hi
15472 = build_function_type_list (V2SI_type_node,
15473 V4HI_type_node, V4HI_type_node, NULL_TREE);
15474 tree v4hi_ftype_v4hi_int
15475 = build_function_type_list (V4HI_type_node,
15476 V4HI_type_node, integer_type_node, NULL_TREE);
15477 tree v4hi_ftype_v4hi_di
15478 = build_function_type_list (V4HI_type_node,
15479 V4HI_type_node, long_long_unsigned_type_node,
15481 tree v2si_ftype_v2si_di
15482 = build_function_type_list (V2SI_type_node,
15483 V2SI_type_node, long_long_unsigned_type_node,
15485 tree void_ftype_void
15486 = build_function_type (void_type_node, void_list_node);
15487 tree void_ftype_unsigned
15488 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15489 tree void_ftype_unsigned_unsigned
15490 = build_function_type_list (void_type_node, unsigned_type_node,
15491 unsigned_type_node, NULL_TREE);
15492 tree void_ftype_pcvoid_unsigned_unsigned
15493 = build_function_type_list (void_type_node, const_ptr_type_node,
15494 unsigned_type_node, unsigned_type_node,
15496 tree unsigned_ftype_void
15497 = build_function_type (unsigned_type_node, void_list_node);
15498 tree v2si_ftype_v4sf
15499 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15500 /* Loads/stores. */
15501 tree void_ftype_v8qi_v8qi_pchar
15502 = build_function_type_list (void_type_node,
15503 V8QI_type_node, V8QI_type_node,
15504 pchar_type_node, NULL_TREE);
15505 tree v4sf_ftype_pcfloat
15506 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15507 /* @@@ the type is bogus */
15508 tree v4sf_ftype_v4sf_pv2si
15509 = build_function_type_list (V4SF_type_node,
15510 V4SF_type_node, pv2si_type_node, NULL_TREE);
15511 tree void_ftype_pv2si_v4sf
15512 = build_function_type_list (void_type_node,
15513 pv2si_type_node, V4SF_type_node, NULL_TREE);
15514 tree void_ftype_pfloat_v4sf
15515 = build_function_type_list (void_type_node,
15516 pfloat_type_node, V4SF_type_node, NULL_TREE);
15517 tree void_ftype_pdi_di
15518 = build_function_type_list (void_type_node,
15519 pdi_type_node, long_long_unsigned_type_node,
15521 tree void_ftype_pv2di_v2di
15522 = build_function_type_list (void_type_node,
15523 pv2di_type_node, V2DI_type_node, NULL_TREE);
15524 /* Normal vector unops. */
15525 tree v4sf_ftype_v4sf
15526 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15527 tree v16qi_ftype_v16qi
15528 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15529 tree v8hi_ftype_v8hi
15530 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15531 tree v4si_ftype_v4si
15532 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15533 tree v8qi_ftype_v8qi
15534 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15535 tree v4hi_ftype_v4hi
15536 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15538 /* Normal vector binops. */
15539 tree v4sf_ftype_v4sf_v4sf
15540 = build_function_type_list (V4SF_type_node,
15541 V4SF_type_node, V4SF_type_node, NULL_TREE);
15542 tree v8qi_ftype_v8qi_v8qi
15543 = build_function_type_list (V8QI_type_node,
15544 V8QI_type_node, V8QI_type_node, NULL_TREE);
15545 tree v4hi_ftype_v4hi_v4hi
15546 = build_function_type_list (V4HI_type_node,
15547 V4HI_type_node, V4HI_type_node, NULL_TREE);
15548 tree v2si_ftype_v2si_v2si
15549 = build_function_type_list (V2SI_type_node,
15550 V2SI_type_node, V2SI_type_node, NULL_TREE);
15551 tree di_ftype_di_di
15552 = build_function_type_list (long_long_unsigned_type_node,
15553 long_long_unsigned_type_node,
15554 long_long_unsigned_type_node, NULL_TREE);
15556 tree di_ftype_di_di_int
15557 = build_function_type_list (long_long_unsigned_type_node,
15558 long_long_unsigned_type_node,
15559 long_long_unsigned_type_node,
15560 integer_type_node, NULL_TREE);
15562 tree v2si_ftype_v2sf
15563 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15564 tree v2sf_ftype_v2si
15565 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15566 tree v2si_ftype_v2si
15567 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15568 tree v2sf_ftype_v2sf
15569 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15570 tree v2sf_ftype_v2sf_v2sf
15571 = build_function_type_list (V2SF_type_node,
15572 V2SF_type_node, V2SF_type_node, NULL_TREE);
15573 tree v2si_ftype_v2sf_v2sf
15574 = build_function_type_list (V2SI_type_node,
15575 V2SF_type_node, V2SF_type_node, NULL_TREE);
15576 tree pint_type_node = build_pointer_type (integer_type_node);
15577 tree pdouble_type_node = build_pointer_type (double_type_node);
15578 tree pcdouble_type_node = build_pointer_type (
15579 build_type_variant (double_type_node, 1, 0));
15580 tree int_ftype_v2df_v2df
15581 = build_function_type_list (integer_type_node,
15582 V2DF_type_node, V2DF_type_node, NULL_TREE);
15584 tree void_ftype_pcvoid
15585 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15586 tree v4sf_ftype_v4si
15587 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15588 tree v4si_ftype_v4sf
15589 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15590 tree v2df_ftype_v4si
15591 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15592 tree v4si_ftype_v2df
15593 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15594 tree v2si_ftype_v2df
15595 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15596 tree v4sf_ftype_v2df
15597 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15598 tree v2df_ftype_v2si
15599 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15600 tree v2df_ftype_v4sf
15601 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15602 tree int_ftype_v2df
15603 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15604 tree int64_ftype_v2df
15605 = build_function_type_list (long_long_integer_type_node,
15606 V2DF_type_node, NULL_TREE);
15607 tree v2df_ftype_v2df_int
15608 = build_function_type_list (V2DF_type_node,
15609 V2DF_type_node, integer_type_node, NULL_TREE);
15610 tree v2df_ftype_v2df_int64
15611 = build_function_type_list (V2DF_type_node,
15612 V2DF_type_node, long_long_integer_type_node,
15614 tree v4sf_ftype_v4sf_v2df
15615 = build_function_type_list (V4SF_type_node,
15616 V4SF_type_node, V2DF_type_node, NULL_TREE);
15617 tree v2df_ftype_v2df_v4sf
15618 = build_function_type_list (V2DF_type_node,
15619 V2DF_type_node, V4SF_type_node, NULL_TREE);
15620 tree v2df_ftype_v2df_v2df_int
15621 = build_function_type_list (V2DF_type_node,
15622 V2DF_type_node, V2DF_type_node,
15625 tree v2df_ftype_v2df_pcdouble
15626 = build_function_type_list (V2DF_type_node,
15627 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15628 tree void_ftype_pdouble_v2df
15629 = build_function_type_list (void_type_node,
15630 pdouble_type_node, V2DF_type_node, NULL_TREE);
15631 tree void_ftype_pint_int
15632 = build_function_type_list (void_type_node,
15633 pint_type_node, integer_type_node, NULL_TREE);
15634 tree void_ftype_v16qi_v16qi_pchar
15635 = build_function_type_list (void_type_node,
15636 V16QI_type_node, V16QI_type_node,
15637 pchar_type_node, NULL_TREE);
15638 tree v2df_ftype_pcdouble
15639 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15640 tree v2df_ftype_v2df_v2df
15641 = build_function_type_list (V2DF_type_node,
15642 V2DF_type_node, V2DF_type_node, NULL_TREE);
15643 tree v16qi_ftype_v16qi_v16qi
15644 = build_function_type_list (V16QI_type_node,
15645 V16QI_type_node, V16QI_type_node, NULL_TREE);
15646 tree v8hi_ftype_v8hi_v8hi
15647 = build_function_type_list (V8HI_type_node,
15648 V8HI_type_node, V8HI_type_node, NULL_TREE);
15649 tree v4si_ftype_v4si_v4si
15650 = build_function_type_list (V4SI_type_node,
15651 V4SI_type_node, V4SI_type_node, NULL_TREE);
15652 tree v2di_ftype_v2di_v2di
15653 = build_function_type_list (V2DI_type_node,
15654 V2DI_type_node, V2DI_type_node, NULL_TREE);
15655 tree v2di_ftype_v2df_v2df
15656 = build_function_type_list (V2DI_type_node,
15657 V2DF_type_node, V2DF_type_node, NULL_TREE);
15658 tree v2df_ftype_v2df
15659 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15660 tree v2di_ftype_v2di_int
15661 = build_function_type_list (V2DI_type_node,
15662 V2DI_type_node, integer_type_node, NULL_TREE);
15663 tree v2di_ftype_v2di_v2di_int
15664 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15665 V2DI_type_node, integer_type_node, NULL_TREE);
15666 tree v4si_ftype_v4si_int
15667 = build_function_type_list (V4SI_type_node,
15668 V4SI_type_node, integer_type_node, NULL_TREE);
15669 tree v8hi_ftype_v8hi_int
15670 = build_function_type_list (V8HI_type_node,
15671 V8HI_type_node, integer_type_node, NULL_TREE);
15672 tree v4si_ftype_v8hi_v8hi
15673 = build_function_type_list (V4SI_type_node,
15674 V8HI_type_node, V8HI_type_node, NULL_TREE);
15675 tree di_ftype_v8qi_v8qi
15676 = build_function_type_list (long_long_unsigned_type_node,
15677 V8QI_type_node, V8QI_type_node, NULL_TREE);
15678 tree di_ftype_v2si_v2si
15679 = build_function_type_list (long_long_unsigned_type_node,
15680 V2SI_type_node, V2SI_type_node, NULL_TREE);
15681 tree v2di_ftype_v16qi_v16qi
15682 = build_function_type_list (V2DI_type_node,
15683 V16QI_type_node, V16QI_type_node, NULL_TREE);
15684 tree v2di_ftype_v4si_v4si
15685 = build_function_type_list (V2DI_type_node,
15686 V4SI_type_node, V4SI_type_node, NULL_TREE);
15687 tree int_ftype_v16qi
15688 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15689 tree v16qi_ftype_pcchar
15690 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15691 tree void_ftype_pchar_v16qi
15692 = build_function_type_list (void_type_node,
15693 pchar_type_node, V16QI_type_node, NULL_TREE);
15695 tree v2di_ftype_v2di_unsigned_unsigned
15696 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15697 unsigned_type_node, unsigned_type_node,
15699 tree v2di_ftype_v2di_v2di_unsigned_unsigned
15700 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
15701 unsigned_type_node, unsigned_type_node,
15703 tree v2di_ftype_v2di_v16qi
15704 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
15708 tree float128_type;
15711 /* The __float80 type. */
15712 if (TYPE_MODE (long_double_type_node) == XFmode)
15713 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15717 /* The __float80 type. */
15718 float80_type = make_node (REAL_TYPE);
15719 TYPE_PRECISION (float80_type) = 80;
15720 layout_type (float80_type);
15721 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15726 float128_type = make_node (REAL_TYPE);
15727 TYPE_PRECISION (float128_type) = 128;
15728 layout_type (float128_type);
15729 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15732 /* Add all builtins that are more or less simple operations on two
15734 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15736 /* Use one of the operands; the target can have a different mode for
15737 mask-generating compares. */
15738 enum machine_mode mode;
15743 mode = insn_data[d->icode].operand[1].mode;
15748 type = v16qi_ftype_v16qi_v16qi;
15751 type = v8hi_ftype_v8hi_v8hi;
15754 type = v4si_ftype_v4si_v4si;
15757 type = v2di_ftype_v2di_v2di;
15760 type = v2df_ftype_v2df_v2df;
15763 type = v4sf_ftype_v4sf_v4sf;
15766 type = v8qi_ftype_v8qi_v8qi;
15769 type = v4hi_ftype_v4hi_v4hi;
15772 type = v2si_ftype_v2si_v2si;
15775 type = di_ftype_di_di;
15779 gcc_unreachable ();
15782 /* Override for comparisons. */
15783 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15784 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15785 type = v4si_ftype_v4sf_v4sf;
15787 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15788 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15789 type = v2di_ftype_v2df_v2df;
15791 def_builtin (d->mask, d->name, type, d->code);
15794 /* Add all builtins that are more or less simple operations on 1 operand. */
15795 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15797 enum machine_mode mode;
15802 mode = insn_data[d->icode].operand[1].mode;
15807 type = v16qi_ftype_v16qi;
15810 type = v8hi_ftype_v8hi;
15813 type = v4si_ftype_v4si;
15816 type = v2df_ftype_v2df;
15819 type = v4sf_ftype_v4sf;
15822 type = v8qi_ftype_v8qi;
15825 type = v4hi_ftype_v4hi;
15828 type = v2si_ftype_v2si;
15835 def_builtin (d->mask, d->name, type, d->code);
15838 /* Add the remaining MMX insns with somewhat more complicated types. */
15839 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15840 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15841 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15842 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15844 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15845 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15846 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15848 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15849 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15851 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15852 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15854 /* comi/ucomi insns. */
15855 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15856 if (d->mask == MASK_SSE2)
15857 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15859 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15861 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15862 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15863 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15865 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15866 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15867 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15868 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15869 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15870 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15871 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15872 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15873 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15874 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15875 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15877 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15879 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15880 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15882 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15883 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15884 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15885 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15887 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15888 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15889 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15890 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15892 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15894 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15896 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15897 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15898 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15899 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15900 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15901 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15903 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15905 /* Original 3DNow! */
15906 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15907 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15908 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15909 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15910 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15911 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15912 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15913 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15914 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15915 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15916 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15917 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15918 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15919 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15920 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15921 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15922 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15923 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15924 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15925 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15927 /* 3DNow! extension as used in the Athlon CPU. */
15928 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15929 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15930 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15931 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15932 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15933 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15936 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15938 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15939 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15941 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15942 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15944 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15945 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15946 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15947 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15948 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15950 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15951 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15952 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15953 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15955 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15956 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15958 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15960 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15961 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15963 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15964 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15965 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15966 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15967 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15969 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15971 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15972 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15973 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15974 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15976 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15977 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15978 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15980 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15981 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15982 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15983 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15985 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15986 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15987 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15989 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15990 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15992 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15993 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15995 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15996 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15997 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15999 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
16000 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
16001 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16003 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
16004 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
16006 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16007 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16008 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16009 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16011 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16012 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16013 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16014 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16016 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16017 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16019 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16021 /* Prescott New Instructions. */
16022 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16023 void_ftype_pcvoid_unsigned_unsigned,
16024 IX86_BUILTIN_MONITOR);
16025 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16026 void_ftype_unsigned_unsigned,
16027 IX86_BUILTIN_MWAIT);
16028 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16030 IX86_BUILTIN_MOVSHDUP);
16031 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16033 IX86_BUILTIN_MOVSLDUP);
16034 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16035 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16038 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16039 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16040 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16041 IX86_BUILTIN_PALIGNR);
16043 /* AMDFAM10 SSE4A New built-ins */
16044 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
16045 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
16046 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
16047 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
16048 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
16049 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
16050 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
16051 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
16052 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
16053 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
16054 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
16055 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
16057 /* Access to the vec_init patterns. */
16058 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16059 integer_type_node, NULL_TREE);
16060 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16061 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16063 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16064 short_integer_type_node,
16065 short_integer_type_node,
16066 short_integer_type_node, NULL_TREE);
16067 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16068 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16070 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16071 char_type_node, char_type_node,
16072 char_type_node, char_type_node,
16073 char_type_node, char_type_node,
16074 char_type_node, NULL_TREE);
16075 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16076 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16078 /* Access to the vec_extract patterns. */
16079 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16080 integer_type_node, NULL_TREE);
16081 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
16082 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16084 ftype = build_function_type_list (long_long_integer_type_node,
16085 V2DI_type_node, integer_type_node,
16087 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
16088 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16090 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16091 integer_type_node, NULL_TREE);
16092 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16093 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16095 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16096 integer_type_node, NULL_TREE);
16097 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
16098 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16100 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16101 integer_type_node, NULL_TREE);
16102 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
16103 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16105 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16106 integer_type_node, NULL_TREE);
16107 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16108 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16110 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16111 integer_type_node, NULL_TREE);
16112 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16113 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16115 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16116 integer_type_node, NULL_TREE);
16117 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
16119 /* Access to the vec_set patterns. */
16120 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16122 integer_type_node, NULL_TREE);
16123 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
16124 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16126 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16128 integer_type_node, NULL_TREE);
16129 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16130 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16133 /* Errors in the source file can cause expand_expr to return const0_rtx
16134 where we expect a vector. To avoid crashing, use one of the vector
16135 clear instructions. */
16137 safe_vector_operand (rtx x, enum machine_mode mode)
16139 if (x == const0_rtx)
16140 x = CONST0_RTX (mode);
16144 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16147 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16150 tree arg0 = TREE_VALUE (arglist);
16151 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16152 rtx op0 = expand_normal (arg0);
16153 rtx op1 = expand_normal (arg1);
16154 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16155 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16156 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16158 if (VECTOR_MODE_P (mode0))
16159 op0 = safe_vector_operand (op0, mode0);
16160 if (VECTOR_MODE_P (mode1))
16161 op1 = safe_vector_operand (op1, mode1);
16163 if (optimize || !target
16164 || GET_MODE (target) != tmode
16165 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16166 target = gen_reg_rtx (tmode);
16168 if (GET_MODE (op1) == SImode && mode1 == TImode)
16170 rtx x = gen_reg_rtx (V4SImode);
16171 emit_insn (gen_sse2_loadd (x, op1));
16172 op1 = gen_lowpart (TImode, x);
16175 /* The insn must want input operands in the same modes as the
16177 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16178 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16180 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16181 op0 = copy_to_mode_reg (mode0, op0);
16182 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16183 op1 = copy_to_mode_reg (mode1, op1);
16185 /* ??? Using ix86_fixup_binary_operands is problematic when
16186 we've got mismatched modes. Fake it. */
16192 if (tmode == mode0 && tmode == mode1)
16194 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16198 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16200 op0 = force_reg (mode0, op0);
16201 op1 = force_reg (mode1, op1);
16202 target = gen_reg_rtx (tmode);
16205 pat = GEN_FCN (icode) (target, op0, op1);
16212 /* Subroutine of ix86_expand_builtin to take care of stores. */
16215 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16218 tree arg0 = TREE_VALUE (arglist);
16219 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16220 rtx op0 = expand_normal (arg0);
16221 rtx op1 = expand_normal (arg1);
16222 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16223 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16225 if (VECTOR_MODE_P (mode1))
16226 op1 = safe_vector_operand (op1, mode1);
16228 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16229 op1 = copy_to_mode_reg (mode1, op1);
16231 pat = GEN_FCN (icode) (op0, op1);
16237 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16240 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16241 rtx target, int do_load)
16244 tree arg0 = TREE_VALUE (arglist);
16245 rtx op0 = expand_normal (arg0);
16246 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16247 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16249 if (optimize || !target
16250 || GET_MODE (target) != tmode
16251 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16252 target = gen_reg_rtx (tmode);
16254 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16257 if (VECTOR_MODE_P (mode0))
16258 op0 = safe_vector_operand (op0, mode0);
16260 if ((optimize && !register_operand (op0, mode0))
16261 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16262 op0 = copy_to_mode_reg (mode0, op0);
16265 pat = GEN_FCN (icode) (target, op0);
16272 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16273 sqrtss, rsqrtss, rcpss. */
16276 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16279 tree arg0 = TREE_VALUE (arglist);
16280 rtx op1, op0 = expand_normal (arg0);
16281 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16282 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16284 if (optimize || !target
16285 || GET_MODE (target) != tmode
16286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16287 target = gen_reg_rtx (tmode);
16289 if (VECTOR_MODE_P (mode0))
16290 op0 = safe_vector_operand (op0, mode0);
16292 if ((optimize && !register_operand (op0, mode0))
16293 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16294 op0 = copy_to_mode_reg (mode0, op0);
16297 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16298 op1 = copy_to_mode_reg (mode0, op1);
16300 pat = GEN_FCN (icode) (target, op0, op1);
16307 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16310 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16314 tree arg0 = TREE_VALUE (arglist);
16315 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16316 rtx op0 = expand_normal (arg0);
16317 rtx op1 = expand_normal (arg1);
16319 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16320 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16321 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16322 enum rtx_code comparison = d->comparison;
16324 if (VECTOR_MODE_P (mode0))
16325 op0 = safe_vector_operand (op0, mode0);
16326 if (VECTOR_MODE_P (mode1))
16327 op1 = safe_vector_operand (op1, mode1);
16329 /* Swap operands if we have a comparison that isn't available in
16331 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16333 rtx tmp = gen_reg_rtx (mode1);
16334 emit_move_insn (tmp, op1);
16339 if (optimize || !target
16340 || GET_MODE (target) != tmode
16341 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16342 target = gen_reg_rtx (tmode);
16344 if ((optimize && !register_operand (op0, mode0))
16345 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16346 op0 = copy_to_mode_reg (mode0, op0);
16347 if ((optimize && !register_operand (op1, mode1))
16348 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16349 op1 = copy_to_mode_reg (mode1, op1);
16351 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16352 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16359 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16362 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16366 tree arg0 = TREE_VALUE (arglist);
16367 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16368 rtx op0 = expand_normal (arg0);
16369 rtx op1 = expand_normal (arg1);
16371 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16372 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16373 enum rtx_code comparison = d->comparison;
16375 if (VECTOR_MODE_P (mode0))
16376 op0 = safe_vector_operand (op0, mode0);
16377 if (VECTOR_MODE_P (mode1))
16378 op1 = safe_vector_operand (op1, mode1);
16380 /* Swap operands if we have a comparison that isn't available in
16382 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16389 target = gen_reg_rtx (SImode);
16390 emit_move_insn (target, const0_rtx);
16391 target = gen_rtx_SUBREG (QImode, target, 0);
16393 if ((optimize && !register_operand (op0, mode0))
16394 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16395 op0 = copy_to_mode_reg (mode0, op0);
16396 if ((optimize && !register_operand (op1, mode1))
16397 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16398 op1 = copy_to_mode_reg (mode1, op1);
16400 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16401 pat = GEN_FCN (d->icode) (op0, op1);
16405 emit_insn (gen_rtx_SET (VOIDmode,
16406 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16407 gen_rtx_fmt_ee (comparison, QImode,
16411 return SUBREG_REG (target);
16414 /* Return the integer constant in ARG. Constrain it to be in the range
16415 of the subparts of VEC_TYPE; issue an error if not. */
16418 get_element_number (tree vec_type, tree arg)
16420 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16422 if (!host_integerp (arg, 1)
16423 || (elt = tree_low_cst (arg, 1), elt > max))
16425 error ("selector must be an integer constant in the range 0..%wi", max);
16432 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16433 ix86_expand_vector_init. We DO have language-level syntax for this, in
16434 the form of (type){ init-list }. Except that since we can't place emms
16435 instructions from inside the compiler, we can't allow the use of MMX
16436 registers unless the user explicitly asks for it. So we do *not* define
16437 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16438 we have builtins invoked by mmintrin.h that gives us license to emit
16439 these sorts of instructions. */
16442 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16444 enum machine_mode tmode = TYPE_MODE (type);
16445 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16446 int i, n_elt = GET_MODE_NUNITS (tmode);
16447 rtvec v = rtvec_alloc (n_elt);
16449 gcc_assert (VECTOR_MODE_P (tmode));
16451 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16453 rtx x = expand_normal (TREE_VALUE (arglist));
16454 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16457 gcc_assert (arglist == NULL);
16459 if (!target || !register_operand (target, tmode))
16460 target = gen_reg_rtx (tmode);
16462 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16466 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16467 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16468 had a language-level syntax for referencing vector elements. */
16471 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16473 enum machine_mode tmode, mode0;
16478 arg0 = TREE_VALUE (arglist);
16479 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16481 op0 = expand_normal (arg0);
16482 elt = get_element_number (TREE_TYPE (arg0), arg1);
16484 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16485 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16486 gcc_assert (VECTOR_MODE_P (mode0));
16488 op0 = force_reg (mode0, op0);
16490 if (optimize || !target || !register_operand (target, tmode))
16491 target = gen_reg_rtx (tmode);
16493 ix86_expand_vector_extract (true, target, op0, elt);
16498 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16499 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16500 a language-level syntax for referencing vector elements. */
16503 ix86_expand_vec_set_builtin (tree arglist)
16505 enum machine_mode tmode, mode1;
16506 tree arg0, arg1, arg2;
16508 rtx op0, op1, target;
16510 arg0 = TREE_VALUE (arglist);
16511 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16512 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16514 tmode = TYPE_MODE (TREE_TYPE (arg0));
16515 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16516 gcc_assert (VECTOR_MODE_P (tmode));
16518 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16519 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16520 elt = get_element_number (TREE_TYPE (arg0), arg2);
16522 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16523 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16525 op0 = force_reg (tmode, op0);
16526 op1 = force_reg (mode1, op1);
16528 /* OP0 is the source of these builtin functions and shouldn't be
16529 modified. Create a copy, use it and return it as target. */
16530 target = gen_reg_rtx (tmode);
16531 emit_move_insn (target, op0);
16532 ix86_expand_vector_set (true, target, op1, elt);
16537 /* Expand an expression EXP that calls a built-in function,
16538 with result going to TARGET if that's convenient
16539 (and in mode MODE if that's convenient).
16540 SUBTARGET may be used as the target for computing one of EXP's operands.
16541 IGNORE is nonzero if the value is to be ignored. */
16544 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16545 enum machine_mode mode ATTRIBUTE_UNUSED,
16546 int ignore ATTRIBUTE_UNUSED)
16548 const struct builtin_description *d;
16550 enum insn_code icode;
16551 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16552 tree arglist = TREE_OPERAND (exp, 1);
16553 tree arg0, arg1, arg2, arg3;
16554 rtx op0, op1, op2, op3, pat;
16555 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16556 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16560 case IX86_BUILTIN_EMMS:
16561 emit_insn (gen_mmx_emms ());
16564 case IX86_BUILTIN_SFENCE:
16565 emit_insn (gen_sse_sfence ());
16568 case IX86_BUILTIN_MASKMOVQ:
16569 case IX86_BUILTIN_MASKMOVDQU:
16570 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16571 ? CODE_FOR_mmx_maskmovq
16572 : CODE_FOR_sse2_maskmovdqu);
16573 /* Note the arg order is different from the operand order. */
16574 arg1 = TREE_VALUE (arglist);
16575 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16576 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16577 op0 = expand_normal (arg0);
16578 op1 = expand_normal (arg1);
16579 op2 = expand_normal (arg2);
16580 mode0 = insn_data[icode].operand[0].mode;
16581 mode1 = insn_data[icode].operand[1].mode;
16582 mode2 = insn_data[icode].operand[2].mode;
16584 op0 = force_reg (Pmode, op0);
16585 op0 = gen_rtx_MEM (mode1, op0);
16587 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16588 op0 = copy_to_mode_reg (mode0, op0);
16589 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16590 op1 = copy_to_mode_reg (mode1, op1);
16591 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16592 op2 = copy_to_mode_reg (mode2, op2);
16593 pat = GEN_FCN (icode) (op0, op1, op2);
16599 case IX86_BUILTIN_SQRTSS:
16600 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16601 case IX86_BUILTIN_RSQRTSS:
16602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16603 case IX86_BUILTIN_RCPSS:
16604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16606 case IX86_BUILTIN_LOADUPS:
16607 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16609 case IX86_BUILTIN_STOREUPS:
16610 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16612 case IX86_BUILTIN_LOADHPS:
16613 case IX86_BUILTIN_LOADLPS:
16614 case IX86_BUILTIN_LOADHPD:
16615 case IX86_BUILTIN_LOADLPD:
16616 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16617 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16618 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16619 : CODE_FOR_sse2_loadlpd);
16620 arg0 = TREE_VALUE (arglist);
16621 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16622 op0 = expand_normal (arg0);
16623 op1 = expand_normal (arg1);
16624 tmode = insn_data[icode].operand[0].mode;
16625 mode0 = insn_data[icode].operand[1].mode;
16626 mode1 = insn_data[icode].operand[2].mode;
16628 op0 = force_reg (mode0, op0);
16629 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16630 if (optimize || target == 0
16631 || GET_MODE (target) != tmode
16632 || !register_operand (target, tmode))
16633 target = gen_reg_rtx (tmode);
16634 pat = GEN_FCN (icode) (target, op0, op1);
16640 case IX86_BUILTIN_STOREHPS:
16641 case IX86_BUILTIN_STORELPS:
16642 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16643 : CODE_FOR_sse_storelps);
16644 arg0 = TREE_VALUE (arglist);
16645 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16646 op0 = expand_normal (arg0);
16647 op1 = expand_normal (arg1);
16648 mode0 = insn_data[icode].operand[0].mode;
16649 mode1 = insn_data[icode].operand[1].mode;
16651 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16652 op1 = force_reg (mode1, op1);
16654 pat = GEN_FCN (icode) (op0, op1);
16660 case IX86_BUILTIN_MOVNTPS:
16661 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16662 case IX86_BUILTIN_MOVNTQ:
16663 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16665 case IX86_BUILTIN_LDMXCSR:
16666 op0 = expand_normal (TREE_VALUE (arglist));
16667 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16668 emit_move_insn (target, op0);
16669 emit_insn (gen_sse_ldmxcsr (target));
16672 case IX86_BUILTIN_STMXCSR:
16673 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16674 emit_insn (gen_sse_stmxcsr (target));
16675 return copy_to_mode_reg (SImode, target);
16677 case IX86_BUILTIN_SHUFPS:
16678 case IX86_BUILTIN_SHUFPD:
16679 icode = (fcode == IX86_BUILTIN_SHUFPS
16680 ? CODE_FOR_sse_shufps
16681 : CODE_FOR_sse2_shufpd);
16682 arg0 = TREE_VALUE (arglist);
16683 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16684 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16685 op0 = expand_normal (arg0);
16686 op1 = expand_normal (arg1);
16687 op2 = expand_normal (arg2);
16688 tmode = insn_data[icode].operand[0].mode;
16689 mode0 = insn_data[icode].operand[1].mode;
16690 mode1 = insn_data[icode].operand[2].mode;
16691 mode2 = insn_data[icode].operand[3].mode;
16693 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16694 op0 = copy_to_mode_reg (mode0, op0);
16695 if ((optimize && !register_operand (op1, mode1))
16696 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16697 op1 = copy_to_mode_reg (mode1, op1);
16698 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16700 /* @@@ better error message */
16701 error ("mask must be an immediate");
16702 return gen_reg_rtx (tmode);
16704 if (optimize || target == 0
16705 || GET_MODE (target) != tmode
16706 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16707 target = gen_reg_rtx (tmode);
16708 pat = GEN_FCN (icode) (target, op0, op1, op2);
16714 case IX86_BUILTIN_PSHUFW:
16715 case IX86_BUILTIN_PSHUFD:
16716 case IX86_BUILTIN_PSHUFHW:
16717 case IX86_BUILTIN_PSHUFLW:
16718 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16719 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16720 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16721 : CODE_FOR_mmx_pshufw);
16722 arg0 = TREE_VALUE (arglist);
16723 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16724 op0 = expand_normal (arg0);
16725 op1 = expand_normal (arg1);
16726 tmode = insn_data[icode].operand[0].mode;
16727 mode1 = insn_data[icode].operand[1].mode;
16728 mode2 = insn_data[icode].operand[2].mode;
16730 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16731 op0 = copy_to_mode_reg (mode1, op0);
16732 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16734 /* @@@ better error message */
16735 error ("mask must be an immediate");
16739 || GET_MODE (target) != tmode
16740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16741 target = gen_reg_rtx (tmode);
16742 pat = GEN_FCN (icode) (target, op0, op1);
16748 case IX86_BUILTIN_PSLLWI128:
16749 icode = CODE_FOR_ashlv8hi3;
16751 case IX86_BUILTIN_PSLLDI128:
16752 icode = CODE_FOR_ashlv4si3;
16754 case IX86_BUILTIN_PSLLQI128:
16755 icode = CODE_FOR_ashlv2di3;
16757 case IX86_BUILTIN_PSRAWI128:
16758 icode = CODE_FOR_ashrv8hi3;
16760 case IX86_BUILTIN_PSRADI128:
16761 icode = CODE_FOR_ashrv4si3;
16763 case IX86_BUILTIN_PSRLWI128:
16764 icode = CODE_FOR_lshrv8hi3;
16766 case IX86_BUILTIN_PSRLDI128:
16767 icode = CODE_FOR_lshrv4si3;
16769 case IX86_BUILTIN_PSRLQI128:
16770 icode = CODE_FOR_lshrv2di3;
16773 arg0 = TREE_VALUE (arglist);
16774 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16775 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16776 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16778 if (GET_CODE (op1) != CONST_INT)
16780 error ("shift must be an immediate");
16783 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16784 op1 = GEN_INT (255);
16786 tmode = insn_data[icode].operand[0].mode;
16787 mode1 = insn_data[icode].operand[1].mode;
16788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16789 op0 = copy_to_reg (op0);
16791 target = gen_reg_rtx (tmode);
16792 pat = GEN_FCN (icode) (target, op0, op1);
16798 case IX86_BUILTIN_PSLLW128:
16799 icode = CODE_FOR_ashlv8hi3;
16801 case IX86_BUILTIN_PSLLD128:
16802 icode = CODE_FOR_ashlv4si3;
16804 case IX86_BUILTIN_PSLLQ128:
16805 icode = CODE_FOR_ashlv2di3;
16807 case IX86_BUILTIN_PSRAW128:
16808 icode = CODE_FOR_ashrv8hi3;
16810 case IX86_BUILTIN_PSRAD128:
16811 icode = CODE_FOR_ashrv4si3;
16813 case IX86_BUILTIN_PSRLW128:
16814 icode = CODE_FOR_lshrv8hi3;
16816 case IX86_BUILTIN_PSRLD128:
16817 icode = CODE_FOR_lshrv4si3;
16819 case IX86_BUILTIN_PSRLQ128:
16820 icode = CODE_FOR_lshrv2di3;
16823 arg0 = TREE_VALUE (arglist);
16824 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16825 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16826 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16828 tmode = insn_data[icode].operand[0].mode;
16829 mode1 = insn_data[icode].operand[1].mode;
16831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16832 op0 = copy_to_reg (op0);
16834 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16835 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16836 op1 = copy_to_reg (op1);
16838 target = gen_reg_rtx (tmode);
16839 pat = GEN_FCN (icode) (target, op0, op1);
16845 case IX86_BUILTIN_PSLLDQI128:
16846 case IX86_BUILTIN_PSRLDQI128:
16847 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16848 : CODE_FOR_sse2_lshrti3);
16849 arg0 = TREE_VALUE (arglist);
16850 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16851 op0 = expand_normal (arg0);
16852 op1 = expand_normal (arg1);
16853 tmode = insn_data[icode].operand[0].mode;
16854 mode1 = insn_data[icode].operand[1].mode;
16855 mode2 = insn_data[icode].operand[2].mode;
16857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16859 op0 = copy_to_reg (op0);
16860 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16862 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16864 error ("shift must be an immediate");
16867 target = gen_reg_rtx (V2DImode);
16868 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16875 case IX86_BUILTIN_FEMMS:
16876 emit_insn (gen_mmx_femms ());
16879 case IX86_BUILTIN_PAVGUSB:
16880 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16882 case IX86_BUILTIN_PF2ID:
16883 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16885 case IX86_BUILTIN_PFACC:
16886 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16888 case IX86_BUILTIN_PFADD:
16889 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16891 case IX86_BUILTIN_PFCMPEQ:
16892 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16894 case IX86_BUILTIN_PFCMPGE:
16895 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16897 case IX86_BUILTIN_PFCMPGT:
16898 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16900 case IX86_BUILTIN_PFMAX:
16901 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16903 case IX86_BUILTIN_PFMIN:
16904 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16906 case IX86_BUILTIN_PFMUL:
16907 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16909 case IX86_BUILTIN_PFRCP:
16910 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16912 case IX86_BUILTIN_PFRCPIT1:
16913 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16915 case IX86_BUILTIN_PFRCPIT2:
16916 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16918 case IX86_BUILTIN_PFRSQIT1:
16919 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16921 case IX86_BUILTIN_PFRSQRT:
16922 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16924 case IX86_BUILTIN_PFSUB:
16925 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16927 case IX86_BUILTIN_PFSUBR:
16928 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16930 case IX86_BUILTIN_PI2FD:
16931 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16933 case IX86_BUILTIN_PMULHRW:
16934 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16936 case IX86_BUILTIN_PF2IW:
16937 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16939 case IX86_BUILTIN_PFNACC:
16940 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16942 case IX86_BUILTIN_PFPNACC:
16943 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16945 case IX86_BUILTIN_PI2FW:
16946 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16948 case IX86_BUILTIN_PSWAPDSI:
16949 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16951 case IX86_BUILTIN_PSWAPDSF:
16952 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16954 case IX86_BUILTIN_SQRTSD:
16955 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16956 case IX86_BUILTIN_LOADUPD:
16957 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16958 case IX86_BUILTIN_STOREUPD:
16959 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16961 case IX86_BUILTIN_MFENCE:
16962 emit_insn (gen_sse2_mfence ());
16964 case IX86_BUILTIN_LFENCE:
16965 emit_insn (gen_sse2_lfence ());
16968 case IX86_BUILTIN_CLFLUSH:
16969 arg0 = TREE_VALUE (arglist);
16970 op0 = expand_normal (arg0);
16971 icode = CODE_FOR_sse2_clflush;
16972 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16973 op0 = copy_to_mode_reg (Pmode, op0);
16975 emit_insn (gen_sse2_clflush (op0));
16978 case IX86_BUILTIN_MOVNTPD:
16979 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16980 case IX86_BUILTIN_MOVNTDQ:
16981 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16982 case IX86_BUILTIN_MOVNTI:
16983 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16985 case IX86_BUILTIN_LOADDQU:
16986 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16987 case IX86_BUILTIN_STOREDQU:
16988 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16990 case IX86_BUILTIN_MONITOR:
16991 arg0 = TREE_VALUE (arglist);
16992 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16993 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16994 op0 = expand_normal (arg0);
16995 op1 = expand_normal (arg1);
16996 op2 = expand_normal (arg2);
16998 op0 = copy_to_mode_reg (Pmode, op0);
17000 op1 = copy_to_mode_reg (SImode, op1);
17002 op2 = copy_to_mode_reg (SImode, op2);
17004 emit_insn (gen_sse3_monitor (op0, op1, op2));
17006 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17009 case IX86_BUILTIN_MWAIT:
17010 arg0 = TREE_VALUE (arglist);
17011 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17012 op0 = expand_normal (arg0);
17013 op1 = expand_normal (arg1);
17015 op0 = copy_to_mode_reg (SImode, op0);
17017 op1 = copy_to_mode_reg (SImode, op1);
17018 emit_insn (gen_sse3_mwait (op0, op1));
17021 case IX86_BUILTIN_LDDQU:
17022 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17025 case IX86_BUILTIN_PALIGNR:
17026 case IX86_BUILTIN_PALIGNR128:
17027 if (fcode == IX86_BUILTIN_PALIGNR)
17029 icode = CODE_FOR_ssse3_palignrdi;
17034 icode = CODE_FOR_ssse3_palignrti;
17037 arg0 = TREE_VALUE (arglist);
17038 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17039 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17042 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17043 tmode = insn_data[icode].operand[0].mode;
17044 mode1 = insn_data[icode].operand[1].mode;
17045 mode2 = insn_data[icode].operand[2].mode;
17046 mode3 = insn_data[icode].operand[3].mode;
17048 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17050 op0 = copy_to_reg (op0);
17051 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17053 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17055 op1 = copy_to_reg (op1);
17056 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17058 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17060 error ("shift must be an immediate");
17063 target = gen_reg_rtx (mode);
17064 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17071 case IX86_BUILTIN_MOVNTSD:
17072 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17074 case IX86_BUILTIN_MOVNTSS:
17075 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17077 case IX86_BUILTIN_INSERTQ:
17078 case IX86_BUILTIN_EXTRQ:
17079 icode = (fcode == IX86_BUILTIN_EXTRQ
17080 ? CODE_FOR_sse4a_extrq
17081 : CODE_FOR_sse4a_insertq);
17082 arg0 = TREE_VALUE (arglist);
17083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17084 op0 = expand_normal (arg0);
17085 op1 = expand_normal (arg1);
17086 tmode = insn_data[icode].operand[0].mode;
17087 mode1 = insn_data[icode].operand[1].mode;
17088 mode2 = insn_data[icode].operand[2].mode;
17089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17090 op0 = copy_to_mode_reg (mode1, op0);
17091 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17092 op1 = copy_to_mode_reg (mode2, op1);
17093 if (optimize || target == 0
17094 || GET_MODE (target) != tmode
17095 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17096 target = gen_reg_rtx (tmode);
17097 pat = GEN_FCN (icode) (target, op0, op1);
17103 case IX86_BUILTIN_EXTRQI:
17104 icode = CODE_FOR_sse4a_extrqi;
17105 arg0 = TREE_VALUE (arglist);
17106 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17107 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17108 op0 = expand_normal (arg0);
17109 op1 = expand_normal (arg1);
17110 op2 = expand_normal (arg2);
17111 tmode = insn_data[icode].operand[0].mode;
17112 mode1 = insn_data[icode].operand[1].mode;
17113 mode2 = insn_data[icode].operand[2].mode;
17114 mode3 = insn_data[icode].operand[3].mode;
17115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17116 op0 = copy_to_mode_reg (mode1, op0);
17117 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17119 error ("index mask must be an immediate");
17120 return gen_reg_rtx (tmode);
17122 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17124 error ("length mask must be an immediate");
17125 return gen_reg_rtx (tmode);
17127 if (optimize || target == 0
17128 || GET_MODE (target) != tmode
17129 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17130 target = gen_reg_rtx (tmode);
17131 pat = GEN_FCN (icode) (target, op0, op1, op2);
17137 case IX86_BUILTIN_INSERTQI:
17138 icode = CODE_FOR_sse4a_insertqi;
17139 arg0 = TREE_VALUE (arglist);
17140 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17141 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17142 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17143 op0 = expand_normal (arg0);
17144 op1 = expand_normal (arg1);
17145 op2 = expand_normal (arg2);
17146 op3 = expand_normal (arg3);
17147 tmode = insn_data[icode].operand[0].mode;
17148 mode1 = insn_data[icode].operand[1].mode;
17149 mode2 = insn_data[icode].operand[2].mode;
17150 mode3 = insn_data[icode].operand[3].mode;
17151 mode4 = insn_data[icode].operand[4].mode;
17153 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17154 op0 = copy_to_mode_reg (mode1, op0);
17156 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17157 op1 = copy_to_mode_reg (mode2, op1);
17159 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17161 error ("index mask must be an immediate");
17162 return gen_reg_rtx (tmode);
17164 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17166 error ("length mask must be an immediate");
17167 return gen_reg_rtx (tmode);
17169 if (optimize || target == 0
17170 || GET_MODE (target) != tmode
17171 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17172 target = gen_reg_rtx (tmode);
17173 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17179 case IX86_BUILTIN_VEC_INIT_V2SI:
17180 case IX86_BUILTIN_VEC_INIT_V4HI:
17181 case IX86_BUILTIN_VEC_INIT_V8QI:
17182 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17184 case IX86_BUILTIN_VEC_EXT_V2DF:
17185 case IX86_BUILTIN_VEC_EXT_V2DI:
17186 case IX86_BUILTIN_VEC_EXT_V4SF:
17187 case IX86_BUILTIN_VEC_EXT_V4SI:
17188 case IX86_BUILTIN_VEC_EXT_V8HI:
17189 case IX86_BUILTIN_VEC_EXT_V16QI:
17190 case IX86_BUILTIN_VEC_EXT_V2SI:
17191 case IX86_BUILTIN_VEC_EXT_V4HI:
17192 return ix86_expand_vec_ext_builtin (arglist, target);
17194 case IX86_BUILTIN_VEC_SET_V8HI:
17195 case IX86_BUILTIN_VEC_SET_V4HI:
17196 return ix86_expand_vec_set_builtin (arglist);
17202 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17203 if (d->code == fcode)
17205 /* Compares are treated specially. */
17206 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17207 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17208 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17209 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17210 return ix86_expand_sse_compare (d, arglist, target);
17212 return ix86_expand_binop_builtin (d->icode, arglist, target);
17215 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17216 if (d->code == fcode)
17217 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17219 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17220 if (d->code == fcode)
17221 return ix86_expand_sse_comi (d, arglist, target);
17223 gcc_unreachable ();
17226 /* Store OPERAND to the memory after reload is completed. This means
17227 that we can't easily use assign_stack_local. */
17229 ix86_force_to_memory (enum machine_mode mode, rtx operand)
17233 gcc_assert (reload_completed);
17234 if (TARGET_RED_ZONE)
17236 result = gen_rtx_MEM (mode,
17237 gen_rtx_PLUS (Pmode,
17239 GEN_INT (-RED_ZONE_SIZE)));
17240 emit_move_insn (result, operand);
17242 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17248 operand = gen_lowpart (DImode, operand);
17252 gen_rtx_SET (VOIDmode,
17253 gen_rtx_MEM (DImode,
17254 gen_rtx_PRE_DEC (DImode,
17255 stack_pointer_rtx)),
17259 gcc_unreachable ();
17261 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17270 split_di (&operand, 1, operands, operands + 1);
17272 gen_rtx_SET (VOIDmode,
17273 gen_rtx_MEM (SImode,
17274 gen_rtx_PRE_DEC (Pmode,
17275 stack_pointer_rtx)),
17278 gen_rtx_SET (VOIDmode,
17279 gen_rtx_MEM (SImode,
17280 gen_rtx_PRE_DEC (Pmode,
17281 stack_pointer_rtx)),
17286 /* Store HImodes as SImodes. */
17287 operand = gen_lowpart (SImode, operand);
17291 gen_rtx_SET (VOIDmode,
17292 gen_rtx_MEM (GET_MODE (operand),
17293 gen_rtx_PRE_DEC (SImode,
17294 stack_pointer_rtx)),
17298 gcc_unreachable ();
17300 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17305 /* Free operand from the memory. */
17307 ix86_free_from_memory (enum machine_mode mode)
17309 if (!TARGET_RED_ZONE)
17313 if (mode == DImode || TARGET_64BIT)
17317 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17318 to pop or add instruction if registers are available. */
17319 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17320 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17325 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17326 QImode must go into class Q_REGS.
17327 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17328 movdf to do mem-to-mem moves through integer regs. */
17330 ix86_preferred_reload_class (rtx x, enum reg_class class)
17332 enum machine_mode mode = GET_MODE (x);
17334 /* We're only allowed to return a subclass of CLASS. Many of the
17335 following checks fail for NO_REGS, so eliminate that early. */
17336 if (class == NO_REGS)
17339 /* All classes can load zeros. */
17340 if (x == CONST0_RTX (mode))
17343 /* Force constants into memory if we are loading a (nonzero) constant into
17344 an MMX or SSE register. This is because there are no MMX/SSE instructions
17345 to load from a constant. */
17347 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17350 /* Prefer SSE regs only, if we can use them for math. */
17351 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17352 return SSE_CLASS_P (class) ? class : NO_REGS;
17354 /* Floating-point constants need more complex checks. */
17355 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17357 /* General regs can load everything. */
17358 if (reg_class_subset_p (class, GENERAL_REGS))
17361 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17362 zero above. We only want to wind up preferring 80387 registers if
17363 we plan on doing computation with them. */
17365 && standard_80387_constant_p (x))
17367 /* Limit class to non-sse. */
17368 if (class == FLOAT_SSE_REGS)
17370 if (class == FP_TOP_SSE_REGS)
17372 if (class == FP_SECOND_SSE_REGS)
17373 return FP_SECOND_REG;
17374 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17381 /* Generally when we see PLUS here, it's the function invariant
17382 (plus soft-fp const_int). Which can only be computed into general
17384 if (GET_CODE (x) == PLUS)
17385 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17387 /* QImode constants are easy to load, but non-constant QImode data
17388 must go into Q_REGS. */
17389 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17391 if (reg_class_subset_p (class, Q_REGS))
17393 if (reg_class_subset_p (Q_REGS, class))
17401 /* Discourage putting floating-point values in SSE registers unless
17402 SSE math is being used, and likewise for the 387 registers. */
17404 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17406 enum machine_mode mode = GET_MODE (x);
17408 /* Restrict the output reload class to the register bank that we are doing
17409 math on. If we would like not to return a subset of CLASS, reject this
17410 alternative: if reload cannot do this, it will still use its choice. */
17411 mode = GET_MODE (x);
17412 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17413 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17415 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17417 if (class == FP_TOP_SSE_REGS)
17419 else if (class == FP_SECOND_SSE_REGS)
17420 return FP_SECOND_REG;
17422 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17428 /* If we are copying between general and FP registers, we need a memory
17429 location. The same is true for SSE and MMX registers.
17431 The macro can't work reliably when one of the CLASSES is class containing
17432 registers from multiple units (SSE, MMX, integer). We avoid this by never
17433 combining those units in single alternative in the machine description.
17434 Ensure that this constraint holds to avoid unexpected surprises.
17436 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17437 enforce these sanity checks. */
17440 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17441 enum machine_mode mode, int strict)
17443 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17444 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17445 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17446 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17447 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17448 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17450 gcc_assert (!strict);
17454 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17457 /* ??? This is a lie. We do have moves between mmx/general, and for
17458 mmx/sse2. But by saying we need secondary memory we discourage the
17459 register allocator from using the mmx registers unless needed. */
17460 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17463 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17465 /* SSE1 doesn't have any direct moves from other classes. */
17469 /* If the target says that inter-unit moves are more expensive
17470 than moving through memory, then don't generate them. */
17471 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17474 /* Between SSE and general, we have moves no larger than word size. */
17475 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17478 /* ??? For the cost of one register reformat penalty, we could use
17479 the same instructions to move SFmode and DFmode data, but the
17480 relevant move patterns don't support those alternatives. */
17481 if (mode == SFmode || mode == DFmode)
17488 /* Return true if the registers in CLASS cannot represent the change from
17489 modes FROM to TO. */
17492 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17493 enum reg_class class)
17498 /* x87 registers can't do subreg at all, as all values are reformatted
17499 to extended precision. */
17500 if (MAYBE_FLOAT_CLASS_P (class))
17503 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17505 /* Vector registers do not support QI or HImode loads. If we don't
17506 disallow a change to these modes, reload will assume it's ok to
17507 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17508 the vec_dupv4hi pattern. */
17509 if (GET_MODE_SIZE (from) < 4)
17512 /* Vector registers do not support subreg with nonzero offsets, which
17513 are otherwise valid for integer registers. Since we can't see
17514 whether we have a nonzero offset from here, prohibit all
17515 nonparadoxical subregs changing size. */
17516 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17523 /* Return the cost of moving data from a register in class CLASS1 to
17524 one in class CLASS2.
17526 It is not required that the cost always equal 2 when FROM is the same as TO;
17527 on some machines it is expensive to move between registers if they are not
17528 general registers. */
17531 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17532 enum reg_class class2)
17534 /* In case we require secondary memory, compute cost of the store followed
17535 by load. In order to avoid bad register allocation choices, we need
17536 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17538 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17542 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17543 MEMORY_MOVE_COST (mode, class1, 1));
17544 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17545 MEMORY_MOVE_COST (mode, class2, 1));
17547 /* In case of copying from general_purpose_register we may emit multiple
17548 stores followed by single load causing memory size mismatch stall.
17549 Count this as arbitrarily high cost of 20. */
17550 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17553 /* In the case of FP/MMX moves, the registers actually overlap, and we
17554 have to switch modes in order to treat them differently. */
17555 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17556 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17562 /* Moves between SSE/MMX and integer unit are expensive. */
17563 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17564 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17565 return ix86_cost->mmxsse_to_integer;
17566 if (MAYBE_FLOAT_CLASS_P (class1))
17567 return ix86_cost->fp_move;
17568 if (MAYBE_SSE_CLASS_P (class1))
17569 return ix86_cost->sse_move;
17570 if (MAYBE_MMX_CLASS_P (class1))
17571 return ix86_cost->mmx_move;
17575 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17578 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17580 /* Flags and only flags can only hold CCmode values. */
17581 if (CC_REGNO_P (regno))
17582 return GET_MODE_CLASS (mode) == MODE_CC;
17583 if (GET_MODE_CLASS (mode) == MODE_CC
17584 || GET_MODE_CLASS (mode) == MODE_RANDOM
17585 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17587 if (FP_REGNO_P (regno))
17588 return VALID_FP_MODE_P (mode);
17589 if (SSE_REGNO_P (regno))
17591 /* We implement the move patterns for all vector modes into and
17592 out of SSE registers, even when no operation instructions
17594 return (VALID_SSE_REG_MODE (mode)
17595 || VALID_SSE2_REG_MODE (mode)
17596 || VALID_MMX_REG_MODE (mode)
17597 || VALID_MMX_REG_MODE_3DNOW (mode));
17599 if (MMX_REGNO_P (regno))
17601 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17602 so if the register is available at all, then we can move data of
17603 the given mode into or out of it. */
17604 return (VALID_MMX_REG_MODE (mode)
17605 || VALID_MMX_REG_MODE_3DNOW (mode));
17608 if (mode == QImode)
17610 /* Take care for QImode values - they can be in non-QI regs,
17611 but then they do cause partial register stalls. */
17612 if (regno < 4 || TARGET_64BIT)
17614 if (!TARGET_PARTIAL_REG_STALL)
17616 return reload_in_progress || reload_completed;
17618 /* We handle both integer and floats in the general purpose registers. */
17619 else if (VALID_INT_MODE_P (mode))
17621 else if (VALID_FP_MODE_P (mode))
17623 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17624 on to use that value in smaller contexts, this can easily force a
17625 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17626 supporting DImode, allow it. */
17627 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17633 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17634 tieable integer mode. */
17637 ix86_tieable_integer_mode_p (enum machine_mode mode)
17646 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17649 return TARGET_64BIT;
17656 /* Return true if MODE1 is accessible in a register that can hold MODE2
17657 without copying. That is, all register classes that can hold MODE2
17658 can also hold MODE1. */
17661 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17663 if (mode1 == mode2)
17666 if (ix86_tieable_integer_mode_p (mode1)
17667 && ix86_tieable_integer_mode_p (mode2))
17670 /* MODE2 being XFmode implies fp stack or general regs, which means we
17671 can tie any smaller floating point modes to it. Note that we do not
17672 tie this with TFmode. */
17673 if (mode2 == XFmode)
17674 return mode1 == SFmode || mode1 == DFmode;
17676 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17677 that we can tie it with SFmode. */
17678 if (mode2 == DFmode)
17679 return mode1 == SFmode;
17681 /* If MODE2 is only appropriate for an SSE register, then tie with
17682 any other mode acceptable to SSE registers. */
17683 if (GET_MODE_SIZE (mode2) >= 8
17684 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17685 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17687 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17688 with any other mode acceptable to MMX registers. */
17689 if (GET_MODE_SIZE (mode2) == 8
17690 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17691 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17696 /* Return the cost of moving data of mode M between a
17697 register and memory. A value of 2 is the default; this cost is
17698 relative to those in `REGISTER_MOVE_COST'.
17700 If moving between registers and memory is more expensive than
17701 between two registers, you should define this macro to express the
17704 Model also increased moving costs of QImode registers in non
17708 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17710 if (FLOAT_CLASS_P (class))
17727 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17729 if (SSE_CLASS_P (class))
17732 switch (GET_MODE_SIZE (mode))
17746 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17748 if (MMX_CLASS_P (class))
17751 switch (GET_MODE_SIZE (mode))
17762 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17764 switch (GET_MODE_SIZE (mode))
17768 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17769 : ix86_cost->movzbl_load);
17771 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17772 : ix86_cost->int_store[0] + 4);
17775 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17777 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17778 if (mode == TFmode)
17780 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17781 * (((int) GET_MODE_SIZE (mode)
17782 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17786 /* Compute a (partial) cost for rtx X. Return true if the complete
17787 cost has been computed, and false if subexpressions should be
17788 scanned. In either case, *TOTAL contains the cost result. */
17791 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17793 enum machine_mode mode = GET_MODE (x);
17801 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17803 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17805 else if (flag_pic && SYMBOLIC_CONST (x)
17807 || (!GET_CODE (x) != LABEL_REF
17808 && (GET_CODE (x) != SYMBOL_REF
17809 || !SYMBOL_REF_LOCAL_P (x)))))
17816 if (mode == VOIDmode)
17819 switch (standard_80387_constant_p (x))
17824 default: /* Other constants */
17829 /* Start with (MEM (SYMBOL_REF)), since that's where
17830 it'll probably end up. Add a penalty for size. */
17831 *total = (COSTS_N_INSNS (1)
17832 + (flag_pic != 0 && !TARGET_64BIT)
17833 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17839 /* The zero extensions is often completely free on x86_64, so make
17840 it as cheap as possible. */
17841 if (TARGET_64BIT && mode == DImode
17842 && GET_MODE (XEXP (x, 0)) == SImode)
17844 else if (TARGET_ZERO_EXTEND_WITH_AND)
17845 *total = ix86_cost->add;
17847 *total = ix86_cost->movzx;
17851 *total = ix86_cost->movsx;
17855 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17856 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17858 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17861 *total = ix86_cost->add;
17864 if ((value == 2 || value == 3)
17865 && ix86_cost->lea <= ix86_cost->shift_const)
17867 *total = ix86_cost->lea;
17877 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17879 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17881 if (INTVAL (XEXP (x, 1)) > 32)
17882 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17884 *total = ix86_cost->shift_const * 2;
17888 if (GET_CODE (XEXP (x, 1)) == AND)
17889 *total = ix86_cost->shift_var * 2;
17891 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17896 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17897 *total = ix86_cost->shift_const;
17899 *total = ix86_cost->shift_var;
17904 if (FLOAT_MODE_P (mode))
17906 *total = ix86_cost->fmul;
17911 rtx op0 = XEXP (x, 0);
17912 rtx op1 = XEXP (x, 1);
17914 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17916 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17917 for (nbits = 0; value != 0; value &= value - 1)
17921 /* This is arbitrary. */
17924 /* Compute costs correctly for widening multiplication. */
17925 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17926 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17927 == GET_MODE_SIZE (mode))
17929 int is_mulwiden = 0;
17930 enum machine_mode inner_mode = GET_MODE (op0);
17932 if (GET_CODE (op0) == GET_CODE (op1))
17933 is_mulwiden = 1, op1 = XEXP (op1, 0);
17934 else if (GET_CODE (op1) == CONST_INT)
17936 if (GET_CODE (op0) == SIGN_EXTEND)
17937 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17940 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17944 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17947 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17948 + nbits * ix86_cost->mult_bit
17949 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17958 if (FLOAT_MODE_P (mode))
17959 *total = ix86_cost->fdiv;
17961 *total = ix86_cost->divide[MODE_INDEX (mode)];
17965 if (FLOAT_MODE_P (mode))
17966 *total = ix86_cost->fadd;
17967 else if (GET_MODE_CLASS (mode) == MODE_INT
17968 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17970 if (GET_CODE (XEXP (x, 0)) == PLUS
17971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17972 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17973 && CONSTANT_P (XEXP (x, 1)))
17975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17976 if (val == 2 || val == 4 || val == 8)
17978 *total = ix86_cost->lea;
17979 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17980 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17982 *total += rtx_cost (XEXP (x, 1), outer_code);
17986 else if (GET_CODE (XEXP (x, 0)) == MULT
17987 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17989 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17990 if (val == 2 || val == 4 || val == 8)
17992 *total = ix86_cost->lea;
17993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17994 *total += rtx_cost (XEXP (x, 1), outer_code);
17998 else if (GET_CODE (XEXP (x, 0)) == PLUS)
18000 *total = ix86_cost->lea;
18001 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18002 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18003 *total += rtx_cost (XEXP (x, 1), outer_code);
18010 if (FLOAT_MODE_P (mode))
18012 *total = ix86_cost->fadd;
18020 if (!TARGET_64BIT && mode == DImode)
18022 *total = (ix86_cost->add * 2
18023 + (rtx_cost (XEXP (x, 0), outer_code)
18024 << (GET_MODE (XEXP (x, 0)) != DImode))
18025 + (rtx_cost (XEXP (x, 1), outer_code)
18026 << (GET_MODE (XEXP (x, 1)) != DImode)));
18032 if (FLOAT_MODE_P (mode))
18034 *total = ix86_cost->fchs;
18040 if (!TARGET_64BIT && mode == DImode)
18041 *total = ix86_cost->add * 2;
18043 *total = ix86_cost->add;
18047 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18048 && XEXP (XEXP (x, 0), 1) == const1_rtx
18049 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18050 && XEXP (x, 1) == const0_rtx)
18052 /* This kind of construct is implemented using test[bwl].
18053 Treat it as if we had an AND. */
18054 *total = (ix86_cost->add
18055 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18056 + rtx_cost (const1_rtx, outer_code));
18062 if (!TARGET_SSE_MATH
18064 || (mode == DFmode && !TARGET_SSE2))
18065 /* For standard 80387 constants, raise the cost to prevent
18066 compress_float_constant() to generate load from memory. */
18067 switch (standard_80387_constant_p (XEXP (x, 0)))
18077 *total = (x86_ext_80387_constants & TUNEMASK
18084 if (FLOAT_MODE_P (mode))
18085 *total = ix86_cost->fabs;
18089 if (FLOAT_MODE_P (mode))
18090 *total = ix86_cost->fsqrt;
18094 if (XINT (x, 1) == UNSPEC_TP)
18105 static int current_machopic_label_num;
18107 /* Given a symbol name and its associated stub, write out the
18108 definition of the stub. */
18111 machopic_output_stub (FILE *file, const char *symb, const char *stub)
18113 unsigned int length;
18114 char *binder_name, *symbol_name, lazy_ptr_name[32];
18115 int label = ++current_machopic_label_num;
18117 /* For 64-bit we shouldn't get here. */
18118 gcc_assert (!TARGET_64BIT);
18120 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18121 symb = (*targetm.strip_name_encoding) (symb);
18123 length = strlen (stub);
18124 binder_name = alloca (length + 32);
18125 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18127 length = strlen (symb);
18128 symbol_name = alloca (length + 32);
18129 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18131 sprintf (lazy_ptr_name, "L%d$lz", label);
18134 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18136 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18138 fprintf (file, "%s:\n", stub);
18139 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18143 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18144 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18145 fprintf (file, "\tjmp\t*%%edx\n");
18148 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18150 fprintf (file, "%s:\n", binder_name);
18154 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18155 fprintf (file, "\tpushl\t%%eax\n");
18158 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18160 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18162 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18163 fprintf (file, "%s:\n", lazy_ptr_name);
18164 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18165 fprintf (file, "\t.long %s\n", binder_name);
18169 darwin_x86_file_end (void)
18171 darwin_file_end ();
18174 #endif /* TARGET_MACHO */
18176 /* Order the registers for register allocator. */
18179 x86_order_regs_for_local_alloc (void)
18184 /* First allocate the local general purpose registers. */
18185 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18186 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18187 reg_alloc_order [pos++] = i;
18189 /* Global general purpose registers. */
18190 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18191 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18192 reg_alloc_order [pos++] = i;
18194 /* x87 registers come first in case we are doing FP math
18196 if (!TARGET_SSE_MATH)
18197 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18198 reg_alloc_order [pos++] = i;
18200 /* SSE registers. */
18201 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18202 reg_alloc_order [pos++] = i;
18203 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18204 reg_alloc_order [pos++] = i;
18206 /* x87 registers. */
18207 if (TARGET_SSE_MATH)
18208 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18209 reg_alloc_order [pos++] = i;
18211 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18212 reg_alloc_order [pos++] = i;
18214 /* Initialize the rest of array as we do not allocate some registers
18216 while (pos < FIRST_PSEUDO_REGISTER)
18217 reg_alloc_order [pos++] = 0;
18220 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18221 struct attribute_spec.handler. */
18223 ix86_handle_struct_attribute (tree *node, tree name,
18224 tree args ATTRIBUTE_UNUSED,
18225 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18228 if (DECL_P (*node))
18230 if (TREE_CODE (*node) == TYPE_DECL)
18231 type = &TREE_TYPE (*node);
18236 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18237 || TREE_CODE (*type) == UNION_TYPE)))
18239 warning (OPT_Wattributes, "%qs attribute ignored",
18240 IDENTIFIER_POINTER (name));
18241 *no_add_attrs = true;
18244 else if ((is_attribute_p ("ms_struct", name)
18245 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18246 || ((is_attribute_p ("gcc_struct", name)
18247 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18249 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18250 IDENTIFIER_POINTER (name));
18251 *no_add_attrs = true;
18258 ix86_ms_bitfield_layout_p (tree record_type)
18260 return (TARGET_MS_BITFIELD_LAYOUT &&
18261 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18262 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18265 /* Returns an expression indicating where the this parameter is
18266 located on entry to the FUNCTION. */
18269 x86_this_parameter (tree function)
18271 tree type = TREE_TYPE (function);
18275 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18276 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18279 if (ix86_function_regparm (type, function) > 0)
18283 parm = TYPE_ARG_TYPES (type);
18284 /* Figure out whether or not the function has a variable number of
18286 for (; parm; parm = TREE_CHAIN (parm))
18287 if (TREE_VALUE (parm) == void_type_node)
18289 /* If not, the this parameter is in the first argument. */
18293 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18295 return gen_rtx_REG (SImode, regno);
18299 if (aggregate_value_p (TREE_TYPE (type), type))
18300 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18302 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18305 /* Determine whether x86_output_mi_thunk can succeed. */
18308 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18309 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18310 HOST_WIDE_INT vcall_offset, tree function)
18312 /* 64-bit can handle anything. */
18316 /* For 32-bit, everything's fine if we have one free register. */
18317 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18320 /* Need a free register for vcall_offset. */
18324 /* Need a free register for GOT references. */
18325 if (flag_pic && !(*targetm.binds_local_p) (function))
18328 /* Otherwise ok. */
18332 /* Output the assembler code for a thunk function. THUNK_DECL is the
18333 declaration for the thunk function itself, FUNCTION is the decl for
18334 the target function. DELTA is an immediate constant offset to be
18335 added to THIS. If VCALL_OFFSET is nonzero, the word at
18336 *(*this + vcall_offset) should be added to THIS. */
18339 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18340 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18341 HOST_WIDE_INT vcall_offset, tree function)
18344 rtx this = x86_this_parameter (function);
18347 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18348 pull it in now and let DELTA benefit. */
18351 else if (vcall_offset)
18353 /* Put the this parameter into %eax. */
18355 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18356 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18359 this_reg = NULL_RTX;
18361 /* Adjust the this parameter by a fixed constant. */
18364 xops[0] = GEN_INT (delta);
18365 xops[1] = this_reg ? this_reg : this;
18368 if (!x86_64_general_operand (xops[0], DImode))
18370 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18372 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18376 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18379 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18382 /* Adjust the this parameter by a value stored in the vtable. */
18386 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18389 int tmp_regno = 2 /* ECX */;
18390 if (lookup_attribute ("fastcall",
18391 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18392 tmp_regno = 0 /* EAX */;
18393 tmp = gen_rtx_REG (SImode, tmp_regno);
18396 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18399 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18401 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18403 /* Adjust the this parameter. */
18404 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18405 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18407 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18408 xops[0] = GEN_INT (vcall_offset);
18410 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18411 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18413 xops[1] = this_reg;
18415 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18417 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18420 /* If necessary, drop THIS back to its stack slot. */
18421 if (this_reg && this_reg != this)
18423 xops[0] = this_reg;
18425 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18428 xops[0] = XEXP (DECL_RTL (function), 0);
18431 if (!flag_pic || (*targetm.binds_local_p) (function))
18432 output_asm_insn ("jmp\t%P0", xops);
18435 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18436 tmp = gen_rtx_CONST (Pmode, tmp);
18437 tmp = gen_rtx_MEM (QImode, tmp);
18439 output_asm_insn ("jmp\t%A0", xops);
18444 if (!flag_pic || (*targetm.binds_local_p) (function))
18445 output_asm_insn ("jmp\t%P0", xops);
18450 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18451 tmp = (gen_rtx_SYMBOL_REF
18453 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18454 tmp = gen_rtx_MEM (QImode, tmp);
18456 output_asm_insn ("jmp\t%0", xops);
18459 #endif /* TARGET_MACHO */
18461 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18462 output_set_got (tmp, NULL_RTX);
18465 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18466 output_asm_insn ("jmp\t{*}%1", xops);
18472 x86_file_start (void)
18474 default_file_start ();
18476 darwin_file_start ();
18478 if (X86_FILE_START_VERSION_DIRECTIVE)
18479 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18480 if (X86_FILE_START_FLTUSED)
18481 fputs ("\t.global\t__fltused\n", asm_out_file);
18482 if (ix86_asm_dialect == ASM_INTEL)
18483 fputs ("\t.intel_syntax\n", asm_out_file);
18487 x86_field_alignment (tree field, int computed)
18489 enum machine_mode mode;
18490 tree type = TREE_TYPE (field);
18492 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18494 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18495 ? get_inner_array_type (type) : type);
18496 if (mode == DFmode || mode == DCmode
18497 || GET_MODE_CLASS (mode) == MODE_INT
18498 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18499 return MIN (32, computed);
18503 /* Output assembler code to FILE to increment profiler label # LABELNO
18504 for profiling a function entry. */
18506 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18511 #ifndef NO_PROFILE_COUNTERS
18512 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18514 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18518 #ifndef NO_PROFILE_COUNTERS
18519 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18521 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18525 #ifndef NO_PROFILE_COUNTERS
18526 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18527 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18529 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18533 #ifndef NO_PROFILE_COUNTERS
18534 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18535 PROFILE_COUNT_REGISTER);
18537 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18541 /* We don't have exact information about the insn sizes, but we may assume
18542 quite safely that we are informed about all 1 byte insns and memory
18543 address sizes. This is enough to eliminate unnecessary padding in
18547 min_insn_size (rtx insn)
18551 if (!INSN_P (insn) || !active_insn_p (insn))
18554 /* Discard alignments we've emit and jump instructions. */
18555 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18556 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18558 if (GET_CODE (insn) == JUMP_INSN
18559 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18560 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18563 /* Important case - calls are always 5 bytes.
18564 It is common to have many calls in the row. */
18565 if (GET_CODE (insn) == CALL_INSN
18566 && symbolic_reference_mentioned_p (PATTERN (insn))
18567 && !SIBLING_CALL_P (insn))
18569 if (get_attr_length (insn) <= 1)
18572 /* For normal instructions we may rely on the sizes of addresses
18573 and the presence of symbol to require 4 bytes of encoding.
18574 This is not the case for jumps where references are PC relative. */
18575 if (GET_CODE (insn) != JUMP_INSN)
18577 l = get_attr_length_address (insn);
18578 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18587 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18591 ix86_avoid_jump_misspredicts (void)
18593 rtx insn, start = get_insns ();
18594 int nbytes = 0, njumps = 0;
18597 /* Look for all minimal intervals of instructions containing 4 jumps.
18598 The intervals are bounded by START and INSN. NBYTES is the total
18599 size of instructions in the interval including INSN and not including
18600 START. When the NBYTES is smaller than 16 bytes, it is possible
18601 that the end of START and INSN ends up in the same 16byte page.
18603 The smallest offset in the page INSN can start is the case where START
18604 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
18605 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
18607 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18610 nbytes += min_insn_size (insn);
18612 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18613 INSN_UID (insn), min_insn_size (insn));
18614 if ((GET_CODE (insn) == JUMP_INSN
18615 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18616 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18617 || GET_CODE (insn) == CALL_INSN)
18624 start = NEXT_INSN (start);
18625 if ((GET_CODE (start) == JUMP_INSN
18626 && GET_CODE (PATTERN (start)) != ADDR_VEC
18627 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18628 || GET_CODE (start) == CALL_INSN)
18629 njumps--, isjump = 1;
18632 nbytes -= min_insn_size (start);
18634 gcc_assert (njumps >= 0);
18636 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18637 INSN_UID (start), INSN_UID (insn), nbytes);
18639 if (njumps == 3 && isjump && nbytes < 16)
18641 int padsize = 15 - nbytes + min_insn_size (insn);
18644 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18645 INSN_UID (insn), padsize);
18646 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18651 /* AMD Athlon works faster
18652 when RET is not destination of conditional jump or directly preceded
18653 by other jump instruction. We avoid the penalty by inserting NOP just
18654 before the RET instructions in such cases. */
18656 ix86_pad_returns (void)
18661 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18663 basic_block bb = e->src;
18664 rtx ret = BB_END (bb);
18666 bool replace = false;
18668 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18669 || !maybe_hot_bb_p (bb))
18671 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18672 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18674 if (prev && GET_CODE (prev) == CODE_LABEL)
18679 FOR_EACH_EDGE (e, ei, bb->preds)
18680 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18681 && !(e->flags & EDGE_FALLTHRU))
18686 prev = prev_active_insn (ret);
18688 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18689 || GET_CODE (prev) == CALL_INSN))
18691 /* Empty functions get branch mispredict even when the jump destination
18692 is not visible to us. */
18693 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18698 emit_insn_before (gen_return_internal_long (), ret);
18704 /* Implement machine specific optimizations. We implement padding of returns
18705 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18709 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18710 ix86_pad_returns ();
18711 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18712 ix86_avoid_jump_misspredicts ();
18715 /* Return nonzero when QImode register that must be represented via REX prefix
18718 x86_extended_QIreg_mentioned_p (rtx insn)
18721 extract_insn_cached (insn);
18722 for (i = 0; i < recog_data.n_operands; i++)
18723 if (REG_P (recog_data.operand[i])
18724 && REGNO (recog_data.operand[i]) >= 4)
18729 /* Return nonzero when P points to register encoded via REX prefix.
18730 Called via for_each_rtx. */
18732 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18734 unsigned int regno;
18737 regno = REGNO (*p);
18738 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18741 /* Return true when INSN mentions register that must be encoded using REX
18744 x86_extended_reg_mentioned_p (rtx insn)
18746 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18749 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18750 optabs would emit if we didn't have TFmode patterns. */
18753 x86_emit_floatuns (rtx operands[2])
18755 rtx neglab, donelab, i0, i1, f0, in, out;
18756 enum machine_mode mode, inmode;
18758 inmode = GET_MODE (operands[1]);
18759 gcc_assert (inmode == SImode || inmode == DImode);
18762 in = force_reg (inmode, operands[1]);
18763 mode = GET_MODE (out);
18764 neglab = gen_label_rtx ();
18765 donelab = gen_label_rtx ();
18766 i1 = gen_reg_rtx (Pmode);
18767 f0 = gen_reg_rtx (mode);
18769 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18771 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18772 emit_jump_insn (gen_jump (donelab));
18775 emit_label (neglab);
18777 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18778 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18779 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18780 expand_float (f0, i0, 0);
18781 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18783 emit_label (donelab);
18786 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18787 with all elements equal to VAR. Return true if successful. */
18790 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18791 rtx target, rtx val)
18793 enum machine_mode smode, wsmode, wvmode;
18808 val = force_reg (GET_MODE_INNER (mode), val);
18809 x = gen_rtx_VEC_DUPLICATE (mode, val);
18810 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18816 if (TARGET_SSE || TARGET_3DNOW_A)
18818 val = gen_lowpart (SImode, val);
18819 x = gen_rtx_TRUNCATE (HImode, val);
18820 x = gen_rtx_VEC_DUPLICATE (mode, x);
18821 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18843 /* Extend HImode to SImode using a paradoxical SUBREG. */
18844 tmp1 = gen_reg_rtx (SImode);
18845 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18846 /* Insert the SImode value as low element of V4SImode vector. */
18847 tmp2 = gen_reg_rtx (V4SImode);
18848 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18849 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18850 CONST0_RTX (V4SImode),
18852 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18853 /* Cast the V4SImode vector back to a V8HImode vector. */
18854 tmp1 = gen_reg_rtx (V8HImode);
18855 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18856 /* Duplicate the low short through the whole low SImode word. */
18857 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18858 /* Cast the V8HImode vector back to a V4SImode vector. */
18859 tmp2 = gen_reg_rtx (V4SImode);
18860 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18861 /* Replicate the low element of the V4SImode vector. */
18862 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18863 /* Cast the V2SImode back to V8HImode, and store in target. */
18864 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18875 /* Extend QImode to SImode using a paradoxical SUBREG. */
18876 tmp1 = gen_reg_rtx (SImode);
18877 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18878 /* Insert the SImode value as low element of V4SImode vector. */
18879 tmp2 = gen_reg_rtx (V4SImode);
18880 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18881 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18882 CONST0_RTX (V4SImode),
18884 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18885 /* Cast the V4SImode vector back to a V16QImode vector. */
18886 tmp1 = gen_reg_rtx (V16QImode);
18887 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18888 /* Duplicate the low byte through the whole low SImode word. */
18889 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18890 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18891 /* Cast the V16QImode vector back to a V4SImode vector. */
18892 tmp2 = gen_reg_rtx (V4SImode);
18893 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18894 /* Replicate the low element of the V4SImode vector. */
18895 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18896 /* Cast the V2SImode back to V16QImode, and store in target. */
18897 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18905 /* Replicate the value once into the next wider mode and recurse. */
18906 val = convert_modes (wsmode, smode, val, true);
18907 x = expand_simple_binop (wsmode, ASHIFT, val,
18908 GEN_INT (GET_MODE_BITSIZE (smode)),
18909 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18910 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18912 x = gen_reg_rtx (wvmode);
18913 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18914 gcc_unreachable ();
18915 emit_move_insn (target, gen_lowpart (mode, x));
18923 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18924 whose ONE_VAR element is VAR, and other elements are zero. Return true
18928 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18929 rtx target, rtx var, int one_var)
18931 enum machine_mode vsimode;
18947 var = force_reg (GET_MODE_INNER (mode), var);
18948 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18949 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18954 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18955 new_target = gen_reg_rtx (mode);
18957 new_target = target;
18958 var = force_reg (GET_MODE_INNER (mode), var);
18959 x = gen_rtx_VEC_DUPLICATE (mode, var);
18960 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18961 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18964 /* We need to shuffle the value to the correct position, so
18965 create a new pseudo to store the intermediate result. */
18967 /* With SSE2, we can use the integer shuffle insns. */
18968 if (mode != V4SFmode && TARGET_SSE2)
18970 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18972 GEN_INT (one_var == 1 ? 0 : 1),
18973 GEN_INT (one_var == 2 ? 0 : 1),
18974 GEN_INT (one_var == 3 ? 0 : 1)));
18975 if (target != new_target)
18976 emit_move_insn (target, new_target);
18980 /* Otherwise convert the intermediate result to V4SFmode and
18981 use the SSE1 shuffle instructions. */
18982 if (mode != V4SFmode)
18984 tmp = gen_reg_rtx (V4SFmode);
18985 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18990 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18992 GEN_INT (one_var == 1 ? 0 : 1),
18993 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18994 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18996 if (mode != V4SFmode)
18997 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18998 else if (tmp != target)
18999 emit_move_insn (target, tmp);
19001 else if (target != new_target)
19002 emit_move_insn (target, new_target);
19007 vsimode = V4SImode;
19013 vsimode = V2SImode;
19019 /* Zero extend the variable element to SImode and recurse. */
19020 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19022 x = gen_reg_rtx (vsimode);
19023 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19025 gcc_unreachable ();
19027 emit_move_insn (target, gen_lowpart (mode, x));
19035 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19036 consisting of the values in VALS. It is known that all elements
19037 except ONE_VAR are constants. Return true if successful. */
19040 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19041 rtx target, rtx vals, int one_var)
19043 rtx var = XVECEXP (vals, 0, one_var);
19044 enum machine_mode wmode;
19047 const_vec = copy_rtx (vals);
19048 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19049 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19057 /* For the two element vectors, it's just as easy to use
19058 the general case. */
19074 /* There's no way to set one QImode entry easily. Combine
19075 the variable value with its adjacent constant value, and
19076 promote to an HImode set. */
19077 x = XVECEXP (vals, 0, one_var ^ 1);
19080 var = convert_modes (HImode, QImode, var, true);
19081 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19082 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19083 x = GEN_INT (INTVAL (x) & 0xff);
19087 var = convert_modes (HImode, QImode, var, true);
19088 x = gen_int_mode (INTVAL (x) << 8, HImode);
19090 if (x != const0_rtx)
19091 var = expand_simple_binop (HImode, IOR, var, x, var,
19092 1, OPTAB_LIB_WIDEN);
19094 x = gen_reg_rtx (wmode);
19095 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19096 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19098 emit_move_insn (target, gen_lowpart (mode, x));
19105 emit_move_insn (target, const_vec);
19106 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19110 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19111 all values variable, and none identical. */
19114 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19115 rtx target, rtx vals)
19117 enum machine_mode half_mode = GET_MODE_INNER (mode);
19118 rtx op0 = NULL, op1 = NULL;
19119 bool use_vec_concat = false;
19125 if (!mmx_ok && !TARGET_SSE)
19131 /* For the two element vectors, we always implement VEC_CONCAT. */
19132 op0 = XVECEXP (vals, 0, 0);
19133 op1 = XVECEXP (vals, 0, 1);
19134 use_vec_concat = true;
19138 half_mode = V2SFmode;
19141 half_mode = V2SImode;
19147 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19148 Recurse to load the two halves. */
19150 op0 = gen_reg_rtx (half_mode);
19151 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19152 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19154 op1 = gen_reg_rtx (half_mode);
19155 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19156 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19158 use_vec_concat = true;
19169 gcc_unreachable ();
19172 if (use_vec_concat)
19174 if (!register_operand (op0, half_mode))
19175 op0 = force_reg (half_mode, op0);
19176 if (!register_operand (op1, half_mode))
19177 op1 = force_reg (half_mode, op1);
19179 emit_insn (gen_rtx_SET (VOIDmode, target,
19180 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19184 int i, j, n_elts, n_words, n_elt_per_word;
19185 enum machine_mode inner_mode;
19186 rtx words[4], shift;
19188 inner_mode = GET_MODE_INNER (mode);
19189 n_elts = GET_MODE_NUNITS (mode);
19190 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19191 n_elt_per_word = n_elts / n_words;
19192 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19194 for (i = 0; i < n_words; ++i)
19196 rtx word = NULL_RTX;
19198 for (j = 0; j < n_elt_per_word; ++j)
19200 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19201 elt = convert_modes (word_mode, inner_mode, elt, true);
19207 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19208 word, 1, OPTAB_LIB_WIDEN);
19209 word = expand_simple_binop (word_mode, IOR, word, elt,
19210 word, 1, OPTAB_LIB_WIDEN);
19218 emit_move_insn (target, gen_lowpart (mode, words[0]));
19219 else if (n_words == 2)
19221 rtx tmp = gen_reg_rtx (mode);
19222 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19223 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19224 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19225 emit_move_insn (target, tmp);
19227 else if (n_words == 4)
19229 rtx tmp = gen_reg_rtx (V4SImode);
19230 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19231 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19232 emit_move_insn (target, gen_lowpart (mode, tmp));
19235 gcc_unreachable ();
19239 /* Initialize vector TARGET via VALS. Suppress the use of MMX
19240 instructions unless MMX_OK is true. */
19243 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19245 enum machine_mode mode = GET_MODE (target);
19246 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19247 int n_elts = GET_MODE_NUNITS (mode);
19248 int n_var = 0, one_var = -1;
19249 bool all_same = true, all_const_zero = true;
19253 for (i = 0; i < n_elts; ++i)
19255 x = XVECEXP (vals, 0, i);
19256 if (!CONSTANT_P (x))
19257 n_var++, one_var = i;
19258 else if (x != CONST0_RTX (inner_mode))
19259 all_const_zero = false;
19260 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19264 /* Constants are best loaded from the constant pool. */
19267 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19271 /* If all values are identical, broadcast the value. */
19273 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19274 XVECEXP (vals, 0, 0)))
19277 /* Values where only one field is non-constant are best loaded from
19278 the pool and overwritten via move later. */
19282 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19283 XVECEXP (vals, 0, one_var),
19287 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19291 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19295 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19297 enum machine_mode mode = GET_MODE (target);
19298 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19299 bool use_vec_merge = false;
19308 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19309 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19311 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19313 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19314 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19324 /* For the two element vectors, we implement a VEC_CONCAT with
19325 the extraction of the other element. */
19327 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19328 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19331 op0 = val, op1 = tmp;
19333 op0 = tmp, op1 = val;
19335 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19336 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19344 use_vec_merge = true;
19348 /* tmp = target = A B C D */
19349 tmp = copy_to_reg (target);
19350 /* target = A A B B */
19351 emit_insn (gen_sse_unpcklps (target, target, target));
19352 /* target = X A B B */
19353 ix86_expand_vector_set (false, target, val, 0);
19354 /* target = A X C D */
19355 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19356 GEN_INT (1), GEN_INT (0),
19357 GEN_INT (2+4), GEN_INT (3+4)));
19361 /* tmp = target = A B C D */
19362 tmp = copy_to_reg (target);
19363 /* tmp = X B C D */
19364 ix86_expand_vector_set (false, tmp, val, 0);
19365 /* target = A B X D */
19366 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19367 GEN_INT (0), GEN_INT (1),
19368 GEN_INT (0+4), GEN_INT (3+4)));
19372 /* tmp = target = A B C D */
19373 tmp = copy_to_reg (target);
19374 /* tmp = X B C D */
19375 ix86_expand_vector_set (false, tmp, val, 0);
19376 /* target = A B X D */
19377 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19378 GEN_INT (0), GEN_INT (1),
19379 GEN_INT (2+4), GEN_INT (0+4)));
19383 gcc_unreachable ();
19388 /* Element 0 handled by vec_merge below. */
19391 use_vec_merge = true;
19397 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19398 store into element 0, then shuffle them back. */
19402 order[0] = GEN_INT (elt);
19403 order[1] = const1_rtx;
19404 order[2] = const2_rtx;
19405 order[3] = GEN_INT (3);
19406 order[elt] = const0_rtx;
19408 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19409 order[1], order[2], order[3]));
19411 ix86_expand_vector_set (false, target, val, 0);
19413 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19414 order[1], order[2], order[3]));
19418 /* For SSE1, we have to reuse the V4SF code. */
19419 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19420 gen_lowpart (SFmode, val), elt);
19425 use_vec_merge = TARGET_SSE2;
19428 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19439 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19440 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19441 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19445 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19447 emit_move_insn (mem, target);
19449 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19450 emit_move_insn (tmp, val);
19452 emit_move_insn (target, mem);
19457 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19459 enum machine_mode mode = GET_MODE (vec);
19460 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19461 bool use_vec_extr = false;
19474 use_vec_extr = true;
19486 tmp = gen_reg_rtx (mode);
19487 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19488 GEN_INT (elt), GEN_INT (elt),
19489 GEN_INT (elt+4), GEN_INT (elt+4)));
19493 tmp = gen_reg_rtx (mode);
19494 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19498 gcc_unreachable ();
19501 use_vec_extr = true;
19516 tmp = gen_reg_rtx (mode);
19517 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19518 GEN_INT (elt), GEN_INT (elt),
19519 GEN_INT (elt), GEN_INT (elt)));
19523 tmp = gen_reg_rtx (mode);
19524 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19528 gcc_unreachable ();
19531 use_vec_extr = true;
19536 /* For SSE1, we have to reuse the V4SF code. */
19537 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19538 gen_lowpart (V4SFmode, vec), elt);
19544 use_vec_extr = TARGET_SSE2;
19547 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19552 /* ??? Could extract the appropriate HImode element and shift. */
19559 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19560 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19562 /* Let the rtl optimizers know about the zero extension performed. */
19563 if (inner_mode == HImode)
19565 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19566 target = gen_lowpart (SImode, target);
19569 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19573 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19575 emit_move_insn (mem, vec);
19577 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19578 emit_move_insn (target, tmp);
19582 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19583 pattern to reduce; DEST is the destination; IN is the input vector. */
19586 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19588 rtx tmp1, tmp2, tmp3;
19590 tmp1 = gen_reg_rtx (V4SFmode);
19591 tmp2 = gen_reg_rtx (V4SFmode);
19592 tmp3 = gen_reg_rtx (V4SFmode);
19594 emit_insn (gen_sse_movhlps (tmp1, in, in));
19595 emit_insn (fn (tmp2, tmp1, in));
19597 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19598 GEN_INT (1), GEN_INT (1),
19599 GEN_INT (1+4), GEN_INT (1+4)));
19600 emit_insn (fn (dest, tmp2, tmp3));
19603 /* Target hook for scalar_mode_supported_p. */
19605 ix86_scalar_mode_supported_p (enum machine_mode mode)
19607 if (DECIMAL_FLOAT_MODE_P (mode))
19610 return default_scalar_mode_supported_p (mode);
19613 /* Implements target hook vector_mode_supported_p. */
19615 ix86_vector_mode_supported_p (enum machine_mode mode)
19617 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19619 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19621 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19623 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19628 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19630 We do this in the new i386 backend to maintain source compatibility
19631 with the old cc0-based compiler. */
19634 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19635 tree inputs ATTRIBUTE_UNUSED,
19638 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19640 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19642 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19647 /* Return true if this goes in small data/bss. */
19650 ix86_in_large_data_p (tree exp)
19652 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19655 /* Functions are never large data. */
19656 if (TREE_CODE (exp) == FUNCTION_DECL)
19659 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19661 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19662 if (strcmp (section, ".ldata") == 0
19663 || strcmp (section, ".lbss") == 0)
19669 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19671 /* If this is an incomplete type with size 0, then we can't put it
19672 in data because it might be too big when completed. */
19673 if (!size || size > ix86_section_threshold)
19680 ix86_encode_section_info (tree decl, rtx rtl, int first)
19682 default_encode_section_info (decl, rtl, first);
19684 if (TREE_CODE (decl) == VAR_DECL
19685 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19686 && ix86_in_large_data_p (decl))
19687 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19690 /* Worker function for REVERSE_CONDITION. */
19693 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19695 return (mode != CCFPmode && mode != CCFPUmode
19696 ? reverse_condition (code)
19697 : reverse_condition_maybe_unordered (code));
19700 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19704 output_387_reg_move (rtx insn, rtx *operands)
19706 if (REG_P (operands[1])
19707 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19709 if (REGNO (operands[0]) == FIRST_STACK_REG)
19710 return output_387_ffreep (operands, 0);
19711 return "fstp\t%y0";
19713 if (STACK_TOP_P (operands[0]))
19714 return "fld%z1\t%y1";
19718 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19719 FP status register is set. */
19722 ix86_emit_fp_unordered_jump (rtx label)
19724 rtx reg = gen_reg_rtx (HImode);
19727 emit_insn (gen_x86_fnstsw_1 (reg));
19729 if (TARGET_USE_SAHF)
19731 emit_insn (gen_x86_sahf_1 (reg));
19733 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19734 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19738 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19740 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19741 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19744 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19745 gen_rtx_LABEL_REF (VOIDmode, label),
19747 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19748 emit_jump_insn (temp);
19751 /* Output code to perform a log1p XFmode calculation. */
19753 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19755 rtx label1 = gen_label_rtx ();
19756 rtx label2 = gen_label_rtx ();
19758 rtx tmp = gen_reg_rtx (XFmode);
19759 rtx tmp2 = gen_reg_rtx (XFmode);
19761 emit_insn (gen_absxf2 (tmp, op1));
19762 emit_insn (gen_cmpxf (tmp,
19763 CONST_DOUBLE_FROM_REAL_VALUE (
19764 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19766 emit_jump_insn (gen_bge (label1));
19768 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19769 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19770 emit_jump (label2);
19772 emit_label (label1);
19773 emit_move_insn (tmp, CONST1_RTX (XFmode));
19774 emit_insn (gen_addxf3 (tmp, op1, tmp));
19775 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19776 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19778 emit_label (label2);
19781 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19784 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19787 /* With Binutils 2.15, the "@unwind" marker must be specified on
19788 every occurrence of the ".eh_frame" section, not just the first
19791 && strcmp (name, ".eh_frame") == 0)
19793 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19794 flags & SECTION_WRITE ? "aw" : "a");
19797 default_elf_asm_named_section (name, flags, decl);
19800 /* Return the mangling of TYPE if it is an extended fundamental type. */
19802 static const char *
19803 ix86_mangle_fundamental_type (tree type)
19805 switch (TYPE_MODE (type))
19808 /* __float128 is "g". */
19811 /* "long double" or __float80 is "e". */
19818 /* For 32-bit code we can save PIC register setup by using
19819 __stack_chk_fail_local hidden function instead of calling
19820 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19821 register, so it is better to call __stack_chk_fail directly. */
19824 ix86_stack_protect_fail (void)
19826 return TARGET_64BIT
19827 ? default_external_stack_protect_fail ()
19828 : default_hidden_stack_protect_fail ();
19831 /* Select a format to encode pointers in exception handling data. CODE
19832 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19833 true if the symbol may be affected by dynamic relocations.
19835 ??? All x86 object file formats are capable of representing this.
19836 After all, the relocation needed is the same as for the call insn.
19837 Whether or not a particular assembler allows us to enter such, I
19838 guess we'll have to see. */
19840 asm_preferred_eh_data_format (int code, int global)
19844 int type = DW_EH_PE_sdata8;
19846 || ix86_cmodel == CM_SMALL_PIC
19847 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19848 type = DW_EH_PE_sdata4;
19849 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19851 if (ix86_cmodel == CM_SMALL
19852 || (ix86_cmodel == CM_MEDIUM && code))
19853 return DW_EH_PE_udata4;
19854 return DW_EH_PE_absptr;
19857 #include "gt-i386.h"