1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; match the element sizes, in bytes, of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q for 1-, 2-, 4- and 8-byte elements respectively).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every integer vector mode in SSEMODEI.  The visible
;; preparation code passes the mode and operand array to
;; ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives: constraint C
;; into a register, register-or-memory into a register, and register
;; into memory.  The condition rejects memory-to-memory moves.
;; One switch arm returns standard_sse_constant_opcode (presumably the
;; constant alternative); the other visible arm emits movaps when the
;; insn's "mode" attribute is V4SF and movdqa otherwise.
;; The "mode" attribute expression tests optimize_size, TARGET_SSE2 and,
;; for alternative 2 (the store), TARGET_SSE_TYPELESS_STORES; its final
;; value is TI.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 return standard_sse_constant_opcode (insn, operands[1]);
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
74 return "movdqa\t{%1, %0|%0, %1}";
79 [(set_attr "type" "sselog1,ssemov,ssemov")
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
88 (const_string "TI")))])
;; Move expander for V4SF; the visible preparation code calls
;; ix86_expand_vector_move with V4SFmode.
90 (define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
95 ix86_expand_vector_move (V4SFmode, operands);
;; Move insn for V4SF with the same three alternatives as the integer
;; vector move (constraint C -> reg, reg/mem -> reg, reg -> mem).
;; One switch arm returns standard_sse_constant_opcode; the other visible
;; arm always emits movaps.  The "mode" attribute is fixed at V4SF.
99 (define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
104 switch (which_alternative)
107 return standard_sse_constant_opcode (insn, operands[1]);
110 return "movaps\t{%1, %0|%0, %1}";
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
;; Post-reload rewrite of a V4SF register set whose source satisfies
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to its SFmode subreg at byte offset 0 and sets operand 2 to
;; the V4SF zero constant; the replacement RTL duplicates operand 1.
;; NOTE(review): the opening line of this definition is not present in
;; this listing -- confirm the construct against the complete file.
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
124 (vec_duplicate:V4SF (match_dup 1))
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
;; Move expander for V2DF; the visible preparation code calls
;; ix86_expand_vector_move with V2DFmode.
132 (define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
137 ix86_expand_vector_move (V2DFmode, operands);
;; Move insn for V2DF (constraint C -> reg, reg/mem -> reg, reg -> mem);
;; memory-to-memory is rejected by the condition.  Note the condition
;; only requires TARGET_SSE, so the pattern is available without SSE2.
;; The visible output code emits movaps when the "mode" attribute is V4SF
;; and movapd otherwise.  The attribute is V4SF when optimizing for size,
;; when SSE2 is unavailable, or for the store alternative under
;; TARGET_SSE_TYPELESS_STORES; otherwise it is V2DF.
141 (define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
146 switch (which_alternative)
149 return standard_sse_constant_opcode (insn, operands[1]);
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
155 return "movapd\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
168 (const_string "V4SF")
169 (const_string "V2DF")))])
;; Post-reload rewrite (SSE2 only) of a V2DF register set whose source
;; satisfies zero_extended_scalar_load_operand: the result becomes a
;; vec_concat of the DFmode subreg of operand 1 (byte offset 0) with a
;; DFmode zero held in operand 2.
;; NOTE(review): the opening line of this definition is not present in
;; this listing -- confirm the construct against the complete file.
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in SSEMODE; takes one register operand
;; and, in the visible preparation code, calls ix86_expand_push.
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in SSEMODE; the visible
;; preparation code calls ix86_expand_vector_move_misalign.
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned V4SF move (movups), wrapped in an unspec.  Two alternatives:
;; register-or-memory into a register, and register into memory; the
;; condition rejects memory-to-memory.
;; The "mode" attribute is V4SF to match the V4SF operands, in line with
;; the other V4SF patterns in this file; it previously read V2DF, which
;; is the mode of the double-precision twin sse2_movupd.
198 (define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
;; Unaligned V2DF move (movupd), wrapped in an unspec; requires SSE2 and
;; rejects memory-to-memory.
207 (define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
;; Unaligned V16QI move (movdqu), wrapped in an unspec; requires SSE2 and
;; rejects memory-to-memory.  The "mode" attribute is TI.
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
;; movntps: store of a V4SF register to memory, wrapped in an unspec.
225 (define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
;; movntpd: store of a V2DF register to memory, wrapped in an unspec.
;; Note the "type" attribute is ssecvt here, whereas the V4SF sibling
;; sse_movntv4sf uses ssemov.
234 (define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
;; movntdq: store of a V2DI register to memory, wrapped in an unspec.
243 (define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
;; movnti: store of an SImode general register (constraint "r") to
;; memory, wrapped in an unspec.
;; NOTE(review): the "mode" attribute is V2DF although the operands are
;; SImode -- confirm whether this is intentional.
252 (define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "V2DF")])
;; lddqu: load of a V16QI value from memory into a register, wrapped in
;; an unspec.
261 (define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
272 ;; Parallel single-precision floating point arithmetic
274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; all work is delegated to
;; ix86_expand_fp_absneg_operator (NEG, ...), after which expansion is
;; finished (DONE).
276 (define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; all work is delegated to
;; ix86_expand_fp_absneg_operator (ABS, ...), after which expansion is
;; finished (DONE).
282 (define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  Both inputs may be in memory at this point;
;; the preparation statement runs ix86_fixup_binary_operands_no_copy and
;; then falls through to emit the pattern.
288 (define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; addps.  Operand 1 is tied to the output and marked commutative ("%0");
;; operand 2 may be a register or memory.  Validity is checked with
;; ix86_binary_operator_ok.
295 (define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
;; addss intrinsic pattern: operands are V4SF but the emitted instruction
;; is the scalar addss and the "mode" attribute is SF.  Operand 1 must be
;; a register tied to the output (not commutative).
304 (define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
307 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Unlike the addition expander, operand 1
;; must already be a register; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy.
316 (define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; subps.  Operand 1 is a register tied to the output; operand 2 may be a
;; register or memory.
323 (define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
;; subss intrinsic pattern: V4SF operands, scalar subss emitted, "mode"
;; attribute SF.  Operand 1 is a register tied to the output.
332 (define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
;; V4SF multiplication expander; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands.
344 (define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; mulps.  Operand 1 is tied to the output and marked commutative ("%0");
;; validity is checked with ix86_binary_operator_ok.
351 (define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
;; mulss intrinsic pattern: V4SF operands, scalar mulss emitted, "mode"
;; attribute SF.  Operand 1 is a register tied to the output.
360 (define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
363 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register; the
;; preparation statement runs ix86_fixup_binary_operands_no_copy.
372 (define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; divps.  Operand 1 is a register tied to the output; operand 2 may be a
;; register or memory.
379 (define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
;; divss intrinsic pattern: V4SF operands, scalar divss emitted, "mode"
;; attribute SF.  Operand 1 is a register tied to the output.
388 (define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
396 "divss\t{%2, %0|%0, %2}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
;; rcpps, represented as UNSPEC_RCP of a single register-or-memory V4SF
;; input.  The output register is not tied to the input.
400 (define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
;; rcpss intrinsic pattern.  Operand 1 is the register-or-memory input to
;; the unspec; operand 2 is a register tied to the output ("0").  Only
;; operand 1 appears in the assembler template.
409 (define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
414 (match_operand:V4SF 2 "register_operand" "0")
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
;; rsqrtps, represented as UNSPEC_RSQRT of a single register-or-memory
;; V4SF input.
421 (define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
;; rsqrtss intrinsic pattern.  Operand 1 is the register-or-memory input
;; to the unspec; operand 2 is a register tied to the output ("0").
430 (define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
435 (match_operand:V4SF 2 "register_operand" "0")
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
;; sqrtps: V4SF square root of a register-or-memory input, expressed with
;; the generic sqrt RTL operator.
442 (define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
;; sqrtss intrinsic pattern.  Operand 1 is the register-or-memory sqrt
;; input; operand 2 is a register tied to the output ("0").
450 (define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
461 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462 ;; isn't really correct, as those rtl operators aren't defined when
463 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum expander.  When flag_finite_math_only is not set, operand
;; 1 is first forced into a register; the operands are then passed to
;; ix86_fixup_binary_operands_no_copy.
465 (define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; maxps variant enabled only under flag_finite_math_only.  Operand 1 is
;; marked commutative ("%0") here, unlike in the plain *smaxv4sf3.
476 (define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
;; maxps with a fixed operand order: operand 1 is a register tied to the
;; output and is not marked commutative.
486 (define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
;; maxss intrinsic pattern: V4SF operands, scalar maxss emitted, "mode"
;; attribute SF.  Operand 1 is a register tied to the output.
495 (define_insn "sse_vmsmaxv4sf3"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
503 "maxss\t{%2, %0|%0, %2}"
504 [(set_attr "type" "sse")
505 (set_attr "mode" "SF")])
;; V4SF minimum expander.  When flag_finite_math_only is not set, operand
;; 1 is first forced into a register; the operands are then passed to
;; ix86_fixup_binary_operands_no_copy.
507 (define_expand "sminv4sf3"
508 [(set (match_operand:V4SF 0 "register_operand" "")
509 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
510 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
513 if (!flag_finite_math_only)
514 operands[1] = force_reg (V4SFmode, operands[1]);
515 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; minps variant enabled only under flag_finite_math_only.  Operand 1 is
;; marked commutative ("%0") here, unlike in the plain *sminv4sf3.
518 (define_insn "*sminv4sf3_finite"
519 [(set (match_operand:V4SF 0 "register_operand" "=x")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
521 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
522 "TARGET_SSE && flag_finite_math_only
523 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
524 "minps\t{%2, %0|%0, %2}"
525 [(set_attr "type" "sse")
526 (set_attr "mode" "V4SF")])
;; minps with a fixed operand order: operand 1 is a register tied to the
;; output and is not marked commutative.
528 (define_insn "*sminv4sf3"
529 [(set (match_operand:V4SF 0 "register_operand" "=x")
530 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
531 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
533 "minps\t{%2, %0|%0, %2}"
534 [(set_attr "type" "sse")
535 (set_attr "mode" "V4SF")])
;; minss intrinsic pattern: V4SF operands, scalar minss emitted, "mode"
;; attribute SF.  Operand 1 is a register tied to the output.
537 (define_insn "sse_vmsminv4sf3"
538 [(set (match_operand:V4SF 0 "register_operand" "=x")
540 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
541 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
545 "minss\t{%2, %0|%0, %2}"
546 [(set_attr "type" "sse")
547 (set_attr "mode" "SF")])
549 ;; These versions of the min/max patterns implement exactly the operations
550 ;; min = (op1 < op2 ? op1 : op2)
551 ;; max = (!(op1 < op2) ? op1 : op2)
552 ;; Their operands are not commutative, and thus they may be used in the
553 ;; presence of -0.0 and NaN.
;; minps expressed as a two-operand unspec, so the operand order is
;; fixed: operand 1 is a register tied to the output, operand 2 a
;; register or memory.  Its "type" is sseadd, whereas the smin-based
;; minps patterns use sse.
555 (define_insn "*ieee_sminv4sf3"
556 [(set (match_operand:V4SF 0 "register_operand" "=x")
557 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
558 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
561 "minps\t{%2, %0|%0, %2}"
562 [(set_attr "type" "sseadd")
563 (set_attr "mode" "V4SF")])
;; maxps expressed as a two-operand unspec with a fixed operand order
;; (operand 1 tied to the output).
565 (define_insn "*ieee_smaxv4sf3"
566 [(set (match_operand:V4SF 0 "register_operand" "=x")
567 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
568 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
571 "maxps\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sseadd")
573 (set_attr "mode" "V4SF")])
;; minpd expressed as a two-operand unspec with a fixed operand order
;; (operand 1 tied to the output); V2DF counterpart of *ieee_sminv4sf3.
575 (define_insn "*ieee_sminv2df3"
576 [(set (match_operand:V2DF 0 "register_operand" "=x")
577 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
578 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
581 "minpd\t{%2, %0|%0, %2}"
582 [(set_attr "type" "sseadd")
583 (set_attr "mode" "V2DF")])
;; maxpd expressed as a two-operand unspec with a fixed operand order
;; (operand 1 tied to the output); V2DF counterpart of *ieee_smaxv4sf3.
585 (define_insn "*ieee_smaxv2df3"
586 [(set (match_operand:V2DF 0 "register_operand" "=x")
587 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
588 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
591 "maxpd\t{%2, %0|%0, %2}"
592 [(set_attr "type" "sseadd")
593 (set_attr "mode" "V2DF")])
;; addsubps.  The visible RTL combines an operation on operands 1 and 2
;; with (minus op1 op2) over the same operands (via match_dup).
;; Operand 1 is a register tied to the output.
595 (define_insn "sse3_addsubv4sf3"
596 [(set (match_operand:V4SF 0 "register_operand" "=x")
599 (match_operand:V4SF 1 "register_operand" "0")
600 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
601 (minus:V4SF (match_dup 1) (match_dup 2))
604 "addsubps\t{%2, %0|%0, %2}"
605 [(set_attr "type" "sseadd")
606 (set_attr "mode" "V4SF")])
;; haddps.  The visible RTL selects elements 0, 1, 2 and 3 of operand 1
;; individually (as SF values), then the same four elements of operand 2.
;; Operand 1 is a register tied to the output.
;; NOTE(review): the operators that combine the selected elements are not
;; present in this listing -- confirm against the complete file.
608 (define_insn "sse3_haddv4sf3"
609 [(set (match_operand:V4SF 0 "register_operand" "=x")
614 (match_operand:V4SF 1 "register_operand" "0")
615 (parallel [(const_int 0)]))
616 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
618 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
619 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
623 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
624 (parallel [(const_int 0)]))
625 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
627 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
628 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
630 "haddps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
;; hsubps.  Same visible shape as sse3_haddv4sf3: elements 0-3 of operand
;; 1, then elements 0-3 of operand 2, each selected individually.
;; NOTE(review): the operators that combine the selected elements are not
;; present in this listing -- confirm against the complete file.
634 (define_insn "sse3_hsubv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
656 "hsubps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4SF.  Two strategies are visible: two
;; chained haddps insns through a fresh temporary register, or a call to
;; ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the test choosing between them is not present in this
;; listing; presumably it checks for SSE3 -- confirm.
660 (define_expand "reduc_splus_v4sf"
661 [(match_operand:V4SF 0 "register_operand" "")
662 (match_operand:V4SF 1 "register_operand" "")]
667 rtx tmp = gen_reg_rtx (V4SFmode);
668 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
669 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
672 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Max-reduction expander for V4SF: calls ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining generator.
676 (define_expand "reduc_smax_v4sf"
677 [(match_operand:V4SF 0 "register_operand" "")
678 (match_operand:V4SF 1 "register_operand" "")]
681 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Min-reduction expander for V4SF: calls ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining generator.
685 (define_expand "reduc_smin_v4sf"
686 [(match_operand:V4SF 0 "register_operand" "")
687 (match_operand:V4SF 1 "register_operand" "")]
690 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
694 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
696 ;; Parallel single-precision floating point comparisons
698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare (cmp<cond>ps).  Operand 3 is the comparison operator,
;; accepted by sse_comparison_operator and printed through %D3 to form
;; the mnemonic.  Operand 1 is a register tied to the output.
700 (define_insn "sse_maskcmpv4sf3"
701 [(set (match_operand:V4SF 0 "register_operand" "=x")
702 (match_operator:V4SF 3 "sse_comparison_operator"
703 [(match_operand:V4SF 1 "register_operand" "0")
704 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
706 "cmp%D3ps\t{%2, %0|%0, %2}"
707 [(set_attr "type" "ssecmp")
708 (set_attr "mode" "V4SF")])
;; Scalar compare (cmp<cond>ss) on V4SF operands.  Unlike the packed
;; form, operand 2 must be a register (no memory alternative).
710 (define_insn "sse_vmmaskcmpv4sf3"
711 [(set (match_operand:V4SF 0 "register_operand" "=x")
713 (match_operator:V4SF 3 "sse_comparison_operator"
714 [(match_operand:V4SF 1 "register_operand" "0")
715 (match_operand:V4SF 2 "register_operand" "x")])
719 "cmp%D3ss\t{%2, %0|%0, %2}"
720 [(set_attr "type" "ssecmp")
721 (set_attr "mode" "SF")])
;; comiss: sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).
723 (define_insn "sse_comi"
724 [(set (reg:CCFP FLAGS_REG)
727 (match_operand:V4SF 0 "register_operand" "x")
728 (parallel [(const_int 0)]))
730 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
731 (parallel [(const_int 0)]))))]
733 "comiss\t{%1, %0|%0, %1}"
734 [(set_attr "type" "ssecomi")
735 (set_attr "mode" "SF")])
;; ucomiss: same shape as sse_comi, but FLAGS_REG is set in CCFPU mode.
737 (define_insn "sse_ucomi"
738 [(set (reg:CCFPU FLAGS_REG)
741 (match_operand:V4SF 0 "register_operand" "x")
742 (parallel [(const_int 0)]))
744 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
745 (parallel [(const_int 0)]))))]
747 "ucomiss\t{%1, %0|%0, %1}"
748 [(set_attr "type" "ssecomi")
749 (set_attr "mode" "SF")])
;; V4SF conditional-select expander.  Operands 4 and 5 are the values
;; being compared; operands 1 and 2 are the two general_operand values
;; being selected.  The visible preparation code branches on the result
;; of ix86_expand_fp_vcond.
751 (define_expand "vcondv4sf"
752 [(set (match_operand:V4SF 0 "register_operand" "")
755 [(match_operand:V4SF 4 "nonimmediate_operand" "")
756 (match_operand:V4SF 5 "nonimmediate_operand" "")])
757 (match_operand:V4SF 1 "general_operand" "")
758 (match_operand:V4SF 2 "general_operand" "")))]
761 if (ix86_expand_fp_vcond (operands))
767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
769 ;; Parallel single-precision floating point logical operations
771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise AND expander on V4SF; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands.
773 (define_expand "andv4sf3"
774 [(set (match_operand:V4SF 0 "register_operand" "")
775 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
776 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
778 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; andps.  Operand 1 is tied to the output and marked commutative ("%0");
;; validity is checked with ix86_binary_operator_ok.
780 (define_insn "*andv4sf3"
781 [(set (match_operand:V4SF 0 "register_operand" "=x")
782 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
783 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
784 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
785 "andps\t{%2, %0|%0, %2}"
786 [(set_attr "type" "sselog")
787 (set_attr "mode" "V4SF")])
;; andnps: (NOT operand 1) AND operand 2.  The complemented operand is
;; the register tied to the output; operand 2 may be register or memory.
789 (define_insn "sse_nandv4sf3"
790 [(set (match_operand:V4SF 0 "register_operand" "=x")
791 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
792 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
794 "andnps\t{%2, %0|%0, %2}"
795 [(set_attr "type" "sselog")
796 (set_attr "mode" "V4SF")])
;; Bitwise OR expander on V4SF; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands.
798 (define_expand "iorv4sf3"
799 [(set (match_operand:V4SF 0 "register_operand" "")
800 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
801 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
803 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; orps.  Operand 1 is tied to the output and marked commutative ("%0");
;; validity is checked with ix86_binary_operator_ok.
805 (define_insn "*iorv4sf3"
806 [(set (match_operand:V4SF 0 "register_operand" "=x")
807 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
808 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
809 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
810 "orps\t{%2, %0|%0, %2}"
811 [(set_attr "type" "sselog")
812 (set_attr "mode" "V4SF")])
;; Bitwise XOR expander on V4SF; the preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands.
814 (define_expand "xorv4sf3"
815 [(set (match_operand:V4SF 0 "register_operand" "")
816 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
817 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
819 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; xorps.  Operand 1 is tied to the output and marked commutative ("%0");
;; validity is checked with ix86_binary_operator_ok.
821 (define_insn "*xorv4sf3"
822 [(set (match_operand:V4SF 0 "register_operand" "=x")
823 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
824 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
825 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
826 "xorps\t{%2, %0|%0, %2}"
827 [(set_attr "type" "sselog")
828 (set_attr "mode" "V4SF")])
830 ;; Also define scalar versions. These are used for abs, neg, and
831 ;; conditional move. Using subregs into vector modes causes register
832 ;; allocation lossage. These patterns do not allow memory operands
833 ;; because the native instructions read the full 128-bits.
;; Scalar SF bitwise AND implemented with the packed andps; both inputs
;; must be registers and the "mode" attribute is V4SF.
835 (define_insn "*andsf3"
836 [(set (match_operand:SF 0 "register_operand" "=x")
837 (and:SF (match_operand:SF 1 "register_operand" "0")
838 (match_operand:SF 2 "register_operand" "x")))]
840 "andps\t{%2, %0|%0, %2}"
841 [(set_attr "type" "sselog")
842 (set_attr "mode" "V4SF")])
;; Scalar SF (NOT operand 1) AND operand 2, implemented with the packed
;; andnps; both inputs must be registers.
844 (define_insn "*nandsf3"
845 [(set (match_operand:SF 0 "register_operand" "=x")
846 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
847 (match_operand:SF 2 "register_operand" "x")))]
849 "andnps\t{%2, %0|%0, %2}"
850 [(set_attr "type" "sselog")
851 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise OR implemented with the packed orps; both inputs
;; must be registers.
853 (define_insn "*iorsf3"
854 [(set (match_operand:SF 0 "register_operand" "=x")
855 (ior:SF (match_operand:SF 1 "register_operand" "0")
856 (match_operand:SF 2 "register_operand" "x")))]
858 "orps\t{%2, %0|%0, %2}"
859 [(set_attr "type" "sselog")
860 (set_attr "mode" "V4SF")])
;; Scalar SF bitwise XOR implemented with the packed xorps; both inputs
;; must be registers.
862 (define_insn "*xorsf3"
863 [(set (match_operand:SF 0 "register_operand" "=x")
864 (xor:SF (match_operand:SF 1 "register_operand" "0")
865 (match_operand:SF 2 "register_operand" "x")))]
867 "xorps\t{%2, %0|%0, %2}"
868 [(set_attr "type" "sselog")
869 (set_attr "mode" "V4SF")])
871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
873 ;; Parallel single-precision floating point conversion operations
875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF and
;; combines it with operand 1, a V4SF register tied to the output.
877 (define_insn "sse_cvtpi2ps"
878 [(set (match_operand:V4SF 0 "register_operand" "=x")
881 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
882 (match_operand:V4SF 1 "register_operand" "0")
885 "cvtpi2ps\t{%2, %0|%0, %2}"
886 [(set_attr "type" "ssecvt")
887 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V2SI result (constraint "y") is elements 0 and 1 of a
;; V4SI unspec conversion of the V4SF input.  The "unit" attribute is mmx
;; and the "mode" attribute is DI.
889 (define_insn "sse_cvtps2pi"
890 [(set (match_operand:V2SI 0 "register_operand" "=y")
892 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
894 (parallel [(const_int 0) (const_int 1)])))]
896 "cvtps2pi\t{%1, %0|%0, %1}"
897 [(set_attr "type" "ssecvt")
898 (set_attr "unit" "mmx")
899 (set_attr "mode" "DI")])
;; cvttps2pi: the V2SI result is elements 0 and 1 of (fix:V4SI input).
;; NOTE(review): the "mode" attribute is SF here but DI in the otherwise
;; parallel sse_cvtps2pi -- confirm which is intended.
901 (define_insn "sse_cvttps2pi"
902 [(set (match_operand:V2SI 0 "register_operand" "=y")
904 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
905 (parallel [(const_int 0) (const_int 1)])))]
907 "cvttps2pi\t{%1, %0|%0, %1}"
908 [(set_attr "type" "ssecvt")
909 (set_attr "unit" "mmx")
910 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF and combines it with
;; operand 1, a V4SF register tied to the output.  Two alternatives
;; (operand 2 in a general register, or in memory) with different
;; athlon_decode values.
912 (define_insn "sse_cvtsi2ss"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
916 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
917 (match_operand:V4SF 1 "register_operand" "0,0")
920 "cvtsi2ss\t{%2, %0|%0, %2}"
921 [(set_attr "type" "sseicvt")
922 (set_attr "athlon_decode" "vector,double")
923 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source counterpart of sse_cvtsi2ss, available only
;; when TARGET_64BIT.
;; NOTE(review): the second alternative of operand 2 is "rm" here but "m"
;; in sse_cvtsi2ss -- confirm whether the register overlap is intended.
925 (define_insn "sse_cvtsi2ssq"
926 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
929 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
930 (match_operand:V4SF 1 "register_operand" "0,0")
932 "TARGET_SSE && TARGET_64BIT"
933 "cvtsi2ssq\t{%2, %0|%0, %2}"
934 [(set_attr "type" "sseicvt")
935 (set_attr "athlon_decode" "vector,double")
936 (set_attr "mode" "SF")])
;; cvtss2si: SImode result from element 0 of the V4SF input, expressed as
;; UNSPEC_FIX_NOTRUNC.  Alternatives: input in an SSE register or in
;; memory.
938 (define_insn "sse_cvtss2si"
939 [(set (match_operand:SI 0 "register_operand" "=r,r")
942 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
943 (parallel [(const_int 0)]))]
944 UNSPEC_FIX_NOTRUNC))]
946 "cvtss2si\t{%1, %0|%0, %1}"
947 [(set_attr "type" "sseicvt")
948 (set_attr "athlon_decode" "double,vector")
949 (set_attr "mode" "SI")])
;; cvtss2siq: DImode-result counterpart of sse_cvtss2si, available only
;; when TARGET_64BIT.
951 (define_insn "sse_cvtss2siq"
952 [(set (match_operand:DI 0 "register_operand" "=r,r")
955 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
956 (parallel [(const_int 0)]))]
957 UNSPEC_FIX_NOTRUNC))]
958 "TARGET_SSE && TARGET_64BIT"
959 "cvtss2siq\t{%1, %0|%0, %1}"
960 [(set_attr "type" "sseicvt")
961 (set_attr "athlon_decode" "double,vector")
962 (set_attr "mode" "DI")])
;; cvttss2si: SImode result computed from element 0 of the V4SF input
;; (SSE register or memory).
964 (define_insn "sse_cvttss2si"
965 [(set (match_operand:SI 0 "register_operand" "=r,r")
968 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
969 (parallel [(const_int 0)]))))]
971 "cvttss2si\t{%1, %0|%0, %1}"
972 [(set_attr "type" "sseicvt")
973 (set_attr "athlon_decode" "double,vector")
974 (set_attr "mode" "SI")])
;; cvttss2siq: DImode-result counterpart of sse_cvttss2si, available only
;; when TARGET_64BIT.
976 (define_insn "sse_cvttss2siq"
977 [(set (match_operand:DI 0 "register_operand" "=r,r")
980 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
981 (parallel [(const_int 0)]))))]
982 "TARGET_SSE && TARGET_64BIT"
983 "cvttss2siq\t{%1, %0|%0, %1}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "double,vector")
986 (set_attr "mode" "DI")])
;; cvtdq2ps: converts a V4SI register-or-memory input to V4SF.
;; The "mode" attribute is V4SF, matching the V4SF result; it previously
;; read V2DF although no double-precision value is involved.
988 (define_insn "sse2_cvtdq2ps"
989 [(set (match_operand:V4SF 0 "register_operand" "=x")
990 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
992 "cvtdq2ps\t{%1, %0|%0, %1}"
993 [(set_attr "type" "ssecvt")
994 (set_attr "mode" "V4SF")])
;; cvtps2dq: V4SF to V4SI conversion expressed as UNSPEC_FIX_NOTRUNC.
;; The "mode" attribute is TI.
996 (define_insn "sse2_cvtps2dq"
997 [(set (match_operand:V4SI 0 "register_operand" "=x")
998 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
999 UNSPEC_FIX_NOTRUNC))]
1001 "cvtps2dq\t{%1, %0|%0, %1}"
1002 [(set_attr "type" "ssecvt")
1003 (set_attr "mode" "TI")])
;; cvttps2dq: V4SF to V4SI conversion expressed with the generic fix RTL
;; operator.  The "mode" attribute is TI.
1005 (define_insn "sse2_cvttps2dq"
1006 [(set (match_operand:V4SI 0 "register_operand" "=x")
1007 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1009 "cvttps2dq\t{%1, %0|%0, %1}"
1010 [(set_attr "type" "ssecvt")
1011 (set_attr "mode" "TI")])
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1015 ;; Parallel single-precision floating point element swizzling
1017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps and its memory forms.  Operand 1 is always tied to the output.
;; Alternatives, in order:
;;   reg <- reg            movhlps
;;   reg <- offsettable mem  movlps, reading the address printed by %H2
;;   mem <- reg            movhps
;; The condition rejects operands 1 and 2 both being memory.
1019 (define_insn "sse_movhlps"
1020 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1023 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1024 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1025 (parallel [(const_int 6)
1029 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1031 movhlps\t{%2, %0|%0, %2}
1032 movlps\t{%H2, %0|%0, %H2}
1033 movhps\t{%2, %0|%0, %2}"
1034 [(set_attr "type" "ssemov")
1035 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms.  Operand 1 is always tied to the output.
;; Alternatives, in order:
;;   reg <- reg            movlhps
;;   reg <- mem            movhps
;;   offsettable mem <- reg  movlps, writing the address printed by %H0
;; Validity is checked with ix86_binary_operator_ok (UNKNOWN, ...).
1037 (define_insn "sse_movlhps"
1038 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1041 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1042 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1043 (parallel [(const_int 0)
1047 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1049 movlhps\t{%2, %0|%0, %2}
1050 movhps\t{%2, %0|%0, %2}
1051 movlps\t{%2, %H0|%H0, %2}"
1052 [(set_attr "type" "ssemov")
1053 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps.  The selection vector is [2 6 3 7] over operands 1 and 2;
;; indices 4-7 presumably address operand 2 in their concatenation.
;; Operand 1 is a register tied to the output.
1055 (define_insn "sse_unpckhps"
1056 [(set (match_operand:V4SF 0 "register_operand" "=x")
1059 (match_operand:V4SF 1 "register_operand" "0")
1060 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1061 (parallel [(const_int 2) (const_int 6)
1062 (const_int 3) (const_int 7)])))]
1064 "unpckhps\t{%2, %0|%0, %2}"
1065 [(set_attr "type" "sselog")
1066 (set_attr "mode" "V4SF")])
;; Interleave elements of operands 1 and 2: the result selects indices
;; 0, 4, 1, 5 from the pair (operand 1, operand 2).  Operand 1 is tied to
;; the destination; operand 2 may be an SSE register or memory.
;; Emitted as unpcklps.
1068 (define_insn "sse_unpcklps"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x")
1072 (match_operand:V4SF 1 "register_operand" "0")
1073 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1074 (parallel [(const_int 0) (const_int 4)
1075 (const_int 1) (const_int 5)])))]
1077 "unpcklps\t{%2, %0|%0, %2}"
1078 [(set_attr "type" "sselog")
1079 (set_attr "mode" "V4SF")])
1081 ;; These are modeled with the same vec_concat as the others so that we
1082 ;; capture users of shufps that can use the new instructions
;; movshdup: single-source V4SF element selection.  Operand 1 may be an
;; SSE register or memory; the result goes to an SSE register that is not
;; tied to the source.
1083 (define_insn "sse3_movshdup"
1084 [(set (match_operand:V4SF 0 "register_operand" "=x")
1087 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1089 (parallel [(const_int 1)
1094 "movshdup\t{%1, %0|%0, %1}"
1095 [(set_attr "type" "sse")
1096 (set_attr "mode" "V4SF")])
;; movsldup: single-source V4SF element selection.  Operand 1 may be an
;; SSE register or memory; the result goes to an SSE register that is not
;; tied to the source.
1098 (define_insn "sse3_movsldup"
1099 [(set (match_operand:V4SF 0 "register_operand" "=x")
1102 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1104 (parallel [(const_int 0)
1109 "movsldup\t{%1, %0|%0, %1}"
1110 [(set_attr "type" "sse")
1111 (set_attr "mode" "V4SF")])
;; Expander for shufps with an immediate mask (operand 3, a const_int).
;; The mask is split into four 2-bit fields and handed to sse_shufps_1 as
;; explicit element indices: fields at bits 0 and 2 are passed as-is
;; (0..3), fields at bits 4 and 6 are biased by 4 (4..7).
1113 (define_expand "sse_shufps"
1114 [(match_operand:V4SF 0 "register_operand" "")
1115 (match_operand:V4SF 1 "register_operand" "")
1116 (match_operand:V4SF 2 "nonimmediate_operand" "")
1117 (match_operand:SI 3 "const_int_operand" "")]
1120 int mask = INTVAL (operands[3]);
1121 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1122 GEN_INT ((mask >> 0) & 3),
1123 GEN_INT ((mask >> 2) & 3),
1124 GEN_INT (((mask >> 4) & 3) + 4),
1125 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with the selection spelled out as four index operands:
;; operands 3 and 4 must be in 0..3, operands 5 and 6 in 4..7.
;; The output code folds the four indices back into the 8-bit immediate
;; (removing the bias of 4 from operands 5 and 6), stores it in
;; operands[3] and prints shufps with that immediate.
1129 (define_insn "sse_shufps_1"
1130 [(set (match_operand:V4SF 0 "register_operand" "=x")
1133 (match_operand:V4SF 1 "register_operand" "0")
1134 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1135 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1136 (match_operand 4 "const_0_to_3_operand" "")
1137 (match_operand 5 "const_4_to_7_operand" "")
1138 (match_operand 6 "const_4_to_7_operand" "")])))]
1142 mask |= INTVAL (operands[3]) << 0;
1143 mask |= INTVAL (operands[4]) << 2;
1144 mask |= (INTVAL (operands[5]) - 4) << 4;
1145 mask |= (INTVAL (operands[6]) - 4) << 6;
1146 operands[3] = GEN_INT (mask);
1148 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1150 [(set_attr "type" "sselog")
1151 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives:
;;   0: memory destination, register source        -> movhps
;;   1: register destination, register source      -> movhlps
;;   2: register destination, offsettable memory   -> movlps from %H1
1153 (define_insn "sse_storehps"
1154 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1156 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1157 (parallel [(const_int 2) (const_int 3)])))]
1160 movhps\t{%1, %0|%0, %1}
1161 movhlps\t{%1, %0|%0, %1}
1162 movlps\t{%H1, %0|%0, %H1}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SF from elements 0 and 1 of operand 1 (tied to the
;; destination) together with V2SF operand 2.  Three alternatives:
;;   0: memory operand 2                        -> movhps
;;   1: register operand 2                      -> movlhps
;;   2: offsettable memory destination          -> movlps to %H0
1166 (define_insn "sse_loadhps"
1167 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1170 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1171 (parallel [(const_int 0) (const_int 1)]))
1172 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1175 movhps\t{%2, %0|%0, %2}
1176 movlhps\t{%2, %0|%0, %2}
1177 movlps\t{%2, %H0|%H0, %2}"
1178 [(set_attr "type" "ssemov")
1179 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Three alternatives:
;;   0: memory destination, register source   -> movlps
;;   1: register to register                  -> movaps (full-register copy)
;;   2: register destination, memory source   -> movlps
1181 (define_insn "sse_storelps"
1182 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1184 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1185 (parallel [(const_int 0) (const_int 1)])))]
1188 movlps\t{%1, %0|%0, %1}
1189 movaps\t{%1, %0|%0, %1}
1190 movlps\t{%1, %0|%0, %1}"
1191 [(set_attr "type" "ssemov")
1192 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Build a V4SF from V2SF operand 2 together with elements 2 and 3 of
;; V4SF operand 1.  Three alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 1 tied to the destination, operand 2 in memory -> movlps
;;   2: memory destination (operand 1 tied), register operand 2 -> movlps
1194 (define_insn "sse_loadlps"
1195 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1197 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1199 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1200 (parallel [(const_int 2) (const_int 3)]))))]
1203 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1204 movlps\t{%2, %0|%0, %2}
1205 movlps\t{%2, %0|%0, %2}"
1206 [(set_attr "type" "sselog,ssemov,ssemov")
1207 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 1 is tied to the destination and
;; operand 2 (an SSE register) is the movss source.  The "mode" attribute
;; is SF, i.e. the instruction is treated as a scalar move.
1209 (define_insn "sse_movss"
1210 [(set (match_operand:V4SF 0 "register_operand" "=x")
1212 (match_operand:V4SF 2 "register_operand" "x")
1213 (match_operand:V4SF 1 "register_operand" "0")
1216 "movss\t{%2, %0|%0, %2}"
1217 [(set_attr "type" "ssemov")
1218 (set_attr "mode" "SF")])
;; Produce a V4SF from a scalar SF held in the destination register
;; itself (operand 1 is tied to operand 0).  Emitted as shufps with
;; immediate 0, using %0 as both source operands.
1220 (define_insn "*vec_dupv4sf"
1221 [(set (match_operand:V4SF 0 "register_operand" "=x")
1223 (match_operand:SF 1 "register_operand" "0")))]
1225 "shufps\t{$0, %0, %0|%0, %0, 0}"
1226 [(set_attr "type" "sselog1")
1227 (set_attr "mode" "V4SF")])
1229 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1230 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1231 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Combine two SF values (operands 1 and 2) into a V2SF register.
;; Four alternatives, two for SSE registers (x) and two for MMX
;; registers (y, discouraged with `*'):
;;   0: x, operand 1 tied, operand 2 in x        -> unpcklps
;;   1: x, operand 1 in memory, operand 2 is `C' -> movss
;;   2: y, operand 1 tied, operand 2 in y        -> punpckldq
;;   3: y, operand 1 in memory, operand 2 is `C' -> movd
;; Operand 2 uses predicate reg_or_0_operand, so it is never memory.
1232 (define_insn "*sse_concatv2sf"
1233 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1235 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1236 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1239 unpcklps\t{%2, %0|%0, %2}
1240 movss\t{%1, %0|%0, %1}
1241 punpckldq\t{%2, %0|%0, %2}
1242 movd\t{%1, %0|%0, %1}"
1243 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1244 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Combine two V2SF halves into a V4SF register.  Operand 1 is tied to
;; the destination; operand 2 supplies the other half:
;;   0: operand 2 in an SSE register -> movlhps
;;   1: operand 2 in memory          -> movhps
1246 (define_insn "*sse_concatv4sf"
1247 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1249 (match_operand:V2SF 1 "register_operand" " 0,0")
1250 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1253 movlhps\t{%2, %0|%0, %2}
1254 movhps\t{%2, %0|%0, %2}"
1255 [(set_attr "type" "ssemov")
1256 (set_attr "mode" "V4SF,V2SF")])
;; Standard-name expander for initializing a V4SF register (operand 0)
;; from operand 1, which has no mode or predicate here.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as its
;; first argument.
1258 (define_expand "vec_initv4sf"
1259 [(match_operand:V4SF 0 "register_operand" "")
1260 (match_operand 1 "" "")]
1263 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Insert scalar SF operand 2 into a V4SF whose remaining contents come
;; from operand 1.  The constraints describe four alternatives:
;;   0: destination x, operand 2 in x, operand 1 tied   -> movss
;;   1: destination x, operand 2 in memory, operand 1 C -> movss
;;   2: destination Y, operand 2 in a general register (discouraged),
;;      operand 1 C                                     -> movd
;;   3: memory destination, operand 1 tied; operand 2 may be x, a general
;;      register, an x87 register or a float constant.
1267 (define_insn "*vec_setv4sf_0"
1268 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1271 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1272 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1276 movss\t{%2, %0|%0, %2}
1277 movss\t{%2, %0|%0, %2}
1278 movd\t{%2, %0|%0, %2}
1280 [(set_attr "type" "ssemov")
1281 (set_attr "mode" "SF")])
;; Split used once reload has completed, for a V4SF memory destination
;; and a non-memory SF operand 1: the insn is replaced by a plain SFmode
;; store of operand 1 at byte offset 0 of the destination.
1284 [(set (match_operand:V4SF 0 "memory_operand" "")
1287 (match_operand:SF 1 "nonmemory_operand" ""))
1290 "TARGET_SSE && reload_completed"
1293 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Standard-name expander: set one element of V4SF register operand 0 to
;; SF register operand 1.  Operand 2 is a const_int element index, passed
;; as an integer to ix86_expand_vector_set (first argument `false').
1297 (define_expand "vec_setv4sf"
1298 [(match_operand:V4SF 0 "register_operand" "")
1299 (match_operand:SF 1 "register_operand" "")
1300 (match_operand 2 "const_int_operand" "")]
1303 ix86_expand_vector_set (false, operands[0], operands[1],
1304 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 into SF operand 0.  The
;; destination may be an SSE register, memory, or an x87/general
;; register; the condition rejects memory-to-memory.
;; After reload the insn is split into an ordinary SFmode move: operand 1
;; is re-expressed in SFmode (gen_rtx_REG on its register number, or
;; gen_lowpart) and moved into operand 0.
1308 (define_insn_and_split "*vec_extractv4sf_0"
1309 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1311 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1312 (parallel [(const_int 0)])))]
1313 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1315 "&& reload_completed"
1318 rtx op1 = operands[1];
1320 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1322 op1 = gen_lowpart (SFmode, op1);
1323 emit_move_insn (operands[0], op1);
;; Standard-name expander: extract one element of V4SF register operand 1
;; into SF register operand 0.  Operand 2 is a const_int element index,
;; passed as an integer to ix86_expand_vector_extract (first argument
;; `false').
1327 (define_expand "vec_extractv4sf"
1328 [(match_operand:SF 0 "register_operand" "")
1329 (match_operand:V4SF 1 "register_operand" "")
1330 (match_operand 2 "const_int_operand" "")]
1333 ix86_expand_vector_extract (false, operands[0], operands[1],
1334 INTVAL (operands[2]));
1338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1340 ;; Parallel double-precision floating point arithmetic
1342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Standard-name expander for V2DF negation.  The RTL template is not
;; emitted directly: the preparation code calls
;; ix86_expand_fp_absneg_operator (NEG, ...) and finishes with DONE.
1344 (define_expand "negv2df2"
1345 [(set (match_operand:V2DF 0 "register_operand" "")
1346 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1348 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; Standard-name expander for V2DF absolute value.  The RTL template is
;; not emitted directly: the preparation code calls
;; ix86_expand_fp_absneg_operator (ABS, ...) and finishes with DONE.
1350 (define_expand "absv2df2"
1351 [(set (match_operand:V2DF 0 "register_operand" "")
1352 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1354 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; Standard-name expander for V2DF addition.  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called on
;; the operands before the (set ... (plus ...)) template is emitted.
1356 (define_expand "addv2df3"
1357 [(set (match_operand:V2DF 0 "register_operand" "")
1358 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1359 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1361 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; Packed double-precision add, emitted as addpd.  Operand 1 is tied to
;; the destination; the `%' in its constraint marks operands 1 and 2 as
;; commutative.  Operand 2 may be an SSE register or memory.
1363 (define_insn "*addv2df3"
1364 [(set (match_operand:V2DF 0 "register_operand" "=x")
1365 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1366 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1367 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1368 "addpd\t{%2, %0|%0, %2}"
1369 [(set_attr "type" "sseadd")
1370 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision add on V2DF operands, emitted as addsd.
;; Operand 1 is tied to the destination; operand 2 may be an SSE register
;; or memory.  The "mode" attribute is DF.
;; The mode passed to ix86_binary_operator_ok is V2DFmode, matching the
;; operand mode of this pattern and the sibling sse2_vmmulv2df3.
1372 (define_insn "sse2_vmaddv2df3"
1373 [(set (match_operand:V2DF 0 "register_operand" "=x")
1375 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1376 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1379 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1380 "addsd\t{%2, %0|%0, %2}"
1381 [(set_attr "type" "sseadd")
1382 (set_attr "mode" "DF")])
;; Standard-name expander for V2DF subtraction.  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called on
;; the operands before the (set ... (minus ...)) template is emitted.
1384 (define_expand "subv2df3"
1385 [(set (match_operand:V2DF 0 "register_operand" "")
1386 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1387 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1389 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; Packed double-precision subtract, emitted as subpd.  Operand 1 must be
;; a register and is tied to the destination (no commutative marker);
;; operand 2 may be an SSE register or memory.
1391 (define_insn "*subv2df3"
1392 [(set (match_operand:V2DF 0 "register_operand" "=x")
1393 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1394 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1396 "subpd\t{%2, %0|%0, %2}"
1397 [(set_attr "type" "sseadd")
1398 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision subtract on V2DF operands, emitted as
;; subsd.  Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory.  The "mode" attribute is DF.
1400 (define_insn "sse2_vmsubv2df3"
1401 [(set (match_operand:V2DF 0 "register_operand" "=x")
1403 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1404 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1408 "subsd\t{%2, %0|%0, %2}"
1409 [(set_attr "type" "sseadd")
1410 (set_attr "mode" "DF")])
;; Standard-name expander for V2DF multiplication.  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called on
;; the operands before the (set ... (mult ...)) template is emitted.
1412 (define_expand "mulv2df3"
1413 [(set (match_operand:V2DF 0 "register_operand" "")
1414 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1415 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1417 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; Packed double-precision multiply, emitted as mulpd.  Operand 1 is tied
;; to the destination; the `%' in its constraint marks operands 1 and 2
;; as commutative.  Operand 2 may be an SSE register or memory.
1419 (define_insn "*mulv2df3"
1420 [(set (match_operand:V2DF 0 "register_operand" "=x")
1421 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1422 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1423 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1424 "mulpd\t{%2, %0|%0, %2}"
1425 [(set_attr "type" "ssemul")
1426 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision multiply on V2DF operands, emitted as
;; mulsd.  Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory.  The "mode" attribute is DF.
1428 (define_insn "sse2_vmmulv2df3"
1429 [(set (match_operand:V2DF 0 "register_operand" "=x")
1431 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1432 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1435 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1436 "mulsd\t{%2, %0|%0, %2}"
1437 [(set_attr "type" "ssemul")
1438 (set_attr "mode" "DF")])
;; Standard-name expander for V2DF division.  Unlike the add/sub/mul
;; expanders, operand 1 (the dividend) must already be a register;
;; operand 2 may be a register or memory.
;; ix86_fixup_binary_operands_no_copy is called before the template is
;; emitted.
1440 (define_expand "divv2df3"
1441 [(set (match_operand:V2DF 0 "register_operand" "")
1442 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1443 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1445 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; Packed double-precision divide, emitted as divpd.  Operand 1 (the
;; dividend) is a register tied to the destination; operand 2 may be an
;; SSE register or memory.
1447 (define_insn "*divv2df3"
1448 [(set (match_operand:V2DF 0 "register_operand" "=x")
1449 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1450 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1452 "divpd\t{%2, %0|%0, %2}"
1453 [(set_attr "type" "ssediv")
1454 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision divide on V2DF operands, emitted as
;; divsd.  Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory.  The "mode" attribute is DF.
1456 (define_insn "sse2_vmdivv2df3"
1457 [(set (match_operand:V2DF 0 "register_operand" "=x")
1459 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1460 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1464 "divsd\t{%1, %0|%0, %2}"
1465 [(set_attr "type" "ssediv")
1466 (set_attr "mode" "DF")])
;; Packed double-precision square root, emitted as sqrtpd.  The source
;; may be an SSE register or memory and is not tied to the destination.
1468 (define_insn "sqrtv2df2"
1469 [(set (match_operand:V2DF 0 "register_operand" "=x")
1470 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1472 "sqrtpd\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "sse")
1474 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision square root, emitted as sqrtsd.
;; Operand 1 is the value whose square root is taken; operand 2 is tied
;; to the destination.  The "mode" attribute is DF.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint (as in sqrtv2df2 and the other vm patterns); with
;; register_operand the memory alternative could not be matched directly.
1476 (define_insn "sse2_vmsqrtv2df2"
1477 [(set (match_operand:V2DF 0 "register_operand" "=x")
1479 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1480 (match_operand:V2DF 2 "register_operand" "0")
1483 "sqrtsd\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "sse")
1485 (set_attr "mode" "DF")])
1487 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1488 ;; isn't really correct, as those rtl operators aren't defined when
1489 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Standard-name expander for V2DF signed maximum.  When
;; flag_finite_math_only is not set, operand 1 is first forced into a
;; register; then ix86_fixup_binary_operands_no_copy is applied.
1491 (define_expand "smaxv2df3"
1492 [(set (match_operand:V2DF 0 "register_operand" "")
1493 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1494 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1497 if (!flag_finite_math_only)
1498 operands[1] = force_reg (V2DFmode, operands[1]);
1499 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd pattern used only when flag_finite_math_only is set.  In that
;; case the operands are treated as commutative (`%' on operand 1), so
;; operand 1 may also be memory before being tied to the destination.
1502 (define_insn "*smaxv2df3_finite"
1503 [(set (match_operand:V2DF 0 "register_operand" "=x")
1504 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1505 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1506 "TARGET_SSE2 && flag_finite_math_only
1507 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1508 "maxpd\t{%2, %0|%0, %2}"
1509 [(set_attr "type" "sseadd")
1510 (set_attr "mode" "V2DF")])
;; maxpd pattern without the commutative marker: operand 1 must be a
;; register tied to the destination, operand 2 may be an SSE register or
;; memory, so the operand order is preserved as written.
1512 (define_insn "*smaxv2df3"
1513 [(set (match_operand:V2DF 0 "register_operand" "=x")
1514 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1515 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1517 "maxpd\t{%2, %0|%0, %2}"
1518 [(set_attr "type" "sseadd")
1519 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision maximum on V2DF operands, emitted as
;; maxsd.  Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory.  The "mode" attribute is DF.
1521 (define_insn "sse2_vmsmaxv2df3"
1522 [(set (match_operand:V2DF 0 "register_operand" "=x")
1524 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1525 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1529 "maxsd\t{%2, %0|%0, %2}"
1530 [(set_attr "type" "sseadd")
1531 (set_attr "mode" "DF")])
;; Standard-name expander for V2DF signed minimum.  When
;; flag_finite_math_only is not set, operand 1 is first forced into a
;; register; then ix86_fixup_binary_operands_no_copy is applied.
1533 (define_expand "sminv2df3"
1534 [(set (match_operand:V2DF 0 "register_operand" "")
1535 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1536 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1539 if (!flag_finite_math_only)
1540 operands[1] = force_reg (V2DFmode, operands[1]);
1541 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd pattern used only when flag_finite_math_only is set.  In that
;; case the operands are treated as commutative (`%' on operand 1), so
;; operand 1 may also be memory before being tied to the destination.
1544 (define_insn "*sminv2df3_finite"
1545 [(set (match_operand:V2DF 0 "register_operand" "=x")
1546 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1547 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1548 "TARGET_SSE2 && flag_finite_math_only
1549 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1550 "minpd\t{%2, %0|%0, %2}"
1551 [(set_attr "type" "sseadd")
1552 (set_attr "mode" "V2DF")])
;; minpd pattern without the commutative marker: operand 1 must be a
;; register tied to the destination, operand 2 may be an SSE register or
;; memory, so the operand order is preserved as written.
1554 (define_insn "*sminv2df3"
1555 [(set (match_operand:V2DF 0 "register_operand" "=x")
1556 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1557 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1559 "minpd\t{%2, %0|%0, %2}"
1560 [(set_attr "type" "sseadd")
1561 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision minimum on V2DF operands, emitted as
;; minsd.  Operand 1 is tied to the destination; operand 2 may be an SSE
;; register or memory.  The "mode" attribute is DF.
1563 (define_insn "sse2_vmsminv2df3"
1564 [(set (match_operand:V2DF 0 "register_operand" "=x")
1566 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1567 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1571 "minsd\t{%2, %0|%0, %2}"
1572 [(set_attr "type" "sseadd")
1573 (set_attr "mode" "DF")])
;; addsubpd: the RTL combines an operation on operands 1 and 2 with
;; (minus (match_dup 1) (match_dup 2)) on the same operands.  Operand 1
;; is tied to the destination; operand 2 may be an SSE register or
;; memory.
1575 (define_insn "sse3_addsubv2df3"
1576 [(set (match_operand:V2DF 0 "register_operand" "=x")
1579 (match_operand:V2DF 1 "register_operand" "0")
1580 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1581 (minus:V2DF (match_dup 1) (match_dup 2))
1584 "addsubpd\t{%2, %0|%0, %2}"
1585 [(set_attr "type" "sseadd")
1586 (set_attr "mode" "V2DF")])
;; haddpd: each result element is formed from element 0 and element 1 of
;; a single input vector -- one element from operand 1 (tied to the
;; destination) and one from operand 2 (SSE register or memory).
1588 (define_insn "sse3_haddv2df3"
1589 [(set (match_operand:V2DF 0 "register_operand" "=x")
1593 (match_operand:V2DF 1 "register_operand" "0")
1594 (parallel [(const_int 0)]))
1595 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1598 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1599 (parallel [(const_int 0)]))
1600 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1602 "haddpd\t{%2, %0|%0, %2}"
1603 [(set_attr "type" "sseadd")
1604 (set_attr "mode" "V2DF")])
;; hsubpd: each result element is formed from element 0 and element 1 of
;; a single input vector -- one element from operand 1 (tied to the
;; destination) and one from operand 2 (SSE register or memory).
1606 (define_insn "sse3_hsubv2df3"
1607 [(set (match_operand:V2DF 0 "register_operand" "=x")
1611 (match_operand:V2DF 1 "register_operand" "0")
1612 (parallel [(const_int 0)]))
1613 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1616 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1617 (parallel [(const_int 0)]))
1618 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1620 "hsubpd\t{%2, %0|%0, %2}"
1621 [(set_attr "type" "sseadd")
1622 (set_attr "mode" "V2DF")])
;; Sum-reduction expander for V2DF: implemented with a single
;; sse3_haddv2df3, passing operand 1 as both haddpd inputs.
1624 (define_expand "reduc_splus_v2df"
1625 [(match_operand:V2DF 0 "register_operand" "")
1626 (match_operand:V2DF 1 "register_operand" "")]
1629 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1635 ;; Parallel double-precision floating point comparisons
1637 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double-precision compare producing a V2DF result.  Operand 3 is
;; the comparison operator (accepted by sse_comparison_operator); it is
;; printed with %D3 to form the cmp<cond>pd mnemonic.  Operand 1 is tied
;; to the destination; operand 2 may be an SSE register or memory.
1639 (define_insn "sse2_maskcmpv2df3"
1640 [(set (match_operand:V2DF 0 "register_operand" "=x")
1641 (match_operator:V2DF 3 "sse_comparison_operator"
1642 [(match_operand:V2DF 1 "register_operand" "0")
1643 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1645 "cmp%D3pd\t{%2, %0|%0, %2}"
1646 [(set_attr "type" "ssecmp")
1647 (set_attr "mode" "V2DF")])
;; Scalar ("vm") double-precision compare on V2DF operands.  Operand 3 is
;; the comparison operator (accepted by sse_comparison_operator); it is
;; printed with %D3 to form the cmp<cond>sd mnemonic.  Operand 1 is tied
;; to the destination.  The "mode" attribute is DF.
1649 (define_insn "sse2_vmmaskcmpv2df3"
1650 [(set (match_operand:V2DF 0 "register_operand" "=x")
1652 (match_operator:V2DF 3 "sse_comparison_operator"
1653 [(match_operand:V2DF 1 "register_operand" "0")
1654 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1658 "cmp%D3sd\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "ssecmp")
1660 (set_attr "mode" "DF")])
;; comisd: sets the flags register (in CCFP mode) from element 0 of
;; operand 0 and element 0 of operand 1.  Operand 0 must be an SSE
;; register; operand 1 may be an SSE register or memory.  No vector
;; register is written.
1662 (define_insn "sse2_comi"
1663 [(set (reg:CCFP FLAGS_REG)
1666 (match_operand:V2DF 0 "register_operand" "x")
1667 (parallel [(const_int 0)]))
1669 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1670 (parallel [(const_int 0)]))))]
1672 "comisd\t{%1, %0|%0, %1}"
1673 [(set_attr "type" "ssecomi")
1674 (set_attr "mode" "DF")])
;; ucomisd: sets the flags register (in CCFPU mode) from element 0 of
;; operand 0 and element 0 of operand 1.  Operand 0 must be an SSE
;; register; operand 1 may be an SSE register or memory.  No vector
;; register is written.
1676 (define_insn "sse2_ucomi"
1677 [(set (reg:CCFPU FLAGS_REG)
1680 (match_operand:V2DF 0 "register_operand" "x")
1681 (parallel [(const_int 0)]))
1683 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1684 (parallel [(const_int 0)]))))]
1686 "ucomisd\t{%1, %0|%0, %1}"
1687 [(set_attr "type" "ssecomi")
1688 (set_attr "mode" "DF")])
;; Vector conditional expander for V2DF.  Operand 3 is the comparison
;; (any operator) applied to operands 4 and 5; operands 1 and 2 are the
;; two V2DF values to choose between.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose return value is tested.
1690 (define_expand "vcondv2df"
1691 [(set (match_operand:V2DF 0 "register_operand" "")
1693 (match_operator 3 ""
1694 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1695 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1696 (match_operand:V2DF 1 "general_operand" "")
1697 (match_operand:V2DF 2 "general_operand" "")))]
1700 if (ix86_expand_fp_vcond (operands))
1706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1708 ;; Parallel double-precision floating point logical operations
1710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Standard-name expander for bitwise AND on V2DF.  Both inputs may be
;; registers or memory; ix86_fixup_binary_operands_no_copy is called on
;; the operands before the template is emitted.
1712 (define_expand "andv2df3"
1713 [(set (match_operand:V2DF 0 "register_operand" "")
1714 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1715 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1717 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; Bitwise AND on V2DF, emitted as andpd.  Operand 1 is tied to the
;; destination and marked commutative with `%'; operand 2 may be an SSE
;; register or memory.
1719 (define_insn "*andv2df3"
1720 [(set (match_operand:V2DF 0 "register_operand" "=x")
1721 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1722 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1723 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1724 "andpd\t{%2, %0|%0, %2}"
1725 [(set_attr "type" "sselog")
1726 (set_attr "mode" "V2DF")])
;; AND-NOT on V2DF, emitted as andnpd: the complement of operand 1
;; (register, tied to the destination) is ANDed with operand 2 (SSE
;; register or memory).  Only operand 1 is inverted.
1728 (define_insn "sse2_nandv2df3"
1729 [(set (match_operand:V2DF 0 "register_operand" "=x")
1730 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1731 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1733 "andnpd\t{%2, %0|%0, %2}"
1734 [(set_attr "type" "sselog")
1735 (set_attr "mode" "V2DF")])
;; Standard-name expander for bitwise inclusive OR on V2DF.  Both inputs
;; may be registers or memory; ix86_fixup_binary_operands_no_copy is
;; called on the operands before the template is emitted.
1737 (define_expand "iorv2df3"
1738 [(set (match_operand:V2DF 0 "register_operand" "")
1739 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1740 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1742 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; Bitwise inclusive OR on V2DF, emitted as orpd.  Operand 1 is tied to
;; the destination and marked commutative with `%'; operand 2 may be an
;; SSE register or memory.
1744 (define_insn "*iorv2df3"
1745 [(set (match_operand:V2DF 0 "register_operand" "=x")
1746 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1747 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1748 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1749 "orpd\t{%2, %0|%0, %2}"
1750 [(set_attr "type" "sselog")
1751 (set_attr "mode" "V2DF")])
;; Standard-name expander for bitwise exclusive OR on V2DF.  Both inputs
;; may be registers or memory; ix86_fixup_binary_operands_no_copy is
;; called on the operands before the template is emitted.
1753 (define_expand "xorv2df3"
1754 [(set (match_operand:V2DF 0 "register_operand" "")
1755 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1756 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1758 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; Bitwise exclusive OR on V2DF, emitted as xorpd.  Operand 1 is tied to
;; the destination and marked commutative with `%'; operand 2 may be an
;; SSE register or memory.
1760 (define_insn "*xorv2df3"
1761 [(set (match_operand:V2DF 0 "register_operand" "=x")
1762 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1763 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1764 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1765 "xorpd\t{%2, %0|%0, %2}"
1766 [(set_attr "type" "sselog")
1767 (set_attr "mode" "V2DF")])
1769 ;; Also define scalar versions. These are used for abs, neg, and
1770 ;; conditional move. Using subregs into vector modes causes register
1771 ;; allocation lossage. These patterns do not allow memory operands
1772 ;; because the native instructions read the full 128-bits.
;; Scalar DF bitwise AND implemented with the packed andpd instruction.
;; Both inputs must be SSE registers (no memory alternative); operand 1
;; is tied to the destination.  The "mode" attribute is V2DF because the
;; instruction emitted is the packed form.
1774 (define_insn "*anddf3"
1775 [(set (match_operand:DF 0 "register_operand" "=x")
1776 (and:DF (match_operand:DF 1 "register_operand" "0")
1777 (match_operand:DF 2 "register_operand" "x")))]
1779 "andpd\t{%2, %0|%0, %2}"
1780 [(set_attr "type" "sselog")
1781 (set_attr "mode" "V2DF")])
;; Scalar DF AND-NOT implemented with the packed andnpd instruction: the
;; complement of operand 1 (tied to the destination) is ANDed with
;; operand 2.  Both inputs must be SSE registers.  The "mode" attribute
;; is V2DF because the instruction emitted is the packed form.
1783 (define_insn "*nanddf3"
1784 [(set (match_operand:DF 0 "register_operand" "=x")
1785 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1786 (match_operand:DF 2 "register_operand" "x")))]
1788 "andnpd\t{%2, %0|%0, %2}"
1789 [(set_attr "type" "sselog")
1790 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise inclusive OR implemented with the packed orpd
;; instruction.  Both inputs must be SSE registers; operand 1 is tied to
;; the destination.  The "mode" attribute is V2DF because the instruction
;; emitted is the packed form.
1792 (define_insn "*iordf3"
1793 [(set (match_operand:DF 0 "register_operand" "=x")
1794 (ior:DF (match_operand:DF 1 "register_operand" "0")
1795 (match_operand:DF 2 "register_operand" "x")))]
1797 "orpd\t{%2, %0|%0, %2}"
1798 [(set_attr "type" "sselog")
1799 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise exclusive OR implemented with the packed xorpd
;; instruction.  Both inputs must be SSE registers; operand 1 is tied to
;; the destination.  The "mode" attribute is V2DF because the instruction
;; emitted is the packed form.
1801 (define_insn "*xordf3"
1802 [(set (match_operand:DF 0 "register_operand" "=x")
1803 (xor:DF (match_operand:DF 1 "register_operand" "0")
1804 (match_operand:DF 2 "register_operand" "x")))]
1806 "xorpd\t{%2, %0|%0, %2}"
1807 [(set_attr "type" "sselog")
1808 (set_attr "mode" "V2DF")])
1810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1812 ;; Parallel double-precision floating point conversion operations
1814 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert two packed 32-bit integers (V2SI) to two packed doubles,
;; emitted as cvtpi2pd.  The source is an MMX register (alternative 0) or
;; memory (alternative 1); the "unit" attribute is mmx only for the
;; register alternative.
1816 (define_insn "sse2_cvtpi2pd"
1817 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1818 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1820 "cvtpi2pd\t{%1, %0|%0, %1}"
1821 [(set_attr "type" "ssecvt")
1822 (set_attr "unit" "mmx,*")
1823 (set_attr "mode" "V2DF")])
;; Convert two packed doubles to two packed 32-bit integers in an MMX
;; register, emitted as cvtpd2pi.  The conversion is represented as
;; UNSPEC_FIX_NOTRUNC rather than the RTL `fix' operation.  The source
;; may be an SSE register or memory.
1825 (define_insn "sse2_cvtpd2pi"
1826 [(set (match_operand:V2SI 0 "register_operand" "=y")
1827 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1828 UNSPEC_FIX_NOTRUNC))]
1830 "cvtpd2pi\t{%1, %0|%0, %1}"
1831 [(set_attr "type" "ssecvt")
1832 (set_attr "unit" "mmx")
1833 (set_attr "mode" "DI")])
;; Convert two packed doubles to two packed 32-bit integers in an MMX
;; register using the RTL `fix' operation, emitted as cvttpd2pi.  The
;; source may be an SSE register or memory.
;; NOTE(review): the "mode" attribute is TI here but DI on the otherwise
;; parallel sse2_cvtpd2pi -- confirm whether the difference is intended.
1835 (define_insn "sse2_cvttpd2pi"
1836 [(set (match_operand:V2SI 0 "register_operand" "=y")
1837 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1839 "cvttpd2pi\t{%1, %0|%0, %1}"
1840 [(set_attr "type" "ssecvt")
1841 (set_attr "unit" "mmx")
1842 (set_attr "mode" "TI")])
;; Convert a 32-bit integer (operand 2: general register or memory) to
;; double and place it in a V2DF whose other contents come from operand 1
;; (tied to the destination).  Emitted as cvtsi2sd.  The athlon_decode
;; attribute differs between the register and memory alternatives.
1844 (define_insn "sse2_cvtsi2sd"
1845 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1848 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1849 (match_operand:V2DF 1 "register_operand" "0,0")
1852 "cvtsi2sd\t{%2, %0|%0, %2}"
1853 [(set_attr "type" "sseicvt")
1854 (set_attr "mode" "DF")
1855 (set_attr "athlon_decode" "double,direct")])
;; 64-bit variant of sse2_cvtsi2sd: converts a DImode integer (operand 2:
;; general register or memory) to double, with operand 1 tied to the
;; destination.  Emitted as cvtsi2sdq and enabled only for TARGET_64BIT.
1857 (define_insn "sse2_cvtsi2sdq"
1858 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1861 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1862 (match_operand:V2DF 1 "register_operand" "0,0")
1864 "TARGET_SSE2 && TARGET_64BIT"
1865 "cvtsi2sdq\t{%2, %0|%0, %2}"
1866 [(set_attr "type" "sseicvt")
1867 (set_attr "mode" "DF")
1868 (set_attr "athlon_decode" "double,direct")])
;; Convert element 0 of V2DF operand 1 (SSE register or memory) to a
;; 32-bit integer in a general register, emitted as cvtsd2si.  The
;; conversion is represented as UNSPEC_FIX_NOTRUNC.
1870 (define_insn "sse2_cvtsd2si"
1871 [(set (match_operand:SI 0 "register_operand" "=r,r")
1874 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1875 (parallel [(const_int 0)]))]
1876 UNSPEC_FIX_NOTRUNC))]
1878 "cvtsd2si\t{%1, %0|%0, %1}"
1879 [(set_attr "type" "sseicvt")
1880 (set_attr "athlon_decode" "double,vector")
1881 (set_attr "mode" "SI")])
;; 64-bit variant of sse2_cvtsd2si: converts element 0 of V2DF operand 1
;; to a DImode integer in a general register, emitted as cvtsd2siq.
;; Represented as UNSPEC_FIX_NOTRUNC and enabled only for TARGET_64BIT.
1883 (define_insn "sse2_cvtsd2siq"
1884 [(set (match_operand:DI 0 "register_operand" "=r,r")
1887 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1888 (parallel [(const_int 0)]))]
1889 UNSPEC_FIX_NOTRUNC))]
1890 "TARGET_SSE2 && TARGET_64BIT"
1891 "cvtsd2siq\t{%1, %0|%0, %1}"
1892 [(set_attr "type" "sseicvt")
1893 (set_attr "athlon_decode" "double,vector")
1894 (set_attr "mode" "DI")])
;; Convert element 0 of V2DF operand 1 (SSE register or memory) to a
;; 32-bit integer in a general register, emitted as cvttsd2si.  This form
;; carries no UNSPEC_FIX_NOTRUNC wrapper, unlike sse2_cvtsd2si.
1896 (define_insn "sse2_cvttsd2si"
1897 [(set (match_operand:SI 0 "register_operand" "=r,r")
1900 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1901 (parallel [(const_int 0)]))))]
1903 "cvttsd2si\t{%1, %0|%0, %1}"
1904 [(set_attr "type" "sseicvt")
1905 (set_attr "mode" "SI")
1906 (set_attr "athlon_decode" "double,vector")])
;; 64-bit variant of sse2_cvttsd2si: converts element 0 of V2DF operand 1
;; to a DImode integer in a general register, emitted as cvttsd2siq.
;; Enabled only for TARGET_64BIT.
1908 (define_insn "sse2_cvttsd2siq"
1909 [(set (match_operand:DI 0 "register_operand" "=r,r")
1912 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1913 (parallel [(const_int 0)]))))]
1914 "TARGET_SSE2 && TARGET_64BIT"
1915 "cvttsd2siq\t{%1, %0|%0, %1}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "mode" "DI")
1918 (set_attr "athlon_decode" "double,vector")])
;; Convert elements 0 and 1 of V4SI operand 1 (SSE register or memory)
;; to two packed doubles, emitted as cvtdq2pd.
1920 (define_insn "sse2_cvtdq2pd"
1921 [(set (match_operand:V2DF 0 "register_operand" "=x")
1924 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1925 (parallel [(const_int 0) (const_int 1)]))))]
1927 "cvtdq2pd\t{%1, %0|%0, %1}"
1928 [(set_attr "type" "ssecvt")
1929 (set_attr "mode" "V2DF")])
;; Expander for the cvtpd2dq intrinsic pattern: V2DF operand 1 is
;; converted to V2SI (as an unspec) inside a V4SI result.  The
;; preparation code supplies operands[2] as the V2SImode zero constant,
;; which the matching *sse2_cvtpd2dq insn requires as its operand 2.
1931 (define_expand "sse2_cvtpd2dq"
1932 [(set (match_operand:V4SI 0 "register_operand" "")
1934 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1938 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF operand 1 (SSE register or memory) is converted to V2SI
;; via an unspec, and combined with operand 2, which must be a zero
;; constant (const0_operand), to form the V4SI result.
1940 (define_insn "*sse2_cvtpd2dq"
1941 [(set (match_operand:V4SI 0 "register_operand" "=x")
1943 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1945 (match_operand:V2SI 2 "const0_operand" "")))]
1947 "cvtpd2dq\t{%1, %0|%0, %1}"
1948 [(set_attr "type" "ssecvt")
1949 (set_attr "mode" "TI")])
;; Expander for the cvttpd2dq intrinsic pattern: V2DF operand 1 is
;; converted to V2SI with the RTL `fix' operation inside a V4SI result.
;; The preparation code supplies operands[2] as the V2SImode zero
;; constant, which the matching *sse2_cvttpd2dq insn requires.
1951 (define_expand "sse2_cvttpd2dq"
1952 [(set (match_operand:V4SI 0 "register_operand" "")
1954 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1957 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: V2DF operand 1 (SSE register or memory) is converted to
;; V2SI with `fix', and combined with operand 2, which must be a zero
;; constant (const0_operand), to form the V4SI result.
1959 (define_insn "*sse2_cvttpd2dq"
1960 [(set (match_operand:V4SI 0 "register_operand" "=x")
1962 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1963 (match_operand:V2SI 2 "const0_operand" "")))]
1965 "cvttpd2dq\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "ssecvt")
1967 (set_attr "mode" "TI")])
;; cvtsd2ss: V2DF operand 2 (SSE register or memory) is narrowed with
;; float_truncate and combined with V4SF operand 1, which is tied to the
;; destination.  The "mode" attribute is SF; athlon_decode differs
;; between the register and memory alternatives.
1969 (define_insn "sse2_cvtsd2ss"
1970 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1973 (float_truncate:V2SF
1974 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1975 (match_operand:V4SF 1 "register_operand" "0,0")
1978 "cvtsd2ss\t{%2, %0|%0, %2}"
1979 [(set_attr "type" "ssecvt")
1980 (set_attr "athlon_decode" "vector,double")
1981 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of V4SF operand 2 (SSE register or memory)
;; are the conversion source, and the result is combined with V2DF
;; operand 1, which is tied to the destination.  The "mode" attribute
;; is DF.
1983 (define_insn "sse2_cvtss2sd"
1984 [(set (match_operand:V2DF 0 "register_operand" "=x")
1988 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1989 (parallel [(const_int 0) (const_int 1)])))
1990 (match_operand:V2DF 1 "register_operand" "0")
1993 "cvtss2sd\t{%2, %0|%0, %2}"
1994 [(set_attr "type" "ssecvt")
1995 (set_attr "mode" "DF")])
;; Expander for the cvtpd2ps intrinsic pattern: V2DF operand 1 is
;; narrowed to V2SF with float_truncate inside a V4SF result.  The
;; preparation code supplies operands[2] as the V2SFmode zero constant,
;; which the matching *sse2_cvtpd2ps insn requires as its operand 2.
;; The constraint string on operand 1 is left empty, like every other
;; define_expand here: constraints are not used by expanders.
1997 (define_expand "sse2_cvtpd2ps"
1998 [(set (match_operand:V4SF 0 "register_operand" "")
2000 (float_truncate:V2SF
2001 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2004 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: V2DF operand 1 (SSE register or memory) is narrowed to V2SF
;; with float_truncate, and combined with operand 2, which must be a zero
;; constant (const0_operand), to form the V4SF result.
2006 (define_insn "*sse2_cvtpd2ps"
2007 [(set (match_operand:V4SF 0 "register_operand" "=x")
2009 (float_truncate:V2SF
2010 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2011 (match_operand:V2SF 2 "const0_operand" "")))]
2013 "cvtpd2ps\t{%1, %0|%0, %1}"
2014 [(set_attr "type" "ssecvt")
2015 (set_attr "mode" "V4SF")])
;; Convert elements 0 and 1 of V4SF operand 1 (SSE register or memory)
;; to two packed doubles, emitted as cvtps2pd.
2017 (define_insn "sse2_cvtps2pd"
2018 [(set (match_operand:V2DF 0 "register_operand" "=x")
2021 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2022 (parallel [(const_int 0) (const_int 1)]))))]
2024 "cvtps2pd\t{%1, %0|%0, %1}"
2025 [(set_attr "type" "ssecvt")
2026 (set_attr "mode" "V2DF")])
2028 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2030 ;; Parallel double-precision floating point element swizzling
2032 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF shuffle of operands 1 and 2 with three alternatives:
;;   0: operand 1 tied, operand 2 in an SSE register   -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied -> movlpd from %H1
;;   2: memory destination (operand 2 tied), operand 1 in a register
;;      -> movhpd
;; The insn condition rejects operands 1 and 2 both being memory.
2034 (define_insn "sse2_unpckhpd"
2035 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2038 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2039 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2040 (parallel [(const_int 1)
2042 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2044 unpckhpd\t{%2, %0|%0, %2}
2045 movlpd\t{%H1, %0|%0, %H1}
2046 movhpd\t{%1, %0|%0, %1}"
2047 [(set_attr "type" "sselog,ssemov,ssemov")
2048 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; SSE3 movddup pattern with two alternatives: an SSE register
;; destination with a register-or-memory source (emitted as movddup), and
;; an offsettable memory destination with a register source (type
;; ssemov).  The condition rejects memory-to-memory.
2050 (define_insn "*sse3_movddup"
2051 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2054 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2056 (parallel [(const_int 0)
2058 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2060 movddup\t{%1, %0|%0, %1}
2062 [(set_attr "type" "sselog1,ssemov")
2063 (set_attr "mode" "V2DF")])
;; Split used once reload has completed, for a V2DF memory destination
;; and a register source: the register is viewed as DFmode (same register
;; number) and stored twice, at byte offsets 0 and 8 of the destination.
2066 [(set (match_operand:V2DF 0 "memory_operand" "")
2069 (match_operand:V2DF 1 "register_operand" "")
2071 (parallel [(const_int 0)
2073 "TARGET_SSE3 && reload_completed"
2076 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2077 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2078 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; V2DF shuffle of operands 1 and 2; operand 1 is tied to the destination
;; in every alternative.  Three alternatives:
;;   0: operand 2 in an SSE register         -> unpcklpd
;;   1: operand 2 in memory                  -> movhpd
;;   2: offsettable memory destination       -> movlpd to %H0
;; The insn condition rejects operands 1 and 2 both being memory.
2082 (define_insn "sse2_unpcklpd"
2083 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2086 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2087 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2088 (parallel [(const_int 0)
2090 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2092 unpcklpd\t{%2, %0|%0, %2}
2093 movhpd\t{%2, %0|%0, %2}
2094 movlpd\t{%2, %H0|%H0, %2}"
2095 [(set_attr "type" "sselog,ssemov,ssemov")
2096 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd with an immediate mask (operand 3, a const_int).
;; The mask is decoded into explicit element indices for sse2_shufpd_1;
;; the last index passed is 3 when bit 1 of the mask is set and 2
;; otherwise.
2098 (define_expand "sse2_shufpd"
2099 [(match_operand:V2DF 0 "register_operand" "")
2100 (match_operand:V2DF 1 "register_operand" "")
2101 (match_operand:V2DF 2 "nonimmediate_operand" "")
2102 (match_operand:SI 3 "const_int_operand" "")]
2105 int mask = INTVAL (operands[3]);
2106 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2108 GEN_INT (mask & 2 ? 3 : 2)));
;; shufpd with the selection spelled out as two index operands:
;; operand 3 must be 0 or 1, operand 4 must be 2 or 3.
;; The output code rebuilds the 2-bit immediate (operand 3 in bit 0,
;; operand 4 minus 2 in bit 1), stores it in operands[3] and prints
;; shufpd with that immediate.
2112 (define_insn "sse2_shufpd_1"
2113 [(set (match_operand:V2DF 0 "register_operand" "=x")
2116 (match_operand:V2DF 1 "register_operand" "0")
2117 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2118 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2119 (match_operand 4 "const_2_to_3_operand" "")])))]
2123 mask = INTVAL (operands[3]);
2124 mask |= (INTVAL (operands[4]) - 2) << 1;
2125 operands[3] = GEN_INT (mask);
2127 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2129 [(set_attr "type" "sselog")
2130 (set_attr "mode" "V2DF")])
;; Extract element 1 of V2DF operand 1 into DF operand 0.  The
;; constraints describe three alternatives:
;;   0: memory destination, register source (movhpd)
;;   1: SSE register destination with the source tied to it
;;   2: SSE/x87/general register destination, offsettable memory source
;; The condition rejects memory-to-memory.
2132 (define_insn "sse2_storehpd"
2133 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2135 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2136 (parallel [(const_int 1)])))]
2137 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2139 movhpd\t{%1, %0|%0, %1}
2142 [(set_attr "type" "ssemov,sselog1,ssemov")
2143 (set_attr "mode" "V1DF,V2DF,DF")])
;; Pattern whose header line is not visible here (presumably a define_split).
;; After reload, an element-1 selection from a V2DF memory operand into a DF
;; register becomes a plain move: the memory operand is re-addressed as the
;; DFmode value at byte offset 8.
2146 [(set (match_operand:DF 0 "register_operand" "")
2148 (match_operand:V2DF 1 "memory_operand" "")
2149 (parallel [(const_int 1)])))]
2150 "TARGET_SSE2 && reload_completed"
2151 [(set (match_dup 0) (match_dup 1))]
2153 operands[1] = adjust_address (operands[1], DFmode, 8);
;; sse2_storelpd: DF destination taken from element 0 of a V2DF operand
;; (selector (const_int 0)).  Requires SSE2; destination and source may not
;; both be memory.  Three alternatives are declared; only the first
;; template (movlpd, register source to memory destination) is visible in
;; this excerpt.
2156 (define_insn "sse2_storelpd"
2157 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2159 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2160 (parallel [(const_int 0)])))]
2161 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2163 movlpd\t{%1, %0|%0, %1}
2166 [(set_attr "type" "ssemov")
2167 (set_attr "mode" "V1DF,DF,DF")])
;; Pattern whose header line is not visible here (presumably a define_split).
;; After reload, an element-0 selection from a V2DF operand into a DF
;; register is emitted as an ordinary move.  The source is first converted
;; to DFmode, either with gen_rtx_REG on its register number or with
;; gen_lowpart; the test that chooses between the two is on lines not
;; visible in this excerpt.
2170 [(set (match_operand:DF 0 "register_operand" "")
2172 (match_operand:V2DF 1 "nonimmediate_operand" "")
2173 (parallel [(const_int 0)])))]
2174 "TARGET_SSE2 && reload_completed"
2177 rtx op1 = operands[1];
2179 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2181 op1 = gen_lowpart (DFmode, op1);
2182 emit_move_insn (operands[0], op1);
;; sse2_loadhpd: V2DF result formed from element 0 of operand 1 together
;; with the DF operand 2 (the combining rtx operator is on a line not
;; visible here).  Requires SSE2; operands 1 and 2 may not both be memory.
;; Four alternatives are declared; the three visible templates are
;; movhpd, unpcklpd and shufpd $1, and the fourth template line is not
;; visible in this excerpt.
2186 (define_insn "sse2_loadhpd"
2187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2190 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2191 (parallel [(const_int 0)]))
2192 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2193 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2195 movhpd\t{%2, %0|%0, %2}
2196 unpcklpd\t{%2, %0|%0, %2}
2197 shufpd\t{$1, %1, %0|%0, %1, 1}
2199 [(set_attr "type" "ssemov,sselog,sselog,other")
2200 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Pattern whose header line is not visible here (presumably a define_split).
;; After reload, when the V2DF memory destination also supplies its own
;; element 0 and the other component is a DF register, only that register
;; needs storing: the destination is re-addressed as the DFmode slot at
;; byte offset 8 and a plain move is emitted.
2203 [(set (match_operand:V2DF 0 "memory_operand" "")
2205 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2206 (match_operand:DF 1 "register_operand" "")))]
2207 "TARGET_SSE2 && reload_completed"
2208 [(set (match_dup 0) (match_dup 1))]
2210 operands[0] = adjust_address (operands[0], DFmode, 8);
;; sse2_loadlpd: V2DF result formed from the DF operand 2 together with
;; element 1 of operand 1 (the combining rtx operator is on a line not
;; visible here).  Requires SSE2; operands 1 and 2 may not both be memory.
;; Six alternatives are declared; the five visible templates are, in order,
;; movsd, movlpd, movsd, shufpd $2 and movhpd from %H1.  The sixth template
;; line is not visible in this excerpt.
2213 (define_insn "sse2_loadlpd"
2214 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2216 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2218 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2219 (parallel [(const_int 1)]))))]
2220 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2222 movsd\t{%2, %0|%0, %2}
2223 movlpd\t{%2, %0|%0, %2}
2224 movsd\t{%2, %0|%0, %2}
2225 shufpd\t{$2, %2, %0|%0, %2, 2}
2226 movhpd\t{%H1, %0|%0, %H1}
2228 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2229 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Pattern whose header line is not visible here (presumably a define_split).
;; After reload, when the V2DF memory destination keeps its own element 1
;; and receives a new first component from a DF register, only that
;; register needs storing.  Element 1 (byte offset 8) is the part being
;; preserved, so the DF store must target the low slot at byte offset 0;
;; storing at offset 8 would overwrite the preserved element and leave the
;; low element stale.
2232 [(set (match_operand:V2DF 0 "memory_operand" "")
2234 (match_operand:DF 1 "register_operand" "")
2235 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2236 "TARGET_SSE2 && reload_completed"
2237 [(set (match_dup 0) (match_dup 1))]
2239 operands[0] = adjust_address (operands[0], DFmode, 0);
2242 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; *vec_extractv2df_1_sse: element-1 selection from a V2DF operand into a DF
;; destination, enabled only for SSE without SSE2.  Destination and source
;; may not both be memory.  Templates by alternative:
;;   m <- x : movhps
;;   x <- x : movhlps
;;   x <- o : movlps from %H1
2244 (define_insn "*vec_extractv2df_1_sse"
2245 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2247 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2248 (parallel [(const_int 1)])))]
2249 "!TARGET_SSE2 && TARGET_SSE
2250 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2252 movhps\t{%1, %0|%0, %1}
2253 movhlps\t{%1, %0|%0, %1}
2254 movlps\t{%H1, %0|%0, %H1}"
2255 [(set_attr "type" "ssemov")
2256 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_extractv2df_0_sse: element-0 selection from a V2DF operand into a DF
;; destination, enabled only for SSE without SSE2.  Destination and source
;; may not both be memory.  Templates by alternative:
;;   m <- x : movlps
;;   x <- x : movaps
;;   x <- m : movlps
2258 (define_insn "*vec_extractv2df_0_sse"
2259 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2261 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2262 (parallel [(const_int 0)])))]
2263 "!TARGET_SSE2 && TARGET_SSE
2264 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2266 movlps\t{%1, %0|%0, %1}
2267 movaps\t{%1, %0|%0, %1}
2268 movlps\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "ssemov")
2270 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: V2DF pattern combining operands 2 and 1.  The combining rtx
;; and the insn condition are on lines not visible in this excerpt.
;; Six alternatives; templates in order:
;;   movsd, movlpd (operand 2 in memory), movlpd (memory destination),
;;   shufpd $2, movhps from %H1, movhps into %H0.
2272 (define_insn "sse2_movsd"
2273 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2275 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2276 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2280 movsd\t{%2, %0|%0, %2}
2281 movlpd\t{%2, %0|%0, %2}
2282 movlpd\t{%2, %0|%0, %2}
2283 shufpd\t{$2, %2, %0|%0, %2, 2}
2284 movhps\t{%H1, %0|%0, %H1}
2285 movhps\t{%1, %H0|%H0, %1}"
2286 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2287 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; *vec_dupv2df_sse3: V2DF register result from a DF register-or-memory
;; operand, emitted as movddup.  The rtx operator and the insn condition are
;; on lines not visible in this excerpt.
2289 (define_insn "*vec_dupv2df_sse3"
2290 [(set (match_operand:V2DF 0 "register_operand" "=x")
2292 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2294 "movddup\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "sselog1")
2296 (set_attr "mode" "DF")])
;; *vec_dupv2df: V2DF register result from a DF register operand that must
;; be tied to the destination (constraint "0").  The rtx operator, the insn
;; condition and the output template are on lines not visible in this
;; excerpt.
2298 (define_insn "*vec_dupv2df"
2299 [(set (match_operand:V2DF 0 "register_operand" "=x")
2301 (match_operand:DF 1 "register_operand" "0")))]
2304 [(set_attr "type" "sselog1")
2305 (set_attr "mode" "V4SF")])
;; *vec_concatv2df_sse3: V2DF register result built from a DF
;; register-or-memory operand and emitted as movddup.  The rest of the rtx
;; and the insn condition are on lines not visible in this excerpt.
2307 (define_insn "*vec_concatv2df_sse3"
2308 [(set (match_operand:V2DF 0 "register_operand" "=x")
2310 (match_operand:DF 1 "nonimmediate_operand" "xm")
2313 "movddup\t{%1, %0|%0, %1}"
2314 [(set_attr "type" "sselog1")
2315 (set_attr "mode" "DF")])
;; *vec_concatv2df: V2DF register result built from DF operands 1 and 2.
;; The rtx operator and the insn condition are on lines not visible here.
;; Templates by alternative (destination / operand 1 / operand 2):
;;   Y / 0 / Y : unpcklpd
;;   Y / 0 / m : movhpd
;;   Y / m / C : movsd from operand 1
;;   x / 0 / x : movlhps
;;   x / 0 / m : movhps
2317 (define_insn "*vec_concatv2df"
2318 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2320 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2321 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2324 unpcklpd\t{%2, %0|%0, %2}
2325 movhpd\t{%2, %0|%0, %2}
2326 movsd\t{%1, %0|%0, %1}
2327 movlhps\t{%2, %0|%0, %2}
2328 movhps\t{%2, %0|%0, %2}"
2329 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2330 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; vec_setv2df: expander that hands the V2DF register (operand 0), the DF
;; value (operand 1) and the constant index (operand 2) to
;; ix86_expand_vector_set, with `false' as its first argument.  The
;; expander condition is on a line not visible in this excerpt.
2332 (define_expand "vec_setv2df"
2333 [(match_operand:V2DF 0 "register_operand" "")
2334 (match_operand:DF 1 "register_operand" "")
2335 (match_operand 2 "const_int_operand" "")]
2338 ix86_expand_vector_set (false, operands[0], operands[1],
2339 INTVAL (operands[2]));
;; vec_extractv2df: expander that hands the DF destination (operand 0), the
;; V2DF source (operand 1) and the constant index (operand 2) to
;; ix86_expand_vector_extract, with `false' as its first argument.  The
;; expander condition is on a line not visible in this excerpt.
2343 (define_expand "vec_extractv2df"
2344 [(match_operand:DF 0 "register_operand" "")
2345 (match_operand:V2DF 1 "register_operand" "")
2346 (match_operand 2 "const_int_operand" "")]
2349 ix86_expand_vector_extract (false, operands[0], operands[1],
2350 INTVAL (operands[2]));
;; vec_initv2df: expander that hands the V2DF destination (operand 0) and
;; an unconstrained operand 1 to ix86_expand_vector_init, with `false' as
;; its first argument.  The expander condition is on a line not visible in
;; this excerpt.
2354 (define_expand "vec_initv2df"
2355 [(match_operand:V2DF 0 "register_operand" "")
2356 (match_operand 1 "" "")]
2359 ix86_expand_vector_init (false, operands[0], operands[1]);
2363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2365 ;; Parallel integral arithmetic
2367 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; neg<mode>2: expander over the SSEMODEI integer vector modes.  The
;; preparation statement sets operand 2 to the mode's zero constant, forced
;; into a register.  How operand 2 is combined with operand 1 is on lines
;; not visible here -- presumably a subtraction from zero; confirm against
;; the full pattern.
2369 (define_expand "neg<mode>2"
2370 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2373 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2375 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; add<mode>3: expander for integer vector addition over the SSEMODEI modes.
;; Operands are passed through ix86_fixup_binary_operands_no_copy for PLUS
;; before the (plus ...) pattern is emitted.  The expander condition is on
;; a line not visible in this excerpt.
2377 (define_expand "add<mode>3"
2378 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2379 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2380 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2382 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; *add<mode>3: padd{b,w,d,q} for the SSEMODEI modes; the mnemonic suffix
;; comes from the ssevecsize attribute.  Operand 1 is commutative ("%") and
;; tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (PLUS, ...).
2384 (define_insn "*add<mode>3"
2385 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2387 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2388 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2389 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2390 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2391 [(set_attr "type" "sseiadd")
2392 (set_attr "mode" "TI")])
;; sse2_ssadd<mode>3: padds{b,w} for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is commutative and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (SS_PLUS, ...).
2394 (define_insn "sse2_ssadd<mode>3"
2395 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2397 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2398 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2399 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2400 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2401 [(set_attr "type" "sseiadd")
2402 (set_attr "mode" "TI")])
;; sse2_usadd<mode>3: paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is commutative and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (US_PLUS, ...).
2404 (define_insn "sse2_usadd<mode>3"
2405 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2407 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2408 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2409 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2410 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2411 [(set_attr "type" "sseiadd")
2412 (set_attr "mode" "TI")])
;; sub<mode>3: expander for integer vector subtraction over the SSEMODEI
;; modes.  Operand 1 must be a register.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy for MINUS.  The expander condition is
;; on a line not visible in this excerpt.
2414 (define_expand "sub<mode>3"
2415 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2416 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2417 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2419 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; *sub<mode>3: psub{b,w,d,q} for the SSEMODEI modes.  Operand 1 is a
;; register tied to the destination (not commutative); operand 2 may be a
;; register or memory.  The rtx operator and the insn condition are on
;; lines not visible in this excerpt.
2421 (define_insn "*sub<mode>3"
2422 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2424 (match_operand:SSEMODEI 1 "register_operand" "0")
2425 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2427 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2428 [(set_attr "type" "sseiadd")
2429 (set_attr "mode" "TI")])
;; sse2_sssub<mode>3: psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is a register tied to the destination.  The rtx operator and
;; the insn condition are on lines not visible in this excerpt.
2431 (define_insn "sse2_sssub<mode>3"
2432 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2434 (match_operand:SSEMODE12 1 "register_operand" "0")
2435 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2437 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2438 [(set_attr "type" "sseiadd")
2439 (set_attr "mode" "TI")])
;; sse2_ussub<mode>3: psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is a register tied to the destination.  The rtx operator and
;; the insn condition are on lines not visible in this excerpt.
2441 (define_insn "sse2_ussub<mode>3"
2442 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2444 (match_operand:SSEMODE12 1 "register_operand" "0")
2445 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2447 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2448 [(set_attr "type" "sseiadd")
2449 (set_attr "mode" "TI")])
;; mulv16qi3: expander for V16QI multiplication, built from word multiplies.
;; Steps, as emitted below:
;;   1. Allocate twelve V16QI temporaries t[0..11].
;;   2. Interleave each input with itself (punpckhbw / punpcklbw) so every
;;      16-bit word carries a source byte in its low byte.
;;   3. Multiply the high and low halves as V8HI via gen_mulv8hi3.
;;   4. Three rounds of punpckh/punpcklbw gather the wanted bytes, and a
;;      final punpcklbw writes the result to op0.
;; The declarations of t, i and op0 and the expander condition are on lines
;; not visible in this excerpt.
2451 (define_expand "mulv16qi3"
2452 [(set (match_operand:V16QI 0 "register_operand" "")
2453 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2454 (match_operand:V16QI 2 "register_operand" "")))]
2460 for (i = 0; i < 12; ++i)
2461 t[i] = gen_reg_rtx (V16QImode);
2463 /* Unpack data such that we've got a source byte in each low byte of
2464 each word. We don't care what goes into the high byte of each word.
2465 Rather than trying to get zero in there, most convenient is to let
2466 it be a copy of the low byte. */
2467 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2468 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2469 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2470 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2472 /* Multiply words. The end-of-line annotations here give a picture of what
2473 the output of that instruction looks like. Dot means don't care; the
2474 letters are the bytes of the result with A being the most significant. */
2475 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2476 gen_lowpart (V8HImode, t[0]),
2477 gen_lowpart (V8HImode, t[1])));
2478 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2479 gen_lowpart (V8HImode, t[2]),
2480 gen_lowpart (V8HImode, t[3])));
2482 /* Extract the relevant bytes and merge them back together. */
2483 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2484 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2485 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2486 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2487 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2488 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2491 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; mulv8hi3: expander for V8HI multiplication.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy for MULT before the (mult ...)
;; pattern is emitted.  The expander condition is on a line not visible in
;; this excerpt.
2495 (define_expand "mulv8hi3"
2496 [(set (match_operand:V8HI 0 "register_operand" "")
2497 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2498 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2500 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *mulv8hi3: pmullw for (mult:V8HI op1 op2).  Operand 1 is commutative and
;; tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (MULT, ...).
2502 (define_insn "*mulv8hi3"
2503 [(set (match_operand:V8HI 0 "register_operand" "=x")
2504 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2505 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2506 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2507 "pmullw\t{%2, %0|%0, %2}"
2508 [(set_attr "type" "sseimul")
2509 (set_attr "mode" "TI")])
;; sse2_smulv8hi3_highpart: pmulhw on V8HI operands.  Operand 1 is
;; commutative and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (MULT, ...).  The rtx wrapping the two operands
;; is on lines not visible in this excerpt.
2511 (define_insn "sse2_smulv8hi3_highpart"
2512 [(set (match_operand:V8HI 0 "register_operand" "=x")
2517 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2519 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2521 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2522 "pmulhw\t{%2, %0|%0, %2}"
2523 [(set_attr "type" "sseimul")
2524 (set_attr "mode" "TI")])
;; sse2_umulv8hi3_highpart: pmulhuw on V8HI operands.  Operand 1 is
;; commutative and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (MULT, ...).  The rtx wrapping the two operands
;; is on lines not visible in this excerpt.
2526 (define_insn "sse2_umulv8hi3_highpart"
2527 [(set (match_operand:V8HI 0 "register_operand" "=x")
2532 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2534 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2536 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2537 "pmulhuw\t{%2, %0|%0, %2}"
2538 [(set_attr "type" "sseimul")
2539 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3: pmuludq.  Elements 0 and 2 are selected from each
;; V4SI source (selector (const_int 0) (const_int 2)) and the result is
;; V2DI.  Operand 1 is commutative and tied to the destination.  Requires
;; SSE2 and ix86_binary_operator_ok (MULT, ...); the mode passed to that
;; check is V4SImode, matching the mode of operands 1 and 2.
2541 (define_insn "sse2_umulv2siv2di3"
2542 [(set (match_operand:V2DI 0 "register_operand" "=x")
2546 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2547 (parallel [(const_int 0) (const_int 2)])))
2550 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2551 (parallel [(const_int 0) (const_int 2)])))))]
2552 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2553 "pmuludq\t{%2, %0|%0, %2}"
2554 [(set_attr "type" "sseimul")
2555 (set_attr "mode" "TI")])
;; sse2_pmaddwd: pmaddwd, V4SI result from two V8HI sources.  The pattern
;; selects V4HI halves of operands 1 and 2 starting at index 0 and, via
;; match_dup, starting at index 1 (ending at index 7); the remaining
;; selector entries, the surrounding arithmetic rtx and the insn condition
;; are on lines not visible in this excerpt.  Operand 1 is commutative and
;; tied to the destination.
2557 (define_insn "sse2_pmaddwd"
2558 [(set (match_operand:V4SI 0 "register_operand" "=x")
2563 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2564 (parallel [(const_int 0)
2570 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2571 (parallel [(const_int 0)
2577 (vec_select:V4HI (match_dup 1)
2578 (parallel [(const_int 1)
2583 (vec_select:V4HI (match_dup 2)
2584 (parallel [(const_int 1)
2587 (const_int 7)]))))))]
2589 "pmaddwd\t{%2, %0|%0, %2}"
2590 [(set_attr "type" "sseiadd")
2591 (set_attr "mode" "TI")])
;; mulv4si3: expander for V4SI multiplication built from two pmuludq
;; multiplies (gen_sse2_umulv2siv2di3).
;;   t1 = products of elements 0 and 2 of op1 and op2
;;   t2, t3 = op1 and op2 shifted right by 32 bits as TImode values
;;   t4 = products of what were elements 1 and 3
;;   t5, t6 = pshufd of t1 and t4 with selector (0, 2, 0, 0)
;;   op0 = punpckldq (t5, t6)
;; The assignments of op0, op1 and op2 and the expander condition are on
;; lines not visible in this excerpt.
2593 (define_expand "mulv4si3"
2594 [(set (match_operand:V4SI 0 "register_operand" "")
2595 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2596 (match_operand:V4SI 2 "register_operand" "")))]
2599 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2605 t1 = gen_reg_rtx (V4SImode);
2606 t2 = gen_reg_rtx (V4SImode);
2607 t3 = gen_reg_rtx (V4SImode);
2608 t4 = gen_reg_rtx (V4SImode);
2609 t5 = gen_reg_rtx (V4SImode);
2610 t6 = gen_reg_rtx (V4SImode);
2611 thirtytwo = GEN_INT (32);
2613 /* Multiply elements 2 and 0. */
2614 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2616 /* Shift both input vectors down one element, so that elements 3 and 1
2617 are now in the slots for elements 2 and 0. For K8, at least, this is
2618 faster than using a shuffle. */
2619 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2620 gen_lowpart (TImode, op1), thirtytwo));
2621 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2622 gen_lowpart (TImode, op2), thirtytwo));
2624 /* Multiply elements 3 and 1. */
2625 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2627 /* Move the results in element 2 down to element 1; we don't care what
2628 goes in elements 2 and 3. */
2629 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2630 const0_rtx, const0_rtx));
2631 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2632 const0_rtx, const0_rtx));
2634 /* Merge the parts back together. */
2635 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; mulv2di3: expander for V2DI multiplication built from 32x32->64
;; multiplies (gen_sse2_umulv2siv2di3), per 64-bit element:
;;   t1 = lo(op1) * lo(op2)
;;   t2 = op1 >> 32, t3 = op2 >> 32          (logical right shifts)
;;   t4 = lo(op1) * lo(t3), t5 = lo(op2) * lo(t2)   (cross products)
;;   t4 <<= 32, t5 <<= 32
;;   op0 = t1 + t4 + t5
;; The assignments of op0, op1 and op2 and the expander condition are on
;; lines not visible in this excerpt.
2639 (define_expand "mulv2di3"
2640 [(set (match_operand:V2DI 0 "register_operand" "")
2641 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2642 (match_operand:V2DI 2 "register_operand" "")))]
2645 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2651 t1 = gen_reg_rtx (V2DImode);
2652 t2 = gen_reg_rtx (V2DImode);
2653 t3 = gen_reg_rtx (V2DImode);
2654 t4 = gen_reg_rtx (V2DImode);
2655 t5 = gen_reg_rtx (V2DImode);
2656 t6 = gen_reg_rtx (V2DImode);
2657 thirtytwo = GEN_INT (32);
2659 /* Multiply low parts. */
2660 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2661 gen_lowpart (V4SImode, op2)));
2663 /* Shift input vectors right 32 bits so we can multiply high parts. */
2664 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2665 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2667 /* Multiply high parts by low parts. */
2668 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2669 gen_lowpart (V4SImode, t3)));
2670 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2671 gen_lowpart (V4SImode, t2)));
2673 /* Shift them back. */
2674 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2675 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2677 /* Add the three parts together. */
2678 emit_insn (gen_addv2di3 (t6, t1, t4));
2679 emit_insn (gen_addv2di3 (op0, t6, t5));
;; sdot_prodv8hi: expander taking two V8HI sources (operands 1, 2) and a
;; V4SI accumulator (operand 3).  It emits pmaddwd of operands 1 and 2 into
;; a fresh V4SI temporary, then adds the temporary to operand 3 and writes
;; the sum to operand 0.  The expander condition is on a line not visible in
;; this excerpt.
2683 (define_expand "sdot_prodv8hi"
2684 [(match_operand:V4SI 0 "register_operand" "")
2685 (match_operand:V8HI 1 "nonimmediate_operand" "")
2686 (match_operand:V8HI 2 "nonimmediate_operand" "")
2687 (match_operand:V4SI 3 "register_operand" "")]
2690 rtx t = gen_reg_rtx (V4SImode);
2691 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2692 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; udot_prodv4si: expander taking two V4SI sources (operands 1, 2) and a
;; V2DI accumulator (operand 3).
;;   t1 = pmuludq (op1, op2) + op3
;;   t2, t3 = op1 and op2 shifted right as TImode values (the shift count
;;            argument is on lines not visible in this excerpt)
;;   t4 = pmuludq (t2, t3)
;;   op0 = t1 + t4
;; The local declarations and the expander condition are also on lines not
;; visible here.
2696 (define_expand "udot_prodv4si"
2697 [(match_operand:V2DI 0 "register_operand" "")
2698 (match_operand:V4SI 1 "register_operand" "")
2699 (match_operand:V4SI 2 "register_operand" "")
2700 (match_operand:V2DI 3 "register_operand" "")]
2705 t1 = gen_reg_rtx (V2DImode);
2706 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2707 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2709 t2 = gen_reg_rtx (V4SImode);
2710 t3 = gen_reg_rtx (V4SImode);
2711 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2712 gen_lowpart (TImode, operands[1]),
2714 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2715 gen_lowpart (TImode, operands[2]),
2718 t4 = gen_reg_rtx (V2DImode);
2719 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2721 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; ashr<mode>3: psra{w,d} for the SSEMODE24 modes (V8HI, V4SI).  Operand 1
;; is tied to the destination; the count (operand 2, TImode) is an SSE
;; register or an integer constant.  The rtx operator and the insn
;; condition are on lines not visible in this excerpt.
2725 (define_insn "ashr<mode>3"
2726 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2728 (match_operand:SSEMODE24 1 "register_operand" "0")
2729 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2731 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2732 [(set_attr "type" "sseishft")
2733 (set_attr "mode" "TI")])
;; lshr<mode>3: psrl{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI),
;; matching (lshiftrt op1 op2).  Operand 1 is tied to the destination; the
;; count (operand 2, TImode) is an SSE register or an integer constant.
;; The insn condition is on a line not visible in this excerpt.
2735 (define_insn "lshr<mode>3"
2736 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2737 (lshiftrt:SSEMODE248
2738 (match_operand:SSEMODE248 1 "register_operand" "0")
2739 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2741 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2742 [(set_attr "type" "sseishft")
2743 (set_attr "mode" "TI")])
;; ashl<mode>3: psll{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; Operand 1 is tied to the destination; the count (operand 2, TImode) is an
;; SSE register or an integer constant.  The rtx operator and the insn
;; condition are on lines not visible in this excerpt.
2745 (define_insn "ashl<mode>3"
2746 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2748 (match_operand:SSEMODE248 1 "register_operand" "0")
2749 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2751 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2752 [(set_attr "type" "sseishft")
2753 (set_attr "mode" "TI")])
;; sse2_ashlti3: whole-register left shift (ashift:TI) emitted as pslldq.
;; The rtl count must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the instruction operand is the
;; count / 8.  The insn condition is on a line not visible in this excerpt.
2755 (define_insn "sse2_ashlti3"
2756 [(set (match_operand:TI 0 "register_operand" "=x")
2757 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2758 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2761 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2762 return "pslldq\t{%2, %0|%0, %2}";
2764 [(set_attr "type" "sseishft")
2765 (set_attr "mode" "TI")])
;; vec_shl_<mode>: expander for a whole-vector left shift over the SSEMODEI
;; modes.  The count is first checked against
;; const_0_to_255_mul_8_operand (the action taken on failure is on a line
;; not visible here).  Operands 0 and 1 are then replaced by their TImode
;; lowparts so the emitted rtx is an (ashift:TI ...) on TImode operands.
2767 (define_expand "vec_shl_<mode>"
2768 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2769 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2770 (match_operand:SI 2 "general_operand" "")))]
2773 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2775 operands[0] = gen_lowpart (TImode, operands[0]);
2776 operands[1] = gen_lowpart (TImode, operands[1]);
;; sse2_lshrti3: whole-register logical right shift (lshiftrt:TI) emitted
;; as psrldq.  The rtl count must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing, so the instruction operand
;; is the count / 8.  The insn condition is on a line not visible in this
;; excerpt.
2779 (define_insn "sse2_lshrti3"
2780 [(set (match_operand:TI 0 "register_operand" "=x")
2781 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2782 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2785 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2786 return "psrldq\t{%2, %0|%0, %2}";
2788 [(set_attr "type" "sseishft")
2789 (set_attr "mode" "TI")])
;; vec_shr_<mode>: expander for a whole-vector logical right shift over the
;; SSEMODEI modes.  The count is first checked against
;; const_0_to_255_mul_8_operand (the action taken on failure is on a line
;; not visible here).  Operands 0 and 1 are then replaced by their TImode
;; lowparts so the emitted rtx is a (lshiftrt:TI ...) on TImode operands.
2791 (define_expand "vec_shr_<mode>"
2792 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2793 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2794 (match_operand:SI 2 "general_operand" "")))]
2797 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2799 operands[0] = gen_lowpart (TImode, operands[0]);
2800 operands[1] = gen_lowpart (TImode, operands[1]);
;; umaxv16qi3: expander for unsigned V16QI maximum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for UMAX.  The expander
;; condition is on a line not visible in this excerpt.
2803 (define_expand "umaxv16qi3"
2804 [(set (match_operand:V16QI 0 "register_operand" "")
2805 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2806 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2808 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; *umaxv16qi3: pmaxub for (umax:V16QI op1 op2).  Operand 1 is commutative
;; and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (UMAX, ...).
2810 (define_insn "*umaxv16qi3"
2811 [(set (match_operand:V16QI 0 "register_operand" "=x")
2812 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2813 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2814 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2815 "pmaxub\t{%2, %0|%0, %2}"
2816 [(set_attr "type" "sseiadd")
2817 (set_attr "mode" "TI")])
;; smaxv8hi3: expander for signed V8HI maximum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for SMAX.  The expander
;; condition is on a line not visible in this excerpt.
2819 (define_expand "smaxv8hi3"
2820 [(set (match_operand:V8HI 0 "register_operand" "")
2821 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2822 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2824 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; *smaxv8hi3: pmaxsw for (smax:V8HI op1 op2).  Operand 1 is commutative
;; and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (SMAX, ...).
2826 (define_insn "*smaxv8hi3"
2827 [(set (match_operand:V8HI 0 "register_operand" "=x")
2828 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2829 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2830 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2831 "pmaxsw\t{%2, %0|%0, %2}"
2832 [(set_attr "type" "sseiadd")
2833 (set_attr "mode" "TI")])
;; umaxv8hi3: expander for unsigned V8HI maximum built from two steps: an
;; unsigned saturating subtract (us_minus op1 op2) into operand 0, then
;; operand 0 + operand 2.  The destination of the second step is
;; presumably (match_dup 3), on a line not visible here.
;; The preparation code saves the real destination in operands[3]; if the
;; destination is the same rtx as operand 2, a fresh V8HI register is used
;; for the intermediate so operand 2 is not overwritten before the add.
2835 (define_expand "umaxv8hi3"
2836 [(set (match_operand:V8HI 0 "register_operand" "=x")
2837 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2838 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2840 (plus:V8HI (match_dup 0) (match_dup 2)))]
2843 operands[3] = operands[0];
2844 if (rtx_equal_p (operands[0], operands[2]))
2845 operands[0] = gen_reg_rtx (V8HImode);
;; smax<mode>3: expander for signed maximum over the SSEMODE14 modes
;; (V16QI, V4SI), implemented through ix86_expand_int_vcond.  The xops
;; array uses the same slot numbering as the vcond<mode> pattern:
;; 0 = destination, 1 and 2 = the two values to choose between,
;; 3 = comparison, 4 and 5 = comparison operands.  Here the comparison is
;; op1 > op2 (GT), choosing between op1 (slot 1) and op2 (slot 2).
;; The local declarations, the check of `ok' and the expander condition are
;; on lines not visible in this excerpt.
2848 (define_expand "smax<mode>3"
2849 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2850 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2851 (match_operand:SSEMODE14 2 "register_operand" "")))]
2857 xops[0] = operands[0];
2858 xops[1] = operands[1];
2859 xops[2] = operands[2];
2860 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2861 xops[4] = operands[1];
2862 xops[5] = operands[2];
2863 ok = ix86_expand_int_vcond (xops);
;; umaxv4si3: expander for unsigned V4SI maximum, implemented through
;; ix86_expand_int_vcond.  The xops array uses the same slot numbering as
;; the vcond<mode> pattern: 0 = destination, 1 and 2 = the two values to
;; choose between, 3 = comparison, 4 and 5 = comparison operands.  Here the
;; comparison is op1 > op2 unsigned (GTU), choosing between op1 and op2.
;; The local declarations, the check of `ok' and the expander condition are
;; on lines not visible in this excerpt.
2868 (define_expand "umaxv4si3"
2869 [(set (match_operand:V4SI 0 "register_operand" "")
2870 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2871 (match_operand:V4SI 2 "register_operand" "")))]
2877 xops[0] = operands[0];
2878 xops[1] = operands[1];
2879 xops[2] = operands[2];
2880 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2881 xops[4] = operands[1];
2882 xops[5] = operands[2];
2883 ok = ix86_expand_int_vcond (xops);
;; uminv16qi3: expander for unsigned V16QI minimum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for UMIN.  The expander
;; condition is on a line not visible in this excerpt.
2888 (define_expand "uminv16qi3"
2889 [(set (match_operand:V16QI 0 "register_operand" "")
2890 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2891 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2893 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; *uminv16qi3: pminub for (umin:V16QI op1 op2).  Operand 1 is commutative
;; and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (UMIN, ...).
2895 (define_insn "*uminv16qi3"
2896 [(set (match_operand:V16QI 0 "register_operand" "=x")
2897 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2899 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2900 "pminub\t{%2, %0|%0, %2}"
2901 [(set_attr "type" "sseiadd")
2902 (set_attr "mode" "TI")])
;; sminv8hi3: expander for signed V8HI minimum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for SMIN.  The expander
;; condition is on a line not visible in this excerpt.
2904 (define_expand "sminv8hi3"
2905 [(set (match_operand:V8HI 0 "register_operand" "")
2906 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2907 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2909 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; *sminv8hi3: pminsw for (smin:V8HI op1 op2).  Operand 1 is commutative
;; and tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (SMIN, ...).
2911 (define_insn "*sminv8hi3"
2912 [(set (match_operand:V8HI 0 "register_operand" "=x")
2913 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2914 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2915 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2916 "pminsw\t{%2, %0|%0, %2}"
2917 [(set_attr "type" "sseiadd")
2918 (set_attr "mode" "TI")])
;; smin<mode>3: expander for signed minimum over the SSEMODE14 modes
;; (V16QI, V4SI), implemented through ix86_expand_int_vcond.  The xops
;; array uses the same slot numbering as the vcond<mode> pattern:
;; 0 = destination, 1 and 2 = the two values to choose between,
;; 3 = comparison, 4 and 5 = comparison operands.  The comparison is still
;; op1 > op2 (GT), but the value slots are swapped relative to smax:
;; slot 1 holds op2 and slot 2 holds op1.
;; The local declarations, the check of `ok' and the expander condition are
;; on lines not visible in this excerpt.
2920 (define_expand "smin<mode>3"
2921 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2922 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2923 (match_operand:SSEMODE14 2 "register_operand" "")))]
2929 xops[0] = operands[0];
2930 xops[1] = operands[2];
2931 xops[2] = operands[1];
2932 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2933 xops[4] = operands[1];
2934 xops[5] = operands[2];
2935 ok = ix86_expand_int_vcond (xops);
;; umin<mode>3: expander for unsigned minimum over the SSEMODE24 modes
;; (V8HI, V4SI), implemented through ix86_expand_int_vcond.  The xops
;; array uses the same slot numbering as the vcond<mode> pattern:
;; 0 = destination, 1 and 2 = the two values to choose between,
;; 3 = comparison, 4 and 5 = comparison operands.  The comparison is
;; op1 > op2 unsigned (GTU); slot 1 holds op2 and slot 2 holds op1.
;; The local declarations, the check of `ok' and the expander condition are
;; on lines not visible in this excerpt.
2940 (define_expand "umin<mode>3"
2941 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2942 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2943 (match_operand:SSEMODE24 2 "register_operand" "")))]
2949 xops[0] = operands[0];
2950 xops[1] = operands[2];
2951 xops[2] = operands[1];
2952 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2953 xops[4] = operands[1];
2954 xops[5] = operands[2];
2955 ok = ix86_expand_int_vcond (xops);
2960 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2962 ;; Parallel integral comparisons
2964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_eq<mode>3: pcmpeq{b,w,d} for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Operand 1 is commutative and tied to the destination.  Requires
;; SSE2 and ix86_binary_operator_ok (EQ, ...).  The rtx operator is on a
;; line not visible in this excerpt.
2966 (define_insn "sse2_eq<mode>3"
2967 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2969 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2970 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2971 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2972 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2973 [(set_attr "type" "ssecmp")
2974 (set_attr "mode" "TI")])
;; sse2_gt<mode>3: pcmpgt{b,w,d} for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Operand 1 is a register tied to the destination (not
;; commutative).  The rtx operator and the insn condition are on lines not
;; visible in this excerpt.
2976 (define_insn "sse2_gt<mode>3"
2977 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2979 (match_operand:SSEMODE124 1 "register_operand" "0")
2980 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2982 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2983 [(set_attr "type" "ssecmp")
2984 (set_attr "mode" "TI")])
;; vcond<mode>: vector conditional-select expander for the SSEMODE124 modes.
;; Operand layout: 0 = destination, 1 = value when the comparison holds,
;; 2 = value otherwise, 3 = comparison operator applied to operands 4 and 5.
;; The operand array is handed to ix86_expand_int_vcond; what happens on
;; its true/false result, and the expander condition, are on lines not
;; visible in this excerpt.
2986 (define_expand "vcond<mode>"
2987 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2988 (if_then_else:SSEMODE124
2989 (match_operator 3 ""
2990 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2991 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2992 (match_operand:SSEMODE124 1 "general_operand" "")
2993 (match_operand:SSEMODE124 2 "general_operand" "")))]
2996 if (ix86_expand_int_vcond (operands))
;; vcondu<mode>: second vector conditional-select expander for the
;; SSEMODE124 modes; the visible pattern and the call are the same as in
;; vcond<mode>.
;; Operand layout: 0 = destination, 1 = value when the comparison holds,
;; 2 = value otherwise, 3 = comparison operator applied to operands 4 and 5.
;; The operand array is handed to ix86_expand_int_vcond; what happens on
;; its true/false result, and the expander condition, are on lines not
;; visible in this excerpt.
3002 (define_expand "vcondu<mode>"
3003 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3004 (if_then_else:SSEMODE124
3005 (match_operator 3 ""
3006 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3007 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3008 (match_operand:SSEMODE124 1 "general_operand" "")
3009 (match_operand:SSEMODE124 2 "general_operand" "")))]
3012 if (ix86_expand_int_vcond (operands))
3018 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3020 ;; Parallel integral logical operations
3022 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; one_cmpl<mode>2: bitwise complement over the SSEMODEI modes, expressed
;; as an xor.  The preparation code builds a CONST_VECTOR with every
;; element set to constm1_rtx (one entry per unit of the mode), forces it
;; into a register and stores it in operands[2].  The second xor operand in
;; the pattern and the expander condition are on lines not visible in this
;; excerpt.
3024 (define_expand "one_cmpl<mode>2"
3025 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3026 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3030 int i, n = GET_MODE_NUNITS (<MODE>mode);
3031 rtvec v = rtvec_alloc (n);
3033 for (i = 0; i < n; ++i)
3034 RTVEC_ELT (v, i) = constm1_rtx;
3036 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; and<mode>3: expander for bitwise AND over the SSEMODEI modes.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy for AND.  The
;; expander condition is on a line not visible in this excerpt.
3039 (define_expand "and<mode>3"
3040 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3041 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3042 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3044 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; *and<mode>3: pand for the SSEMODEI modes.  Operand 1 is commutative and
;; tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (AND, ...).  The rtx operator is on a line not
;; visible in this excerpt.
3046 (define_insn "*and<mode>3"
3047 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3049 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3050 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3051 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3052 "pand\t{%2, %0|%0, %2}"
3053 [(set_attr "type" "sselog")
3054 (set_attr "mode" "TI")])
;; sse2_nand<mode>3: pandn for the SSEMODEI modes.  The first input is the
;; complement (not ...) of operand 1, which is tied to the destination; it
;; is combined with operand 2.  The outer rtx operator and the insn
;; condition are on lines not visible in this excerpt.
3056 (define_insn "sse2_nand<mode>3"
3057 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3059 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3060 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3062 "pandn\t{%2, %0|%0, %2}"
3063 [(set_attr "type" "sselog")
3064 (set_attr "mode" "TI")])
;; ior<mode>3: expander for bitwise OR over the SSEMODEI modes.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy for IOR.  The
;; expander condition is on a line not visible in this excerpt.
3066 (define_expand "ior<mode>3"
3067 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3068 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3069 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3071 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; *ior<mode>3: por for the SSEMODEI modes.  Operand 1 is commutative and
;; tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (IOR, ...).  The rtx operator is on a line not
;; visible in this excerpt.
3073 (define_insn "*ior<mode>3"
3074 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3076 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3077 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3078 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3079 "por\t{%2, %0|%0, %2}"
3080 [(set_attr "type" "sselog")
3081 (set_attr "mode" "TI")])
;; xor<mode>3: expander for bitwise XOR over the SSEMODEI modes.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy for XOR.  The
;; expander condition is on a line not visible in this excerpt.
3083 (define_expand "xor<mode>3"
3084 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3085 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3086 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3088 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; *xor<mode>3: pxor for the SSEMODEI modes.  Operand 1 is commutative and
;; tied to the destination.  Requires SSE2 and
;; ix86_binary_operator_ok (XOR, ...).  The rtx operator is on a line not
;; visible in this excerpt.
3090 (define_insn "*xor<mode>3"
3091 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3093 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3094 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3095 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3096 "pxor\t{%2, %0|%0, %2}"
3097 [(set_attr "type" "sselog")
3098 (set_attr "mode" "TI")])
3100 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3102 ;; Parallel integral element swizzling
3104 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_packsswb: packsswb, V16QI result from two V8HI sources.  Operand 1
;; is a register tied to the destination; operand 2 may be a register or
;; memory.  The rtx operators wrapping the operands and the insn condition
;; are on lines not visible in this excerpt.
3106 (define_insn "sse2_packsswb"
3107 [(set (match_operand:V16QI 0 "register_operand" "=x")
3110 (match_operand:V8HI 1 "register_operand" "0"))
3112 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3114 "packsswb\t{%2, %0|%0, %2}"
3115 [(set_attr "type" "sselog")
3116 (set_attr "mode" "TI")])
;; sse2_packssdw: packssdw, V8HI result from two V4SI sources.  Operand 1
;; is a register tied to the destination; operand 2 may be a register or
;; memory.  The rtx operators wrapping the operands and the insn condition
;; are on lines not visible in this excerpt.
3118 (define_insn "sse2_packssdw"
3119 [(set (match_operand:V8HI 0 "register_operand" "=x")
3122 (match_operand:V4SI 1 "register_operand" "0"))
3124 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3126 "packssdw\t{%2, %0|%0, %2}"
3127 [(set_attr "type" "sselog")
3128 (set_attr "mode" "TI")])
;; sse2_packuswb: packuswb, V16QI result from two V8HI sources.  Operand 1
;; is a register tied to the destination; operand 2 may be a register or
;; memory.  The rtx operators wrapping the operands and the insn condition
;; are on lines not visible in this excerpt.
3130 (define_insn "sse2_packuswb"
3131 [(set (match_operand:V16QI 0 "register_operand" "=x")
3134 (match_operand:V8HI 1 "register_operand" "0"))
3136 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3138 "packuswb\t{%2, %0|%0, %2}"
3139 [(set_attr "type" "sselog")
3140 (set_attr "mode" "TI")])
;; sse2_punpckhbw: punpckhbw on V16QI operands.  The selector alternates
;; indices 8..15 with indices 24..31 (8,24, 9,25, ... 15,31).  Operand 1
;; is tied to the destination.  The rtx operators around the operands and
;; the insn condition are on lines not visible in this excerpt.
3142 (define_insn "sse2_punpckhbw"
3143 [(set (match_operand:V16QI 0 "register_operand" "=x")
3146 (match_operand:V16QI 1 "register_operand" "0")
3147 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3148 (parallel [(const_int 8) (const_int 24)
3149 (const_int 9) (const_int 25)
3150 (const_int 10) (const_int 26)
3151 (const_int 11) (const_int 27)
3152 (const_int 12) (const_int 28)
3153 (const_int 13) (const_int 29)
3154 (const_int 14) (const_int 30)
3155 (const_int 15) (const_int 31)])))]
3157 "punpckhbw\t{%2, %0|%0, %2}"
3158 [(set_attr "type" "sselog")
3159 (set_attr "mode" "TI")])
;; sse2_punpcklbw: punpcklbw on V16QI operands.  The selector alternates
;; indices 0..7 with indices 16..23 (0,16, 1,17, ... 7,23).  Operand 1 is
;; tied to the destination.  The rtx operators around the operands and the
;; insn condition are on lines not visible in this excerpt.
3161 (define_insn "sse2_punpcklbw"
3162 [(set (match_operand:V16QI 0 "register_operand" "=x")
3165 (match_operand:V16QI 1 "register_operand" "0")
3166 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3167 (parallel [(const_int 0) (const_int 16)
3168 (const_int 1) (const_int 17)
3169 (const_int 2) (const_int 18)
3170 (const_int 3) (const_int 19)
3171 (const_int 4) (const_int 20)
3172 (const_int 5) (const_int 21)
3173 (const_int 6) (const_int 22)
3174 (const_int 7) (const_int 23)])))]
3176 "punpcklbw\t{%2, %0|%0, %2}"
3177 [(set_attr "type" "sselog")
3178 (set_attr "mode" "TI")])
;; PUNPCKHWD on V8HI operands.  The selector pairs indices 4..7 with
;; 12..15: the high four words of operand 1 interleaved with the high four
;; words of operand 2.  Operand 1 is tied to the destination.
3180 (define_insn "sse2_punpckhwd"
3181 [(set (match_operand:V8HI 0 "register_operand" "=x")
3184 (match_operand:V8HI 1 "register_operand" "0")
3185 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3186 (parallel [(const_int 4) (const_int 12)
3187 (const_int 5) (const_int 13)
3188 (const_int 6) (const_int 14)
3189 (const_int 7) (const_int 15)])))]
3191 "punpckhwd\t{%2, %0|%0, %2}"
3192 [(set_attr "type" "sselog")
3193 (set_attr "mode" "TI")])
;; PUNPCKLWD on V8HI operands.  The selector pairs indices 0..3 with
;; 8..11: the low four words of operand 1 interleaved with the low four
;; words of operand 2.  Operand 1 is tied to the destination.
3195 (define_insn "sse2_punpcklwd"
3196 [(set (match_operand:V8HI 0 "register_operand" "=x")
3199 (match_operand:V8HI 1 "register_operand" "0")
3200 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3201 (parallel [(const_int 0) (const_int 8)
3202 (const_int 1) (const_int 9)
3203 (const_int 2) (const_int 10)
3204 (const_int 3) (const_int 11)])))]
3206 "punpcklwd\t{%2, %0|%0, %2}"
3207 [(set_attr "type" "sselog")
3208 (set_attr "mode" "TI")])
;; PUNPCKHDQ on V4SI operands.  The selector is 2, 6, 3, 7: the high two
;; doublewords of operand 1 interleaved with the high two of operand 2.
;; Operand 1 is tied to the destination.
3210 (define_insn "sse2_punpckhdq"
3211 [(set (match_operand:V4SI 0 "register_operand" "=x")
3214 (match_operand:V4SI 1 "register_operand" "0")
3215 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3216 (parallel [(const_int 2) (const_int 6)
3217 (const_int 3) (const_int 7)])))]
3219 "punpckhdq\t{%2, %0|%0, %2}"
3220 [(set_attr "type" "sselog")
3221 (set_attr "mode" "TI")])
;; PUNPCKLDQ on V4SI operands.  The selector is 0, 4, 1, 5: the low two
;; doublewords of operand 1 interleaved with the low two of operand 2.
;; Operand 1 is tied to the destination.
3223 (define_insn "sse2_punpckldq"
3224 [(set (match_operand:V4SI 0 "register_operand" "=x")
3227 (match_operand:V4SI 1 "register_operand" "0")
3228 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3229 (parallel [(const_int 0) (const_int 4)
3230 (const_int 1) (const_int 5)])))]
3232 "punpckldq\t{%2, %0|%0, %2}"
3233 [(set_attr "type" "sselog")
3234 (set_attr "mode" "TI")])
;; PUNPCKHQDQ on V2DI operands; the selector begins with element 1 (the
;; high quadword of operand 1).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.
3236 (define_insn "sse2_punpckhqdq"
3237 [(set (match_operand:V2DI 0 "register_operand" "=x")
3240 (match_operand:V2DI 1 "register_operand" "0")
3241 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3242 (parallel [(const_int 1)
3245 "punpckhqdq\t{%2, %0|%0, %2}"
3246 [(set_attr "type" "sselog")
3247 (set_attr "mode" "TI")])
;; PUNPCKLQDQ on V2DI operands; the selector begins with element 0 (the
;; low quadword of operand 1).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.
3249 (define_insn "sse2_punpcklqdq"
3250 [(set (match_operand:V2DI 0 "register_operand" "=x")
3253 (match_operand:V2DI 1 "register_operand" "0")
3254 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3255 (parallel [(const_int 0)
3258 "punpcklqdq\t{%2, %0|%0, %2}"
3259 [(set_attr "type" "sselog")
3260 (set_attr "mode" "TI")])
;; Expander for the PINSRW intrinsic.  The caller passes the value as an SI
;; operand and the lane as an index 0..7.  The preparation code narrows the
;; value to HImode with gen_lowpart and rewrites the index N as the one-hot
;; mask 1 << N, which is the form "*sse2_pinsrw" accepts for operand 3.
3262 (define_expand "sse2_pinsrw"
3263 [(set (match_operand:V8HI 0 "register_operand" "")
3266 (match_operand:SI 2 "nonimmediate_operand" ""))
3267 (match_operand:V8HI 1 "register_operand" "")
3268 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3271 operands[2] = gen_lowpart (HImode, operands[2]);
3272 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Matcher for PINSRW.  Operand 3 arrives as a power-of-two lane mask
;; (predicate const_pow2_1_to_128_operand); the output code converts it
;; back to a lane index with exact_log2 before printing.  The HI source
;; (general register or memory) is printed with the %k modifier, and
;; operand 1 is tied to the destination.
3275 (define_insn "*sse2_pinsrw"
3276 [(set (match_operand:V8HI 0 "register_operand" "=x")
3279 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3280 (match_operand:V8HI 1 "register_operand" "0")
3281 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3284 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3285 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3287 [(set_attr "type" "sselog")
3288 (set_attr "mode" "TI")])
;; PEXTRW: selects the V8HI element of operand 1 named by the constant
;; operand 2 (0..7) and delivers it in an SI general register.
3290 (define_insn "sse2_pextrw"
3291 [(set (match_operand:SI 0 "register_operand" "=r")
3294 (match_operand:V8HI 1 "register_operand" "x")
3295 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3297 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3298 [(set_attr "type" "sselog")
3299 (set_attr "mode" "TI")])
;; Expander for the PSHUFD intrinsic.  Operand 2 is the packed 8-bit
;; shuffle immediate; it is split into four 2-bit element selectors (bits
;; 1:0, 3:2, 5:4, 7:6) which are handed to sse2_pshufd_1 as separate
;; constant operands.
3301 (define_expand "sse2_pshufd"
3302 [(match_operand:V4SI 0 "register_operand" "")
3303 (match_operand:V4SI 1 "nonimmediate_operand" "")
3304 (match_operand:SI 2 "const_int_operand" "")]
3307 int mask = INTVAL (operands[2]);
3308 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3309 GEN_INT ((mask >> 0) & 3),
3310 GEN_INT ((mask >> 2) & 3),
3311 GEN_INT ((mask >> 4) & 3),
3312 GEN_INT ((mask >> 6) & 3)));
;; PSHUFD with the shuffle expressed as four explicit selectors (operands
;; 2..5, each 0..3).  The output code packs the selectors back into the
;; 8-bit immediate, two bits per element, and stores it in operands[2] so
;; the template can print it as %2.
3316 (define_insn "sse2_pshufd_1"
3317 [(set (match_operand:V4SI 0 "register_operand" "=x")
3319 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3320 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3321 (match_operand 3 "const_0_to_3_operand" "")
3322 (match_operand 4 "const_0_to_3_operand" "")
3323 (match_operand 5 "const_0_to_3_operand" "")])))]
3327 mask |= INTVAL (operands[2]) << 0;
3328 mask |= INTVAL (operands[3]) << 2;
3329 mask |= INTVAL (operands[4]) << 4;
3330 mask |= INTVAL (operands[5]) << 6;
3331 operands[2] = GEN_INT (mask);
3333 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3335 [(set_attr "type" "sselog1")
3336 (set_attr "mode" "TI")])
;; Expander for the PSHUFLW intrinsic.  The packed 8-bit immediate in
;; operand 2 is split into four 2-bit selectors which are passed to
;; sse2_pshuflw_1 as separate constant operands.
3338 (define_expand "sse2_pshuflw"
3339 [(match_operand:V8HI 0 "register_operand" "")
3340 (match_operand:V8HI 1 "nonimmediate_operand" "")
3341 (match_operand:SI 2 "const_int_operand" "")]
3344 int mask = INTVAL (operands[2]);
3345 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3346 GEN_INT ((mask >> 0) & 3),
3347 GEN_INT ((mask >> 2) & 3),
3348 GEN_INT ((mask >> 4) & 3),
3349 GEN_INT ((mask >> 6) & 3)));
;; PSHUFLW with the shuffle of the low words expressed as four explicit
;; selectors (operands 2..5, each 0..3).  The output code packs them back
;; into the 8-bit immediate, two bits per selector, and stores the result
;; in operands[2] for printing.
3353 (define_insn "sse2_pshuflw_1"
3354 [(set (match_operand:V8HI 0 "register_operand" "=x")
3356 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3357 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3358 (match_operand 3 "const_0_to_3_operand" "")
3359 (match_operand 4 "const_0_to_3_operand" "")
3360 (match_operand 5 "const_0_to_3_operand" "")
3368 mask |= INTVAL (operands[2]) << 0;
3369 mask |= INTVAL (operands[3]) << 2;
3370 mask |= INTVAL (operands[4]) << 4;
3371 mask |= INTVAL (operands[5]) << 6;
3372 operands[2] = GEN_INT (mask);
3374 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3376 [(set_attr "type" "sselog")
3377 (set_attr "mode" "TI")])
;; Expander for the PSHUFHW intrinsic.  Each 2-bit field of the packed
;; immediate is biased by 4 so the selectors handed to sse2_pshufhw_1 lie
;; in the range 4..7, matching that pattern's const_4_to_7_operand
;; predicates.
3379 (define_expand "sse2_pshufhw"
3380 [(match_operand:V8HI 0 "register_operand" "")
3381 (match_operand:V8HI 1 "nonimmediate_operand" "")
3382 (match_operand:SI 2 "const_int_operand" "")]
3385 int mask = INTVAL (operands[2]);
3386 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3387 GEN_INT (((mask >> 0) & 3) + 4),
3388 GEN_INT (((mask >> 2) & 3) + 4),
3389 GEN_INT (((mask >> 4) & 3) + 4),
3390 GEN_INT (((mask >> 6) & 3) + 4)));
;; PSHUFHW with the shuffle of the high words expressed as four explicit
;; selectors (operands 2..5, each 4..7).  The output code removes the bias
;; of 4 from each selector and packs the results into the 8-bit immediate,
;; which it stores in operands[2] for printing.
3394 (define_insn "sse2_pshufhw_1"
3395 [(set (match_operand:V8HI 0 "register_operand" "=x")
3397 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3398 (parallel [(const_int 0)
3402 (match_operand 2 "const_4_to_7_operand" "")
3403 (match_operand 3 "const_4_to_7_operand" "")
3404 (match_operand 4 "const_4_to_7_operand" "")
3405 (match_operand 5 "const_4_to_7_operand" "")])))]
3409 mask |= (INTVAL (operands[2]) - 4) << 0;
3410 mask |= (INTVAL (operands[3]) - 4) << 2;
3411 mask |= (INTVAL (operands[4]) - 4) << 4;
3412 mask |= (INTVAL (operands[5]) - 4) << 6;
3413 operands[2] = GEN_INT (mask);
3415 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3417 [(set_attr "type" "sselog")
3418 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from the SI operand 1.  The
;; preparation statement supplies operands[2] as the all-zero V4SI
;; constant.
3420 (define_expand "sse2_loadd"
3421 [(set (match_operand:V4SI 0 "register_operand" "")
3424 (match_operand:SI 1 "nonimmediate_operand" ""))
3428 "operands[2] = CONST0_RTX (V4SImode);")
;; Places the SI operand 2 into a V4SI register.  Three alternatives:
;;   1. movd  from memory or a general register, operand 1 zero ("C");
;;   2. movss from memory, operand 1 zero;
;;   3. movss from an SSE register, operand 1 tied to the destination.
3430 (define_insn "sse2_loadld"
3431 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3434 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3435 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3439 movd\t{%2, %0|%0, %2}
3440 movss\t{%2, %0|%0, %2}
3441 movss\t{%2, %0|%0, %2}"
3442 [(set_attr "type" "ssemov")
3443 (set_attr "mode" "TI,V4SF,SF")])
;; Stores element 0 of a V4SI register to an SI destination (memory or SSE
;; register).  Once reload has completed the pattern is split into a plain
;; SI move, with operand 1 re-expressed as the SImode view of the same
;; hard register.
3445 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3446 ;; be taken into account, and movdi isn't fully populated even without.
3447 (define_insn_and_split "sse2_stored"
3448 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3450 (match_operand:V4SI 1 "register_operand" "x")
3451 (parallel [(const_int 0)])))]
3454 "&& reload_completed"
3455 [(set (match_dup 0) (match_dup 1))]
3457 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that stores element 0 of a V2DI register to a DI destination.
3460 (define_expand "sse_storeq"
3461 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3463 (match_operand:V2DI 1 "register_operand" "")
3464 (parallel [(const_int 0)])))]
;; Matcher for storing element 0 of a V2DI register to a DI destination
;; (memory or SSE register), followed by its post-reload rewrite: under
;; "TARGET_SSE && reload_completed" the same rtx becomes a plain DI move,
;; with operand 1 re-expressed as the DImode view of the same hard register.
3468 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3469 ;; be taken into account, and movdi isn't fully populated even without.
3470 (define_insn "*sse2_storeq"
3471 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3473 (match_operand:V2DI 1 "register_operand" "x")
3474 (parallel [(const_int 0)])))]
3479 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3481 (match_operand:V2DI 1 "register_operand" "")
3482 (parallel [(const_int 0)])))]
3483 "TARGET_SSE && reload_completed"
3484 [(set (match_dup 0) (match_dup 1))]
3486 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 (the high quadword) of a V2DI value when SSE2 is
;; available; destination and source may not both be memory.  Alternatives:
;;   1. movhps  stores the high half of a register to memory;
;;   2. psrldq  shifts the register right by 8 bytes in place (source tied
;;              to the destination);
;;   3. movq    loads from the source memory printed with the %H modifier
;;              (source must be offsettable, constraint "o").
3489 (define_insn "*vec_extractv2di_1_sse2"
3490 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3492 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3493 (parallel [(const_int 1)])))]
3494 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3496 movhps\t{%1, %0|%0, %1}
3497 psrldq\t{$8, %0|%0, 8}
3498 movq\t{%H1, %0|%0, %H1}"
3499 [(set_attr "type" "ssemov,sseishft,ssemov")
3500 (set_attr "memory" "*,none,*")
3501 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only counterpart of *vec_extractv2di_1_sse2 (enabled when TARGET_SSE
;; is set and TARGET_SSE2 is not).  Extracts element 1 of a V2DI value using
;; movhps (store to memory), movhlps (register to register) or movlps from
;; the source memory printed with the %H modifier.
3503 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
3504 (define_insn "*vec_extractv2di_1_sse"
3505 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3507 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3508 (parallel [(const_int 1)])))]
3509 "!TARGET_SSE2 && TARGET_SSE
3510 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3512 movhps\t{%1, %0|%0, %1}
3513 movhlps\t{%1, %0|%0, %1}
3514 movlps\t{%H1, %0|%0, %H1}"
3515 [(set_attr "type" "ssemov")
3516 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Broadcasts an SI value held in an SSE register to all four V4SI lanes.
;; Alternative 1 uses pshufd with immediate 0 ("Y" registers); alternative
;; 2 uses shufps with immediate 0 in place, with the source tied to the
;; destination.
3518 (define_insn "*vec_dupv4si"
3519 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3521 (match_operand:SI 1 "register_operand" " Y,0")))]
3524 pshufd\t{$0, %1, %0|%0, %1, 0}
3525 shufps\t{$0, %0, %0|%0, %0, 0}"
3526 [(set_attr "type" "sselog1")
3527 (set_attr "mode" "TI,V4SF")])
;; Builds a V2DI value from the DI operand 1.  Both alternatives tie the
;; source to the destination register; the first is typed sselog1 (TI
;; mode) and the second ssemov (V4SF mode).
3529 (define_insn "*vec_dupv2di"
3530 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3532 (match_operand:DI 1 "register_operand" " 0,0")))]
3537 [(set_attr "type" "sselog1,ssemov")
3538 (set_attr "mode" "TI,V4SF")])
;; Forms a V2SI value from two SI operands.  Alternatives 1-2 target SSE
;; registers ("Y"), alternatives 3-4 MMX registers ("*y").  In each pair
;; the first uses punpckldq with operand 1 tied to the destination, and the
;; second uses movd when operand 2 is the constant zero ("C").
3540 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3541 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3542 ;; alternatives pretty much forces the MMX alternative to be chosen.
3543 (define_insn "*sse2_concatv2si"
3544 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3546 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3547 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3550 punpckldq\t{%2, %0|%0, %2}
3551 movd\t{%1, %0|%0, %1}
3552 punpckldq\t{%2, %0|%0, %2}
3553 movd\t{%1, %0|%0, %1}"
3554 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3555 (set_attr "mode" "TI,TI,DI,DI")])
;; Variant of *sse2_concatv2si whose SSE alternatives use the
;; single-precision instructions unpcklps and movss instead of punpckldq
;; and movd.  The two MMX alternatives still use punpckldq and movd.
3557 (define_insn "*sse1_concatv2si"
3558 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3560 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3561 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3564 unpcklps\t{%2, %0|%0, %2}
3565 movss\t{%1, %0|%0, %1}
3566 punpckldq\t{%2, %0|%0, %2}
3567 movd\t{%1, %0|%0, %1}"
3568 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3569 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Forms a V4SI value from two V2SI halves.  Operand 1 is always tied to
;; the destination; operand 2 is merged with punpcklqdq ("Y" register),
;; movlhps (SSE register) or movhps (memory).
3571 (define_insn "*vec_concatv4si_1"
3572 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3574 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3575 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3578 punpcklqdq\t{%2, %0|%0, %2}
3579 movlhps\t{%2, %0|%0, %2}
3580 movhps\t{%2, %0|%0, %2}"
3581 [(set_attr "type" "sselog,ssemov,ssemov")
3582 (set_attr "mode" "TI,V4SF,V2SF")])
;; Forms a V2DI value from two DI operands.  Six alternatives:
;;   1. movq       operand 1 from memory, operand 2 zero;
;;   2. movq2dq    operand 1 from an MMX register, operand 2 zero;
;;   3. punpcklqdq operand 1 tied to dest, operand 2 in a "Y" register;
;;   4. movlhps    operand 1 tied to dest, operand 2 in an SSE register;
;;   5. movhps     operand 1 tied to dest, operand 2 in memory;
;;   6. movlps     operand 1 from memory, operand 2 tied to dest.
3584 (define_insn "*vec_concatv2di"
3585 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3587 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3588 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3591 movq\t{%1, %0|%0, %1}
3592 movq2dq\t{%1, %0|%0, %1}
3593 punpcklqdq\t{%2, %0|%0, %2}
3594 movlhps\t{%2, %0|%0, %2}
3595 movhps\t{%2, %0|%0, %2}
3596 movlps\t{%1, %0|%0, %1}"
3597 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3598 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for setting one element of a V2DI register: operand 1 is the
;; DI value and operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set, whose first argument is passed as
;; false (presumably an "MMX allowed" flag -- confirm in i386.c).
3600 (define_expand "vec_setv2di"
3601 [(match_operand:V2DI 0 "register_operand" "")
3602 (match_operand:DI 1 "register_operand" "")
3603 (match_operand 2 "const_int_operand" "")]
3606 ix86_expand_vector_set (false, operands[0], operands[1],
3607 INTVAL (operands[2]));
;; Expander for extracting one element of a V2DI register into a DI
;; register; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
3611 (define_expand "vec_extractv2di"
3612 [(match_operand:DI 0 "register_operand" "")
3613 (match_operand:V2DI 1 "register_operand" "")
3614 (match_operand 2 "const_int_operand" "")]
3617 ix86_expand_vector_extract (false, operands[0], operands[1],
3618 INTVAL (operands[2]));
;; Expander for initialising a V2DI register from operand 1, which has no
;; predicate or mode restriction here.  Delegates to
;; ix86_expand_vector_init with false as its first argument.
3622 (define_expand "vec_initv2di"
3623 [(match_operand:V2DI 0 "register_operand" "")
3624 (match_operand 1 "" "")]
3627 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V4SI register: operand 1 is the
;; SI value and operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with false as its first argument.
3631 (define_expand "vec_setv4si"
3632 [(match_operand:V4SI 0 "register_operand" "")
3633 (match_operand:SI 1 "register_operand" "")
3634 (match_operand 2 "const_int_operand" "")]
3637 ix86_expand_vector_set (false, operands[0], operands[1],
3638 INTVAL (operands[2]));
;; Expander for extracting one element of a V4SI register into an SI
;; register; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
3642 (define_expand "vec_extractv4si"
3643 [(match_operand:SI 0 "register_operand" "")
3644 (match_operand:V4SI 1 "register_operand" "")
3645 (match_operand 2 "const_int_operand" "")]
3648 ix86_expand_vector_extract (false, operands[0], operands[1],
3649 INTVAL (operands[2]));
;; Expander for initialising a V4SI register from operand 1, which has no
;; predicate or mode restriction here.  Delegates to
;; ix86_expand_vector_init with false as its first argument.
3653 (define_expand "vec_initv4si"
3654 [(match_operand:V4SI 0 "register_operand" "")
3655 (match_operand 1 "" "")]
3658 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V8HI register: operand 1 is the
;; HI value and operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with false as its first argument.
3662 (define_expand "vec_setv8hi"
3663 [(match_operand:V8HI 0 "register_operand" "")
3664 (match_operand:HI 1 "register_operand" "")
3665 (match_operand 2 "const_int_operand" "")]
3668 ix86_expand_vector_set (false, operands[0], operands[1],
3669 INTVAL (operands[2]));
;; Expander for extracting one element of a V8HI register into an HI
;; register; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
3673 (define_expand "vec_extractv8hi"
3674 [(match_operand:HI 0 "register_operand" "")
3675 (match_operand:V8HI 1 "register_operand" "")
3676 (match_operand 2 "const_int_operand" "")]
3679 ix86_expand_vector_extract (false, operands[0], operands[1],
3680 INTVAL (operands[2]));
;; Expander for initialising a V8HI register from operand 1, which has no
;; predicate or mode restriction here.  Delegates to
;; ix86_expand_vector_init with false as its first argument.
3684 (define_expand "vec_initv8hi"
3685 [(match_operand:V8HI 0 "register_operand" "")
3686 (match_operand 1 "" "")]
3689 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander for setting one element of a V16QI register: operand 1 is the
;; QI value and operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set with false as its first argument.
3693 (define_expand "vec_setv16qi"
3694 [(match_operand:V16QI 0 "register_operand" "")
3695 (match_operand:QI 1 "register_operand" "")
3696 (match_operand 2 "const_int_operand" "")]
3699 ix86_expand_vector_set (false, operands[0], operands[1],
3700 INTVAL (operands[2]));
;; Expander for extracting one element of a V16QI register into a QI
;; register; operand 2 is the constant element index.  Delegates to
;; ix86_expand_vector_extract with false as its first argument.
3704 (define_expand "vec_extractv16qi"
3705 [(match_operand:QI 0 "register_operand" "")
3706 (match_operand:V16QI 1 "register_operand" "")
3707 (match_operand 2 "const_int_operand" "")]
3710 ix86_expand_vector_extract (false, operands[0], operands[1],
3711 INTVAL (operands[2]));
;; Expander for initialising a V16QI register from operand 1, which has no
;; predicate or mode restriction here.  Delegates to
;; ix86_expand_vector_init with false as its first argument.
3715 (define_expand "vec_initv16qi"
3716 [(match_operand:V16QI 0 "register_operand" "")
3717 (match_operand 1 "" "")]
3720 ix86_expand_vector_init (false, operands[0], operands[1]);
3724 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PAVGB on V16QI operands.  The rtx carries a constant vector of sixteen
;; ones alongside the two inputs.  Operand 1 is marked commutative with
;; operand 2 ("%0"), and the insn condition asks
;; ix86_binary_operator_ok to validate the operand combination as a PLUS.
3730 (define_insn "sse2_uavgv16qi3"
3731 [(set (match_operand:V16QI 0 "register_operand" "=x")
3737 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3739 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3740 (const_vector:V16QI [(const_int 1) (const_int 1)
3741 (const_int 1) (const_int 1)
3742 (const_int 1) (const_int 1)
3743 (const_int 1) (const_int 1)
3744 (const_int 1) (const_int 1)
3745 (const_int 1) (const_int 1)
3746 (const_int 1) (const_int 1)
3747 (const_int 1) (const_int 1)]))
3749 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3750 "pavgb\t{%2, %0|%0, %2}"
3751 [(set_attr "type" "sseiadd")
3752 (set_attr "mode" "TI")])
;; PAVGW on V8HI operands.  Same shape as sse2_uavgv16qi3, with a constant
;; vector of eight ones; operand 1 is commutative with operand 2 and the
;; condition validates the operands via ix86_binary_operator_ok.
3754 (define_insn "sse2_uavgv8hi3"
3755 [(set (match_operand:V8HI 0 "register_operand" "=x")
3761 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3763 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3764 (const_vector:V8HI [(const_int 1) (const_int 1)
3765 (const_int 1) (const_int 1)
3766 (const_int 1) (const_int 1)
3767 (const_int 1) (const_int 1)]))
3769 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3770 "pavgw\t{%2, %0|%0, %2}"
3771 [(set_attr "type" "sseiadd")
3772 (set_attr "mode" "TI")])
;; PSADBW, modelled as an opaque unspec over two V16QI inputs that yields a
;; V2DI result.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
3774 ;; The correct representation for this is absolutely enormous, and
3775 ;; surely not generally useful.
3776 (define_insn "sse2_psadbw"
3777 [(set (match_operand:V2DI 0 "register_operand" "=x")
3778 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3779 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3782 "psadbw\t{%2, %0|%0, %2}"
3783 [(set_attr "type" "sseiadd")
3784 (set_attr "mode" "TI")])
;; MOVMSKPS: an unspec over a V4SF SSE register whose SI result is written
;; to a general register.
3786 (define_insn "sse_movmskps"
3787 [(set (match_operand:SI 0 "register_operand" "=r")
3788 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3791 "movmskps\t{%1, %0|%0, %1}"
3792 [(set_attr "type" "ssecvt")
3793 (set_attr "mode" "V4SF")])
;; MOVMSKPD: an unspec over a V2DF SSE register whose SI result is written
;; to a general register.
3795 (define_insn "sse2_movmskpd"
3796 [(set (match_operand:SI 0 "register_operand" "=r")
3797 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3800 "movmskpd\t{%1, %0|%0, %1}"
3801 [(set_attr "type" "ssecvt")
3802 (set_attr "mode" "V2DF")])
;; PMOVMSKB: an unspec over a V16QI SSE register whose SI result is written
;; to a general register.
;; NOTE(review): the "mode" attribute is "V2DF" although the input is an
;; integer vector; confirm whether this is deliberate (e.g. for length
;; computation) before changing it.
3804 (define_insn "sse2_pmovmskb"
3805 [(set (match_operand:SI 0 "register_operand" "=r")
3806 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3809 "pmovmskb\t{%1, %0|%0, %1}"
3810 [(set_attr "type" "ssecvt")
3811 (set_attr "mode" "V2DF")])
;; Expander for MASKMOVDQU.  Operand 0 is the V16QI memory destination;
;; operands 1 and 2 are V16QI registers fed to an unspec.  The
;; "*sse2_maskmovdqu" patterns use the same rtx shape.
3813 (define_expand "sse2_maskmovdqu"
3814 [(set (match_operand:V16QI 0 "memory_operand" "")
3815 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3816 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit MASKMOVDQU.  The destination address is an SI register
;; constrained to "D", and the previous contents of that memory appear as
;; an input of the unspec (the mem of match_dup 0).  Only operands 1 and 2
;; are printed; the address register is implicit in the instruction.
3822 (define_insn "*sse2_maskmovdqu"
3823 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3824 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3825 (match_operand:V16QI 2 "register_operand" "x")
3826 (mem:V16QI (match_dup 0))]
3828 "TARGET_SSE2 && !TARGET_64BIT"
3829 ;; @@@ check ordering of operands in intel/nonintel syntax
3830 "maskmovdqu\t{%2, %1|%1, %2}"
3831 [(set_attr "type" "ssecvt")
3832 (set_attr "mode" "TI")])
;; 64-bit MASKMOVDQU.  Identical to "*sse2_maskmovdqu" except that the
;; address register is DImode and the condition requires TARGET_64BIT.
3834 (define_insn "*sse2_maskmovdqu_rex64"
3835 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3836 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3837 (match_operand:V16QI 2 "register_operand" "x")
3838 (mem:V16QI (match_dup 0))]
3840 "TARGET_SSE2 && TARGET_64BIT"
3841 ;; @@@ check ordering of operands in intel/nonintel syntax
3842 "maskmovdqu\t{%2, %1|%1, %2}"
3843 [(set_attr "type" "ssecvt")
3844 (set_attr "mode" "TI")])
;; LDMXCSR: a volatile unspec that consumes an SI memory operand.  The
;; "memory" attribute marks the insn as a load.
3846 (define_insn "sse_ldmxcsr"
3847 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3851 [(set_attr "type" "sse")
3852 (set_attr "memory" "load")])
;; STMXCSR: writes an SI memory operand from the volatile unspec
;; UNSPECV_STMXCSR.  The "memory" attribute marks the insn as a store.
3854 (define_insn "sse_stmxcsr"
3855 [(set (match_operand:SI 0 "memory_operand" "=m")
3856 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3859 [(set_attr "type" "sse")
3860 (set_attr "memory" "store")])
;; Expander for SFENCE, enabled for TARGET_SSE or TARGET_3DNOW_A.  The
;; fence is modelled as an UNSPEC_SFENCE that reads and writes a BLKmode
;; memory reference; the preparation code creates that reference with a
;; scratch address and marks it volatile.
3862 (define_expand "sse_sfence"
3864 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3865 "TARGET_SSE || TARGET_3DNOW_A"
3867 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3868 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the rtx produced by the sse_sfence expander: a BLKmode
;; operand set from UNSPEC_SFENCE of itself.  Memory effects are declared
;; "unknown".
3871 (define_insn "*sse_sfence"
3872 [(set (match_operand:BLK 0 "" "")
3873 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3874 "TARGET_SSE || TARGET_3DNOW_A"
3876 [(set_attr "type" "sse")
3877 (set_attr "memory" "unknown")])
;; CLFLUSH: a volatile unspec taking an address operand (constraint "p").
;; Memory effects are declared "unknown".
3879 (define_insn "sse2_clflush"
3880 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3884 [(set_attr "type" "sse")
3885 (set_attr "memory" "unknown")])
;; Expander for MFENCE.  As with sse_sfence, the fence is an
;; UNSPEC_MFENCE over a volatile BLKmode memory reference with a scratch
;; address, created by the preparation code.
3887 (define_expand "sse2_mfence"
3889 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3892 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3893 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the rtx produced by the sse2_mfence expander: a BLKmode
;; operand set from UNSPEC_MFENCE of itself.  Memory effects are declared
;; "unknown".
3896 (define_insn "*sse2_mfence"
3897 [(set (match_operand:BLK 0 "" "")
3898 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3901 [(set_attr "type" "sse")
3902 (set_attr "memory" "unknown")])
;; Expander for LFENCE.  As with sse_sfence, the fence is an
;; UNSPEC_LFENCE over a volatile BLKmode memory reference with a scratch
;; address, created by the preparation code.
3904 (define_expand "sse2_lfence"
3906 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3909 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3910 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the rtx produced by the sse2_lfence expander: a BLKmode
;; operand set from UNSPEC_LFENCE of itself.  Memory effects are declared
;; "unknown".
3913 (define_insn "*sse2_lfence"
3914 [(set (match_operand:BLK 0 "" "")
3915 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3918 [(set_attr "type" "sse")
3919 (set_attr "memory" "unknown")])
;; MWAIT: a volatile unspec with two SI operands pinned to the "a" and "c"
;; register constraints.  The same pattern serves 64-bit mode (see the
;; comment inside the pattern); the encoded length is fixed at 3 bytes.
3921 (define_insn "sse3_mwait"
3922 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3923 (match_operand:SI 1 "register_operand" "c")]
3926 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
3927 ;; Since 32bit register operands are implicitly zero extended to 64bit,
3928 ;; we only need to set up 32bit registers.
3930 [(set_attr "length" "3")])
;; 32-bit MONITOR: a volatile unspec with three SI operands pinned to the
;; "a", "c" and "d" register constraints.  Enabled only when TARGET_SSE3
;; is set and TARGET_64BIT is not; the encoded length is fixed at 3 bytes.
3932 (define_insn "sse3_monitor"
3933 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3934 (match_operand:SI 1 "register_operand" "c")
3935 (match_operand:SI 2 "register_operand" "d")]
3937 "TARGET_SSE3 && !TARGET_64BIT"
3938 "monitor\t%0, %1, %2"
3939 [(set_attr "length" "3")])
;; 64-bit MONITOR: same as sse3_monitor except that operand 0 (constraint
;; "a") is DImode and the condition requires TARGET_64BIT.  Operands 1 and
;; 2 stay SImode for the reason given in the comment inside the pattern.
3941 (define_insn "sse3_monitor64"
3942 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
3943 (match_operand:SI 1 "register_operand" "c")
3944 (match_operand:SI 2 "register_operand" "d")]
3946 "TARGET_SSE3 && TARGET_64BIT"
3947 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
3948 ;; RCX and RDX are used. Since 32bit register operands are implicitly
3949 ;; zero extended to 64bit, we only need to set up 32bit registers.
3951 [(set_attr "length" "3")])