1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; correspond to the element widths in bytes of the modes it lists
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 return standard_sse_constant_opcode (insn, operands[1]);
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
74 return "movdqa\t{%1, %0|%0, %1}";
79 [(set_attr "type" "sselog1,ssemov,ssemov")
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
88 (const_string "TI")))])
90 (define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
95 ix86_expand_vector_move (V4SFmode, operands);
99 (define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
104 switch (which_alternative)
107 return standard_sse_constant_opcode (insn, operands[1]);
110 return "movaps\t{%1, %0|%0, %1}";
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
124 (vec_duplicate:V4SF (match_dup 1))
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
132 (define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
137 ix86_expand_vector_move (V2DFmode, operands);
141 (define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
146 switch (which_alternative)
149 return standard_sse_constant_opcode (insn, operands[1]);
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
155 return "movapd\t{%1, %0|%0, %1}";
160 [(set_attr "type" "sselog1,ssemov,ssemov")
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
168 (const_string "V4SF")
169 (const_string "V2DF")))])
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
181 (define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
185 ix86_expand_push (<MODE>mode, operands[0]);
189 (define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Unaligned move of a V4SF value, emitted as movups.  The source is
;; wrapped in an unspec, and the insn condition rejects the case where
;; both operands are in memory.
;; The "mode" attribute is V4SF to match the V4SF operands; it used to
;; read V2DF, which is the mode of the double-precision sibling
;; sse2_movupd rather than of this insn.
;; NOTE(review): other attributes may be derived from "mode" elsewhere
;; in the port -- confirm nothing relied on the old value.
198 (define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
207 (define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
216 (define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
225 (define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
234 (define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
243 (define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
;; Store of an SImode general register ("r") to memory, emitted as
;; movnti.  The source is wrapped in an unspec.
;; The "mode" attribute is SI to match the SImode operands; it used to
;; read V2DF, which does not describe any operand of this insn.
;; NOTE(review): other attributes may be derived from "mode" elsewhere
;; in the port -- confirm nothing relied on the old value.
252 (define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "SI")])
261 (define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
272 ;; Parallel single-precision floating point arithmetic
274 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
276 (define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
282 (define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
288 (define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; V4SF vector addition, emitted as addps.  Operand 1 is tied to the
;; destination ("0"); the "%" modifier marks operands 1 and 2 as
;; commutative, so either input may end up sharing operand 0's
;; register.  Operand 2 may be a register or memory ("xm").  The insn
;; is enabled only when TARGET_SSE holds and ix86_binary_operator_ok
;; accepts the operands for PLUS.
295 (define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
304 (define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
307 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
316 (define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
323 (define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
332 (define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
344 (define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; V4SF vector multiplication, emitted as mulps.  Operand 1 is tied to
;; the destination ("0") and, through "%", is commutative with
;; operand 2, which may be a register or memory ("xm").  The insn is
;; enabled only when TARGET_SSE holds and ix86_binary_operator_ok
;; accepts the operands for MULT.
351 (define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
360 (define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
363 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
372 (define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
379 (define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
388 (define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
396 "divss\t{%2, %0|%0, %2}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
400 (define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
409 (define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
414 (match_operand:V4SF 2 "register_operand" "0")
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
421 (define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
430 (define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
435 (match_operand:V4SF 2 "register_operand" "0")
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
442 (define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
450 (define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
461 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462 ;; isn't really correct, as those rtl operators aren't defined when
463 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
465 (define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; V4SF maximum for the flag_finite_math_only case, emitted as maxps.
;; Here the operands are declared commutative ("%0"): operand 1 is
;; tied to the destination and may be swapped with operand 2, which
;; may be a register or memory ("xm").  The condition additionally
;; requires TARGET_SSE and that ix86_binary_operator_ok accepts the
;; operands for SMAX.
476 (define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
486 (define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
495 (define_insn "sse_vmsmaxv4sf3"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
498 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
503 "maxss\t{%2, %0|%0, %2}"
504 [(set_attr "type" "sse")
505 (set_attr "mode" "SF")])
507 (define_expand "sminv4sf3"
508 [(set (match_operand:V4SF 0 "register_operand" "")
509 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
510 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
513 if (!flag_finite_math_only)
514 operands[1] = force_reg (V4SFmode, operands[1]);
515 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; V4SF minimum for the flag_finite_math_only case, emitted as minps.
;; Here the operands are declared commutative ("%0"): operand 1 is
;; tied to the destination and may be swapped with operand 2, which
;; may be a register or memory ("xm").  The condition additionally
;; requires TARGET_SSE and that ix86_binary_operator_ok accepts the
;; operands for SMIN.
518 (define_insn "*sminv4sf3_finite"
519 [(set (match_operand:V4SF 0 "register_operand" "=x")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
521 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
522 "TARGET_SSE && flag_finite_math_only
523 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
524 "minps\t{%2, %0|%0, %2}"
525 [(set_attr "type" "sse")
526 (set_attr "mode" "V4SF")])
528 (define_insn "*sminv4sf3"
529 [(set (match_operand:V4SF 0 "register_operand" "=x")
530 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
531 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
533 "minps\t{%2, %0|%0, %2}"
534 [(set_attr "type" "sse")
535 (set_attr "mode" "V4SF")])
537 (define_insn "sse_vmsminv4sf3"
538 [(set (match_operand:V4SF 0 "register_operand" "=x")
540 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
541 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
545 "minss\t{%2, %0|%0, %2}"
546 [(set_attr "type" "sse")
547 (set_attr "mode" "SF")])
549 ;; These versions of the min/max patterns implement exactly the operations
550 ;; min = (op1 < op2 ? op1 : op2)
551 ;; max = (!(op1 < op2) ? op1 : op2)
552 ;; Their operands are not commutative, and thus they may be used in the
553 ;; presence of -0.0 and NaN.
555 (define_insn "*ieee_sminv4sf3"
556 [(set (match_operand:V4SF 0 "register_operand" "=x")
557 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
558 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
561 "minps\t{%2, %0|%0, %2}"
562 [(set_attr "type" "sseadd")
563 (set_attr "mode" "V4SF")])
565 (define_insn "*ieee_smaxv4sf3"
566 [(set (match_operand:V4SF 0 "register_operand" "=x")
567 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
568 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
571 "maxps\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sseadd")
573 (set_attr "mode" "V4SF")])
575 (define_insn "*ieee_sminv2df3"
576 [(set (match_operand:V2DF 0 "register_operand" "=x")
577 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
578 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
581 "minpd\t{%2, %0|%0, %2}"
582 [(set_attr "type" "sseadd")
583 (set_attr "mode" "V2DF")])
585 (define_insn "*ieee_smaxv2df3"
586 [(set (match_operand:V2DF 0 "register_operand" "=x")
587 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
588 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
591 "maxpd\t{%2, %0|%0, %2}"
592 [(set_attr "type" "sseadd")
593 (set_attr "mode" "V2DF")])
595 (define_insn "sse3_addsubv4sf3"
596 [(set (match_operand:V4SF 0 "register_operand" "=x")
599 (match_operand:V4SF 1 "register_operand" "0")
600 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
601 (minus:V4SF (match_dup 1) (match_dup 2))
604 "addsubps\t{%2, %0|%0, %2}"
605 [(set_attr "type" "sseadd")
606 (set_attr "mode" "V4SF")])
608 (define_insn "sse3_haddv4sf3"
609 [(set (match_operand:V4SF 0 "register_operand" "=x")
614 (match_operand:V4SF 1 "register_operand" "0")
615 (parallel [(const_int 0)]))
616 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
618 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
619 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
623 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
624 (parallel [(const_int 0)]))
625 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
627 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
628 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
630 "haddps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
634 (define_insn "sse3_hsubv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
656 "hsubps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
660 (define_expand "reduc_splus_v4sf"
661 [(match_operand:V4SF 0 "register_operand" "")
662 (match_operand:V4SF 1 "register_operand" "")]
667 rtx tmp = gen_reg_rtx (V4SFmode);
668 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
669 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
672 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
676 (define_expand "reduc_smax_v4sf"
677 [(match_operand:V4SF 0 "register_operand" "")
678 (match_operand:V4SF 1 "register_operand" "")]
681 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
685 (define_expand "reduc_smin_v4sf"
686 [(match_operand:V4SF 0 "register_operand" "")
687 (match_operand:V4SF 1 "register_operand" "")]
690 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
694 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
696 ;; Parallel single-precision floating point comparisons
698 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
700 (define_insn "sse_maskcmpv4sf3"
701 [(set (match_operand:V4SF 0 "register_operand" "=x")
702 (match_operator:V4SF 3 "sse_comparison_operator"
703 [(match_operand:V4SF 1 "register_operand" "0")
704 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
706 "cmp%D3ps\t{%2, %0|%0, %2}"
707 [(set_attr "type" "ssecmp")
708 (set_attr "mode" "V4SF")])
710 (define_insn "sse_vmmaskcmpv4sf3"
711 [(set (match_operand:V4SF 0 "register_operand" "=x")
713 (match_operator:V4SF 3 "sse_comparison_operator"
714 [(match_operand:V4SF 1 "register_operand" "0")
715 (match_operand:V4SF 2 "register_operand" "x")])
719 "cmp%D3ss\t{%2, %0|%0, %2}"
720 [(set_attr "type" "ssecmp")
721 (set_attr "mode" "SF")])
723 (define_insn "sse_comi"
724 [(set (reg:CCFP FLAGS_REG)
727 (match_operand:V4SF 0 "register_operand" "x")
728 (parallel [(const_int 0)]))
730 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
731 (parallel [(const_int 0)]))))]
733 "comiss\t{%1, %0|%0, %1}"
734 [(set_attr "type" "ssecomi")
735 (set_attr "mode" "SF")])
737 (define_insn "sse_ucomi"
738 [(set (reg:CCFPU FLAGS_REG)
741 (match_operand:V4SF 0 "register_operand" "x")
742 (parallel [(const_int 0)]))
744 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
745 (parallel [(const_int 0)]))))]
747 "ucomiss\t{%1, %0|%0, %1}"
748 [(set_attr "type" "ssecomi")
749 (set_attr "mode" "SF")])
751 (define_expand "vcondv4sf"
752 [(set (match_operand:V4SF 0 "register_operand" "")
755 [(match_operand:V4SF 4 "nonimmediate_operand" "")
756 (match_operand:V4SF 5 "nonimmediate_operand" "")])
757 (match_operand:V4SF 1 "general_operand" "")
758 (match_operand:V4SF 2 "general_operand" "")))]
761 if (ix86_expand_fp_vcond (operands))
767 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
769 ;; Parallel single-precision floating point logical operations
771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
773 (define_expand "andv4sf3"
774 [(set (match_operand:V4SF 0 "register_operand" "")
775 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
776 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
778 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Bitwise AND of two V4SF values, emitted as andps.  Operand 1 is
;; tied to the destination ("0") and, through "%", is commutative with
;; operand 2, which may be a register or memory ("xm").  The insn is
;; enabled only when TARGET_SSE holds and ix86_binary_operator_ok
;; accepts the operands for AND.
780 (define_insn "*andv4sf3"
781 [(set (match_operand:V4SF 0 "register_operand" "=x")
782 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
783 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
784 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
785 "andps\t{%2, %0|%0, %2}"
786 [(set_attr "type" "sselog")
787 (set_attr "mode" "V4SF")])
789 (define_insn "sse_nandv4sf3"
790 [(set (match_operand:V4SF 0 "register_operand" "=x")
791 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
792 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
794 "andnps\t{%2, %0|%0, %2}"
795 [(set_attr "type" "sselog")
796 (set_attr "mode" "V4SF")])
798 (define_expand "iorv4sf3"
799 [(set (match_operand:V4SF 0 "register_operand" "")
800 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
801 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
803 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Bitwise inclusive OR of two V4SF values, emitted as orps.  Operand 1
;; is tied to the destination ("0") and, through "%", is commutative
;; with operand 2, which may be a register or memory ("xm").  The insn
;; is enabled only when TARGET_SSE holds and ix86_binary_operator_ok
;; accepts the operands for IOR.
805 (define_insn "*iorv4sf3"
806 [(set (match_operand:V4SF 0 "register_operand" "=x")
807 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
808 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
809 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
810 "orps\t{%2, %0|%0, %2}"
811 [(set_attr "type" "sselog")
812 (set_attr "mode" "V4SF")])
814 (define_expand "xorv4sf3"
815 [(set (match_operand:V4SF 0 "register_operand" "")
816 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
817 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
819 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Bitwise exclusive OR of two V4SF values, emitted as xorps.
;; Operand 1 is tied to the destination ("0") and, through "%", is
;; commutative with operand 2, which may be a register or memory
;; ("xm").  The insn is enabled only when TARGET_SSE holds and
;; ix86_binary_operator_ok accepts the operands for XOR.
821 (define_insn "*xorv4sf3"
822 [(set (match_operand:V4SF 0 "register_operand" "=x")
823 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
824 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
825 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
826 "xorps\t{%2, %0|%0, %2}"
827 [(set_attr "type" "sselog")
828 (set_attr "mode" "V4SF")])
830 ;; Also define scalar versions. These are used for abs, neg, and
831 ;; conditional move. Using subregs into vector modes causes register
832 ;; allocation lossage. These patterns do not allow memory operands
833 ;; because the native instructions read the full 128-bits.
835 (define_insn "*andsf3"
836 [(set (match_operand:SF 0 "register_operand" "=x")
837 (and:SF (match_operand:SF 1 "register_operand" "0")
838 (match_operand:SF 2 "register_operand" "x")))]
840 "andps\t{%2, %0|%0, %2}"
841 [(set_attr "type" "sselog")
842 (set_attr "mode" "V4SF")])
844 (define_insn "*nandsf3"
845 [(set (match_operand:SF 0 "register_operand" "=x")
846 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
847 (match_operand:SF 2 "register_operand" "x")))]
849 "andnps\t{%2, %0|%0, %2}"
850 [(set_attr "type" "sselog")
851 (set_attr "mode" "V4SF")])
853 (define_insn "*iorsf3"
854 [(set (match_operand:SF 0 "register_operand" "=x")
855 (ior:SF (match_operand:SF 1 "register_operand" "0")
856 (match_operand:SF 2 "register_operand" "x")))]
858 "orps\t{%2, %0|%0, %2}"
859 [(set_attr "type" "sselog")
860 (set_attr "mode" "V4SF")])
862 (define_insn "*xorsf3"
863 [(set (match_operand:SF 0 "register_operand" "=x")
864 (xor:SF (match_operand:SF 1 "register_operand" "0")
865 (match_operand:SF 2 "register_operand" "x")))]
867 "xorps\t{%2, %0|%0, %2}"
868 [(set_attr "type" "sselog")
869 (set_attr "mode" "V4SF")])
871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
873 ;; Parallel single-precision floating point conversion operations
875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
877 (define_insn "sse_cvtpi2ps"
878 [(set (match_operand:V4SF 0 "register_operand" "=x")
881 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
882 (match_operand:V4SF 1 "register_operand" "0")
885 "cvtpi2ps\t{%2, %0|%0, %2}"
886 [(set_attr "type" "ssecvt")
887 (set_attr "mode" "V4SF")])
889 (define_insn "sse_cvtps2pi"
890 [(set (match_operand:V2SI 0 "register_operand" "=y")
892 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
894 (parallel [(const_int 0) (const_int 1)])))]
896 "cvtps2pi\t{%1, %0|%0, %1}"
897 [(set_attr "type" "ssecvt")
898 (set_attr "unit" "mmx")
899 (set_attr "mode" "DI")])
901 (define_insn "sse_cvttps2pi"
902 [(set (match_operand:V2SI 0 "register_operand" "=y")
904 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
905 (parallel [(const_int 0) (const_int 1)])))]
907 "cvttps2pi\t{%1, %0|%0, %1}"
908 [(set_attr "type" "ssecvt")
909 (set_attr "unit" "mmx")
910 (set_attr "mode" "SF")])
912 (define_insn "sse_cvtsi2ss"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
916 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
917 (match_operand:V4SF 1 "register_operand" "0,0")
920 "cvtsi2ss\t{%2, %0|%0, %2}"
921 [(set_attr "type" "sseicvt")
922 (set_attr "athlon_decode" "vector,double")
923 (set_attr "amdfam10_decode" "vector,double")
924 (set_attr "mode" "SF")])
926 (define_insn "sse_cvtsi2ssq"
927 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
930 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
931 (match_operand:V4SF 1 "register_operand" "0,0")
933 "TARGET_SSE && TARGET_64BIT"
934 "cvtsi2ssq\t{%2, %0|%0, %2}"
935 [(set_attr "type" "sseicvt")
936 (set_attr "athlon_decode" "vector,double")
937 (set_attr "amdfam10_decode" "vector,double")
938 (set_attr "mode" "SF")])
940 (define_insn "sse_cvtss2si"
941 [(set (match_operand:SI 0 "register_operand" "=r,r")
944 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
945 (parallel [(const_int 0)]))]
946 UNSPEC_FIX_NOTRUNC))]
948 "cvtss2si\t{%1, %0|%0, %1}"
949 [(set_attr "type" "sseicvt")
950 (set_attr "athlon_decode" "double,vector")
951 (set_attr "amdfam10_decode" "double,double")
952 (set_attr "mode" "SI")])
954 (define_insn "sse_cvtss2siq"
955 [(set (match_operand:DI 0 "register_operand" "=r,r")
958 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
959 (parallel [(const_int 0)]))]
960 UNSPEC_FIX_NOTRUNC))]
961 "TARGET_SSE && TARGET_64BIT"
962 "cvtss2siq\t{%1, %0|%0, %1}"
963 [(set_attr "type" "sseicvt")
964 (set_attr "athlon_decode" "double,vector")
965 (set_attr "amdfam10_decode" "double,double")
966 (set_attr "mode" "DI")])
968 (define_insn "sse_cvttss2si"
969 [(set (match_operand:SI 0 "register_operand" "=r,r")
972 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
973 (parallel [(const_int 0)]))))]
975 "cvttss2si\t{%1, %0|%0, %1}"
976 [(set_attr "type" "sseicvt")
977 (set_attr "athlon_decode" "double,vector")
978 (set_attr "amdfam10_decode" "double,double")
979 (set_attr "mode" "SI")])
981 (define_insn "sse_cvttss2siq"
982 [(set (match_operand:DI 0 "register_operand" "=r,r")
985 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
986 (parallel [(const_int 0)]))))]
987 "TARGET_SSE && TARGET_64BIT"
988 "cvttss2siq\t{%1, %0|%0, %1}"
989 [(set_attr "type" "sseicvt")
990 (set_attr "athlon_decode" "double,vector")
991 (set_attr "amdfam10_decode" "double,double")
992 (set_attr "mode" "DI")])
;; Convert a V4SI integer vector (register or memory, "xm") to V4SF,
;; emitted as cvtdq2ps.
;; The "mode" attribute is V4SF to match the V4SF result; it used to
;; read V2DF, although no operand of this insn is double-precision.
;; NOTE(review): other attributes may be derived from "mode" elsewhere
;; in the port -- confirm nothing relied on the old value.
994 (define_insn "sse2_cvtdq2ps"
995 [(set (match_operand:V4SF 0 "register_operand" "=x")
996 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
998 "cvtdq2ps\t{%1, %0|%0, %1}"
999 [(set_attr "type" "ssecvt")
1000 (set_attr "mode" "V4SF")])
1002 (define_insn "sse2_cvtps2dq"
1003 [(set (match_operand:V4SI 0 "register_operand" "=x")
1004 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1005 UNSPEC_FIX_NOTRUNC))]
1007 "cvtps2dq\t{%1, %0|%0, %1}"
1008 [(set_attr "type" "ssecvt")
1009 (set_attr "mode" "TI")])
1011 (define_insn "sse2_cvttps2dq"
1012 [(set (match_operand:V4SI 0 "register_operand" "=x")
1013 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1015 "cvttps2dq\t{%1, %0|%0, %1}"
1016 [(set_attr "type" "ssecvt")
1017 (set_attr "mode" "TI")])
1019 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1021 ;; Parallel single-precision floating point element swizzling
1023 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1025 (define_insn "sse_movhlps"
1026 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1029 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1030 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1031 (parallel [(const_int 6)
1035 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1037 movhlps\t{%2, %0|%0, %2}
1038 movlps\t{%H2, %0|%0, %H2}
1039 movhps\t{%2, %0|%0, %2}"
1040 [(set_attr "type" "ssemov")
1041 (set_attr "mode" "V4SF,V2SF,V2SF")])
1043 (define_insn "sse_movlhps"
1044 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1047 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1048 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1049 (parallel [(const_int 0)
1053 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1055 movlhps\t{%2, %0|%0, %2}
1056 movhps\t{%2, %0|%0, %2}
1057 movlps\t{%2, %H0|%H0, %2}"
1058 [(set_attr "type" "ssemov")
1059 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; unpckhps: element selection {2, 6, 3, 7} over operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
1061 (define_insn "sse_unpckhps"
1062 [(set (match_operand:V4SF 0 "register_operand" "=x")
1065 (match_operand:V4SF 1 "register_operand" "0")
1066 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1067 (parallel [(const_int 2) (const_int 6)
1068 (const_int 3) (const_int 7)])))]
1070 "unpckhps\t{%2, %0|%0, %2}"
1071 [(set_attr "type" "sselog")
1072 (set_attr "mode" "V4SF")])
;; unpcklps: element selection {0, 4, 1, 5} over operands 1 and 2.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
1074 (define_insn "sse_unpcklps"
1075 [(set (match_operand:V4SF 0 "register_operand" "=x")
1078 (match_operand:V4SF 1 "register_operand" "0")
1079 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1080 (parallel [(const_int 0) (const_int 4)
1081 (const_int 1) (const_int 5)])))]
1083 "unpcklps\t{%2, %0|%0, %2}"
1084 [(set_attr "type" "sselog")
1085 (set_attr "mode" "V4SF")])
1087 ;; These are modeled with the same vec_concat as the others so that we
1088 ;; capture users of shufps that can use the new instructions
;; movshdup with a single source operand (register or memory); the
;; destination is a separate SSE register, not tied to the source.
1089 (define_insn "sse3_movshdup"
1090 [(set (match_operand:V4SF 0 "register_operand" "=x")
1093 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1095 (parallel [(const_int 1)
1100 "movshdup\t{%1, %0|%0, %1}"
1101 [(set_attr "type" "sse")
1102 (set_attr "mode" "V4SF")])
;; movsldup with a single source operand (register or memory); the
;; destination is a separate SSE register, not tied to the source.
1104 (define_insn "sse3_movsldup"
1105 [(set (match_operand:V4SF 0 "register_operand" "=x")
1108 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1110 (parallel [(const_int 0)
1115 "movsldup\t{%1, %0|%0, %1}"
1116 [(set_attr "type" "sse")
1117 (set_attr "mode" "V4SF")])
;; Expander for shufps: splits the immediate in operand 3 into four
;; 2-bit selectors and hands them to sse_shufps_1.  The two upper
;; selectors are biased by 4, which is the const_4_to_7_operand range
;; sse_shufps_1 requires for its operands 5 and 6.
1119 (define_expand "sse_shufps"
1120 [(match_operand:V4SF 0 "register_operand" "")
1121 (match_operand:V4SF 1 "register_operand" "")
1122 (match_operand:V4SF 2 "nonimmediate_operand" "")
1123 (match_operand:SI 3 "const_int_operand" "")]
1126 int mask = INTVAL (operands[3]);
1127 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1128 GEN_INT ((mask >> 0) & 3),
1129 GEN_INT ((mask >> 2) & 3),
1130 GEN_INT (((mask >> 4) & 3) + 4),
1131 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps with explicit element selectors: operands 3 and 4 are in 0..3,
;; operands 5 and 6 in 4..7.  The output code packs them back into one
;; immediate (2 bits each, subtracting the bias of 4 from operands 5 and
;; 6), overwrites operands[3] with it and emits shufps.
1135 (define_insn "sse_shufps_1"
1136 [(set (match_operand:V4SF 0 "register_operand" "=x")
1139 (match_operand:V4SF 1 "register_operand" "0")
1140 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1141 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1142 (match_operand 4 "const_0_to_3_operand" "")
1143 (match_operand 5 "const_4_to_7_operand" "")
1144 (match_operand 6 "const_4_to_7_operand" "")])))]
1148 mask |= INTVAL (operands[3]) << 0;
1149 mask |= INTVAL (operands[4]) << 2;
1150 mask |= (INTVAL (operands[5]) - 4) << 4;
1151 mask |= (INTVAL (operands[6]) - 4) << 6;
1152 operands[3] = GEN_INT (mask);
1154 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1156 [(set_attr "type" "sselog")
1157 (set_attr "mode" "V4SF")])
;; Takes elements 2 and 3 of V4SF operand 1 as a V2SF.  Alternatives:
;; movhps to memory, movhlps between registers, and movlps reading the
;; upper half (%H1) of an offsettable memory source.
1159 (define_insn "sse_storehps"
1160 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1162 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1163 (parallel [(const_int 2) (const_int 3)])))]
1166 movhps\t{%1, %0|%0, %1}
1167 movhlps\t{%1, %0|%0, %1}
1168 movlps\t{%H1, %0|%0, %H1}"
1169 [(set_attr "type" "ssemov")
1170 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of operand 1 (tied to the destination) with
;; V2SF operand 2.  Alternatives: movhps from memory, movlhps from a
;; register, and movlps storing into the upper half (%H0) of an
;; offsettable memory destination.
1172 (define_insn "sse_loadhps"
1173 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1176 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1177 (parallel [(const_int 0) (const_int 1)]))
1178 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1181 movhps\t{%2, %0|%0, %2}
1182 movlhps\t{%2, %0|%0, %2}
1183 movlps\t{%2, %H0|%H0, %2}"
1184 [(set_attr "type" "ssemov")
1185 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Takes elements 0 and 1 of V4SF operand 1 as a V2SF: movlps to memory,
;; movaps between registers, or movlps from memory.
1187 (define_insn "sse_storelps"
1188 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1190 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1191 (parallel [(const_int 0) (const_int 1)])))]
1194 movlps\t{%1, %0|%0, %1}
1195 movaps\t{%1, %0|%0, %1}
1196 movlps\t{%1, %0|%0, %1}"
1197 [(set_attr "type" "ssemov")
1198 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternative 0 has operand 2 tied to the destination and uses
;; shufps $0xe4 with operand 1; alternatives 1 and 2 tie operand 1 to the
;; destination and use movlps to load from, or store to, memory.
1200 (define_insn "sse_loadlps"
1201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1203 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1205 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1206 (parallel [(const_int 2) (const_int 3)]))))]
1209 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1210 movlps\t{%2, %0|%0, %2}
1211 movlps\t{%2, %0|%0, %2}"
1212 [(set_attr "type" "sselog,ssemov,ssemov")
1213 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 1 is tied to the destination and
;; operand 2 is the other SSE register source.
1215 (define_insn "sse_movss"
1216 [(set (match_operand:V4SF 0 "register_operand" "=x")
1218 (match_operand:V4SF 2 "register_operand" "x")
1219 (match_operand:V4SF 1 "register_operand" "0")
1222 "movss\t{%2, %0|%0, %2}"
1223 [(set_attr "type" "ssemov")
1224 (set_attr "mode" "SF")])
;; The SF operand 1 is tied to the destination register; the insn emits
;; shufps $0 of that register with itself.
1226 (define_insn "*vec_dupv4sf"
1227 [(set (match_operand:V4SF 0 "register_operand" "=x")
1229 (match_operand:SF 1 "register_operand" "0")))]
1231 "shufps\t{$0, %0, %0|%0, %0, 0}"
1232 [(set_attr "type" "sselog1")
1233 (set_attr "mode" "V4SF")])
1235 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1236 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1237 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Forms a V2SF from SF operands 1 and 2.  Alternatives 0-1 target SSE
;; registers (unpcklps, or movss from memory when operand 2 satisfies
;; constraint C); alternatives 2-3 target MMX registers (punpckldq, or
;; movd from memory when operand 2 satisfies C).
1238 (define_insn "*sse_concatv2sf"
1239 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1241 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1242 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1245 unpcklps\t{%2, %0|%0, %2}
1246 movss\t{%1, %0|%0, %1}
1247 punpckldq\t{%2, %0|%0, %2}
1248 movd\t{%1, %0|%0, %1}"
1249 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1250 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Forms a V4SF from V2SF operands 1 and 2.  Operand 1 is tied to the
;; destination; movlhps is used when operand 2 is a register and movhps
;; when it is in memory.
1252 (define_insn "*sse_concatv4sf"
1253 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1255 (match_operand:V2SF 1 "register_operand" " 0,0")
1256 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1259 movlhps\t{%2, %0|%0, %2}
1260 movhps\t{%2, %0|%0, %2}"
1261 [(set_attr "type" "ssemov")
1262 (set_attr "mode" "V4SF,V2SF")])
;; Expander for V4SF vector initialization: all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
1264 (define_expand "vec_initv4sf"
1265 [(match_operand:V4SF 0 "register_operand" "")
1266 (match_operand 1 "" "")]
1269 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Four alternatives, per the constraint columns: SSE register source
;; with operand 1 tied to the destination (movss), memory source with
;; operand 1 matching C (movss), general register source with operand 1
;; matching C (movd), and a memory destination with operand 1 tied.
1273 (define_insn "*vec_setv4sf_0"
1274 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1277 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1278 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1282 movss\t{%2, %0|%0, %2}
1283 movss\t{%2, %0|%0, %2}
1284 movd\t{%2, %0|%0, %2}
1286 [(set_attr "type" "ssemov")
1287 (set_attr "mode" "SF")])
;; Post-reload split for a V4SF memory destination: the SF value in
;; operand 1 is stored with an ordinary SFmode move at offset 0 of
;; operand 0.
1290 [(set (match_operand:V4SF 0 "memory_operand" "")
1293 (match_operand:SF 1 "nonmemory_operand" ""))
1296 "TARGET_SSE && reload_completed"
1299 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; Expander for setting one V4SF element: delegates to
;; ix86_expand_vector_set, passing the constant in operand 2 as the
;; element index.
1303 (define_expand "vec_setv4sf"
1304 [(match_operand:V4SF 0 "register_operand" "")
1305 (match_operand:SF 1 "register_operand" "")
1306 (match_operand 2 "const_int_operand" "")]
1309 ix86_expand_vector_set (false, operands[0], operands[1],
1310 INTVAL (operands[2]));
;; Extracts element 0 of V4SF operand 1 into an SF destination; the
;; condition forbids the memory-to-memory form.  After reload it is
;; split into a plain move of operand 1 re-expressed in SFmode.
1314 (define_insn_and_split "*vec_extractv4sf_0"
1315 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1317 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1318 (parallel [(const_int 0)])))]
1319 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1321 "&& reload_completed"
1324 rtx op1 = operands[1];
1326 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1328 op1 = gen_lowpart (SFmode, op1);
1329 emit_move_insn (operands[0], op1);
;; Expander for extracting one V4SF element: delegates to
;; ix86_expand_vector_extract, passing the constant in operand 2 as the
;; element index.
1333 (define_expand "vec_extractv4sf"
1334 [(match_operand:SF 0 "register_operand" "")
1335 (match_operand:V4SF 1 "register_operand" "")
1336 (match_operand 2 "const_int_operand" "")]
1339 ix86_expand_vector_extract (false, operands[0], operands[1],
1340 INTVAL (operands[2]));
1344 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1346 ;; Parallel double-precision floating point arithmetic
1348 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  The whole expansion is performed by
;; ix86_expand_fp_absneg_operator, after which the expander is DONE.
1350 (define_expand "negv2df2"
1351 [(set (match_operand:V2DF 0 "register_operand" "")
1352 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1354 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  The whole expansion is performed by
;; ix86_expand_fp_absneg_operator, after which the expander is DONE.
1356 (define_expand "absv2df2"
1357 [(set (match_operand:V2DF 0 "register_operand" "")
1358 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1360 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; Named expander for V2DF addition: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the plus pattern is emitted.
1362 (define_expand "addv2df3"
1363 [(set (match_operand:V2DF 0 "register_operand" "")
1364 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1365 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1367 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 is marked commutative (%) and tied to the
;; destination; operand 2 may be a register or memory.
1369 (define_insn "*addv2df3"
1370 [(set (match_operand:V2DF 0 "register_operand" "=x")
1371 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1372 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1373 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1374 "addpd\t{%2, %0|%0, %2}"
1375 [(set_attr "type" "sseadd")
1376 (set_attr "mode" "V2DF")])
;; addsd (mode attribute DF).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.  The operand check is given
;; V2DFmode, the mode of this pattern's operands.
1378 (define_insn "sse2_vmaddv2df3"
1379 [(set (match_operand:V2DF 0 "register_operand" "=x")
1381 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1382 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1385 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1386 "addsd\t{%2, %0|%0, %2}"
1387 [(set_attr "type" "sseadd")
1388 (set_attr "mode" "DF")])
;; Named expander for V2DF subtraction: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the minus pattern is emitted.
1390 (define_expand "subv2df3"
1391 [(set (match_operand:V2DF 0 "register_operand" "")
1392 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1393 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1395 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no
;; commutative marker); operand 2 may be a register or memory.
1397 (define_insn "*subv2df3"
1398 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1400 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1402 "subpd\t{%2, %0|%0, %2}"
1403 [(set_attr "type" "sseadd")
1404 (set_attr "mode" "V2DF")])
;; subsd (mode attribute DF).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.
1406 (define_insn "sse2_vmsubv2df3"
1407 [(set (match_operand:V2DF 0 "register_operand" "=x")
1409 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1410 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1414 "subsd\t{%2, %0|%0, %2}"
1415 [(set_attr "type" "sseadd")
1416 (set_attr "mode" "DF")])
;; Named expander for V2DF multiplication: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the mult pattern is emitted.
1418 (define_expand "mulv2df3"
1419 [(set (match_operand:V2DF 0 "register_operand" "")
1420 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1421 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1423 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 is marked commutative (%) and tied to the
;; destination; operand 2 may be a register or memory.
1425 (define_insn "*mulv2df3"
1426 [(set (match_operand:V2DF 0 "register_operand" "=x")
1427 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1428 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1429 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1430 "mulpd\t{%2, %0|%0, %2}"
1431 [(set_attr "type" "ssemul")
1432 (set_attr "mode" "V2DF")])
;; mulsd (mode attribute DF).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.
1434 (define_insn "sse2_vmmulv2df3"
1435 [(set (match_operand:V2DF 0 "register_operand" "=x")
1437 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1438 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1441 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1442 "mulsd\t{%2, %0|%0, %2}"
1443 [(set_attr "type" "ssemul")
1444 (set_attr "mode" "DF")])
;; Named expander for V2DF division.  Unlike the add/sub/mul expanders,
;; operand 1 already requires register_operand here; operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1446 (define_expand "divv2df3"
1447 [(set (match_operand:V2DF 0 "register_operand" "")
1448 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1449 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1451 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.
1453 (define_insn "*divv2df3"
1454 [(set (match_operand:V2DF 0 "register_operand" "=x")
1455 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1456 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1458 "divpd\t{%2, %0|%0, %2}"
1459 [(set_attr "type" "ssediv")
1460 (set_attr "mode" "V2DF")])
;; divsd (mode attribute DF).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.
1462 (define_insn "sse2_vmdivv2df3"
1463 [(set (match_operand:V2DF 0 "register_operand" "=x")
1465 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1466 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1470 "divsd\t{%2, %0|%0, %2}"
1471 [(set_attr "type" "ssediv")
1472 (set_attr "mode" "DF")])
;; sqrtpd: V2DF square root of a register or memory operand into an SSE
;; register (destination not tied to the source).
1474 (define_insn "sqrtv2df2"
1475 [(set (match_operand:V2DF 0 "register_operand" "=x")
1476 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1478 "sqrtpd\t{%1, %0|%0, %1}"
1479 [(set_attr "type" "sse")
1480 (set_attr "mode" "V2DF")])
;; sqrtsd (mode attribute DF): the square root is taken of operand 1,
;; while operand 2 is tied to the destination.
;; NOTE(review): operand 1 pairs predicate register_operand with
;; constraint "xm"; confirm the narrower predicate is intentional.
1482 (define_insn "sse2_vmsqrtv2df2"
1483 [(set (match_operand:V2DF 0 "register_operand" "=x")
1485 (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
1486 (match_operand:V2DF 2 "register_operand" "0")
1489 "sqrtsd\t{%1, %0|%0, %1}"
1490 [(set_attr "type" "sse")
1491 (set_attr "mode" "DF")])
1493 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1494 ;; isn't really correct, as those rtl operators aren't defined when
1495 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Named expander for V2DF smax.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1497 (define_expand "smaxv2df3"
1498 [(set (match_operand:V2DF 0 "register_operand" "")
1499 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1500 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1503 if (!flag_finite_math_only)
1504 operands[1] = force_reg (V2DFmode, operands[1]);
1505 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form: operand 1 carries the % marker.  Only
;; enabled when flag_finite_math_only is set.
1508 (define_insn "*smaxv2df3_finite"
1509 [(set (match_operand:V2DF 0 "register_operand" "=x")
1510 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1511 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1512 "TARGET_SSE2 && flag_finite_math_only
1513 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1514 "maxpd\t{%2, %0|%0, %2}"
1515 [(set_attr "type" "sseadd")
1516 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and has no % marker, so the operand order is preserved.
1518 (define_insn "*smaxv2df3"
1519 [(set (match_operand:V2DF 0 "register_operand" "=x")
1520 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1521 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1523 "maxpd\t{%2, %0|%0, %2}"
1524 [(set_attr "type" "sseadd")
1525 (set_attr "mode" "V2DF")])
;; maxsd (mode attribute DF).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.
1527 (define_insn "sse2_vmsmaxv2df3"
1528 [(set (match_operand:V2DF 0 "register_operand" "=x")
1530 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1531 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1535 "maxsd\t{%2, %0|%0, %2}"
1536 [(set_attr "type" "sseadd")
1537 (set_attr "mode" "DF")])
;; Named expander for V2DF smin.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
1539 (define_expand "sminv2df3"
1540 [(set (match_operand:V2DF 0 "register_operand" "")
1541 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1542 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1545 if (!flag_finite_math_only)
1546 operands[1] = force_reg (V2DFmode, operands[1]);
1547 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form: operand 1 carries the % marker.  Only
;; enabled when flag_finite_math_only is set.
1550 (define_insn "*sminv2df3_finite"
1551 [(set (match_operand:V2DF 0 "register_operand" "=x")
1552 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1553 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1554 "TARGET_SSE2 && flag_finite_math_only
1555 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1556 "minpd\t{%2, %0|%0, %2}"
1557 [(set_attr "type" "sseadd")
1558 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and has no % marker, so the operand order is preserved.
1560 (define_insn "*sminv2df3"
1561 [(set (match_operand:V2DF 0 "register_operand" "=x")
1562 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1563 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1565 "minpd\t{%2, %0|%0, %2}"
1566 [(set_attr "type" "sseadd")
1567 (set_attr "mode" "V2DF")])
;; minsd (mode attribute DF).  Operand 1 is tied to the destination and
;; operand 2 may be a register or memory.
1569 (define_insn "sse2_vmsminv2df3"
1570 [(set (match_operand:V2DF 0 "register_operand" "=x")
1572 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1573 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1577 "minsd\t{%2, %0|%0, %2}"
1578 [(set_attr "type" "sseadd")
1579 (set_attr "mode" "DF")])
;; addsubpd.  The pattern reuses operands 1 and 2 through match_dup for
;; its minus arm; operand 1 is tied to the destination and operand 2 may
;; be a register or memory.
1581 (define_insn "sse3_addsubv2df3"
1582 [(set (match_operand:V2DF 0 "register_operand" "=x")
1585 (match_operand:V2DF 1 "register_operand" "0")
1586 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1587 (minus:V2DF (match_dup 1) (match_dup 2))
1590 "addsubpd\t{%2, %0|%0, %2}"
1591 [(set_attr "type" "sseadd")
1592 (set_attr "mode" "V2DF")])
;; haddpd.  The pattern pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
1594 (define_insn "sse3_haddv2df3"
1595 [(set (match_operand:V2DF 0 "register_operand" "=x")
1599 (match_operand:V2DF 1 "register_operand" "0")
1600 (parallel [(const_int 0)]))
1601 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1604 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1605 (parallel [(const_int 0)]))
1606 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1608 "haddpd\t{%2, %0|%0, %2}"
1609 [(set_attr "type" "sseadd")
1610 (set_attr "mode" "V2DF")])
;; hsubpd.  The pattern pairs element 0 with element 1 of operand 1, and
;; element 0 with element 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
1612 (define_insn "sse3_hsubv2df3"
1613 [(set (match_operand:V2DF 0 "register_operand" "=x")
1617 (match_operand:V2DF 1 "register_operand" "0")
1618 (parallel [(const_int 0)]))
1619 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1622 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1623 (parallel [(const_int 0)]))
1624 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1626 "hsubpd\t{%2, %0|%0, %2}"
1627 [(set_attr "type" "sseadd")
1628 (set_attr "mode" "V2DF")])
;; Sum reduction expander: emits sse3_haddv2df3 with operand 1 supplied
;; as both source operands.
1630 (define_expand "reduc_splus_v2df"
1631 [(match_operand:V2DF 0 "register_operand" "")
1632 (match_operand:V2DF 1 "register_operand" "")]
1635 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1641 ;; Parallel double-precision floating point comparisons
1643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmp<cond>pd.  The comparison is operator operand 3 (accepted by
;; sse_comparison_operator) and is printed through the %D3 modifier.
;; Operand 1 is tied to the destination.
1645 (define_insn "sse2_maskcmpv2df3"
1646 [(set (match_operand:V2DF 0 "register_operand" "=x")
1647 (match_operator:V2DF 3 "sse_comparison_operator"
1648 [(match_operand:V2DF 1 "register_operand" "0")
1649 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1651 "cmp%D3pd\t{%2, %0|%0, %2}"
1652 [(set_attr "type" "ssecmp")
1653 (set_attr "mode" "V2DF")])
;; cmp<cond>sd (mode attribute DF).  The comparison is operator operand
;; 3, printed through the %D3 modifier; operand 1 is tied to the
;; destination.
1655 (define_insn "sse2_vmmaskcmpv2df3"
1656 [(set (match_operand:V2DF 0 "register_operand" "=x")
1658 (match_operator:V2DF 3 "sse_comparison_operator"
1659 [(match_operand:V2DF 1 "register_operand" "0")
1660 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1664 "cmp%D3sd\t{%2, %0|%0, %2}"
1665 [(set_attr "type" "ssecmp")
1666 (set_attr "mode" "DF")])
;; comisd: sets FLAGS_REG in CCFP mode from element 0 of operand 0 and
;; element 0 of operand 1 (register or memory).
1668 (define_insn "sse2_comi"
1669 [(set (reg:CCFP FLAGS_REG)
1672 (match_operand:V2DF 0 "register_operand" "x")
1673 (parallel [(const_int 0)]))
1675 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1676 (parallel [(const_int 0)]))))]
1678 "comisd\t{%1, %0|%0, %1}"
1679 [(set_attr "type" "ssecomi")
1680 (set_attr "mode" "DF")])
;; ucomisd: sets FLAGS_REG in CCFPU mode from element 0 of operand 0 and
;; element 0 of operand 1 (register or memory).
1682 (define_insn "sse2_ucomi"
1683 [(set (reg:CCFPU FLAGS_REG)
1686 (match_operand:V2DF 0 "register_operand" "x")
1687 (parallel [(const_int 0)]))
1689 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1690 (parallel [(const_int 0)]))))]
1692 "ucomisd\t{%1, %0|%0, %1}"
1693 [(set_attr "type" "ssecomi")
1694 (set_attr "mode" "DF")])
;; V2DF vector conditional expander.  Operator 3 compares operands 4
;; and 5; operands 1 and 2 are the two value operands.  The expansion is
;; attempted through ix86_expand_fp_vcond.
1696 (define_expand "vcondv2df"
1697 [(set (match_operand:V2DF 0 "register_operand" "")
1699 (match_operator 3 ""
1700 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1701 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1702 (match_operand:V2DF 1 "general_operand" "")
1703 (match_operand:V2DF 2 "general_operand" "")))]
1706 if (ix86_expand_fp_vcond (operands))
1712 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1714 ;; Parallel double-precision floating point logical operations
1716 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for V2DF bitwise AND: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the and pattern is emitted.
1718 (define_expand "andv2df3"
1719 [(set (match_operand:V2DF 0 "register_operand" "")
1720 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1721 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1723 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 is marked commutative (%) and tied to the
;; destination; operand 2 may be a register or memory.
1725 (define_insn "*andv2df3"
1726 [(set (match_operand:V2DF 0 "register_operand" "=x")
1727 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1728 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1729 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1730 "andpd\t{%2, %0|%0, %2}"
1731 [(set_attr "type" "sselog")
1732 (set_attr "mode" "V2DF")])
;; andnpd: (not operand 1) AND operand 2.  The complemented operand 1 is
;; the register tied to the destination; operand 2 may be in memory.
1734 (define_insn "sse2_nandv2df3"
1735 [(set (match_operand:V2DF 0 "register_operand" "=x")
1736 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1737 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1739 "andnpd\t{%2, %0|%0, %2}"
1740 [(set_attr "type" "sselog")
1741 (set_attr "mode" "V2DF")])
;; Named expander for V2DF bitwise OR: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the ior pattern is emitted.
1743 (define_expand "iorv2df3"
1744 [(set (match_operand:V2DF 0 "register_operand" "")
1745 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1746 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1748 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 is marked commutative (%) and tied to the
;; destination; operand 2 may be a register or memory.
1750 (define_insn "*iorv2df3"
1751 [(set (match_operand:V2DF 0 "register_operand" "=x")
1752 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1753 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1754 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1755 "orpd\t{%2, %0|%0, %2}"
1756 [(set_attr "type" "sselog")
1757 (set_attr "mode" "V2DF")])
;; Named expander for V2DF bitwise XOR: operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the xor pattern is emitted.
1759 (define_expand "xorv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "")
1761 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1762 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1764 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 is marked commutative (%) and tied to the
;; destination; operand 2 may be a register or memory.
1766 (define_insn "*xorv2df3"
1767 [(set (match_operand:V2DF 0 "register_operand" "=x")
1768 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1769 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1770 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1771 "xorpd\t{%2, %0|%0, %2}"
1772 [(set_attr "type" "sselog")
1773 (set_attr "mode" "V2DF")])
1775 ;; Also define scalar versions. These are used for abs, neg, and
1776 ;; conditional move. Using subregs into vector modes causes register
1777 ;; allocation lossage. These patterns do not allow memory operands
1778 ;; because the native instructions read the full 128 bits.
;; Scalar DF bitwise AND implemented with andpd.  Both inputs must be
;; registers (no memory alternative); the mode attribute is V2DF.
1780 (define_insn "*anddf3"
1781 [(set (match_operand:DF 0 "register_operand" "=x")
1782 (and:DF (match_operand:DF 1 "register_operand" "0")
1783 (match_operand:DF 2 "register_operand" "x")))]
1785 "andpd\t{%2, %0|%0, %2}"
1786 [(set_attr "type" "sselog")
1787 (set_attr "mode" "V2DF")])
;; Scalar DF and-not, (not operand 1) AND operand 2, implemented with
;; andnpd.  Both inputs must be registers; the mode attribute is V2DF.
1789 (define_insn "*nanddf3"
1790 [(set (match_operand:DF 0 "register_operand" "=x")
1791 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1792 (match_operand:DF 2 "register_operand" "x")))]
1794 "andnpd\t{%2, %0|%0, %2}"
1795 [(set_attr "type" "sselog")
1796 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR implemented with orpd.  Both inputs must be
;; registers (no memory alternative); the mode attribute is V2DF.
1798 (define_insn "*iordf3"
1799 [(set (match_operand:DF 0 "register_operand" "=x")
1800 (ior:DF (match_operand:DF 1 "register_operand" "0")
1801 (match_operand:DF 2 "register_operand" "x")))]
1803 "orpd\t{%2, %0|%0, %2}"
1804 [(set_attr "type" "sselog")
1805 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR implemented with xorpd.  Both inputs must be
;; registers (no memory alternative); the mode attribute is V2DF.
1807 (define_insn "*xordf3"
1808 [(set (match_operand:DF 0 "register_operand" "=x")
1809 (xor:DF (match_operand:DF 1 "register_operand" "0")
1810 (match_operand:DF 2 "register_operand" "x")))]
1812 "xorpd\t{%2, %0|%0, %2}"
1813 [(set_attr "type" "sselog")
1814 (set_attr "mode" "V2DF")])
1816 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1818 ;; Parallel double-precision floating point conversion operations
1820 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI -> V2DF via the `float' operator.  The source is an
;; MMX register (alternative 0, unit attribute "mmx") or memory
;; (alternative 1).
1822 (define_insn "sse2_cvtpi2pd"
1823 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1824 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1826 "cvtpi2pd\t{%1, %0|%0, %1}"
1827 [(set_attr "type" "ssecvt")
1828 (set_attr "unit" "mmx,*")
1829 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF (SSE register or memory) -> V2SI in an MMX register.
;; Represented as an UNSPEC_FIX_NOTRUNC unspec.
1831 (define_insn "sse2_cvtpd2pi"
1832 [(set (match_operand:V2SI 0 "register_operand" "=y")
1833 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1834 UNSPEC_FIX_NOTRUNC))]
1836 "cvtpd2pi\t{%1, %0|%0, %1}"
1837 [(set_attr "type" "ssecvt")
1838 (set_attr "unit" "mmx")
1839 (set_attr "mode" "DI")])
;; cvttpd2pi: V2DF (SSE register or memory) -> V2SI in an MMX register,
;; expressed with the `fix' RTL operator.
;; NOTE(review): the mode attribute here is "TI" while sse2_cvtpd2pi
;; uses "DI" -- confirm which value is intended.
1841 (define_insn "sse2_cvttpd2pi"
1842 [(set (match_operand:V2SI 0 "register_operand" "=y")
1843 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1845 "cvttpd2pi\t{%1, %0|%0, %1}"
1846 [(set_attr "type" "ssecvt")
1847 (set_attr "unit" "mmx")
1848 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SI operand 2 (general register or memory) to
;; DF.  Operand 1 is tied to the destination.  The decode attributes
;; differ between the register and memory alternatives.
1850 (define_insn "sse2_cvtsi2sd"
1851 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1854 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1855 (match_operand:V2DF 1 "register_operand" "0,0")
1858 "cvtsi2sd\t{%2, %0|%0, %2}"
1859 [(set_attr "type" "sseicvt")
1860 (set_attr "mode" "DF")
1861 (set_attr "athlon_decode" "double,direct")
1862 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsi2sdq: DImode-source variant of cvtsi2sd (operand 2 is DI in a
;; general register or memory).  Requires TARGET_64BIT in addition to
;; TARGET_SSE2.
1864 (define_insn "sse2_cvtsi2sdq"
1865 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1868 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1869 (match_operand:V2DF 1 "register_operand" "0,0")
1871 "TARGET_SSE2 && TARGET_64BIT"
1872 "cvtsi2sdq\t{%2, %0|%0, %2}"
1873 [(set_attr "type" "sseicvt")
1874 (set_attr "mode" "DF")
1875 (set_attr "athlon_decode" "double,direct")
1876 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si: converts element 0 of V2DF operand 1 (SSE register or
;; memory) to an SI general register.  Represented as an
;; UNSPEC_FIX_NOTRUNC unspec.
1878 (define_insn "sse2_cvtsd2si"
1879 [(set (match_operand:SI 0 "register_operand" "=r,r")
1882 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1883 (parallel [(const_int 0)]))]
1884 UNSPEC_FIX_NOTRUNC))]
1886 "cvtsd2si\t{%1, %0|%0, %1}"
1887 [(set_attr "type" "sseicvt")
1888 (set_attr "athlon_decode" "double,vector")
1889 (set_attr "amdfam10_decode" "double,double")
1890 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-result variant of cvtsd2si, converting element 0 of
;; operand 1 via an UNSPEC_FIX_NOTRUNC unspec.  Requires TARGET_64BIT in
;; addition to TARGET_SSE2.
1892 (define_insn "sse2_cvtsd2siq"
1893 [(set (match_operand:DI 0 "register_operand" "=r,r")
1896 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1897 (parallel [(const_int 0)]))]
1898 UNSPEC_FIX_NOTRUNC))]
1899 "TARGET_SSE2 && TARGET_64BIT"
1900 "cvtsd2siq\t{%1, %0|%0, %1}"
1901 [(set_attr "type" "sseicvt")
1902 (set_attr "athlon_decode" "double,vector")
1903 (set_attr "amdfam10_decode" "double,double")
1904 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of V2DF operand 1 (SSE register or
;; memory) to an SI general register.
1906 (define_insn "sse2_cvttsd2si"
1907 [(set (match_operand:SI 0 "register_operand" "=r,r")
1910 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1911 (parallel [(const_int 0)]))))]
1913 "cvttsd2si\t{%1, %0|%0, %1}"
1914 [(set_attr "type" "sseicvt")
1915 (set_attr "mode" "SI")
1916 (set_attr "athlon_decode" "double,vector")
1917 (set_attr "amdfam10_decode" "double,double")])
;; cvttsd2siq: DImode-result variant of cvttsd2si, converting element 0
;; of operand 1.  Requires TARGET_64BIT in addition to TARGET_SSE2.
1919 (define_insn "sse2_cvttsd2siq"
1920 [(set (match_operand:DI 0 "register_operand" "=r,r")
1923 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1924 (parallel [(const_int 0)]))))]
1925 "TARGET_SSE2 && TARGET_64BIT"
1926 "cvttsd2siq\t{%1, %0|%0, %1}"
1927 [(set_attr "type" "sseicvt")
1928 (set_attr "mode" "DI")
1929 (set_attr "athlon_decode" "double,vector")
1930 (set_attr "amdfam10_decode" "double,double")])
;; cvtdq2pd: operates on elements 0 and 1 of V4SI operand 1 (register or
;; memory) and produces a V2DF.
1932 (define_insn "sse2_cvtdq2pd"
1933 [(set (match_operand:V2DF 0 "register_operand" "=x")
1936 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1937 (parallel [(const_int 0) (const_int 1)]))))]
1939 "cvtdq2pd\t{%1, %0|%0, %1}"
1940 [(set_attr "type" "ssecvt")
1941 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq: the preparation statement sets operands[2] to
;; the V2SI zero constant, which *sse2_cvtpd2dq requires as its const0
;; operand 2.
1943 (define_expand "sse2_cvtpd2dq"
1944 [(set (match_operand:V4SI 0 "register_operand" "")
1946 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1950 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq: V2DF (register or memory) -> V2SI unspec, placed in a V4SI
;; destination together with operand 2, which must be constant zero.
1952 (define_insn "*sse2_cvtpd2dq"
1953 [(set (match_operand:V4SI 0 "register_operand" "=x")
1955 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1957 (match_operand:V2SI 2 "const0_operand" "")))]
1959 "cvtpd2dq\t{%1, %0|%0, %1}"
1960 [(set_attr "type" "ssecvt")
1961 (set_attr "mode" "TI")
1962 (set_attr "amdfam10_decode" "double")])
;; Expander for cvttpd2dq: the preparation statement sets operands[2]
;; to the V2SI zero constant, which *sse2_cvttpd2dq requires as its
;; const0 operand 2.
1964 (define_expand "sse2_cvttpd2dq"
1965 [(set (match_operand:V4SI 0 "register_operand" "")
1967 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1970 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq: `fix' of V2DF operand 1 (register or memory) to V2SI,
;; placed in a V4SI destination together with operand 2, which must be
;; constant zero.
1972 (define_insn "*sse2_cvttpd2dq"
1973 [(set (match_operand:V4SI 0 "register_operand" "=x")
1975 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1976 (match_operand:V2SI 2 "const0_operand" "")))]
1978 "cvttpd2dq\t{%1, %0|%0, %1}"
1979 [(set_attr "type" "ssecvt")
1980 (set_attr "mode" "TI")
1981 (set_attr "amdfam10_decode" "double")])
;; cvtsd2ss (mode attribute SF): float_truncate of V2DF operand 2
;; (register or memory).  V4SF operand 1 is tied to the destination.
1983 (define_insn "sse2_cvtsd2ss"
1984 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1987 (float_truncate:V2SF
1988 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1989 (match_operand:V4SF 1 "register_operand" "0,0")
1992 "cvtsd2ss\t{%2, %0|%0, %2}"
1993 [(set_attr "type" "ssecvt")
1994 (set_attr "athlon_decode" "vector,double")
1995 (set_attr "amdfam10_decode" "vector,double")
1996 (set_attr "mode" "SF")])
;; cvtss2sd (mode attribute DF): works from elements 0 and 1 of V4SF
;; operand 2 (register or memory).  V2DF operand 1 is tied to the
;; destination.
1998 (define_insn "sse2_cvtss2sd"
1999 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2003 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2004 (parallel [(const_int 0) (const_int 1)])))
2005 (match_operand:V2DF 1 "register_operand" "0,0")
2008 "cvtss2sd\t{%2, %0|%0, %2}"
2009 [(set_attr "type" "ssecvt")
2010 (set_attr "amdfam10_decode" "vector,double")
2011 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps: the preparation statement sets operands[2] to
;; the V2SF zero constant, which *sse2_cvtpd2ps requires as its const0
;; operand 2.  Operand constraints are left empty, as in the other
;; expanders in this file.
2013 (define_expand "sse2_cvtpd2ps"
2014 [(set (match_operand:V4SF 0 "register_operand" "")
2016 (float_truncate:V2SF
2017 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2020 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps: float_truncate of V2DF operand 1 (register or memory) to
;; V2SF, placed in a V4SF destination together with operand 2, which
;; must be constant zero.
2022 (define_insn "*sse2_cvtpd2ps"
2023 [(set (match_operand:V4SF 0 "register_operand" "=x")
2025 (float_truncate:V2SF
2026 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2027 (match_operand:V2SF 2 "const0_operand" "")))]
2029 "cvtpd2ps\t{%1, %0|%0, %1}"
2030 [(set_attr "type" "ssecvt")
2031 (set_attr "mode" "V4SF")
2032 (set_attr "amdfam10_decode" "double")])
;; cvtps2pd: operates on elements 0 and 1 of V4SF operand 1 (register or
;; memory) and produces a V2DF.
2034 (define_insn "sse2_cvtps2pd"
2035 [(set (match_operand:V2DF 0 "register_operand" "=x")
2038 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2039 (parallel [(const_int 0) (const_int 1)]))))]
2041 "cvtps2pd\t{%1, %0|%0, %1}"
2042 [(set_attr "type" "ssecvt")
2043 (set_attr "mode" "V2DF")
2044 (set_attr "amdfam10_decode" "direct")])
2046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2048 ;; Parallel double-precision floating point element swizzling
2050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Alternatives: register unpckhpd (operand 1 tied to the destination),
;; movlpd reading the upper half (%H1) of an offsettable memory operand
;; 1, and movhpd from register operand 1 to a memory destination.  The
;; condition rejects operands 1 and 2 both being MEMs.
2052 (define_insn "sse2_unpckhpd"
2053 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2056 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2057 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2058 (parallel [(const_int 1)
2060 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2062 unpckhpd\t{%2, %0|%0, %2}
2063 movlpd\t{%H1, %0|%0, %H1}
2064 movhpd\t{%1, %0|%0, %1}"
2065 [(set_attr "type" "sselog,ssemov,ssemov")
2066 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Two alternatives: movddup into an SSE register from a register or
;; memory source, and a form with an offsettable memory destination and
;; register source (type ssemov).  The condition rejects the
;; memory-to-memory case.
2068 (define_insn "*sse3_movddup"
2069 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2072 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2074 (parallel [(const_int 0)
2076 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2078 movddup\t{%1, %0|%0, %1}
2080 [(set_attr "type" "sselog1,ssemov")
2081 (set_attr "mode" "V2DF")])
;; Post-reload split for a V2DF memory destination with a register
;; source: the register is viewed as DFmode and stored twice, at byte
;; offsets 0 and 8 of operand 0.
2084 [(set (match_operand:V2DF 0 "memory_operand" "")
2087 (match_operand:V2DF 1 "register_operand" "")
2089 (parallel [(const_int 0)
2091 "TARGET_SSE3 && reload_completed"
2094 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2095 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2096 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Alternatives: register unpcklpd, movhpd loading from memory operand
;; 2, and movlpd storing register operand 2 into the upper half (%H0) of
;; an offsettable memory destination.  Operand 1 is always tied to the
;; destination; the condition rejects operands 1 and 2 both being MEMs.
2100 (define_insn "sse2_unpcklpd"
2101 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2104 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2105 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2106 (parallel [(const_int 0)
2108 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2110 unpcklpd\t{%2, %0|%0, %2}
2111 movhpd\t{%2, %0|%0, %2}
2112 movlpd\t{%2, %H0|%H0, %2}"
2113 [(set_attr "type" "sselog,ssemov,ssemov")
2114 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander for shufpd: decodes the immediate in operand 3 into element
;; selectors for sse2_shufpd_1.  Bit 1 of the mask chooses between
;; selector 3 and selector 2, the const_2_to_3_operand range that
;; sse2_shufpd_1 requires for its operand 4.
2116 (define_expand "sse2_shufpd"
2117 [(match_operand:V2DF 0 "register_operand" "")
2118 (match_operand:V2DF 1 "register_operand" "")
2119 (match_operand:V2DF 2 "nonimmediate_operand" "")
2120 (match_operand:SI 3 "const_int_operand" "")]
2123 int mask = INTVAL (operands[3]);
2124 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2126 GEN_INT (mask & 2 ? 3 : 2)));
;; sse2_shufpd_1: the shufpd instruction proper.  Operands 3 and 4 are element
;; selectors (predicates const_0_to_1_operand and const_2_to_3_operand).  The
;; output code re-encodes them as the instruction immediate: operand 3 supplies
;; bit 0 and (operand 4 - 2) supplies bit 1; operands[3] is overwritten with
;; that immediate before the template is returned.
2130 (define_insn "sse2_shufpd_1"
2131 [(set (match_operand:V2DF 0 "register_operand" "=x")
2134 (match_operand:V2DF 1 "register_operand" "0")
2135 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2136 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2137 (match_operand 4 "const_2_to_3_operand" "")])))]
2141 mask = INTVAL (operands[3]);
2142 mask |= (INTVAL (operands[4]) - 2) << 1;
2143 operands[3] = GEN_INT (mask);
2145 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2147 [(set_attr "type" "sselog")
2148 (set_attr "mode" "V2DF")])
;; sse2_storehpd: produces DF operand 0 from element 1 (the selector in the
;; parallel) of V2DF operand 1.  Only the template of the first alternative
;; (movhpd from a register to memory) is visible in this listing; the other
;; two alternatives are a register destination tied to operand 1, and a
;; register destination (x, or discouraged f/r) with operand 1 in offsettable
;; memory.  Destination and source may not both be memory.
2150 (define_insn "sse2_storehpd"
2151 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2153 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2154 (parallel [(const_int 1)])))]
2155 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2157 movhpd\t{%1, %0|%0, %1}
2160 [(set_attr "type" "ssemov,sselog1,ssemov")
2161 (set_attr "mode" "V1DF,V2DF,DF")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; listing.  After reload, extracting element 1 of a V2DF in memory into a
;; register becomes a plain DFmode move from byte offset 8 of that memory.
2164 [(set (match_operand:DF 0 "register_operand" "")
2166 (match_operand:V2DF 1 "memory_operand" "")
2167 (parallel [(const_int 1)])))]
2168 "TARGET_SSE2 && reload_completed"
2169 [(set (match_dup 0) (match_dup 1))]
2171 operands[1] = adjust_address (operands[1], DFmode, 8);
;; sse2_storelpd: produces DF operand 0 from element 0 (the selector in the
;; parallel) of V2DF operand 1.  Only the template of the first alternative
;; (movlpd from a register to memory) is visible in this listing.
;; Destination and source may not both be memory.
2174 (define_insn "sse2_storelpd"
2175 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2177 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2178 (parallel [(const_int 0)])))]
2179 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2181 movlpd\t{%1, %0|%0, %1}
2184 [(set_attr "type" "ssemov")
2185 (set_attr "mode" "V1DF,DF,DF")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; listing.  After reload, extracting element 0 into a register becomes a
;; plain DFmode move: operand 1 is re-expressed in DFmode, either with
;; gen_rtx_REG on the same register number or with gen_lowpart (the test
;; choosing between the two is not visible here).
2188 [(set (match_operand:DF 0 "register_operand" "")
2190 (match_operand:V2DF 1 "nonimmediate_operand" "")
2191 (parallel [(const_int 0)])))]
2192 "TARGET_SSE2 && reload_completed"
2195 rtx op1 = operands[1];
2197 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2199 op1 = gen_lowpart (DFmode, op1);
2200 emit_move_insn (operands[0], op1);
;; sse2_loadhpd: keeps element 0 of V2DF operand 1 and takes the other element
;; from DF operand 2.  Operand 1 is tied to the destination in every
;; alternative:
;;   0: register destination, operand 2 in memory      -> movhpd.
;;   1: register destination, operand 2 in a register  -> unpcklpd.
;;   2: offsettable memory destination; no template is visible in this
;;      listing for it.
;; A former alternative (register destination, operand 2 tied to it, operand 1
;; in another register) emitted "shufpd $1, %1, %0".  That computes
;; { high half of %0, low half of %1 }, not { low half of %1, operand 2 }, and
;; no single shufpd with operand 2 tied to the destination can produce the
;; required value, so the alternative has been removed.
;; The insn condition rejects operands 1 and 2 both being memory.
2204 (define_insn "sse2_loadhpd"
2205 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2208 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2209 (parallel [(const_int 0)]))
2210 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2211 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2213 movhpd\t{%2, %0|%0, %2}
2214 unpcklpd\t{%2, %0|%0, %2}
2217 [(set_attr "type" "ssemov,sselog,other")
2218 (set_attr "mode" "V1DF,V2DF,DF")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; listing.  After reload, with a memory destination whose element 0 is kept
;; and a register operand 1 supplying the other element, the operation
;; becomes a plain DFmode store of operand 1 at byte offset 8 of the
;; destination.
2221 [(set (match_operand:V2DF 0 "memory_operand" "")
2223 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2224 (match_operand:DF 1 "register_operand" "")))]
2225 "TARGET_SSE2 && reload_completed"
2226 [(set (match_dup 0) (match_dup 1))]
2228 operands[0] = adjust_address (operands[0], DFmode, 8);
;; sse2_loadlpd: takes the low element from DF operand 2 and keeps element 1
;; of operand 1 (a V2DF, or the zero vector via constraint "C").
;;   0: operand 1 is zero, operand 2 in memory            -> movsd load.
;;   1: operand 1 tied to destination, operand 2 in memory -> movlpd.
;;   2: operand 1 tied to destination, operand 2 register  -> movsd.
;;   3: operand 2 tied to destination, operand 1 register  -> shufpd $2 taking
;;      the high half from operand 1.  The source operand must be %1: %2 is
;;      the destination register itself in this alternative, so using it
;;      would leave the register unchanged.
;;   4: operand 2 tied to destination, operand 1 in offsettable memory
;;      -> movhpd from %H1.
;;   5: memory destination; no template is visible in this listing for it.
;; The insn condition rejects operands 1 and 2 both being memory.
2231 (define_insn "sse2_loadlpd"
2232 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2234 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2236 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2237 (parallel [(const_int 1)]))))]
2238 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2240 movsd\t{%2, %0|%0, %2}
2241 movlpd\t{%2, %0|%0, %2}
2242 movsd\t{%2, %0|%0, %2}
2243 shufpd\t{$2, %1, %0|%0, %1, 2}
2244 movhpd\t{%H1, %0|%0, %H1}
2246 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2247 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; NOTE(review): the opening form of this pattern is not visible in this
;; listing.  After reload, with a memory destination whose element 1 is kept
;; and a register operand 1 supplying the new first (low) element, the
;; operation becomes a plain DFmode store of operand 1.  The low element
;; lives at byte offset 0 of the destination; offset 8 would overwrite the
;; element that is supposed to be preserved.
2250 [(set (match_operand:V2DF 0 "memory_operand" "")
2252 (match_operand:DF 1 "register_operand" "")
2253 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2254 "TARGET_SSE2 && reload_completed"
2255 [(set (match_dup 0) (match_dup 1))]
2257 operands[0] = adjust_address (operands[0], DFmode, 0);
2260 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; *vec_extractv2df_1_sse: SSE1-only (enabled when SSE2 is off) extraction of
;; element 1 of a V2DF using single-precision moves:
;;   0: register -> memory           movhps
;;   1: register -> register         movhlps
;;   2: offsettable memory -> register  movlps from %H1
;; Destination and source may not both be memory.
2262 (define_insn "*vec_extractv2df_1_sse"
2263 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2265 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2266 (parallel [(const_int 1)])))]
2267 "!TARGET_SSE2 && TARGET_SSE
2268 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2270 movhps\t{%1, %0|%0, %1}
2271 movhlps\t{%1, %0|%0, %1}
2272 movlps\t{%H1, %0|%0, %H1}"
2273 [(set_attr "type" "ssemov")
2274 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_extractv2df_0_sse: SSE1-only (enabled when SSE2 is off) extraction of
;; element 0 of a V2DF using single-precision moves:
;;   0: register -> memory    movlps
;;   1: register -> register  movaps
;;   2: memory -> register    movlps
;; Destination and source may not both be memory.
2276 (define_insn "*vec_extractv2df_0_sse"
2277 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2279 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2280 (parallel [(const_int 0)])))]
2281 "!TARGET_SSE2 && TARGET_SSE
2282 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2284 movlps\t{%1, %0|%0, %1}
2285 movaps\t{%1, %0|%0, %1}
2286 movlps\t{%1, %0|%0, %1}"
2287 [(set_attr "type" "ssemov")
2288 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: combines operand 2 and operand 1 into the destination.  As the
;; templates show, the low half comes from operand 2 and the high half from
;; operand 1.  Alternatives:
;;   0-2: operand 1 tied to destination; the low half is written from operand 2
;;        with movsd (register), movlpd (load) or movlpd (store).
;;   3:   operand 2 tied to destination, operand 1 in a register -> shufpd $2
;;        taking the high half from operand 1.  The source operand must be %1:
;;        %2 is the destination register itself in this alternative, so using
;;        it would leave the register unchanged.
;;   4:   operand 2 tied to destination, operand 1 in offsettable memory
;;        -> movhps from %H1.
;;   5:   offsettable memory destination tied to operand 2 -> movhps of
;;        operand 1 into %H0.
2290 (define_insn "sse2_movsd"
2291 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2293 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2294 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2298 movsd\t{%2, %0|%0, %2}
2299 movlpd\t{%2, %0|%0, %2}
2300 movlpd\t{%2, %0|%0, %2}
2301 shufpd\t{$2, %1, %0|%0, %1, 2}
2302 movhps\t{%H1, %0|%0, %H1}
2303 movhps\t{%1, %H0|%H0, %1}"
2304 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2305 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; *vec_dupv2df_sse3: builds a V2DF register from DF operand 1 (register or
;; memory) with a single movddup.  The insn condition is not visible in this
;; listing.
2307 (define_insn "*vec_dupv2df_sse3"
2308 [(set (match_operand:V2DF 0 "register_operand" "=x")
2310 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2312 "movddup\t{%1, %0|%0, %1}"
2313 [(set_attr "type" "sselog1")
2314 (set_attr "mode" "DF")])
;; *vec_dupv2df: register-only form in which DF operand 1 is tied to the
;; destination.  The insn condition and output template are not visible in
;; this listing.
2316 (define_insn "*vec_dupv2df"
2317 [(set (match_operand:V2DF 0 "register_operand" "=x")
2319 (match_operand:DF 1 "register_operand" "0")))]
2322 [(set_attr "type" "sselog1")
2323 (set_attr "mode" "V4SF")])
;; *vec_concatv2df_sse3: emits movddup from DF operand 1 (register or memory)
;; into a V2DF register.  The second element of the pattern and the insn
;; condition are not visible in this listing.
2325 (define_insn "*vec_concatv2df_sse3"
2326 [(set (match_operand:V2DF 0 "register_operand" "=x")
2328 (match_operand:DF 1 "nonimmediate_operand" "xm")
2331 "movddup\t{%1, %0|%0, %1}"
2332 [(set_attr "type" "sselog1")
2333 (set_attr "mode" "DF")])
;; *vec_concatv2df: builds a V2DF from DF operands 1 and 2.  Alternatives:
;;   0: "Y" registers, operand 1 tied to destination -> unpcklpd with operand 2
;;   1: same, operand 2 in memory                     -> movhpd
;;   2: operand 1 in memory, operand 2 zero ("C")     -> movsd load
;;   3: "x" registers, operand 1 tied to destination -> movlhps
;;   4: same, operand 2 in memory                     -> movhps
;; The insn condition is not visible in this listing.
2335 (define_insn "*vec_concatv2df"
2336 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2338 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2339 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2342 unpcklpd\t{%2, %0|%0, %2}
2343 movhpd\t{%2, %0|%0, %2}
2344 movsd\t{%1, %0|%0, %1}
2345 movlhps\t{%2, %0|%0, %2}
2346 movhps\t{%2, %0|%0, %2}"
2347 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2348 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; vec_setv2df: expander that hands the work to ix86_expand_vector_set, passing
;; false, V2DF operand 0, DF operand 1 and the integer value of constant
;; operand 2.
2350 (define_expand "vec_setv2df"
2351 [(match_operand:V2DF 0 "register_operand" "")
2352 (match_operand:DF 1 "register_operand" "")
2353 (match_operand 2 "const_int_operand" "")]
2356 ix86_expand_vector_set (false, operands[0], operands[1],
2357 INTVAL (operands[2]));
;; vec_extractv2df: expander that hands the work to ix86_expand_vector_extract,
;; passing false, DF operand 0, V2DF operand 1 and the integer value of
;; constant operand 2.
2361 (define_expand "vec_extractv2df"
2362 [(match_operand:DF 0 "register_operand" "")
2363 (match_operand:V2DF 1 "register_operand" "")
2364 (match_operand 2 "const_int_operand" "")]
2367 ix86_expand_vector_extract (false, operands[0], operands[1],
2368 INTVAL (operands[2]));
;; vec_initv2df: expander that hands the work to ix86_expand_vector_init,
;; passing false, V2DF operand 0 and the unconstrained operand 1.
2372 (define_expand "vec_initv2df"
2373 [(match_operand:V2DF 0 "register_operand" "")
2374 (match_operand 1 "" "")]
2377 ix86_expand_vector_init (false, operands[0], operands[1]);
2381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2383 ;; Parallel integral arithmetic
2385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; neg<mode>2: integer vector negation for every SSEMODEI mode.  The
;; preparation statement creates operand 2 as the zero vector of the mode,
;; forced into a register.  (The arithmetic rtx that uses it is not visible in
;; this listing; presumably zero minus operand 1.)
2387 (define_expand "neg<mode>2"
2388 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2391 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2393 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; add<mode>3: named expander for integer vector addition in every SSEMODEI
;; mode.  Operands are passed through ix86_fixup_binary_operands_no_copy for
;; PLUS before the pattern is emitted.
2395 (define_expand "add<mode>3"
2396 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2397 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2398 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2400 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; *add<mode>3: emits padd{b,w,d,q} (suffix from ssevecsize).  Operand 1 is
;; tied to the destination and marked commutative ("%"); operand 2 may be a
;; register or memory.
2402 (define_insn "*add<mode>3"
2403 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2405 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2406 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2407 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2408 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2409 [(set_attr "type" "sseiadd")
2410 (set_attr "mode" "TI")])
;; sse2_ssadd<mode>3: emits padds{b,w} for the SSEMODE12 modes (V16QI, V8HI);
;; the condition checks the operands against SS_PLUS.  Operand 1 is
;; commutative and tied to the destination.
2412 (define_insn "sse2_ssadd<mode>3"
2413 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2415 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2416 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2417 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2418 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2419 [(set_attr "type" "sseiadd")
2420 (set_attr "mode" "TI")])
;; sse2_usadd<mode>3: emits paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI);
;; the condition checks the operands against US_PLUS.  Operand 1 is
;; commutative and tied to the destination.
2422 (define_insn "sse2_usadd<mode>3"
2423 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2425 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2426 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2427 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2428 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2429 [(set_attr "type" "sseiadd")
2430 (set_attr "mode" "TI")])
;; sub<mode>3: named expander for integer vector subtraction in every SSEMODEI
;; mode.  Operand 1 must be a register; operands are passed through
;; ix86_fixup_binary_operands_no_copy for MINUS.
2432 (define_expand "sub<mode>3"
2433 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2434 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2435 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2437 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; *sub<mode>3: emits psub{b,w,d,q}.  Operand 1 is a register tied to the
;; destination (not commutative); operand 2 may be a register or memory.
;; The insn condition is not visible in this listing.
2439 (define_insn "*sub<mode>3"
2440 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2442 (match_operand:SSEMODEI 1 "register_operand" "0")
2443 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2445 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2446 [(set_attr "type" "sseiadd")
2447 (set_attr "mode" "TI")])
;; sse2_sssub<mode>3: emits psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is a register tied to the destination.  The insn condition is not
;; visible in this listing.
2449 (define_insn "sse2_sssub<mode>3"
2450 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2452 (match_operand:SSEMODE12 1 "register_operand" "0")
2453 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2455 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2456 [(set_attr "type" "sseiadd")
2457 (set_attr "mode" "TI")])
;; sse2_ussub<mode>3: emits psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).
;; Operand 1 is a register tied to the destination.  The insn condition is not
;; visible in this listing.
2459 (define_insn "sse2_ussub<mode>3"
2460 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2462 (match_operand:SSEMODE12 1 "register_operand" "0")
2463 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2465 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2466 [(set_attr "type" "sseiadd")
2467 (set_attr "mode" "TI")])
;; mulv16qi3: V16QI multiply synthesized from word multiplies.  Twelve V16QI
;; temporaries are allocated.  Both inputs are unpacked (high and low halves)
;; so each source byte sits in the low byte of a word, the halves are
;; multiplied with mulv8hi3, and the product bytes are gathered back into byte
;; order by successive punpckhbw/punpcklbw steps, the last of which writes
;; op0.  (The declarations of t, i and op0 are not visible in this listing.)
2469 (define_expand "mulv16qi3"
2470 [(set (match_operand:V16QI 0 "register_operand" "")
2471 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2472 (match_operand:V16QI 2 "register_operand" "")))]
2478 for (i = 0; i < 12; ++i)
2479 t[i] = gen_reg_rtx (V16QImode);
2481 /* Unpack data such that we've got a source byte in each low byte of
2482 each word. We don't care what goes into the high byte of each word.
2483 Rather than trying to get zero in there, most convenient is to let
2484 it be a copy of the low byte. */
2485 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2486 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2487 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2488 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2490 /* Multiply words. The end-of-line annotations here give a picture of what
2491 the output of that instruction looks like. Dot means don't care; the
2492 letters are the bytes of the result with A being the most significant. */
2493 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2494 gen_lowpart (V8HImode, t[0]),
2495 gen_lowpart (V8HImode, t[1])));
2496 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2497 gen_lowpart (V8HImode, t[2]),
2498 gen_lowpart (V8HImode, t[3])));
2500 /* Extract the relevant bytes and merge them back together. */
2501 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2502 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2503 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2504 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2505 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2506 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2509 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; mulv8hi3: named expander for V8HI multiplication.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for MULT.
2513 (define_expand "mulv8hi3"
2514 [(set (match_operand:V8HI 0 "register_operand" "")
2515 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2516 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2518 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *mulv8hi3: emits pmullw.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
2520 (define_insn "*mulv8hi3"
2521 [(set (match_operand:V8HI 0 "register_operand" "=x")
2522 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2523 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2524 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2525 "pmullw\t{%2, %0|%0, %2}"
2526 [(set_attr "type" "sseimul")
2527 (set_attr "mode" "TI")])
;; sse2_smulv8hi3_highpart: emits pmulhw on V8HI operands.  Operand 1 is
;; commutative and tied to the destination.  The rtx operators wrapped around
;; the operands are not visible in this listing.
2529 (define_insn "sse2_smulv8hi3_highpart"
2530 [(set (match_operand:V8HI 0 "register_operand" "=x")
2535 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2537 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2539 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2540 "pmulhw\t{%2, %0|%0, %2}"
2541 [(set_attr "type" "sseimul")
2542 (set_attr "mode" "TI")])
;; sse2_umulv8hi3_highpart: emits pmulhuw on V8HI operands.  Operand 1 is
;; commutative and tied to the destination.  The rtx operators wrapped around
;; the operands are not visible in this listing.
2544 (define_insn "sse2_umulv8hi3_highpart"
2545 [(set (match_operand:V8HI 0 "register_operand" "=x")
2550 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2552 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2554 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2555 "pmulhuw\t{%2, %0|%0, %2}"
2556 [(set_attr "type" "sseimul")
2557 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3: emits pmuludq.  Elements 0 and 2 of each V4SI input
;; (the selectors in the two parallels) are combined into a V2DI result.
;; Operand 1 is commutative and tied to the destination.  The operand check in
;; the condition is made in V4SImode, the mode of the input operands.
2559 (define_insn "sse2_umulv2siv2di3"
2560 [(set (match_operand:V2DI 0 "register_operand" "=x")
2564 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2565 (parallel [(const_int 0) (const_int 2)])))
2568 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2569 (parallel [(const_int 0) (const_int 2)])))))]
2570 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2571 "pmuludq\t{%2, %0|%0, %2}"
2572 [(set_attr "type" "sseimul")
2573 (set_attr "mode" "TI")])
;; sse2_pmaddwd: emits pmaddwd, producing a V4SI from two V8HI operands.
;; Each input is referenced twice in the pattern (once directly, once through
;; match_dup inside a vec_select:V4HI); the visible selector lists start at
;; element 0 and element 1 respectively.  Most selector elements and the insn
;; condition are not visible in this listing.
2575 (define_insn "sse2_pmaddwd"
2576 [(set (match_operand:V4SI 0 "register_operand" "=x")
2581 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2582 (parallel [(const_int 0)
2588 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2589 (parallel [(const_int 0)
2595 (vec_select:V4HI (match_dup 1)
2596 (parallel [(const_int 1)
2601 (vec_select:V4HI (match_dup 2)
2602 (parallel [(const_int 1)
2605 (const_int 7)]))))))]
2607 "pmaddwd\t{%2, %0|%0, %2}"
2608 [(set_attr "type" "sseiadd")
2609 (set_attr "mode" "TI")])
;; mulv4si3: V4SI multiply synthesized from two pmuludq (sse2_umulv2siv2di3)
;; operations.  Elements 0 and 2 are multiplied directly; both inputs are then
;; shifted right by 32 bits as TImode values so elements 1 and 3 land in the
;; even slots, and multiplied again.  pshufd moves the low 32 bits of each
;; product together and punpckldq interleaves the two partial results into
;; op0.  (The assignments of op0, op1 and op2 are not visible in this listing.)
2611 (define_expand "mulv4si3"
2612 [(set (match_operand:V4SI 0 "register_operand" "")
2613 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2614 (match_operand:V4SI 2 "register_operand" "")))]
2617 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2623 t1 = gen_reg_rtx (V4SImode);
2624 t2 = gen_reg_rtx (V4SImode);
2625 t3 = gen_reg_rtx (V4SImode);
2626 t4 = gen_reg_rtx (V4SImode);
2627 t5 = gen_reg_rtx (V4SImode);
2628 t6 = gen_reg_rtx (V4SImode);
2629 thirtytwo = GEN_INT (32);
2631 /* Multiply elements 2 and 0. */
2632 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2634 /* Shift both input vectors down one element, so that elements 3 and 1
2635 are now in the slots for elements 2 and 0. For K8, at least, this is
2636 faster than using a shuffle. */
2637 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2638 gen_lowpart (TImode, op1), thirtytwo));
2639 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2640 gen_lowpart (TImode, op2), thirtytwo));
2642 /* Multiply elements 3 and 1. */
2643 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2645 /* Move the results in element 2 down to element 1; we don't care what
2646 goes in elements 2 and 3. */
2647 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2648 const0_rtx, const0_rtx));
2649 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2650 const0_rtx, const0_rtx));
2652 /* Merge the parts back together. */
2653 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; mulv2di3: V2DI multiply synthesized from 32x32->64 pmuludq products.
;; With each 64-bit element split as hi:lo, the result is
;;   lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32).
;; The high halves are brought into the low 32 bits with a logical right
;; shift by 32 before being multiplied.  (The assignments of op0, op1 and op2
;; are not visible in this listing.)
2657 (define_expand "mulv2di3"
2658 [(set (match_operand:V2DI 0 "register_operand" "")
2659 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2660 (match_operand:V2DI 2 "register_operand" "")))]
2663 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2669 t1 = gen_reg_rtx (V2DImode);
2670 t2 = gen_reg_rtx (V2DImode);
2671 t3 = gen_reg_rtx (V2DImode);
2672 t4 = gen_reg_rtx (V2DImode);
2673 t5 = gen_reg_rtx (V2DImode);
2674 t6 = gen_reg_rtx (V2DImode);
2675 thirtytwo = GEN_INT (32);
2677 /* Multiply low parts. */
2678 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2679 gen_lowpart (V4SImode, op2)));
2681 /* Shift input vectors right 32 bits so we can multiply high parts. */
2682 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2683 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2685 /* Multiply high parts by low parts. */
2686 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2687 gen_lowpart (V4SImode, t3)));
2688 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2689 gen_lowpart (V4SImode, t2)));
2691 /* Shift them back. */
2692 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2693 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2695 /* Add the three parts together. */
2696 emit_insn (gen_addv2di3 (t6, t1, t4));
2697 emit_insn (gen_addv2di3 (op0, t6, t5));
;; sdot_prodv8hi: dot-product expander.  pmaddwd of V8HI operands 1 and 2 goes
;; into a fresh V4SI temporary, which is then added to accumulator operand 3
;; with the sum stored in operand 0.
2701 (define_expand "sdot_prodv8hi"
2702 [(match_operand:V4SI 0 "register_operand" "")
2703 (match_operand:V8HI 1 "nonimmediate_operand" "")
2704 (match_operand:V8HI 2 "nonimmediate_operand" "")
2705 (match_operand:V4SI 3 "register_operand" "")]
2708 rtx t = gen_reg_rtx (V4SImode);
2709 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2710 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; udot_prodv4si: dot-product expander with a V2DI accumulator.  The pmuludq
;; product of operands 1 and 2 is added to operand 3 in t1.  Both inputs are
;; then shifted right as TImode values (the shift count is not visible in this
;; listing) and multiplied again into t4.  Operand 0 receives t1 + t4.
2714 (define_expand "udot_prodv4si"
2715 [(match_operand:V2DI 0 "register_operand" "")
2716 (match_operand:V4SI 1 "register_operand" "")
2717 (match_operand:V4SI 2 "register_operand" "")
2718 (match_operand:V2DI 3 "register_operand" "")]
2723 t1 = gen_reg_rtx (V2DImode);
2724 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2725 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2727 t2 = gen_reg_rtx (V4SImode);
2728 t3 = gen_reg_rtx (V4SImode);
2729 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2730 gen_lowpart (TImode, operands[1]),
2732 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2733 gen_lowpart (TImode, operands[2]),
2736 t4 = gen_reg_rtx (V2DImode);
2737 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2739 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; ashr<mode>3: emits psra{w,d} for the SSEMODE24 modes (V8HI, V4SI).
;; Operand 1 is tied to the destination; the TImode shift count may be a
;; register or an immediate.  The insn condition is not visible in this
;; listing.
2743 (define_insn "ashr<mode>3"
2744 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2746 (match_operand:SSEMODE24 1 "register_operand" "0")
2747 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2749 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2750 [(set_attr "type" "sseishft")
2751 (set_attr "mode" "TI")])
;; lshr<mode>3: emits psrl{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; Operand 1 is tied to the destination; the TImode shift count may be a
;; register or an immediate.  The insn condition is not visible in this
;; listing.
2753 (define_insn "lshr<mode>3"
2754 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2755 (lshiftrt:SSEMODE248
2756 (match_operand:SSEMODE248 1 "register_operand" "0")
2757 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2759 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2760 [(set_attr "type" "sseishft")
2761 (set_attr "mode" "TI")])
;; ashl<mode>3: emits psll{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI).
;; Operand 1 is tied to the destination; the TImode shift count may be a
;; register or an immediate.  The insn condition is not visible in this
;; listing.
2763 (define_insn "ashl<mode>3"
2764 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2766 (match_operand:SSEMODE248 1 "register_operand" "0")
2767 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2769 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2770 [(set_attr "type" "sseishft")
2771 (set_attr "mode" "TI")])
;; sse2_ashlti3: whole-register left shift of a TImode value.  Operand 2 is an
;; immediate accepted by const_0_to_255_mul_8_operand; the output code divides
;; it by 8 before printing it as the pslldq operand.
2773 (define_insn "sse2_ashlti3"
2774 [(set (match_operand:TI 0 "register_operand" "=x")
2775 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2776 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2779 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2780 return "pslldq\t{%2, %0|%0, %2}";
2782 [(set_attr "type" "sseishft")
2783 (set_attr "mode" "TI")])
;; vec_shl_<mode>: whole-vector left shift for every SSEMODEI mode, expressed
;; as a TImode ashift.  Operand 2 is tested against
;; const_0_to_255_mul_8_operand (the action taken when the test fails is not
;; visible in this listing); operands 0 and 1 are then replaced by their
;; TImode lowparts so the emitted pattern is TImode throughout.
2785 (define_expand "vec_shl_<mode>"
2786 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2787 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2788 (match_operand:SI 2 "general_operand" "")))]
2791 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2793 operands[0] = gen_lowpart (TImode, operands[0]);
2794 operands[1] = gen_lowpart (TImode, operands[1]);
;; sse2_lshrti3: whole-register logical right shift of a TImode value.
;; Operand 2 is an immediate accepted by const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the psrldq operand.
2797 (define_insn "sse2_lshrti3"
2798 [(set (match_operand:TI 0 "register_operand" "=x")
2799 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2800 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2803 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2804 return "psrldq\t{%2, %0|%0, %2}";
2806 [(set_attr "type" "sseishft")
2807 (set_attr "mode" "TI")])
;; vec_shr_<mode>: whole-vector right shift for every SSEMODEI mode, expressed
;; as a TImode lshiftrt.  Operand 2 is tested against
;; const_0_to_255_mul_8_operand (the action taken when the test fails is not
;; visible in this listing); operands 0 and 1 are then replaced by their
;; TImode lowparts so the emitted pattern is TImode throughout.
2809 (define_expand "vec_shr_<mode>"
2810 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2811 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2812 (match_operand:SI 2 "general_operand" "")))]
2815 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2817 operands[0] = gen_lowpart (TImode, operands[0]);
2818 operands[1] = gen_lowpart (TImode, operands[1]);
;; umaxv16qi3: named expander for unsigned byte maximum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for UMAX.
2821 (define_expand "umaxv16qi3"
2822 [(set (match_operand:V16QI 0 "register_operand" "")
2823 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2824 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2826 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; *umaxv16qi3: emits pmaxub.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
2828 (define_insn "*umaxv16qi3"
2829 [(set (match_operand:V16QI 0 "register_operand" "=x")
2830 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2831 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2832 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2833 "pmaxub\t{%2, %0|%0, %2}"
2834 [(set_attr "type" "sseiadd")
2835 (set_attr "mode" "TI")])
;; smaxv8hi3: named expander for signed word maximum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for SMAX.
2837 (define_expand "smaxv8hi3"
2838 [(set (match_operand:V8HI 0 "register_operand" "")
2839 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2840 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2842 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; *smaxv8hi3: emits pmaxsw.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
2844 (define_insn "*smaxv8hi3"
2845 [(set (match_operand:V8HI 0 "register_operand" "=x")
2846 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2847 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2848 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2849 "pmaxsw\t{%2, %0|%0, %2}"
2850 [(set_attr "type" "sseiadd")
2851 (set_attr "mode" "TI")])
;; umaxv8hi3: unsigned word maximum built from two operations: an unsigned
;; saturating subtract of operand 2 from operand 1, followed by adding
;; operand 2 back to that intermediate.  The preparation code records the
;; original destination in operands[3] and, when the destination is the same
;; rtx as operand 2, switches the intermediate to a fresh V8HI register so
;; operand 2 is not overwritten before the add.  (The destination of the
;; second set is not visible in this listing.)
2853 (define_expand "umaxv8hi3"
2854 [(set (match_operand:V8HI 0 "register_operand" "=x")
2855 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2856 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2858 (plus:V8HI (match_dup 0) (match_dup 2)))]
2861 operands[3] = operands[0];
2862 if (rtx_equal_p (operands[0], operands[2]))
2863 operands[0] = gen_reg_rtx (V8HImode);
;; smax<mode>3: signed maximum for the SSEMODE14 modes (V16QI, V4SI), expanded
;; as a vector conditional.  xops is filled in the layout expected by
;; ix86_expand_int_vcond: destination, value-if-true (operand 1),
;; value-if-false (operand 2), the comparison operand 1 > operand 2 (GT), and
;; the two compared operands.
2866 (define_expand "smax<mode>3"
2867 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2868 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2869 (match_operand:SSEMODE14 2 "register_operand" "")))]
2875 xops[0] = operands[0];
2876 xops[1] = operands[1];
2877 xops[2] = operands[2];
2878 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2879 xops[4] = operands[1];
2880 xops[5] = operands[2];
2881 ok = ix86_expand_int_vcond (xops);
;; umaxv4si3: unsigned V4SI maximum, expanded as a vector conditional through
;; ix86_expand_int_vcond: operand 1 is selected where operand 1 > operand 2
;; (unsigned, GTU), operand 2 elsewhere.
2886 (define_expand "umaxv4si3"
2887 [(set (match_operand:V4SI 0 "register_operand" "")
2888 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2889 (match_operand:V4SI 2 "register_operand" "")))]
2895 xops[0] = operands[0];
2896 xops[1] = operands[1];
2897 xops[2] = operands[2];
2898 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2899 xops[4] = operands[1];
2900 xops[5] = operands[2];
2901 ok = ix86_expand_int_vcond (xops);
;; uminv16qi3: named expander for unsigned byte minimum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for UMIN.
2906 (define_expand "uminv16qi3"
2907 [(set (match_operand:V16QI 0 "register_operand" "")
2908 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2909 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2911 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; *uminv16qi3: emits pminub.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
2913 (define_insn "*uminv16qi3"
2914 [(set (match_operand:V16QI 0 "register_operand" "=x")
2915 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2916 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2917 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2918 "pminub\t{%2, %0|%0, %2}"
2919 [(set_attr "type" "sseiadd")
2920 (set_attr "mode" "TI")])
;; sminv8hi3: named expander for signed word minimum.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy for SMIN.
2922 (define_expand "sminv8hi3"
2923 [(set (match_operand:V8HI 0 "register_operand" "")
2924 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2925 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2927 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; *sminv8hi3: emits pminsw.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
2929 (define_insn "*sminv8hi3"
2930 [(set (match_operand:V8HI 0 "register_operand" "=x")
2931 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2932 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2933 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2934 "pminsw\t{%2, %0|%0, %2}"
2935 [(set_attr "type" "sseiadd")
2936 (set_attr "mode" "TI")])
;; smin<mode>3: signed minimum for the SSEMODE14 modes (V16QI, V4SI), expanded
;; as a vector conditional through ix86_expand_int_vcond.  The comparison is
;; still operand 1 > operand 2 (GT), but the selected values are swapped:
;; operand 2 where the comparison holds, operand 1 elsewhere.
2938 (define_expand "smin<mode>3"
2939 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2940 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2941 (match_operand:SSEMODE14 2 "register_operand" "")))]
2947 xops[0] = operands[0];
2948 xops[1] = operands[2];
2949 xops[2] = operands[1];
2950 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2951 xops[4] = operands[1];
2952 xops[5] = operands[2];
2953 ok = ix86_expand_int_vcond (xops);
;; umin<mode>3: unsigned minimum for the SSEMODE24 modes (V8HI, V4SI), expanded
;; as a vector conditional through ix86_expand_int_vcond.  The comparison is
;; operand 1 > operand 2 (unsigned, GTU) with the selected values swapped:
;; operand 2 where the comparison holds, operand 1 elsewhere.
2958 (define_expand "umin<mode>3"
2959 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2960 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2961 (match_operand:SSEMODE24 2 "register_operand" "")))]
2967 xops[0] = operands[0];
2968 xops[1] = operands[2];
2969 xops[2] = operands[1];
2970 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2971 xops[4] = operands[1];
2972 xops[5] = operands[2];
2973 ok = ix86_expand_int_vcond (xops);
2978 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2980 ;; Parallel integral comparisons
2982 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_eq<mode>3: emits pcmpeq{b,w,d} for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Operand 1 is commutative and tied to the destination; operand 2 may
;; be a register or memory.
2984 (define_insn "sse2_eq<mode>3"
2985 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2987 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2988 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2989 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2990 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2991 [(set_attr "type" "ssecmp")
2992 (set_attr "mode" "TI")])
;; sse2_gt<mode>3: emits pcmpgt{b,w,d} for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Operand 1 is a register tied to the destination (not commutative);
;; operand 2 may be a register or memory.  The insn condition is not visible
;; in this listing.
2994 (define_insn "sse2_gt<mode>3"
2995 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2997 (match_operand:SSEMODE124 1 "register_operand" "0")
2998 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3000 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3001 [(set_attr "type" "ssecmp")
3002 (set_attr "mode" "TI")])
;; vcond<mode>: vector conditional select for the SSEMODE124 modes.  Operand 3
;; is the comparison operator applied to operands 4 and 5; operands 1 and 2
;; are the two candidate values.  The whole operand array is handed to
;; ix86_expand_int_vcond; what is done with its return value is not visible in
;; this listing.
3004 (define_expand "vcond<mode>"
3005 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3006 (if_then_else:SSEMODE124
3007 (match_operator 3 ""
3008 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3009 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3010 (match_operand:SSEMODE124 1 "general_operand" "")
3011 (match_operand:SSEMODE124 2 "general_operand" "")))]
3014 if (ix86_expand_int_vcond (operands))
;; vcondu<mode>: counterpart of vcond<mode> under the "vcondu" name, with the
;; same operand layout for the SSEMODE124 modes.  It likewise hands the
;; operand array to ix86_expand_int_vcond; what is done with the return value
;; is not visible in this listing.
3020 (define_expand "vcondu<mode>"
3021 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3022 (if_then_else:SSEMODE124
3023 (match_operator 3 ""
3024 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3025 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3026 (match_operand:SSEMODE124 1 "general_operand" "")
3027 (match_operand:SSEMODE124 2 "general_operand" "")))]
3030 if (ix86_expand_int_vcond (operands))
3036 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3038 ;; Parallel integral logical operations
3040 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; one_cmpl<mode>2: bitwise complement for every SSEMODEI mode, expressed as an
;; xor.  The preparation code builds a CONST_VECTOR with constm1_rtx in each of
;; the mode's elements and forces it into a register as operand 2.
3042 (define_expand "one_cmpl<mode>2"
3043 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3044 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3048 int i, n = GET_MODE_NUNITS (<MODE>mode);
3049 rtvec v = rtvec_alloc (n);
3051 for (i = 0; i < n; ++i)
3052 RTVEC_ELT (v, i) = constm1_rtx;
3054 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; and<mode>3: named expander for bitwise AND in every SSEMODEI mode.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy for AND.
3057 (define_expand "and<mode>3"
3058 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3059 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3060 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3062 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; *and<mode>3: emits pand.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3064 (define_insn "*and<mode>3"
3065 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3067 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3068 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3069 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3070 "pand\t{%2, %0|%0, %2}"
3071 [(set_attr "type" "sselog")
3072 (set_attr "mode" "TI")])
;; sse2_nand<mode>3: emits pandn.  The pattern applies "not" to operand 1 (a
;; register tied to the destination) and combines it with operand 2, which may
;; be a register or memory.  The insn condition is not visible in this listing.
3074 (define_insn "sse2_nand<mode>3"
3075 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3077 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3078 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3080 "pandn\t{%2, %0|%0, %2}"
3081 [(set_attr "type" "sselog")
3082 (set_attr "mode" "TI")])
;; ior<mode>3: named expander for bitwise OR in every SSEMODEI mode.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy for IOR.
3084 (define_expand "ior<mode>3"
3085 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3086 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3087 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3089 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; *ior<mode>3: emits por.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3091 (define_insn "*ior<mode>3"
3092 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3094 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3095 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3096 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3097 "por\t{%2, %0|%0, %2}"
3098 [(set_attr "type" "sselog")
3099 (set_attr "mode" "TI")])
;; xor<mode>3: named expander for bitwise XOR in every SSEMODEI mode.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy for XOR.
3101 (define_expand "xor<mode>3"
3102 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3103 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3104 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3106 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; *xor<mode>3: emits pxor.  Operand 1 is commutative and tied to the
;; destination; operand 2 may be a register or memory.
3108 (define_insn "*xor<mode>3"
3109 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3111 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3112 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3113 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3114 "pxor\t{%2, %0|%0, %2}"
3115 [(set_attr "type" "sselog")
3116 (set_attr "mode" "TI")])
3118 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3120 ;; Parallel integral element swizzling
3122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_packsswb: emits packsswb, producing a V16QI from two V8HI operands.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.  The rtx operators around the operands and the insn
;; condition are not visible in this listing.
3124 (define_insn "sse2_packsswb"
3125 [(set (match_operand:V16QI 0 "register_operand" "=x")
3128 (match_operand:V8HI 1 "register_operand" "0"))
3130 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3132 "packsswb\t{%2, %0|%0, %2}"
3133 [(set_attr "type" "sselog")
3134 (set_attr "mode" "TI")])
;; sse2_packssdw: emits packssdw, producing a V8HI from two V4SI operands.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.  The rtx operators around the operands and the insn
;; condition are not visible in this listing.
3136 (define_insn "sse2_packssdw"
3137 [(set (match_operand:V8HI 0 "register_operand" "=x")
3140 (match_operand:V4SI 1 "register_operand" "0"))
3142 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3144 "packssdw\t{%2, %0|%0, %2}"
3145 [(set_attr "type" "sselog")
3146 (set_attr "mode" "TI")])
;; sse2_packuswb: emits packuswb, producing a V16QI from two V8HI operands.
;; Operand 1 is a register tied to the destination; operand 2 may be a
;; register or memory.  The rtx operators around the operands and the insn
;; condition are not visible in this listing.
3148 (define_insn "sse2_packuswb"
3149 [(set (match_operand:V16QI 0 "register_operand" "=x")
3152 (match_operand:V8HI 1 "register_operand" "0"))
3154 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3156 "packuswb\t{%2, %0|%0, %2}"
3157 [(set_attr "type" "sselog")
3158 (set_attr "mode" "TI")])
;; Emits punpckhbw on V16QI operands.  The selector alternates indices
;; 8..15 with indices 24..31 (8,24, 9,25, ... 15,31); presumably 16..31
;; address operand 2 in the combined numbering of both inputs -- confirm
;; against the full pattern.  Operand 1 is tied to the destination.
;; Attributes: type sselog, mode TI.
3160 (define_insn "sse2_punpckhbw"
3161 [(set (match_operand:V16QI 0 "register_operand" "=x")
3164 (match_operand:V16QI 1 "register_operand" "0")
3165 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3166 (parallel [(const_int 8) (const_int 24)
3167 (const_int 9) (const_int 25)
3168 (const_int 10) (const_int 26)
3169 (const_int 11) (const_int 27)
3170 (const_int 12) (const_int 28)
3171 (const_int 13) (const_int 29)
3172 (const_int 14) (const_int 30)
3173 (const_int 15) (const_int 31)])))]
3175 "punpckhbw\t{%2, %0|%0, %2}"
3176 [(set_attr "type" "sselog")
3177 (set_attr "mode" "TI")])
;; Emits punpcklbw on V16QI operands.  The selector alternates indices
;; 0..7 with indices 16..23 (0,16, 1,17, ... 7,23); presumably 16..23
;; address operand 2 in the combined numbering of both inputs -- confirm
;; against the full pattern.  Operand 1 is tied to the destination.
;; Attributes: type sselog, mode TI.
3179 (define_insn "sse2_punpcklbw"
3180 [(set (match_operand:V16QI 0 "register_operand" "=x")
3183 (match_operand:V16QI 1 "register_operand" "0")
3184 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3185 (parallel [(const_int 0) (const_int 16)
3186 (const_int 1) (const_int 17)
3187 (const_int 2) (const_int 18)
3188 (const_int 3) (const_int 19)
3189 (const_int 4) (const_int 20)
3190 (const_int 5) (const_int 21)
3191 (const_int 6) (const_int 22)
3192 (const_int 7) (const_int 23)])))]
3194 "punpcklbw\t{%2, %0|%0, %2}"
3195 [(set_attr "type" "sselog")
3196 (set_attr "mode" "TI")])
;; Emits punpckhwd on V8HI operands.  The selector alternates indices
;; 4..7 with indices 12..15 (4,12, 5,13, 6,14, 7,15).  Operand 1 is tied
;; to the destination.  Attributes: type sselog, mode TI.
3198 (define_insn "sse2_punpckhwd"
3199 [(set (match_operand:V8HI 0 "register_operand" "=x")
3202 (match_operand:V8HI 1 "register_operand" "0")
3203 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3204 (parallel [(const_int 4) (const_int 12)
3205 (const_int 5) (const_int 13)
3206 (const_int 6) (const_int 14)
3207 (const_int 7) (const_int 15)])))]
3209 "punpckhwd\t{%2, %0|%0, %2}"
3210 [(set_attr "type" "sselog")
3211 (set_attr "mode" "TI")])
;; Emits punpcklwd on V8HI operands.  The selector alternates indices
;; 0..3 with indices 8..11 (0,8, 1,9, 2,10, 3,11).  Operand 1 is tied to
;; the destination.  Attributes: type sselog, mode TI.
3213 (define_insn "sse2_punpcklwd"
3214 [(set (match_operand:V8HI 0 "register_operand" "=x")
3217 (match_operand:V8HI 1 "register_operand" "0")
3218 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3219 (parallel [(const_int 0) (const_int 8)
3220 (const_int 1) (const_int 9)
3221 (const_int 2) (const_int 10)
3222 (const_int 3) (const_int 11)])))]
3224 "punpcklwd\t{%2, %0|%0, %2}"
3225 [(set_attr "type" "sselog")
3226 (set_attr "mode" "TI")])
;; Emits punpckhdq on V4SI operands.  The selector picks indices
;; 2,6, 3,7.  Operand 1 is tied to the destination.
;; Attributes: type sselog, mode TI.
3228 (define_insn "sse2_punpckhdq"
3229 [(set (match_operand:V4SI 0 "register_operand" "=x")
3232 (match_operand:V4SI 1 "register_operand" "0")
3233 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3234 (parallel [(const_int 2) (const_int 6)
3235 (const_int 3) (const_int 7)])))]
3237 "punpckhdq\t{%2, %0|%0, %2}"
3238 [(set_attr "type" "sselog")
3239 (set_attr "mode" "TI")])
;; Emits punpckldq on V4SI operands.  The selector picks indices
;; 0,4, 1,5.  Operand 1 is tied to the destination.
;; Attributes: type sselog, mode TI.
3241 (define_insn "sse2_punpckldq"
3242 [(set (match_operand:V4SI 0 "register_operand" "=x")
3245 (match_operand:V4SI 1 "register_operand" "0")
3246 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3247 (parallel [(const_int 0) (const_int 4)
3248 (const_int 1) (const_int 5)])))]
3250 "punpckldq\t{%2, %0|%0, %2}"
3251 [(set_attr "type" "sselog")
3252 (set_attr "mode" "TI")])
;; Emits punpckhqdq on V2DI operands; the selector list begins with
;; index 1.  Operand 1 is tied to the destination; operand 2 uses
;; constraint "xm".  Attributes: type sselog, mode TI.
3254 (define_insn "sse2_punpckhqdq"
3255 [(set (match_operand:V2DI 0 "register_operand" "=x")
3258 (match_operand:V2DI 1 "register_operand" "0")
3259 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3260 (parallel [(const_int 1)
3263 "punpckhqdq\t{%2, %0|%0, %2}"
3264 [(set_attr "type" "sselog")
3265 (set_attr "mode" "TI")])
;; Emits punpcklqdq on V2DI operands; the selector list begins with
;; index 0.  Operand 1 is tied to the destination; operand 2 uses
;; constraint "xm".  Attributes: type sselog, mode TI.
3267 (define_insn "sse2_punpcklqdq"
3268 [(set (match_operand:V2DI 0 "register_operand" "=x")
3271 (match_operand:V2DI 1 "register_operand" "0")
3272 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3273 (parallel [(const_int 0)
3276 "punpcklqdq\t{%2, %0|%0, %2}"
3277 [(set_attr "type" "sselog")
3278 (set_attr "mode" "TI")])
;; Expander for inserting a scalar into a V8HI vector.  Operand 2 arrives
;; as SImode and operand 3 as an element index in 0..7
;; (const_0_to_7_operand).  The preparation code narrows operand 2 to
;; HImode with gen_lowpart and turns the index into a one-bit mask
;; (1 << index), the form the "*sse2_pinsrw" insn expects.
3280 (define_expand "sse2_pinsrw"
3281 [(set (match_operand:V8HI 0 "register_operand" "")
3284 (match_operand:SI 2 "nonimmediate_operand" ""))
3285 (match_operand:V8HI 1 "register_operand" "")
3286 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3289 operands[2] = gen_lowpart (HImode, operands[2]);
3290 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; Insn emitting pinsrw.  Operand 3 is matched as a power of two in
;; 1..128 (const_pow2_1_to_128_operand); the output code converts it back
;; to an element index with exact_log2 before printing.  Operand 2 is an
;; HImode register or memory ("rm") and is printed with the %k modifier.
;; Operand 1 is tied to the destination.  Attributes: type sselog, mode TI.
3293 (define_insn "*sse2_pinsrw"
3294 [(set (match_operand:V8HI 0 "register_operand" "=x")
3297 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3298 (match_operand:V8HI 1 "register_operand" "0")
3299 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3302 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3303 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3305 [(set_attr "type" "sselog")
3306 (set_attr "mode" "TI")])
;; Emits pextrw.  The destination is an SImode register (constraint "r"),
;; the source a V8HI register ("x"), and operand 2 an element index in
;; 0..7 used as the single entry of the selector.
;; Attributes: type sselog, mode TI.
3308 (define_insn "sse2_pextrw"
3309 [(set (match_operand:SI 0 "register_operand" "=r")
3312 (match_operand:V8HI 1 "register_operand" "x")
3313 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3315 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3316 [(set_attr "type" "sselog")
3317 (set_attr "mode" "TI")])
;; Expander taking a V4SI destination, a V4SI source and an integer
;; shuffle mask (operand 2).  The mask is split into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7) which are passed as separate selector
;; operands to gen_sse2_pshufd_1.
3319 (define_expand "sse2_pshufd"
3320 [(match_operand:V4SI 0 "register_operand" "")
3321 (match_operand:V4SI 1 "nonimmediate_operand" "")
3322 (match_operand:SI 2 "const_int_operand" "")]
3325 int mask = INTVAL (operands[2]);
3326 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3327 GEN_INT ((mask >> 0) & 3),
3328 GEN_INT ((mask >> 2) & 3),
3329 GEN_INT ((mask >> 4) & 3),
3330 GEN_INT ((mask >> 6) & 3)));
;; Insn emitting pshufd.  Operands 2..5 are the four element selectors,
;; each in 0..3 (const_0_to_3_operand).  The output code packs them back
;; into one immediate (selector n shifted left by 0, 2, 4, 6 bits),
;; stores it in operands[2] and prints the instruction.
;; Attributes: type sselog1, mode TI.
3334 (define_insn "sse2_pshufd_1"
3335 [(set (match_operand:V4SI 0 "register_operand" "=x")
3337 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3338 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3339 (match_operand 3 "const_0_to_3_operand" "")
3340 (match_operand 4 "const_0_to_3_operand" "")
3341 (match_operand 5 "const_0_to_3_operand" "")])))]
3345 mask |= INTVAL (operands[2]) << 0;
3346 mask |= INTVAL (operands[3]) << 2;
3347 mask |= INTVAL (operands[4]) << 4;
3348 mask |= INTVAL (operands[5]) << 6;
3349 operands[2] = GEN_INT (mask);
3351 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3353 [(set_attr "type" "sselog1")
3354 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer
;; shuffle mask (operand 2).  The mask is split into four 2-bit fields
;; (bits 0-1, 2-3, 4-5, 6-7) which are passed as separate selector
;; operands to gen_sse2_pshuflw_1.
3356 (define_expand "sse2_pshuflw"
3357 [(match_operand:V8HI 0 "register_operand" "")
3358 (match_operand:V8HI 1 "nonimmediate_operand" "")
3359 (match_operand:SI 2 "const_int_operand" "")]
3362 int mask = INTVAL (operands[2]);
3363 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3364 GEN_INT ((mask >> 0) & 3),
3365 GEN_INT ((mask >> 2) & 3),
3366 GEN_INT ((mask >> 4) & 3),
3367 GEN_INT ((mask >> 6) & 3)));
;; Insn emitting pshuflw.  Operands 2..5 are element selectors, each in
;; 0..3 (const_0_to_3_operand), placed at the start of the selector list.
;; The output code packs them back into one immediate (shifts of 0, 2, 4,
;; 6 bits), stores it in operands[2] and prints the instruction.
;; Attributes: type sselog, mode TI.
3371 (define_insn "sse2_pshuflw_1"
3372 [(set (match_operand:V8HI 0 "register_operand" "=x")
3374 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3375 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3376 (match_operand 3 "const_0_to_3_operand" "")
3377 (match_operand 4 "const_0_to_3_operand" "")
3378 (match_operand 5 "const_0_to_3_operand" "")
3386 mask |= INTVAL (operands[2]) << 0;
3387 mask |= INTVAL (operands[3]) << 2;
3388 mask |= INTVAL (operands[4]) << 4;
3389 mask |= INTVAL (operands[5]) << 6;
3390 operands[2] = GEN_INT (mask);
3392 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3394 [(set_attr "type" "sselog")
3395 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer
;; shuffle mask (operand 2).  Each 2-bit field of the mask is biased by
;; +4 before being passed to gen_sse2_pshufhw_1, so the selectors land in
;; the range 4..7 that the insn's predicates require.
3397 (define_expand "sse2_pshufhw"
3398 [(match_operand:V8HI 0 "register_operand" "")
3399 (match_operand:V8HI 1 "nonimmediate_operand" "")
3400 (match_operand:SI 2 "const_int_operand" "")]
3403 int mask = INTVAL (operands[2]);
3404 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3405 GEN_INT (((mask >> 0) & 3) + 4),
3406 GEN_INT (((mask >> 2) & 3) + 4),
3407 GEN_INT (((mask >> 4) & 3) + 4),
3408 GEN_INT (((mask >> 6) & 3) + 4)));
;; Insn emitting pshufhw.  The selector list starts with a fixed index 0
;; and ends with operands 2..5, each in 4..7 (const_4_to_7_operand).  The
;; output code removes the +4 bias, packs the four values into one
;; immediate (shifts of 0, 2, 4, 6 bits), stores it in operands[2] and
;; prints the instruction.  Attributes: type sselog, mode TI.
3412 (define_insn "sse2_pshufhw_1"
3413 [(set (match_operand:V8HI 0 "register_operand" "=x")
3415 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3416 (parallel [(const_int 0)
3420 (match_operand 2 "const_4_to_7_operand" "")
3421 (match_operand 3 "const_4_to_7_operand" "")
3422 (match_operand 4 "const_4_to_7_operand" "")
3423 (match_operand 5 "const_4_to_7_operand" "")])))]
3427 mask |= (INTVAL (operands[2]) - 4) << 0;
3428 mask |= (INTVAL (operands[3]) - 4) << 2;
3429 mask |= (INTVAL (operands[4]) - 4) << 4;
3430 mask |= (INTVAL (operands[5]) - 4) << 6;
3431 operands[2] = GEN_INT (mask);
3433 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3435 [(set_attr "type" "sselog")
3436 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from an SImode operand (register or
;; memory).  The preparation statement sets operands[2] to the V4SI zero
;; constant (CONST0_RTX) for use by the pattern.
3438 (define_expand "sse2_loadd"
3439 [(set (match_operand:V4SI 0 "register_operand" "")
3442 (match_operand:SI 1 "nonimmediate_operand" ""))
3446 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn with three alternatives combining an SImode operand 2 with a V4SI
;; operand 1 that is either zero ("C") or tied to the destination ("0"):
;;   1. movd  -- source "mr", operand 1 zero (mode TI)
;;   2. movss -- source "m",  operand 1 zero (mode V4SF)
;;   3. movss -- source "x",  operand 1 tied to operand 0 (mode SF)
;; Attribute type is ssemov for all alternatives.
3448 (define_insn "sse2_loadld"
3449 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3452 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3453 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3457 movd\t{%2, %0|%0, %2}
3458 movss\t{%2, %0|%0, %2}
3459 movss\t{%2, %0|%0, %2}"
3460 [(set_attr "type" "ssemov")
3461 (set_attr "mode" "TI,V4SF,SF")])
;; Insn-and-split that copies element 0 of a V4SI register into an SImode
;; destination (constraint "mx").  Once reload has completed, the split
;; replaces it with a plain move: operand 1 is re-created as an SImode
;; register with the same register number.
3463 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3464 ;; be taken into account, and movdi isn't fully populated even without.
3465 (define_insn_and_split "sse2_stored"
3466 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3468 (match_operand:V4SI 1 "register_operand" "x")
3469 (parallel [(const_int 0)])))]
3472 "&& reload_completed"
3473 [(set (match_dup 0) (match_dup 1))]
3475 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; Expander that copies element 0 of a V2DI register into a DImode
;; destination (register or memory).
3478 (define_expand "sse_storeq"
3479 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3481 (match_operand:V2DI 1 "register_operand" "")
3482 (parallel [(const_int 0)])))]
;; Insn matching the copy of element 0 of a V2DI register into a DImode
;; destination (constraint "mx").  The second, constraint-free copy of the
;; same pattern is a split, active when TARGET_SSE && reload_completed: it
;; rewrites the extraction as a plain DImode move by re-creating operand 1
;; as a DImode register with the same register number.
3486 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3487 ;; be taken into account, and movdi isn't fully populated even without.
3488 (define_insn "*sse2_storeq"
3489 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3491 (match_operand:V2DI 1 "register_operand" "x")
3492 (parallel [(const_int 0)])))]
3497 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3499 (match_operand:V2DI 1 "register_operand" "")
3500 (parallel [(const_int 0)])))]
3501 "TARGET_SSE && reload_completed"
3502 [(set (match_dup 0) (match_dup 1))]
3504 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; Extracts element 1 of a V2DI operand into DImode when TARGET_SSE2 is
;; set and the two operands are not both memory.  Alternatives:
;;   1. movhps -- register source stored to memory (mode V2SF)
;;   2. psrldq $8 -- in place, source tied to the destination (mode TI)
;;   3. movq from %H1 -- offsettable memory source ("o"); %H1 presumably
;;      addresses the upper 8 bytes of operand 1 -- confirm (mode TI)
3507 (define_insn "*vec_extractv2di_1_sse2"
3508 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3510 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3511 (parallel [(const_int 1)])))]
3512 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3514 movhps\t{%1, %0|%0, %1}
3515 psrldq\t{$8, %0|%0, 8}
3516 movq\t{%H1, %0|%0, %H1}"
3517 [(set_attr "type" "ssemov,sseishft,ssemov")
3518 (set_attr "memory" "*,none,*")
3519 (set_attr "mode" "V2SF,TI,TI")])
;; SSE1-only counterpart of the element-1 extraction: enabled when
;; TARGET_SSE is set, TARGET_SSE2 is not, and the operands are not both
;; memory.  Alternatives emit movhps (store to memory), movhlps
;; (register to register) and movlps from %H1 (offsettable memory source).
3521 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
3522 (define_insn "*vec_extractv2di_1_sse"
3523 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3525 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3526 (parallel [(const_int 1)])))]
3527 "!TARGET_SSE2 && TARGET_SSE
3528 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3530 movhps\t{%1, %0|%0, %1}
3531 movhlps\t{%1, %0|%0, %1}
3532 movlps\t{%H1, %0|%0, %H1}"
3533 [(set_attr "type" "ssemov")
3534 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Produces a V4SI value from one SImode register operand.  Alternatives:
;;   1. pshufd with immediate 0 (source in a "Y" register, mode TI)
;;   2. shufps with immediate 0 applied to the destination itself
;;      (source tied to operand 0, mode V4SF)
;; Attribute type is sselog1.
3536 (define_insn "*vec_dupv4si"
3537 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3539 (match_operand:SI 1 "register_operand" " Y,0")))]
3542 pshufd\t{$0, %1, %0|%0, %1, 0}
3543 shufps\t{$0, %0, %0|%0, %0, 0}"
3544 [(set_attr "type" "sselog1")
3545 (set_attr "mode" "TI,V4SF")])
;; Produces a V2DI value from one DImode register operand that is tied to
;; the destination in both alternatives.  Attributes: type sselog1/ssemov,
;; mode TI/V4SF for the first/second alternative respectively.
3547 (define_insn "*vec_dupv2di"
3548 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3550 (match_operand:DI 1 "register_operand" " 0,0")))]
3555 [(set_attr "type" "sselog1,ssemov")
3556 (set_attr "mode" "TI,V4SF")])
;; Builds a V2SI value from two SImode operands.  Four alternatives, two
;; using "Y" registers and two using "*y" registers:
;;   - punpckldq when operand 1 is tied to the destination and operand 2
;;     is a register;
;;   - movd from operand 1 ("rm") when operand 2 is the constant "C".
;; Attribute type: sselog, ssemov, mmxcvt, mmxmov; mode: TI, TI, DI, DI.
3558 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3559 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3560 ;; alternatives pretty much forces the MMX alternative to be chosen.
3561 (define_insn "*sse2_concatv2si"
3562 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3564 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3565 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3568 punpckldq\t{%2, %0|%0, %2}
3569 movd\t{%1, %0|%0, %1}
3570 punpckldq\t{%2, %0|%0, %2}
3571 movd\t{%1, %0|%0, %1}"
3572 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3573 (set_attr "mode" "TI,TI,DI,DI")])
;; Builds a V2SI value from two SImode operands using "x" or "*y"
;; registers.  Alternatives emit unpcklps (operand 1 tied, operand 2 in
;; "x"), movss from memory (operand 2 is the constant "C"), punpckldq
;; (operand 1 tied, operand 2 in "*y") and movd (operand 1 "*rm",
;; operand 2 "C").
;; Attribute type: sselog, ssemov, mmxcvt, mmxmov; mode: V4SF, V4SF, DI, DI.
3575 (define_insn "*sse1_concatv2si"
3576 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3578 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3579 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3582 unpcklps\t{%2, %0|%0, %2}
3583 movss\t{%1, %0|%0, %1}
3584 punpckldq\t{%2, %0|%0, %2}
3585 movd\t{%1, %0|%0, %1}"
3586 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3587 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Builds a V4SI value from two V2SI operands; operand 1 is tied to the
;; destination in every alternative.  Operand 2 selects the instruction:
;; punpcklqdq ("Y" register), movlhps ("x" register) or movhps (memory).
;; Attribute type: sselog, ssemov, ssemov; mode: TI, V4SF, V2SF.
3589 (define_insn "*vec_concatv4si_1"
3590 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3592 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3593 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3596 punpcklqdq\t{%2, %0|%0, %2}
3597 movlhps\t{%2, %0|%0, %2}
3598 movhps\t{%2, %0|%0, %2}"
3599 [(set_attr "type" "sselog,ssemov,ssemov")
3600 (set_attr "mode" "TI,V4SF,V2SF")])
;; Builds a V2DI value from two DImode operands.  Six alternatives:
;;   1. movq       -- operand 1 from memory, operand 2 is "C"
;;   2. movq2dq    -- operand 1 in a "*y" register, operand 2 is "C"
;;   3. punpcklqdq -- operand 1 tied, operand 2 in a "Y" register
;;   4. movlhps    -- operand 1 tied, operand 2 in an "x" register
;;   5. movhps     -- operand 1 tied, operand 2 in memory
;;   6. movlps     -- operand 1 from memory, operand 2 tied to operand 0
;; Attribute mode: TI, TI, TI, V4SF, V2SF, V2SF.
3602 (define_insn "*vec_concatv2di"
3603 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3605 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3606 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3609 movq\t{%1, %0|%0, %1}
3610 movq2dq\t{%1, %0|%0, %1}
3611 punpcklqdq\t{%2, %0|%0, %2}
3612 movlhps\t{%2, %0|%0, %2}
3613 movhps\t{%2, %0|%0, %2}
3614 movlps\t{%1, %0|%0, %1}"
3615 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3616 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander: operand 0 is the V2DI vector, operand 1 a DImode register
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
3618 (define_expand "vec_setv2di"
3619 [(match_operand:V2DI 0 "register_operand" "")
3620 (match_operand:DI 1 "register_operand" "")
3621 (match_operand 2 "const_int_operand" "")]
3624 ix86_expand_vector_set (false, operands[0], operands[1],
3625 INTVAL (operands[2]));
;; Expander: operand 0 is the DImode destination register, operand 1 the
;; V2DI vector and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; and INTVAL (operands[2]) as its last.
3629 (define_expand "vec_extractv2di"
3630 [(match_operand:DI 0 "register_operand" "")
3631 (match_operand:V2DI 1 "register_operand" "")
3632 (match_operand 2 "const_int_operand" "")]
3635 ix86_expand_vector_extract (false, operands[0], operands[1],
3636 INTVAL (operands[2]));
;; Expander: operand 0 is the V2DI destination register; operand 1 has no
;; predicate and is passed through unchanged.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
3640 (define_expand "vec_initv2di"
3641 [(match_operand:V2DI 0 "register_operand" "")
3642 (match_operand 1 "" "")]
3645 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V4SI vector, operand 1 an SImode register
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
3649 (define_expand "vec_setv4si"
3650 [(match_operand:V4SI 0 "register_operand" "")
3651 (match_operand:SI 1 "register_operand" "")
3652 (match_operand 2 "const_int_operand" "")]
3655 ix86_expand_vector_set (false, operands[0], operands[1],
3656 INTVAL (operands[2]));
;; Expander: operand 0 is the SImode destination register, operand 1 the
;; V4SI vector and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; and INTVAL (operands[2]) as its last.
3660 (define_expand "vec_extractv4si"
3661 [(match_operand:SI 0 "register_operand" "")
3662 (match_operand:V4SI 1 "register_operand" "")
3663 (match_operand 2 "const_int_operand" "")]
3666 ix86_expand_vector_extract (false, operands[0], operands[1],
3667 INTVAL (operands[2]));
;; Expander: operand 0 is the V4SI destination register; operand 1 has no
;; predicate and is passed through unchanged.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
3671 (define_expand "vec_initv4si"
3672 [(match_operand:V4SI 0 "register_operand" "")
3673 (match_operand 1 "" "")]
3676 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V8HI vector, operand 1 an HImode register
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
3680 (define_expand "vec_setv8hi"
3681 [(match_operand:V8HI 0 "register_operand" "")
3682 (match_operand:HI 1 "register_operand" "")
3683 (match_operand 2 "const_int_operand" "")]
3686 ix86_expand_vector_set (false, operands[0], operands[1],
3687 INTVAL (operands[2]));
;; Expander: operand 0 is the HImode destination register, operand 1 the
;; V8HI vector and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; and INTVAL (operands[2]) as its last.
3691 (define_expand "vec_extractv8hi"
3692 [(match_operand:HI 0 "register_operand" "")
3693 (match_operand:V8HI 1 "register_operand" "")
3694 (match_operand 2 "const_int_operand" "")]
3697 ix86_expand_vector_extract (false, operands[0], operands[1],
3698 INTVAL (operands[2]));
;; Expander: operand 0 is the V8HI destination register; operand 1 has no
;; predicate and is passed through unchanged.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
3702 (define_expand "vec_initv8hi"
3703 [(match_operand:V8HI 0 "register_operand" "")
3704 (match_operand 1 "" "")]
3707 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander: operand 0 is the V16QI vector, operand 1 a QImode register
;; value and operand 2 a constant integer.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument and
;; INTVAL (operands[2]) as its last.
3711 (define_expand "vec_setv16qi"
3712 [(match_operand:V16QI 0 "register_operand" "")
3713 (match_operand:QI 1 "register_operand" "")
3714 (match_operand 2 "const_int_operand" "")]
3717 ix86_expand_vector_set (false, operands[0], operands[1],
3718 INTVAL (operands[2]));
;; Expander: operand 0 is the QImode destination register, operand 1 the
;; V16QI vector and operand 2 a constant integer.  All work is delegated
;; to ix86_expand_vector_extract, called with `false' as its first
;; argument and INTVAL (operands[2]) as its last.
3722 (define_expand "vec_extractv16qi"
3723 [(match_operand:QI 0 "register_operand" "")
3724 (match_operand:V16QI 1 "register_operand" "")
3725 (match_operand 2 "const_int_operand" "")]
3728 ix86_expand_vector_extract (false, operands[0], operands[1],
3729 INTVAL (operands[2]));
;; Expander: operand 0 is the V16QI destination register; operand 1 has
;; no predicate and is passed through unchanged.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
3733 (define_expand "vec_initv16qi"
3734 [(match_operand:V16QI 0 "register_operand" "")
3735 (match_operand 1 "" "")]
3738 ix86_expand_vector_init (false, operands[0], operands[1]);
3742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3746 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits pavgb on V16QI operands.  Enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok (PLUS, V16QImode, operands) accepts the
;; operands.  The pattern includes a constant vector of sixteen 1s
;; (presumably the rounding term of the average -- confirm against the
;; full pattern).  Operand 1 is tied to the destination ("%0").
;; Attributes: type sseiadd, mode TI.
3748 (define_insn "sse2_uavgv16qi3"
3749 [(set (match_operand:V16QI 0 "register_operand" "=x")
3755 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3757 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3758 (const_vector:V16QI [(const_int 1) (const_int 1)
3759 (const_int 1) (const_int 1)
3760 (const_int 1) (const_int 1)
3761 (const_int 1) (const_int 1)
3762 (const_int 1) (const_int 1)
3763 (const_int 1) (const_int 1)
3764 (const_int 1) (const_int 1)
3765 (const_int 1) (const_int 1)]))
3767 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3768 "pavgb\t{%2, %0|%0, %2}"
3769 [(set_attr "type" "sseiadd")
3770 (set_attr "mode" "TI")])
;; Emits pavgw on V8HI operands.  Enabled when TARGET_SSE2 holds and
;; ix86_binary_operator_ok (PLUS, V8HImode, operands) accepts the
;; operands.  The pattern includes a constant vector of eight 1s
;; (presumably the rounding term of the average -- confirm against the
;; full pattern).  Operand 1 is tied to the destination ("%0").
;; Attributes: type sseiadd, mode TI.
3772 (define_insn "sse2_uavgv8hi3"
3773 [(set (match_operand:V8HI 0 "register_operand" "=x")
3779 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3781 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3782 (const_vector:V8HI [(const_int 1) (const_int 1)
3783 (const_int 1) (const_int 1)
3784 (const_int 1) (const_int 1)
3785 (const_int 1) (const_int 1)]))
3787 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3788 "pavgw\t{%2, %0|%0, %2}"
3789 [(set_attr "type" "sseiadd")
3790 (set_attr "mode" "TI")])
;; Emits psadbw.  The operation is modelled as an opaque unspec taking
;; two V16QI inputs and yielding V2DI, rather than as explicit RTL
;; arithmetic.  Operand 1 is tied to the destination; operand 2 uses
;; constraint "xm".  Attributes: type sseiadd, mode TI.
3792 ;; The correct representation for this is absolutely enormous, and
3793 ;; surely not generally useful.
3794 (define_insn "sse2_psadbw"
3795 [(set (match_operand:V2DI 0 "register_operand" "=x")
3796 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3797 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3800 "psadbw\t{%2, %0|%0, %2}"
3801 [(set_attr "type" "sseiadd")
3802 (set_attr "mode" "TI")])
;; Emits movmskps: an unspec of a V4SF register ("x") produces an SImode
;; result in a general register ("r").
;; Attributes: type ssecvt, mode V4SF.
3804 (define_insn "sse_movmskps"
3805 [(set (match_operand:SI 0 "register_operand" "=r")
3806 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3809 "movmskps\t{%1, %0|%0, %1}"
3810 [(set_attr "type" "ssecvt")
3811 (set_attr "mode" "V4SF")])
;; Emits movmskpd: an unspec of a V2DF register ("x") produces an SImode
;; result in a general register ("r").
;; Attributes: type ssecvt, mode V2DF.
3813 (define_insn "sse2_movmskpd"
3814 [(set (match_operand:SI 0 "register_operand" "=r")
3815 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3818 "movmskpd\t{%1, %0|%0, %1}"
3819 [(set_attr "type" "ssecvt")
3820 (set_attr "mode" "V2DF")])
;; Emits pmovmskb: an unspec of a V16QI register ("x") produces an SImode
;; result in a general register ("r").  Attribute type is ssecvt.
;; NOTE(review): the "mode" attribute is V2DF although the input operand
;; is V16QI -- confirm whether this is deliberate (other attributes may
;; be derived from it) before changing it.
3822 (define_insn "sse2_pmovmskb"
3823 [(set (match_operand:SI 0 "register_operand" "=r")
3824 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3827 "pmovmskb\t{%1, %0|%0, %1}"
3828 [(set_attr "type" "ssecvt")
3829 (set_attr "mode" "V2DF")])
;; Expander for the masked byte store: operand 0 is a V16QI memory
;; destination, set from an unspec whose inputs include V16QI register
;; operands 1 and 2.
3831 (define_expand "sse2_maskmovdqu"
3832 [(set (match_operand:V16QI 0 "memory_operand" "")
3833 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3834 (match_operand:V16QI 2 "register_operand" "x")
;; 32-bit form of maskmovdqu (TARGET_SSE2 && !TARGET_64BIT).  The store
;; address is an SImode register with constraint "D"; the unspec reads
;; operands 1 and 2 plus the previous contents of the same memory
;; location.  The template prints only operands 1 and 2; the address
;; register does not appear in it.  Attributes: type ssecvt, mode TI.
3840 (define_insn "*sse2_maskmovdqu"
3841 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3842 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3843 (match_operand:V16QI 2 "register_operand" "x")
3844 (mem:V16QI (match_dup 0))]
3846 "TARGET_SSE2 && !TARGET_64BIT"
3847 ;; @@@ check ordering of operands in intel/nonintel syntax
3848 "maskmovdqu\t{%2, %1|%1, %2}"
3849 [(set_attr "type" "ssecvt")
3850 (set_attr "mode" "TI")])
;; 64-bit form of maskmovdqu (TARGET_SSE2 && TARGET_64BIT).  Identical to
;; the 32-bit pattern except that the address register (constraint "D")
;; is DImode.  The template prints only operands 1 and 2.
;; Attributes: type ssecvt, mode TI.
3852 (define_insn "*sse2_maskmovdqu_rex64"
3853 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3854 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3855 (match_operand:V16QI 2 "register_operand" "x")
3856 (mem:V16QI (match_dup 0))]
3858 "TARGET_SSE2 && TARGET_64BIT"
3859 ;; @@@ check ordering of operands in intel/nonintel syntax
3860 "maskmovdqu\t{%2, %1|%1, %2}"
3861 [(set_attr "type" "ssecvt")
3862 (set_attr "mode" "TI")])
;; Volatile unspec whose single input is an SImode memory operand ("m").
;; Attributes: type sse, memory load.
3864 (define_insn "sse_ldmxcsr"
3865 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3869 [(set_attr "type" "sse")
3870 (set_attr "memory" "load")])
;; Sets an SImode memory operand ("=m") from the volatile unspec
;; UNSPECV_STMXCSR, which takes only a dummy (const_int 0) input.
;; Attributes: type sse, memory store.
3872 (define_insn "sse_stmxcsr"
3873 [(set (match_operand:SI 0 "memory_operand" "=m")
3874 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3877 [(set_attr "type" "sse")
3878 (set_attr "memory" "store")])
;; Expander for the store fence, enabled for TARGET_SSE or
;; TARGET_3DNOW_A.  The preparation code creates operand 0 as a BLKmode
;; MEM over a scratch address and marks it volatile; the pattern then
;; uses that MEM as the input of UNSPEC_SFENCE.
3880 (define_expand "sse_sfence"
3882 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3883 "TARGET_SSE || TARGET_3DNOW_A"
3885 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3886 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set to UNSPEC_SFENCE of itself, i.e.
;; the form produced by the "sse_sfence" expander.  Enabled for
;; TARGET_SSE or TARGET_3DNOW_A.  Attributes: type sse, memory unknown.
3889 (define_insn "*sse_sfence"
3890 [(set (match_operand:BLK 0 "" "")
3891 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3892 "TARGET_SSE || TARGET_3DNOW_A"
3894 [(set_attr "type" "sse")
3895 (set_attr "memory" "unknown")])
;; Volatile unspec whose single input is an address operand
;; (address_operand, constraint "p").
;; Attributes: type sse, memory unknown.
3897 (define_insn "sse2_clflush"
3898 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3902 [(set_attr "type" "sse")
3903 (set_attr "memory" "unknown")])
;; Expander for the memory fence.  The preparation code creates operand 0
;; as a BLKmode MEM over a scratch address and marks it volatile; the
;; pattern then uses that MEM as the input of UNSPEC_MFENCE.
3905 (define_expand "sse2_mfence"
3907 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3910 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3911 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set to UNSPEC_MFENCE of itself, i.e.
;; the form produced by the "sse2_mfence" expander.
;; Attributes: type sse, memory unknown.
3914 (define_insn "*sse2_mfence"
3915 [(set (match_operand:BLK 0 "" "")
3916 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3919 [(set_attr "type" "sse")
3920 (set_attr "memory" "unknown")])
;; Expander for the load fence.  The preparation code creates operand 0
;; as a BLKmode MEM over a scratch address and marks it volatile; the
;; pattern then uses that MEM as the input of UNSPEC_LFENCE.
3922 (define_expand "sse2_lfence"
3924 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3927 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3928 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching a BLKmode operand set to UNSPEC_LFENCE of itself, i.e.
;; the form produced by the "sse2_lfence" expander.
;; Attributes: type sse, memory unknown.
3931 (define_insn "*sse2_lfence"
3932 [(set (match_operand:BLK 0 "" "")
3933 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3936 [(set_attr "type" "sse")
3937 (set_attr "memory" "unknown")])
;; Volatile unspec taking two SImode register operands pinned by the
;; constraints "a" and "c".  The same SImode pattern serves 64-bit
;; targets, for the reason given in the comment inside the pattern.
;; The "length" attribute is fixed at 3.
3939 (define_insn "sse3_mwait"
3940 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3941 (match_operand:SI 1 "register_operand" "c")]
3944 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
3945 ;; Since 32bit register operands are implicitly zero extended to 64bit,
3946 ;; we only need to set up 32bit registers.
3948 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT).  Volatile unspec taking
;; three SImode register operands pinned by the constraints "a", "c" and
;; "d"; all three are printed as explicit operands of `monitor'.
;; The "length" attribute is fixed at 3.
3950 (define_insn "sse3_monitor"
3951 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3952 (match_operand:SI 1 "register_operand" "c")
3953 (match_operand:SI 2 "register_operand" "d")]
3955 "TARGET_SSE3 && !TARGET_64BIT"
3956 "monitor\t%0, %1, %2"
3957 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Operand 0 (constraint
;; "a") is DImode; operands 1 and 2 (constraints "c" and "d") stay
;; SImode, for the reason given in the comment inside the pattern.
;; The "length" attribute is fixed at 3.
3959 (define_insn "sse3_monitor64"
3960 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
3961 (match_operand:SI 1 "register_operand" "c")
3962 (match_operand:SI 2 "register_operand" "d")]
3964 "TARGET_SSE3 && TARGET_64BIT"
3965 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
3966 ;; RCX and RDX are used. Since 32bit register operands are implicitly
3967 ;; zero extended to 64bit, we only need to set up 32bit registers.
3969 [(set_attr "length" "3")])
;; Emits phaddw on V8HI operands (SSE register form).  The pattern picks
;; adjacent HImode element pairs (0,1), (2,3), (4,5), (6,7) from operand
;; 1, then the same pairs from operand 2.  Operand 1 is tied to the
;; destination; operand 2 uses constraint "xm".
;; Attributes: type sseiadd, mode TI.
3972 (define_insn "ssse3_phaddwv8hi3"
3973 [(set (match_operand:V8HI 0 "register_operand" "=x")
3979 (match_operand:V8HI 1 "register_operand" "0")
3980 (parallel [(const_int 0)]))
3981 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
3983 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
3984 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
3987 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
3988 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
3990 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
3991 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
3996 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3997 (parallel [(const_int 0)]))
3998 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4000 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4001 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4004 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4005 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4007 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4008 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4010 "phaddw\t{%2, %0|%0, %2}"
4011 [(set_attr "type" "sseiadd")
4012 (set_attr "mode" "TI")])
;; Emits phaddw on V4HI operands ("y" register form).  The pattern picks
;; adjacent HImode element pairs (0,1), (2,3) from operand 1, then the
;; same pairs from operand 2.  Operand 1 is tied to the destination;
;; operand 2 uses constraint "ym".  Attributes: type sseiadd, mode DI.
4014 (define_insn "ssse3_phaddwv4hi3"
4015 [(set (match_operand:V4HI 0 "register_operand" "=y")
4020 (match_operand:V4HI 1 "register_operand" "0")
4021 (parallel [(const_int 0)]))
4022 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4024 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4025 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4029 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4030 (parallel [(const_int 0)]))
4031 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4033 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4034 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4036 "phaddw\t{%2, %0|%0, %2}"
4037 [(set_attr "type" "sseiadd")
4038 (set_attr "mode" "DI")])
;; Emits phaddd on V4SI operands (SSE register form).  The pattern picks
;; adjacent SImode element pairs (0,1), (2,3) from operand 1, then the
;; same pairs from operand 2.  Operand 1 is tied to the destination;
;; operand 2 uses constraint "xm".  Attributes: type sseiadd, mode TI.
4040 (define_insn "ssse3_phadddv4si3"
4041 [(set (match_operand:V4SI 0 "register_operand" "=x")
4046 (match_operand:V4SI 1 "register_operand" "0")
4047 (parallel [(const_int 0)]))
4048 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4050 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4051 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4055 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4056 (parallel [(const_int 0)]))
4057 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4059 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4060 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4062 "phaddd\t{%2, %0|%0, %2}"
4063 [(set_attr "type" "sseiadd")
4064 (set_attr "mode" "TI")])
;; Emits phaddd on V2SI operands ("y" register form).  The pattern picks
;; the element pair (0,1) from operand 1, then the same pair from operand
;; 2.  Operand 1 is tied to the destination; operand 2 uses constraint
;; "ym".  Attributes: type sseiadd, mode DI.
4066 (define_insn "ssse3_phadddv2si3"
4067 [(set (match_operand:V2SI 0 "register_operand" "=y")
4071 (match_operand:V2SI 1 "register_operand" "0")
4072 (parallel [(const_int 0)]))
4073 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4076 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4077 (parallel [(const_int 0)]))
4078 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4080 "phaddd\t{%2, %0|%0, %2}"
4081 [(set_attr "type" "sseiadd")
4082 (set_attr "mode" "DI")])
;; Emits phaddsw on V8HI operands (SSE register form).  The pattern picks
;; adjacent HImode element pairs (0,1), (2,3), (4,5), (6,7) from operand
;; 1, then the same pairs from operand 2.  Operand 1 is tied to the
;; destination; operand 2 uses constraint "xm".
;; Attributes: type sseiadd, mode TI.
4084 (define_insn "ssse3_phaddswv8hi3"
4085 [(set (match_operand:V8HI 0 "register_operand" "=x")
4091 (match_operand:V8HI 1 "register_operand" "0")
4092 (parallel [(const_int 0)]))
4093 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4095 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4096 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4099 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4100 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4102 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4103 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4108 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4109 (parallel [(const_int 0)]))
4110 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4112 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4113 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4116 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4117 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4119 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4120 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4122 "phaddsw\t{%2, %0|%0, %2}"
4123 [(set_attr "type" "sseiadd")
4124 (set_attr "mode" "TI")])
;; Emits phaddsw on V4HI operands ("y" register form).  The pattern picks
;; adjacent HImode element pairs (0,1), (2,3) from operand 1, then the
;; same pairs from operand 2.  Operand 1 is tied to the destination;
;; operand 2 uses constraint "ym".  Attributes: type sseiadd, mode DI.
4126 (define_insn "ssse3_phaddswv4hi3"
4127 [(set (match_operand:V4HI 0 "register_operand" "=y")
4132 (match_operand:V4HI 1 "register_operand" "0")
4133 (parallel [(const_int 0)]))
4134 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4136 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4137 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4141 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4142 (parallel [(const_int 0)]))
4143 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4145 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4146 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4148 "phaddsw\t{%2, %0|%0, %2}"
4149 [(set_attr "type" "sseiadd")
4150 (set_attr "mode" "DI")])
;; Emits phsubw on V8HI operands (SSE register form).  The pattern picks
;; adjacent HImode element pairs (0,1), (2,3), (4,5), (6,7) from operand
;; 1, then the same pairs from operand 2.  Operand 1 is tied to the
;; destination; operand 2 uses constraint "xm".
;; Attributes: type sseiadd, mode TI.
4152 (define_insn "ssse3_phsubwv8hi3"
4153 [(set (match_operand:V8HI 0 "register_operand" "=x")
4159 (match_operand:V8HI 1 "register_operand" "0")
4160 (parallel [(const_int 0)]))
4161 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4163 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4164 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4167 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4168 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4170 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4171 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4176 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4177 (parallel [(const_int 0)]))
4178 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4180 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4181 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4184 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4185 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4187 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4188 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4190 "phsubw\t{%2, %0|%0, %2}"
4191 [(set_attr "type" "sseiadd")
4192 (set_attr "mode" "TI")])
;; Emits phsubw on V4HI operands ("y" register form).  The pattern picks
;; adjacent HImode element pairs (0,1), (2,3) from operand 1, then the
;; same pairs from operand 2.  Operand 1 is tied to the destination;
;; operand 2 uses constraint "ym".  Attributes: type sseiadd, mode DI.
4194 (define_insn "ssse3_phsubwv4hi3"
4195 [(set (match_operand:V4HI 0 "register_operand" "=y")
4200 (match_operand:V4HI 1 "register_operand" "0")
4201 (parallel [(const_int 0)]))
4202 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4204 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4205 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4209 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4210 (parallel [(const_int 0)]))
4211 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4213 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4214 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4216 "phsubw\t{%2, %0|%0, %2}"
4217 [(set_attr "type" "sseiadd")
4218 (set_attr "mode" "DI")])
;; Emits phsubd on V4SI operands (SSE register form).  The pattern picks
;; adjacent SImode element pairs (0,1), (2,3) from operand 1, then the
;; same pairs from operand 2.  Operand 1 is tied to the destination;
;; operand 2 uses constraint "xm".  Attributes: type sseiadd, mode TI.
4220 (define_insn "ssse3_phsubdv4si3"
4221 [(set (match_operand:V4SI 0 "register_operand" "=x")
4226 (match_operand:V4SI 1 "register_operand" "0")
4227 (parallel [(const_int 0)]))
4228 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4230 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4231 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4235 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4236 (parallel [(const_int 0)]))
4237 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4239 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4240 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4242 "phsubd\t{%2, %0|%0, %2}"
4243 [(set_attr "type" "sseiadd")
4244 (set_attr "mode" "TI")])
;; Emits phsubd on V2SI operands ("y" register form).  The pattern picks
;; the element pair (0,1) from operand 1, then the same pair from operand
;; 2.  Operand 1 is tied to the destination; operand 2 uses constraint
;; "ym".  Attributes: type sseiadd, mode DI.
4246 (define_insn "ssse3_phsubdv2si3"
4247 [(set (match_operand:V2SI 0 "register_operand" "=y")
4251 (match_operand:V2SI 1 "register_operand" "0")
4252 (parallel [(const_int 0)]))
4253 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4256 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4257 (parallel [(const_int 0)]))
4258 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4260 "phsubd\t{%2, %0|%0, %2}"
4261 [(set_attr "type" "sseiadd")
4262 (set_attr "mode" "DI")])
;; Emits phsubsw on V8HI operands (SSE register form).  The pattern picks
;; adjacent HImode element pairs (0,1), (2,3), (4,5), (6,7) from operand
;; 1, then the same pairs from operand 2.  Operand 1 is tied to the
;; destination; operand 2 uses constraint "xm".
;; Attributes: type sseiadd, mode TI.
4264 (define_insn "ssse3_phsubswv8hi3"
4265 [(set (match_operand:V8HI 0 "register_operand" "=x")
4271 (match_operand:V8HI 1 "register_operand" "0")
4272 (parallel [(const_int 0)]))
4273 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4275 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4276 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4279 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4280 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4282 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4283 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4288 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4289 (parallel [(const_int 0)]))
4290 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4292 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4293 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4296 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4297 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4299 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4300 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4302 "phsubsw\t{%2, %0|%0, %2}"
4303 [(set_attr "type" "sseiadd")
4304 (set_attr "mode" "TI")])
;; SSSE3 PHSUBSW, 64-bit MMX form (V4HI).  Operand 0 is an MMX register
;; ("=y") tied to operand 1; operand 2 may be an MMX register or memory
;; ("ym").
;; The visible vec_selects read HImode elements 0..3 of each input.  The
;; combining rtx codes are not visible in this excerpt -- presumably a
;; saturating pairwise subtraction, per the "sw" mnemonic suffix; confirm
;; against the full pattern.
4306 (define_insn "ssse3_phsubswv4hi3"
4307 [(set (match_operand:V4HI 0 "register_operand" "=y")
4312 (match_operand:V4HI 1 "register_operand" "0")
4313 (parallel [(const_int 0)]))
4314 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4316 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4317 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4321 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4322 (parallel [(const_int 0)]))
4323 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4325 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4326 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4328 "phsubsw\t{%2, %0|%0, %2}"
4329 [(set_attr "type" "sseiadd")
4330 (set_attr "mode" "DI")])
;; SSSE3 PMADDUBSW, 128-bit form: V16QI inputs (operands 1 and 2), V8HI
;; result in an SSE register ("=x").  For each input the visible rtl has one
;; selector list starting at element 0 and another starting at element 1;
;; the full selector lists and the extend/multiply/add rtx codes fall
;; outside this excerpt.
;; NOTE(review): operand 1 carries the commutative marker "%".  Per the
;; Intel SDM, PMADDUBSW treats one source as unsigned bytes and the other
;; as signed bytes, so swapping the inputs would change the result --
;; confirm that "%" is really safe here.
;; NOTE(review): the vec_selects on (match_dup 1)/(match_dup 2) are typed
;; V16QI, the same mode as their input, yet their selector list starts at 1
;; and ends at 15.  If it picks only the odd elements, the result mode
;; should be V8QI -- verify against the full pattern.
4332 (define_insn "ssse3_pmaddubswv8hi3"
4333 [(set (match_operand:V8HI 0 "register_operand" "=x")
4338 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
4339 (parallel [(const_int 0)
4349 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
4350 (parallel [(const_int 0)
4360 (vec_select:V16QI (match_dup 1)
4361 (parallel [(const_int 1)
4370 (vec_select:V16QI (match_dup 2)
4371 (parallel [(const_int 1)
4378 (const_int 15)]))))))]
4380 "pmaddubsw\t{%2, %0|%0, %2}"
4381 [(set_attr "type" "sseiadd")
4382 (set_attr "mode" "TI")])
;; SSSE3 PMADDUBSW, 64-bit MMX form: V8QI inputs (operands 1 and 2), V4HI
;; result in an MMX register ("=y").  For each input the visible rtl has one
;; selector list starting at element 0 and another starting at element 1;
;; the full selector lists and the extend/multiply/add rtx codes fall
;; outside this excerpt.
;; NOTE(review): operand 1 carries the commutative marker "%".  Per the
;; Intel SDM, PMADDUBSW treats one source as unsigned bytes and the other
;; as signed bytes, so swapping the inputs would change the result --
;; confirm that "%" is really safe here.
;; NOTE(review): the vec_selects on (match_dup 1)/(match_dup 2) are typed
;; V8QI, the same mode as their input, yet their selector list starts at 1
;; and ends at 7.  If it picks only the odd elements, the result mode
;; should be V4QI -- verify against the full pattern.
4384 (define_insn "ssse3_pmaddubswv4hi3"
4385 [(set (match_operand:V4HI 0 "register_operand" "=y")
4390 (match_operand:V8QI 1 "nonimmediate_operand" "%0")
4391 (parallel [(const_int 0)
4397 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
4398 (parallel [(const_int 0)
4404 (vec_select:V8QI (match_dup 1)
4405 (parallel [(const_int 1)
4410 (vec_select:V8QI (match_dup 2)
4411 (parallel [(const_int 1)
4414 (const_int 7)]))))))]
4416 "pmaddubsw\t{%2, %0|%0, %2}"
4417 [(set_attr "type" "sseiadd")
4418 (set_attr "mode" "DI")])
;; SSSE3 PMULHRSW, 128-bit form (V8HI).  The insn condition requires
;; TARGET_SSSE3 and ix86_binary_operator_ok for MULT.  Operand 1 is marked
;; commutative ("%0") and tied to the destination SSE register; operand 2
;; may be an SSE register or memory ("xm").
;; The all-ones const_vector is presumably the rounding term of the
;; instruction; the surrounding multiply/shift/truncate rtx codes are not
;; visible in this excerpt, so confirm against the full pattern.
4420 (define_insn "ssse3_pmulhrswv8hi3"
4421 [(set (match_operand:V8HI 0 "register_operand" "=x")
4428 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4430 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4432 (const_vector:V8HI [(const_int 1) (const_int 1)
4433 (const_int 1) (const_int 1)
4434 (const_int 1) (const_int 1)
4435 (const_int 1) (const_int 1)]))
4437 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4438 "pmulhrsw\t{%2, %0|%0, %2}"
4439 [(set_attr "type" "sseimul")
4440 (set_attr "mode" "TI")])
;; SSSE3 PMULHRSW, 64-bit MMX form (V4HI).  The insn condition requires
;; TARGET_SSSE3 and ix86_binary_operator_ok for MULT.  Operand 1 is marked
;; commutative ("%0") and tied to the destination MMX register; operand 2
;; may be an MMX register or memory ("ym").
;; The all-ones const_vector is presumably the rounding term of the
;; instruction; the surrounding multiply/shift/truncate rtx codes are not
;; visible in this excerpt, so confirm against the full pattern.
4442 (define_insn "ssse3_pmulhrswv4hi3"
4443 [(set (match_operand:V4HI 0 "register_operand" "=y")
4450 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
4452 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
4454 (const_vector:V4HI [(const_int 1) (const_int 1)
4455 (const_int 1) (const_int 1)]))
4457 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
4458 "pmulhrsw\t{%2, %0|%0, %2}"
4459 [(set_attr "type" "sseimul")
4460 (set_attr "mode" "DI")])
;; SSSE3 PSHUFB, 128-bit form (V16QI), modeled as an unspec of operands 1
;; and 2 (the unspec identifier and the insn condition are not visible in
;; this excerpt).  Operand 0 is an SSE register tied to operand 1; operand 2
;; may be an SSE register or memory ("xm").
;; The ';' after the output template starts an empty md comment and has no
;; effect on the pattern.
4462 (define_insn "ssse3_pshufbv16qi3"
4463 [(set (match_operand:V16QI 0 "register_operand" "=x")
4464 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
4465 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
4468 "pshufb\t{%2, %0|%0, %2}";
4469 [(set_attr "type" "sselog1")
4470 (set_attr "mode" "TI")])
;; SSSE3 PSHUFB, 64-bit MMX form (V8QI), modeled as an unspec of operands 1
;; and 2 (the unspec identifier and the insn condition are not visible in
;; this excerpt).  Operand 0 is an MMX register tied to operand 1; operand 2
;; may be an MMX register or memory ("ym").
;; The ';' after the output template starts an empty md comment and has no
;; effect on the pattern.
4472 (define_insn "ssse3_pshufbv8qi3"
4473 [(set (match_operand:V8QI 0 "register_operand" "=y")
4474 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
4475 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
4478 "pshufb\t{%2, %0|%0, %2}";
4479 [(set_attr "type" "sselog1")
4480 (set_attr "mode" "DI")])
;; SSSE3 PSIGNB/PSIGNW/PSIGND on 128-bit vectors.  The pattern is
;; instantiated once per mode in SSEMODE124 (V16QI, V8HI, V4SI), and
;; <ssevecsize> supplies the matching b/w/d mnemonic suffix.  It is modeled
;; as an unspec of operands 1 and 2 (the unspec identifier and the insn
;; condition are not visible in this excerpt).  Operand 0 is an SSE
;; register tied to operand 1; operand 2 may be an SSE register or memory.
;; The ';' after the output template starts an empty md comment and has no
;; effect on the pattern.
4482 (define_insn "ssse3_psign<mode>3"
4483 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4484 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
4485 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
4488 "psign<ssevecsize>\t{%2, %0|%0, %2}";
4489 [(set_attr "type" "sselog1")
4490 (set_attr "mode" "TI")])
;; SSSE3 PSIGN on 64-bit MMX vectors.  The pattern is instantiated once per
;; mode in MMXMODEI, with <mmxvecsize> supplying the mnemonic suffix; both
;; are defined outside this file excerpt -- presumably the MMX integer
;; vector modes and their b/w/d suffixes; TODO confirm.  It is modeled as
;; an unspec of operands 1 and 2 (the unspec identifier and the insn
;; condition are not visible here).  Operand 0 is an MMX register tied to
;; operand 1; operand 2 may be an MMX register or memory.
;; The ';' after the output template starts an empty md comment and has no
;; effect on the pattern.
4492 (define_insn "ssse3_psign<mode>3"
4493 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
4494 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
4495 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
4498 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
4499 [(set_attr "type" "sselog1")
4500 (set_attr "mode" "DI")])
;; SSSE3 PALIGNR, 128-bit form (TImode), modeled as an unspec of operands
;; 1, 2 and 3 (the unspec identifier and the insn condition are not visible
;; in this excerpt).  Operand 0 is an SSE register tied to operand 1;
;; operand 2 may be an SSE register or memory.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing, so the RTL presumably carries the shift
;; as a bit count while the emitted immediate is a byte count.
4502 (define_insn "ssse3_palignrti"
4503 [(set (match_operand:TI 0 "register_operand" "=x")
4504 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
4505 (match_operand:TI 2 "nonimmediate_operand" "xm")
4506 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
4510 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
4511 return "palignr\t{%3, %2, %0|%0, %2, %3}";
4513 [(set_attr "type" "sseishft")
4514 (set_attr "mode" "TI")])
;; SSSE3 PALIGNR, 64-bit MMX form (DImode), modeled as an unspec of
;; operands 1, 2 and 3 (the unspec identifier and the insn condition are
;; not visible in this excerpt).  Operand 0 is an MMX register tied to
;; operand 1; operand 2 may be an MMX register or memory.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand.  The output code
;; divides it by 8 before printing, so the RTL presumably carries the shift
;; as a bit count while the emitted immediate is a byte count.
4516 (define_insn "ssse3_palignrdi"
4517 [(set (match_operand:DI 0 "register_operand" "=y")
4518 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
4519 (match_operand:DI 2 "nonimmediate_operand" "ym")
4520 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
4524 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
4525 return "palignr\t{%3, %2, %0|%0, %2, %3}";
4527 [(set_attr "type" "sseishft")
4528 (set_attr "mode" "DI")])
;; Element-wise absolute value on 128-bit integer vectors, emitted as
;; PABSB/PABSW/PABSD.  The pattern is instantiated once per mode in
;; SSEMODE124 (V16QI, V8HI, V4SI), with <ssevecsize> supplying the b/w/d
;; suffix.  Unlike the two-address patterns around it, the source
;; (operand 1, SSE register or memory) is not tied to the destination.
;; The insn condition is not visible in this excerpt.
;; The ';' after the output template starts an empty md comment and has no
;; effect on the pattern.
4530 (define_insn "abs<mode>2"
4531 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
4532 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
4534 "pabs<ssevecsize>\t{%1, %0|%0, %1}";
4535 [(set_attr "type" "sselog1")
4536 (set_attr "mode" "TI")])
;; Element-wise absolute value on 64-bit MMX integer vectors, emitted as
;; PABS with the suffix from <mmxvecsize>.  MMXMODEI and mmxvecsize are
;; defined outside this file excerpt -- presumably the MMX integer vector
;; modes and their b/w/d suffixes; TODO confirm.  The source (operand 1,
;; MMX register or memory) is not tied to the destination.  The insn
;; condition is not visible in this excerpt.
;; The ';' after the output template starts an empty md comment and has no
;; effect on the pattern.
4538 (define_insn "abs<mode>2"
4539 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
4540 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
4542 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
4543 [(set_attr "type" "sselog1")
4544 (set_attr "mode" "DI")])
4546 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4548 ;; AMD SSE4A instructions
4550 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A MOVNTSD taking a vector source: stores element 0 of the V2DF SSE
;; register in operand 1 to the DFmode memory operand 0.  The store is
;; wrapped in an unspec (identifier and insn condition are not visible in
;; this excerpt) rather than expressed as a plain move.
4552 (define_insn "sse4a_vmmovntv2df"
4553 [(set (match_operand:DF 0 "memory_operand" "=m")
4554 (unspec:DF [(vec_select:DF
4555 (match_operand:V2DF 1 "register_operand" "x")
4556 (parallel [(const_int 0)]))]
4559 "movntsd\t{%1, %0|%0, %1}"
4560 [(set_attr "type" "ssemov")
4561 (set_attr "mode" "DF")])
;; SSE4A MOVNTSD taking a scalar source: stores the DFmode value held in
;; the SSE register operand 1 to the memory operand 0.  The store is
;; wrapped in an unspec (identifier and insn condition are not visible in
;; this excerpt) rather than expressed as a plain move.
4563 (define_insn "sse4a_movntdf"
4564 [(set (match_operand:DF 0 "memory_operand" "=m")
4565 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
4568 "movntsd\t{%1, %0|%0, %1}"
4569 [(set_attr "type" "ssemov")
4570 (set_attr "mode" "DF")])
;; SSE4A MOVNTSS taking a vector source: stores element 0 of the V4SF SSE
;; register in operand 1 to the SFmode memory operand 0.  The store is
;; wrapped in an unspec (identifier and insn condition are not visible in
;; this excerpt) rather than expressed as a plain move.
4572 (define_insn "sse4a_vmmovntv4sf"
4573 [(set (match_operand:SF 0 "memory_operand" "=m")
4574 (unspec:SF [(vec_select:SF
4575 (match_operand:V4SF 1 "register_operand" "x")
4576 (parallel [(const_int 0)]))]
4579 "movntss\t{%1, %0|%0, %1}"
4580 [(set_attr "type" "ssemov")
4581 (set_attr "mode" "SF")])
;; SSE4A MOVNTSS taking a scalar source: stores the SFmode value held in
;; the SSE register operand 1 to the memory operand 0.  The store is
;; wrapped in an unspec (identifier and insn condition are not visible in
;; this excerpt) rather than expressed as a plain move.
4583 (define_insn "sse4a_movntsf"
4584 [(set (match_operand:SF 0 "memory_operand" "=m")
4585 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
4588 "movntss\t{%1, %0|%0, %1}"
4589 [(set_attr "type" "ssemov")
4590 (set_attr "mode" "SF")])
;; SSE4A EXTRQ, immediate form.  Operand 0 is a V2DI SSE register tied to
;; operand 1, so only %0 is printed for the register.  Operands 2 and 3
;; are integer constants (const_int_operand, no mode or constraint given)
;; printed as the instruction's two immediates.  Modeled as an unspec; the
;; unspec identifier and the insn condition are not visible in this excerpt.
;; NOTE(review): the predicate accepts any const_int; no range check on the
;; immediates is visible here -- confirm it is enforced by the caller.
4592 (define_insn "sse4a_extrqi"
4593 [(set (match_operand:V2DI 0 "register_operand" "=x")
4594 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
4595 (match_operand 2 "const_int_operand" "")
4596 (match_operand 3 "const_int_operand" "")]
4599 "extrq\t{%3, %2, %0|%0, %2, %3}"
4600 [(set_attr "type" "sse")
4601 (set_attr "mode" "TI")])
;; SSE4A EXTRQ, register form.  Operand 0 is a V2DI SSE register tied to
;; operand 1; operand 2 is a V16QI SSE register (no memory alternative).
;; Modeled as an unspec; the unspec identifier and the insn condition are
;; not visible in this excerpt.
4603 (define_insn "sse4a_extrq"
4604 [(set (match_operand:V2DI 0 "register_operand" "=x")
4605 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
4606 (match_operand:V16QI 2 "register_operand" "x")]
4609 "extrq\t{%2, %0|%0, %2}"
4610 [(set_attr "type" "sse")
4611 (set_attr "mode" "TI")])
;; SSE4A INSERTQ, immediate form.  Operand 0 is a V2DI SSE register tied to
;; operand 1; operand 2 is a second V2DI SSE register (no memory
;; alternative).  Operands 3 and 4 are integer constants
;; (const_int_operand, no mode or constraint given) printed as the
;; instruction's two immediates.  Modeled as an unspec; the unspec
;; identifier and the insn condition are not visible in this excerpt.
;; NOTE(review): the predicate accepts any const_int; no range check on the
;; immediates is visible here -- confirm it is enforced by the caller.
4613 (define_insn "sse4a_insertqi"
4614 [(set (match_operand:V2DI 0 "register_operand" "=x")
4615 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
4616 (match_operand:V2DI 2 "register_operand" "x")
4617 (match_operand 3 "const_int_operand" "")
4618 (match_operand 4 "const_int_operand" "")]
4621 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
4622 [(set_attr "type" "sseins")
4623 (set_attr "mode" "TI")])
;; SSE4A INSERTQ, register form.  Operand 0 is a V2DI SSE register tied to
;; operand 1; operand 2 is a second V2DI SSE register (no memory
;; alternative).  Modeled as an unspec; the unspec identifier and the insn
;; condition are not visible in this excerpt.
4625 (define_insn "sse4a_insertq"
4626 [(set (match_operand:V2DI 0 "register_operand" "=x")
4627 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
4628 (match_operand:V2DI 2 "register_operand" "x")]
4631 "insertq\t{%2, %0|%0, %2}"
4632 [(set_attr "type" "sseins")
4633 (set_attr "mode" "TI")])