From 6f7640e2733c60f37e84c2fc37488f044c391666 Mon Sep 17 00:00:00 2001 From: mm Date: Sat, 30 Apr 2011 22:07:04 +0000 Subject: [PATCH] MFC 219374, 219376, 219639, 219640, 219697, 219711, 220150: MFC r219374: Backport Intel Core 2 and AMD Geode CPU types from gcc-4.3 (GPLv2) These options are supported in this shape in all newer GCC versions. Source: gcc-4_3-branch (rev. 118090, 118973, 120846; GPLv2) MFC r219376: Add AMD Geode CPU type to bsd.cpu.mk and examples/etc/make.conf For CPUTYPE=core2 use -march=core2 MFC r219639: Backport SSSE3 instruction set support to base gcc. Enabled by default for -march=core2 Source: gcc-4_3-branch (rev. 117958, 121687, 121726, 123639; GPLv2) MFC r219640: Add ssse3 capability for CPUTYPE=core2 to MACHINE_CPU in bsd.cpu.mk MFC r219697: Fix -march/-mtune=native autodetection for Intel Core 2 CPUs Source: gcc-4_3-branch (partial rev. 119454; GPLv2) MFC r219711: Backport missing tunings for -march=core2: - enable extra 80387 mathematical constants (ext_80387_constants) - enable compare and exchange 16 bytes (cmpxchg16b) Verified against llvm-gcc (and apple gcc) Source: gcc-4_3-branch (ref. svn revs. 119260, 121140; GPLv2) MFC r220150: Upgrade of base gcc and libstdc++ to the last GPLv2-licensed revision (rev. 127959 of gcc-4_2-branch). Resolved GCC bugs: c++: 17763, 29365, 30535, 30917, 31337, 31941, 32108, 32112, 32346, 32898, 32992 debug: 32610, 32914 libstdc++: 33084, 33128 middle-end: 32563 rtl-optimization: 33148 tree-optimization: 25413, 32723 target: 32218 Source: gcc-4_2-branch (up to rev. 127959) Obtained from: gcc (var. revs of gcc-4_2-branch and gcc-4_3-branch; GPLv2) PR: gnu/153298, gnu/153959, gnu/154385, gnu/155308 git-svn-id: svn://svn.freebsd.org/base/stable/8@221274 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- contrib/gcc/BASE-VER | 2 +- contrib/gcc/ChangeLog | 118 ++++ contrib/gcc/DATESTAMP | 2 +- contrib/gcc/DEV-PHASE | 1 + contrib/gcc/config.gcc | 18 +- contrib/gcc/config/i386/driver-i386.c | 11 +- contrib/gcc/config/i386/geode.md | 153 +++++ contrib/gcc/config/i386/i386.c | 397 +++++++++++-- contrib/gcc/config/i386/i386.h | 48 +- contrib/gcc/config/i386/i386.md | 10 +- contrib/gcc/config/i386/i386.opt | 4 + contrib/gcc/config/i386/sse.md | 575 +++++++++++++++++++ contrib/gcc/config/i386/tmmintrin.h | 448 +++++++++++++++ contrib/gcc/config/i386/xmmintrin.h | 2 + contrib/gcc/config/mips/predicates.md | 4 +- contrib/gcc/config/rs6000/rs6000.c | 35 ++ contrib/gcc/config/s390/s390.md | 4 +- contrib/gcc/cp/ChangeLog | 63 ++ contrib/gcc/cp/call.c | 22 +- contrib/gcc/cp/cp-tree.h | 2 + contrib/gcc/cp/cxx-pretty-print.c | 4 + contrib/gcc/cp/decl.c | 2 +- contrib/gcc/cp/decl2.c | 10 +- contrib/gcc/cp/error.c | 16 +- contrib/gcc/cp/lex.c | 15 + contrib/gcc/cp/name-lookup.c | 47 +- contrib/gcc/cp/pt.c | 13 +- contrib/gcc/cp/semantics.c | 9 +- contrib/gcc/cp/typeck.c | 1 + contrib/gcc/doc/contrib.texi | 4 + contrib/gcc/doc/extend.texi | 46 ++ contrib/gcc/doc/gcc.1 | 22 +- contrib/gcc/doc/invoke.texi | 16 +- contrib/gcc/dwarf2out.c | 40 +- contrib/gcc/expr.c | 73 ++- contrib/gcc/fold-const.c | 4 + contrib/gcc/gimplify.c | 18 +- contrib/gcc/reload1.c | 9 +- contrib/gcc/simplify-rtx.c | 3 +- contrib/gcc/target-def.h | 5 +- contrib/gcc/target.h | 4 + contrib/gcc/targhooks.c | 16 + contrib/gcc/targhooks.h | 2 + contrib/gcc/tree-if-conv.c | 10 +- contrib/gcc/tree-ssa-structalias.c | 92 ++- contrib/gcc/tree-vect-analyze.c | 59 +- contrib/gcc/tree-vect-patterns.c | 5 +- contrib/gcc/tree.c | 3 +- contrib/gcc/tree.h | 6 + contrib/libstdc++/ChangeLog | 15 + contrib/libstdc++/include/std/std_valarray.h | 2 +- contrib/libstdc++/include/tr1/random | 6 +- share/examples/etc/make.conf | 2 +- share/mk/bsd.cpu.mk | 12 +- 54 files changed, 2379 insertions(+), 131 deletions(-) create mode 100644 contrib/gcc/config/i386/geode.md create mode 100644 contrib/gcc/config/i386/tmmintrin.h diff --git a/contrib/gcc/BASE-VER b/contrib/gcc/BASE-VER index fae6e3d04..af8c8ec7c 100644 --- a/contrib/gcc/BASE-VER +++ b/contrib/gcc/BASE-VER @@ -1 +1 @@ -4.2.1 +4.2.2 diff --git a/contrib/gcc/ChangeLog b/contrib/gcc/ChangeLog index 136b46419..d0dec77ba 100644 --- a/contrib/gcc/ChangeLog +++ b/contrib/gcc/ChangeLog @@ -1,3 +1,121 @@ +2007-08-31 Jakub Jelinek + + PR rtl-optimization/33148 + * simplify-rtx.c (simplify_unary_operation_1): Only optimize + (neg (lt X 0)) if X has scalar int mode. + + PR debug/32914 + * dwarf2out.c (rtl_for_decl_init): If vector decl has CONSTRUCTOR + initializer, use build_vector_from_ctor if possible to create + VECTOR_CST out of it. If vector initializer is not VECTOR_CST + even after this, return NULL. + +2007-08-27 Jason Merrill + + PR c++/31337 + * gimplify.c (gimplify_modify_expr): Discard the assignment of + zero-sized types after calling gimplify_modify_expr_rhs. + +2007-08-24 Jakub Jelinek + + PR debug/32610 + * dwarf2out.c (gen_decl_die): Don't call + gen_tagged_type_instantiation_die if decl doesn't have tagged type. + +2007-08-24 Richard Guenther + + * expr.c (get_inner_reference): Remove unused variable. + +2007-08-24 Richard Guenther + + * expr.c (get_inner_reference): Do computation of bitoffset + from offset in a way we can detect overflow reliably. + +2007-08-22 Richard Guenther + + PR middle-end/32563 + * tree.c (host_integerp): Treat sizetype as signed as it is + sign-extended. + +2007-08-20 Adam Nemet + + * config/mips/predicates.md (const_call_insn_operand): Invoke + SYMBOL_REF_LONG_CALL_P only on SYMBOL_REFs. + +2007-08-17 Chen liqin + + * config/score/score.md : Update pattern tablejump. + * config/score/score.c : Update score_initialize_trampoline + function. + * config/score/score.h (TRAMPOLINE_TEMPLATE): Added macro. + (TRAMPOLINE_INSNS, TRAMPOLINE_SIZE) Update macro. + * doc/contrib.texi: Add my entry. + +2007-08-02 Andreas Krebbel + + * config/s390/s390.md ("*xordi3_cconly"): Change xr to xg. + +2007-08-01 Andreas Krebbel + + * config/s390/s390.md (TF in GPR splitter): Change operand_subword + parameter to TFmode. + +2007-07-30 Mark Mitchell + + * BASE-VER: Bump. + * DEV-PHASE: Mark as prerelease. + +2007-07-25 Steve Ellcey + + PR target/32218 + * tree-vect-patterns.c (vect_pattern_recog_1): Check for valid type. + +2007-07-25 Dorit Nuzman + Devang Patel + + PR tree-optimization/25413 + * targhooks.c (default_builtin_vector_alignment_reachable): New. + * targhooks.h (default_builtin_vector_alignment_reachable): New. + * tree.h (contains_packed_reference): New. + * expr.c (contains_packed_reference): New. + * tree-vect-analyze.c (vector_alignment_reachable_p): New. + (vect_enhance_data_refs_alignment): Call + vector_alignment_reachable_p. + * target.h (vector_alignment_reachable): New builtin. + * target-def.h (TARGET_VECTOR_ALIGNMENT_REACHABLE): New. + * config/rs6000/rs6000.c (rs6000_vector_alignment_reachable): New. + (TARGET_VECTOR_ALIGNMENT_REACHABLE): Define. + +2007-07-24 Richard Guenther + + Backport from mainline: + 2007-07-16 Richard Guenther + Uros Bizjak + + * tree-if-conv.c (find_phi_replacement_condition): Unshare "*cond" + before forcing it to gimple operand. + +2007-07-24 Richard Guenther + + PR tree-optimization/32723 + Backport from mainline: + 2007-03-09 Daniel Berlin + + * tree-ssa-structalias.c (shared_bitmap_info_t): New structure. + (shared_bitmap_table): New variable. + (shared_bitmap_hash): New function. + (shared_bitmap_eq): Ditto + (shared_bitmap_lookup): Ditto. + (shared_bitmap_add): Ditto. + (find_what_p_points_to): Rewrite to use shared bitmap hashtable. + (init_alias_vars): Init shared bitmap hashtable. + (delete_points_to_sets): Delete shared bitmap hashtable. + +2007-07-23 Bernd Schmidt + + * reload1.c (choose_reload_regs): Set reload_spill_index for regs + chosen during find_reloads. + 2007-07-19 Release Manager * GCC 4.2.1 released. diff --git a/contrib/gcc/DATESTAMP b/contrib/gcc/DATESTAMP index 38e56c588..a3fd8a5d0 100644 --- a/contrib/gcc/DATESTAMP +++ b/contrib/gcc/DATESTAMP @@ -1 +1 @@ -20070719 +20070831 diff --git a/contrib/gcc/DEV-PHASE b/contrib/gcc/DEV-PHASE index e69de29bb..373fbc60b 100644 --- a/contrib/gcc/DEV-PHASE +++ b/contrib/gcc/DEV-PHASE @@ -0,0 +1 @@ +prerelease diff --git a/contrib/gcc/config.gcc b/contrib/gcc/config.gcc index 9a142e257..44065937a 100644 --- a/contrib/gcc/config.gcc +++ b/contrib/gcc/config.gcc @@ -268,11 +268,13 @@ xscale-*-*) ;; i[34567]86-*-*) cpu_type=i386 - extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h" + extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h + pmmintrin.h tmmintrin.h" ;; x86_64-*-*) cpu_type=i386 - extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h" + extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h + pmmintrin.h tmmintrin.h" need_64bit_hwint=yes ;; ia64-*-*) @@ -1207,14 +1209,14 @@ i[34567]86-*-solaris2*) # FIXME: -m64 for i[34567]86-*-* should be allowed just # like -m32 for x86_64-*-*. case X"${with_cpu}" in - Xgeneric|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx) + Xgeneric|Xcore2|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx) ;; X) with_cpu=generic ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2 + echo "generic core2 nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2 exit 1 ;; esac @@ -2537,6 +2539,9 @@ if test x$with_cpu = x ; then nocona-*) with_cpu=nocona ;; + core2-*) + with_cpu=core2 + ;; pentium_m-*) with_cpu=pentium-m ;; @@ -2556,6 +2561,9 @@ if test x$with_cpu = x ; then nocona-*) with_cpu=nocona ;; + core2-*) + with_cpu=core2 + ;; *) with_cpu=generic ;; @@ -2787,7 +2795,7 @@ case "${target}" in esac # OK ;; - "" | k8 | opteron | athlon64 | athlon-fx | nocona | generic) + "" | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic) # OK ;; *) diff --git a/contrib/gcc/config/i386/driver-i386.c b/contrib/gcc/config/i386/driver-i386.c index ffcee4e55..cb9c6c3f3 100644 --- a/contrib/gcc/config/i386/driver-i386.c +++ b/contrib/gcc/config/i386/driver-i386.c @@ -39,6 +39,7 @@ const char *host_detect_local_cpu (int argc, const char **argv); #define bit_SSE2 (1 << 26) #define bit_SSE3 (1 << 0) +#define bit_SSSE3 (1 << 9) #define bit_CMPXCHG16B (1 << 13) #define bit_3DNOW (1 << 31) @@ -66,7 +67,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int vendor; unsigned int ext_level; unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0; - unsigned char has_sse2 = 0, has_sse3 = 0, has_cmov = 0; + unsigned char has_sse2 = 0, has_sse3 = 0, has_ssse3 = 0, has_cmov = 0; unsigned char has_longmode = 0, has_cmpxchg8b = 0; unsigned char is_amd = 0; unsigned int family = 0; @@ -107,6 +108,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_sse = !!(edx & bit_SSE); has_sse2 = !!(edx & bit_SSE2); has_sse3 = !!(ecx & bit_SSE3); + has_ssse3 = !!(ecx & bit_SSSE3); /* We don't care for extended family. */ family = (eax >> 8) & ~(1 << 4); @@ -148,7 +150,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* We have no idea. Use something reasonable. */ if (arch) { - if (has_sse3) + if (has_ssse3) + cpu = "core2"; + else if (has_sse3) { if (has_longmode) cpu = "nocona"; @@ -230,6 +234,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) cpu = "generic"; } break; + case PROCESSOR_GEODE: + cpu = "geode"; + break; case PROCESSOR_K6: if (has_3dnow) cpu = "k6-3"; diff --git a/contrib/gcc/config/i386/geode.md b/contrib/gcc/config/i386/geode.md new file mode 100644 index 000000000..050216acd --- /dev/null +++ b/contrib/gcc/config/i386/geode.md @@ -0,0 +1,153 @@ +;; Geode Scheduling +;; Copyright (C) 2006 +;; Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. +;; +;; The Geode architecture is one insn issue processor. +;; +;; This description is based on data from the following documents: +;; +;; "AMD Geode GX Processor Data Book" +;; Advanced Micro Devices, Inc., Aug 2005. +;; +;; "AMD Geode LX Processor Data Book" +;; Advanced Micro Devices, Inc., Jan 2006. +;; +;; +;; CPU execution units of the Geode: +;; +;; issue describes the issue pipeline. +;; alu describes the Integer unit +;; fpu describes the FP unit +;; +;; The fp unit is out of order execution unit with register renaming. +;; There is also memory management unit and execution pipeline for +;; load/store operations. We ignore it and difference between insns +;; using memory and registers. + +(define_automaton "geode") + +(define_cpu_unit "geode_issue,geode_alu,geode_fpu" "geode") + +(define_insn_reservation "alu" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "alu,alu1,negnot,icmp,lea,test,imov,imovx,icmov,incdec,setcc")) + "geode_issue,geode_alu") + +(define_insn_reservation "shift" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "ishift,ishift1,rotate,rotate1,cld")) + "geode_issue,geode_alu*2") + +(define_insn_reservation "imul" 7 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "imul")) + "geode_issue,geode_alu*7") + +(define_insn_reservation "idiv" 40 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "idiv")) + "geode_issue,geode_alu*40") + +;; The branch unit. +(define_insn_reservation "call" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "call,callv")) + "geode_issue,geode_alu*2") + +(define_insn_reservation "geode_branch" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "ibr")) + "geode_issue,geode_alu") + +(define_insn_reservation "geode_pop_push" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "pop,push")) + "geode_issue,geode_alu") + +(define_insn_reservation "geode_leave" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "leave")) + "geode_issue,geode_alu*2") + +(define_insn_reservation "geode_load_str" 4 + (and (eq_attr "cpu" "geode") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both"))) + "geode_issue,geode_alu*4") + +(define_insn_reservation "geode_store_str" 2 + (and (eq_attr "cpu" "geode") + (and (eq_attr "type" "str") + (eq_attr "memory" "store"))) + "geode_issue,geode_alu*2") + +;; Be optimistic +(define_insn_reservation "geode_unknown" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "multi,other")) + "geode_issue,geode_alu") + +;; FPU + +(define_insn_reservation "geode_fop" 6 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fop,fcmp")) + "geode_issue,geode_fpu*6") + +(define_insn_reservation "geode_fsimple" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fmov,fcmov,fsgn,fxch")) + "geode_issue,geode_fpu") + +(define_insn_reservation "geode_fist" 4 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fistp,fisttp")) + "geode_issue,geode_fpu*4") + +(define_insn_reservation "geode_fmul" 10 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fmul")) + "geode_issue,geode_fpu*10") + +(define_insn_reservation "geode_fdiv" 47 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fdiv")) + "geode_issue,geode_fpu*47") + +;; We use minimal latency (fsin) here +(define_insn_reservation "geode_fpspc" 54 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "fpspc")) + "geode_issue,geode_fpu*54") + +(define_insn_reservation "geode_frndint" 12 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "frndint")) + "geode_issue,geode_fpu*12") + +(define_insn_reservation "geode_mmxmov" 1 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "mmxmov")) + "geode_issue,geode_fpu") + +(define_insn_reservation "geode_mmx" 2 + (and (eq_attr "cpu" "geode") + (eq_attr "type" "mmx,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")) + "geode_issue,geode_fpu*2") diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c index 18293d9b8..2567fca20 100644 --- a/contrib/gcc/config/i386/i386.c +++ b/contrib/gcc/config/i386/i386.c @@ -335,6 +335,60 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ }; +static const +struct processor_costs geode_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1), /* cost of a lea instruction */ + COSTS_N_INSNS (2), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (7), /* SI */ + COSTS_N_INSNS (7), /* DI */ + COSTS_N_INSNS (7)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (23), /* HI */ + COSTS_N_INSNS (39), /* SI */ + COSTS_N_INSNS (39), /* DI */ + COSTS_N_INSNS (39)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 4, /* MOVE_RATIO */ + 1, /* cost for loading QImode using movzbl */ + {1, 1, 1}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {1, 1, 1}, /* cost of storing integer registers */ + 1, /* cost of reg,reg fld/fst */ + {1, 1, 1}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 6, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + + 1, /* cost of moving MMX register */ + {1, 1}, /* cost of loading MMX registers + in SImode and DImode */ + {1, 1}, /* cost of storing MMX registers + in SImode and DImode */ + 1, /* cost of moving SSE register */ + {1, 1, 1}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {1, 1, 1}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 1, /* MMX or SSE register to integer */ + 32, /* size of prefetch block */ + 1, /* number of parallel prefetches */ + 1, /* Branch cost */ + COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (11), /* cost of FMUL instruction. */ + COSTS_N_INSNS (47), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ +}; + static const struct processor_costs k6_cost = { COSTS_N_INSNS (1), /* cost of an add instruction */ @@ -600,6 +654,58 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ }; +static const +struct processor_costs core2_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (3), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (3), /* DI */ + COSTS_N_INSNS (3)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (22), /* HI */ + COSTS_N_INSNS (22), /* SI */ + COSTS_N_INSNS (22), /* DI */ + COSTS_N_INSNS (22)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 16, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {6, 6, 6}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {4, 4, 4}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {6, 6, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 4}, /* cost of loading integer registers */ + 2, /* cost of moving MMX register */ + {6, 6}, /* cost of loading MMX registers + in SImode and DImode */ + {4, 4}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {6, 6, 6}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 4, 4}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 2, /* MMX or SSE register to integer */ + 128, /* size of prefetch block */ + 8, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (5), /* cost of FMUL instruction. */ + COSTS_N_INSNS (32), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (58), /* cost of FSQRT instruction. */ +}; + /* Generic64 should produce code tuned for Nocona and K8. */ static const struct processor_costs generic64_cost = { @@ -721,38 +827,41 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_486 (1<mask, d->name, type, d->code); } + /* Add all builtins that are more or less simple operations on 1 operand. */ + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + { + enum machine_mode mode; + tree type; + + if (d->name == 0) + continue; + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V16QImode: + type = v16qi_ftype_v16qi; + break; + case V8HImode: + type = v8hi_ftype_v8hi; + break; + case V4SImode: + type = v4si_ftype_v4si; + break; + case V2DFmode: + type = v2df_ftype_v2df; + break; + case V4SFmode: + type = v4sf_ftype_v4sf; + break; + case V8QImode: + type = v8qi_ftype_v8qi; + break; + case V4HImode: + type = v4hi_ftype_v4hi; + break; + case V2SImode: + type = v2si_ftype_v2si; + break; + + default: + abort (); + } + + def_builtin (d->mask, d->name, type, d->code); + } + /* Add the remaining MMX insns with somewhat more complicated types. */ def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); @@ -15531,6 +15798,12 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + /* SSSE3. */ + def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128", + v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128); + def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, + IX86_BUILTIN_PALIGNR); + /* Access to the vec_init patterns. */ ftype = build_function_type_list (V2SI_type_node, integer_type_node, integer_type_node, NULL_TREE); @@ -16029,7 +16302,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, tree arglist = TREE_OPERAND (exp, 1); tree arg0, arg1, arg2; rtx op0, op1, op2, pat; - enum machine_mode tmode, mode0, mode1, mode2; + enum machine_mode tmode, mode0, mode1, mode2, mode3; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); switch (fcode) @@ -16499,6 +16772,52 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, target, 1); + case IX86_BUILTIN_PALIGNR: + case IX86_BUILTIN_PALIGNR128: + if (fcode == IX86_BUILTIN_PALIGNR) + { + icode = CODE_FOR_ssse3_palignrdi; + mode = DImode; + } + else + { + icode = CODE_FOR_ssse3_palignrti; + mode = V2DImode; + } + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + { + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); + } + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + op1 = copy_to_reg (op1); + op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0); + } + if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) + { + error ("shift must be an immediate"); + return const0_rtx; + } + target = gen_reg_rtx (mode); + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0), + op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; + case IX86_BUILTIN_VEC_INIT_V2SI: case IX86_BUILTIN_VEC_INIT_V4HI: case IX86_BUILTIN_VEC_INIT_V8QI: diff --git a/contrib/gcc/config/i386/i386.h b/contrib/gcc/config/i386/i386.h index 5f1b58f6c..b97701479 100644 --- a/contrib/gcc/config/i386/i386.h +++ b/contrib/gcc/config/i386/i386.h @@ -130,12 +130,14 @@ extern const struct processor_costs *ix86_cost; #define TARGET_486 (ix86_tune == PROCESSOR_I486) #define TARGET_PENTIUM (ix86_tune == PROCESSOR_PENTIUM) #define TARGET_PENTIUMPRO (ix86_tune == PROCESSOR_PENTIUMPRO) +#define TARGET_GEODE (ix86_tune == PROCESSOR_GEODE) #define TARGET_K6 (ix86_tune == PROCESSOR_K6) #define TARGET_ATHLON (ix86_tune == PROCESSOR_ATHLON) #define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4) #define TARGET_K8 (ix86_tune == PROCESSOR_K8) #define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON) #define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA) +#define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2) #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32) #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64) #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64) @@ -376,6 +378,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); break; \ } \ } \ + else if (TARGET_GEODE) \ + { \ + builtin_define ("__tune_geode__"); \ + } \ else if (TARGET_K6) \ { \ builtin_define ("__tune_k6__"); \ @@ -397,6 +403,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__tune_pentium4__"); \ else if (TARGET_NOCONA) \ builtin_define ("__tune_nocona__"); \ + else if (TARGET_CORE2) \ + builtin_define ("__tune_core2__"); \ \ if (TARGET_MMX) \ builtin_define ("__MMX__"); \ @@ -410,6 +418,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__SSE2__"); \ if (TARGET_SSE3) \ builtin_define ("__SSE3__"); \ + if (TARGET_SSSE3) \ + builtin_define ("__SSSE3__"); \ if (TARGET_SSE_MATH && TARGET_SSE) \ builtin_define ("__SSE_MATH__"); \ if (TARGET_SSE_MATH && TARGET_SSE2) \ @@ -437,6 +447,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__pentiumpro"); \ builtin_define ("__pentiumpro__"); \ } \ + else if (ix86_arch == PROCESSOR_GEODE) \ + { \ + builtin_define ("__geode"); \ + builtin_define ("__geode__"); \ + } \ else if (ix86_arch == PROCESSOR_K6) \ { \ \ @@ -470,6 +485,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); builtin_define ("__nocona"); \ builtin_define ("__nocona__"); \ } \ + else if (ix86_arch == PROCESSOR_CORE2) \ + { \ + builtin_define ("__core2"); \ + builtin_define ("__core2__"); \ + } \ } \ while (0) @@ -481,23 +501,25 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define TARGET_CPU_DEFAULT_pentium2 5 #define TARGET_CPU_DEFAULT_pentium3 6 #define TARGET_CPU_DEFAULT_pentium4 7 -#define TARGET_CPU_DEFAULT_k6 8 -#define TARGET_CPU_DEFAULT_k6_2 9 -#define TARGET_CPU_DEFAULT_k6_3 10 -#define TARGET_CPU_DEFAULT_athlon 11 -#define TARGET_CPU_DEFAULT_athlon_sse 12 -#define TARGET_CPU_DEFAULT_k8 13 -#define TARGET_CPU_DEFAULT_pentium_m 14 -#define TARGET_CPU_DEFAULT_prescott 15 -#define TARGET_CPU_DEFAULT_nocona 16 -#define TARGET_CPU_DEFAULT_generic 17 +#define TARGET_CPU_DEFAULT_geode 8 +#define TARGET_CPU_DEFAULT_k6 9 +#define TARGET_CPU_DEFAULT_k6_2 10 +#define TARGET_CPU_DEFAULT_k6_3 11 +#define TARGET_CPU_DEFAULT_athlon 12 +#define TARGET_CPU_DEFAULT_athlon_sse 13 +#define TARGET_CPU_DEFAULT_k8 14 +#define TARGET_CPU_DEFAULT_pentium_m 15 +#define TARGET_CPU_DEFAULT_prescott 16 +#define TARGET_CPU_DEFAULT_nocona 17 +#define TARGET_CPU_DEFAULT_core2 18 +#define TARGET_CPU_DEFAULT_generic 19 #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\ "pentiumpro", "pentium2", "pentium3", \ - "pentium4", "k6", "k6-2", "k6-3",\ + "pentium4", "geode", "k6", "k6-2", "k6-3", \ "athlon", "athlon-4", "k8", \ "pentium-m", "prescott", "nocona", \ - "generic"} + "core2", "generic"} #ifndef CC1_SPEC #define CC1_SPEC "%(cc1_cpu) " @@ -2077,11 +2099,13 @@ enum processor_type PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ PROCESSOR_PENTIUM, PROCESSOR_PENTIUMPRO, + PROCESSOR_GEODE, PROCESSOR_K6, PROCESSOR_ATHLON, PROCESSOR_PENTIUM4, PROCESSOR_K8, PROCESSOR_NOCONA, + PROCESSOR_CORE2, PROCESSOR_GENERIC32, PROCESSOR_GENERIC64, PROCESSOR_max diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index f7481ed8c..bd81c4aa7 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -148,6 +148,11 @@ (UNSPEC_SP_TEST 101) (UNSPEC_SP_TLS_SET 102) (UNSPEC_SP_TLS_TEST 103) + + ; SSSE3 + (UNSPEC_PSHUFB 120) + (UNSPEC_PSIGN 121) + (UNSPEC_PALIGNR 122) ]) (define_constants @@ -187,7 +192,7 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,generic32,generic64" +(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64" (const (symbol_ref "ix86_tune"))) ;; A basic instruction type. Refinements due to arguments to be @@ -473,6 +478,7 @@ (include "ppro.md") (include "k6.md") (include "athlon.md") +(include "geode.md") ;; Operand and operator predicates and constraints @@ -20947,6 +20953,6 @@ } [(set_attr "type" "multi")]) -(include "sse.md") (include "mmx.md") +(include "sse.md") (include "sync.md") diff --git a/contrib/gcc/config/i386/i386.opt b/contrib/gcc/config/i386/i386.opt index 2922f5d2f..aa249205e 100644 --- a/contrib/gcc/config/i386/i386.opt +++ b/contrib/gcc/config/i386/i386.opt @@ -197,6 +197,10 @@ msse3 Target Report Mask(SSE3) Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation +mssse3 +Target Report Mask(SSSE3) +Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation + msseregparm Target RejectNegative Mask(SSEREGPARM) Use SSE register passing conventions for SF and DF mode diff --git a/contrib/gcc/config/i386/sse.md b/contrib/gcc/config/i386/sse.md index 15817fcb1..4e25280ce 100644 --- a/contrib/gcc/config/i386/sse.md +++ b/contrib/gcc/config/i386/sse.md @@ -3949,3 +3949,578 @@ ;; zero extended to 64bit, we only need to set up 32bit registers. "monitor" [(set_attr "length" "3")]) + +;; SSSE3 +(define_insn "ssse3_phaddwv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phaddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phaddwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (plus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phadddv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phadddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (plus:SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (plus:SI + (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSSE3" + "phaddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phaddswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phaddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phaddswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_plus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_plus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phaddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubwv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phsubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (minus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubdv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubdv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (minus:SI + (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) + (minus:SI + (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSSE3" + "phsubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_phsubswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + "TARGET_SSSE3" + "phsubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_phsubswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) + (vec_concat:V2HI + (ss_minus:HI + (vec_select:HI + (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) + (ss_minus:HI + (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] + "TARGET_SSSE3" + "phsubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pmaddubswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ss_plus:V8HI + (mult:V8HI + (zero_extend:V8HI + (vec_select:V4QI + (match_operand:V16QI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 8) + (const_int 10) + (const_int 12) + (const_int 14)])))) + (mult:V8HI + (zero_extend:V8HI + (vec_select:V16QI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))) + (sign_extend:V8HI + (vec_select:V16QI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7) + (const_int 9) + (const_int 11) + (const_int 13) + (const_int 15)]))))))] + "TARGET_SSSE3" + "pmaddubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pmaddubswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI + (mult:V4HI + (zero_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 1 "nonimmediate_operand" "%0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4HI + (zero_extend:V4HI + (vec_select:V8QI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4HI + (vec_select:V8QI (match_dup 2) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))))] + "TARGET_SSSE3" + "pmaddubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pmulhrswv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (plus:V8SI + (lshiftrt:V8SI + (mult:V8SI + (sign_extend:V8SI + (match_operand:V8HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V8SI + (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 14)) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)" + "pmulhrsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pmulhrswv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (lshiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI + (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 14)) + (const_vector:V4HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] + "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "pmulhrsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_pshufbv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + UNSPEC_PSHUFB))] + "TARGET_SSSE3" + "pshufb\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_pshufbv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSHUFB))] + "TARGET_SSSE3" + "pshufb\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_psign3" + [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] + UNSPEC_PSIGN))] + "TARGET_SSSE3" + "psign\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_psign3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSIGN))] + "TARGET_SSSE3" + "psign\t{%2, %0|%0, %2}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) + +(define_insn "ssse3_palignrti" + [(set (match_operand:TI 0 "register_operand" "=x") + (unspec:TI [(match_operand:TI 1 "register_operand" "0") + (match_operand:TI 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] + UNSPEC_PALIGNR))] + "TARGET_SSSE3" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) / 8); + return "palignr\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ssse3_palignrdi" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] + UNSPEC_PALIGNR))] + "TARGET_SSSE3" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) / 8); + return "palignr\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "sseishft") + (set_attr "mode" "DI")]) + +(define_insn "abs2" + [(set (match_operand:SSEMODE124 0 "register_operand" "=x") + (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] + "TARGET_SSSE3" + "pabs\t{%1, %0|%0, %1}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "TI")]) + +(define_insn "abs2" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] + "TARGET_SSSE3" + "pabs\t{%1, %0|%0, %1}"; + [(set_attr "type" "sselog1") + (set_attr "mode" "DI")]) diff --git a/contrib/gcc/config/i386/tmmintrin.h b/contrib/gcc/config/i386/tmmintrin.h new file mode 100644 index 000000000..e1bedc561 --- /dev/null +++ b/contrib/gcc/config/i386/tmmintrin.h @@ -0,0 +1,448 @@ +/* Copyright (C) 2006 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.1. */ + +#ifndef _TMMINTRIN_H_INCLUDED +#define _TMMINTRIN_H_INCLUDED + +#ifdef __SSSE3__ +#include + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadds_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadds_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_maddubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_maddubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_mulhrs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_mulhrs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_shuffle_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_shuffle_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); +} + +#define _mm_alignr_epi8(__X, __Y, __N) \ + ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8)) + +#define _mm_alignr_pi8(__X, __Y, __N) \ + ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8)) + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi8 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi16 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi32 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi8 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsb ((__v8qi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi16 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsw ((__v4hi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi32 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsd ((__v2si)__X); +} + +#endif /* __SSSE3__ */ + +#endif /* _TMMINTRIN_H_INCLUDED */ +/* Copyright (C) 2006 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.1. */ + +#ifndef _TMMINTRIN_H_INCLUDED +#define _TMMINTRIN_H_INCLUDED + +#ifdef __SSSE3__ +#include + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadd_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hadds_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadd_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hadds_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsub_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_hsubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsub_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_hsubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_maddubs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_maddubs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_mulhrs_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_mulhrs_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_shuffle_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_shuffle_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi16 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_sign_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi8 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi16 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_sign_pi32 (__m64 __X, __m64 __Y) +{ + return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y); +} + +#define _mm_alignr_epi8(__X, __Y, __N) \ + ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8)) + +#define _mm_alignr_pi8(__X, __Y, __N) \ + ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8)) + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi8 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi16 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X); +} + +static __inline __m128i __attribute__((__always_inline__)) +_mm_abs_epi32 (__m128i __X) +{ + return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi8 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsb ((__v8qi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi16 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsw ((__v4hi)__X); +} + +static __inline __m64 __attribute__((__always_inline__)) +_mm_abs_pi32 (__m64 __X) +{ + return (__m64) __builtin_ia32_pabsd ((__v2si)__X); +} + +#endif /* __SSSE3__ */ + +#endif /* _TMMINTRIN_H_INCLUDED */ diff --git a/contrib/gcc/config/i386/xmmintrin.h b/contrib/gcc/config/i386/xmmintrin.h index 87ec195e8..35417ec9b 100644 --- a/contrib/gcc/config/i386/xmmintrin.h +++ b/contrib/gcc/config/i386/xmmintrin.h @@ -1243,7 +1243,9 @@ do { \ } while (0) /* For backward source compatibility. */ +#ifdef __SSE2__ #include +#endif #endif /* __SSE__ */ #endif /* _XMMINTRIN_H_INCLUDED */ diff --git a/contrib/gcc/config/mips/predicates.md b/contrib/gcc/config/mips/predicates.md index 9a6756c20..bbc8e3cf8 100644 --- a/contrib/gcc/config/mips/predicates.md +++ b/contrib/gcc/config/mips/predicates.md @@ -116,7 +116,9 @@ /* If -mlong-calls, force all calls to use register addressing. Also, if this function has the long_call attribute, we must use register addressing. */ - return !TARGET_LONG_CALLS && !SYMBOL_REF_LONG_CALL_P (op); + return (!TARGET_LONG_CALLS + && !(GET_CODE (op) == SYMBOL_REF + && SYMBOL_REF_LONG_CALL_P (op))); case SYMBOL_GOT_GLOBAL: /* Without explicit relocs, there is no special syntax for diff --git a/contrib/gcc/config/rs6000/rs6000.c b/contrib/gcc/config/rs6000/rs6000.c index 6670a2034..5e705d007 100644 --- a/contrib/gcc/config/rs6000/rs6000.c +++ b/contrib/gcc/config/rs6000/rs6000.c @@ -664,6 +664,7 @@ static int rs6000_use_sched_lookahead (void); static tree rs6000_builtin_mask_for_load (void); static void def_builtin (int, const char *, tree, int); +static bool rs6000_vector_alignment_reachable (tree, bool); static void rs6000_init_builtins (void); static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx); static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx); @@ -915,6 +916,9 @@ static const char alt_reg_names[][8] = #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load +#undef TARGET_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins @@ -1584,6 +1588,37 @@ rs6000_builtin_mask_for_load (void) return 0; } + +/* Return true iff, data reference of TYPE can reach vector alignment (16) + after applying N number of iterations. This routine does not determine + how may iterations are required to reach desired alignment. */ + +static bool +rs6000_vector_alignment_reachable (tree type ATTRIBUTE_UNUSED, bool is_packed) +{ + if (is_packed) + return false; + + if (TARGET_32BIT) + { + if (rs6000_alignment_flags == MASK_ALIGN_NATURAL) + return true; + + if (rs6000_alignment_flags == MASK_ALIGN_POWER) + return true; + + return false; + } + else + { + if (TARGET_MACHO) + return false; + + /* Assuming that all other types are naturally aligned. CHECKME! */ + return true; + } +} + /* Handle generic options of the form -mfoo=yes/no. NAME is the option name. VALUE is the option value. diff --git a/contrib/gcc/config/s390/s390.md b/contrib/gcc/config/s390/s390.md index 23484098f..e8eb20684 100644 --- a/contrib/gcc/config/s390/s390.md +++ b/contrib/gcc/config/s390/s390.md @@ -1500,7 +1500,7 @@ && !s_operand (operands[1], VOIDmode)" [(set (match_dup 0) (match_dup 1))] { - rtx addr = operand_subword (operands[0], 1, 0, DFmode); + rtx addr = operand_subword (operands[0], 1, 0, TFmode); s390_load_address (addr, XEXP (operands[1], 0)); operands[1] = replace_equiv_address (operands[1], addr); }) @@ -5624,7 +5624,7 @@ "s390_match_ccmode(insn, CCTmode) && TARGET_64BIT" "@ xgr\t%0,%2 - xr\t%0,%2" + xg\t%0,%2" [(set_attr "op_type" "RRE,RXY")]) (define_insn "*xordi3_extimm" diff --git a/contrib/gcc/cp/ChangeLog b/contrib/gcc/cp/ChangeLog index 34c036f01..39875472b 100644 --- a/contrib/gcc/cp/ChangeLog +++ b/contrib/gcc/cp/ChangeLog @@ -1,3 +1,66 @@ +2007-08-24 Jakub Jelinek + + PR c++/31941 + * error.c (resolve_virtual_fun_from_obj_type_ref): Handle + TARGET_VTABLE_USES_DESCRIPTORS targets properly. + + PR c++/32898 + * name-lookup.c (set_decl_namespace): lookup_qualified_name failure + is error_mark_node rather than NULL_TREE. + * pt.c (check_explicit_specialization): Likewise. + +2007-08-22 Jason Merrill + + PR c++/29365 + * pt.c (outermost_tinst_level): New function. + * lex.c (in_main_input_context): New function. + * cp-tree.h: Declare it. + * decl2.c (constrain_class_visibility): Use it to avoid warning + about uses of the anonymous namespace in the main input file. + +2007-08-20 Jakub Jelinek + + PR c++/32992 + * typeck.c (check_return_expr): Don't NRV optimize vars in + anonymous unions. + * decl.c (finish_function): Comment fix. + +2007-08-18 Paolo Carlini + + PR c++/32112 + * error.c (dump_decl): Deal with UNBOUND_CLASS_TEMPLATE. + * cxx-pretty-print.c (pp_cxx_unqualified_id): Likewise. + +2007-08-10 Paolo Carlini + + PR c++/17763 + * error.c (dump_expr): Consistently use the *_cxx_* + variants of the pretty-print functions. + +2007-07-30 Paolo Carlini + + PR c++/32108 + * semantics.c (finish_label_stmt): Reject the __label__ + extension outside function scopes. + +2007-07-28 Simon Martin + Mark Mitchell + + PR c++/30917 + * name-lookup.c (lookup_name_real): Non namespace-scope bindings can be + hidden due to friend declarations in local classes. + +2007-07-27 Mark Mitchell + + PR c++/32346 + * call.c (convert_for_arg_passing): Only widen bitfields to their + declared types if necessary. + +2007-07-24 Paolo Carlini + + PR c++/30535 + * pt.c (unify): Never pass error_mark_node to template_decl_level. + 2007-07-19 Release Manager * GCC 4.2.1 released. diff --git a/contrib/gcc/cp/call.c b/contrib/gcc/cp/call.c index 862c3d0ec..848f3c894 100644 --- a/contrib/gcc/cp/call.c +++ b/contrib/gcc/cp/call.c @@ -4674,7 +4674,27 @@ type_passed_as (tree type) tree convert_for_arg_passing (tree type, tree val) { - val = convert_bitfield_to_declared_type (val); + tree bitfield_type; + + /* If VAL is a bitfield, then -- since it has already been converted + to TYPE -- it cannot have a precision greater than TYPE. + + If it has a smaller precision, we must widen it here. For + example, passing "int f:3;" to a function expecting an "int" will + not result in any conversion before this point. + + If the precision is the same we must not risk widening. For + example, the COMPONENT_REF for a 32-bit "long long" bitfield will + often have type "int", even though the C++ type for the field is + "long long". If the value is being passed to a function + expecting an "int", then no conversions will be required. But, + if we call convert_bitfield_to_declared_type, the bitfield will + be converted to "long long". */ + bitfield_type = is_bitfield_expr_with_lowered_type (val); + if (bitfield_type + && TYPE_PRECISION (TREE_TYPE (val)) < TYPE_PRECISION (type)) + val = convert_to_integer (TYPE_MAIN_VARIANT (bitfield_type), val); + if (val == error_mark_node) ; /* Pass classes with copy ctors by invisible reference. */ diff --git a/contrib/gcc/cp/cp-tree.h b/contrib/gcc/cp/cp-tree.h index 9e0342a39..deea897c4 100644 --- a/contrib/gcc/cp/cp-tree.h +++ b/contrib/gcc/cp/cp-tree.h @@ -4079,6 +4079,7 @@ extern void yyerror (const char *); extern void yyhook (int); extern bool cxx_init (void); extern void cxx_finish (void); +extern bool in_main_input_context (void); /* in method.c */ extern void init_method (void); @@ -4161,6 +4162,7 @@ extern tree build_non_dependent_args (tree); extern bool reregister_specialization (tree, tree, tree); extern tree fold_non_dependent_expr (tree); extern bool explicit_class_specialization_p (tree); +extern tree outermost_tinst_level (void); /* in repo.c */ extern void init_repo (void); diff --git a/contrib/gcc/cp/cxx-pretty-print.c b/contrib/gcc/cp/cxx-pretty-print.c index 5ceca61a4..5f6150aab 100644 --- a/contrib/gcc/cp/cxx-pretty-print.c +++ b/contrib/gcc/cp/cxx-pretty-print.c @@ -204,6 +204,10 @@ pp_cxx_unqualified_id (cxx_pretty_printer *pp, tree t) pp_cxx_unqualified_id (pp, TEMPLATE_PARM_DECL (t)); break; + case UNBOUND_CLASS_TEMPLATE: + pp_cxx_unqualified_id (pp, TYPE_NAME (t)); + break; + default: pp_unsupported_tree (pp, t); break; diff --git a/contrib/gcc/cp/decl.c b/contrib/gcc/cp/decl.c index 28e653b07..b837b1451 100644 --- a/contrib/gcc/cp/decl.c +++ b/contrib/gcc/cp/decl.c @@ -11273,7 +11273,7 @@ finish_function (int flags) gcc_assert (stmts_are_full_exprs_p ()); /* Set up the named return value optimization, if we can. Candidate - variables are selected in check_return_value. */ + variables are selected in check_return_expr. */ if (current_function_return_value) { tree r = current_function_return_value; diff --git a/contrib/gcc/cp/decl2.c b/contrib/gcc/cp/decl2.c index 00bcc16b7..310dc8315 100644 --- a/contrib/gcc/cp/decl2.c +++ b/contrib/gcc/cp/decl2.c @@ -1860,9 +1860,12 @@ constrain_class_visibility (tree type) int subvis = type_visibility (ftype); if (subvis == VISIBILITY_ANON) - warning (0, "\ + { + if (!in_main_input_context ()) + warning (0, "\ %qT has a field %qD whose type uses the anonymous namespace", type, t); + } else if (IS_AGGR_TYPE (ftype) && vis < VISIBILITY_HIDDEN && subvis >= VISIBILITY_HIDDEN) @@ -1877,9 +1880,12 @@ constrain_class_visibility (tree type) int subvis = type_visibility (TREE_TYPE (t)); if (subvis == VISIBILITY_ANON) - warning (0, "\ + { + if (!in_main_input_context()) + warning (0, "\ %qT has a base %qT whose type uses the anonymous namespace", type, TREE_TYPE (t)); + } else if (vis < VISIBILITY_HIDDEN && subvis >= VISIBILITY_HIDDEN) warning (OPT_Wattributes, "\ diff --git a/contrib/gcc/cp/error.c b/contrib/gcc/cp/error.c index 2962fe655..c169cabed 100644 --- a/contrib/gcc/cp/error.c +++ b/contrib/gcc/cp/error.c @@ -901,6 +901,10 @@ dump_decl (tree t, int flags) pp_type_id (cxx_pp, t); break; + case UNBOUND_CLASS_TEMPLATE: + dump_type (t, flags); + break; + default: pp_unsupported_tree (cxx_pp, t); /* Fall through to error. */ @@ -1301,10 +1305,14 @@ static tree resolve_virtual_fun_from_obj_type_ref (tree ref) { tree obj_type = TREE_TYPE (OBJ_TYPE_REF_OBJECT (ref)); - int index = tree_low_cst (OBJ_TYPE_REF_TOKEN (ref), 1); + HOST_WIDE_INT index = tree_low_cst (OBJ_TYPE_REF_TOKEN (ref), 1); tree fun = BINFO_VIRTUALS (TYPE_BINFO (TREE_TYPE (obj_type))); - while (index--) + while (index) + { fun = TREE_CHAIN (fun); + index -= (TARGET_VTABLE_USES_DESCRIPTORS + ? TARGET_VTABLE_USES_DESCRIPTORS : 1); + } return BV_FN (fun); } @@ -1420,13 +1428,13 @@ dump_expr (tree t, int flags) if (TREE_CODE (ob) == ADDR_EXPR) { dump_expr (TREE_OPERAND (ob, 0), flags | TFF_EXPR_IN_PARENS); - pp_dot (cxx_pp); + pp_cxx_dot (cxx_pp); } else if (TREE_CODE (ob) != PARM_DECL || strcmp (IDENTIFIER_POINTER (DECL_NAME (ob)), "this")) { dump_expr (ob, flags | TFF_EXPR_IN_PARENS); - pp_arrow (cxx_pp); + pp_cxx_arrow (cxx_pp); } args = TREE_CHAIN (args); } diff --git a/contrib/gcc/cp/lex.c b/contrib/gcc/cp/lex.c index d14a1baa9..65dcd7d60 100644 --- a/contrib/gcc/cp/lex.c +++ b/contrib/gcc/cp/lex.c @@ -827,3 +827,18 @@ make_aggr_type (enum tree_code code) return t; } + +/* Returns true if we are currently in the main source file, or in a + template instantiation started from the main source file. */ + +bool +in_main_input_context (void) +{ + tree tl = outermost_tinst_level(); + + if (tl) + return strcmp (main_input_filename, + LOCATION_FILE (TINST_LOCATION (tl))) == 0; + else + return strcmp (main_input_filename, input_filename) == 0; +} diff --git a/contrib/gcc/cp/name-lookup.c b/contrib/gcc/cp/name-lookup.c index 3016bb0ad..744dd5c48 100644 --- a/contrib/gcc/cp/name-lookup.c +++ b/contrib/gcc/cp/name-lookup.c @@ -2924,7 +2924,7 @@ set_decl_namespace (tree decl, tree scope, bool friendp) /* See whether this has been declared in the namespace. */ old = lookup_qualified_name (scope, DECL_NAME (decl), false, true); - if (!old) + if (old == error_mark_node) /* No old declaration at all. */ goto complain; if (!is_overloaded_fn (decl)) @@ -3996,8 +3996,49 @@ lookup_name_real (tree name, int prefer_type, int nonclass, bool block_p, if (binding) { - /* Only namespace-scope bindings can be hidden. */ - gcc_assert (!hidden_name_p (binding)); + if (hidden_name_p (binding)) + { + /* A non namespace-scope binding can only be hidden if + we are in a local class, due to friend declarations. + In particular, consider: + + void f() { + struct A { + friend struct B; + void g() { B* b; } // error: B is hidden + } + struct B {}; + } + + The standard says that "B" is a local class in "f" + (but not nested within "A") -- but that name lookup + for "B" does not find this declaration until it is + declared directly with "f". + + In particular: + + [class.friend] + + If a friend declaration appears in a local class and + the name specified is an unqualified name, a prior + declaration is looked up without considering scopes + that are outside the innermost enclosing non-class + scope. For a friend class declaration, if there is no + prior declaration, the class that is specified + belongs to the innermost enclosing non-class scope, + but if it is subsequently referenced, its name is not + found by name lookup until a matching declaration is + provided in the innermost enclosing nonclass scope. + */ + gcc_assert (current_class_type && + LOCAL_CLASS_P (current_class_type)); + + /* This binding comes from a friend declaration in the local + class. The standard (11.4.8) states that the lookup can + only succeed if there is a non-hidden declaration in the + current scope, which is not the case here. */ + POP_TIMEVAR_AND_RETURN (TV_NAME_LOOKUP, NULL_TREE); + } val = binding; break; } diff --git a/contrib/gcc/cp/pt.c b/contrib/gcc/cp/pt.c index 477265d49..ac28afa41 100644 --- a/contrib/gcc/cp/pt.c +++ b/contrib/gcc/cp/pt.c @@ -1971,7 +1971,7 @@ check_explicit_specialization (tree declarator, context. */ fns = lookup_qualified_name (CP_DECL_CONTEXT (decl), dname, false, true); - if (!fns || !is_overloaded_fn (fns)) + if (fns == error_mark_node || !is_overloaded_fn (fns)) { error ("%qD is not a template function", dname); fns = error_mark_node; @@ -5288,6 +5288,15 @@ reopen_tinst_level (tree level) pop_tinst_level (); } +/* Returns the TINST_LEVEL which gives the original instantiation + context. */ + +tree +outermost_tinst_level (void) +{ + return tree_last (current_tinst_level); +} + /* DECL is a friend FUNCTION_DECL or TEMPLATE_DECL. ARGS is the vector of template arguments, as for tsubst. @@ -10453,6 +10462,8 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict) case TEMPLATE_TEMPLATE_PARM: case BOUND_TEMPLATE_TEMPLATE_PARM: tparm = TREE_VALUE (TREE_VEC_ELT (tparms, 0)); + if (tparm == error_mark_node) + return 1; if (TEMPLATE_TYPE_LEVEL (parm) != template_decl_level (tparm)) diff --git a/contrib/gcc/cp/semantics.c b/contrib/gcc/cp/semantics.c index 8ad197858..9c37b36ce 100644 --- a/contrib/gcc/cp/semantics.c +++ b/contrib/gcc/cp/semantics.c @@ -1320,8 +1320,13 @@ finish_label_stmt (tree name) void finish_label_decl (tree name) { - tree decl = declare_local_label (name); - add_decl_expr (decl); + if (!at_function_scope_p ()) + { + error ("__label__ declarations are only allowed in function scopes"); + return; + } + + add_decl_expr (declare_local_label (name)); } /* When DECL goes out of scope, make sure that CLEANUP is executed. */ diff --git a/contrib/gcc/cp/typeck.c b/contrib/gcc/cp/typeck.c index 4ae2f073a..6f0777274 100644 --- a/contrib/gcc/cp/typeck.c +++ b/contrib/gcc/cp/typeck.c @@ -6604,6 +6604,7 @@ check_return_expr (tree retval, bool *no_warning) && TREE_CODE (retval) == VAR_DECL && DECL_CONTEXT (retval) == current_function_decl && ! TREE_STATIC (retval) + && ! DECL_ANON_UNION_VAR_P (retval) && (DECL_ALIGN (retval) >= DECL_ALIGN (DECL_RESULT (current_function_decl))) && same_type_p ((TYPE_MAIN_VARIANT diff --git a/contrib/gcc/doc/contrib.texi b/contrib/gcc/doc/contrib.texi index 16c8af58d..7e3d4b11e 100644 --- a/contrib/gcc/doc/contrib.texi +++ b/contrib/gcc/doc/contrib.texi @@ -513,6 +513,10 @@ patches. @item Robert Lipe for OpenServer support, new testsuites, testing, etc. +@item +Chen Liqin for various S+core related fixes/improvement, and for +maintaining the S+core port. + @item Weiwen Liu for testing and various bug fixes. diff --git a/contrib/gcc/doc/extend.texi b/contrib/gcc/doc/extend.texi index 7d3c010a5..68996e53d 100644 --- a/contrib/gcc/doc/extend.texi +++ b/contrib/gcc/doc/extend.texi @@ -7211,6 +7211,52 @@ The following built-in functions are available when @option{-msse3} is used. Generates the @code{movddup} machine instruction as a load from memory. @end table +The following built-in functions are available when @option{-mssse3} is used. +All of them generate the machine instruction that is part of the name +with MMX registers. + +@smallexample +v2si __builtin_ia32_phaddd (v2si, v2si) +v4hi __builtin_ia32_phaddw (v4hi, v4hi) +v4hi __builtin_ia32_phaddsw (v4hi, v4hi) +v2si __builtin_ia32_phsubd (v2si, v2si) +v4hi __builtin_ia32_phsubw (v4hi, v4hi) +v4hi __builtin_ia32_phsubsw (v4hi, v4hi) +v8qi __builtin_ia32_pmaddubsw (v8qi, v8qi) +v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi) +v8qi __builtin_ia32_pshufb (v8qi, v8qi) +v8qi __builtin_ia32_psignb (v8qi, v8qi) +v2si __builtin_ia32_psignd (v2si, v2si) +v4hi __builtin_ia32_psignw (v4hi, v4hi) +long long __builtin_ia32_palignr (long long, long long, int) +v8qi __builtin_ia32_pabsb (v8qi) +v2si __builtin_ia32_pabsd (v2si) +v4hi __builtin_ia32_pabsw (v4hi) +@end smallexample + +The following built-in functions are available when @option{-mssse3} is used. +All of them generate the machine instruction that is part of the name +with SSE registers. + +@smallexample +v4si __builtin_ia32_phaddd128 (v4si, v4si) +v8hi __builtin_ia32_phaddw128 (v8hi, v8hi) +v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi) +v4si __builtin_ia32_phsubd128 (v4si, v4si) +v8hi __builtin_ia32_phsubw128 (v8hi, v8hi) +v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi) +v16qi __builtin_ia32_pmaddubsw128 (v16qi, v16qi) +v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi) +v16qi __builtin_ia32_pshufb128 (v16qi, v16qi) +v16qi __builtin_ia32_psignb128 (v16qi, v16qi) +v4si __builtin_ia32_psignd128 (v4si, v4si) +v8hi __builtin_ia32_psignw128 (v8hi, v8hi) +v2di __builtin_ia32_palignr (v2di, v2di, int) +v16qi __builtin_ia32_pabsb128 (v16qi) +v4si __builtin_ia32_pabsd128 (v4si) +v8hi __builtin_ia32_pabsw128 (v8hi) +@end smallexample + The following built-in functions are available when @option{-m3dnow} is used. All of them generate the machine instruction that is part of the name. diff --git a/contrib/gcc/doc/gcc.1 b/contrib/gcc/doc/gcc.1 index 141b5e67d..a2e3716f8 100644 --- a/contrib/gcc/doc/gcc.1 +++ b/contrib/gcc/doc/gcc.1 @@ -129,7 +129,7 @@ .\" ======================================================================== .\" .IX Title "GCC 1" -.TH GCC 1 "2011-02-20" "gcc-4.2.1" "GNU" +.TH GCC 1 "2011-03-07" "gcc-4.2.1" "GNU" .SH "NAME" gcc \- GNU project C and C++ compiler .SH "SYNOPSIS" @@ -541,7 +541,7 @@ in the following sections. \&\-mno\-fp\-ret\-in\-387 \-msoft\-float \-msvr3\-shlib \&\-mno\-wide\-multiply \-mrtd \-malign\-double \&\-mpreferred\-stack\-boundary=\fR\fInum\fR -\&\fB\-mmmx \-msse \-msse2 \-msse3 \-m3dnow +\&\fB\-mmmx \-msse \-msse2 \-msse3 \-mssse3 \-m3dnow \&\-mthreads \-mno\-align\-stringops \-minline\-all\-stringops \&\-mpush\-args \-maccumulate\-outgoing\-args \-m128bit\-long\-double \&\-m96bit\-long\-double \-mregparm=\fR\fInum\fR \fB\-msseregparm @@ -8733,6 +8733,10 @@ set support. .IX Item "nocona" Improved version of Intel Pentium4 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0, \&\s-1SSE2\s0 and \s-1SSE3\s0 instruction set support. +.IP "\fIcore2\fR" 4 +.IX Item "core2" +Intel Core2 \s-1CPU\s0 with 64\-bit extensions, \s-1MMX\s0, \s-1SSE\s0, \s-1SSE2\s0, \s-1SSE3\s0 and \s-1SSSE3\s0 +instruction set support. .IP "\fIk6\fR" 4 .IX Item "k6" \&\s-1AMD\s0 K6 \s-1CPU\s0 with \s-1MMX\s0 instruction set support. @@ -8770,6 +8774,9 @@ implemented for this chip.) .IX Item "c3-2" Via C3\-2 \s-1CPU\s0 with \s-1MMX\s0 and \s-1SSE\s0 instruction set support. (No scheduling is implemented for this chip.) +.IP "\fIgeode\fR" 4 +.IX Item "geode" +Embedded AMD \s-1CPU\s0 with \s-1MMX\s0 and 3dNOW! instruction set support. .RE .RS 4 .Sp @@ -9050,15 +9057,20 @@ preferred alignment to \fB\-mpreferred\-stack\-boundary=2\fR. .IX Item "-msse3" .IP "\fB\-mno\-sse3\fR" 4 .IX Item "-mno-sse3" +.IP "\fB\-mssse3\fR" 4 +.IX Item "-mssse3" +.IP "\fB\-mno\-ssse3\fR" 4 +.IX Item "-mno-ssse3" .IP "\fB\-m3dnow\fR" 4 .IX Item "-m3dnow" .IP "\fB\-mno\-3dnow\fR" 4 .IX Item "-mno-3dnow" .PD These switches enable or disable the use of instructions in the \s-1MMX\s0, -\&\s-1SSE\s0, \s-1SSE2\s0 or 3DNow! extended instruction sets. These extensions are -also available as built-in functions: see \fBX86 Built-in Functions\fR, -for details of the functions enabled and disabled by these switches. +\&\s-1SSE\s0, \s-1SSE2\s0, \s-1SSE3\s0, \s-1SSSE3\s0 or 3DNow! extended instruction sets. +These extensions are also available as built-in functions: see +\fBX86 Built-in Functions\fR, for details of the functions enabled and +disabled by these switches. .Sp To have \s-1SSE/SSE2\s0 instructions generated automatically from floating-point code (as opposed to 387 instructions), see \fB\-mfpmath=sse\fR. diff --git a/contrib/gcc/doc/invoke.texi b/contrib/gcc/doc/invoke.texi index 87777064f..d7a0a4008 100644 --- a/contrib/gcc/doc/invoke.texi +++ b/contrib/gcc/doc/invoke.texi @@ -533,7 +533,7 @@ Objective-C and Objective-C++ Dialects}. -mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} @gol --mmmx -msse -msse2 -msse3 -m3dnow @gol +-mmmx -msse -msse2 -msse3 -mssse3 -m3dnow @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -msseregparm @gol @@ -9369,6 +9369,9 @@ set support. @item nocona Improved version of Intel Pentium4 CPU with 64-bit extensions, MMX, SSE, SSE2 and SSE3 instruction set support. +@item core2 +Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 +instruction set support. @item k6 AMD K6 CPU with MMX instruction set support. @item k6-2, k6-3 @@ -9396,6 +9399,8 @@ implemented for this chip.) @item c3-2 Via C3-2 CPU with MMX and SSE instruction set support. (No scheduling is implemented for this chip.) +@item geode +Embedded AMD CPU with MMX and 3dNOW! instruction set support. @end table While picking a specific @var{cpu-type} will schedule things appropriately @@ -9674,6 +9679,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-sse2 @item -msse3 @itemx -mno-sse3 +@item -mssse3 +@itemx -mno-ssse3 @item -m3dnow @itemx -mno-3dnow @opindex mmmx @@ -9683,9 +9690,10 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @opindex m3dnow @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, -SSE, SSE2 or 3DNow! extended instruction sets. These extensions are -also available as built-in functions: see @ref{X86 Built-in Functions}, -for details of the functions enabled and disabled by these switches. +SSE, SSE2, SSE3, SSSE3 or 3DNow! extended instruction sets. +These extensions are also available as built-in functions: see +@ref{X86 Built-in Functions}, for details of the functions enabled and +disabled by these switches. To have SSE/SSE2 instructions generated automatically from floating-point code (as opposed to 387 instructions), see @option{-mfpmath=sse}. diff --git a/contrib/gcc/dwarf2out.c b/contrib/gcc/dwarf2out.c index 18ea2ad4b..25a906bad 100644 --- a/contrib/gcc/dwarf2out.c +++ b/contrib/gcc/dwarf2out.c @@ -10065,6 +10065,43 @@ rtl_for_decl_init (tree init, tree type) else if (initializer_constant_valid_p (init, type) && ! walk_tree (&init, reference_to_unused, NULL, NULL)) { + /* Convert vector CONSTRUCTOR initializers to VECTOR_CST if + possible. */ + if (TREE_CODE (type) == VECTOR_TYPE) + switch (TREE_CODE (init)) + { + case VECTOR_CST: + break; + case CONSTRUCTOR: + if (TREE_CONSTANT (init)) + { + VEC(constructor_elt,gc) *elts = CONSTRUCTOR_ELTS (init); + bool constant_p = true; + tree value; + unsigned HOST_WIDE_INT ix; + + /* Even when ctor is constant, it might contain non-*_CST + elements (e.g. { 1.0/0.0 - 1.0/0.0, 0.0 }) and those don't + belong into VECTOR_CST nodes. */ + FOR_EACH_CONSTRUCTOR_VALUE (elts, ix, value) + if (!CONSTANT_CLASS_P (value)) + { + constant_p = false; + break; + } + + if (constant_p) + { + init = build_vector_from_ctor (type, elts); + break; + } + } + /* FALLTHRU */ + + default: + return NULL; + } + rtl = expand_expr (init, NULL_RTX, VOIDmode, EXPAND_INITIALIZER); /* If expand_expr returns a MEM, it wasn't immediate. */ @@ -13197,7 +13234,8 @@ gen_decl_die (tree decl, dw_die_ref context_die) was generated within the original definition of an inline function) we have to generate a special (abbreviated) DW_TAG_structure_type, DW_TAG_union_type, or DW_TAG_enumeration_type DIE here. */ - if (TYPE_DECL_IS_STUB (decl) && decl_ultimate_origin (decl) != NULL_TREE) + if (TYPE_DECL_IS_STUB (decl) && decl_ultimate_origin (decl) != NULL_TREE + && is_tagged_type (TREE_TYPE (decl))) { gen_tagged_type_instantiation_die (TREE_TYPE (decl), context_die); break; diff --git a/contrib/gcc/expr.c b/contrib/gcc/expr.c index c6bc4263b..1773fa179 100644 --- a/contrib/gcc/expr.c +++ b/contrib/gcc/expr.c @@ -5654,7 +5654,6 @@ get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize, enum machine_mode mode = VOIDmode; tree offset = size_zero_node; tree bit_offset = bitsize_zero_node; - tree tem; /* First get the mode, signedness, and size. We do this from just the outermost expression. */ @@ -5690,6 +5689,8 @@ get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize, *pbitsize = tree_low_cst (size_tree, 1); } + *pmode = mode; + /* Compute cumulative bit-offset for nested component-refs and array-refs, and find the ultimate containing object. */ while (1) @@ -5774,21 +5775,69 @@ get_inner_reference (tree exp, HOST_WIDE_INT *pbitsize, done: /* If OFFSET is constant, see if we can return the whole thing as a - constant bit position. Otherwise, split it up. */ - if (host_integerp (offset, 0) - && 0 != (tem = size_binop (MULT_EXPR, - fold_convert (bitsizetype, offset), - bitsize_unit_node)) - && 0 != (tem = size_binop (PLUS_EXPR, tem, bit_offset)) - && host_integerp (tem, 0)) - *pbitpos = tree_low_cst (tem, 0), *poffset = 0; - else - *pbitpos = tree_low_cst (bit_offset, 0), *poffset = offset; + constant bit position. Make sure to handle overflow during + this conversion. */ + if (host_integerp (offset, 0)) + { + double_int tem = double_int_mul (tree_to_double_int (offset), + uhwi_to_double_int (BITS_PER_UNIT)); + tem = double_int_add (tem, tree_to_double_int (bit_offset)); + if (double_int_fits_in_shwi_p (tem)) + { + *pbitpos = double_int_to_shwi (tem); + *poffset = NULL_TREE; + return exp; + } + } + + /* Otherwise, split it up. */ + *pbitpos = tree_low_cst (bit_offset, 0); + *poffset = offset; - *pmode = mode; return exp; } +/* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF, + look for whether EXP or any nested component-refs within EXP is marked + as PACKED. */ + +bool +contains_packed_reference (tree exp) +{ + bool packed_p = false; + + while (1) + { + switch (TREE_CODE (exp)) + { + case COMPONENT_REF: + { + tree field = TREE_OPERAND (exp, 1); + packed_p = DECL_PACKED (field) + || TYPE_PACKED (TREE_TYPE (field)) + || TYPE_PACKED (TREE_TYPE (exp)); + if (packed_p) + goto done; + } + break; + + case BIT_FIELD_REF: + case ARRAY_REF: + case ARRAY_RANGE_REF: + case REALPART_EXPR: + case IMAGPART_EXPR: + case VIEW_CONVERT_EXPR: + break; + + default: + goto done; + } + exp = TREE_OPERAND (exp, 0); + } + done: + return packed_p; +} + /* Return a tree of sizetype representing the size, in bytes, of the element of EXP, an ARRAY_REF. */ diff --git a/contrib/gcc/fold-const.c b/contrib/gcc/fold-const.c index ea16eae74..0753caa15 100644 --- a/contrib/gcc/fold-const.c +++ b/contrib/gcc/fold-const.c @@ -9387,6 +9387,7 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) /* ~X | X is -1. */ if (TREE_CODE (arg0) == BIT_NOT_EXPR + && INTEGRAL_TYPE_P (TREE_TYPE (arg1)) && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0)) { t1 = build_int_cst (type, -1); @@ -9396,6 +9397,7 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) /* X | ~X is -1. */ if (TREE_CODE (arg1) == BIT_NOT_EXPR + && INTEGRAL_TYPE_P (TREE_TYPE (arg0)) && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0)) { t1 = build_int_cst (type, -1); @@ -9503,6 +9505,7 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) /* ~X ^ X is -1. */ if (TREE_CODE (arg0) == BIT_NOT_EXPR + && INTEGRAL_TYPE_P (TREE_TYPE (arg1)) && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0)) { t1 = build_int_cst (type, -1); @@ -9512,6 +9515,7 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) /* X ^ ~X is -1. */ if (TREE_CODE (arg1) == BIT_NOT_EXPR + && INTEGRAL_TYPE_P (TREE_TYPE (arg0)) && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0)) { t1 = build_int_cst (type, -1); diff --git a/contrib/gcc/gimplify.c b/contrib/gcc/gimplify.c index e6c0d1f24..80dcd1ac5 100644 --- a/contrib/gcc/gimplify.c +++ b/contrib/gcc/gimplify.c @@ -3532,8 +3532,16 @@ gimplify_modify_expr (tree *expr_p, tree *pre_p, tree *post_p, bool want_value) gcc_assert (TREE_CODE (*expr_p) == MODIFY_EXPR || TREE_CODE (*expr_p) == INIT_EXPR); - /* For zero sized types only gimplify the left hand side and right hand side - as statements and throw away the assignment. */ + /* See if any simplifications can be done based on what the RHS is. */ + ret = gimplify_modify_expr_rhs (expr_p, from_p, to_p, pre_p, post_p, + want_value); + if (ret != GS_UNHANDLED) + return ret; + + /* For zero sized types only gimplify the left hand side and right hand + side as statements and throw away the assignment. Do this after + gimplify_modify_expr_rhs so we handle TARGET_EXPRs of addressable + types properly. */ if (zero_sized_type (TREE_TYPE (*from_p))) { gimplify_stmt (from_p); @@ -3544,12 +3552,6 @@ gimplify_modify_expr (tree *expr_p, tree *pre_p, tree *post_p, bool want_value) return GS_ALL_DONE; } - /* See if any simplifications can be done based on what the RHS is. */ - ret = gimplify_modify_expr_rhs (expr_p, from_p, to_p, pre_p, post_p, - want_value); - if (ret != GS_UNHANDLED) - return ret; - /* If the value being copied is of variable width, compute the length of the copy into a WITH_SIZE_EXPR. Note that we need to do this before gimplifying any of the operands so that we can resolve any diff --git a/contrib/gcc/reload1.c b/contrib/gcc/reload1.c index a2449da09..20da1b67b 100644 --- a/contrib/gcc/reload1.c +++ b/contrib/gcc/reload1.c @@ -5451,7 +5451,14 @@ choose_reload_regs (struct insn_chain *chain) for (j = 0; j < n_reloads; j++) { reload_order[j] = j; - reload_spill_index[j] = -1; + if (rld[j].reg_rtx != NULL_RTX) + { + gcc_assert (REG_P (rld[j].reg_rtx) + && HARD_REGISTER_P (rld[j].reg_rtx)); + reload_spill_index[j] = REGNO (rld[j].reg_rtx); + } + else + reload_spill_index[j] = -1; if (rld[j].nregs > 1) { diff --git a/contrib/gcc/simplify-rtx.c b/contrib/gcc/simplify-rtx.c index 10be1e18d..b088d9208 100644 --- a/contrib/gcc/simplify-rtx.c +++ b/contrib/gcc/simplify-rtx.c @@ -589,7 +589,8 @@ simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op) /* (neg (lt x 0)) is (ashiftrt X C) if STORE_FLAG_VALUE is 1. */ /* (neg (lt x 0)) is (lshiftrt X C) if STORE_FLAG_VALUE is -1. */ if (GET_CODE (op) == LT - && XEXP (op, 1) == const0_rtx) + && XEXP (op, 1) == const0_rtx + && SCALAR_INT_MODE_P (GET_MODE (XEXP (op, 0)))) { enum machine_mode inner = GET_MODE (XEXP (op, 0)); int isize = GET_MODE_BITSIZE (inner); diff --git a/contrib/gcc/target-def.h b/contrib/gcc/target-def.h index 0361a788e..653d6fd9b 100644 --- a/contrib/gcc/target-def.h +++ b/contrib/gcc/target-def.h @@ -337,9 +337,12 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. TARGET_SCHED_SET_SCHED_FLAGS} #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0 +#define TARGET_VECTOR_ALIGNMENT_REACHABLE \ + default_builtin_vector_alignment_reachable #define TARGET_VECTORIZE \ - {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD} + {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \ + TARGET_VECTOR_ALIGNMENT_REACHABLE} #define TARGET_DEFAULT_TARGET_FLAGS 0 diff --git a/contrib/gcc/target.h b/contrib/gcc/target.h index d0a38f2a5..f407bb757 100644 --- a/contrib/gcc/target.h +++ b/contrib/gcc/target.h @@ -375,6 +375,10 @@ struct gcc_target by the vectorizer, and return the decl of the target builtin function. */ tree (* builtin_mask_for_load) (void); + + /* Return true if vector alignment is reachable (by peeling N + interations) for the given type. */ + bool (* vector_alignment_reachable) (tree, bool); } vectorize; /* The initial value of target_flags. */ diff --git a/contrib/gcc/targhooks.c b/contrib/gcc/targhooks.c index b164c842b..796bd2d6f 100644 --- a/contrib/gcc/targhooks.c +++ b/contrib/gcc/targhooks.c @@ -604,4 +604,20 @@ default_reloc_rw_mask (void) return flag_pic ? 3 : 0; } +bool +default_builtin_vector_alignment_reachable (tree type, bool is_packed) +{ + if (is_packed) + return false; + + /* Assuming that types whose size is > pointer-size are not guaranteed to be + naturally aligned. */ + if (tree_int_cst_compare (TYPE_SIZE (type), bitsize_int (POINTER_SIZE)) > 0) + return false; + + /* Assuming that types whose size is <= pointer-size + are naturally aligned. */ + return true; +} + #include "gt-targhooks.h" diff --git a/contrib/gcc/targhooks.h b/contrib/gcc/targhooks.h index 7884f159e..7f990e9a1 100644 --- a/contrib/gcc/targhooks.h +++ b/contrib/gcc/targhooks.h @@ -57,6 +57,8 @@ extern const char * default_invalid_within_doloop (rtx); extern bool default_narrow_bitfield (void); +extern bool default_builtin_vector_alignment_reachable (tree, bool); + /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ diff --git a/contrib/gcc/tree-if-conv.c b/contrib/gcc/tree-if-conv.c index 5b4a9a33c..722721ade 100644 --- a/contrib/gcc/tree-if-conv.c +++ b/contrib/gcc/tree-if-conv.c @@ -743,7 +743,7 @@ find_phi_replacement_condition (struct loop *loop, if (TREE_CODE (*cond) == TRUTH_NOT_EXPR) /* We can be smart here and choose inverted condition without switching bbs. */ - *cond = invert_truthvalue (*cond); + *cond = invert_truthvalue (*cond); else /* Select non loop header bb. */ first_edge = second_edge; @@ -762,9 +762,11 @@ find_phi_replacement_condition (struct loop *loop, /* Create temp. for the condition. Vectorizer prefers to have gimple value as condition. Various targets use different means to communicate - condition in vector compare operation. Using gimple value allows compiler - to emit vector compare and select RTL without exposing compare's result. */ - *cond = force_gimple_operand (*cond, &new_stmts, false, NULL_TREE); + condition in vector compare operation. Using gimple value allows + compiler to emit vector compare and select RTL without exposing + compare's result. */ + *cond = force_gimple_operand (unshare_expr (*cond), &new_stmts, + false, NULL_TREE); if (new_stmts) bsi_insert_before (bsi, new_stmts, BSI_SAME_STMT); if (!is_gimple_reg (*cond) && !is_gimple_condexpr (*cond)) diff --git a/contrib/gcc/tree-ssa-structalias.c b/contrib/gcc/tree-ssa-structalias.c index d2bc09ba3..29811ca0a 100644 --- a/contrib/gcc/tree-ssa-structalias.c +++ b/contrib/gcc/tree-ssa-structalias.c @@ -4350,6 +4350,75 @@ intra_create_variable_infos (void) process_constraint (new_constraint (lhs, rhs)); } +/* Structure used to put solution bitmaps in a hashtable so they can + be shared among variables with the same points-to set. */ + +typedef struct shared_bitmap_info +{ + bitmap pt_vars; + hashval_t hashcode; +} *shared_bitmap_info_t; + +static htab_t shared_bitmap_table; + +/* Hash function for a shared_bitmap_info_t */ + +static hashval_t +shared_bitmap_hash (const void *p) +{ + const shared_bitmap_info_t bi = (shared_bitmap_info_t) p; + return bi->hashcode; +} + +/* Equality function for two shared_bitmap_info_t's. */ + +static int +shared_bitmap_eq (const void *p1, const void *p2) +{ + const shared_bitmap_info_t sbi1 = (shared_bitmap_info_t) p1; + const shared_bitmap_info_t sbi2 = (shared_bitmap_info_t) p2; + return bitmap_equal_p (sbi1->pt_vars, sbi2->pt_vars); +} + +/* Lookup a bitmap in the shared bitmap hashtable, and return an already + existing instance if there is one, NULL otherwise. */ + +static bitmap +shared_bitmap_lookup (bitmap pt_vars) +{ + void **slot; + struct shared_bitmap_info sbi; + + sbi.pt_vars = pt_vars; + sbi.hashcode = bitmap_hash (pt_vars); + + slot = htab_find_slot_with_hash (shared_bitmap_table, &sbi, + sbi.hashcode, NO_INSERT); + if (!slot) + return NULL; + else + return ((shared_bitmap_info_t) *slot)->pt_vars; +} + + +/* Add a bitmap to the shared bitmap hashtable. */ + +static void +shared_bitmap_add (bitmap pt_vars) +{ + void **slot; + shared_bitmap_info_t sbi = XNEW (struct shared_bitmap_info); + + sbi->pt_vars = pt_vars; + sbi->hashcode = bitmap_hash (pt_vars); + + slot = htab_find_slot_with_hash (shared_bitmap_table, sbi, + sbi->hashcode, INSERT); + gcc_assert (!*slot); + *slot = (void *) sbi; +} + + /* Set bits in INTO corresponding to the variable uids in solution set FROM, which came from variable PTR. For variables that are actually dereferenced, we also use type @@ -4460,7 +4529,9 @@ find_what_p_points_to (tree p) struct ptr_info_def *pi = get_ptr_info (p); unsigned int i; bitmap_iterator bi; - + bitmap finished_solution; + bitmap result; + /* This variable may have been collapsed, let's get the real variable. */ vi = get_varinfo (find (vi->id)); @@ -4492,10 +4563,20 @@ find_what_p_points_to (tree p) if (pi->pt_anything) return false; - if (!pi->pt_vars) - pi->pt_vars = BITMAP_GGC_ALLOC (); + finished_solution = BITMAP_GGC_ALLOC (); + set_uids_in_ptset (vi->decl, finished_solution, vi->solution); + result = shared_bitmap_lookup (finished_solution); - set_uids_in_ptset (vi->decl, pi->pt_vars, vi->solution); + if (!result) + { + shared_bitmap_add (finished_solution); + pi->pt_vars = finished_solution; + } + else + { + pi->pt_vars = result; + bitmap_clear (finished_solution); + } if (bitmap_empty_p (pi->pt_vars)) pi->pt_vars = NULL; @@ -4691,6 +4772,8 @@ init_alias_vars (void) vi_for_tree = pointer_map_create (); memset (&stats, 0, sizeof (stats)); + shared_bitmap_table = htab_create (511, shared_bitmap_hash, + shared_bitmap_eq, free); init_base_vars (); } @@ -4923,6 +5006,7 @@ delete_points_to_sets (void) varinfo_t v; int i; + htab_delete (shared_bitmap_table); if (dump_file && (dump_flags & TDF_STATS)) fprintf (dump_file, "Points to sets created:%d\n", stats.points_to_sets_created); diff --git a/contrib/gcc/tree-vect-analyze.c b/contrib/gcc/tree-vect-analyze.c index f247cd9ab..17cb9de1e 100644 --- a/contrib/gcc/tree-vect-analyze.c +++ b/contrib/gcc/tree-vect-analyze.c @@ -25,6 +25,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tm.h" #include "ggc.h" #include "tree.h" +#include "target.h" #include "basic-block.h" #include "diagnostic.h" #include "tree-flow.h" @@ -911,6 +912,57 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo) } +/* Function vector_alignment_reachable_p + + Return true if vector alignment for DR is reachable by peeling + a few loop iterations. Return false otherwise. */ + +static bool +vector_alignment_reachable_p (struct data_reference *dr) +{ + tree stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + + /* If misalignment is known at the compile time then allow peeling + only if natural alignment is reachable through peeling. */ + if (known_alignment_for_access_p (dr) && !aligned_access_p (dr)) + { + HOST_WIDE_INT elmsize = + int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize); + fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr)); + } + if (DR_MISALIGNMENT (dr) % elmsize) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "data size does not divide the misalignment.\n"); + return false; + } + } + + if (!known_alignment_for_access_p (dr)) + { + tree type = (TREE_TYPE (DR_REF (dr))); + tree ba = DR_BASE_OBJECT (dr); + bool is_packed = false; + + if (ba) + is_packed = contains_packed_reference (ba); + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed); + if (targetm.vectorize.vector_alignment_reachable (type, is_packed)) + return true; + else + return false; + } + + return true; +} + /* Function vect_enhance_data_refs_alignment This pass will use loop versioning and loop peeling in order to enhance @@ -1056,8 +1108,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) if (!DR_IS_READ (dr) && !aligned_access_p (dr)) { - dr0 = dr; - do_peeling = true; + do_peeling = vector_alignment_reachable_p (dr); + if (do_peeling) + dr0 = dr; + if (!do_peeling && vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "vector alignment may not be reachable"); break; } diff --git a/contrib/gcc/tree-vect-patterns.c b/contrib/gcc/tree-vect-patterns.c index 9ac11b07b..1c57c59eb 100644 --- a/contrib/gcc/tree-vect-patterns.c +++ b/contrib/gcc/tree-vect-patterns.c @@ -499,8 +499,9 @@ vect_pattern_recog_1 ( || (icode = optab->handlers[(int) vec_mode].insn_code) == CODE_FOR_nothing || (type_out - && (insn_data[icode].operand[0].mode != - TYPE_MODE (get_vectype_for_scalar_type (type_out))))) + && (!get_vectype_for_scalar_type (type_out) + || (insn_data[icode].operand[0].mode != + TYPE_MODE (get_vectype_for_scalar_type (type_out)))))) return; } diff --git a/contrib/gcc/tree.c b/contrib/gcc/tree.c index 90b860ce1..a4f061632 100644 --- a/contrib/gcc/tree.c +++ b/contrib/gcc/tree.c @@ -4540,7 +4540,8 @@ host_integerp (tree t, int pos) && (HOST_WIDE_INT) TREE_INT_CST_LOW (t) >= 0) || (! pos && TREE_INT_CST_HIGH (t) == -1 && (HOST_WIDE_INT) TREE_INT_CST_LOW (t) < 0 - && !TYPE_UNSIGNED (TREE_TYPE (t))) + && (!TYPE_UNSIGNED (TREE_TYPE (t)) + || TYPE_IS_SIZETYPE (TREE_TYPE (t)))) || (pos && TREE_INT_CST_HIGH (t) == 0))); } diff --git a/contrib/gcc/tree.h b/contrib/gcc/tree.h index 760a4507c..7861514dc 100644 --- a/contrib/gcc/tree.h +++ b/contrib/gcc/tree.h @@ -4112,6 +4112,12 @@ extern tree get_inner_reference (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, tree *, enum machine_mode *, int *, int *, bool); +/* Given an expression EXP that may be a COMPONENT_REF or an ARRAY_REF, + look for whether EXP or any nested component-refs within EXP is marked + as PACKED. */ + +extern bool contains_packed_reference (tree exp); + /* Return 1 if T is an expression that get_inner_reference handles. */ extern int handled_component_p (tree); diff --git a/contrib/libstdc++/ChangeLog b/contrib/libstdc++/ChangeLog index 7aa047877..d915bcf66 100644 --- a/contrib/libstdc++/ChangeLog +++ b/contrib/libstdc++/ChangeLog @@ -1,3 +1,18 @@ +2007-08-28 Paolo Carlini + + PR libstdc++/33128 + * include/tr1/random (uniform_int<>::_M_call): Deal with + __urng() returning negative values. + * testsuite/tr1/5_numerical_facilities/random/uniform_int/33128.cc: + New. + +2007-08-17 Johannes Willkomm + + PR libstdc++/33084 + * include/std/valarray (operator _Op(const _Tp&, + const valarray<>&)): Fix typo. + * testsuite/26_numerics/numeric_arrays/valarray/33084.cc: New. + 2007-07-19 Release Manager * GCC 4.2.1 released. diff --git a/contrib/libstdc++/include/std/std_valarray.h b/contrib/libstdc++/include/std/std_valarray.h index 87f30ec4e..6ef21500f 100644 --- a/contrib/libstdc++/include/std/std_valarray.h +++ b/contrib/libstdc++/include/std/std_valarray.h @@ -1010,7 +1010,7 @@ _DEFINE_VALARRAY_EXPR_AUGMENTED_ASSIGNMENT(>>, __shift_right) { \ typedef _BinClos<_Name, _Constant, _ValArray, _Tp, _Tp> _Closure; \ typedef typename __fun<_Name, _Tp>::result_type _Rt; \ - return _Expr<_Closure, _Tp>(_Closure(__t, __v)); \ + return _Expr<_Closure, _Rt>(_Closure(__t, __v)); \ } _DEFINE_BINARY_OPERATOR(+, __plus) diff --git a/contrib/libstdc++/include/tr1/random b/contrib/libstdc++/include/tr1/random index 56ea50857..c97f16d2a 100644 --- a/contrib/libstdc++/include/tr1/random +++ b/contrib/libstdc++/include/tr1/random @@ -1618,7 +1618,11 @@ _GLIBCXX_BEGIN_NAMESPACE(tr1) result_type _M_call(_UniformRandomNumberGenerator& __urng, result_type __min, result_type __max, true_type) - { return result_type(__urng() % (__max - __min + 1)) + __min; } + { + typedef typename __gnu_cxx::__add_unsigned::__type __utype; + return result_type(__utype(__urng()) % (__max - __min + 1)) + __min; + } template result_type diff --git a/share/examples/etc/make.conf b/share/examples/etc/make.conf index 91b7e171c..f8d1ca314 100644 --- a/share/examples/etc/make.conf +++ b/share/examples/etc/make.conf @@ -32,7 +32,7 @@ # Intel x86 architecture: # (AMD CPUs) opteron-sse3 opteron athlon64-sse3 athlon64 athlon-mp # athlon-xp athlon-4 athlon-tbird athlon k8-sse3 k8 -# k6-3 k6-2 k6 k5 +# geode k6-3 k6-2 k6 k5 # (Intel CPUs) core2 core nocona pentium4m pentium4 prescott # pentium3m pentium3 pentium-m pentium2 # pentiumpro pentium-mmx pentium i486 i386 diff --git a/share/mk/bsd.cpu.mk b/share/mk/bsd.cpu.mk index e04515950..48573bd66 100644 --- a/share/mk/bsd.cpu.mk +++ b/share/mk/bsd.cpu.mk @@ -29,7 +29,7 @@ MACHINE_CPU = mips . if ${MACHINE_ARCH} == "i386" . if ${CPUTYPE} == "nocona" CPUTYPE = prescott -. elif ${CPUTYPE} == "core" || ${CPUTYPE} == "core2" +. elif ${CPUTYPE} == "core" CPUTYPE = prescott . elif ${CPUTYPE} == "p4" CPUTYPE = pentium4 @@ -59,7 +59,7 @@ CPUTYPE = athlon-mp CPUTYPE = athlon . endif . elif ${MACHINE_ARCH} == "amd64" -. if ${CPUTYPE} == "prescott" || ${CPUTYPE} == "core2" +. if ${CPUTYPE} == "prescott" CPUTYPE = nocona . endif . elif ${MACHINE_ARCH} == "sparc64" @@ -86,6 +86,8 @@ CPUTYPE = ultrasparc3 _CPUCFLAGS = -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 . elif ${CPUTYPE} == "k5" _CPUCFLAGS = -march=pentium +. elif ${CPUTYPE} == "core2" +_CPUCFLAGS = -march=prescott . else _CPUCFLAGS = -march=${CPUTYPE} . endif # GCC on 'i386' @@ -168,7 +170,7 @@ MACHINE_CPU = athlon-xp athlon k7 3dnow sse2 sse mmx k6 k5 i586 i486 i386 MACHINE_CPU = athlon-xp athlon k7 3dnow sse mmx k6 k5 i586 i486 i386 . elif ${CPUTYPE} == "athlon" || ${CPUTYPE} == "athlon-tbird" MACHINE_CPU = athlon k7 3dnow mmx k6 k5 i586 i486 i386 -. elif ${CPUTYPE} == "k6-3" || ${CPUTYPE} == "k6-2" +. elif ${CPUTYPE} == "k6-3" || ${CPUTYPE} == "k6-2" || ${CPUTYPE} == "geode" MACHINE_CPU = 3dnow mmx k6 k5 i586 i486 i386 . elif ${CPUTYPE} == "k6" MACHINE_CPU = mmx k6 k5 i586 i486 i386 @@ -180,6 +182,8 @@ MACHINE_CPU = 3dnow mmx i586 i486 i386 MACHINE_CPU = sse mmx i586 i486 i386 . elif ${CPUTYPE} == "c7" MACHINE_CPU = sse3 sse2 sse i686 mmx i586 i486 i386 +. elif ${CPUTYPE} == "core2" +MACHINE_CPU = ssse3 sse3 sse2 sse i686 mmx i586 i486 i386 . elif ${CPUTYPE} == "prescott" MACHINE_CPU = sse3 sse2 sse i686 mmx i586 i486 i386 . elif ${CPUTYPE} == "pentium4" || ${CPUTYPE} == "pentium4m" || ${CPUTYPE} == "pentium-m" @@ -204,6 +208,8 @@ MACHINE_CPU = i386 MACHINE_CPU = k8 3dnow sse3 . elif ${CPUTYPE} == "opteron" || ${CPUTYPE} == "athlon64" || ${CPUTYPE} == "k8" MACHINE_CPU = k8 3dnow +. elif ${CPUTYPE} == "core2" +MACHINE_CPU = ssse3 sse3 . elif ${CPUTYPE} == "nocona" MACHINE_CPU = sse3 . endif -- 2.45.0