1 ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s 2 3 define void @test(<2 x i64>* %P, i8 %x) nounwind { 4 %tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1] 5 %tmp36 = insertelement <16 x i8> %tmp, i8 %x, i32 1 ; <<16 x i8>> [#uses=1] 6 %tmp38 = insertelement <16 x i8> %tmp36, i8 %x, i32 2 ; <<16 x i8>> [#uses=1] 7 %tmp40 = insertelement <16 x i8> %tmp38, i8 %x, i32 3 ; <<16 x i8>> [#uses=1] 8 %tmp42 = insertelement <16 x i8> %tmp40, i8 %x, i32 4 ; <<16 x i8>> [#uses=1] 9 %tmp44 = insertelement <16 x i8> %tmp42, i8 %x, i32 5 ; <<16 x i8>> [#uses=1] 10 %tmp46 = insertelement <16 x i8> %tmp44, i8 %x, i32 6 ; <<16 x i8>> [#uses=1] 11 %tmp48 = insertelement <16 x i8> %tmp46, i8 %x, i32 7 ; <<16 x i8>> [#uses=1] 12 %tmp50 = insertelement <16 x i8> %tmp48, i8 %x, i32 8 ; <<16 x i8>> [#uses=1] 13 %tmp52 = insertelement <16 x i8> %tmp50, i8 %x, i32 9 ; <<16 x i8>> [#uses=1] 14 %tmp54 = insertelement <16 x i8> %tmp52, i8 %x, i32 10 ; <<16 x i8>> [#uses=1] 15 %tmp56 = insertelement <16 x i8> %tmp54, i8 %x, i32 11 ; <<16 x i8>> [#uses=1] 16 %tmp58 = insertelement <16 x i8> %tmp56, i8 %x, i32 12 ; <<16 x i8>> [#uses=1] 17 %tmp60 = insertelement <16 x i8> %tmp58, i8 %x, i32 13 ; <<16 x i8>> [#uses=1] 18 %tmp62 = insertelement <16 x i8> %tmp60, i8 %x, i32 14 ; <<16 x i8>> [#uses=1] 19 %tmp64 = insertelement <16 x i8> %tmp62, i8 %x, i32 15 ; <<16 x i8>> [#uses=1] 20 %tmp68 = load <2 x i64>* %P ; <<2 x i64>> [#uses=1] 21 %tmp71 = bitcast <2 x i64> %tmp68 to <16 x i8> ; <<16 x i8>> [#uses=1] 22 %tmp73 = add <16 x i8> %tmp71, %tmp64 ; <<16 x i8>> [#uses=1] 23 %tmp73.upgrd.1 = bitcast <16 x i8> %tmp73 to <2 x i64> ; <<2 x i64>> [#uses=1] 24 store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P 25 ret void 26 27 ; CHECK-LABEL: test: 28 ; CHECK-NOT: pshufd 29 ; CHECK: punpcklbw 30 ; CHECK: punpcklbw 31 ; CHECK: pshufd $0 32 ; CHECK-NOT: pshufd 33 } 34