llvm.org GIT mirror llvm / 94fdc9b
[ARM] Implement interleaved access bug fix from r306334

r306334 fixed a bug in AArch64 dealing with wide interleaved accesses having pointer types. The bug also exists in ARM, so this patch copies over the fix.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307409 91177308-0d34-0410-b5e6-96231b3b80d8
Matthew Simpson, 3 years ago
2 changed file(s) with 32 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
1377813778
1377913779 // Convert the integer vector to pointer vector if the element is pointer.
1378013780 if (EltTy->isPointerTy())
13781 SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
13781 SubVec = Builder.CreateIntToPtr(
13782 SubVec, VectorType::get(SV->getType()->getVectorElementType(),
13783 VecTy->getVectorNumElements()));
1378213784
1378313785 SubVecs[SV].push_back(SubVec);
1378413786 }
853853 %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
854854 ret void
855855 }
856
857 define void @load_factor2_wide_pointer(<16 x i32*>* %ptr) {
858 ; NEON-LABEL: @load_factor2_wide_pointer(
859 ; NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32*>* %ptr to i32*
860 ; NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
861 ; NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4)
862 ; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
863 ; NEON-NEXT: [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x i32*>
864 ; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
865 ; NEON-NEXT: [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x i32*>
866 ; NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
867 ; NEON-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8*
868 ; NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP8]], i32 4)
869 ; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
870 ; NEON-NEXT: [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x i32*>
871 ; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
872 ; NEON-NEXT: [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x i32*>
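873 ; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32*> [[TMP4]], <4 x i32*> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
874 ; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>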
875 ; NEON-NEXT: ret void
876 ; NO_NEON-LABEL: @load_factor2_wide_pointer(
877 ; NO_NEON-NOT: @llvm.arm.neon
878 ; NO_NEON: ret void
879 ;
880 %interleaved.vec = load <16 x i32*>, <16 x i32*>* %ptr, align 4
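881 %v0 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
882 %v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>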
883 ret void
884 }