llvm.org GIT mirror llvm / 5a5be13
[ARM][MVE] Enable truncating masked stores Allow us to generate truncating masked store which take v4i32 and v8i16 vectors and can store to v4i8, v4i16 and v8i8 and memory. Removed support for unaligned masked stores. Differential Revision: https://reviews.llvm.org/D68461 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375108 91177308-0d34-0410-b5e6-96231b3b80d8 Sam Parker 11 months ago
4 changed file(s) with 785 addition(s) and 86 deletion(s). Raw diff Collapse all Expand all
51175117 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
51185118 }]>;
51195119
5120 def alignedmaskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5121 (masked_st node:$val, node:$ptr, node:$pred), [{
5122 return cast(N)->getAlignment() >= 4;
5120 def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5121 (masked_st node:$val, node:$ptr, node:$pred), [{
5122 return cast(N)->getMemoryVT().getScalarType() == MVT::i8;
51235123 }]>;
5124 def alignedmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5125 (masked_st node:$val, node:$ptr, node:$pred), [{
5126 return cast(N)->getAlignment() >= 2;
5124 def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5125 (maskedstore8 node:$val, node:$ptr, node:$pred), [{
5126 return cast(N)->isTruncatingStore();
51275127 }]>;
5128
5129 def maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5130 (masked_st node:$val, node:$ptr, node:$pred)>;
5128 def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5129 (masked_st node:$val, node:$ptr, node:$pred), [{
5130 auto *St = cast(N);
5131 EVT ScalarVT = St->getMemoryVT().getScalarType();
5132 return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
5133 }]>;
5134
5135 def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5136 (maskedstore16 node:$val, node:$ptr, node:$pred), [{
5137 return cast(N)->isTruncatingStore();
5138 }]>;
5139 def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
5140 (masked_st node:$val, node:$ptr, node:$pred), [{
5141 auto *St = cast(N);
5142 EVT ScalarVT = St->getMemoryVT().getScalarType();
5143 return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
5144 }]>;
51315145
51325146 let Predicates = [HasMVEInt, IsLE] in {
51335147 // Stores
51475161 defm : MVE_vector_offset_store;
51485162 defm : MVE_vector_offset_store;
51495163 defm : MVE_vector_offset_store;
5150
5151 // Unaligned masked stores (aligned are below)
5152 def : Pat<(maskedstore (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5153 (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5154 def : Pat<(maskedstore (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5155 (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5156 def : Pat<(maskedstore (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5157 (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5158 def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5159 (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
51605164 }
51615165
51625166 let Predicates = [HasMVEInt, IsBE] in {
52115215 def : MVE_vector_offset_store_typed;
52125216 def : MVE_vector_offset_store_typed;
52135217 def : MVE_vector_offset_store_typed;
5214
5215 // Unaligned masked stores (aligned are below)
5216 def : Pat<(maskedstore (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5217 (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5218 def : Pat<(maskedstore (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5219 (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5220 def : Pat<(maskedstore (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5221 (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5222 def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5223 (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
52245218 }
52255219
52265220 let Predicates = [HasMVEInt] in {
52275221 // Aligned masked store, shared between LE and BE
5228 def : MVE_vector_maskedstore_typed;
5229 def : MVE_vector_maskedstore_typed;
5230 def : MVE_vector_maskedstore_typed;
5231 def : MVE_vector_maskedstore_typed;
5232 def : MVE_vector_maskedstore_typed4f32, MVE_VSTRWU32, alignedmaskedstore32, 2>;
5222 def : MVE_vector_maskedstore_typed16i8, MVE_VSTRBU8, maskedstore8, 0>;
5223 def : MVE_vector_maskedstore_typed;
5224 def : MVE_vector_maskedstore_typed;
5225 def : MVE_vector_maskedstore_typed;
5226 def : MVE_vector_maskedstore_typed;
5227 // Truncating stores
5228 def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5229 (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5230 def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
5231 (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
5232 def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred),
5233 (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>;
52335234 // Aligned masked loads
52345235 def : MVE_vector_maskedload_typed;
52355236 def : MVE_vector_maskedload_typed;
153153 }
154154
155155 bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment);
156
156157 bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) {
157158 return isLegalMaskedLoad(DataTy, Alignment);
158159 }
782782 }
783783
784784 define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *%src) {
785 ; CHECK-LE-LABEL: foo_trunc_v8i8_v8i16:
786 ; CHECK-LE: @ %bb.0: @ %entry
787 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
788 ; CHECK-LE-NEXT: vptt.s16 gt, q0, zr
789 ; CHECK-LE-NEXT: vldrht.u16 q0, [r2]
790 ; CHECK-LE-NEXT: vstrbt.8 q0, [r0]
791 ; CHECK-LE-NEXT: bx lr
792 ;
793 ; CHECK-BE-LABEL: foo_trunc_v8i8_v8i16:
794 ; CHECK-BE: @ %bb.0: @ %entry
795 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
796 ; CHECK-BE-NEXT: vpt.s16 gt, q0, zr
797 ; CHECK-BE-NEXT: vldrht.u16 q0, [r2]
798 ; CHECK-BE-NEXT: vrev16.8 q0, q0
799 ; CHECK-BE-NEXT: vpst
800 ; CHECK-BE-NEXT: vstrbt.8 q0, [r0]
801 ; CHECK-BE-NEXT: bx lr
785 ; CHECK-LABEL: foo_trunc_v8i8_v8i16:
786 ; CHECK: @ %bb.0: @ %entry
787 ; CHECK-NEXT: vldrh.u16 q0, [r1]
788 ; CHECK-NEXT: vptt.s16 gt, q0, zr
789 ; CHECK-NEXT: vldrht.u16 q0, [r2]
790 ; CHECK-NEXT: vstrbt.16 q0, [r0]
791 ; CHECK-NEXT: bx lr
802792 entry:
803793 %0 = load <8 x i16>, <8 x i16>* %mask, align 2
804794 %1 = icmp sgt <8 x i16> %0, zeroinitializer
809799 }
810800
811801 define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
812 ; CHECK-LE-LABEL: foo_trunc_v4i8_v4i32:
813 ; CHECK-LE: @ %bb.0: @ %entry
814 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
815 ; CHECK-LE-NEXT: vptt.s32 gt, q0, zr
816 ; CHECK-LE-NEXT: vldrwt.u32 q0, [r2]
817 ; CHECK-LE-NEXT: vstrbt.8 q0, [r0]
818 ; CHECK-LE-NEXT: bx lr
819 ;
820 ; CHECK-BE-LABEL: foo_trunc_v4i8_v4i32:
821 ; CHECK-BE: @ %bb.0: @ %entry
822 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
823 ; CHECK-BE-NEXT: vpt.s32 gt, q0, zr
824 ; CHECK-BE-NEXT: vldrwt.u32 q0, [r2]
825 ; CHECK-BE-NEXT: vrev32.8 q0, q0
826 ; CHECK-BE-NEXT: vpst
827 ; CHECK-BE-NEXT: vstrbt.8 q0, [r0]
828 ; CHECK-BE-NEXT: bx lr
802 ; CHECK-LABEL: foo_trunc_v4i8_v4i32:
803 ; CHECK: @ %bb.0: @ %entry
804 ; CHECK-NEXT: vldrw.u32 q0, [r1]
805 ; CHECK-NEXT: vptt.s32 gt, q0, zr
806 ; CHECK-NEXT: vldrwt.u32 q0, [r2]
807 ; CHECK-NEXT: vstrbt.32 q0, [r0]
808 ; CHECK-NEXT: bx lr
829809 entry:
830810 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
831811 %1 = icmp sgt <4 x i32> %0, zeroinitializer
836816 }
837817
838818 define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32> *%src) {
839 ; CHECK-LE-LABEL: foo_trunc_v4i16_v4i32:
840 ; CHECK-LE: @ %bb.0: @ %entry
841 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
842 ; CHECK-LE-NEXT: vptt.s32 gt, q0, zr
843 ; CHECK-LE-NEXT: vldrwt.u32 q0, [r2]
844 ; CHECK-LE-NEXT: vstrbt.8 q0, [r0]
845 ; CHECK-LE-NEXT: bx lr
846 ;
847 ; CHECK-BE-LABEL: foo_trunc_v4i16_v4i32:
848 ; CHECK-BE: @ %bb.0: @ %entry
849 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
850 ; CHECK-BE-NEXT: vpt.s32 gt, q0, zr
851 ; CHECK-BE-NEXT: vldrwt.u32 q0, [r2]
852 ; CHECK-BE-NEXT: vrev32.8 q0, q0
853 ; CHECK-BE-NEXT: vpst
854 ; CHECK-BE-NEXT: vstrbt.8 q0, [r0]
855 ; CHECK-BE-NEXT: bx lr
819 ; CHECK-LABEL: foo_trunc_v4i16_v4i32:
820 ; CHECK: @ %bb.0: @ %entry
821 ; CHECK-NEXT: vldrw.u32 q0, [r1]
822 ; CHECK-NEXT: vptt.s32 gt, q0, zr
823 ; CHECK-NEXT: vldrwt.u32 q0, [r2]
824 ; CHECK-NEXT: vstrht.32 q0, [r0]
825 ; CHECK-NEXT: bx lr
856826 entry:
857827 %0 = load <4 x i32>, <4 x i32>* %mask, align 4
858828 %1 = icmp sgt <4 x i32> %0, zeroinitializer
11011101 ret void
11021102 }
11031103
1104
1104 define arm_aapcs_vfpcc void @masked_v4i16(<4 x i16> *%dest, <4 x i32> %a) {
1105 ; CHECK-LE-LABEL: masked_v4i16:
1106 ; CHECK-LE: @ %bb.0: @ %entry
1107 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
1108 ; CHECK-LE-NEXT: vstrht.32 q0, [r0]
1109 ; CHECK-LE-NEXT: bx lr
1110 ;
1111 ; CHECK-BE-LABEL: masked_v4i16:
1112 ; CHECK-BE: @ %bb.0: @ %entry
1113 ; CHECK-BE-NEXT: vrev64.32 q1, q0
1114 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
1115 ; CHECK-BE-NEXT: vstrht.32 q1, [r0]
1116 ; CHECK-BE-NEXT: bx lr
1117 entry:
1118 %c = icmp sgt <4 x i32> %a, zeroinitializer
1119 %trunc = trunc <4 x i32> %a to <4 x i16>
1120 call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %trunc, <4 x i16>* %dest, i32 2, <4 x i1> %c)
1121 ret void
1122 }
1123
1124 define arm_aapcs_vfpcc void @masked_v4i8(<4 x i8> *%dest, <4 x i32> %a) {
1125 ; CHECK-LE-LABEL: masked_v4i8:
1126 ; CHECK-LE: @ %bb.0: @ %entry
1127 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr
1128 ; CHECK-LE-NEXT: vstrbt.32 q0, [r0]
1129 ; CHECK-LE-NEXT: bx lr
1130 ;
1131 ; CHECK-BE-LABEL: masked_v4i8:
1132 ; CHECK-BE: @ %bb.0: @ %entry
1133 ; CHECK-BE-NEXT: vrev64.32 q1, q0
1134 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
1135 ; CHECK-BE-NEXT: vstrbt.32 q1, [r0]
1136 ; CHECK-BE-NEXT: bx lr
1137 entry:
1138 %c = icmp sgt <4 x i32> %a, zeroinitializer
1139 %trunc = trunc <4 x i32> %a to <4 x i8>
1140 call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %trunc, <4 x i8>* %dest, i32 1, <4 x i1> %c)
1141 ret void
1142 }
1143
1144 define arm_aapcs_vfpcc void @masked_v8i8(<8 x i8> *%dest, <8 x i16> %a) {
1145 ; CHECK-LE-LABEL: masked_v8i8:
1146 ; CHECK-LE: @ %bb.0: @ %entry
1147 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr
1148 ; CHECK-LE-NEXT: vstrbt.16 q0, [r0]
1149 ; CHECK-LE-NEXT: bx lr
1150 ;
1151 ; CHECK-BE-LABEL: masked_v8i8:
1152 ; CHECK-BE: @ %bb.0: @ %entry
1153 ; CHECK-BE-NEXT: vrev64.16 q1, q0
1154 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr
1155 ; CHECK-BE-NEXT: vstrbt.16 q1, [r0]
1156 ; CHECK-BE-NEXT: bx lr
1157 entry:
1158 %c = icmp sgt <8 x i16> %a, zeroinitializer
1159 %trunc = trunc <8 x i16> %a to <8 x i8>
1160 call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %trunc, <8 x i8>* %dest, i32 1, <8 x i1> %c)
1161 ret void
1162 }
1163
1164 define arm_aapcs_vfpcc void @masked_v4i16_align1(<4 x i16> *%dest, <4 x i32> %a) {
1165 ; CHECK-LE-LABEL: masked_v4i16_align1:
1166 ; CHECK-LE: @ %bb.0: @ %entry
1167 ; CHECK-LE-NEXT: .pad #4
1168 ; CHECK-LE-NEXT: sub sp, #4
1169 ; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
1170 ; CHECK-LE-NEXT: vmrs r2, p0
1171 ; CHECK-LE-NEXT: and r1, r2, #1
1172 ; CHECK-LE-NEXT: rsbs r3, r1, #0
1173 ; CHECK-LE-NEXT: movs r1, #0
1174 ; CHECK-LE-NEXT: bfi r1, r3, #0, #1
1175 ; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
1176 ; CHECK-LE-NEXT: rsbs r3, r3, #0
1177 ; CHECK-LE-NEXT: bfi r1, r3, #1, #1
1178 ; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
1179 ; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
1180 ; CHECK-LE-NEXT: rsbs r3, r3, #0
1181 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1
1182 ; CHECK-LE-NEXT: rsbs r2, r2, #0
1183 ; CHECK-LE-NEXT: bfi r1, r2, #3, #1
1184 ; CHECK-LE-NEXT: lsls r2, r1, #31
1185 ; CHECK-LE-NEXT: itt ne
1186 ; CHECK-LE-NEXT: vmovne r2, s0
1187 ; CHECK-LE-NEXT: strhne r2, [r0]
1188 ; CHECK-LE-NEXT: lsls r2, r1, #30
1189 ; CHECK-LE-NEXT: itt mi
1190 ; CHECK-LE-NEXT: vmovmi r2, s1
1191 ; CHECK-LE-NEXT: strhmi r2, [r0, #2]
1192 ; CHECK-LE-NEXT: lsls r2, r1, #29
1193 ; CHECK-LE-NEXT: itt mi
1194 ; CHECK-LE-NEXT: vmovmi r2, s2
1195 ; CHECK-LE-NEXT: strhmi r2, [r0, #4]
1196 ; CHECK-LE-NEXT: lsls r1, r1, #28
1197 ; CHECK-LE-NEXT: itt mi
1198 ; CHECK-LE-NEXT: vmovmi r1, s3
1199 ; CHECK-LE-NEXT: strhmi r1, [r0, #6]
1200 ; CHECK-LE-NEXT: add sp, #4
1201 ; CHECK-LE-NEXT: bx lr
1202 ;
1203 ; CHECK-BE-LABEL: masked_v4i16_align1:
1204 ; CHECK-BE: @ %bb.0: @ %entry
1205 ; CHECK-BE-NEXT: .pad #4
1206 ; CHECK-BE-NEXT: sub sp, #4
1207 ; CHECK-BE-NEXT: vrev64.32 q1, q0
1208 ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
1209 ; CHECK-BE-NEXT: vmrs r2, p0
1210 ; CHECK-BE-NEXT: and r1, r2, #1
1211 ; CHECK-BE-NEXT: rsbs r3, r1, #0
1212 ; CHECK-BE-NEXT: movs r1, #0
1213 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1
1214 ; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
1215 ; CHECK-BE-NEXT: rsbs r3, r3, #0
1216 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1
1217 ; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
1218 ; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
1219 ; CHECK-BE-NEXT: rsbs r3, r3, #0
1220 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1
1221 ; CHECK-BE-NEXT: rsbs r2, r2, #0
1222 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1
1223 ; CHECK-BE-NEXT: lsls r2, r1, #31
1224 ; CHECK-BE-NEXT: itt ne
1225 ; CHECK-BE-NEXT: vmovne r2, s4
1226 ; CHECK-BE-NEXT: strhne r2, [r0]
1227 ; CHECK-BE-NEXT: lsls r2, r1, #30
1228 ; CHECK-BE-NEXT: itt mi
1229 ; CHECK-BE-NEXT: vmovmi r2, s5
1230 ; CHECK-BE-NEXT: strhmi r2, [r0, #2]
1231 ; CHECK-BE-NEXT: lsls r2, r1, #29
1232 ; CHECK-BE-NEXT: itt mi
1233 ; CHECK-BE-NEXT: vmovmi r2, s6
1234 ; CHECK-BE-NEXT: strhmi r2, [r0, #4]
1235 ; CHECK-BE-NEXT: lsls r1, r1, #28
1236 ; CHECK-BE-NEXT: itt mi
1237 ; CHECK-BE-NEXT: vmovmi r1, s7
1238 ; CHECK-BE-NEXT: strhmi r1, [r0, #6]
1239 ; CHECK-BE-NEXT: add sp, #4
1240 ; CHECK-BE-NEXT: bx lr
1241 entry:
1242 %c = icmp sgt <4 x i32> %a, zeroinitializer
1243 %trunc = trunc <4 x i32> %a to <4 x i16>
1244 call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %trunc, <4 x i16>* %dest, i32 1, <4 x i1> %c)
1245 ret void
1246 }
1247
1248 define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> %a) {
1249 ; CHECK-LE-LABEL: masked_v4f16_align4:
1250 ; CHECK-LE: @ %bb.0: @ %entry
1251 ; CHECK-LE-NEXT: .pad #4
1252 ; CHECK-LE-NEXT: sub sp, #4
1253 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
1254 ; CHECK-LE-NEXT: vcmp.f32 s0, #0
1255 ; CHECK-LE-NEXT: vmov r1, s4
1256 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s1
1257 ; CHECK-LE-NEXT: vmov r2, s4
1258 ; CHECK-LE-NEXT: vmov.16 q1[0], r1
1259 ; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s2
1260 ; CHECK-LE-NEXT: vmov.16 q1[1], r2
1261 ; CHECK-LE-NEXT: vmov r1, s8
1262 ; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s3
1263 ; CHECK-LE-NEXT: vmov.16 q1[2], r1
1264 ; CHECK-LE-NEXT: vmov r1, s8
1265 ; CHECK-LE-NEXT: vmov.16 q1[3], r1
1266 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1267 ; CHECK-LE-NEXT: mov.w r1, #0
1268 ; CHECK-LE-NEXT: vcmp.f32 s1, #0
1269 ; CHECK-LE-NEXT: it gt
1270 ; CHECK-LE-NEXT: movgt r1, #1
1271 ; CHECK-LE-NEXT: cmp r1, #0
1272 ; CHECK-LE-NEXT: cset r1, ne
1273 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1274 ; CHECK-LE-NEXT: and r1, r1, #1
1275 ; CHECK-LE-NEXT: vcmp.f32 s2, #0
1276 ; CHECK-LE-NEXT: rsb.w r3, r1, #0
1277 ; CHECK-LE-NEXT: mov.w r1, #0
1278 ; CHECK-LE-NEXT: bfi r1, r3, #0, #1
1279 ; CHECK-LE-NEXT: mov.w r3, #0
1280 ; CHECK-LE-NEXT: it gt
1281 ; CHECK-LE-NEXT: movgt r3, #1
1282 ; CHECK-LE-NEXT: cmp r3, #0
1283 ; CHECK-LE-NEXT: cset r3, ne
1284 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1285 ; CHECK-LE-NEXT: and r3, r3, #1
1286 ; CHECK-LE-NEXT: vcmp.f32 s3, #0
1287 ; CHECK-LE-NEXT: rsb.w r3, r3, #0
1288 ; CHECK-LE-NEXT: mov.w r2, #0
1289 ; CHECK-LE-NEXT: bfi r1, r3, #1, #1
1290 ; CHECK-LE-NEXT: mov.w r3, #0
1291 ; CHECK-LE-NEXT: it gt
1292 ; CHECK-LE-NEXT: movgt r3, #1
1293 ; CHECK-LE-NEXT: cmp r3, #0
1294 ; CHECK-LE-NEXT: cset r3, ne
1295 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1296 ; CHECK-LE-NEXT: it gt
1297 ; CHECK-LE-NEXT: movgt r2, #1
1298 ; CHECK-LE-NEXT: cmp r2, #0
1299 ; CHECK-LE-NEXT: and r3, r3, #1
1300 ; CHECK-LE-NEXT: cset r2, ne
1301 ; CHECK-LE-NEXT: and r2, r2, #1
1302 ; CHECK-LE-NEXT: rsbs r3, r3, #0
1303 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1
1304 ; CHECK-LE-NEXT: rsbs r2, r2, #0
1305 ; CHECK-LE-NEXT: bfi r1, r2, #3, #1
1306 ; CHECK-LE-NEXT: lsls r2, r1, #31
1307 ; CHECK-LE-NEXT: bne .LBB25_5
1308 ; CHECK-LE-NEXT: @ %bb.1: @ %else
1309 ; CHECK-LE-NEXT: lsls r2, r1, #30
1310 ; CHECK-LE-NEXT: bmi .LBB25_6
1311 ; CHECK-LE-NEXT: .LBB25_2: @ %else2
1312 ; CHECK-LE-NEXT: lsls r2, r1, #29
1313 ; CHECK-LE-NEXT: bmi .LBB25_7
1314 ; CHECK-LE-NEXT: .LBB25_3: @ %else4
1315 ; CHECK-LE-NEXT: lsls r1, r1, #28
1316 ; CHECK-LE-NEXT: bmi .LBB25_8
1317 ; CHECK-LE-NEXT: .LBB25_4: @ %else6
1318 ; CHECK-LE-NEXT: add sp, #4
1319 ; CHECK-LE-NEXT: bx lr
1320 ; CHECK-LE-NEXT: .LBB25_5: @ %cond.store
1321 ; CHECK-LE-NEXT: vstr.16 s4, [r0]
1322 ; CHECK-LE-NEXT: lsls r2, r1, #30
1323 ; CHECK-LE-NEXT: bpl .LBB25_2
1324 ; CHECK-LE-NEXT: .LBB25_6: @ %cond.store1
1325 ; CHECK-LE-NEXT: vmovx.f16 s0, s4
1326 ; CHECK-LE-NEXT: vstr.16 s0, [r0, #2]
1327 ; CHECK-LE-NEXT: lsls r2, r1, #29
1328 ; CHECK-LE-NEXT: bpl .LBB25_3
1329 ; CHECK-LE-NEXT: .LBB25_7: @ %cond.store3
1330 ; CHECK-LE-NEXT: vstr.16 s5, [r0, #4]
1331 ; CHECK-LE-NEXT: lsls r1, r1, #28
1332 ; CHECK-LE-NEXT: bpl .LBB25_4
1333 ; CHECK-LE-NEXT: .LBB25_8: @ %cond.store5
1334 ; CHECK-LE-NEXT: vmovx.f16 s0, s5
1335 ; CHECK-LE-NEXT: vstr.16 s0, [r0, #6]
1336 ; CHECK-LE-NEXT: add sp, #4
1337 ; CHECK-LE-NEXT: bx lr
1338 ;
1339 ; CHECK-BE-LABEL: masked_v4f16_align4:
1340 ; CHECK-BE: @ %bb.0: @ %entry
1341 ; CHECK-BE-NEXT: .pad #4
1342 ; CHECK-BE-NEXT: sub sp, #4
1343 ; CHECK-BE-NEXT: vrev64.32 q1, q0
1344 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
1345 ; CHECK-BE-NEXT: vcmp.f32 s4, #0
1346 ; CHECK-BE-NEXT: vmov r1, s0
1347 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s5
1348 ; CHECK-BE-NEXT: vmov r2, s0
1349 ; CHECK-BE-NEXT: vmov.16 q0[0], r1
1350 ; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s6
1351 ; CHECK-BE-NEXT: vmov.16 q0[1], r2
1352 ; CHECK-BE-NEXT: vmov r1, s8
1353 ; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s7
1354 ; CHECK-BE-NEXT: vmov.16 q0[2], r1
1355 ; CHECK-BE-NEXT: vmov r1, s8
1356 ; CHECK-BE-NEXT: vmov.16 q0[3], r1
1357 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1358 ; CHECK-BE-NEXT: mov.w r1, #0
1359 ; CHECK-BE-NEXT: vcmp.f32 s5, #0
1360 ; CHECK-BE-NEXT: it gt
1361 ; CHECK-BE-NEXT: movgt r1, #1
1362 ; CHECK-BE-NEXT: cmp r1, #0
1363 ; CHECK-BE-NEXT: cset r1, ne
1364 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1365 ; CHECK-BE-NEXT: and r1, r1, #1
1366 ; CHECK-BE-NEXT: vcmp.f32 s6, #0
1367 ; CHECK-BE-NEXT: rsb.w r3, r1, #0
1368 ; CHECK-BE-NEXT: mov.w r1, #0
1369 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1
1370 ; CHECK-BE-NEXT: mov.w r3, #0
1371 ; CHECK-BE-NEXT: it gt
1372 ; CHECK-BE-NEXT: movgt r3, #1
1373 ; CHECK-BE-NEXT: cmp r3, #0
1374 ; CHECK-BE-NEXT: cset r3, ne
1375 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1376 ; CHECK-BE-NEXT: and r3, r3, #1
1377 ; CHECK-BE-NEXT: vcmp.f32 s7, #0
1378 ; CHECK-BE-NEXT: rsb.w r3, r3, #0
1379 ; CHECK-BE-NEXT: mov.w r2, #0
1380 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1
1381 ; CHECK-BE-NEXT: mov.w r3, #0
1382 ; CHECK-BE-NEXT: it gt
1383 ; CHECK-BE-NEXT: movgt r3, #1
1384 ; CHECK-BE-NEXT: cmp r3, #0
1385 ; CHECK-BE-NEXT: cset r3, ne
1386 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1387 ; CHECK-BE-NEXT: it gt
1388 ; CHECK-BE-NEXT: movgt r2, #1
1389 ; CHECK-BE-NEXT: cmp r2, #0
1390 ; CHECK-BE-NEXT: and r3, r3, #1
1391 ; CHECK-BE-NEXT: cset r2, ne
1392 ; CHECK-BE-NEXT: and r2, r2, #1
1393 ; CHECK-BE-NEXT: rsbs r3, r3, #0
1394 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1
1395 ; CHECK-BE-NEXT: rsbs r2, r2, #0
1396 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1
1397 ; CHECK-BE-NEXT: lsls r2, r1, #31
1398 ; CHECK-BE-NEXT: bne .LBB25_5
1399 ; CHECK-BE-NEXT: @ %bb.1: @ %else
1400 ; CHECK-BE-NEXT: lsls r2, r1, #30
1401 ; CHECK-BE-NEXT: bmi .LBB25_6
1402 ; CHECK-BE-NEXT: .LBB25_2: @ %else2
1403 ; CHECK-BE-NEXT: lsls r2, r1, #29
1404 ; CHECK-BE-NEXT: bmi .LBB25_7
1405 ; CHECK-BE-NEXT: .LBB25_3: @ %else4
1406 ; CHECK-BE-NEXT: lsls r1, r1, #28
1407 ; CHECK-BE-NEXT: bmi .LBB25_8
1408 ; CHECK-BE-NEXT: .LBB25_4: @ %else6
1409 ; CHECK-BE-NEXT: add sp, #4
1410 ; CHECK-BE-NEXT: bx lr
1411 ; CHECK-BE-NEXT: .LBB25_5: @ %cond.store
1412 ; CHECK-BE-NEXT: vstr.16 s0, [r0]
1413 ; CHECK-BE-NEXT: lsls r2, r1, #30
1414 ; CHECK-BE-NEXT: bpl .LBB25_2
1415 ; CHECK-BE-NEXT: .LBB25_6: @ %cond.store1
1416 ; CHECK-BE-NEXT: vmovx.f16 s4, s0
1417 ; CHECK-BE-NEXT: vstr.16 s4, [r0, #2]
1418 ; CHECK-BE-NEXT: lsls r2, r1, #29
1419 ; CHECK-BE-NEXT: bpl .LBB25_3
1420 ; CHECK-BE-NEXT: .LBB25_7: @ %cond.store3
1421 ; CHECK-BE-NEXT: vstr.16 s1, [r0, #4]
1422 ; CHECK-BE-NEXT: lsls r1, r1, #28
1423 ; CHECK-BE-NEXT: bpl .LBB25_4
1424 ; CHECK-BE-NEXT: .LBB25_8: @ %cond.store5
1425 ; CHECK-BE-NEXT: vmovx.f16 s0, s1
1426 ; CHECK-BE-NEXT: vstr.16 s0, [r0, #6]
1427 ; CHECK-BE-NEXT: add sp, #4
1428 ; CHECK-BE-NEXT: bx lr
1429 entry:
1430 %c = fcmp ogt <4 x float> %a, zeroinitializer
1431 %trunc = fptrunc <4 x float> %a to <4 x half>
1432 call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 4, <4 x i1> %c)
1433 ret void
1434 }
1435
1436 define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> %a) {
1437 ; CHECK-LE-LABEL: masked_v4f16_align2:
1438 ; CHECK-LE: @ %bb.0: @ %entry
1439 ; CHECK-LE-NEXT: .pad #4
1440 ; CHECK-LE-NEXT: sub sp, #4
1441 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
1442 ; CHECK-LE-NEXT: vcmp.f32 s0, #0
1443 ; CHECK-LE-NEXT: vmov r1, s4
1444 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s1
1445 ; CHECK-LE-NEXT: vmov r2, s4
1446 ; CHECK-LE-NEXT: vmov.16 q1[0], r1
1447 ; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s2
1448 ; CHECK-LE-NEXT: vmov.16 q1[1], r2
1449 ; CHECK-LE-NEXT: vmov r1, s8
1450 ; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s3
1451 ; CHECK-LE-NEXT: vmov.16 q1[2], r1
1452 ; CHECK-LE-NEXT: vmov r1, s8
1453 ; CHECK-LE-NEXT: vmov.16 q1[3], r1
1454 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1455 ; CHECK-LE-NEXT: mov.w r1, #0
1456 ; CHECK-LE-NEXT: vcmp.f32 s1, #0
1457 ; CHECK-LE-NEXT: it gt
1458 ; CHECK-LE-NEXT: movgt r1, #1
1459 ; CHECK-LE-NEXT: cmp r1, #0
1460 ; CHECK-LE-NEXT: cset r1, ne
1461 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1462 ; CHECK-LE-NEXT: and r1, r1, #1
1463 ; CHECK-LE-NEXT: vcmp.f32 s2, #0
1464 ; CHECK-LE-NEXT: rsb.w r3, r1, #0
1465 ; CHECK-LE-NEXT: mov.w r1, #0
1466 ; CHECK-LE-NEXT: bfi r1, r3, #0, #1
1467 ; CHECK-LE-NEXT: mov.w r3, #0
1468 ; CHECK-LE-NEXT: it gt
1469 ; CHECK-LE-NEXT: movgt r3, #1
1470 ; CHECK-LE-NEXT: cmp r3, #0
1471 ; CHECK-LE-NEXT: cset r3, ne
1472 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1473 ; CHECK-LE-NEXT: and r3, r3, #1
1474 ; CHECK-LE-NEXT: vcmp.f32 s3, #0
1475 ; CHECK-LE-NEXT: rsb.w r3, r3, #0
1476 ; CHECK-LE-NEXT: mov.w r2, #0
1477 ; CHECK-LE-NEXT: bfi r1, r3, #1, #1
1478 ; CHECK-LE-NEXT: mov.w r3, #0
1479 ; CHECK-LE-NEXT: it gt
1480 ; CHECK-LE-NEXT: movgt r3, #1
1481 ; CHECK-LE-NEXT: cmp r3, #0
1482 ; CHECK-LE-NEXT: cset r3, ne
1483 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1484 ; CHECK-LE-NEXT: it gt
1485 ; CHECK-LE-NEXT: movgt r2, #1
1486 ; CHECK-LE-NEXT: cmp r2, #0
1487 ; CHECK-LE-NEXT: and r3, r3, #1
1488 ; CHECK-LE-NEXT: cset r2, ne
1489 ; CHECK-LE-NEXT: and r2, r2, #1
1490 ; CHECK-LE-NEXT: rsbs r3, r3, #0
1491 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1
1492 ; CHECK-LE-NEXT: rsbs r2, r2, #0
1493 ; CHECK-LE-NEXT: bfi r1, r2, #3, #1
1494 ; CHECK-LE-NEXT: lsls r2, r1, #31
1495 ; CHECK-LE-NEXT: bne .LBB26_5
1496 ; CHECK-LE-NEXT: @ %bb.1: @ %else
1497 ; CHECK-LE-NEXT: lsls r2, r1, #30
1498 ; CHECK-LE-NEXT: bmi .LBB26_6
1499 ; CHECK-LE-NEXT: .LBB26_2: @ %else2
1500 ; CHECK-LE-NEXT: lsls r2, r1, #29
1501 ; CHECK-LE-NEXT: bmi .LBB26_7
1502 ; CHECK-LE-NEXT: .LBB26_3: @ %else4
1503 ; CHECK-LE-NEXT: lsls r1, r1, #28
1504 ; CHECK-LE-NEXT: bmi .LBB26_8
1505 ; CHECK-LE-NEXT: .LBB26_4: @ %else6
1506 ; CHECK-LE-NEXT: add sp, #4
1507 ; CHECK-LE-NEXT: bx lr
1508 ; CHECK-LE-NEXT: .LBB26_5: @ %cond.store
1509 ; CHECK-LE-NEXT: vstr.16 s4, [r0]
1510 ; CHECK-LE-NEXT: lsls r2, r1, #30
1511 ; CHECK-LE-NEXT: bpl .LBB26_2
1512 ; CHECK-LE-NEXT: .LBB26_6: @ %cond.store1
1513 ; CHECK-LE-NEXT: vmovx.f16 s0, s4
1514 ; CHECK-LE-NEXT: vstr.16 s0, [r0, #2]
1515 ; CHECK-LE-NEXT: lsls r2, r1, #29
1516 ; CHECK-LE-NEXT: bpl .LBB26_3
1517 ; CHECK-LE-NEXT: .LBB26_7: @ %cond.store3
1518 ; CHECK-LE-NEXT: vstr.16 s5, [r0, #4]
1519 ; CHECK-LE-NEXT: lsls r1, r1, #28
1520 ; CHECK-LE-NEXT: bpl .LBB26_4
1521 ; CHECK-LE-NEXT: .LBB26_8: @ %cond.store5
1522 ; CHECK-LE-NEXT: vmovx.f16 s0, s5
1523 ; CHECK-LE-NEXT: vstr.16 s0, [r0, #6]
1524 ; CHECK-LE-NEXT: add sp, #4
1525 ; CHECK-LE-NEXT: bx lr
1526 ;
1527 ; CHECK-BE-LABEL: masked_v4f16_align2:
1528 ; CHECK-BE: @ %bb.0: @ %entry
1529 ; CHECK-BE-NEXT: .pad #4
1530 ; CHECK-BE-NEXT: sub sp, #4
1531 ; CHECK-BE-NEXT: vrev64.32 q1, q0
1532 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
1533 ; CHECK-BE-NEXT: vcmp.f32 s4, #0
1534 ; CHECK-BE-NEXT: vmov r1, s0
1535 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s5
1536 ; CHECK-BE-NEXT: vmov r2, s0
1537 ; CHECK-BE-NEXT: vmov.16 q0[0], r1
1538 ; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s6
1539 ; CHECK-BE-NEXT: vmov.16 q0[1], r2
1540 ; CHECK-BE-NEXT: vmov r1, s8
1541 ; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s7
1542 ; CHECK-BE-NEXT: vmov.16 q0[2], r1
1543 ; CHECK-BE-NEXT: vmov r1, s8
1544 ; CHECK-BE-NEXT: vmov.16 q0[3], r1
1545 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1546 ; CHECK-BE-NEXT: mov.w r1, #0
1547 ; CHECK-BE-NEXT: vcmp.f32 s5, #0
1548 ; CHECK-BE-NEXT: it gt
1549 ; CHECK-BE-NEXT: movgt r1, #1
1550 ; CHECK-BE-NEXT: cmp r1, #0
1551 ; CHECK-BE-NEXT: cset r1, ne
1552 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1553 ; CHECK-BE-NEXT: and r1, r1, #1
1554 ; CHECK-BE-NEXT: vcmp.f32 s6, #0
1555 ; CHECK-BE-NEXT: rsb.w r3, r1, #0
1556 ; CHECK-BE-NEXT: mov.w r1, #0
1557 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1
1558 ; CHECK-BE-NEXT: mov.w r3, #0
1559 ; CHECK-BE-NEXT: it gt
1560 ; CHECK-BE-NEXT: movgt r3, #1
1561 ; CHECK-BE-NEXT: cmp r3, #0
1562 ; CHECK-BE-NEXT: cset r3, ne
1563 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1564 ; CHECK-BE-NEXT: and r3, r3, #1
1565 ; CHECK-BE-NEXT: vcmp.f32 s7, #0
1566 ; CHECK-BE-NEXT: rsb.w r3, r3, #0
1567 ; CHECK-BE-NEXT: mov.w r2, #0
1568 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1
1569 ; CHECK-BE-NEXT: mov.w r3, #0
1570 ; CHECK-BE-NEXT: it gt
1571 ; CHECK-BE-NEXT: movgt r3, #1
1572 ; CHECK-BE-NEXT: cmp r3, #0
1573 ; CHECK-BE-NEXT: cset r3, ne
1574 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1575 ; CHECK-BE-NEXT: it gt
1576 ; CHECK-BE-NEXT: movgt r2, #1
1577 ; CHECK-BE-NEXT: cmp r2, #0
1578 ; CHECK-BE-NEXT: and r3, r3, #1
1579 ; CHECK-BE-NEXT: cset r2, ne
1580 ; CHECK-BE-NEXT: and r2, r2, #1
1581 ; CHECK-BE-NEXT: rsbs r3, r3, #0
1582 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1
1583 ; CHECK-BE-NEXT: rsbs r2, r2, #0
1584 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1
1585 ; CHECK-BE-NEXT: lsls r2, r1, #31
1586 ; CHECK-BE-NEXT: bne .LBB26_5
1587 ; CHECK-BE-NEXT: @ %bb.1: @ %else
1588 ; CHECK-BE-NEXT: lsls r2, r1, #30
1589 ; CHECK-BE-NEXT: bmi .LBB26_6
1590 ; CHECK-BE-NEXT: .LBB26_2: @ %else2
1591 ; CHECK-BE-NEXT: lsls r2, r1, #29
1592 ; CHECK-BE-NEXT: bmi .LBB26_7
1593 ; CHECK-BE-NEXT: .LBB26_3: @ %else4
1594 ; CHECK-BE-NEXT: lsls r1, r1, #28
1595 ; CHECK-BE-NEXT: bmi .LBB26_8
1596 ; CHECK-BE-NEXT: .LBB26_4: @ %else6
1597 ; CHECK-BE-NEXT: add sp, #4
1598 ; CHECK-BE-NEXT: bx lr
1599 ; CHECK-BE-NEXT: .LBB26_5: @ %cond.store
1600 ; CHECK-BE-NEXT: vstr.16 s0, [r0]
1601 ; CHECK-BE-NEXT: lsls r2, r1, #30
1602 ; CHECK-BE-NEXT: bpl .LBB26_2
1603 ; CHECK-BE-NEXT: .LBB26_6: @ %cond.store1
1604 ; CHECK-BE-NEXT: vmovx.f16 s4, s0
1605 ; CHECK-BE-NEXT: vstr.16 s4, [r0, #2]
1606 ; CHECK-BE-NEXT: lsls r2, r1, #29
1607 ; CHECK-BE-NEXT: bpl .LBB26_3
1608 ; CHECK-BE-NEXT: .LBB26_7: @ %cond.store3
1609 ; CHECK-BE-NEXT: vstr.16 s1, [r0, #4]
1610 ; CHECK-BE-NEXT: lsls r1, r1, #28
1611 ; CHECK-BE-NEXT: bpl .LBB26_4
1612 ; CHECK-BE-NEXT: .LBB26_8: @ %cond.store5
1613 ; CHECK-BE-NEXT: vmovx.f16 s0, s1
1614 ; CHECK-BE-NEXT: vstr.16 s0, [r0, #6]
1615 ; CHECK-BE-NEXT: add sp, #4
1616 ; CHECK-BE-NEXT: bx lr
1617 entry:
1618 %c = fcmp ogt <4 x float> %a, zeroinitializer
1619 %trunc = fptrunc <4 x float> %a to <4 x half>
1620 call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 2, <4 x i1> %c)
1621 ret void
1622 }
1623
1624 define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> %a) {
1625 ; CHECK-LE-LABEL: masked_v4f16_align1:
1626 ; CHECK-LE: @ %bb.0: @ %entry
1627 ; CHECK-LE-NEXT: .pad #20
1628 ; CHECK-LE-NEXT: sub sp, #20
1629 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0
1630 ; CHECK-LE-NEXT: vcmp.f32 s0, #0
1631 ; CHECK-LE-NEXT: vmov r1, s4
1632 ; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s1
1633 ; CHECK-LE-NEXT: vmov r2, s4
1634 ; CHECK-LE-NEXT: vmov.16 q1[0], r1
1635 ; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s2
1636 ; CHECK-LE-NEXT: vmov.16 q1[1], r2
1637 ; CHECK-LE-NEXT: vmov r1, s8
1638 ; CHECK-LE-NEXT: vcvtb.f16.f32 s8, s3
1639 ; CHECK-LE-NEXT: vmov.16 q1[2], r1
1640 ; CHECK-LE-NEXT: vmov r1, s8
1641 ; CHECK-LE-NEXT: vmov.16 q1[3], r1
1642 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1643 ; CHECK-LE-NEXT: mov.w r1, #0
1644 ; CHECK-LE-NEXT: vcmp.f32 s1, #0
1645 ; CHECK-LE-NEXT: it gt
1646 ; CHECK-LE-NEXT: movgt r1, #1
1647 ; CHECK-LE-NEXT: cmp r1, #0
1648 ; CHECK-LE-NEXT: cset r1, ne
1649 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1650 ; CHECK-LE-NEXT: and r1, r1, #1
1651 ; CHECK-LE-NEXT: vcmp.f32 s2, #0
1652 ; CHECK-LE-NEXT: rsb.w r3, r1, #0
1653 ; CHECK-LE-NEXT: mov.w r1, #0
1654 ; CHECK-LE-NEXT: bfi r1, r3, #0, #1
1655 ; CHECK-LE-NEXT: mov.w r3, #0
1656 ; CHECK-LE-NEXT: it gt
1657 ; CHECK-LE-NEXT: movgt r3, #1
1658 ; CHECK-LE-NEXT: cmp r3, #0
1659 ; CHECK-LE-NEXT: cset r3, ne
1660 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1661 ; CHECK-LE-NEXT: and r3, r3, #1
1662 ; CHECK-LE-NEXT: vcmp.f32 s3, #0
1663 ; CHECK-LE-NEXT: rsb.w r3, r3, #0
1664 ; CHECK-LE-NEXT: mov.w r2, #0
1665 ; CHECK-LE-NEXT: bfi r1, r3, #1, #1
1666 ; CHECK-LE-NEXT: mov.w r3, #0
1667 ; CHECK-LE-NEXT: it gt
1668 ; CHECK-LE-NEXT: movgt r3, #1
1669 ; CHECK-LE-NEXT: cmp r3, #0
1670 ; CHECK-LE-NEXT: cset r3, ne
1671 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr
1672 ; CHECK-LE-NEXT: it gt
1673 ; CHECK-LE-NEXT: movgt r2, #1
1674 ; CHECK-LE-NEXT: cmp r2, #0
1675 ; CHECK-LE-NEXT: and r3, r3, #1
1676 ; CHECK-LE-NEXT: cset r2, ne
1677 ; CHECK-LE-NEXT: and r2, r2, #1
1678 ; CHECK-LE-NEXT: rsbs r3, r3, #0
1679 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1
1680 ; CHECK-LE-NEXT: rsbs r2, r2, #0
1681 ; CHECK-LE-NEXT: bfi r1, r2, #3, #1
1682 ; CHECK-LE-NEXT: lsls r2, r1, #31
1683 ; CHECK-LE-NEXT: bne .LBB27_5
1684 ; CHECK-LE-NEXT: @ %bb.1: @ %else
1685 ; CHECK-LE-NEXT: lsls r2, r1, #30
1686 ; CHECK-LE-NEXT: bmi .LBB27_6
1687 ; CHECK-LE-NEXT: .LBB27_2: @ %else2
1688 ; CHECK-LE-NEXT: lsls r2, r1, #29
1689 ; CHECK-LE-NEXT: bmi .LBB27_7
1690 ; CHECK-LE-NEXT: .LBB27_3: @ %else4
1691 ; CHECK-LE-NEXT: lsls r1, r1, #28
1692 ; CHECK-LE-NEXT: bmi .LBB27_8
1693 ; CHECK-LE-NEXT: .LBB27_4: @ %else6
1694 ; CHECK-LE-NEXT: add sp, #20
1695 ; CHECK-LE-NEXT: bx lr
1696 ; CHECK-LE-NEXT: .LBB27_5: @ %cond.store
1697 ; CHECK-LE-NEXT: vstr.16 s4, [sp, #12]
1698 ; CHECK-LE-NEXT: ldrh.w r2, [sp, #12]
1699 ; CHECK-LE-NEXT: strh r2, [r0]
1700 ; CHECK-LE-NEXT: lsls r2, r1, #30
1701 ; CHECK-LE-NEXT: bpl .LBB27_2
1702 ; CHECK-LE-NEXT: .LBB27_6: @ %cond.store1
1703 ; CHECK-LE-NEXT: vmovx.f16 s0, s4
1704 ; CHECK-LE-NEXT: vstr.16 s0, [sp, #8]
1705 ; CHECK-LE-NEXT: ldrh.w r2, [sp, #8]
1706 ; CHECK-LE-NEXT: strh r2, [r0, #2]
1707 ; CHECK-LE-NEXT: lsls r2, r1, #29
1708 ; CHECK-LE-NEXT: bpl .LBB27_3
1709 ; CHECK-LE-NEXT: .LBB27_7: @ %cond.store3
1710 ; CHECK-LE-NEXT: vstr.16 s5, [sp, #4]
1711 ; CHECK-LE-NEXT: ldrh.w r2, [sp, #4]
1712 ; CHECK-LE-NEXT: strh r2, [r0, #4]
1713 ; CHECK-LE-NEXT: lsls r1, r1, #28
1714 ; CHECK-LE-NEXT: bpl .LBB27_4
1715 ; CHECK-LE-NEXT: .LBB27_8: @ %cond.store5
1716 ; CHECK-LE-NEXT: vmovx.f16 s0, s5
1717 ; CHECK-LE-NEXT: vstr.16 s0, [sp]
1718 ; CHECK-LE-NEXT: ldrh.w r1, [sp]
1719 ; CHECK-LE-NEXT: strh r1, [r0, #6]
1720 ; CHECK-LE-NEXT: add sp, #20
1721 ; CHECK-LE-NEXT: bx lr
1722 ;
1723 ; CHECK-BE-LABEL: masked_v4f16_align1:
1724 ; CHECK-BE: @ %bb.0: @ %entry
1725 ; CHECK-BE-NEXT: .pad #20
1726 ; CHECK-BE-NEXT: sub sp, #20
1727 ; CHECK-BE-NEXT: vrev64.32 q1, q0
1728 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4
1729 ; CHECK-BE-NEXT: vcmp.f32 s4, #0
1730 ; CHECK-BE-NEXT: vmov r1, s0
1731 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s5
1732 ; CHECK-BE-NEXT: vmov r2, s0
1733 ; CHECK-BE-NEXT: vmov.16 q0[0], r1
1734 ; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s6
1735 ; CHECK-BE-NEXT: vmov.16 q0[1], r2
1736 ; CHECK-BE-NEXT: vmov r1, s8
1737 ; CHECK-BE-NEXT: vcvtb.f16.f32 s8, s7
1738 ; CHECK-BE-NEXT: vmov.16 q0[2], r1
1739 ; CHECK-BE-NEXT: vmov r1, s8
1740 ; CHECK-BE-NEXT: vmov.16 q0[3], r1
1741 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1742 ; CHECK-BE-NEXT: mov.w r1, #0
1743 ; CHECK-BE-NEXT: vcmp.f32 s5, #0
1744 ; CHECK-BE-NEXT: it gt
1745 ; CHECK-BE-NEXT: movgt r1, #1
1746 ; CHECK-BE-NEXT: cmp r1, #0
1747 ; CHECK-BE-NEXT: cset r1, ne
1748 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1749 ; CHECK-BE-NEXT: and r1, r1, #1
1750 ; CHECK-BE-NEXT: vcmp.f32 s6, #0
1751 ; CHECK-BE-NEXT: rsb.w r3, r1, #0
1752 ; CHECK-BE-NEXT: mov.w r1, #0
1753 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1
1754 ; CHECK-BE-NEXT: mov.w r3, #0
1755 ; CHECK-BE-NEXT: it gt
1756 ; CHECK-BE-NEXT: movgt r3, #1
1757 ; CHECK-BE-NEXT: cmp r3, #0
1758 ; CHECK-BE-NEXT: cset r3, ne
1759 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1760 ; CHECK-BE-NEXT: and r3, r3, #1
1761 ; CHECK-BE-NEXT: vcmp.f32 s7, #0
1762 ; CHECK-BE-NEXT: rsb.w r3, r3, #0
1763 ; CHECK-BE-NEXT: mov.w r2, #0
1764 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1
1765 ; CHECK-BE-NEXT: mov.w r3, #0
1766 ; CHECK-BE-NEXT: it gt
1767 ; CHECK-BE-NEXT: movgt r3, #1
1768 ; CHECK-BE-NEXT: cmp r3, #0
1769 ; CHECK-BE-NEXT: cset r3, ne
1770 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr
1771 ; CHECK-BE-NEXT: it gt
1772 ; CHECK-BE-NEXT: movgt r2, #1
1773 ; CHECK-BE-NEXT: cmp r2, #0
1774 ; CHECK-BE-NEXT: and r3, r3, #1
1775 ; CHECK-BE-NEXT: cset r2, ne
1776 ; CHECK-BE-NEXT: and r2, r2, #1
1777 ; CHECK-BE-NEXT: rsbs r3, r3, #0
1778 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1
1779 ; CHECK-BE-NEXT: rsbs r2, r2, #0
1780 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1
1781 ; CHECK-BE-NEXT: lsls r2, r1, #31
1782 ; CHECK-BE-NEXT: bne .LBB27_5
1783 ; CHECK-BE-NEXT: @ %bb.1: @ %else
1784 ; CHECK-BE-NEXT: lsls r2, r1, #30
1785 ; CHECK-BE-NEXT: bmi .LBB27_6
1786 ; CHECK-BE-NEXT: .LBB27_2: @ %else2
1787 ; CHECK-BE-NEXT: lsls r2, r1, #29
1788 ; CHECK-BE-NEXT: bmi .LBB27_7
1789 ; CHECK-BE-NEXT: .LBB27_3: @ %else4
1790 ; CHECK-BE-NEXT: lsls r1, r1, #28
1791 ; CHECK-BE-NEXT: bmi .LBB27_8
1792 ; CHECK-BE-NEXT: .LBB27_4: @ %else6
1793 ; CHECK-BE-NEXT: add sp, #20
1794 ; CHECK-BE-NEXT: bx lr
1795 ; CHECK-BE-NEXT: .LBB27_5: @ %cond.store
1796 ; CHECK-BE-NEXT: vstr.16 s0, [sp, #12]
1797 ; CHECK-BE-NEXT: ldrh.w r2, [sp, #12]
1798 ; CHECK-BE-NEXT: strh r2, [r0]
1799 ; CHECK-BE-NEXT: lsls r2, r1, #30
1800 ; CHECK-BE-NEXT: bpl .LBB27_2
1801 ; CHECK-BE-NEXT: .LBB27_6: @ %cond.store1
1802 ; CHECK-BE-NEXT: vmovx.f16 s4, s0
1803 ; CHECK-BE-NEXT: vstr.16 s4, [sp, #8]
1804 ; CHECK-BE-NEXT: ldrh.w r2, [sp, #8]
1805 ; CHECK-BE-NEXT: strh r2, [r0, #2]
1806 ; CHECK-BE-NEXT: lsls r2, r1, #29
1807 ; CHECK-BE-NEXT: bpl .LBB27_3
1808 ; CHECK-BE-NEXT: .LBB27_7: @ %cond.store3
1809 ; CHECK-BE-NEXT: vstr.16 s1, [sp, #4]
1810 ; CHECK-BE-NEXT: ldrh.w r2, [sp, #4]
1811 ; CHECK-BE-NEXT: strh r2, [r0, #4]
1812 ; CHECK-BE-NEXT: lsls r1, r1, #28
1813 ; CHECK-BE-NEXT: bpl .LBB27_4
1814 ; CHECK-BE-NEXT: .LBB27_8: @ %cond.store5
1815 ; CHECK-BE-NEXT: vmovx.f16 s0, s1
1816 ; CHECK-BE-NEXT: vstr.16 s0, [sp]
1817 ; CHECK-BE-NEXT: ldrh.w r1, [sp]
1818 ; CHECK-BE-NEXT: strh r1, [r0, #6]
1819 ; CHECK-BE-NEXT: add sp, #20
1820 ; CHECK-BE-NEXT: bx lr
1821 entry:
1822 %c = fcmp ogt <4 x float> %a, zeroinitializer
1823 %trunc = fptrunc <4 x float> %a to <4 x half>
1824 call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %trunc, <4 x half>* %dest, i32 1, <4 x i1> %c)
1825 ret void
1826 }
1827
; Declarations of the masked-store intrinsics exercised by the test functions
; above (e.g. @llvm.masked.store.v4f16.p0v4f16 is called by
; @masked_v4f16_align1). Lines with plain ';' comments are ignored by
; FileCheck, so this header does not affect the CHECK matching.
1828 declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
1829 declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
1830 declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
11051831 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
11061832 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
11071833 declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
11081834 declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
1835 declare void @llvm.masked.store.v4f16.p0v4f16(<4 x half>, <4 x half>*, i32, <4 x i1>)
11091836 declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>)
11101837 declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
11111838 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)