llvm.org GIT mirror llvm / a59e6d2
[GlobalISel][X86] Support float/double and vector types. Summary: [GlobalISel][X86] Add support for f32/f64 and vector types in RegisterBank and InstructionSelector. Reviewers: delena, zvi Reviewed By: zvi Subscribers: dberris, rovka, llvm-commits, kristof.beyls Differential Revision: https://reviews.llvm.org/D30533 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296856 91177308-0d34-0410-b5e6-96231b3b80d8 Igor Breger 3 years ago
13 changed file(s) with 993 addition(s) and 157 deletion(s). Raw diff Collapse all Expand all
1818 RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{
1919 /* StartIdx, Length, RegBank */
2020 // GPR value
21 {0, 8, X86::GPRRegBank}, // :0
22 {0, 16, X86::GPRRegBank}, // :1
23 {0, 32, X86::GPRRegBank}, // :2
24 {0, 64, X86::GPRRegBank}, // :3
21 {0, 8, X86::GPRRegBank}, // :0
22 {0, 16, X86::GPRRegBank}, // :1
23 {0, 32, X86::GPRRegBank}, // :2
24 {0, 64, X86::GPRRegBank}, // :3
25 // FR32/64 , xmm registers
26 {0, 32, X86::VECRRegBank}, // :4
27 {0, 64, X86::VECRRegBank}, // :5
28 // VR128/256/512
29 {0, 128, X86::VECRRegBank}, // :6
30 {0, 256, X86::VECRRegBank}, // :7
31 {0, 512, X86::VECRRegBank}, // :8
2532 };
2633
2734 enum PartialMappingIdx {
3037 PMI_GPR16,
3138 PMI_GPR32,
3239 PMI_GPR64,
40 PMI_FP32,
41 PMI_FP64,
42 PMI_VEC128,
43 PMI_VEC256,
44 PMI_VEC512
3345 };
3446
3547 #define INSTR_3OP(INFO) INFO, INFO, INFO,
4355 INSTR_3OP(BREAKDOWN(PMI_GPR8, 1)) // 0: GPR_8
4456 INSTR_3OP(BREAKDOWN(PMI_GPR16, 1)) // 3: GPR_16
4557 INSTR_3OP(BREAKDOWN(PMI_GPR32, 1)) // 6: GPR_32
46 INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64
58 INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64
59 INSTR_3OP(BREAKDOWN(PMI_FP32, 1)) // 12: Fp32
60 INSTR_3OP(BREAKDOWN(PMI_FP64, 1)) // 15: Fp64
61 INSTR_3OP(BREAKDOWN(PMI_VEC128, 1)) // 18: Vec128
62 INSTR_3OP(BREAKDOWN(PMI_VEC256, 1)) // 21: Vec256
63 INSTR_3OP(BREAKDOWN(PMI_VEC512, 1)) // 24: Vec512
4764 };
4865 #undef INSTR_3OP
4966 #undef BREAKDOWN
5067
5168 enum ValueMappingIdx {
5269 VMI_None = -1,
53 VMI_3OpsGpr8Idx = 0,
54 VMI_3OpsGpr16Idx = 3,
55 VMI_3OpsGpr32Idx = 6,
56 VMI_3OpsGpr64Idx = 9,
70 VMI_3OpsGpr8Idx = PMI_GPR8 * 3,
71 VMI_3OpsGpr16Idx = PMI_GPR16 * 3,
72 VMI_3OpsGpr32Idx = PMI_GPR32 * 3,
73 VMI_3OpsGpr64Idx = PMI_GPR64 * 3,
74 VMI_3OpsFp32Idx = PMI_FP32 * 3,
75 VMI_3OpsFp64Idx = PMI_FP64 * 3,
76 VMI_3OpsVec128Idx = PMI_VEC128 * 3,
77 VMI_3OpsVec256Idx = PMI_VEC256 * 3,
78 VMI_3OpsVec512Idx = PMI_VEC512 * 3,
5779 };
5880
5981 } // End llvm namespace.
3838
3939 X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
4040 const X86RegisterBankInfo &RBI)
41 : InstructionSelector(), TII(*STI.getInstrInfo()),
41 : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
4242 TRI(*STI.getRegisterInfo()), RBI(RBI) {}
4343
4444 // FIXME: This should be target-independent, inferred from the types declared
4646 static const TargetRegisterClass *
4747 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
4848 if (RB.getID() == X86::GPRRegBankID) {
49 if (Ty.getSizeInBits() <= 32)
49 if (Ty.getSizeInBits() == 32)
5050 return &X86::GR32RegClass;
5151 if (Ty.getSizeInBits() == 64)
5252 return &X86::GR64RegClass;
53 }
54 if (RB.getID() == X86::VECRRegBankID) {
55 if (Ty.getSizeInBits() == 32)
56 return &X86::FR32XRegClass;
57 if (Ty.getSizeInBits() == 64)
58 return &X86::FR64XRegClass;
59 if (Ty.getSizeInBits() == 128)
60 return &X86::VR128XRegClass;
61 if (Ty.getSizeInBits() == 256)
62 return &X86::VR256XRegClass;
63 if (Ty.getSizeInBits() == 512)
64 return &X86::VR512RegClass;
5365 }
5466
5567 llvm_unreachable("Unknown RegBank!");
88100 assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values.");
89101 RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
90102 break;
103 case X86::VECRRegBankID:
104 RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
105 break;
91106 default:
92107 llvm_unreachable("Unknown RegBank!");
93108 }
95110 // No need to constrain SrcReg. It will get constrained when
96111 // we hit another of its use or its defs.
97112 // Copies do not have constraints.
98 if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
99 DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
100 << " operand\n");
101 return false;
113 const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
114 if (!OldRC || !RC->hasSubClassEq(OldRC)) {
115 if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
116 DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
117 << " operand\n");
118 return false;
119 }
102120 }
103121 I.setDesc(TII.get(X86::COPY));
104122 return true;
126144 assert(I.getNumOperands() == I.getNumExplicitOperands() &&
127145 "Generic instruction has unexpected implicit operands\n");
128146
147 // TODO: This should be implemented by tblgen; patterns with predicates are not supported yet.
148 if (selectBinaryOp(I, MRI))
149 return true;
150
129151 return selectImpl(I);
130152 }
153
154 unsigned X86InstructionSelector::getFAddOp(LLT &Ty,
155 const RegisterBank &RB) const {
156
157 if (X86::VECRRegBankID != RB.getID())
158 return TargetOpcode::G_FADD;
159
160 if (Ty == LLT::scalar(32)) {
161 if (STI.hasAVX512()) {
162 return X86::VADDSSZrr;
163 } else if (STI.hasAVX()) {
164 return X86::VADDSSrr;
165 } else if (STI.hasSSE1()) {
166 return X86::ADDSSrr;
167 }
168 } else if (Ty == LLT::scalar(64)) {
169 if (STI.hasAVX512()) {
170 return X86::VADDSDZrr;
171 } else if (STI.hasAVX()) {
172 return X86::VADDSDrr;
173 } else if (STI.hasSSE2()) {
174 return X86::ADDSDrr;
175 }
176 } else if (Ty == LLT::vector(4, 32)) {
177 if ((STI.hasAVX512()) && (STI.hasVLX())) {
178 return X86::VADDPSZ128rr;
179 } else if (STI.hasAVX()) {
180 return X86::VADDPSrr;
181 } else if (STI.hasSSE1()) {
182 return X86::ADDPSrr;
183 }
184 }
185
186 return TargetOpcode::G_FADD;
187 }
188
189 unsigned X86InstructionSelector::getFSubOp(LLT &Ty,
190 const RegisterBank &RB) const {
191
192 if (X86::VECRRegBankID != RB.getID())
193 return TargetOpcode::G_FSUB;
194
195 if (Ty == LLT::scalar(32)) {
196 if (STI.hasAVX512()) {
197 return X86::VSUBSSZrr;
198 } else if (STI.hasAVX()) {
199 return X86::VSUBSSrr;
200 } else if (STI.hasSSE1()) {
201 return X86::SUBSSrr;
202 }
203 } else if (Ty == LLT::scalar(64)) {
204 if (STI.hasAVX512()) {
205 return X86::VSUBSDZrr;
206 } else if (STI.hasAVX()) {
207 return X86::VSUBSDrr;
208 } else if (STI.hasSSE2()) {
209 return X86::SUBSDrr;
210 }
211 } else if (Ty == LLT::vector(4, 32)) {
212 if ((STI.hasAVX512()) && (STI.hasVLX())) {
213 return X86::VSUBPSZ128rr;
214 } else if (STI.hasAVX()) {
215 return X86::VSUBPSrr;
216 } else if (STI.hasSSE1()) {
217 return X86::SUBPSrr;
218 }
219 }
220
221 return TargetOpcode::G_FSUB;
222 }
223
224 unsigned X86InstructionSelector::getAddOp(LLT &Ty,
225 const RegisterBank &RB) const {
226
227 if (X86::VECRRegBankID != RB.getID())
228 return TargetOpcode::G_ADD;
229
230 if (Ty == LLT::vector(4, 32)) {
231 if (STI.hasAVX512() && STI.hasVLX()) {
232 return X86::VPADDDZ128rr;
233 } else if (STI.hasAVX()) {
234 return X86::VPADDDrr;
235 } else if (STI.hasSSE2()) {
236 return X86::PADDDrr;
237 }
238 }
239
240 return TargetOpcode::G_ADD;
241 }
242
243 unsigned X86InstructionSelector::getSubOp(LLT &Ty,
244 const RegisterBank &RB) const {
245
246 if (X86::VECRRegBankID != RB.getID())
247 return TargetOpcode::G_SUB;
248
249 if (Ty == LLT::vector(4, 32)) {
250 if (STI.hasAVX512() && STI.hasVLX()) {
251 return X86::VPSUBDZ128rr;
252 } else if (STI.hasAVX()) {
253 return X86::VPSUBDrr;
254 } else if (STI.hasSSE2()) {
255 return X86::PSUBDrr;
256 }
257 }
258
259 return TargetOpcode::G_SUB;
260 }
261
262 bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
263 MachineRegisterInfo &MRI) const {
264
265 LLT Ty = MRI.getType(I.getOperand(0).getReg());
266 const unsigned DefReg = I.getOperand(0).getReg();
267 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
268
269 unsigned NewOpc = I.getOpcode();
270
271 switch (I.getOpcode()) {
272 case TargetOpcode::G_FADD:
273 NewOpc = getFAddOp(Ty, RB);
274 break;
275 case TargetOpcode::G_FSUB:
276 NewOpc = getFSubOp(Ty, RB);
277 break;
278 case TargetOpcode::G_ADD:
279 NewOpc = getAddOp(Ty, RB);
280 break;
281 case TargetOpcode::G_SUB:
282 NewOpc = getSubOp(Ty, RB);
283 break;
284 default:
285 break;
286 }
287
288 if (NewOpc == I.getOpcode())
289 return false;
290
291 I.setDesc(TII.get(NewOpc));
292
293 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
294 }
295
2121 class X86RegisterInfo;
2222 class X86Subtarget;
2323 class X86TargetMachine;
24 class LLT;
25 class RegisterBank;
26 class MachineRegisterInfo;
2427
2528 class X86InstructionSelector : public InstructionSelector {
2629 public:
3437 /// the patterns that don't require complex C++.
3538 bool selectImpl(MachineInstr &I) const;
3639
40 // TODO: Remove once selectImpl supports patterns with predicates.
41 unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const;
42 unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const;
43 unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const;
44 unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const;
45 bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI) const;
46
47 const X86Subtarget &STI;
3748 const X86InstrInfo &TII;
3849 const X86RegisterInfo &TRI;
3950 const X86RegisterBankInfo &RBI;
1818 #include "llvm/Target/TargetOpcodes.h"
1919
2020 using namespace llvm;
21 using namespace TargetOpcode;
2122
2223 #ifndef LLVM_BUILD_GLOBAL_ISEL
2324 #error "You shouldn't build this"
2728
2829 setLegalizerInfo32bit();
2930 setLegalizerInfo64bit();
31 setLegalizerInfoSSE1();
32 setLegalizerInfoSSE2();
3033
3134 computeTables();
3235 }
3841 const LLT s32 = LLT::scalar(32);
3942
4043 for (auto Ty : {s8, s16, s32}) {
41 setAction({TargetOpcode::G_ADD, Ty}, Legal);
42 setAction({TargetOpcode::G_SUB, Ty}, Legal);
44 setAction({G_ADD, Ty}, Legal);
45 setAction({G_SUB, Ty}, Legal);
4346 }
4447 }
4548
5053
5154 const LLT s64 = LLT::scalar(64);
5255
53 setAction({TargetOpcode::G_ADD, s64}, Legal);
54 setAction({TargetOpcode::G_SUB, s64}, Legal);
56 setAction({G_ADD, s64}, Legal);
57 setAction({G_SUB, s64}, Legal);
5558 }
59
60 void X86LegalizerInfo::setLegalizerInfoSSE1() {
61 if (!Subtarget.hasSSE1())
62 return;
63
64 const LLT s32 = LLT::scalar(32);
65 const LLT v4s32 = LLT::vector(4, 32);
66
67 for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
68 for (auto Ty : {s32, v4s32})
69 setAction({BinOp, Ty}, Legal);
70 }
71
72 void X86LegalizerInfo::setLegalizerInfoSSE2() {
73 if (!Subtarget.hasSSE2())
74 return;
75
76 const LLT s64 = LLT::scalar(64);
77 const LLT v4s32 = LLT::vector(4, 32);
78 const LLT v2s64 = LLT::vector(2, 64);
79
80 for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
81 for (auto Ty : {s64, v2s64})
82 setAction({BinOp, Ty}, Legal);
83
84 for (unsigned BinOp : {G_ADD, G_SUB})
85 for (auto Ty : {v4s32})
86 setAction({BinOp, Ty}, Legal);
87
88 }
3333 private:
3434 void setLegalizerInfo32bit();
3535 void setLegalizerInfo64bit();
36 void setLegalizerInfoSSE1();
37 void setLegalizerInfoSSE2();
3638 };
3739 } // End llvm namespace.
3840 #endif
5353 X86::GR64RegClass.hasSubClassEq(&RC))
5454 return getRegBank(X86::GPRRegBankID);
5555
56 if (X86::FR32XRegClass.hasSubClassEq(&RC) ||
57 X86::FR64XRegClass.hasSubClassEq(&RC) ||
58 X86::VR128XRegClass.hasSubClassEq(&RC) ||
59 X86::VR256XRegClass.hasSubClassEq(&RC) ||
60 X86::VR512RegClass.hasSubClassEq(&RC))
61 return getRegBank(X86::VECRRegBankID);
62
5663 llvm_unreachable("Unsupported register kind yet.");
5764 }
5865
7077 llvm_unreachable("Unsupported operand maping yet.");
7178
7279 ValueMappingIdx ValMapIdx = VMI_None;
73 if (!isFP) {
80
81 if (Ty.isScalar()) {
82 if (!isFP) {
83 switch (Ty.getSizeInBits()) {
84 case 8:
85 ValMapIdx = VMI_3OpsGpr8Idx;
86 break;
87 case 16:
88 ValMapIdx = VMI_3OpsGpr16Idx;
89 break;
90 case 32:
91 ValMapIdx = VMI_3OpsGpr32Idx;
92 break;
93 case 64:
94 ValMapIdx = VMI_3OpsGpr64Idx;
95 break;
96 default:
97 llvm_unreachable("Unsupported register size.");
98 }
99 } else {
100 switch (Ty.getSizeInBits()) {
101 case 32:
102 ValMapIdx = VMI_3OpsFp32Idx;
103 break;
104 case 64:
105 ValMapIdx = VMI_3OpsFp64Idx;
106 break;
107 default:
108 llvm_unreachable("Unsupported register size.");
109 }
110 }
111 } else {
74112 switch (Ty.getSizeInBits()) {
75 case 8:
76 ValMapIdx = VMI_3OpsGpr8Idx;
113 case 128:
114 ValMapIdx = VMI_3OpsVec128Idx;
77115 break;
78 case 16:
79 ValMapIdx = VMI_3OpsGpr16Idx;
116 case 256:
117 ValMapIdx = VMI_3OpsVec256Idx;
80118 break;
81 case 32:
82 ValMapIdx = VMI_3OpsGpr32Idx;
83 break;
84 case 64:
85 ValMapIdx = VMI_3OpsGpr64Idx;
119 case 512:
120 ValMapIdx = VMI_3OpsVec512Idx;
86121 break;
87122 default:
88123 llvm_unreachable("Unsupported register size.");
89 break;
90124 }
91 } else {
92 llvm_unreachable("Floating point not supported yet.");
93125 }
94126
95127 return InstructionMapping{DefaultMappingID, 1, &ValMappings[ValMapIdx],
113145 case TargetOpcode::G_SUB:
114146 return getOperandsMapping(MI, false);
115147 break;
148 case TargetOpcode::G_FADD:
149 case TargetOpcode::G_FSUB:
150 case TargetOpcode::G_FMUL:
151 case TargetOpcode::G_FDIV:
152 return getOperandsMapping(MI, true);
153 break;
116154 default:
117155 return InstructionMapping{};
118156 }
1111
1212 /// General Purpose Registers: RAX, RCX,...
1313 def GPRRegBank : RegisterBank<"GPR", [GR64]>;
14
15 /// Floating Point/Vector Registers
16 def VECRRegBank : RegisterBank<"VECR", [VR512]>;
44 source_filename = "tmp.ll"
55 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
66 target triple = "x86_64--linux-gnu"
7
7
88 define i8 @test_add_i8(i8 %arg1, i8 %arg2) {
99 %ret = add i8 %arg1, %arg2
1010 ret i8 %ret
1111 }
12
12
1313 define i16 @test_add_i16(i16 %arg1, i16 %arg2) {
1414 %ret = add i16 %arg1, %arg2
1515 ret i16 %ret
1616 }
17
17
1818 define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
1919 %ret = add i32 %arg1, %arg2
2020 ret i32 %ret
2121 }
22
22
2323 define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
2424 %ret = add i64 %arg1, %arg2
2525 ret i64 %ret
2626 }
2727
28 define float @test_add_float(float %arg1, float %arg2) {
29 %ret = fadd float %arg1, %arg2
30 ret float %ret
31 }
32
33 define double @test_add_double(double %arg1, double %arg2) {
34 %ret = fadd double %arg1, %arg2
35 ret double %ret
36 }
37
38 define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
39 %ret = add <4 x i32> %arg1, %arg2
40 ret <4 x i32> %ret
41 }
42
43 define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
44 %ret = fadd <4 x float> %arg1, %arg2
45 ret <4 x float> %ret
46 }
47
2848 ...
2949 ---
3050 name: test_add_i8
3858 # CHECK: - { id: 0, class: gpr }
3959 # CHECK: - { id: 1, class: gpr }
4060 # CHECK: - { id: 2, class: gpr }
41 registers:
61 registers:
4262 - { id: 0, class: _ }
4363 - { id: 1, class: _ }
4464 - { id: 2, class: _ }
4565 body: |
4666 bb.1 (%ir-block.0):
4767 liveins: %edi, %esi
48
68
4969 %0(s8) = COPY %edi
5070 %1(s8) = COPY %esi
5171 %2(s8) = G_ADD %0, %1
6585 # CHECK: - { id: 0, class: gpr }
6686 # CHECK: - { id: 1, class: gpr }
6787 # CHECK: - { id: 2, class: gpr }
68 registers:
88 registers:
6989 - { id: 0, class: _ }
7090 - { id: 1, class: _ }
7191 - { id: 2, class: _ }
7292 body: |
7393 bb.1 (%ir-block.0):
7494 liveins: %edi, %esi
75
95
7696 %0(s16) = COPY %edi
7797 %1(s16) = COPY %esi
7898 %2(s16) = G_ADD %0, %1
92112 # CHECK: - { id: 0, class: gpr }
93113 # CHECK: - { id: 1, class: gpr }
94114 # CHECK: - { id: 2, class: gpr }
95 registers:
115 registers:
96116 - { id: 0, class: _ }
97117 - { id: 1, class: _ }
98118 - { id: 2, class: _ }
99119 body: |
100120 bb.1 (%ir-block.0):
101121 liveins: %edi, %esi
102
122
103123 %0(s32) = COPY %edi
104124 %1(s32) = COPY %esi
105125 %2(s32) = G_ADD %0, %1
119139 # CHECK: - { id: 0, class: gpr }
120140 # CHECK: - { id: 1, class: gpr }
121141 # CHECK: - { id: 2, class: gpr }
122 registers:
142 registers:
123143 - { id: 0, class: _ }
124144 - { id: 1, class: _ }
125145 - { id: 2, class: _ }
126146 body: |
127147 bb.1 (%ir-block.0):
128148 liveins: %rdi, %rsi
129
149
130150 %0(s64) = COPY %rdi
131151 %1(s64) = COPY %rsi
132152 %2(s64) = G_ADD %0, %1
134154 RET 0, implicit %rax
135155
136156 ...
157 ---
158 name: test_add_float
159 alignment: 4
160 legalized: true
161 regBankSelected: false
162 selected: false
163 tracksRegLiveness: true
164 # CHECK-LABEL: name: test_add_float
165 # CHECK: registers:
166 # CHECK: - { id: 0, class: vecr }
167 # CHECK: - { id: 1, class: vecr }
168 # CHECK: - { id: 2, class: vecr }
169 registers:
170 - { id: 0, class: _ }
171 - { id: 1, class: _ }
172 - { id: 2, class: _ }
173 body: |
174 bb.1 (%ir-block.0):
175 liveins: %xmm0, %xmm1
176
177 %0(s32) = COPY %xmm0
178 %1(s32) = COPY %xmm1
179 %2(s32) = G_FADD %0, %1
180 %xmm0 = COPY %2(s32)
181 RET 0, implicit %xmm0
182
183 ...
184 ---
185 name: test_add_double
186 alignment: 4
187 legalized: true
188 regBankSelected: false
189 selected: false
190 tracksRegLiveness: true
191 # CHECK-LABEL: name: test_add_double
192 # CHECK: registers:
193 # CHECK: - { id: 0, class: vecr }
194 # CHECK: - { id: 1, class: vecr }
195 # CHECK: - { id: 2, class: vecr }
196 registers:
197 - { id: 0, class: _ }
198 - { id: 1, class: _ }
199 - { id: 2, class: _ }
200 body: |
201 bb.1 (%ir-block.0):
202 liveins: %xmm0, %xmm1
203
204 %0(s64) = COPY %xmm0
205 %1(s64) = COPY %xmm1
206 %2(s64) = G_FADD %0, %1
207 %xmm0 = COPY %2(s64)
208 RET 0, implicit %xmm0
209
210 ...
211 ---
212 name: test_add_v4i32
213 alignment: 4
214 legalized: true
215 regBankSelected: false
216 selected: false
217 tracksRegLiveness: true
218 # CHECK-LABEL: name: test_add_v4i32
219 # CHECK: registers:
220 # CHECK: - { id: 0, class: vecr }
221 # CHECK: - { id: 1, class: vecr }
222 # CHECK: - { id: 2, class: vecr }
223 registers:
224 - { id: 0, class: _ }
225 - { id: 1, class: _ }
226 - { id: 2, class: _ }
227 body: |
228 bb.1 (%ir-block.0):
229 liveins: %xmm0, %xmm1
230
231 %0(<4 x s32>) = COPY %xmm0
232 %1(<4 x s32>) = COPY %xmm1
233 %2(<4 x s32>) = G_ADD %0, %1
234 %xmm0 = COPY %2(<4 x s32>)
235 RET 0, implicit %xmm0
236
237 ...
238 ---
239 name: test_add_v4f32
240 alignment: 4
241 legalized: true
242 regBankSelected: false
243 selected: false
244 tracksRegLiveness: true
245 # CHECK-LABEL: name: test_add_v4f32
246 # CHECK: registers:
247 # CHECK: - { id: 0, class: vecr }
248 # CHECK: - { id: 1, class: vecr }
249 # CHECK: - { id: 2, class: vecr }
250 registers:
251 - { id: 0, class: _ }
252 - { id: 1, class: _ }
253 - { id: 2, class: _ }
254 body: |
255 bb.1 (%ir-block.0):
256 liveins: %xmm0, %xmm1
257
258 %0(<4 x s32>) = COPY %xmm0
259 %1(<4 x s32>) = COPY %xmm1
260 %2(<4 x s32>) = G_FADD %0, %1
261 %xmm0 = COPY %2(<4 x s32>)
262 RET 0, implicit %xmm0
263
264 ...
265
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s
1 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE
2 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX
3 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512F
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL
25
36 define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
4 ; CHECK-LABEL: test_add_i64:
5 ; CHECK: # BB#0:
6 ; CHECK-NEXT: leaq (%rsi,%rdi), %rax
7 ; CHECK-NEXT: retq
7 ; ALL-LABEL: test_add_i64:
8 ; ALL: # BB#0:
9 ; ALL-NEXT: leaq (%rsi,%rdi), %rax
10 ; ALL-NEXT: retq
811 %ret = add i64 %arg1, %arg2
912 ret i64 %ret
1013 }
1114
1215 define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
13 ; CHECK-LABEL: test_add_i32:
14 ; CHECK: # BB#0:
15 ; CHECK-NEXT: # kill: %EDI %EDI %RDI
16 ; CHECK-NEXT: # kill: %ESI %ESI %RSI
17 ; CHECK-NEXT: leal (%rsi,%rdi), %eax
18 ; CHECK-NEXT: retq
16 ; ALL-LABEL: test_add_i32:
17 ; ALL: # BB#0:
18 ; ALL-NEXT: # kill: %EDI %EDI %RDI
19 ; ALL-NEXT: # kill: %ESI %ESI %RSI
20 ; ALL-NEXT: leal (%rsi,%rdi), %eax
21 ; ALL-NEXT: retq
1922 %ret = add i32 %arg1, %arg2
2023 ret i32 %ret
2124 }
2225
2326 define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
24 ; CHECK-LABEL: test_sub_i64:
25 ; CHECK: # BB#0:
26 ; CHECK-NEXT: subq %rsi, %rdi
27 ; CHECK-NEXT: movq %rdi, %rax
28 ; CHECK-NEXT: retq
27 ; ALL-LABEL: test_sub_i64:
28 ; ALL: # BB#0:
29 ; ALL-NEXT: subq %rsi, %rdi
30 ; ALL-NEXT: movq %rdi, %rax
31 ; ALL-NEXT: retq
2932 %ret = sub i64 %arg1, %arg2
3033 ret i64 %ret
3134 }
3235
3336 define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
34 ; CHECK-LABEL: test_sub_i32:
35 ; CHECK: # BB#0:
36 ; CHECK-NEXT: subl %esi, %edi
37 ; CHECK-NEXT: movl %edi, %eax
38 ; CHECK-NEXT: retq
37 ; ALL-LABEL: test_sub_i32:
38 ; ALL: # BB#0:
39 ; ALL-NEXT: subl %esi, %edi
40 ; ALL-NEXT: movl %edi, %eax
41 ; ALL-NEXT: retq
3942 %ret = sub i32 %arg1, %arg2
4043 ret i32 %ret
4144 }
45
46 define float @test_add_float(float %arg1, float %arg2) {
47 ; SSE-LABEL: test_add_float:
48 ; SSE: # BB#0:
49 ; SSE-NEXT: addss %xmm1, %xmm0
50 ; SSE-NEXT: retq
51 ;
52 ; ALL_AVX-LABEL: test_add_float:
53 ; ALL_AVX: # BB#0:
54 ; ALL_AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
55 ; ALL_AVX-NEXT: retq
56 %ret = fadd float %arg1, %arg2
57 ret float %ret
58 }
59
60 define double @test_add_double(double %arg1, double %arg2) {
61 ; SSE-LABEL: test_add_double:
62 ; SSE: # BB#0:
63 ; SSE-NEXT: addsd %xmm1, %xmm0
64 ; SSE-NEXT: retq
65 ;
66 ; ALL_AVX-LABEL: test_add_double:
67 ; ALL_AVX: # BB#0:
68 ; ALL_AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
69 ; ALL_AVX-NEXT: retq
70 %ret = fadd double %arg1, %arg2
71 ret double %ret
72 }
73
74 define float @test_sub_float(float %arg1, float %arg2) {
75 ; SSE-LABEL: test_sub_float:
76 ; SSE: # BB#0:
77 ; SSE-NEXT: subss %xmm1, %xmm0
78 ; SSE-NEXT: retq
79 ;
80 ; ALL_AVX-LABEL: test_sub_float:
81 ; ALL_AVX: # BB#0:
82 ; ALL_AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
83 ; ALL_AVX-NEXT: retq
84 %ret = fsub float %arg1, %arg2
85 ret float %ret
86 }
87
88 define double @test_sub_double(double %arg1, double %arg2) {
89 ; SSE-LABEL: test_sub_double:
90 ; SSE: # BB#0:
91 ; SSE-NEXT: subsd %xmm1, %xmm0
92 ; SSE-NEXT: retq
93 ;
94 ; ALL_AVX-LABEL: test_sub_double:
95 ; ALL_AVX: # BB#0:
96 ; ALL_AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
97 ; ALL_AVX-NEXT: retq
98 %ret = fsub double %arg1, %arg2
99 ret double %ret
100 }
101
102 define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
103 ; SSE-LABEL: test_add_v4i32:
104 ; SSE: # BB#0:
105 ; SSE-NEXT: paddd %xmm1, %xmm0
106 ; SSE-NEXT: retq
107 ;
108 ; ALL_AVX-LABEL: test_add_v4i32:
109 ; ALL_AVX: # BB#0:
110 ; ALL_AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
111 ; ALL_AVX-NEXT: retq
112 %ret = add <4 x i32> %arg1, %arg2
113 ret <4 x i32> %ret
114 }
115
116 define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
117 ; SSE-LABEL: test_sub_v4i32:
118 ; SSE: # BB#0:
119 ; SSE-NEXT: psubd %xmm1, %xmm0
120 ; SSE-NEXT: retq
121 ;
122 ; ALL_AVX-LABEL: test_sub_v4i32:
123 ; ALL_AVX: # BB#0:
124 ; ALL_AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
125 ; ALL_AVX-NEXT: retq
126 %ret = sub <4 x i32> %arg1, %arg2
127 ret <4 x i32> %ret
128 }
129
130 define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
131 ; SSE-LABEL: test_add_v4f32:
132 ; SSE: # BB#0:
133 ; SSE-NEXT: addps %xmm1, %xmm0
134 ; SSE-NEXT: retq
135 ;
136 ; ALL_AVX-LABEL: test_add_v4f32:
137 ; ALL_AVX: # BB#0:
138 ; ALL_AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
139 ; ALL_AVX-NEXT: retq
140 %ret = fadd <4 x float> %arg1, %arg2
141 ret <4 x float> %ret
142 }
143
144 define <4 x float> @test_sub_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
145 ; SSE-LABEL: test_sub_v4f32:
146 ; SSE: # BB#0:
147 ; SSE-NEXT: subps %xmm1, %xmm0
148 ; SSE-NEXT: retq
149 ;
150 ; ALL_AVX-LABEL: test_sub_v4f32:
151 ; ALL_AVX: # BB#0:
152 ; ALL_AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0
153 ; ALL_AVX-NEXT: retq
154 %ret = fsub <4 x float> %arg1, %arg2
155 ret <4 x float> %ret
156 }
44 @a7_8bit = external global i8
55 @a8_8bit = external global i8
66
7 define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
7 define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
88 i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) {
99
1010 ; ALL-LABEL: name: test_i8_args_8
1111
12 ; X64: fixedStack:
12 ; X64: fixedStack:
1313 ; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false
1414 ; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false
15 ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
15 ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
1616 ; X64: [[ARG1:%[0-9]+]](s8) = COPY %edi
1717 ; X64-NEXT: %{{[0-9]+}}(s8) = COPY %esi
1818 ; X64-NEXT: %{{[0-9]+}}(s8) = COPY %edx
2424 ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
2525 ; X64-NEXT: [[ARG8:%[0-9]+]](s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0)
2626
27 ; X32: fixedStack:
27 ; X32: fixedStack:
2828 ; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 1, alignment: 4, isImmutable: true, isAliased: false }
2929 ; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 1, alignment: 8, isImmutable: true, isAliased: false }
3030 ; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 1, alignment: 4, isImmutable: true, isAliased: false }
3939 ; X32-NEXT: [[ARG2:%[0-9]+]](s8) = G_LOAD [[ARG2_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK4]], align 0)
4040 ; X32-NEXT: [[ARG3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
4141 ; X32-NEXT: [[ARG3:%[0-9]+]](s8) = G_LOAD [[ARG3_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0)
42 ; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]]
42 ; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]]
4343 ; X32-NEXT: [[ARG4:%[0-9]+]](s8) = G_LOAD [[ARG4_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK12]], align 0)
4444 ; X32-NEXT: [[ARG5_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]]
4545 ; X32-NEXT: [[ARG5:%[0-9]+]](s8) = G_LOAD [[ARG5_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK16]], align 0)
5252
5353 ; ALL-NEXT: [[GADDR_A1:%[0-9]+]](p0) = G_GLOBAL_VALUE @a1_8bit
5454 ; ALL-NEXT: [[GADDR_A7:%[0-9]+]](p0) = G_GLOBAL_VALUE @a7_8bit
55 ; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_8bit
55 ; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_8bit
5656 ; ALL-NEXT: G_STORE [[ARG1]](s8), [[GADDR_A1]](p0) :: (store 1 into @a1_8bit)
5757 ; ALL-NEXT: G_STORE [[ARG7]](s8), [[GADDR_A7]](p0) :: (store 1 into @a7_8bit)
5858 ; ALL-NEXT: G_STORE [[ARG8]](s8), [[GADDR_A8]](p0) :: (store 1 into @a8_8bit)
7070 @a7_32bit = external global i32
7171 @a8_32bit = external global i32
7272
73 define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
73 define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
7474 i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) {
7575
7676 ; ALL-LABEL: name: test_i32_args_8
7777
78 ; X64: fixedStack:
78 ; X64: fixedStack:
7979 ; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false
8080 ; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false
81 ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
81 ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
8282 ; X64: [[ARG1:%[0-9]+]](s32) = COPY %edi
8383 ; X64-NEXT: %{{[0-9]+}}(s32) = COPY %esi
8484 ; X64-NEXT: %{{[0-9]+}}(s32) = COPY %edx
9090 ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
9191 ; X64-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0)
9292
93 ; X32: fixedStack:
93 ; X32: fixedStack:
9494 ; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
9595 ; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
9696 ; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
101101 ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
102102 ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
103103 ; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
104 ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
104 ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
105105 ; X32-NEXT: [[ARG2:%[0-9]+]](s32) = G_LOAD [[ARG2_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0)
106 ; X32-NEXT: [[ARG3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
106 ; X32-NEXT: [[ARG3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
107107 ; X32-NEXT: [[ARG3:%[0-9]+]](s32) = G_LOAD [[ARG3_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0)
108 ; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]]
108 ; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]]
109109 ; X32-NEXT: [[ARG4:%[0-9]+]](s32) = G_LOAD [[ARG4_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK12]], align 0)
110 ; X32-NEXT: [[ARG5_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]]
110 ; X32-NEXT: [[ARG5_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]]
111111 ; X32-NEXT: [[ARG5:%[0-9]+]](s32) = G_LOAD [[ARG5_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK16]], align 0)
112 ; X32-NEXT: [[ARG6_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]]
112 ; X32-NEXT: [[ARG6_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]]
113113 ; X32-NEXT: [[ARG6:%[0-9]+]](s32) = G_LOAD [[ARG6_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK20]], align 0)
114 ; X32-NEXT: [[ARG7_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]]
114 ; X32-NEXT: [[ARG7_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]]
115115 ; X32-NEXT: [[ARG7:%[0-9]+]](s32) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK24]], align 0)
116 ; X32-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]]
116 ; X32-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]]
117117 ; X32-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK28]], align 0)
118118
119119 ; ALL-NEXT: [[GADDR_A1:%[0-9]+]](p0) = G_GLOBAL_VALUE @a1_32bit
120120 ; ALL-NEXT: [[GADDR_A7:%[0-9]+]](p0) = G_GLOBAL_VALUE @a7_32bit
121 ; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_32bit
121 ; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_32bit
122122 ; ALL-NEXT: G_STORE [[ARG1]](s32), [[GADDR_A1]](p0) :: (store 4 into @a1_32bit)
123123 ; ALL-NEXT: G_STORE [[ARG7]](s32), [[GADDR_A7]](p0) :: (store 4 into @a7_32bit)
124124 ; ALL-NEXT: G_STORE [[ARG8]](s32), [[GADDR_A8]](p0) :: (store 4 into @a8_32bit)
128128 entry:
129129 store i32 %arg1, i32* @a1_32bit
130130 store i32 %arg7, i32* @a7_32bit
131 store i32 %arg8, i32* @a8_32bit
131 store i32 %arg8, i32* @a8_32bit
132132 ret i32 %arg1
133133 }
134134
136136 @a7_64bit = external global i64
137137 @a8_64bit = external global i64
138138
139 define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
139 define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
140140 i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8) {
141141
142142 ; ALL-LABEL: name: test_i64_args_8
143 ; X64: fixedStack:
143 ; X64: fixedStack:
144144 ; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false
145145 ; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false
146146 ; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9
155155 ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
156156 ; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
157157
158 ; X32: fixedStack:
159 ; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false }
160 ; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false }
161 ; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false }
162 ; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false }
163 ; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false }
164 ; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false }
165 ; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false }
166 ; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false }
167 ; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
168 ; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
169 ; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
170 ; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
171 ; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
172 ; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
173 ; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
158 ; X32: fixedStack:
159 ; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false }
160 ; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false }
161 ; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false }
162 ; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false }
163 ; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false }
164 ; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false }
165 ; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false }
166 ; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false }
167 ; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
168 ; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
169 ; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
170 ; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
171 ; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
172 ; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
173 ; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
174174 ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
175175
176176 ; X32: [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
232232 entry:
233233 store i64 %arg1, i64* @a1_64bit
234234 store i64 %arg7, i64* @a7_64bit
235 store i64 %arg8, i64* @a8_64bit
235 store i64 %arg8, i64* @a8_64bit
236236 ret i64 %arg1
237237 }
238238
239239 define float @test_float_args(float %arg1, float %arg2) {
240 ; ALL-LABEL:name: test_float_args
241
242 ; X64: liveins: %xmm0, %xmm1
240 ; ALL-LABEL:name: test_float_args
241
242 ; X64: liveins: %xmm0, %xmm1
243243 ; X64: [[ARG1:%[0-9]+]](s32) = COPY %xmm0
244244 ; X64-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %xmm1
245245 ; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s32)
246246 ; X64-NEXT: RET 0, implicit %xmm0
247247
248 ; X32: fixedStack:
248 ; X32: fixedStack:
249249 ; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
250250 ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
251251 ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
254254 ; X32-NEXT: [[ARG2:%[0-9]+]](s32) = G_LOAD [[ARG2_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0)
255255 ; X32-NEXT: %fp0 = COPY [[ARG2:%[0-9]+]](s32)
256256 ; X32-NEXT: RET 0, implicit %fp0
257
257
258258 ret float %arg2
259259 }
260260
261261 define double @test_double_args(double %arg1, double %arg2) {
262 ; ALL-LABEL:name: test_double_args
263 ; X64: liveins: %xmm0, %xmm1
262 ; ALL-LABEL:name: test_double_args
263 ; X64: liveins: %xmm0, %xmm1
264264 ; X64: [[ARG1:%[0-9]+]](s64) = COPY %xmm0
265265 ; X64-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %xmm1
266266 ; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s64)
267267 ; X64-NEXT: RET 0, implicit %xmm0
268268
269 ; X32: fixedStack:
269 ; X32: fixedStack:
270270 ; X32: id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false }
271271 ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false }
272272 ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
None # RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
0 # RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
11
22 --- |
33 ; ModuleID = ''
44 source_filename = ""
55 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
66 target triple = "x86_64--linux-gnu"
7
7
88 define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
99 %ret = add i32 %arg1, %arg2
1010 ret i32 %ret
1818 regBankSelected: false
1919 selected: false
2020 tracksRegLiveness: true
21 registers:
21 registers:
2222 - { id: 0, class: _ }
2323 - { id: 1, class: _ }
2424 - { id: 2, class: _ }
2626 bb.1 (%ir-block.0):
2727 liveins: %edi, %esi
2828 ; CHECK-LABEL: name: test_add_i32
29 ; CHECK: [[VAL1:%.*]](s32) = COPY %edi
29 ; CHECK: [[VAL1:%.*]](s32) = COPY %edi
3030 ; CHECK: [[VAL2:%.*]](s32) = COPY %esi
3131 ; CHECK: [[RES:%.*]](s32) = G_ADD [[VAL1:%.*]], [[VAL2:%.*]]
3232
None # RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
0 # RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s
11
22 --- |
33 ; ModuleID = ''
44 source_filename = ""
55 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
66 target triple = "x86_64--linux-gnu"
7
7
88 define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
99 %ret = sub i32 %arg1, %arg2
1010 ret i32 %ret
1818 regBankSelected: false
1919 selected: false
2020 tracksRegLiveness: true
21 registers:
21 registers:
2222 - { id: 0, class: _ }
2323 - { id: 1, class: _ }
2424 - { id: 2, class: _ }
2626 bb.1 (%ir-block.0):
2727 liveins: %edi, %esi
2828 ; CHECK-LABEL: name: test_sub_i32
29 ; CHECK: [[VAL1:%.*]](s32) = COPY %edi
29 ; CHECK: [[VAL1:%.*]](s32) = COPY %edi
3030 ; CHECK: [[VAL2:%.*]](s32) = COPY %esi
3131 ; CHECK: [[RES:%.*]](s32) = G_SUB [[VAL1:%.*]], [[VAL2:%.*]]
3232
None # RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
0 # RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
1 # RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
2 # RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
3 # RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL
14
25 --- |
36 define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
1417 %ret = sub i64 %arg1, %arg2
1518 ret i64 %ret
1619 }
17
20
1821 define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
1922 %ret = sub i32 %arg1, %arg2
2023 ret i32 %ret
2124 }
2225
26 define float @test_add_float(float %arg1, float %arg2) {
27 %ret = fadd float %arg1, %arg2
28 ret float %ret
29 }
30
31 define double @test_add_double(double %arg1, double %arg2) {
32 %ret = fadd double %arg1, %arg2
33 ret double %ret
34 }
35
36 define float @test_sub_float(float %arg1, float %arg2) {
37 %ret = fsub float %arg1, %arg2
38 ret float %ret
39 }
40
41 define double @test_sub_double(double %arg1, double %arg2) {
42 %ret = fsub double %arg1, %arg2
43 ret double %ret
44 }
45
46 define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
47 %ret = add <4 x i32> %arg1, %arg2
48 ret <4 x i32> %ret
49 }
50
51 define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) {
52 %ret = sub <4 x i32> %arg1, %arg2
53 ret <4 x i32> %ret
54 }
55
56 define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
57 %ret = fadd <4 x float> %arg1, %arg2
58 ret <4 x float> %ret
59 }
60
61 define <4 x float> @test_sub_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
62 %ret = fsub <4 x float> %arg1, %arg2
63 ret <4 x float> %ret
64 }
2365 ...
2466
2567 ---
2668 name: test_add_i64
2769 legalized: true
2870 regBankSelected: true
29 # CHECK: registers:
30 # CHECK-NEXT: - { id: 0, class: gr64 }
31 # CHECK-NEXT: - { id: 1, class: gr64 }
32 # CHECK-NEXT: - { id: 2, class: gr64 }
33 registers:
71 # ALL: registers:
72 # ALL-NEXT: - { id: 0, class: gr64 }
73 # ALL-NEXT: - { id: 1, class: gr64 }
74 # ALL-NEXT: - { id: 2, class: gr64 }
75 registers:
3476 - { id: 0, class: gpr }
3577 - { id: 1, class: gpr }
3678 - { id: 2, class: gpr }
37 # CHECK: %0 = COPY %rdi
38 # CHECK-NEXT: %1 = COPY %rsi
39 # CHECK-NEXT: %2 = ADD64rr %0, %1
79 # ALL: %0 = COPY %rdi
80 # ALL-NEXT: %1 = COPY %rsi
81 # ALL-NEXT: %2 = ADD64rr %0, %1
4082 body: |
4183 bb.1 (%ir-block.0):
4284 liveins: %edi, %esi
43
85
4486 %0(s64) = COPY %rdi
4587 %1(s64) = COPY %rsi
4688 %2(s64) = G_ADD %0, %1
5193 name: test_add_i32
5294 legalized: true
5395 regBankSelected: true
54 # CHECK: registers:
55 # CHECK-NEXT: - { id: 0, class: gr32 }
56 # CHECK-NEXT: - { id: 1, class: gr32 }
57 # CHECK-NEXT: - { id: 2, class: gr32 }
58 registers:
96 # ALL: registers:
97 # ALL-NEXT: - { id: 0, class: gr32 }
98 # ALL-NEXT: - { id: 1, class: gr32 }
99 # ALL-NEXT: - { id: 2, class: gr32 }
100 registers:
59101 - { id: 0, class: gpr }
60102 - { id: 1, class: gpr }
61103 - { id: 2, class: gpr }
62 # CHECK: %0 = COPY %edi
63 # CHECK-NEXT: %1 = COPY %esi
64 # CHECK-NEXT: %2 = ADD32rr %0, %1
104 # ALL: %0 = COPY %edi
105 # ALL-NEXT: %1 = COPY %esi
106 # ALL-NEXT: %2 = ADD32rr %0, %1
65107 body: |
66108 bb.1 (%ir-block.0):
67109 liveins: %edi, %esi
68
110
69111 %0(s32) = COPY %edi
70112 %1(s32) = COPY %esi
71113 %2(s32) = G_ADD %0, %1
76118 name: test_sub_i64
77119 legalized: true
78120 regBankSelected: true
79 # CHECK: registers:
80 # CHECK-NEXT: - { id: 0, class: gr64 }
81 # CHECK-NEXT: - { id: 1, class: gr64 }
82 # CHECK-NEXT: - { id: 2, class: gr64 }
83 registers:
121 # ALL: registers:
122 # ALL-NEXT: - { id: 0, class: gr64 }
123 # ALL-NEXT: - { id: 1, class: gr64 }
124 # ALL-NEXT: - { id: 2, class: gr64 }
125 registers:
84126 - { id: 0, class: gpr }
85127 - { id: 1, class: gpr }
86128 - { id: 2, class: gpr }
87 # CHECK: %0 = COPY %rdi
88 # CHECK-NEXT: %1 = COPY %rsi
89 # CHECK-NEXT: %2 = SUB64rr %0, %1
129 # ALL: %0 = COPY %rdi
130 # ALL-NEXT: %1 = COPY %rsi
131 # ALL-NEXT: %2 = SUB64rr %0, %1
90132 body: |
91133 bb.1 (%ir-block.0):
92134 liveins: %edi, %esi
93
135
94136 %0(s64) = COPY %rdi
95137 %1(s64) = COPY %rsi
96138 %2(s64) = G_SUB %0, %1
101143 name: test_sub_i32
102144 legalized: true
103145 regBankSelected: true
104 # CHECK: registers:
105 # CHECK-NEXT: - { id: 0, class: gr32 }
106 # CHECK-NEXT: - { id: 1, class: gr32 }
107 # CHECK-NEXT: - { id: 2, class: gr32 }
108 registers:
146 # ALL: registers:
147 # ALL-NEXT: - { id: 0, class: gr32 }
148 # ALL-NEXT: - { id: 1, class: gr32 }
149 # ALL-NEXT: - { id: 2, class: gr32 }
150 registers:
109151 - { id: 0, class: gpr }
110152 - { id: 1, class: gpr }
111153 - { id: 2, class: gpr }
112 # CHECK: %0 = COPY %edi
113 # CHECK-NEXT: %1 = COPY %esi
114 # CHECK-NEXT: %2 = SUB32rr %0, %1
154 # ALL: %0 = COPY %edi
155 # ALL-NEXT: %1 = COPY %esi
156 # ALL-NEXT: %2 = SUB32rr %0, %1
115157 body: |
116158 bb.1 (%ir-block.0):
117159 liveins: %edi, %esi
118
160
119161 %0(s32) = COPY %edi
120162 %1(s32) = COPY %esi
121163 %2(s32) = G_SUB %0, %1
122164
123165 ...
166
167 ---
168 name: test_add_float
169 alignment: 4
170 legalized: true
171 regBankSelected: true
172 selected: false
173 tracksRegLiveness: true
174 # ALL: registers:
175 # NO_AVX512F-NEXT: - { id: 0, class: fr32 }
176 # NO_AVX512F-NEXT: - { id: 1, class: fr32 }
177 # NO_AVX512F-NEXT: - { id: 2, class: fr32 }
178 # AVX512ALL-NEXT: - { id: 0, class: fr32x }
179 # AVX512ALL-NEXT: - { id: 1, class: fr32x }
180 # AVX512ALL-NEXT: - { id: 2, class: fr32x }
181 registers:
182 - { id: 0, class: vecr }
183 - { id: 1, class: vecr }
184 - { id: 2, class: vecr }
185 # ALL: %0 = COPY %xmm0
186 # ALL-NEXT: %1 = COPY %xmm1
187 # SSE-NEXT: %2 = ADDSSrr %0, %1
188 # AVX-NEXT: %2 = VADDSSrr %0, %1
189 # AVX512F-NEXT: %2 = VADDSSZrr %0, %1
190 body: |
191 bb.1 (%ir-block.0):
192 liveins: %xmm0, %xmm1
193
194 %0(s32) = COPY %xmm0
195 %1(s32) = COPY %xmm1
196 %2(s32) = G_FADD %0, %1
197 %xmm0 = COPY %2(s32)
198 RET 0, implicit %xmm0
199
200 ...
201 ---
202 name: test_add_double
203 alignment: 4
204 legalized: true
205 regBankSelected: true
206 selected: false
207 tracksRegLiveness: true
208 # ALL: registers:
209 # NO_AVX512F-NEXT: - { id: 0, class: fr64 }
210 # NO_AVX512F-NEXT: - { id: 1, class: fr64 }
211 # NO_AVX512F-NEXT: - { id: 2, class: fr64 }
212 # AVX512ALL-NEXT: - { id: 0, class: fr64x }
213 # AVX512ALL-NEXT: - { id: 1, class: fr64x }
214 # AVX512ALL-NEXT: - { id: 2, class: fr64x }
215 registers:
216 - { id: 0, class: vecr }
217 - { id: 1, class: vecr }
218 - { id: 2, class: vecr }
219 # ALL: %0 = COPY %xmm0
220 # ALL-NEXT: %1 = COPY %xmm1
221 # SSE-NEXT: %2 = ADDSDrr %0, %1
222 # AVX-NEXT: %2 = VADDSDrr %0, %1
223 # AVX512F-NEXT: %2 = VADDSDZrr %0, %1
224 body: |
225 bb.1 (%ir-block.0):
226 liveins: %xmm0, %xmm1
227
228 %0(s64) = COPY %xmm0
229 %1(s64) = COPY %xmm1
230 %2(s64) = G_FADD %0, %1
231 %xmm0 = COPY %2(s64)
232 RET 0, implicit %xmm0
233
234 ...
235 ---
236 name: test_sub_float
237 alignment: 4
238 legalized: true
239 regBankSelected: true
240 selected: false
241 tracksRegLiveness: true
242 # ALL: registers:
243 # NO_AVX512F-NEXT: - { id: 0, class: fr32 }
244 # NO_AVX512F-NEXT: - { id: 1, class: fr32 }
245 # NO_AVX512F-NEXT: - { id: 2, class: fr32 }
246 # AVX512ALL-NEXT: - { id: 0, class: fr32x }
247 # AVX512ALL-NEXT: - { id: 1, class: fr32x }
248 # AVX512ALL-NEXT: - { id: 2, class: fr32x }
249 registers:
250 - { id: 0, class: vecr }
251 - { id: 1, class: vecr }
252 - { id: 2, class: vecr }
253 # ALL: %0 = COPY %xmm0
254 # ALL-NEXT: %1 = COPY %xmm1
255 # SSE-NEXT: %2 = SUBSSrr %0, %1
256 # AVX-NEXT: %2 = VSUBSSrr %0, %1
257 # AVX512F-NEXT: %2 = VSUBSSZrr %0, %1
258 body: |
259 bb.1 (%ir-block.0):
260 liveins: %xmm0, %xmm1
261
262 %0(s32) = COPY %xmm0
263 %1(s32) = COPY %xmm1
264 %2(s32) = G_FSUB %0, %1
265 %xmm0 = COPY %2(s32)
266 RET 0, implicit %xmm0
267
268 ...
269 ---
270 name: test_sub_double
271 alignment: 4
272 legalized: true
273 regBankSelected: true
274 selected: false
275 tracksRegLiveness: true
276 # ALL: registers:
277 # NO_AVX512F-NEXT: - { id: 0, class: fr64 }
278 # NO_AVX512F-NEXT: - { id: 1, class: fr64 }
279 # NO_AVX512F-NEXT: - { id: 2, class: fr64 }
280 # AVX512ALL-NEXT: - { id: 0, class: fr64x }
281 # AVX512ALL-NEXT: - { id: 1, class: fr64x }
282 # AVX512ALL-NEXT: - { id: 2, class: fr64x }
283 registers:
284 - { id: 0, class: vecr }
285 - { id: 1, class: vecr }
286 - { id: 2, class: vecr }
287 # ALL: %0 = COPY %xmm0
288 # ALL-NEXT: %1 = COPY %xmm1
289 # SSE-NEXT: %2 = SUBSDrr %0, %1
290 # AVX-NEXT: %2 = VSUBSDrr %0, %1
291 # AVX512F-NEXT: %2 = VSUBSDZrr %0, %1
292 body: |
293 bb.1 (%ir-block.0):
294 liveins: %xmm0, %xmm1
295
296 %0(s64) = COPY %xmm0
297 %1(s64) = COPY %xmm1
298 %2(s64) = G_FSUB %0, %1
299 %xmm0 = COPY %2(s64)
300 RET 0, implicit %xmm0
301 ...
302 ---
303 name: test_add_v4i32
304 alignment: 4
305 legalized: true
306 regBankSelected: true
307 selected: false
308 tracksRegLiveness: true
309 # ALL: registers:
310 # NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
311 # NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
312 # NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
313 # AVX512VL-NEXT: - { id: 0, class: vr128x }
314 # AVX512VL-NEXT: - { id: 1, class: vr128x }
315 # AVX512VL-NEXT: - { id: 2, class: vr128x }
316 registers:
317 - { id: 0, class: vecr }
318 - { id: 1, class: vecr }
319 - { id: 2, class: vecr }
320 # ALL: %0 = COPY %xmm0
321 # ALL-NEXT: %1 = COPY %xmm1
322 # SSE-NEXT: %2 = PADDDrr %0, %1
323 # AVX-NEXT: %2 = VPADDDrr %0, %1
324 # AVX512F-NEXT: %2 = VPADDDrr %0, %1
325 # AVX512VL-NEXT: %2 = VPADDDZ128rr %0, %1
326 body: |
327 bb.1 (%ir-block.0):
328 liveins: %xmm0, %xmm1
329
330 %0(<4 x s32>) = COPY %xmm0
331 %1(<4 x s32>) = COPY %xmm1
332 %2(<4 x s32>) = G_ADD %0, %1
333 %xmm0 = COPY %2(<4 x s32>)
334 RET 0, implicit %xmm0
335
336 ...
337 ---
338 name: test_sub_v4i32
339 alignment: 4
340 legalized: true
341 regBankSelected: true
342 selected: false
343 tracksRegLiveness: true
344 # ALL: registers:
345 # NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
346 # NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
347 # NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
348 # AVX512VL-NEXT: - { id: 0, class: vr128x }
349 # AVX512VL-NEXT: - { id: 1, class: vr128x }
350 # AVX512VL-NEXT: - { id: 2, class: vr128x }
351 registers:
352 - { id: 0, class: vecr }
353 - { id: 1, class: vecr }
354 - { id: 2, class: vecr }
355 # ALL: %0 = COPY %xmm0
356 # ALL-NEXT: %1 = COPY %xmm1
357 # SSE-NEXT: %2 = PSUBDrr %0, %1
358 # AVX-NEXT: %2 = VPSUBDrr %0, %1
359 # AVX512F-NEXT: %2 = VPSUBDrr %0, %1
360 # AVX512VL-NEXT: %2 = VPSUBDZ128rr %0, %1
361 body: |
362 bb.1 (%ir-block.0):
363 liveins: %xmm0, %xmm1
364
365 %0(<4 x s32>) = COPY %xmm0
366 %1(<4 x s32>) = COPY %xmm1
367 %2(<4 x s32>) = G_SUB %0, %1
368 %xmm0 = COPY %2(<4 x s32>)
369 RET 0, implicit %xmm0
370
371 ...
372 ---
373 name: test_add_v4f32
374 alignment: 4
375 legalized: true
376 regBankSelected: true
377 selected: false
378 tracksRegLiveness: true
379 # ALL: registers:
380 # NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
381 # NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
382 # NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
383 # AVX512VL-NEXT: - { id: 0, class: vr128x }
384 # AVX512VL-NEXT: - { id: 1, class: vr128x }
385 # AVX512VL-NEXT: - { id: 2, class: vr128x }
386 registers:
387 - { id: 0, class: vecr }
388 - { id: 1, class: vecr }
389 - { id: 2, class: vecr }
390 # ALL: %0 = COPY %xmm0
391 # ALL-NEXT: %1 = COPY %xmm1
392 # SSE-NEXT: %2 = ADDPSrr %0, %1
393 # AVX-NEXT: %2 = VADDPSrr %0, %1
394 # AVX512F-NEXT: %2 = VADDPSrr %0, %1
395 # AVX512VL-NEXT: %2 = VADDPSZ128rr %0, %1
396 body: |
397 bb.1 (%ir-block.0):
398 liveins: %xmm0, %xmm1
399
400 %0(<4 x s32>) = COPY %xmm0
401 %1(<4 x s32>) = COPY %xmm1
402 %2(<4 x s32>) = G_FADD %0, %1
403 %xmm0 = COPY %2(<4 x s32>)
404 RET 0, implicit %xmm0
405
406 ...
407 ---
408 name: test_sub_v4f32
409 alignment: 4
410 legalized: true
411 regBankSelected: true
412 selected: false
413 tracksRegLiveness: true
414 # ALL: registers:
415 # NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
416 # NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
417 # NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
418 # AVX512VL-NEXT: - { id: 0, class: vr128x }
419 # AVX512VL-NEXT: - { id: 1, class: vr128x }
420 # AVX512VL-NEXT: - { id: 2, class: vr128x }
421 registers:
422 - { id: 0, class: vecr }
423 - { id: 1, class: vecr }
424 - { id: 2, class: vecr }
425 # ALL: %0 = COPY %xmm0
426 # ALL-NEXT: %1 = COPY %xmm1
427 # SSE-NEXT: %2 = SUBPSrr %0, %1
428 # AVX-NEXT: %2 = VSUBPSrr %0, %1
429 # AVX512F-NEXT: %2 = VSUBPSrr %0, %1
430 # AVX512VL-NEXT: %2 = VSUBPSZ128rr %0, %1
431 body: |
432 bb.1 (%ir-block.0):
433 liveins: %xmm0, %xmm1
434
435 %0(<4 x s32>) = COPY %xmm0
436 %1(<4 x s32>) = COPY %xmm1
437 %2(<4 x s32>) = G_FSUB %0, %1
438 %xmm0 = COPY %2(<4 x s32>)
439 RET 0, implicit %xmm0
440
441 ...