llvm.org GIT mirror llvm / ff136c7
[SystemZ, RegAlloc] Favor 3-address instructions during instruction selection. This patch aims to reduce spilling and register moves by using the 3-address versions of instructions per default instead of the 2-address equivalent ones. It seems that both spilling and register moves are improved noticeably generally. Regalloc hints are passed to increase conversions to 2-address instructions which are done in SystemZShortenInst.cpp (after regalloc). Since the SystemZ reg/mem instructions are 2-address (dst and lhs regs are the same), foldMemoryOperandImpl() can no longer trivially fold a spilled source register since the reg/reg instruction is now 3-address. In order to remedy this, new 3-address pseudo memory instructions are used to perform the folding only when the dst and lhs virtual registers are known to be allocated to the same physreg. In order to not let MachineCopyPropagation run and change registers on these transformed instructions (making it 3-address), a new target pass called SystemZPostRewrite.cpp is run just after VirtRegRewriter, that immediately lowers the pseudo to a target instruction. If it would have been possible to insert a COPY instruction and change a register operand (convert to 2-address) in foldMemoryOperandImpl() while trusting that the caller (e.g. InlineSpiller) would update/repair the involved LiveIntervals, the solution involving pseudo instructions would not have been needed. This is perhaps a potential improvement (see Phabricator post). Common code changes: * A new hook TargetPassConfig::addPostRewrite() is utilized to be able to run a target pass immediately before MachineCopyPropagation. * VirtRegMap is passed as an argument to foldMemoryOperand(). Review: Ulrich Weigand, Quentin Colombet https://reviews.llvm.org/D60888 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362868 91177308-0d34-0410-b5e6-96231b3b80d8 Jonas Paulsson 4 months ago
26 changed file(s) with 529 addition(s) and 231 deletion(s). Raw diff Collapse all Expand all
2525 #include "llvm/CodeGen/MachineOperand.h"
2626 #include "llvm/CodeGen/MachineOutliner.h"
2727 #include "llvm/CodeGen/PseudoSourceValue.h"
28 #include "llvm/CodeGen/VirtRegMap.h"
2829 #include "llvm/MC/MCInstrInfo.h"
2930 #include "llvm/Support/BranchProbability.h"
3031 #include "llvm/Support/ErrorHandling.h"
931932 /// operand folded, otherwise NULL is returned.
932933 /// The new instruction is inserted before MI, and the client is responsible
933934 /// for removing the old instruction.
935 /// If VRM is passed, the assigned physregs can be inspected by target to
936 /// decide on using an opcode (note that those assignments can still change).
934937 MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops,
935938 int FI,
936 LiveIntervals *LIS = nullptr) const;
939 LiveIntervals *LIS = nullptr,
940 VirtRegMap *VRM = nullptr) const;
937941
938942 /// Same as the previous version except it allows folding of any load and
939943 /// store from / to any address, not just from a specific stack slot.
10231027 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
10241028 ArrayRef Ops,
10251029 MachineBasicBlock::iterator InsertPt, int FrameIndex,
1026 LiveIntervals *LIS = nullptr) const {
1030 LiveIntervals *LIS = nullptr,
1031 VirtRegMap *VRM = nullptr) const {
10271032 return nullptr;
10281033 }
10291034
385385 return false;
386386 }
387387
388 /// Add passes to be run immediately after virtual registers are rewritten
389 /// to physical registers.
390 virtual void addPostRewrite() { }
391
388392 /// This method may be implemented by targets that want to run passes after
389393 /// register allocation pass pipeline but before prolog-epilog insertion.
390394 virtual void addPostRegAlloc() { }
836836
837837 MachineInstr *FoldMI =
838838 LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
839 : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS);
839 : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
840840 if (!FoldMI)
841841 return false;
842842
523523
524524 MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
525525 ArrayRef Ops, int FI,
526 LiveIntervals *LIS) const {
526 LiveIntervals *LIS,
527 VirtRegMap *VRM) const {
527528 auto Flags = MachineMemOperand::MONone;
528529 for (unsigned OpIdx : Ops)
529530 Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
569570 MBB->insert(MI, NewMI);
570571 } else {
571572 // Ask the target to do the actual folding.
572 NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
573 NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
573574 }
574575
575576 if (NewMI) {
11671167 addPass(&MachineSchedulerID);
11681168
11691169 if (addRegAssignmentOptimized()) {
1170 // Allow targets to expand pseudo instructions depending on the choice of
1171 // registers before MachineCopyPropagation.
1172 addPostRewrite();
1173
11701174 // Copy propagate to forward register uses and try to eliminate COPYs that
11711175 // were not coalesced.
11721176 addPass(&MachineCopyPropagationID);
30483048 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
30493049 MachineFunction &MF, MachineInstr &MI, ArrayRef Ops,
30503050 MachineBasicBlock::iterator InsertPt, int FrameIndex,
3051 LiveIntervals *LIS) const {
3051 LiveIntervals *LIS, VirtRegMap *VRM) const {
30523052 // This is a bit of a hack. Consider this instruction:
30533053 //
30543054 // %0 = COPY %sp; GPR64all:%0
161161 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
162162 ArrayRef Ops,
163163 MachineBasicBlock::iterator InsertPt, int FrameIndex,
164 LiveIntervals *LIS = nullptr) const override;
164 LiveIntervals *LIS = nullptr,
165 VirtRegMap *VRM = nullptr) const override;
165166
166167 /// \returns true if a branch from an instruction with opcode \p BranchOpc
167168 /// bytes is capable of jumping to a position \p BrOffset bytes away.
2929 SystemZMCInstLower.cpp
3030 SystemZRegisterInfo.cpp
3131 SystemZSelectionDAGInfo.cpp
32 SystemZPostRewrite.cpp
3233 SystemZShortenInst.cpp
3334 SystemZSubtarget.cpp
3435 SystemZTargetMachine.cpp
193193 FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
194194 FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
195195 FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
196 FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
196197 FunctionPass *createSystemZTDCPass();
197198 } // end namespace llvm
198199
3535 // "reg" for <INSN>R and "mem" for <INSN>.
3636 string OpKey = "";
3737 string OpType = "none";
38
39 // MemKey identifies a target reg-mem opcode, while MemType can be either
40 // "pseudo" or "target". This is used to map a pseudo memory instruction to
41 // its corresponding target opcode. See comment at MemFoldPseudo.
42 string MemKey = "";
43 string MemType = "none";
3844
3945 // Many distinct-operands instructions have older 2-operand equivalents.
4046 // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs,
119125 let ValueCols = [["20"]];
120126 }
121127
122 // Return the memory form of a register instruction.
128 // Return the memory form of a register instruction. Note that this may
129 // return a MemFoldPseudo instruction (see below).
123130 def getMemOpcode : InstrMapping {
124131 let FilterClass = "InstSystemZ";
125132 let RowFields = ["OpKey"];
128135 let ValueCols = [["mem"]];
129136 }
130137
131 // Return the 3-operand form of a 2-operand instruction.
132 def getThreeOperandOpcode : InstrMapping {
138 // Return the target memory instruction for a MemFoldPseudo.
139 def getTargetMemOpcode : InstrMapping {
140 let FilterClass = "InstSystemZ";
141 let RowFields = ["MemKey"];
142 let ColFields = ["MemType"];
143 let KeyCol = ["pseudo"];
144 let ValueCols = [["target"]];
145 }
146
147 // Return the 2-operand form of a 3-operand instruction.
148 def getTwoOperandOpcode : InstrMapping {
133149 let FilterClass = "InstSystemZ";
134150 let RowFields = ["NumOpsKey"];
135151 let ColFields = ["NumOpsValue"];
136 let KeyCol = ["2"];
137 let ValueCols = [["3"]];
152 let KeyCol = ["3"];
153 let ValueCols = [["2"]];
138154 }
139155
140156 //===----------------------------------------------------------------------===//
30653081 mnemonic#"\t$R1, $R2, $R3",
30663082 [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
30673083 let M4 = 0;
3084 let OpKey = mnemonic#cls1;
3085 let OpType = "reg";
30683086 }
30693087
30703088 multiclass BinaryRRAndK opcode1, bits<16> opcode2,
30723090 RegisterOperand cls2> {
30733091 let NumOpsKey = mnemonic in {
30743092 let NumOpsValue = "3" in
3075 def K : BinaryRRFanull_frag, cls1, cls1, cls2>,
3093 def K : BinaryRRFaoperator, cls1, cls1, cls2>,
30763094 Requires<[FeatureDistinctOps]>;
3077 let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
3095 let NumOpsValue = "2" in
30783096 def "" : BinaryRR;
30793097 }
30803098 }
30843102 RegisterOperand cls2> {
30853103 let NumOpsKey = mnemonic in {
30863104 let NumOpsValue = "3" in
3087 def K : BinaryRRFanull_frag, cls1, cls1, cls2>,
3105 def K : BinaryRRFaoperator, cls1, cls1, cls2>,
30883106 Requires<[FeatureDistinctOps]>;
3089 let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
3107 let NumOpsValue = "2" in
30903108 def "" : BinaryRRE;
30913109 }
30923110 }
31873205 Immediate imm> {
31883206 let NumOpsKey = mnemonic in {
31893207 let NumOpsValue = "3" in
3190 def K : BinaryRIEnull_frag, cls, imm>,
3208 def K : BinaryRIEoperator, cls, imm>,
31913209 Requires<[FeatureDistinctOps]>;
3192 let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
3210 let NumOpsValue = "2" in
31933211 def "" : BinaryRI;
31943212 }
31953213 }
32643282 SDPatternOperator operator, RegisterOperand cls> {
32653283 let NumOpsKey = mnemonic in {
32663284 let NumOpsValue = "3" in
3267 def K : BinaryRSYnull_frag, cls>,
3285 def K : BinaryRSYoperator, cls>,
32683286 Requires<[FeatureDistinctOps]>;
3269 let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
3287 let NumOpsValue = "2" in
32703288 def "" : BinaryRS;
32713289 }
32723290 }
45924610 RegisterOperand cls, Immediate imm> {
45934611 let NumOpsKey = key in {
45944612 let NumOpsValue = "3" in
4595 def K : BinaryRIEPseudo<null_frag, cls, imm>,
4613 def K : BinaryRIEPseudo<operator, cls, imm>,
45964614 Requires<[FeatureHighWord, FeatureDistinctOps]>;
4597 let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
4615 let NumOpsValue = "2" in
45984616 def "" : BinaryRIPseudo,
45994617 Requires<[FeatureHighWord]>;
46004618 }
4619 }
4620
4621 // A pseudo that is used during register allocation when folding a memory
4622 // operand. The 3-address register instruction with a spilled source cannot
4623 // be converted directly to a target 2-address reg/mem instruction.
4624 // Mapping: <INSN>R -> MemFoldPseudo -> <INSN>
4625 class MemFoldPseudo bytes,
4626 AddressingMode mode>
4627 : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
4628 let OpKey = mnemonic#"rk"#cls;
4629 let OpType = "mem";
4630 let MemKey = mnemonic#cls;
4631 let MemType = "pseudo";
4632 let mayLoad = 1;
4633 let AccessBytes = bytes;
4634 let HasIndex = 1;
4635 let hasNoSchedulingInfo = 1;
46014636 }
46024637
46034638 // Like CompareRI, but expanded after RA depending on the choice of register.
47734808 : AtomicLoadWBinary;
47744809 class AtomicLoadWBinaryImm
47754810 : AtomicLoadWBinary;
4811
4812 // A pseudo instruction that is a direct alias of a real instruction.
4813 // These aliases are used in cases where a particular register operand is
4814 // fixed or where the same instruction is used with different register sizes.
4815 // The size parameter is the size in bytes of the associated real instruction.
4816 class Alias pattern>
4817 : InstSystemZ {
4818 let isPseudo = 1;
4819 let isCodeGenOnly = 1;
4820 }
4821
4822 class UnaryAliasVRS
4823 : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
4824
4825 // An alias of a UnaryVRR*, but with different register sizes.
4826 class UnaryAliasVRR
4827 : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
4828 [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]>;
4829
4830 // An alias of a UnaryVRX, but with different register sizes.
4831 class UnaryAliasVRX
4832 AddressingMode mode = bdxaddr12only>
4833 : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
4834 [(set (tr.vt tr.op:$V1), (operator mode:$XBD2))]>;
4835
4836 // An alias of a StoreVRX, but with different register sizes.
4837 class StoreAliasVRX
4838 AddressingMode mode = bdxaddr12only>
4839 : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
4840 [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
4841
4842 // An alias of a BinaryRI, but with different register sizes.
4843 class BinaryAliasRI
4844 Immediate imm>
4845 : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
4846 [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
4847 let Constraints = "$R1 = $R1src";
4848 }
4849
4850 // An alias of a BinaryRIL, but with different register sizes.
4851 class BinaryAliasRIL
4852 Immediate imm>
4853 : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
4854 [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
4855 let Constraints = "$R1 = $R1src";
4856 }
4857
4858 // An alias of a BinaryVRRf, but with different register sizes.
4859 class BinaryAliasVRRf
4860 : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>;
4861
4862 // An alias of a CompareRI, but with different register sizes.
4863 class CompareAliasRI
4864 Immediate imm>
4865 : Alias<4, (outs), (ins cls:$R1, imm:$I2),
4866 [(set CC, (operator cls:$R1, imm:$I2))]> {
4867 let isCompare = 1;
4868 }
4869
4870 // An alias of a RotateSelectRIEf, but with different register sizes.
4871 class RotateSelectAliasRIEf
4872 : Alias<6, (outs cls1:$R1),
4873 (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
4874 imm32zx6:$I5), []> {
4875 let Constraints = "$R1 = $R1src";
4876 }
4877
4878 //===----------------------------------------------------------------------===//
4879 // Multiclasses that emit both real and pseudo instructions
4880 //===----------------------------------------------------------------------===//
4881
4882 multiclass BinaryRXYAndPseudo opcode,
4883 SDPatternOperator operator, RegisterOperand cls,
4884 SDPatternOperator load, bits<5> bytes,
4885 AddressingMode mode = bdxaddr20only> {
4886
4887 def "" : BinaryRXY {
4888 let MemKey = mnemonic#cls;
4889 let MemType = "target";
4890 }
4891 let Has20BitOffset = 1 in
4892 def _MemFoldPseudo : MemFoldPseudo;
4893 }
4894
4895 multiclass BinaryRXPairAndPseudo rxOpcode,
4896 bits<16> rxyOpcode, SDPatternOperator operator,
4897 RegisterOperand cls,
4898 SDPatternOperator load, bits<5> bytes> {
4899 let DispKey = mnemonic ## #cls in {
4900 def "" : BinaryRX
4901 bdxaddr12pair> {
4902 let DispSize = "12";
4903 let MemKey = mnemonic#cls;
4904 let MemType = "target";
4905 }
4906 let DispSize = "20" in
4907 def Y : BinaryRXY
4908 bytes, bdxaddr20pair>;
4909 }
4910 def _MemFoldPseudo : MemFoldPseudo;
4911 }
47764912
47774913 // Define an instruction that operates on two fixed-length blocks of memory,
47784914 // and associated pseudo instructions for operating on blocks of any size.
48254961 [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
48264962 GR32:$char))]>;
48274963 }
4828
4829 // A pseudo instruction that is a direct alias of a real instruction.
4830 // These aliases are used in cases where a particular register operand is
4831 // fixed or where the same instruction is used with different register sizes.
4832 // The size parameter is the size in bytes of the associated real instruction.
4833 class Alias pattern>
4834 : InstSystemZ {
4835 let isPseudo = 1;
4836 let isCodeGenOnly = 1;
4837 }
4838
4839 class UnaryAliasVRS
4840 : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
4841
4842 // An alias of a UnaryVRR*, but with different register sizes.
4843 class UnaryAliasVRR
4844 : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
4845 [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2)))]>;
4846
4847 // An alias of a UnaryVRX, but with different register sizes.
4848 class UnaryAliasVRX
4849 AddressingMode mode = bdxaddr12only>
4850 : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
4851 [(set (tr.vt tr.op:$V1), (operator mode:$XBD2))]>;
4852
4853 // An alias of a StoreVRX, but with different register sizes.
4854 class StoreAliasVRX
4855 AddressingMode mode = bdxaddr12only>
4856 : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
4857 [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
4858
4859 // An alias of a BinaryRI, but with different register sizes.
4860 class BinaryAliasRI
4861 Immediate imm>
4862 : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
4863 [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
4864 let Constraints = "$R1 = $R1src";
4865 }
4866
4867 // An alias of a BinaryRIL, but with different register sizes.
4868 class BinaryAliasRIL
4869 Immediate imm>
4870 : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
4871 [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
4872 let Constraints = "$R1 = $R1src";
4873 }
4874
4875 // An alias of a BinaryVRRf, but with different register sizes.
4876 class BinaryAliasVRRf
4877 : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>;
4878
4879 // An alias of a CompareRI, but with different register sizes.
4880 class CompareAliasRI
4881 Immediate imm>
4882 : Alias<4, (outs), (ins cls:$R1, imm:$I2),
4883 [(set CC, (operator cls:$R1, imm:$I2))]> {
4884 let isCompare = 1;
4885 }
4886
4887 // An alias of a RotateSelectRIEf, but with different register sizes.
4888 class RotateSelectAliasRIEf
4889 : Alias<6, (outs cls1:$R1),
4890 (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
4891 imm32zx6:$I5), []> {
4892 let Constraints = "$R1 = $R1src";
4893 }
956956 }
957957 }
958958
959 // Used to return from convertToThreeAddress after replacing two-address
960 // instruction OldMI with three-address instruction NewMI.
961 static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
962 MachineInstr *NewMI,
963 LiveVariables *LV) {
964 if (LV) {
965 unsigned NumOps = OldMI->getNumOperands();
966 for (unsigned I = 1; I < NumOps; ++I) {
967 MachineOperand &Op = OldMI->getOperand(I);
968 if (Op.isReg() && Op.isKill())
969 LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI);
970 }
971 }
972 transferDeadCC(OldMI, NewMI);
973 return NewMI;
974 }
975
976959 MachineInstr *SystemZInstrInfo::convertToThreeAddress(
977960 MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
978961 MachineBasicBlock *MBB = MI.getParent();
979 MachineFunction *MF = MBB->getParent();
980 MachineRegisterInfo &MRI = MF->getRegInfo();
981
982 unsigned Opcode = MI.getOpcode();
983 unsigned NumOps = MI.getNumOperands();
984
985 // Try to convert something like SLL into SLLK, if supported.
986 // We prefer to keep the two-operand form where possible both
987 // because it tends to be shorter and because some instructions
988 // have memory forms that can be used during spilling.
989 if (STI.hasDistinctOps()) {
990 MachineOperand &Dest = MI.getOperand(0);
991 MachineOperand &Src = MI.getOperand(1);
992 unsigned DestReg = Dest.getReg();
993 unsigned SrcReg = Src.getReg();
994 // AHIMux is only really a three-operand instruction when both operands
995 // are low registers. Try to constrain both operands to be low if
996 // possible.
997 if (Opcode == SystemZ::AHIMux &&
998 TargetRegisterInfo::isVirtualRegister(DestReg) &&
999 TargetRegisterInfo::isVirtualRegister(SrcReg) &&
1000 MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
1001 MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
1002 MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
1003 MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
1004 }
1005 int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
1006 if (ThreeOperandOpcode >= 0) {
1007 // Create three address instruction without adding the implicit
1008 // operands. Those will instead be copied over from the original
1009 // instruction by the loop below.
1010 MachineInstrBuilder MIB(
1011 *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
1012 /*NoImplicit=*/true));
1013 MIB.add(Dest);
1014 // Keep the kill state, but drop the tied flag.
1015 MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
1016 // Keep the remaining operands as-is.
1017 for (unsigned I = 2; I < NumOps; ++I)
1018 MIB.add(MI.getOperand(I));
1019 MBB->insert(MI, MIB);
1020 return finishConvertToThreeAddress(&MI, MIB, LV);
1021 }
1022 }
1023962
1024963 // Try to convert an AND into an RISBG-type instruction.
1025 if (LogicOp And = interpretAndImmediate(Opcode)) {
964 // TODO: It might be beneficial to select RISBG and shorten to AND instead.
965 if (LogicOp And = interpretAndImmediate(MI.getOpcode())) {
1026966 uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
1027967 // AND IMMEDIATE leaves the other bits of the register unchanged.
1028968 Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
1050990 .addImm(Start)
1051991 .addImm(End + 128)
1052992 .addImm(0);
1053 return finishConvertToThreeAddress(&MI, MIB, LV);
993 if (LV) {
994 unsigned NumOps = MI.getNumOperands();
995 for (unsigned I = 1; I < NumOps; ++I) {
996 MachineOperand &Op = MI.getOperand(I);
997 if (Op.isReg() && Op.isKill())
998 LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
999 }
1000 }
1001 transferDeadCC(&MI, MIB);
1002 return MIB;
10541003 }
10551004 }
10561005 return nullptr;
10591008 MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
10601009 MachineFunction &MF, MachineInstr &MI, ArrayRef Ops,
10611010 MachineBasicBlock::iterator InsertPt, int FrameIndex,
1062 LiveIntervals *LIS) const {
1011 LiveIntervals *LIS, VirtRegMap *VRM) const {
10631012 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10641013 const MachineFrameInfo &MFI = MF.getFrameInfo();
10651014 unsigned Size = MFI.getObjectSize(FrameIndex);
12131162 }
12141163 }
12151164
1216 // If the spilled operand is the final one, try to change <INSN>R
1217 // into <INSN>.
1165 // If the spilled operand is the final one or the instruction is
1166 // commutable, try to change <INSN>R into <INSN>.
1167 unsigned NumOps = MI.getNumExplicitOperands();
12181168 int MemOpcode = SystemZ::getMemOpcode(Opcode);
1169
1170 // See if this is a 3-address instruction that is convertible to 2-address
1171 // and suitable for folding below. Only try this with virtual registers
1172 // and a provided VRM (during regalloc).
1173 bool NeedsCommute = false;
1174 if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
1175 if (VRM == nullptr)
1176 MemOpcode = -1;
1177 else {
1178 assert(NumOps == 3 && "Expected two source registers.");
1179 unsigned DstReg = MI.getOperand(0).getReg();
1180 unsigned DstPhys =
1181 (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
1182 unsigned SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
1183 : ((OpNum == 1 && MI.isCommutable())
1184 ? MI.getOperand(2).getReg()
1185 : 0));
1186 if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
1187 TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg))
1188 NeedsCommute = (OpNum == 1);
1189 else
1190 MemOpcode = -1;
1191 }
1192 }
1193
12191194 if (MemOpcode >= 0) {
1220 unsigned NumOps = MI.getNumExplicitOperands();
1221 if (OpNum == NumOps - 1) {
1195 if ((OpNum == NumOps - 1) || NeedsCommute) {
12221196 const MCInstrDesc &MemDesc = get(MemOpcode);
12231197 uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
12241198 assert(AccessBytes != 0 && "Size of access should be known");
12261200 uint64_t Offset = Size - AccessBytes;
12271201 MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
12281202 MI.getDebugLoc(), get(MemOpcode));
1229 for (unsigned I = 0; I < OpNum; ++I)
1230 MIB.add(MI.getOperand(I));
1203 MIB.add(MI.getOperand(0));
1204 if (NeedsCommute)
1205 MIB.add(MI.getOperand(2));
1206 else
1207 for (unsigned I = 1; I < OpNum; ++I)
1208 MIB.add(MI.getOperand(I));
12311209 MIB.addFrameIndex(FrameIndex).addImm(Offset);
12321210 if (MemDesc.TSFlags & SystemZII::HasIndex)
12331211 MIB.addReg(0);
139139 };
140140
141141 } // end namespace SystemZII
142
143 namespace SystemZ {
144 int getTwoOperandOpcode(uint16_t Opcode);
145 int getTargetMemOpcode(uint16_t Opcode);
146 }
142147
143148 class SystemZInstrInfo : public SystemZGenInstrInfo {
144149 const SystemZRegisterInfo RI;
247252 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
248253 ArrayRef Ops,
249254 MachineBasicBlock::iterator InsertPt, int FrameIndex,
250 LiveIntervals *LIS = nullptr) const override;
255 LiveIntervals *LIS = nullptr,
256 VirtRegMap *VRM = nullptr) const override;
251257 MachineInstr *foldMemoryOperandImpl(
252258 MachineFunction &MF, MachineInstr &MI, ArrayRef Ops,
253259 MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
915915
916916 // Addition of memory.
917917 defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>;
918 defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
918 defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
919919 def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>,
920920 Requires<[FeatureMiscellaneousExtensions2]>;
921921 def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>;
922 def AG : BinaryRXY<"ag", 0xE308, z_sadd, GR64, load, 8>;
922 defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>;
923923
924924 // Addition to memory.
925925 def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
957957 Requires<[FeatureHighWord]>;
958958
959959 // Addition of memory.
960 defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
960 defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
961961 def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>;
962 def ALG : BinaryRXY<"alg", 0xE30A, z_uadd, GR64, load, 8>;
962 defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>;
963963
964964 // Addition to memory.
965965 def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>;
10021002
10031003 // Subtraction of memory.
10041004 defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>;
1005 defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
1005 defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
10061006 def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>,
10071007 Requires<[FeatureMiscellaneousExtensions2]>;
10081008 def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>;
1009 def SG : BinaryRXY<"sg", 0xE309, z_ssub, GR64, load, 8>;
1009 defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>;
10101010 }
10111011 defm : SXB;
10121012
10541054 def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>;
10551055
10561056 // Subtraction of memory.
1057 defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
1057 defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
10581058 def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>;
1059 def SLG : BinaryRXY<"slg", 0xE30B, z_usub, GR64, load, 8>;
1059 defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>;
10601060 }
10611061 defm : ZXB;
10621062
11311131
11321132 // ANDs of memory.
11331133 let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
1134 defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
1135 def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
1134 defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>;
1135 defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>;
11361136 }
11371137
11381138 // AND to memory
11881188
11891189 // ORs of memory.
11901190 let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
1191 defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
1192 def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
1191 defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>;
1192 defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>;
11931193 }
11941194
11951195 // OR to memory
12281228
12291229 // XORs of memory.
12301230 let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
1231 defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
1232 def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
1231 defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>;
1232 defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>;
12331233 }
12341234
12351235 // XOR to memory
0 //==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=//
1 //
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // This file contains a pass that is run immediately after VirtRegRewriter
9 // but before MachineCopyPropagation. The purpose is to lower pseudos to
10 // target instructions before any later pass might substitute a register for
11 // another.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "SystemZ.h"
16 #include "SystemZInstrInfo.h"
17 #include "SystemZSubtarget.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 using namespace llvm;
22
23 #define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass"
24
25 #define DEBUG_TYPE "systemz-postrewrite"
26 STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
27
28 namespace llvm {
29 void initializeSystemZPostRewritePass(PassRegistry&);
30 }
31
32 namespace {
33
34 class SystemZPostRewrite : public MachineFunctionPass {
35 public:
36 static char ID;
37 SystemZPostRewrite() : MachineFunctionPass(ID) {
38 initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry());
39 }
40
41 const SystemZInstrInfo *TII;
42
43 bool runOnMachineFunction(MachineFunction &Fn) override;
44
45 StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }
46
47 void getAnalysisUsage(AnalysisUsage &AU) const override {
48 AU.setPreservesAll();
49 MachineFunctionPass::getAnalysisUsage(AU);
50 }
51
52 private:
53 bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
54 MachineBasicBlock::iterator &NextMBBI);
55 bool selectMBB(MachineBasicBlock &MBB);
56 };
57
58 char SystemZPostRewrite::ID = 0;
59
60 } // end anonymous namespace
61
62 INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite",
63 SYSTEMZ_POSTREWRITE_NAME, false, false)
64
65 /// Returns an instance of the Post Rewrite pass.
66 FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
67 return new SystemZPostRewrite();
68 }
69
70 /// If MBBI references a pseudo instruction that should be selected here,
71 /// do it and return true. Otherwise return false.
72 bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
73 MachineBasicBlock::iterator MBBI,
74 MachineBasicBlock::iterator &NextMBBI) {
75 MachineInstr &MI = *MBBI;
76 unsigned Opcode = MI.getOpcode();
77
78 // Note: If this could be done during regalloc in foldMemoryOperandImpl()
79 // while also updating the LiveIntervals, there would be no need for the
80 // MemFoldPseudo to begin with.
81 int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode);
82 if (TargetMemOpcode != -1) {
83 MI.setDesc(TII->get(TargetMemOpcode));
84 MI.tieOperands(0, 1);
85 unsigned DstReg = MI.getOperand(0).getReg();
86 MachineOperand &SrcMO = MI.getOperand(1);
87 if (DstReg != SrcMO.getReg()) {
88 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg)
89 .addReg(SrcMO.getReg());
90 SrcMO.setReg(DstReg);
91 MemFoldCopies++;
92 }
93 return true;
94 }
95
96 return false;
97 }
98
99 /// Iterate over the instructions in basic block MBB and select any
100 /// pseudo instructions. Return true if anything was modified.
///
/// Each instruction is passed to selectMI(); the results are OR-ed together.
101 bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) {
102 bool Modified = false;
103
104 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
105 while (MBBI != E) {
// Compute the successor before calling selectMI(), which takes it by
// reference and therefore has the chance to change where iteration resumes.
106 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
107 Modified |= selectMI(MBB, MBBI, NMBBI);
108 MBBI = NMBBI;
109 }
110
111 return Modified;
112 }
113
// Pass entry point: caches the subtarget's instruction info in TII and then
// runs selectMBB() on every basic block of MF. Returns true if any block was
// modified.
114 bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) {
// The cast needs an explicit target type; TII is used as the SystemZ
// instruction info elsewhere in this pass (TII->get(SystemZ::COPY)).
115 TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
116
117 bool Modified = false;
118 for (auto &MBB : MF)
119 Modified |= selectMBB(MBB);
120
121 return Modified;
122 }
123
8080 const VirtRegMap *VRM,
8181 const LiveRegMatrix *Matrix) const {
8282 const MachineRegisterInfo *MRI = &MF.getRegInfo();
83 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
83 const SystemZSubtarget &Subtarget = MF.getSubtarget();
84 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8485
8586 bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
8687 VirtReg, Order, Hints, MF, VRM, Matrix);
137138 }
138139 }
139140
141 if (VRM == nullptr)
142 return BaseImplRetVal;
143
144 // Add any two address hints after any copy hints.
145 SmallSet TwoAddrHints;
146 for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
147 if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
148 const MachineOperand *VRRegMO = nullptr;
149 const MachineOperand *OtherMO = nullptr;
150 const MachineOperand *CommuMO = nullptr;
151 if (VirtReg == Use.getOperand(0).getReg()) {
152 VRRegMO = &Use.getOperand(0);
153 OtherMO = &Use.getOperand(1);
154 if (Use.isCommutable())
155 CommuMO = &Use.getOperand(2);
156 } else if (VirtReg == Use.getOperand(1).getReg()) {
157 VRRegMO = &Use.getOperand(1);
158 OtherMO = &Use.getOperand(0);
159 } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) {
160 VRRegMO = &Use.getOperand(2);
161 OtherMO = &Use.getOperand(0);
162 } else
163 continue;
164
165 auto tryAddHint = [&](const MachineOperand *MO) -> void {
166 unsigned Reg = MO->getReg();
167 unsigned PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
168 if (PhysReg) {
169 if (MO->getSubReg())
170 PhysReg = getSubReg(PhysReg, MO->getSubReg());
171 if (VRRegMO->getSubReg())
172 PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
173 MRI->getRegClass(VirtReg));
174 if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
175 TwoAddrHints.insert(PhysReg);
176 }
177 };
178 tryAddHint(OtherMO);
179 if (CommuMO)
180 tryAddHint(CommuMO);
181 }
182 for (MCPhysReg OrderReg : Order)
183 if (TwoAddrHints.count(OrderReg))
184 Hints.push_back(OrderReg);
185
140186 return BaseImplRetVal;
141187 }
142188
298298 case SystemZ::VST64:
299299 Changed |= shortenOn0(MI, SystemZ::STD);
300300 break;
301
302 default: {
303 int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode());
304 if (TwoOperandOpcode == -1)
305 break;
306
307 if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) &&
308 (!MI.isCommutable() ||
309 MI.getOperand(0).getReg() != MI.getOperand(2).getReg() ||
310 !TII->commuteInstruction(MI, false, 1, 2)))
311 break;
312
313 MI.setDesc(TII->get(TwoOperandOpcode));
314 MI.tieOperands(0, 1);
315 if (TwoOperandOpcode == SystemZ::SLL ||
316 TwoOperandOpcode == SystemZ::SLA ||
317 TwoOperandOpcode == SystemZ::SRL ||
318 TwoOperandOpcode == SystemZ::SRA) {
319 // These shifts only use the low 6 bits of the shift count, so masking the immediate to 12 bits (0xfff) does not change the shift performed.
320 MachineOperand &ImmMO = MI.getOperand(3);
321 ImmMO.setImm(ImmMO.getImm() & 0xfff);
322 }
323 Changed = true;
324 break;
325 }
301326 }
302327
303328 LiveRegs.stepBackward(MI);
182182 void addIRPasses() override;
183183 bool addInstSelector() override;
184184 bool addILPOpts() override;
185 void addPostRewrite() override;
185186 void addPreSched2() override;
186187 void addPreEmitPass() override;
187188 };
211212 return true;
212213 }
213214
// Adds the SystemZ Post Rewrite pass, created for this configuration's
// SystemZ target machine, to the pass pipeline.
215 void SystemZPassConfig::addPostRewrite() {
216 addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
217 }
218
214219 void SystemZPassConfig::addPreSched2() {
220 // PostRewrite needs to be run at -O0 also (in which case addPostRewrite()
221 // is not called).
222 if (getOptLevel() == CodeGenOpt::None)
223 addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
224
215225 addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
216226
217227 if (getOptLevel() != CodeGenOpt::None)
47824782 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
47834783 ArrayRef Ops,
47844784 MachineBasicBlock::iterator InsertPt,
4785 int FrameIndex, LiveIntervals *LIS) const {
4785 int FrameIndex, LiveIntervals *LIS,
4786 VirtRegMap *VRM) const {
47864787 // Check switch flag
47874788 if (NoFusing)
47884789 return nullptr;
349349 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
350350 ArrayRef Ops,
351351 MachineBasicBlock::iterator InsertPt, int FrameIndex,
352 LiveIntervals *LIS = nullptr) const override;
352 LiveIntervals *LIS = nullptr,
353 VirtRegMap *VRM = nullptr) const override;
353354
354355 /// foldMemoryOperand - Same as the previous version except it allows folding
355356 /// of any load and store from / to any address, not just from a specific
602602 }
603603
604604 ; Test three-operand halfword immediate addition involving mixtures of low
605 ; and high registers. RISBHG/AIH would be OK too, instead of AHIK/RISBHG.
605 ; and high registers. AHIK/RISBHG would be OK too, instead of RISBHG/AIH.
606606 define i32 @f28(i32 %old) {
607607 ; CHECK-LABEL: f28:
608608 ; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14
609609 ; CHECK: stepa %r2, [[REG1]]
610 ; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254
611 ; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32
610 ; CHECK: risbhg [[REG1]], [[REG1]], 0, 159, 32
611 ; CHECK: aih [[REG1]], 254
612612 ; CHECK: stepb [[REG1]], [[REG2]]
613613 ; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0
614614 ; CHECK: aih [[REG3]], 127
88 ; CHECK-LABEL: f1:
99 ; CHECK: popcnt %r0, %r2
1010 ; CHECK: sllk %r1, %r0, 16
11 ; CHECK: ar %r1, %r0
12 ; CHECK: sllk %r2, %r1, 8
13 ; CHECK: ar %r2, %r1
14 ; CHECK: srl %r2, 24
11 ; CHECK: ar %r0, %r1
12 ; CHECK: sllk %r1, %r0, 8
13 ; CHECK: ar %r0, %r1
14 ; CHECK: srlk %r2, %r0, 24
1515 ; CHECK: br %r14
1616
1717 %popcnt = call i32 @llvm.ctpop.i32(i32 %a)
2222 ; CHECK-LABEL: f2:
2323 ; CHECK: llhr %r0, %r2
2424 ; CHECK: popcnt %r0, %r0
25 ; CHECK: risblg %r2, %r0, 16, 151, 8
26 ; CHECK: ar %r2, %r0
27 ; CHECK: srl %r2, 8
25 ; CHECK: risblg %r1, %r0, 16, 151, 8
26 ; CHECK: ar %r0, %r1
27 ; CHECK: srlk %r2, %r0, 8
2828 ; CHECK: br %r14
2929 %and = and i32 %a, 65535
3030 %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
4545 ; CHECK-LABEL: f4:
4646 ; CHECK: popcnt %r0, %r2
4747 ; CHECK: sllg %r1, %r0, 32
48 ; CHECK: agr %r1, %r0
49 ; CHECK: sllg %r0, %r1, 16
48 ; CHECK: agr %r0, %r1
49 ; CHECK: sllg %r1, %r0, 16
5050 ; CHECK: agr %r0, %r1
5151 ; CHECK: sllg %r1, %r0, 8
52 ; CHECK: agr %r1, %r0
53 ; CHECK: srlg %r2, %r1, 56
52 ; CHECK: agr %r0, %r1
53 ; CHECK: srlg %r2, %r0, 56
5454 ; CHECK: br %r14
5555 %popcnt = call i64 @llvm.ctpop.i64(i64 %a)
5656 ret i64 %popcnt
7575 ; CHECK: llghr %r0, %r2
7676 ; CHECK: popcnt %r0, %r0
7777 ; CHECK: risbg %r1, %r0, 48, 183, 8
78 ; CHECK: agr %r1, %r0
79 ; CHECK: srlg %r2, %r1, 8
78 ; CHECK: agr %r0, %r1
79 ; CHECK: srlg %r2, %r0, 8
8080 ; CHECK: br %r14
8181 %and = and i64 %a, 65535
8282 %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
0 ; Test 64-bit addition in which the second operand is variable.
11 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s --check-prefixes=CHECK,Z10
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s --check-prefixes=CHECK,Z196
44
55 declare i64 @foo()
66
9696 }
9797
9898 ; Check that additions of spilled values can use AG rather than AGR.
99 ; Note: Z196 is suboptimal with one unfolded reload.
99100 define i64 @f9(i64 *%ptr0) {
100101 ; CHECK-LABEL: f9:
101102 ; CHECK: brasl %r14, foo@PLT
102 ; CHECK: ag %r2, 160(%r15)
103 ; Z10: ag %r2, 168(%r15)
104 ; Z196: ag %r0, 168(%r15)
103105 ; CHECK: br %r14
104106 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
105107 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
0 ; Test of subtraction that involves a constant as the first operand
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
3
4 ; Check highest 16-bit signed int immediate value.
5 define i64 @f1(i64 %a) {
6 ; CHECK-LABEL: f1:
7 ; CHECK: lghi %r0, 32767
8 ; CHECK: sgrk %r2, %r0, %r2
9 ; CHECK: br %r14
10 %sub = sub i64 32767, %a
11 ret i64 %sub
12 }
13 ; Check highest 32-bit signed int immediate value.
14 define i64 @f2(i64 %a) {
15 ; CHECK-LABEL: f2:
16 ; CHECK: lgfi %r0, 2147483647
17 ; CHECK: sgrk %r2, %r0, %r2
18 ; CHECK: br %r14
19 %sub = sub i64 2147483647, %a
20 ret i64 %sub
21 }
5454 ; CHECK-LABEL: %bb.0:
5555 ; CHECK-NEXT: # kill
5656 ; CHECK-NEXT: llghr %r0, %r2
57 ; CHECK-NEXT: flogr %r2, %r0
58 ; CHECK-NEXT: aghi %r2, -32
59 ; CHECK-NEXT: ahi %r2, -16
60 ; CHECK-NEXT: # kill
57 ; CHECK-NEXT: flogr %r0, %r0
58 ; CHECK-NEXT: aghi %r0, -32
59 ; CHECK-NEXT: ahik %r2, %r0, -16
6160 ; CHECK-NEXT: br %r14
6261 %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false)
6362 ret i16 %1
6867 ; CHECK-LABEL: %bb.0:
6968 ; CHECK-NEXT: # kill
7069 ; CHECK-NEXT: llghr %r0, %r2
71 ; CHECK-NEXT: flogr %r2, %r0
72 ; CHECK-NEXT: aghi %r2, -32
73 ; CHECK-NEXT: ahi %r2, -16
74 ; CHECK-NEXT: # kill
70 ; CHECK-NEXT: flogr %r0, %r0
71 ; CHECK-NEXT: aghi %r0, -32
72 ; CHECK-NEXT: ahik %r2, %r0, -16
7573 ; CHECK-NEXT: br %r14
7674 %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true)
7775 ret i16 %1
8280 ; CHECK-LABEL: %bb.0:
8381 ; CHECK-NEXT: # kill
8482 ; CHECK-NEXT: llgcr %r0, %r2
85 ; CHECK-NEXT: flogr %r2, %r0
86 ; CHECK-NEXT: aghi %r2, -32
87 ; CHECK-NEXT: ahi %r2, -24
88 ; CHECK-NEXT: # kill
83 ; CHECK-NEXT: flogr %r0, %r0
84 ; CHECK-NEXT: aghi %r0, -32
85 ; CHECK-NEXT: ahik %r2, %r0, -24
8986 ; CHECK-NEXT: br %r14
9087 %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false)
9188 ret i8 %1
9693 ; CHECK-LABEL: %bb.0:
9794 ; CHECK-NEXT: # kill
9895 ; CHECK-NEXT: llgcr %r0, %r2
99 ; CHECK-NEXT: flogr %r2, %r0
100 ; CHECK-NEXT: aghi %r2, -32
101 ; CHECK-NEXT: ahi %r2, -24
102 ; CHECK-NEXT: # kill
96 ; CHECK-NEXT: flogr %r0, %r0
97 ; CHECK-NEXT: aghi %r0, -32
98 ; CHECK-NEXT: ahik %r2, %r0, -24
10399 ; CHECK-NEXT: br %r14
104100 %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true)
105101 ret i8 %1
7474 ; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
7575 ; CHECK-NEXT: .cfi_offset %r14, -48
7676 ; CHECK-NEXT: .cfi_offset %r15, -40
77 ; CHECK-NEXT: vlgvf %r3, %v26, 1
78 ; CHECK-NEXT: vlgvf %r1, %v26, 2
79 ; CHECK-NEXT: risbgn %r4, %r3, 0, 129, 62
80 ; CHECK-NEXT: rosbg %r4, %r1, 2, 32, 31
77 ; CHECK-DAG: vlgvf [[REG11:%r[0-9]+]], %v26, 1
78 ; CHECK-DAG: vlgvf [[REG12:%r[0-9]+]], %v26, 2
79 ; CHECK-DAG: risbgn [[REG13:%r[0-9]+]], [[REG11]], 0, 129, 62
80 ; CHECK-DAG: rosbg [[REG13]], [[REG12]], 2, 32, 31
8181 ; CHECK-DAG: vlgvf %r0, %v26, 3
82 ; CHECK-DAG: rosbg %r4, %r0, 33, 63, 0
82 ; CHECK-DAG: rosbg [[REG13]], %r0, 33, 63, 0
8383 ; CHECK-DAG: stc %r0, 30(%r2)
84 ; CHECK-DAG: srl %r0, 8
84 ; CHECK-DAG: srlk %r1, %r0, 8
8585 ; CHECK-DAG: vlgvf [[REG0:%r[0-9]+]], %v24, 1
8686 ; CHECK-DAG: vlgvf [[REG1:%r[0-9]+]], %v24, 0
87 ; CHECK-DAG: sth %r0, 28(%r2)
87 ; CHECK-DAG: sth %r1, 28(%r2)
8888 ; CHECK-DAG: vlgvf [[REG2:%r[0-9]+]], %v24, 2
8989 ; CHECK-DAG: risbgn [[REG3:%r[0-9]+]], [[REG0]], 0, 133, 58
9090 ; CHECK-DAG: rosbg [[REG3]], [[REG2]], 6, 36, 27
9494 ; CHECK-DAG: rosbg [[REG3]], [[REG5]], 37, 63, 60
9595 ; CHECK-DAG: sllg [[REG6:%r[0-9]+]], [[REG4]], 8
9696 ; CHECK-DAG: rosbg [[REG6]], [[REG3]], 56, 63, 8
97 ; CHECK-NEXT: stg [[REG6]], 0(%r2)
98 ; CHECK-NEXT: srlg [[REG7:%r[0-9]+]], %r4, 24
99 ; CHECK-NEXT: st [[REG7]], 24(%r2)
100 ; CHECK-NEXT: vlgvf [[REG8:%r[0-9]+]], %v26, 0
101 ; CHECK-NEXT: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60
102 ; CHECK-NEXT: rosbg [[REG10]], [[REG8]], 4, 34, 29
103 ; CHECK-NEXT: sllg [[REG9:%r[0-9]+]], [[REG3]], 8
104 ; CHECK-NEXT: rosbg [[REG10]], %r3, 35, 63, 62
105 ; CHECK-NEXT: rosbg [[REG9]], [[REG10]], 56, 63, 8
106 ; CHECK-NEXT: stg [[REG9]], 8(%r2)
107 ; CHECK-NEXT: sllg %r0, [[REG10]], 8
108 ; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8
97 ; CHECK-DAG: stg [[REG6]], 0(%r2)
98 ; CHECK-DAG: srlg [[REG7:%r[0-9]+]], [[REG13]], 24
99 ; CHECK-DAG: st [[REG7]], 24(%r2)
100 ; CHECK-DAG: vlgvf [[REG8:%r[0-9]+]], %v26, 0
101 ; CHECK-DAG: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60
102 ; CHECK-DAG: rosbg [[REG10]], [[REG8]], 4, 34, 29
103 ; CHECK-DAG: sllg [[REG9:%r[0-9]+]], [[REG3]], 8
104 ; CHECK-DAG: rosbg [[REG10]], [[REG11]], 35, 63, 62
105 ; CHECK-DAG: rosbg [[REG9]], [[REG10]], 56, 63, 8
106 ; CHECK-DAG: stg [[REG9]], 8(%r2)
107 ; CHECK-DAG: sllg %r0, [[REG10]], 8
108 ; CHECK-DAG: rosbg %r0, [[REG13]], 56, 63, 8
109109 ; CHECK-NEXT: stg %r0, 16(%r2)
110110 ; CHECK-NEXT: lmg %r14, %r15, 112(%r15)
111111 ; CHECK-NEXT: br %r14
407407 ; CHECK-NOT: vmrh
408408 ; CHECK: ar {{%r[0-5]}},
409409 ; CHECK: ar {{%r[0-5]}},
410 ; CHECK: or %r2,
410 ; CHECK: ork %r2,
411411 ; CHECK: br %r14
412412 %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0
413413 %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0