llvm.org GIT mirror llvm / 2ce77c8
[RISCV] Custom lower SHL_PARTS, SRA_PARTS, SRL_PARTS. When not optimizing for minimum size (-Oz), we custom lower wide shifts (SHL_PARTS, SRA_PARTS, SRL_PARTS) instead of expanding to a libcall. Differential Revision: https://reviews.llvm.org/D59477 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358498 91177308-0d34-0410-b5e6-96231b3b80d8 Luis Marques 1 year, 7 months ago
4 changed file(s) with 407 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
124124 setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
125125 setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
126126
127 setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
128 setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
129 setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
127 setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
128 setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
129 setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
130130
131131 setOperationAction(ISD::ROTL, XLenVT, Expand);
132132 setOperationAction(ISD::ROTR, XLenVT, Expand);
359359 return lowerFRAMEADDR(Op, DAG);
360360 case ISD::RETURNADDR:
361361 return lowerRETURNADDR(Op, DAG);
362 case ISD::SHL_PARTS:
363 return lowerShiftLeftParts(Op, DAG);
364 case ISD::SRA_PARTS:
365 return lowerShiftRightParts(Op, DAG, true);
366 case ISD::SRL_PARTS:
367 return lowerShiftRightParts(Op, DAG, false);
362368 case ISD::BITCAST: {
363369 assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
364370 "Unexpected custom legalisation");
565571 // live-in.
566572 unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
567573 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
574 }
575
576 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
577 SelectionDAG &DAG) const {
578 SDLoc DL(Op);
579 SDValue Lo = Op.getOperand(0);
580 SDValue Hi = Op.getOperand(1);
581 SDValue Shamt = Op.getOperand(2);
582 EVT VT = Lo.getValueType();
583
584 // if Shamt-XLEN < 0: // Shamt < XLEN
585 // Lo = Lo << Shamt
586 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
587 // else:
588 // Lo = 0
589 // Hi = Lo << (Shamt-XLEN)
590
591 SDValue Zero = DAG.getConstant(0, DL, VT);
592 SDValue One = DAG.getConstant(1, DL, VT);
593 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
594 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
595 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
596 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
597
598 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
599 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
600 SDValue ShiftRightLo =
601 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
602 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
603 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
604 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
605
606 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
607
608 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
609 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
610
611 SDValue Parts[2] = {Lo, Hi};
612 return DAG.getMergeValues(Parts, DL);
613 }
614
615 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
616 bool IsSRA) const {
617 SDLoc DL(Op);
618 SDValue Lo = Op.getOperand(0);
619 SDValue Hi = Op.getOperand(1);
620 SDValue Shamt = Op.getOperand(2);
621 EVT VT = Lo.getValueType();
622
623 // SRA expansion:
624 // if Shamt-XLEN < 0: // Shamt < XLEN
625 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
626 // Hi = Hi >>s Shamt
627 // else:
628 // Lo = Hi >>s (Shamt-XLEN);
629 // Hi = Hi >>s (XLEN-1)
630 //
631 // SRL expansion:
632 // if Shamt-XLEN < 0: // Shamt < XLEN
633 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
634 // Hi = Hi >>u Shamt
635 // else:
636 // Lo = Hi >>u (Shamt-XLEN);
637 // Hi = 0;
638
639 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
640
641 SDValue Zero = DAG.getConstant(0, DL, VT);
642 SDValue One = DAG.getConstant(1, DL, VT);
643 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
644 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
645 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
646 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
647
648 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
649 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
650 SDValue ShiftLeftHi =
651 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
652 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
653 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
654 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
655 SDValue HiFalse =
656 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
657
658 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
659
660 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
661 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
662
663 SDValue Parts[2] = {Lo, Hi};
664 return DAG.getMergeValues(Parts, DL);
568665 }
569666
570667 // Returns the opcode of the target-specific SDNode that implements the 32-bit
113113 return ISD::SIGN_EXTEND;
114114 }
115115
116 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
117 if (DAG.getMachineFunction().getFunction().hasMinSize())
118 return false;
119 return true;
120 }
121
116122 private:
117123 void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
118124 const SmallVectorImpl &Ins,
151157 SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
152158 SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
153159 SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
160 SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
161 SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
154162
155163 bool isEligibleForTailCallOptimization(
156164 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
210210 ;
211211 ; RV32I-LABEL: sll:
212212 ; RV32I: # %bb.0:
213 ; RV32I-NEXT: addi sp, sp, -16
214 ; RV32I-NEXT: sw ra, 12(sp)
215 ; RV32I-NEXT: call __ashldi3
216 ; RV32I-NEXT: lw ra, 12(sp)
217 ; RV32I-NEXT: addi sp, sp, 16
213 ; RV32I-NEXT: addi a3, a2, -32
214 ; RV32I-NEXT: bltz a3, .LBB11_2
215 ; RV32I-NEXT: # %bb.1:
216 ; RV32I-NEXT: sll a1, a0, a3
217 ; RV32I-NEXT: mv a0, zero
218 ; RV32I-NEXT: ret
219 ; RV32I-NEXT: .LBB11_2:
220 ; RV32I-NEXT: addi a3, zero, 31
221 ; RV32I-NEXT: sub a3, a3, a2
222 ; RV32I-NEXT: srli a4, a0, 1
223 ; RV32I-NEXT: srl a3, a4, a3
224 ; RV32I-NEXT: sll a1, a1, a2
225 ; RV32I-NEXT: or a1, a1, a3
226 ; RV32I-NEXT: sll a0, a0, a2
218227 ; RV32I-NEXT: ret
219228 %1 = shl i64 %a, %b
220229 ret i64 %1
287296 ;
288297 ; RV32I-LABEL: srl:
289298 ; RV32I: # %bb.0:
290 ; RV32I-NEXT: addi sp, sp, -16
291 ; RV32I-NEXT: sw ra, 12(sp)
292 ; RV32I-NEXT: call __lshrdi3
293 ; RV32I-NEXT: lw ra, 12(sp)
294 ; RV32I-NEXT: addi sp, sp, 16
299 ; RV32I-NEXT: addi a3, a2, -32
300 ; RV32I-NEXT: bltz a3, .LBB15_2
301 ; RV32I-NEXT: # %bb.1:
302 ; RV32I-NEXT: srl a0, a1, a3
303 ; RV32I-NEXT: mv a1, zero
304 ; RV32I-NEXT: ret
305 ; RV32I-NEXT: .LBB15_2:
306 ; RV32I-NEXT: addi a3, zero, 31
307 ; RV32I-NEXT: sub a3, a3, a2
308 ; RV32I-NEXT: slli a4, a1, 1
309 ; RV32I-NEXT: sll a3, a4, a3
310 ; RV32I-NEXT: srl a0, a0, a2
311 ; RV32I-NEXT: or a0, a0, a3
312 ; RV32I-NEXT: srl a1, a1, a2
295313 ; RV32I-NEXT: ret
296314 %1 = lshr i64 %a, %b
297315 ret i64 %1
305323 ;
306324 ; RV32I-LABEL: sra:
307325 ; RV32I: # %bb.0:
308 ; RV32I-NEXT: addi sp, sp, -16
309 ; RV32I-NEXT: sw ra, 12(sp)
310 ; RV32I-NEXT: call __ashrdi3
311 ; RV32I-NEXT: lw ra, 12(sp)
312 ; RV32I-NEXT: addi sp, sp, 16
326 ; RV32I-NEXT: addi a3, a2, -32
327 ; RV32I-NEXT: bltz a3, .LBB16_2
328 ; RV32I-NEXT: # %bb.1:
329 ; RV32I-NEXT: sra a0, a1, a3
330 ; RV32I-NEXT: srai a1, a1, 31
331 ; RV32I-NEXT: ret
332 ; RV32I-NEXT: .LBB16_2:
333 ; RV32I-NEXT: addi a3, zero, 31
334 ; RV32I-NEXT: sub a3, a3, a2
335 ; RV32I-NEXT: slli a4, a1, 1
336 ; RV32I-NEXT: sll a3, a4, a3
337 ; RV32I-NEXT: srl a0, a0, a2
338 ; RV32I-NEXT: or a0, a0, a3
339 ; RV32I-NEXT: sra a1, a1, a2
313340 ; RV32I-NEXT: ret
314341 %1 = ashr i64 %a, %b
315342 ret i64 %1
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
11 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
22 ; RUN: | FileCheck %s -check-prefix=RV32I
3 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
4 ; RUN: | FileCheck %s -check-prefix=RV64I
35
46 ; Basic shift support is tested as part of ALU.ll. This file ensures that
57 ; shifts which may not be supported natively are lowered properly.
68
79 define i64 @lshr64(i64 %a, i64 %b) nounwind {
810 ; RV32I-LABEL: lshr64:
11 ; RV32I: # %bb.0:
12 ; RV32I-NEXT: addi a3, a2, -32
13 ; RV32I-NEXT: bltz a3, .LBB0_2
14 ; RV32I-NEXT: # %bb.1:
15 ; RV32I-NEXT: srl a0, a1, a3
16 ; RV32I-NEXT: mv a1, zero
17 ; RV32I-NEXT: ret
18 ; RV32I-NEXT: .LBB0_2:
19 ; RV32I-NEXT: addi a3, zero, 31
20 ; RV32I-NEXT: sub a3, a3, a2
21 ; RV32I-NEXT: slli a4, a1, 1
22 ; RV32I-NEXT: sll a3, a4, a3
23 ; RV32I-NEXT: srl a0, a0, a2
24 ; RV32I-NEXT: or a0, a0, a3
25 ; RV32I-NEXT: srl a1, a1, a2
26 ; RV32I-NEXT: ret
27 ;
28 ; RV64I-LABEL: lshr64:
29 ; RV64I: # %bb.0:
30 ; RV64I-NEXT: srl a0, a0, a1
31 ; RV64I-NEXT: ret
32 %1 = lshr i64 %a, %b
33 ret i64 %1
34 }
35
36 define i64 @lshr64_minsize(i64 %a, i64 %b) minsize nounwind {
37 ; RV32I-LABEL: lshr64_minsize:
938 ; RV32I: # %bb.0:
1039 ; RV32I-NEXT: addi sp, sp, -16
1140 ; RV32I-NEXT: sw ra, 12(sp)
1342 ; RV32I-NEXT: lw ra, 12(sp)
1443 ; RV32I-NEXT: addi sp, sp, 16
1544 ; RV32I-NEXT: ret
45 ;
46 ; RV64I-LABEL: lshr64_minsize:
47 ; RV64I: # %bb.0:
48 ; RV64I-NEXT: srl a0, a0, a1
49 ; RV64I-NEXT: ret
1650 %1 = lshr i64 %a, %b
1751 ret i64 %1
1852 }
1953
2054 define i64 @ashr64(i64 %a, i64 %b) nounwind {
2155 ; RV32I-LABEL: ashr64:
56 ; RV32I: # %bb.0:
57 ; RV32I-NEXT: addi a3, a2, -32
58 ; RV32I-NEXT: bltz a3, .LBB2_2
59 ; RV32I-NEXT: # %bb.1:
60 ; RV32I-NEXT: sra a0, a1, a3
61 ; RV32I-NEXT: srai a1, a1, 31
62 ; RV32I-NEXT: ret
63 ; RV32I-NEXT: .LBB2_2:
64 ; RV32I-NEXT: addi a3, zero, 31
65 ; RV32I-NEXT: sub a3, a3, a2
66 ; RV32I-NEXT: slli a4, a1, 1
67 ; RV32I-NEXT: sll a3, a4, a3
68 ; RV32I-NEXT: srl a0, a0, a2
69 ; RV32I-NEXT: or a0, a0, a3
70 ; RV32I-NEXT: sra a1, a1, a2
71 ; RV32I-NEXT: ret
72 ;
73 ; RV64I-LABEL: ashr64:
74 ; RV64I: # %bb.0:
75 ; RV64I-NEXT: sra a0, a0, a1
76 ; RV64I-NEXT: ret
77 %1 = ashr i64 %a, %b
78 ret i64 %1
79 }
80
81 define i64 @ashr64_minsize(i64 %a, i64 %b) minsize nounwind {
82 ; RV32I-LABEL: ashr64_minsize:
2283 ; RV32I: # %bb.0:
2384 ; RV32I-NEXT: addi sp, sp, -16
2485 ; RV32I-NEXT: sw ra, 12(sp)
2687 ; RV32I-NEXT: lw ra, 12(sp)
2788 ; RV32I-NEXT: addi sp, sp, 16
2889 ; RV32I-NEXT: ret
90 ;
91 ; RV64I-LABEL: ashr64_minsize:
92 ; RV64I: # %bb.0:
93 ; RV64I-NEXT: sra a0, a0, a1
94 ; RV64I-NEXT: ret
2995 %1 = ashr i64 %a, %b
3096 ret i64 %1
3197 }
3298
3399 define i64 @shl64(i64 %a, i64 %b) nounwind {
34100 ; RV32I-LABEL: shl64:
101 ; RV32I: # %bb.0:
102 ; RV32I-NEXT: addi a3, a2, -32
103 ; RV32I-NEXT: bltz a3, .LBB4_2
104 ; RV32I-NEXT: # %bb.1:
105 ; RV32I-NEXT: sll a1, a0, a3
106 ; RV32I-NEXT: mv a0, zero
107 ; RV32I-NEXT: ret
108 ; RV32I-NEXT: .LBB4_2:
109 ; RV32I-NEXT: addi a3, zero, 31
110 ; RV32I-NEXT: sub a3, a3, a2
111 ; RV32I-NEXT: srli a4, a0, 1
112 ; RV32I-NEXT: srl a3, a4, a3
113 ; RV32I-NEXT: sll a1, a1, a2
114 ; RV32I-NEXT: or a1, a1, a3
115 ; RV32I-NEXT: sll a0, a0, a2
116 ; RV32I-NEXT: ret
117 ;
118 ; RV64I-LABEL: shl64:
119 ; RV64I: # %bb.0:
120 ; RV64I-NEXT: sll a0, a0, a1
121 ; RV64I-NEXT: ret
122 %1 = shl i64 %a, %b
123 ret i64 %1
124 }
125
126 define i64 @shl64_minsize(i64 %a, i64 %b) minsize nounwind {
127 ; RV32I-LABEL: shl64_minsize:
35128 ; RV32I: # %bb.0:
36129 ; RV32I-NEXT: addi sp, sp, -16
37130 ; RV32I-NEXT: sw ra, 12(sp)
39132 ; RV32I-NEXT: lw ra, 12(sp)
40133 ; RV32I-NEXT: addi sp, sp, 16
41134 ; RV32I-NEXT: ret
135 ;
136 ; RV64I-LABEL: shl64_minsize:
137 ; RV64I: # %bb.0:
138 ; RV64I-NEXT: sll a0, a0, a1
139 ; RV64I-NEXT: ret
42140 %1 = shl i64 %a, %b
43141 ret i64 %1
44142 }
143
144 define i128 @lshr128(i128 %a, i128 %b) nounwind {
145 ; RV32I-LABEL: lshr128:
146 ; RV32I: # %bb.0:
147 ; RV32I-NEXT: addi sp, sp, -48
148 ; RV32I-NEXT: sw ra, 44(sp)
149 ; RV32I-NEXT: sw s0, 40(sp)
150 ; RV32I-NEXT: mv s0, a0
151 ; RV32I-NEXT: lw a0, 12(a1)
152 ; RV32I-NEXT: sw a0, 20(sp)
153 ; RV32I-NEXT: lw a0, 8(a1)
154 ; RV32I-NEXT: sw a0, 16(sp)
155 ; RV32I-NEXT: lw a0, 4(a1)
156 ; RV32I-NEXT: sw a0, 12(sp)
157 ; RV32I-NEXT: lw a0, 0(a1)
158 ; RV32I-NEXT: sw a0, 8(sp)
159 ; RV32I-NEXT: lw a2, 0(a2)
160 ; RV32I-NEXT: addi a0, sp, 24
161 ; RV32I-NEXT: addi a1, sp, 8
162 ; RV32I-NEXT: call __lshrti3
163 ; RV32I-NEXT: lw a0, 36(sp)
164 ; RV32I-NEXT: sw a0, 12(s0)
165 ; RV32I-NEXT: lw a0, 32(sp)
166 ; RV32I-NEXT: sw a0, 8(s0)
167 ; RV32I-NEXT: lw a0, 28(sp)
168 ; RV32I-NEXT: sw a0, 4(s0)
169 ; RV32I-NEXT: lw a0, 24(sp)
170 ; RV32I-NEXT: sw a0, 0(s0)
171 ; RV32I-NEXT: lw s0, 40(sp)
172 ; RV32I-NEXT: lw ra, 44(sp)
173 ; RV32I-NEXT: addi sp, sp, 48
174 ; RV32I-NEXT: ret
175 ;
176 ; RV64I-LABEL: lshr128:
177 ; RV64I: # %bb.0:
178 ; RV64I-NEXT: addi a3, a2, -64
179 ; RV64I-NEXT: bltz a3, .LBB6_2
180 ; RV64I-NEXT: # %bb.1:
181 ; RV64I-NEXT: srl a0, a1, a3
182 ; RV64I-NEXT: mv a1, zero
183 ; RV64I-NEXT: ret
184 ; RV64I-NEXT: .LBB6_2:
185 ; RV64I-NEXT: addi a3, zero, 63
186 ; RV64I-NEXT: sub a3, a3, a2
187 ; RV64I-NEXT: slli a4, a1, 1
188 ; RV64I-NEXT: sll a3, a4, a3
189 ; RV64I-NEXT: srl a0, a0, a2
190 ; RV64I-NEXT: or a0, a0, a3
191 ; RV64I-NEXT: srl a1, a1, a2
192 ; RV64I-NEXT: ret
193 %1 = lshr i128 %a, %b
194 ret i128 %1
195 }
196
197 define i128 @ashr128(i128 %a, i128 %b) nounwind {
198 ; RV32I-LABEL: ashr128:
199 ; RV32I: # %bb.0:
200 ; RV32I-NEXT: addi sp, sp, -48
201 ; RV32I-NEXT: sw ra, 44(sp)
202 ; RV32I-NEXT: sw s0, 40(sp)
203 ; RV32I-NEXT: mv s0, a0
204 ; RV32I-NEXT: lw a0, 12(a1)
205 ; RV32I-NEXT: sw a0, 20(sp)
206 ; RV32I-NEXT: lw a0, 8(a1)
207 ; RV32I-NEXT: sw a0, 16(sp)
208 ; RV32I-NEXT: lw a0, 4(a1)
209 ; RV32I-NEXT: sw a0, 12(sp)
210 ; RV32I-NEXT: lw a0, 0(a1)
211 ; RV32I-NEXT: sw a0, 8(sp)
212 ; RV32I-NEXT: lw a2, 0(a2)
213 ; RV32I-NEXT: addi a0, sp, 24
214 ; RV32I-NEXT: addi a1, sp, 8
215 ; RV32I-NEXT: call __ashrti3
216 ; RV32I-NEXT: lw a0, 36(sp)
217 ; RV32I-NEXT: sw a0, 12(s0)
218 ; RV32I-NEXT: lw a0, 32(sp)
219 ; RV32I-NEXT: sw a0, 8(s0)
220 ; RV32I-NEXT: lw a0, 28(sp)
221 ; RV32I-NEXT: sw a0, 4(s0)
222 ; RV32I-NEXT: lw a0, 24(sp)
223 ; RV32I-NEXT: sw a0, 0(s0)
224 ; RV32I-NEXT: lw s0, 40(sp)
225 ; RV32I-NEXT: lw ra, 44(sp)
226 ; RV32I-NEXT: addi sp, sp, 48
227 ; RV32I-NEXT: ret
228 ;
229 ; RV64I-LABEL: ashr128:
230 ; RV64I: # %bb.0:
231 ; RV64I-NEXT: addi a3, a2, -64
232 ; RV64I-NEXT: bltz a3, .LBB7_2
233 ; RV64I-NEXT: # %bb.1:
234 ; RV64I-NEXT: sra a0, a1, a3
235 ; RV64I-NEXT: srai a1, a1, 63
236 ; RV64I-NEXT: ret
237 ; RV64I-NEXT: .LBB7_2:
238 ; RV64I-NEXT: addi a3, zero, 63
239 ; RV64I-NEXT: sub a3, a3, a2
240 ; RV64I-NEXT: slli a4, a1, 1
241 ; RV64I-NEXT: sll a3, a4, a3
242 ; RV64I-NEXT: srl a0, a0, a2
243 ; RV64I-NEXT: or a0, a0, a3
244 ; RV64I-NEXT: sra a1, a1, a2
245 ; RV64I-NEXT: ret
246 %1 = ashr i128 %a, %b
247 ret i128 %1
248 }
249
250 define i128 @shl128(i128 %a, i128 %b) nounwind {
251 ; RV32I-LABEL: shl128:
252 ; RV32I: # %bb.0:
253 ; RV32I-NEXT: addi sp, sp, -48
254 ; RV32I-NEXT: sw ra, 44(sp)
255 ; RV32I-NEXT: sw s0, 40(sp)
256 ; RV32I-NEXT: mv s0, a0
257 ; RV32I-NEXT: lw a0, 12(a1)
258 ; RV32I-NEXT: sw a0, 20(sp)
259 ; RV32I-NEXT: lw a0, 8(a1)
260 ; RV32I-NEXT: sw a0, 16(sp)
261 ; RV32I-NEXT: lw a0, 4(a1)
262 ; RV32I-NEXT: sw a0, 12(sp)
263 ; RV32I-NEXT: lw a0, 0(a1)
264 ; RV32I-NEXT: sw a0, 8(sp)
265 ; RV32I-NEXT: lw a2, 0(a2)
266 ; RV32I-NEXT: addi a0, sp, 24
267 ; RV32I-NEXT: addi a1, sp, 8
268 ; RV32I-NEXT: call __ashlti3
269 ; RV32I-NEXT: lw a0, 36(sp)
270 ; RV32I-NEXT: sw a0, 12(s0)
271 ; RV32I-NEXT: lw a0, 32(sp)
272 ; RV32I-NEXT: sw a0, 8(s0)
273 ; RV32I-NEXT: lw a0, 28(sp)
274 ; RV32I-NEXT: sw a0, 4(s0)
275 ; RV32I-NEXT: lw a0, 24(sp)
276 ; RV32I-NEXT: sw a0, 0(s0)
277 ; RV32I-NEXT: lw s0, 40(sp)
278 ; RV32I-NEXT: lw ra, 44(sp)
279 ; RV32I-NEXT: addi sp, sp, 48
280 ; RV32I-NEXT: ret
281 ;
282 ; RV64I-LABEL: shl128:
283 ; RV64I: # %bb.0:
284 ; RV64I-NEXT: addi a3, a2, -64
285 ; RV64I-NEXT: bltz a3, .LBB8_2
286 ; RV64I-NEXT: # %bb.1:
287 ; RV64I-NEXT: sll a1, a0, a3
288 ; RV64I-NEXT: mv a0, zero
289 ; RV64I-NEXT: ret
290 ; RV64I-NEXT: .LBB8_2:
291 ; RV64I-NEXT: addi a3, zero, 63
292 ; RV64I-NEXT: sub a3, a3, a2
293 ; RV64I-NEXT: srli a4, a0, 1
294 ; RV64I-NEXT: srl a3, a4, a3
295 ; RV64I-NEXT: sll a1, a1, a2
296 ; RV64I-NEXT: or a1, a1, a3
297 ; RV64I-NEXT: sll a0, a0, a2
298 ; RV64I-NEXT: ret
299 %1 = shl i128 %a, %b
300 ret i128 %1
301 }