llvm.org GIT mirror llvm / 7e2fe91
Re-commit 117518 and 117519 now that ARM MC test failures are out of the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117531 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 9 years ago
12 changed file(s) with 96 addition(s) and 31 deletion(s). Raw diff Collapse all Expand all
453453 return;
454454
455455 unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
456 if (Use->isMachineOpcode())
457 // Adjust the use operand index by num of defs.
458 OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
456459 int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
457460 if (Latency >= 0)
458461 dep.setLatency(Latency);
18221822 // This may be a def / use of a variable_ops instruction, the operand
18231823 // latency might be determinable dynamically. Let the target try to
18241824 // figure it out.
1825 int DefCycle = -1;
18251826 bool LdmBypass = false;
1826 int DefCycle = -1;
18271827 switch (DefTID.getOpcode()) {
18281828 default:
18291829 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
19211921 ? (*DefMI->memoperands_begin())->getAlignment() : 0;
19221922 unsigned UseAlign = UseMI->hasOneMemOperand()
19231923 ? (*UseMI->memoperands_begin())->getAlignment() : 0;
1924 return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
1925 UseTID, UseIdx, UseAlign);
1924 int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
1925 UseTID, UseIdx, UseAlign);
1926
1927 if (Latency > 1 &&
1928 (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
1929 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
1930 // variants are one cycle cheaper.
1931 switch (DefTID.getOpcode()) {
1932 default: break;
1933 case ARM::LDRrs:
1934 case ARM::LDRBrs: {
1935 unsigned ShOpVal = DefMI->getOperand(3).getImm();
1936 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
1937 if (ShImm == 0 ||
1938 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
1939 --Latency;
1940 break;
1941 }
1942 case ARM::t2LDRs:
1943 case ARM::t2LDRBs:
1944 case ARM::t2LDRHs:
1945 case ARM::t2LDRSHs: {
1946 // Thumb2 mode: lsl only.
1947 unsigned ShAmt = DefMI->getOperand(3).getImm();
1948 if (ShAmt == 0 || ShAmt == 2)
1949 --Latency;
1950 break;
1951 }
1952 }
1953 }
1954
1955 return Latency;
19261956 }
19271957
19281958 int
19461976 const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
19471977 unsigned UseAlign = !UseMN->memoperands_empty()
19481978 ? (*UseMN->memoperands_begin())->getAlignment() : 0;
1949 return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
1950 UseTID, UseIdx, UseAlign);
1979 int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
1980 UseTID, UseIdx, UseAlign);
1981
1982 if (Latency > 1 &&
1983 (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
1984 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
1985 // variants are one cycle cheaper.
1986 switch (DefTID.getOpcode()) {
1987 default: break;
1988 case ARM::LDRrs:
1989 case ARM::LDRBrs: {
1990 unsigned ShOpVal =
1991 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
1992 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
1993 if (ShImm == 0 ||
1994 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
1995 --Latency;
1996 break;
1997 }
1998 case ARM::t2LDRs:
1999 case ARM::t2LDRBs:
2000 case ARM::t2LDRHs:
2001 case ARM::t2LDRSHs: {
2002 // Thumb2 mode: lsl only.
2003 unsigned ShAmt =
2004 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
2005 if (ShAmt == 0 || ShAmt == 2)
2006 --Latency;
2007 break;
2008 }
2009 }
2010 }
2011
2012 return Latency;
19512013 }
19522014
19532015 bool ARMBaseInstrInfo::
14371437 // Load
14381438
14391439
1440 defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_i, IIC_iLoad_r,
1440 defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
14411441 UnOpFrag<(load node:$Src)>>;
1442 defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
1442 defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
14431443 UnOpFrag<(zextloadi8 node:$Src)>>;
1444 defm STR : AI_str1<0, "str", IIC_iStore_i, IIC_iStore_r,
1444 defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
14451445 BinOpFrag<(store node:$LHS, node:$RHS)>>;
1446 defm STRB : AI_str1<1, "strb", IIC_iStore_bh_i, IIC_iStore_bh_r,
1446 defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
14471447 BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
14481448
14491449 // Special LDR for loads from non-pc-relative constpools.
573573
574574 /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
575575 multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
576 InstrItinClass iii, InstrItinClass iir, PatFrag opnode> {
576 InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
577577 def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), iii,
578578 opc, ".w\t$dst, $addr",
579579 [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> {
598598 let Inst{10} = 1; // The P bit.
599599 let Inst{8} = 0; // The W bit.
600600 }
601 def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iir,
601 def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iis,
602602 opc, ".w\t$dst, $addr",
603603 [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> {
604604 let Inst{31-27} = 0b11111;
625625
626626 /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
627627 multiclass T2I_st<bits<2> opcod, string opc,
628 InstrItinClass iii, InstrItinClass iir, PatFrag opnode> {
628 InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
629629 def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), iii,
630630 opc, ".w\t$src, $addr",
631631 [(opnode GPR:$src, t2addrmode_imm12:$addr)]> {
646646 let Inst{10} = 1; // The P bit.
647647 let Inst{8} = 0; // The W bit.
648648 }
649 def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iir,
649 def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iis,
650650 opc, ".w\t$src, $addr",
651651 [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> {
652652 let Inst{31-27} = 0b11111;
915915
916916 // Load
917917 let canFoldAsLoad = 1, isReMaterializable = 1 in
918 defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_r,
918 defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
919919 UnOpFrag<(load node:$Src)>>;
920920
921921 // Loads with zero extension
922 defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
922 defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
923923 UnOpFrag<(zextloadi16 node:$Src)>>;
924 defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
924 defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
925925 UnOpFrag<(zextloadi8 node:$Src)>>;
926926
927927 // Loads with sign extension
928 defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
928 defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
929929 UnOpFrag<(sextloadi16 node:$Src)>>;
930 defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
930 defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
931931 UnOpFrag<(sextloadi8 node:$Src)>>;
932932
933933 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
10691069 def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
10701070
10711071 // Store
1072 defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_r,
1072 defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
10731073 BinOpFrag<(store node:$LHS, node:$RHS)>>;
1074 defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_r,
1074 defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
10751075 BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
1076 defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_r,
1076 defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
10771077 BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
10781078
10791079 // Store doubleword
573573 InstrStage<1, [A9_DRegsVFP], 0, Required>,
574574 InstrStage<9, [A9_DRegsN], 0, Reserved>,
575575 InstrStage<1, [A9_NPipe]>],
576 [8, 0, 1, 1]>,
576 [8, 1, 1, 1]>,
577577 //
578578 // Double-precision FP MAC
579579 InstrItinData,
581581 InstrStage<1, [A9_DRegsVFP], 0, Required>,
582582 InstrStage<10, [A9_DRegsN], 0, Reserved>,
583583 InstrStage<2, [A9_NPipe]>],
584 [9, 0, 1, 1]>,
584 [9, 1, 1, 1]>,
585585 //
586586 // Single-precision FP DIV
587587 InstrItinData,
2323 ; CORTEXA8: test:
2424 ; CORTEXA8: vabs.f32 d1, d1
2525 ; CORTEXA9: test:
26 ; CORTEXA9: vabs.f32 s0, s0
26 ; CORTEXA9: vabs.f32 s1, s1
1919 ; CORTEXA8: test:
2020 ; CORTEXA8: vadd.f32 d0, d1, d0
2121 ; CORTEXA9: test:
22 ; CORTEXA9: vadd.f32 s0, s0, s1
22 ; CORTEXA9: vadd.f32 s0, s1, s0
1919 ; CORTEXA8: test:
2020 ; CORTEXA8: vdiv.f32 s0, s1, s0
2121 ; CORTEXA9: test:
22 ; CORTEXA9: vdiv.f32 s0, s0, s1
22 ; CORTEXA9: vdiv.f32 s0, s1, s0
2020 ; CORTEXA8: test:
2121 ; CORTEXA8: vmul.f32 d0, d1, d0
2222 ; CORTEXA9: test:
23 ; CORTEXA9: vmla.f32 s0, s1, s2
23 ; CORTEXA9: vmla.f32 s2, s1, s0
1818 ; NFP0: vnmls.f32 s2, s1, s0
1919
2020 ; CORTEXA8: test:
21 ; CORTEXA8: vnmls.f32 s1, s2, s0
21 ; CORTEXA8: vnmls.f32 s2, s1, s0
2222 ; CORTEXA9: test:
23 ; CORTEXA9: vnmls.f32 s0, s1, s2
23 ; CORTEXA9: vnmls.f32 s2, s1, s0
1919 ; CORTEXA8: test:
2020 ; CORTEXA8: vmul.f32 d0, d1, d0
2121 ; CORTEXA9: test:
22 ; CORTEXA9: vmul.f32 s0, s0, s1
22 ; CORTEXA9: vmul.f32 s0, s1, s0
3535
3636 ; lsl #2 is free
3737 ; A9: test3:
38 ; A9: ldr r0, [r0, r2, lsl #2]
3839 ; A9: ldr r1, [r1, r2, lsl #2]
39 ; A9: ldr r0, [r0, r2, lsl #2]
4040 %tmp1 = shl i32 %offset, 2
4141 %tmp2 = add i32 %base, %tmp1
4242 %tmp3 = inttoptr i32 %tmp2 to i32*