llvm.org GIT mirror — llvm / commit 05388f2
R600/SI: Clean up some of the unused REGISTER_{LOAD,STORE} code

There are a few more cleanups to do, but I ran into some problems with ext loads and trunc stores when I tried to change some of the vector loads and stores from custom to legal, so I wasn't able to get rid of everything.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213552 91177308-0d34-0410-b5e6-96231b3b80d8
Tom Stellard, 5 years ago
3 changed files with 36 additions and 168 deletions.
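The ext-load/trunc-store problem mentioned in the message is TargetLowering bookkeeping: marking a plain vector load Legal says nothing about its extending-load and truncating-store variants, which then need their own actions. A minimal sketch of the extra declarations such a change would involve, using the TargetLowering hooks of this era; this is illustrative, not part of the commit:

    // Hypothetical: flipping v2i32 loads from Custom to Legal...
    setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
    // ...would also require pinning down the extending-load variants
    // (anyext/sext/zext from a narrower in-memory type):
    setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, Expand);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, Expand);
    // ...and the truncating stores down to the narrow type:
    setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);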
   setOperationAction(ISD::FCOS, MVT::f32, Custom);

   // We need to custom lower vector stores from local memory
-  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v16i32, Custom);

   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
-
-  // We need to custom lower loads/stores from private memory
-  setOperationAction(ISD::LOAD, MVT::i32, Custom);
-  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
-  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
-  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);

   setOperationAction(ISD::STORE, MVT::i1, Custom);
   setOperationAction(ISD::STORE, MVT::i32, Custom);
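For context, Custom here means the DAG legalizer hands the node to SITargetLowering::LowerOperation (shown further down in this diff) instead of applying a generic expansion. A rough model of that dispatch, simplified from what the legalizer actually does:

    // Simplified model of how a Custom-marked node is legalized:
    SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
    if (!Res.getNode()) {
      // A null SDValue means "the node is fine as-is"; keep it.
    } else {
      // Otherwise the returned value replaces the original node.
    }

This convention is why several branches in this patch simply `return SDValue();` for cases that are really legal.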
   // see what for specifically. The wording everywhere else seems to be the
   // same.

-  // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have
-  // no alignment restrictions.
-  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
-    // Using any pair of GPRs should be the same as any other pair.
-    if (IsFast)
-      *IsFast = true;
-    return VT.bitsGE(MVT::i64);
-  }
-
   // XXX - The only mention I see of this in the ISA manual is for LDS direct
   // reads the "byte address and must be dword aligned". Is it also true for the
   // normal loads and stores?

   // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
   // byte-address are ignored, thus forcing Dword alignment.
+  // This applies to private, global, and constant memory.
   if (IsFast)
     *IsFast = true;
   return VT.bitsGT(MVT::i32);
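Since the two LSBs of the byte address are ignored, every dword access is forced onto a 4-byte boundary, so only types wider than 32 bits can be meaningfully misaligned here; that is what the `VT.bitsGT(MVT::i32)` check expresses. A standalone illustration of the address behavior (my own example, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    // With the two LSBs of a byte address ignored, every dword access
    // lands on a 4-byte boundary regardless of the address given.
    static uint32_t effectiveAddress(uint32_t byteAddr) {
      return byteAddr & ~3u; // hardware drops the two LSBs
    }

    int main() {
      for (uint32_t a : {16u, 17u, 18u, 19u, 20u}) {
        // Addresses 16..19 all collapse to the same dword.
        std::printf("byte %u -> dword address %u\n", a, effectiveAddress(a));
      }
    }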
   case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::LOAD: {
-    LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
-    EVT VT = Op.getValueType();
-
-    // These loads are legal.
-    if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
-        VT.isVector() && VT.getVectorNumElements() == 2 &&
-        VT.getVectorElementType() == MVT::i32)
-      return SDValue();
-
-    if (Op.getValueType().isVector() &&
-        (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
-         Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
-         (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
-          Op.getValueType().getVectorNumElements() > 4))) {
-      return SplitVectorLoad(Op, DAG);
-    } else {
-      SDValue Result = LowerLOAD(Op, DAG);
-      assert((!Result.getNode() ||
-              Result.getNode()->getNumValues() == 2) &&
-             "Load should return a value and a chain");
-      return Result;
-    }
+    SDValue Result = LowerLOAD(Op, DAG);
+    assert((!Result.getNode() ||
+            Result.getNode()->getNumValues() == 2) &&
+           "Load should return a value and a chain");
+    return Result;
   }

   case ISD::FSIN:
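The assert kept in the new code encodes a contract: a custom-lowered load must either return nothing (keep the node) or return a node carrying both the loaded value and a chain. A sketch of the shape such a result takes, where NewValue and NewChain are hypothetical placeholders for whatever the lowering built:

    // NewValue/NewChain are placeholders, not names from this patch.
    SDValue Ops[] = { NewValue, NewChain }; // value first, then chain
    // getMergeValues produces a single node with exactly two results,
    // which is what the assert in LowerOperation checks for.
    return DAG.getMergeValues(Ops, DL);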
 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
-  // Vector private memory loads have already been split, and
-  // all the rest of private memory loads are legal.
-  if (Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
-    return SDValue();
-  }
-  SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
-  if (Lowered.getNode())
-    return Lowered;
-
-  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
-    return SDValue();
-  }
-
-  EVT MemVT = Load->getMemoryVT();
-
-  assert(!MemVT.isVector() && "Private loads should be scalarized");
-  assert(!MemVT.isFloatingPoint() && "FP loads should be promoted to int");
-
-  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
-                            DAG.getConstant(2, MVT::i32));
-
-  // FIXME: REGISTER_LOAD should probably have a chain result.
-  SDValue Chain = Load->getChain();
-  SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                               Chain, Ptr,
-                               DAG.getTargetConstant(0, MVT::i32),
-                               Op.getOperand(2));
-
-  SDValue Ret = LoLoad.getValue(0);
-  if (MemVT.getSizeInBits() == 64) {
-    // TODO: This needs a test to make sure the right thing is happening with
-    // the chain. That is hard without general function support.
-
-    SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
-                                 DAG.getConstant(1, MVT::i32));
-
-    SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                                 Chain, IncPtr,
-                                 DAG.getTargetConstant(0, MVT::i32),
-                                 Op.getOperand(2));
-
-    Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad);
-    // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
-    //                     LoLoad.getValue(1), HiLoad.getValue(1));
-  }
-
-  SDValue Ops[] = {
-    Ret,
-    Chain
-  };
-
-  return DAG.getMergeValues(Ops, DL);
+
+  if (Op.getValueType().isVector()) {
+    assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
+           "Custom lowering for non-i32 vectors hasn't been implemented.");
+    unsigned NumElements = Op.getValueType().getVectorNumElements();
+    assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
+    switch (Load->getAddressSpace()) {
+    default: break;
+    case AMDGPUAS::GLOBAL_ADDRESS:
+    case AMDGPUAS::PRIVATE_ADDRESS:
+      // v4 loads are supported for private and global memory.
+      if (NumElements <= 4)
+        break;
+      // fall-through
+    case AMDGPUAS::LOCAL_ADDRESS:
+      return SplitVectorLoad(Op, DAG);
+    }
+  }
+
+  return AMDGPUTargetLowering::LowerLOAD(Op, DAG);
 }

 SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
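The new LowerLOAD defers wide vectors to SplitVectorLoad. Conceptually, that helper halves the load and glues the pieces back together; the following is a sketch of the idea for a v8i32 load, using SelectionDAG calls of this vintage but with assumed details (offsets, alignment, chain handling), not the actual implementation:

    // Conceptual sketch: split one v8i32 load into two v4i32 loads.
    SDValue LoPtr = Load->getBasePtr();
    SDValue HiPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, LoPtr,
                                DAG.getConstant(16, MVT::i32)); // 4 x i32 bytes
    SDValue Lo = DAG.getLoad(MVT::v4i32, DL, Load->getChain(), LoPtr,
                             Load->getPointerInfo(),
                             false, false, false, 4);
    SDValue Hi = DAG.getLoad(MVT::v4i32, DL, Load->getChain(), HiPtr,
                             Load->getPointerInfo().getWithOffset(16),
                             false, false, false, 4);
    // Rejoin the halves and merge the two load chains.
    SDValue Vec = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
    SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                Lo.getValue(1), Hi.getValue(1));
    SDValue Ops[] = { Vec, Chain };
    return DAG.getMergeValues(Ops, DL);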
                               DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
                               Store->getBasePtr(), MVT::i1, Store->getMemOperand());

-  if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
-    return SDValue();
-
-  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(),
-                            DAG.getConstant(2, MVT::i32));
-  SDValue Chain = Store->getChain();
-  SmallVector<SDValue, 8> Values;
-
-  if (Store->isTruncatingStore()) {
-    unsigned Mask = 0;
-    if (Store->getMemoryVT() == MVT::i8) {
-      Mask = 0xff;
-    } else if (Store->getMemoryVT() == MVT::i16) {
-      Mask = 0xffff;
-    }
-    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                              Chain, Store->getBasePtr(),
-                              DAG.getConstant(0, MVT::i32));
-    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
-                                  DAG.getConstant(0x3, MVT::i32));
-    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
-                                   DAG.getConstant(3, MVT::i32));
-    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
-                                      DAG.getConstant(Mask, MVT::i32));
-    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
-                                       MaskedValue, ShiftAmt);
-    SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
-                                  DAG.getConstant(32, MVT::i32), ShiftAmt);
-    SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
-                                  DAG.getConstant(Mask, MVT::i32),
-                                  RotrAmt);
-    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
-    Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
-
-    Values.push_back(Dst);
-  } else if (VT == MVT::i64) {
-    for (unsigned i = 0; i < 2; ++i) {
-      Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
-                       Store->getValue(), DAG.getConstant(i, MVT::i32)));
-    }
-  } else if (VT == MVT::i128) {
-    for (unsigned i = 0; i < 2; ++i) {
-      for (unsigned j = 0; j < 2; ++j) {
-        Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
-                         DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                         Store->getValue(), DAG.getConstant(i, MVT::i32)),
-                         DAG.getConstant(j, MVT::i32)));
-      }
-    }
-  } else {
-    Values.push_back(Store->getValue());
-  }
-
-  for (unsigned i = 0; i < Values.size(); ++i) {
-    SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
-                                  Ptr, DAG.getConstant(i, MVT::i32));
-    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
-                        Chain, Values[i], PartPtr,
-                        DAG.getTargetConstant(0, MVT::i32));
-  }
-  return Chain;
+  return SDValue();
 }

 SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
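For reference, the truncating-store path deleted above emulated a byte-lane read-modify-write against register-indexed private memory: scale the byte address down to a dword index (the SRL by 2), turn the byte offset within the dword into a bit shift (the SHL by 3), then mask and merge. A standalone model of that arithmetic, written by hand rather than taken from the tree (note the deleted DAG code formed its destination mask with a rotate instead of the complement used here):

    #include <cstdint>
    #include <cstdio>

    // Byte-insert RMW against a dword-addressed register file.
    static void storeByte(uint32_t *regs, uint32_t byteAddr, uint8_t v) {
      uint32_t dwordIdx = byteAddr >> 2;        // SRL by 2: byte -> dword index
      uint32_t shift = (byteAddr & 3u) << 3;    // byte lane -> bit offset
      uint32_t mask = 0xffu << shift;           // lane to replace
      regs[dwordIdx] = (regs[dwordIdx] & ~mask) // clear the target lane
                     | (uint32_t(v) << shift);  // insert the new byte
    }

    int main() {
      uint32_t regs[1] = { 0xaabbccdd };
      storeByte(regs, 2, 0x11);                 // replace byte lane 2
      std::printf("0x%08x\n", regs[0]);         // prints 0xaa11ccdd
    }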
 }

 ; SI-LABEL: @load_v4i8_to_v4f32:
-; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
+; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
+; BUFFER_LOAD_DWORD requires dword alignment.
+; SI: BUFFER_LOAD_USHORT
+; SI: BUFFER_LOAD_USHORT
+; SI: V_OR_B32_e32 [[LOADREG:v[0-9]+]]
 ; SI-NOT: BFE
 ; SI-NOT: LSHR
 ; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
 ; SI-LABEL: @private_access_f64_alloca:

 ; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX2

 ; SI-PROMOTE: DS_WRITE_B64
 ; SI-PROMOTE: DS_READ_B64
 ; SI-LABEL: @private_access_v2f64_alloca:

 ; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX4

 ; SI-PROMOTE: DS_WRITE_B32
 ; SI-PROMOTE: DS_WRITE_B32
 ; SI-LABEL: @private_access_i64_alloca:

 ; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX2

 ; SI-PROMOTE: DS_WRITE_B64
 ; SI-PROMOTE: DS_READ_B64
 ; SI-LABEL: @private_access_v2i64_alloca:

 ; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; FIXME: We should be able to use BUFFER_LOAD_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
-; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORDX4

 ; SI-PROMOTE: DS_WRITE_B32
 ; SI-PROMOTE: DS_WRITE_B32