llvm.org GIT mirror llvm / 42faffd
R600/SI: Initialize M0 and emit S_WQM_B64 whenever DS instructions are used DS instructions that access local memory can only use addresses that are less than or equal to the value of M0. When M0 is uninitialized, then we experience undefined behavior. This patch also changes the behavior to emit S_WQM_B64 on pixel shaders no matter what kind of DS instruction is used. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201097 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
5 changed file(s) with 45 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
312312 return RC != &AMDGPU::EXECRegRegClass;
313313 }
314314
315 namespace llvm {
316 namespace AMDGPU {
317 // Helper function generated by tablegen. We are wrapping this with
318 // an SIInstrInfo function that returns bool rather than int.
319 int isDS(uint16_t Opcode);
320 }
321 }
322
323 bool SIInstrInfo::isDS(uint16_t Opcode) const {
324 return ::AMDGPU::isDS(Opcode) != -1;
325 }
326
315327 int SIInstrInfo::isMIMG(uint16_t Opcode) const {
316328 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
317329 }
6969 virtual bool isMov(unsigned Opcode) const;
7070
7171 virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
72 bool isDS(uint16_t Opcode) const;
7273 int isMIMG(uint16_t Opcode) const;
7374 int isSMRD(uint16_t Opcode) const;
7475 bool isVOP1(uint16_t Opcode) const;
627627 let ValueCols = [["1"]];
628628 }
629629
630 def isDS : InstrMapping {
631 let FilterClass = "DS";
632 let RowFields = ["Inst"];
633 let ColFields = ["Size"];
634 let KeyCol = ["8"];
635 let ValueCols = [["8"]];
636 }
637
630638 include "SIInstructions.td"
6666
6767 static char ID;
6868 const TargetRegisterInfo *TRI;
69 const TargetInstrInfo *TII;
69 const SIInstrInfo *TII;
7070
7171 bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
7272
407407 }
408408
409409 bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
410 TII = MF.getTarget().getInstrInfo();
410 TII = static_cast<const SIInstrInfo *>(MF.getTarget().getInstrInfo());
411411 TRI = MF.getTarget().getRegisterInfo();
412412 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
413413
425425
426426 Next = llvm::next(I);
427427 MachineInstr &MI = *I;
428 if (TII->isDS(MI.getOpcode())) {
429 NeedM0 = true;
430 NeedWQM = true;
431 }
432
428433 switch (MI.getOpcode()) {
429434 default: break;
430435 case AMDGPU::SI_IF:
483488 case AMDGPU::SI_INDIRECT_DST_V8:
484489 case AMDGPU::SI_INDIRECT_DST_V16:
485490 IndirectDst(MI);
486 break;
487
488 case AMDGPU::DS_READ_B32:
489 NeedWQM = true;
490 // Fall through
491 case AMDGPU::DS_WRITE_B32:
492 case AMDGPU::DS_ADD_U32_RTN:
493 NeedM0 = true;
494491 break;
495492
496493 case AMDGPU::V_INTERP_P1_F32:
444444 ; R600-CHECK: LDS_UBYTE_READ_RET
445445 ; SI-CHECK-LABEL: @load_i8_local
446446 ; SI-CHECK-NOT: S_WQM_B64
447 ; SI-CHECK: S_MOV_B32 m0
447448 ; SI-CHECK: DS_READ_U8
448449 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
449450 %1 = load i8 addrspace(3)* %in
457458 ; R600-CHECK: ASHR
458459 ; SI-CHECK-LABEL: @load_i8_sext_local
459460 ; SI-CHECK-NOT: S_WQM_B64
461 ; SI-CHECK: S_MOV_B32 m0
460462 ; SI-CHECK: DS_READ_I8
461463 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
462464 entry:
471473 ; R600-CHECK: LDS_UBYTE_READ_RET
472474 ; SI-CHECK-LABEL: @load_v2i8_local
473475 ; SI-CHECK-NOT: S_WQM_B64
476 ; SI-CHECK: S_MOV_B32 m0
474477 ; SI-CHECK: DS_READ_U8
475478 ; SI-CHECK: DS_READ_U8
476479 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
488491 ; R600-CHECK-DAG: ASHR
489492 ; SI-CHECK-LABEL: @load_v2i8_sext_local
490493 ; SI-CHECK-NOT: S_WQM_B64
494 ; SI-CHECK: S_MOV_B32 m0
491495 ; SI-CHECK: DS_READ_I8
492496 ; SI-CHECK: DS_READ_I8
493497 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
505509 ; R600-CHECK: LDS_UBYTE_READ_RET
506510 ; SI-CHECK-LABEL: @load_v4i8_local
507511 ; SI-CHECK-NOT: S_WQM_B64
512 ; SI-CHECK: S_MOV_B32 m0
508513 ; SI-CHECK: DS_READ_U8
509514 ; SI-CHECK: DS_READ_U8
510515 ; SI-CHECK: DS_READ_U8
528533 ; R600-CHECK-DAG: ASHR
529534 ; SI-CHECK-LABEL: @load_v4i8_sext_local
530535 ; SI-CHECK-NOT: S_WQM_B64
536 ; SI-CHECK: S_MOV_B32 m0
531537 ; SI-CHECK: DS_READ_I8
532538 ; SI-CHECK: DS_READ_I8
533539 ; SI-CHECK: DS_READ_I8
545551 ; R600-CHECK: LDS_USHORT_READ_RET
546552 ; SI-CHECK-LABEL: @load_i16_local
547553 ; SI-CHECK-NOT: S_WQM_B64
554 ; SI-CHECK: S_MOV_B32 m0
548555 ; SI-CHECK: DS_READ_U16
549556 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
550557 entry:
559566 ; R600-CHECK: ASHR
560567 ; SI-CHECK-LABEL: @load_i16_sext_local
561568 ; SI-CHECK-NOT: S_WQM_B64
569 ; SI-CHECK: S_MOV_B32 m0
562570 ; SI-CHECK: DS_READ_I16
563571 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
564572 entry:
573581 ; R600-CHECK: LDS_USHORT_READ_RET
574582 ; SI-CHECK-LABEL: @load_v2i16_local
575583 ; SI-CHECK-NOT: S_WQM_B64
584 ; SI-CHECK: S_MOV_B32 m0
576585 ; SI-CHECK: DS_READ_U16
577586 ; SI-CHECK: DS_READ_U16
578587 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
590599 ; R600-CHECK-DAG: ASHR
591600 ; SI-CHECK-LABEL: @load_v2i16_sext_local
592601 ; SI-CHECK-NOT: S_WQM_B64
602 ; SI-CHECK: S_MOV_B32 m0
593603 ; SI-CHECK: DS_READ_I16
594604 ; SI-CHECK: DS_READ_I16
595605 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
607617 ; R600-CHECK: LDS_USHORT_READ_RET
608618 ; SI-CHECK-LABEL: @load_v4i16_local
609619 ; SI-CHECK-NOT: S_WQM_B64
620 ; SI-CHECK: S_MOV_B32 m0
610621 ; SI-CHECK: DS_READ_U16
611622 ; SI-CHECK: DS_READ_U16
612623 ; SI-CHECK: DS_READ_U16
630641 ; R600-CHECK-DAG: ASHR
631642 ; SI-CHECK-LABEL: @load_v4i16_sext_local
632643 ; SI-CHECK-NOT: S_WQM_B64
644 ; SI-CHECK: S_MOV_B32 m0
633645 ; SI-CHECK: DS_READ_I16
634646 ; SI-CHECK: DS_READ_I16
635647 ; SI-CHECK: DS_READ_I16
642654 ret void
643655 }
644656
645 ; load an i32 value from the glocal address space.
657 ; load an i32 value from the local address space.
646658 ; R600-CHECK-LABEL: @load_i32_local
647659 ; R600-CHECK: LDS_READ_RET
648660 ; SI-CHECK-LABEL: @load_i32_local
649661 ; SI-CHECK-NOT: S_WQM_B64
662 ; SI-CHECK: S_MOV_B32 m0
650663 ; SI-CHECK: DS_READ_B32
651664 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
652665 entry:
655668 ret void
656669 }
657670
658 ; load a f32 value from the global address space.
671 ; load a f32 value from the local address space.
659672 ; R600-CHECK-LABEL: @load_f32_local
660673 ; R600-CHECK: LDS_READ_RET
661674 ; SI-CHECK-LABEL: @load_f32_local
675 ; SI-CHECK: S_MOV_B32 m0
662676 ; SI-CHECK: DS_READ_B32
663677 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
664678 entry:
672686 ; R600-CHECK: LDS_READ_RET
673687 ; R600-CHECK: LDS_READ_RET
674688 ; SI-CHECK-LABEL: @load_v2f32_local
689 ; SI-CHECK: S_MOV_B32 m0
675690 ; SI-CHECK: DS_READ_B32
676691 ; SI-CHECK: DS_READ_B32
677692 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {