llvm.org GIT mirror llvm / f1e95d3
AMDGPU: Fix shrinking of addc/subb. To shrink to VOP2 the input carry must also be VCC. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291720 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 3 years ago
2 changed file(s) with 318 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
8989 switch (MI.getOpcode()) {
9090 default: return false;
9191
92 case AMDGPU::V_ADDC_U32_e64:
93 case AMDGPU::V_SUBB_U32_e64:
94 // Additional verification is needed for sdst/src2.
95 return true;
96
9297 case AMDGPU::V_MAC_F32_e64:
9398 case AMDGPU::V_MAC_F16_e64:
9499 if (!isVGPR(Src2, TRI, MRI) ||
173178 const MachineOperand &Orig) {
174179
175180 for (MachineOperand &Use : MI.implicit_operands()) {
176 if (Use.getReg() == AMDGPU::VCC) {
181 if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
177182 Use.setIsUndef(Orig.isUndef());
178183 Use.setIsKill(Orig.isKill());
179184 return;
458463 // Check for the bool flag output for instructions like V_ADD_I32_e64.
459464 const MachineOperand *SDst = TII->getNamedOperand(MI,
460465 AMDGPU::OpName::sdst);
461 if (SDst && SDst->getReg() != AMDGPU::VCC) {
462 if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
463 MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
464
465 continue;
466
467 // Check the carry-in operand for v_addc_u32_e64.
468 const MachineOperand *Src2 = TII->getNamedOperand(MI,
469 AMDGPU::OpName::src2);
470
471 if (SDst) {
472 if (SDst->getReg() != AMDGPU::VCC) {
473 if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
474 MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
475 continue;
476 }
477
478 // All of the instructions with carry outs also have an SGPR input in
479 // src2.
480 if (Src2 && Src2->getReg() != AMDGPU::VCC) {
481 if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
482 MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
483
484 continue;
485 }
466486 }
467487
468488 // We can shrink this instruction
490510 if (Src1)
491511 Inst32.addOperand(*Src1);
492512
493 const MachineOperand *Src2 =
494 TII->getNamedOperand(MI, AMDGPU::OpName::src2);
495513 if (Src2) {
496514 int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
497515 if (Op32Src2Idx != -1) {
4141 %a = load volatile i32, i32 addrspace(1)* %a.ptr
4242 %b = load volatile i32, i32 addrspace(1)* %b.ptr
4343 %result = sub i32 %a, %b
44 store volatile i32 %result, i32 addrspace(1)* %out.gep
45 ret void
46 }
47
; IR counterpart of the MIR function of the same name further down in this
; file: the MIR memory operands reference its %ir.* values (%ir.a.ptr,
; %ir.b.ptr, %ir.out.gep). The volatile loads/store keep the three memory
; accesses from being merged or eliminated.
48 define void @check_addc_src2_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
49 %tid = call i32 @llvm.amdgcn.workitem.id.x()
50 %tid.ext = sext i32 %tid to i64
51 %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
52 %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
53 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
54 %a = load volatile i32, i32 addrspace(1)* %a.ptr
55 %b = load volatile i32, i32 addrspace(1)* %b.ptr
56 %result = add i32 %a, %b
57 store volatile i32 %result, i32 addrspace(1)* %out.gep
58 ret void
59 }
60
; IR counterpart of the MIR function of the same name further down in this
; file; it supplies the %ir.* value names (%ir.a.ptr, %ir.b.ptr, %ir.out.gep)
; used by the MIR memory operands. Body is identical to the sibling test
; functions — only the MIR below differs between the cases.
61 define void @shrink_addc_vop3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
62 %tid = call i32 @llvm.amdgcn.workitem.id.x()
63 %tid.ext = sext i32 %tid to i64
64 %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
65 %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
66 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
67 %a = load volatile i32, i32 addrspace(1)* %a.ptr
68 %b = load volatile i32, i32 addrspace(1)* %b.ptr
69 %result = add i32 %a, %b
70 store volatile i32 %result, i32 addrspace(1)* %out.gep
71 ret void
72 }
73
; IR counterpart of the MIR function of the same name further down in this
; file; it supplies the %ir.* value names (%ir.a.ptr, %ir.b.ptr, %ir.out.gep)
; used by the MIR memory operands. Body is identical to the sibling test
; functions — only the MIR below differs between the cases.
74 define void @shrink_addc_undef_vcc(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
75 %tid = call i32 @llvm.amdgcn.workitem.id.x()
76 %tid.ext = sext i32 %tid to i64
77 %a.ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
78 %b.ptr = getelementptr i32, i32 addrspace(1)* %a.ptr, i32 1
79 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 %tid.ext
80 %a = load volatile i32, i32 addrspace(1)* %a.ptr
81 %b = load volatile i32, i32 addrspace(1)* %b.ptr
82 %result = add i32 %a, %b
83 store volatile i32 %result, i32 addrspace(1)* %out.gep
84 ret void
85 }
302341 S_ENDPGM
303342
304343 ...
344 ---
# NOTE(review): the carry-in (src2) fed to V_ADDC_U32_e64 here is %9, an
# sreg_64 virtual register written by S_MOV_B64 — not VCC itself — so the
# checks expect the instruction to stay in its VOP3 (_e64) form rather than
# being shrunk to the VOP2 V_ADDC_U32_e32 encoding.
345 # GCN-LABEL: name: check_addc_src2_vop3{{$}}
346 # GCN: %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
347 # GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
348 name: check_addc_src2_vop3
349 alignment: 0
350 exposesReturnsTwice: false
351 legalized: false
352 regBankSelected: false
353 selected: false
354 tracksRegLiveness: true
355 registers:
356 - { id: 0, class: sgpr_64 }
357 - { id: 1, class: sreg_32_xm0 }
358 - { id: 2, class: sgpr_32 }
359 - { id: 3, class: vgpr_32 }
360 - { id: 4, class: sreg_64_xexec }
361 - { id: 5, class: sreg_64_xexec }
362 - { id: 6, class: sreg_32 }
363 - { id: 7, class: sreg_32 }
364 - { id: 8, class: sreg_32_xm0 }
365 - { id: 9, class: sreg_64 }
366 - { id: 10, class: sreg_32_xm0 }
367 - { id: 11, class: sreg_32_xm0 }
368 - { id: 12, class: sgpr_64 }
369 - { id: 13, class: sgpr_128 }
370 - { id: 14, class: sreg_32_xm0 }
371 - { id: 15, class: sreg_64 }
372 - { id: 16, class: sgpr_128 }
373 - { id: 17, class: vgpr_32 }
374 - { id: 18, class: vreg_64 }
375 - { id: 19, class: vgpr_32 }
376 - { id: 20, class: vreg_64 }
377 - { id: 21, class: sreg_32_xm0 }
378 - { id: 22, class: sreg_32 }
379 - { id: 23, class: sreg_32 }
380 - { id: 24, class: vgpr_32 }
381 - { id: 25, class: vreg_64 }
382 - { id: 26, class: vgpr_32 }
383 - { id: 27, class: vreg_64 }
384 - { id: 28, class: vreg_64 }
385 - { id: 29, class: vgpr_32 }
386 liveins:
387 - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
388 - { reg: '%vgpr0', virtual-reg: '%3' }
389 frameInfo:
390 isFrameAddressTaken: false
391 isReturnAddressTaken: false
392 hasStackMap: false
393 hasPatchPoint: false
394 stackSize: 0
395 offsetAdjustment: 0
396 maxAlignment: 0
397 adjustsStack: false
398 hasCalls: false
399 maxCallFrameSize: 0
400 hasOpaqueSPAdjustment: false
401 hasVAStart: false
402 hasMustTailInVarArgFunc: false
403 body: |
404 bb.0 (%ir-block.0):
405 liveins: %sgpr0_sgpr1, %vgpr0
406
407 %3 = COPY %vgpr0
408 %0 = COPY %sgpr0_sgpr1
409 %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
410 %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
411 %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
412 %27 = REG_SEQUENCE %3, 1, %26, 2
413 %10 = S_MOV_B32 61440
414 %11 = S_MOV_B32 0
415 %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
416 %13 = REG_SEQUENCE killed %5, 17, %12, 18
417 %28 = V_LSHL_B64 killed %27, 2, implicit %exec
418 %16 = REG_SEQUENCE killed %4, 17, %12, 18
419 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
420 %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
421 %9 = S_MOV_B64 0
422 %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
423 %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
424 BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
425 S_ENDPGM
426
427 ...
428 ---
# The carry-in (src2) of the V_ADDC_U32_e64 below is %vcc itself, written by
# the S_MOV_B64 just before it, so the shrink pass may rewrite the add into
# the VOP2 form V_ADDC_U32_e32; the checks verify that substitution.
# FIX(review): the second check directive was written as "# GCN %24 ..."
# without the colon after the prefix, so FileCheck silently ignored it; the
# colon is restored to make the check active (matching the sibling tests).
429 # GCN-LABEL: name: shrink_addc_vop3{{$}}
430 # GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit %vcc, implicit %exec
431 # GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
432
433 name: shrink_addc_vop3
434 alignment: 0
435 exposesReturnsTwice: false
436 legalized: false
437 regBankSelected: false
438 selected: false
439 tracksRegLiveness: true
440 registers:
441 - { id: 0, class: sgpr_64 }
442 - { id: 1, class: sreg_32_xm0 }
443 - { id: 2, class: sgpr_32 }
444 - { id: 3, class: vgpr_32 }
445 - { id: 4, class: sreg_64_xexec }
446 - { id: 5, class: sreg_64_xexec }
447 - { id: 6, class: sreg_32 }
448 - { id: 7, class: sreg_32 }
449 - { id: 8, class: sreg_32_xm0 }
450 - { id: 9, class: sreg_64 }
451 - { id: 10, class: sreg_32_xm0 }
452 - { id: 11, class: sreg_32_xm0 }
453 - { id: 12, class: sgpr_64 }
454 - { id: 13, class: sgpr_128 }
455 - { id: 14, class: sreg_32_xm0 }
456 - { id: 15, class: sreg_64 }
457 - { id: 16, class: sgpr_128 }
458 - { id: 17, class: vgpr_32 }
459 - { id: 18, class: vreg_64 }
460 - { id: 19, class: vgpr_32 }
461 - { id: 20, class: vreg_64 }
462 - { id: 21, class: sreg_32_xm0 }
463 - { id: 22, class: sreg_32 }
464 - { id: 23, class: sreg_32 }
465 - { id: 24, class: vgpr_32 }
466 - { id: 25, class: vreg_64 }
467 - { id: 26, class: vgpr_32 }
468 - { id: 27, class: vreg_64 }
469 - { id: 28, class: vreg_64 }
470 - { id: 29, class: vgpr_32 }
471 liveins:
472 - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
473 - { reg: '%vgpr0', virtual-reg: '%3' }
474 frameInfo:
475 isFrameAddressTaken: false
476 isReturnAddressTaken: false
477 hasStackMap: false
478 hasPatchPoint: false
479 stackSize: 0
480 offsetAdjustment: 0
481 maxAlignment: 0
482 adjustsStack: false
483 hasCalls: false
484 maxCallFrameSize: 0
485 hasOpaqueSPAdjustment: false
486 hasVAStart: false
487 hasMustTailInVarArgFunc: false
488 body: |
489 bb.0 (%ir-block.0):
490 liveins: %sgpr0_sgpr1, %vgpr0
491
492 %3 = COPY %vgpr0
493 %0 = COPY %sgpr0_sgpr1
494 %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
495 %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
496 %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
497 %27 = REG_SEQUENCE %3, 1, %26, 2
498 %10 = S_MOV_B32 61440
499 %11 = S_MOV_B32 0
500 %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
501 %13 = REG_SEQUENCE killed %5, 17, %12, 18
502 %28 = V_LSHL_B64 killed %27, 2, implicit %exec
503 %16 = REG_SEQUENCE killed %4, 17, %12, 18
504 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
505 %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
506 %vcc = S_MOV_B64 0
507 %29, %vcc = V_ADDC_U32_e64 %19, %17, %vcc, implicit %exec
508 %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
509 BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
510 S_ENDPGM
511
512 ...
513
514 ---
# NOTE(review): the carry-in of the V_ADDC_U32_e64 below is an undef use of
# %vcc, so the shrink to the VOP2 form is still expected; the first check
# verifies that the resulting V_ADDC_U32_e32 carries the undef marker on its
# implicit %vcc use (mirroring the original operand's undef flag).
515 # GCN-LABEL: name: shrink_addc_undef_vcc{{$}}
516 # GCN: %29 = V_ADDC_U32_e32 %17, %19, implicit-def %vcc, implicit undef %vcc, implicit %exec
517 # GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
518 name: shrink_addc_undef_vcc
519 alignment: 0
520 exposesReturnsTwice: false
521 legalized: false
522 regBankSelected: false
523 selected: false
524 tracksRegLiveness: true
525 registers:
526 - { id: 0, class: sgpr_64 }
527 - { id: 1, class: sreg_32_xm0 }
528 - { id: 2, class: sgpr_32 }
529 - { id: 3, class: vgpr_32 }
530 - { id: 4, class: sreg_64_xexec }
531 - { id: 5, class: sreg_64_xexec }
532 - { id: 6, class: sreg_32 }
533 - { id: 7, class: sreg_32 }
534 - { id: 8, class: sreg_32_xm0 }
535 - { id: 9, class: sreg_64 }
536 - { id: 10, class: sreg_32_xm0 }
537 - { id: 11, class: sreg_32_xm0 }
538 - { id: 12, class: sgpr_64 }
539 - { id: 13, class: sgpr_128 }
540 - { id: 14, class: sreg_32_xm0 }
541 - { id: 15, class: sreg_64 }
542 - { id: 16, class: sgpr_128 }
543 - { id: 17, class: vgpr_32 }
544 - { id: 18, class: vreg_64 }
545 - { id: 19, class: vgpr_32 }
546 - { id: 20, class: vreg_64 }
547 - { id: 21, class: sreg_32_xm0 }
548 - { id: 22, class: sreg_32 }
549 - { id: 23, class: sreg_32 }
550 - { id: 24, class: vgpr_32 }
551 - { id: 25, class: vreg_64 }
552 - { id: 26, class: vgpr_32 }
553 - { id: 27, class: vreg_64 }
554 - { id: 28, class: vreg_64 }
555 - { id: 29, class: vgpr_32 }
556 liveins:
557 - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
558 - { reg: '%vgpr0', virtual-reg: '%3' }
559 frameInfo:
560 isFrameAddressTaken: false
561 isReturnAddressTaken: false
562 hasStackMap: false
563 hasPatchPoint: false
564 stackSize: 0
565 offsetAdjustment: 0
566 maxAlignment: 0
567 adjustsStack: false
568 hasCalls: false
569 maxCallFrameSize: 0
570 hasOpaqueSPAdjustment: false
571 hasVAStart: false
572 hasMustTailInVarArgFunc: false
573 body: |
574 bb.0 (%ir-block.0):
575 liveins: %sgpr0_sgpr1, %vgpr0
576
577 %3 = COPY %vgpr0
578 %0 = COPY %sgpr0_sgpr1
579 %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
580 %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
581 %26 = V_ASHRREV_I32_e32 31, %3, implicit %exec
582 %27 = REG_SEQUENCE %3, 1, %26, 2
583 %10 = S_MOV_B32 61440
584 %11 = S_MOV_B32 0
585 %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
586 %13 = REG_SEQUENCE killed %5, 17, %12, 18
587 %28 = V_LSHL_B64 killed %27, 2, implicit %exec
588 %16 = REG_SEQUENCE killed %4, 17, %12, 18
589 %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.a.ptr)
590 %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit %exec :: (volatile load 4 from %ir.b.ptr)
591 %29, %vcc = V_ADDC_U32_e64 %19, %17, undef %vcc, implicit %exec
592 %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
593 BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into %ir.out.gep)
594 S_ENDPGM
595
596 ...