llvm.org GIT mirror llvm / 8647750
Add X86 code emitter support for AVX-encoded MRMDestReg instructions. Previously we weren't skipping the VVVV-encoded register. Based on a patch by Michael Liao. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177221 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 7 years ago
4 changed file(s) with 95 addition(s) and 33 deletion(s). Raw diff Collapse all Expand all
673673 // MRMDestReg instructions forms:
674674 // dst(ModR/M), src(ModR/M)
675675 // dst(ModR/M), src(ModR/M), imm8
676 if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
676 // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
677 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
677678 VEX_B = 0x0;
678 if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
679 CurOp++;
680
681 if (HasVEX_4V)
682 VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
683
684 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
679685 VEX_R = 0x0;
680686 break;
681687 case X86II::MRM0r: case X86II::MRM1r:
10451051
10461052 case X86II::MRMDestReg:
10471053 EmitByte(BaseOpcode, CurByte, OS);
1054 SrcRegNum = CurOp + 1;
1055
1056 if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
1057 ++SrcRegNum;
1058
10481059 EmitRegModRMByte(MI.getOperand(CurOp),
1049 GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
1050 CurOp += 2;
1060 GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
1061 CurOp = SrcRegNum + 1;
10511062 break;
10521063
10531064 case X86II::MRMDestMem:
10461046 // MRMDestReg instructions forms:
10471047 // dst(ModR/M), src(ModR/M)
10481048 // dst(ModR/M), src(ModR/M), imm8
1049 if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
1049 // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
1050 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
10501051 VEX_B = 0x0;
1051 if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
1052 CurOp++;
1053
1054 if (HasVEX_4V)
1055 VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
1056
1057 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
10521058 VEX_R = 0x0;
10531059 break;
10541060 case X86II::MRM0r: case X86II::MRM1r:
12831289
12841290 case X86II::MRMDestReg: {
12851291 MCE.emitByte(BaseOpcode);
1292
1293 unsigned SrcRegNum = CurOp+1;
1294 if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
1295 SrcRegNum++;
1296
12861297 emitRegModRMByte(MI.getOperand(CurOp).getReg(),
1287 getX86RegNum(MI.getOperand(CurOp+1).getReg()));
1288 CurOp += 2;
1298 getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
1299 CurOp = SrcRegNum + 1;
12891300 break;
12901301 }
12911302 case X86II::MRMDestMem: {
416416 case X86::VMOVDQAYrr:
417417 case X86::VMOVDQUrr:
418418 case X86::VMOVDQUYrr:
419 case X86::VMOVSDrr:
420 case X86::VMOVSSrr:
421419 case X86::VMOVUPDrr:
422420 case X86::VMOVUPDYrr:
423421 case X86::VMOVUPSrr:
424422 case X86::VMOVUPSYrr: {
425 if (X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
426 !X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()))
427 break;
428
429 unsigned NewOpc;
430 switch (OutMI.getOpcode()) {
431 default: llvm_unreachable("Invalid opcode");
432 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
433 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
434 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
435 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
436 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
437 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
438 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
439 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
440 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
441 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
442 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
443 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
444 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
445 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
446 }
447 OutMI.setOpcode(NewOpc);
423 if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
424 X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
425 unsigned NewOpc;
426 switch (OutMI.getOpcode()) {
427 default: llvm_unreachable("Invalid opcode");
428 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
429 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
430 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
431 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
432 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
433 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
434 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
435 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
436 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
437 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
438 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
439 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
440 }
441 OutMI.setOpcode(NewOpc);
442 }
443 break;
444 }
445 case X86::VMOVSDrr:
446 case X86::VMOVSSrr: {
447 if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
448 X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
449 unsigned NewOpc;
450 switch (OutMI.getOpcode()) {
451 default: llvm_unreachable("Invalid opcode");
452 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
453 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
454 }
455 OutMI.setOpcode(NewOpc);
456 }
448457 break;
449458 }
450459
0 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx-i -show-mc-encoding
1
2 ; ModuleID = 'bugpoint-reduced-simplified.bc'
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4 target triple = "x86_64-apple-macosx10.8.0"
5
6 @b = external global [8 x float], align 32
7 @e = external global [8 x float], align 16
8
9 define void @main() #0 {
10 entry:
11 %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0
12 %bitcast.i = extractelement <8 x float> %0, i32 0
13 %vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
14 %vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
15 %vecinit3.i.i = insertelement <4 x float> %vecinit2.i.i, float 0.000000e+00, i32 2
16 %vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3
17 %1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2
18 %vecext.i.i = extractelement <4 x float> %1, i32 0
19 store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0
20 unreachable
21 }
22
23 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
24
25 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
26 attributes #1 = { nounwind readnone }
27 attributes #2 = { nounwind }
28
29 !0 = metadata !{metadata !"omnipotent char", metadata !1}
30 !1 = metadata !{metadata !"Simple C/C++ TBAA"}