llvm.org GIT mirror llvm / commit 72062f5 (Tim Northover)

Add AArch64 as an experimental target.

This patch adds support for AArch64 (ARM's 64-bit architecture) to LLVM in the "experimental" category. Currently, it won't be built unless requested explicitly.

This initial commit should have support for:

+ Assembly of all scalar (i.e. non-NEON, non-Crypto) instructions (except the late addition CRC instructions).
+ CodeGen features required for C++03 and C99.
+ Compilation for the "small" memory model: code + static data < 4GB.
+ Absolute and position-independent code.
+ GNU-style (i.e. "__thread") TLS.
+ Debugging information.

The principal omission, currently, is performance tuning.

This patch excludes the NEON support also reviewed due to an outbreak of batshit insanity in our legal department. That will be committed soon, bringing the changes to precisely what has been approved.

Further reviews would be gratefully received.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174054 91177308-0d34-0410-b5e6-96231b3b80d8

192 changed files with 45,628 additions and 13 deletions.
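Before the diff itself, a minimal sketch of the kind of source the new backend is claimed to handle. Everything here is illustrative only: the triple and driver invocation are assumptions, not part of this commit, and the names are made up. It exercises GNU-style ("__thread") TLS and plain C++03 code generation from the feature list above.

// Illustrative only. Assumed invocation (not part of this patch), e.g.:
//   clang -target aarch64-none-linux-gnu -S tls_example.cpp
__thread int counter = 0;   // thread-local storage; lowered via the TLS
                            // relocations and sequences added in this patch

int bump(int by) {
  counter += by;            // expands to the model-specific TLS access sequence
  return counter;
}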
250250 | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
251251 | am33_2.0 \
252252 | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
253 | be32 | be64 \
253 | aarch64 \
254 | be32 | be64 \
254255 | bfin \
255256 | c4x | clipper \
256257 | d10v | d30v | dlx | dsp16xx \
358359 | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
359360 | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
360361 | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
362 | aarch64-* \
361363 | avr-* | avr32-* \
362364 | be32-* | be64-* \
363365 | bfin-* | bs2000-* \
388388 sparc*-*) llvm_cv_target_arch="Sparc" ;;
389389 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
390390 arm*-*) llvm_cv_target_arch="ARM" ;;
391 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
391392 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
392393 mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
393394 xcore-*) llvm_cv_target_arch="XCore" ;;
421422 sparc*-*) host_arch="Sparc" ;;
422423 powerpc*-*) host_arch="PowerPC" ;;
423424 arm*-*) host_arch="ARM" ;;
425 aarch64*-*) host_arch="AArch64" ;;
424426 mips-* | mips64-*) host_arch="Mips" ;;
425427 mipsel-* | mips64el-*) host_arch="Mips" ;;
426428 xcore-*) host_arch="XCore" ;;
639641 PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
640642 x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
641643 ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
644 AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
642645 Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
643646 XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
644647 MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
770773 TARGETS_TO_BUILD=""
771774 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
772775 [Build specific host targets: all or target1,target2,... Valid targets are:
773 host, x86, x86_64, sparc, powerpc, arm, mips, hexagon,
776 host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
774777 xcore, msp430, nvptx, and cpp (default=all)]),,
775778 enableval=all)
776779 if test "$enableval" = host-only ; then
14371437 YES)
14381438 --enable-targets Build specific host targets: all or
14391439 target1,target2,... Valid targets are: host, x86,
1440 x86_64, sparc, powerpc, arm, mips, hexagon, xcore,
1441 msp430, nvptx, and cpp (default=all)
1440 x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
1441 xcore, msp430, nvptx, and cpp (default=all)
14421442 --enable-experimental-targets
14431443 Build experimental host targets: disable or
14441444 target1,target2,... (default=disable)
40074007 sparc*-*) llvm_cv_target_arch="Sparc" ;;
40084008 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
40094009 arm*-*) llvm_cv_target_arch="ARM" ;;
4010 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
40104011 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
40114012 mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
40124013 xcore-*) llvm_cv_target_arch="XCore" ;;
40404041 sparc*-*) host_arch="Sparc" ;;
40414042 powerpc*-*) host_arch="PowerPC" ;;
40424043 arm*-*) host_arch="ARM" ;;
4044 aarch64*-*) host_arch="AArch64" ;;
40434045 mips-* | mips64-*) host_arch="Mips" ;;
40444046 mipsel-* | mips64el-*) host_arch="Mips" ;;
40454047 xcore-*) host_arch="XCore" ;;
53715373 x86_64) TARGET_HAS_JIT=1
53725374 ;;
53735375 ARM) TARGET_HAS_JIT=1
5376 ;;
5377 AArch64) TARGET_HAS_JIT=0
53745378 ;;
53755379 Mips) TARGET_HAS_JIT=1
53765380 ;;
1048810492 lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1048910493 lt_status=$lt_dlunknown
1049010494 cat > conftest.$ac_ext <
10491 #line 10492 "configure"
10495 #line 10496 "configure"
1049210496 #include "confdefs.h"
1049310497
1049410498 #if HAVE_DLFCN_H
2020 * `ARM documentation `_ (`Processor Cores `_ Cores)
2121
2222 * `ABI `_
23
24 AArch64
25 -------
26
27 * `ARMv8 Instruction Set Overview `_
2328
2429 Itanium (ia64)
2530 --------------
98103 -----
99104
100105 * `PowerPC 64-bit ELF ABI Supplement `_
106 * `Procedure Call Standard for the AArch64 Architecture `_
107 * `ELF for the ARM 64-bit Architecture (AArch64) `_
101108
102109 OS X
103110 ----
4343 UnknownArch,
4444
4545 arm, // ARM; arm, armv.*, xscale
46 aarch64, // AArch64: aarch64
4647 hexagon, // Hexagon: hexagon
4748 mips, // MIPS: mips, mipsallegrex
4849 mipsel, // MIPSEL: mipsel, mipsallegrexel
471471 virtual void AddValueSymbols(MCAssembler *) const = 0;
472472 virtual const MCSection *FindAssociatedSection() const = 0;
473473
474 virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;
475
474476 static bool classof(const MCExpr *E) {
475477 return E->getKind() == MCExpr::Target;
476478 }
99 #ifndef LLVM_MC_MCOBJECTWRITER_H
1010 #define LLVM_MC_MCOBJECTWRITER_H
1111
12 #include "llvm/ADT/SmallVector.h"
1213 #include "llvm/Support/Compiler.h"
1314 #include "llvm/Support/DataTypes.h"
1415 #include "llvm/Support/raw_ostream.h"
16191619 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
16201620 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
16211621 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
1622 default:
1623 res = "Unknown";
1624 }
1625 break;
1626 case ELF::EM_AARCH64:
1627 switch (type) {
1628 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE);
1629 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64);
1630 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32);
1631 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS16);
1632 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL64);
1633 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL32);
1634 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL16);
1635 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0);
1636 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0_NC);
1637 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1);
1638 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1_NC);
1639 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2);
1640 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2_NC);
1641 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G3);
1642 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G0);
1643 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G1);
1644 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G2);
1645 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD_PREL_LO19);
1646 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_LO21);
1647 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_PG_HI21);
1648 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADD_ABS_LO12_NC);
1649 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST8_ABS_LO12_NC);
1650 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TSTBR14);
1651 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CONDBR19);
1652 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP26);
1653 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CALL26);
1654 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST16_ABS_LO12_NC);
1655 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC);
1656 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC);
1657 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC);
1658 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE);
1659 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC);
1660 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2);
1661 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1);
1662 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC);
1663 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0);
1664 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC);
1665 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_HI12);
1666 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12);
1667 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC);
1668 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12);
1669 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC);
1670 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12);
1671 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC);
1672 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12);
1673 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC);
1674 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12);
1675 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC);
1676 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1);
1677 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC);
1678 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21);
1679 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC);
1680 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19);
1681 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G2);
1682 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1);
1683 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC);
1684 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0);
1685 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC);
1686 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_HI12);
1687 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12);
1688 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
1689 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12);
1690 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC);
1691 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12);
1692 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC);
1693 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12);
1694 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC);
1695 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12);
1696 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC);
1697 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADR_PAGE);
1698 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
1699 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
1700 LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
1701
16221702 default:
16231703 res = "Unknown";
16241704 }
19362016 res = "Unknown";
19372017 }
19382018 break;
2019 case ELF::EM_AARCH64:
19392020 case ELF::EM_ARM:
19402021 case ELF::EM_HEXAGON:
19412022 res = symname;
23552436 return "ELF64-i386";
23562437 case ELF::EM_X86_64:
23572438 return "ELF64-x86-64";
2439 case ELF::EM_AARCH64:
2440 return "ELF64-aarch64";
23582441 case ELF::EM_PPC64:
23592442 return "ELF64-ppc64";
23602443 default:
23732456 return Triple::x86;
23742457 case ELF::EM_X86_64:
23752458 return Triple::x86_64;
2459 case ELF::EM_AARCH64:
2460 return Triple::aarch64;
23762461 case ELF::EM_ARM:
23772462 return Triple::arm;
23782463 case ELF::EM_HEXAGON:
270270 EM_SLE9X = 179, // Infineon Technologies SLE9X core
271271 EM_L10M = 180, // Intel L10M
272272 EM_K10M = 181, // Intel K10M
273 EM_AARCH64 = 183, // ARM AArch64
273274 EM_AVR32 = 185, // Atmel Corporation 32-bit microprocessor family
274275 EM_STM8 = 186, // STMicroeletronics STM8 8-bit microcontroller
275276 EM_TILE64 = 187, // Tilera TILE64 multicore architecture family
491492 R_PPC64_GOT_TPREL16_HA = 90,
492493 R_PPC64_TLSGD = 107,
493494 R_PPC64_TLSLD = 108
495 };
496
497 // ELF Relocation types for AArch64
498
499 enum {
500 R_AARCH64_NONE = 0x100,
501
502 R_AARCH64_ABS64 = 0x101,
503 R_AARCH64_ABS32 = 0x102,
504 R_AARCH64_ABS16 = 0x103,
505 R_AARCH64_PREL64 = 0x104,
506 R_AARCH64_PREL32 = 0x105,
507 R_AARCH64_PREL16 = 0x106,
508
509 R_AARCH64_MOVW_UABS_G0 = 0x107,
510 R_AARCH64_MOVW_UABS_G0_NC = 0x108,
511 R_AARCH64_MOVW_UABS_G1 = 0x109,
512 R_AARCH64_MOVW_UABS_G1_NC = 0x10a,
513 R_AARCH64_MOVW_UABS_G2 = 0x10b,
514 R_AARCH64_MOVW_UABS_G2_NC = 0x10c,
515 R_AARCH64_MOVW_UABS_G3 = 0x10d,
516 R_AARCH64_MOVW_SABS_G0 = 0x10e,
517 R_AARCH64_MOVW_SABS_G1 = 0x10f,
518 R_AARCH64_MOVW_SABS_G2 = 0x110,
519
520 R_AARCH64_LD_PREL_LO19 = 0x111,
521 R_AARCH64_ADR_PREL_LO21 = 0x112,
522 R_AARCH64_ADR_PREL_PG_HI21 = 0x113,
523 R_AARCH64_ADD_ABS_LO12_NC = 0x115,
524 R_AARCH64_LDST8_ABS_LO12_NC = 0x116,
525
526 R_AARCH64_TSTBR14 = 0x117,
527 R_AARCH64_CONDBR19 = 0x118,
528 R_AARCH64_JUMP26 = 0x11a,
529 R_AARCH64_CALL26 = 0x11b,
530
531 R_AARCH64_LDST16_ABS_LO12_NC = 0x11c,
532 R_AARCH64_LDST32_ABS_LO12_NC = 0x11d,
533 R_AARCH64_LDST64_ABS_LO12_NC = 0x11e,
534
535 R_AARCH64_LDST128_ABS_LO12_NC = 0x12b,
536
537 R_AARCH64_ADR_GOT_PAGE = 0x137,
538 R_AARCH64_LD64_GOT_LO12_NC = 0x138,
539
540 R_AARCH64_TLSLD_MOVW_DTPREL_G2 = 0x20b,
541 R_AARCH64_TLSLD_MOVW_DTPREL_G1 = 0x20c,
542 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC = 0x20d,
543 R_AARCH64_TLSLD_MOVW_DTPREL_G0 = 0x20e,
544 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC = 0x20f,
545 R_AARCH64_TLSLD_ADD_DTPREL_HI12 = 0x210,
546 R_AARCH64_TLSLD_ADD_DTPREL_LO12 = 0x211,
547 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC = 0x212,
548 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 = 0x213,
549 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC = 0x214,
550 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 = 0x215,
551 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216,
552 R_AARCH64_TLSLD_LDST32_DTPREL_LO12 = 0x217,
553 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218,
554 R_AARCH64_TLSLD_LDST64_DTPREL_LO12 = 0x219,
555 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a,
556
557 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 = 0x21b,
558 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC = 0x21c,
559 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 0x21d,
560 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e,
561 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 = 0x21f,
562
563 R_AARCH64_TLSLE_MOVW_TPREL_G2 = 0x220,
564 R_AARCH64_TLSLE_MOVW_TPREL_G1 = 0x221,
565 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC = 0x222,
566 R_AARCH64_TLSLE_MOVW_TPREL_G0 = 0x223,
567 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC = 0x224,
568 R_AARCH64_TLSLE_ADD_TPREL_HI12 = 0x225,
569 R_AARCH64_TLSLE_ADD_TPREL_LO12 = 0x226,
570 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC = 0x227,
571 R_AARCH64_TLSLE_LDST8_TPREL_LO12 = 0x228,
572 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC = 0x229,
573 R_AARCH64_TLSLE_LDST16_TPREL_LO12 = 0x22a,
574 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC = 0x22b,
575 R_AARCH64_TLSLE_LDST32_TPREL_LO12 = 0x22c,
576 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC = 0x22d,
577 R_AARCH64_TLSLE_LDST64_TPREL_LO12 = 0x22e,
578 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC = 0x22f,
579
580 R_AARCH64_TLSDESC_ADR_PAGE = 0x232,
581 R_AARCH64_TLSDESC_LD64_LO12_NC = 0x233,
582 R_AARCH64_TLSDESC_ADD_LO12_NC = 0x234,
583
584 R_AARCH64_TLSDESC_CALL = 0x239
494585 };
495586
496587 // ARM Specific e_flags
299299
300300 void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
301301 switch (expr->getKind()) {
302 case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
302 case MCExpr::Target:
303 cast<MCTargetExpr>(expr)->fixELFSymbolsInTLSFixups(getAssembler());
304 break;
303305 case MCExpr::Constant:
304306 break;
305307
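The hunk above routes MCExpr::Target expressions through the new MCTargetExpr::fixELFSymbolsInTLSFixups hook instead of hitting llvm_unreachable. A minimal sketch of what a target override typically does is shown below; it assumes the MC interfaces of this revision (MCELF::SetType, MCAssembler::getOrCreateSymbolData), and the helper name markTLSSymbols is hypothetical.

#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ELF.h"
using namespace llvm;

// Walk an expression tree and mark every referenced symbol as STT_TLS; this is
// the sort of work a target's fixELFSymbolsInTLSFixups override performs for
// its TLS-related modifiers.
static void markTLSSymbols(const MCExpr *Expr, MCAssembler &Asm) {
  switch (Expr->getKind()) {
  case MCExpr::SymbolRef: {
    const MCSymbolRefExpr &Ref = *cast<MCSymbolRefExpr>(Expr);
    MCSymbolData &SD = Asm.getOrCreateSymbolData(Ref.getSymbol());
    MCELF::SetType(SD, ELF::STT_TLS);
    break;
  }
  case MCExpr::Binary: {
    const MCBinaryExpr &BE = *cast<MCBinaryExpr>(Expr);
    markTLSSymbols(BE.getLHS(), Asm);
    markTLSSymbols(BE.getRHS(), Asm);
    break;
  }
  case MCExpr::Unary:
    markTLSSymbols(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
    break;
  default:      // Constant and nested Target expressions need no marking here.
    break;
  }
}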
255255 TTypeEncoding = (CMModel == CodeModel::Small)
256256 ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
257257 }
258 } else if (T.getArch() == Triple::aarch64) {
259 FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
260
261 // The small model guarantees static code/data size < 4GB, but not where it
262 // will be in memory. Most of these could end up >2GB away so even a signed
263 // pc-relative 32-bit address is insufficient, theoretically.
264 if (RelocM == Reloc::PIC_) {
265 PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
266 dwarf::DW_EH_PE_sdata8;
267 LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
268 FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
269 TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
270 dwarf::DW_EH_PE_sdata8;
271 } else {
272 PersonalityEncoding = dwarf::DW_EH_PE_absptr;
273 LSDAEncoding = dwarf::DW_EH_PE_absptr;
274 FDEEncoding = dwarf::DW_EH_PE_udata4;
275 TTypeEncoding = dwarf::DW_EH_PE_absptr;
276 }
258277 } else if (T.getArch() == Triple::ppc64) {
259278 PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
260279 dwarf::DW_EH_PE_udata8;
1818 switch (Kind) {
1919 case UnknownArch: return "unknown";
2020
21 case aarch64: return "aarch64";
2122 case arm: return "arm";
2223 case hexagon: return "hexagon";
2324 case mips: return "mips";
5253 default:
5354 return 0;
5455
56 case aarch64: return "aarch64";
57
5558 case arm:
5659 case thumb: return "arm";
5760
151154
152155 Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
153156 return StringSwitch<Triple::ArchType>(Name)
157 .Case("aarch64", aarch64)
154158 .Case("arm", arm)
155159 .Case("mips", mips)
156160 .Case("mipsel", mipsel)
214218 .Case("powerpc", Triple::ppc)
215219 .Cases("powerpc64", "ppu", Triple::ppc64)
216220 .Case("mblaze", Triple::mblaze)
221 .Case("aarch64", Triple::aarch64)
217222 .Cases("arm", "xscale", Triple::arm)
218223 // FIXME: It would be good to replace these with explicit names for all the
219224 // various suffixes supported.
675680 case llvm::Triple::spir:
676681 return 32;
677682
683 case llvm::Triple::aarch64:
678684 case llvm::Triple::mips64:
679685 case llvm::Triple::mips64el:
680686 case llvm::Triple::nvptx64:
703709 Triple T(*this);
704710 switch (getArch()) {
705711 case Triple::UnknownArch:
712 case Triple::aarch64:
706713 case Triple::msp430:
707714 T.setArch(UnknownArch);
708715 break;
754761 T.setArch(UnknownArch);
755762 break;
756763
764 case Triple::aarch64:
757765 case Triple::spir64:
758766 case Triple::mips64:
759767 case Triple::mips64el:
0 //==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the entry points for global functions defined in the LLVM
10 // AArch64 back-end.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_TARGET_AARCH64_H
15 #define LLVM_TARGET_AARCH64_H
16
17 #include "MCTargetDesc/AArch64MCTargetDesc.h"
18 #include "llvm/Target/TargetMachine.h"
19
20 namespace llvm {
21
22 class AArch64AsmPrinter;
23 class FunctionPass;
24 class AArch64TargetMachine;
25 class MachineInstr;
26 class MCInst;
27
28 FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
29 CodeGenOpt::Level OptLevel);
30
31 FunctionPass *createAArch64ConstantIslandPass();
32
33 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
34
35 void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
36 AArch64AsmPrinter &AP);
37
38
39 }
40
41 #endif
0 //===- AArch64.td - Describe the AArch64 Target Machine ---------*- tblgen -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // This is the top level entry point for the AArch64 target.
9 //===----------------------------------------------------------------------===//
10
11 //===----------------------------------------------------------------------===//
12 // Target-independent interfaces
13 //===----------------------------------------------------------------------===//
14
15 include "llvm/Target/Target.td"
16
17 //===----------------------------------------------------------------------===//
18 // AArch64 Subtarget features.
19 //
20
21 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
22 "Enable Advanced SIMD instructions">;
23
24 def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
25 "Enable cryptographic instructions">;
26
27 //===----------------------------------------------------------------------===//
28 // AArch64 Processors
29 //
30
31 include "AArch64Schedule.td"
32
33 def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
34
35 //===----------------------------------------------------------------------===//
36 // Register File Description
37 //===----------------------------------------------------------------------===//
38
39 include "AArch64RegisterInfo.td"
40
41 include "AArch64CallingConv.td"
42
43 //===----------------------------------------------------------------------===//
44 // Instruction Descriptions
45 //===----------------------------------------------------------------------===//
46
47 include "AArch64InstrInfo.td"
48
49 def AArch64InstrInfo : InstrInfo;
50
51 //===----------------------------------------------------------------------===//
52 // Assembly printer
53 //===----------------------------------------------------------------------===//
54
55 def A64InstPrinter : AsmWriter {
56 string AsmWriterClassName = "InstPrinter";
57 bit isMCAsmWriter = 1;
58 }
59
60 //===----------------------------------------------------------------------===//
61 // Declare the target which we are implementing
62 //===----------------------------------------------------------------------===//
63
64 def AArch64 : Target {
65 let InstructionSet = AArch64InstrInfo;
66 let AssemblyWriters = [A64InstPrinter];
67 }
0 //===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to GAS-format AArch64 assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #define DEBUG_TYPE "asm-printer"
15 #include "AArch64AsmPrinter.h"
16 #include "InstPrinter/AArch64InstPrinter.h"
17 #include "llvm/DebugInfo.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/CodeGen/MachineConstantPool.h"
20 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
21 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22 #include "llvm/MC/MCAsmInfo.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCSymbol.h"
25 #include "llvm/Support/TargetRegistry.h"
26 #include "llvm/Target/Mangler.h"
27
28 using namespace llvm;
29
30 MachineLocation
31 AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
32 // See emitFrameIndexDebugValue in InstrInfo for where this instruction is
33 // expected to be created.
34 assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg()
35 && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE");
36 return MachineLocation(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
37 }
38
39 /// Try to print a floating-point register as if it belonged to a specified
40 /// register-class. For example the inline asm operand modifier "b" requires its
41 /// argument to be printed as "bN".
42 static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
43 const TargetRegisterInfo *TRI,
44 const TargetRegisterClass &RegClass,
45 raw_ostream &O) {
46 if (!MO.isReg())
47 return true;
48
49 for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
50 if (RegClass.contains(*AR)) {
51 O << AArch64InstPrinter::getRegisterName(*AR);
52 return false;
53 }
54 }
55 return true;
56 }
57
58 /// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
59 /// with the obvious type and an immediate 0 as either wzr or xzr.
60 static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
61 const TargetRegisterInfo *TRI,
62 const TargetRegisterClass &RegClass,
63 raw_ostream &O) {
64 char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
65
66 if (MO.isImm() && MO.getImm() == 0) {
67 O << Prefix << "zr";
68 return false;
69 } else if (MO.isReg()) {
70 if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
71 O << (Prefix == 'x' ? "sp" : "wsp");
72 return false;
73 }
74
75 for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
76 if (RegClass.contains(*AR)) {
77 O << AArch64InstPrinter::getRegisterName(*AR);
78 return false;
79 }
80 }
81 }
82
83 return true;
84 }
85
86 bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
87 bool PrintImmediatePrefix,
88 StringRef Suffix, raw_ostream &O) {
89 StringRef Name;
90 StringRef Modifier;
91 switch (MO.getType()) {
92 default: llvm_unreachable("Unexpected operand for symbolic address constraint");
93 case MachineOperand::MO_GlobalAddress:
94 Name = Mang->getSymbol(MO.getGlobal())->getName();
95
96 // Global variables may be accessed either via a GOT or in various fun and
97 // interesting TLS-model specific ways. Set the prefix modifier as
98 // appropriate here.
99 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
100 Reloc::Model RelocM = TM.getRelocationModel();
101 if (GV->isThreadLocal()) {
102 switch (TM.getTLSModel(GV)) {
103 case TLSModel::GeneralDynamic:
104 Modifier = "tlsdesc";
105 break;
106 case TLSModel::LocalDynamic:
107 Modifier = "dtprel";
108 break;
109 case TLSModel::InitialExec:
110 Modifier = "gottprel";
111 break;
112 case TLSModel::LocalExec:
113 Modifier = "tprel";
114 break;
115 }
116 } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
117 Modifier = "got";
118 }
119 }
120 break;
121 case MachineOperand::MO_BlockAddress:
122 Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
123 break;
124 case MachineOperand::MO_ExternalSymbol:
125 Name = MO.getSymbolName();
126 break;
127 case MachineOperand::MO_ConstantPoolIndex:
128 Name = GetCPISymbol(MO.getIndex())->getName();
129 break;
130 }
131
132 // Some instructions (notably ADRP) don't take the # prefix for
133 // immediates. Only print it if asked to.
134 if (PrintImmediatePrefix)
135 O << '#';
136
137 // Only need the joining "_" if both the prefix and the suffix are
138 // non-null. This little block simply takes care of the four possibly
139 // combinations involved there.
140 if (Modifier == "" && Suffix == "")
141 O << Name;
142 else if (Modifier == "" && Suffix != "")
143 O << ":" << Suffix << ':' << Name;
144 else if (Modifier != "" && Suffix == "")
145 O << ":" << Modifier << ':' << Name;
146 else
147 O << ":" << Modifier << '_' << Suffix << ':' << Name;
148
149 return false;
150 }
151
152 bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
153 unsigned AsmVariant,
154 const char *ExtraCode, raw_ostream &O) {
155 const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
156 if (!ExtraCode || !ExtraCode[0]) {
157 // There's actually no operand modifier, which leads to a slightly eclectic
158 // set of behaviour which we have to handle here.
159 const MachineOperand &MO = MI->getOperand(OpNum);
160 switch (MO.getType()) {
161 default:
162 llvm_unreachable("Unexpected operand for inline assembly");
163 case MachineOperand::MO_Register:
164 // GCC prints the unmodified operand of a 'w' constraint as the vector
165 // register. Technically, we could allocate the argument as a VPR128, but
166 // that leads to extremely dodgy copies being generated to get the data
167 // there.
168 if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
169 O << AArch64InstPrinter::getRegisterName(MO.getReg());
170 break;
171 case MachineOperand::MO_Immediate:
172 O << '#' << MO.getImm();
173 break;
174 case MachineOperand::MO_FPImmediate:
175 assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
176 O << "#0.0";
177 break;
178 case MachineOperand::MO_BlockAddress:
179 case MachineOperand::MO_ConstantPoolIndex:
180 case MachineOperand::MO_GlobalAddress:
181 case MachineOperand::MO_ExternalSymbol:
182 return printSymbolicAddress(MO, false, "", O);
183 }
184 return false;
185 }
186
187 // We have a real modifier to handle.
188 switch(ExtraCode[0]) {
189 default:
190 // See if this is a generic operand
191 return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
192 case 'c': // Don't print "#" before an immediate operand.
193 if (!MI->getOperand(OpNum).isImm())
194 return true;
195 O << MI->getOperand(OpNum).getImm();
196 return false;
197 case 'w':
198 // Output 32-bit general register operand, constant zero as wzr, or stack
199 // pointer as wsp. Ignored when used with other operand types.
200 return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
201 AArch64::GPR32RegClass, O);
202 case 'x':
203 // Output 64-bit general register operand, constant zero as xzr, or stack
204 // pointer as sp. Ignored when used with other operand types.
205 return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
206 AArch64::GPR64RegClass, O);
207 case 'H':
208 // Output higher numbered of a 64-bit general register pair
209 case 'Q':
210 // Output least significant register of a 64-bit general register pair
211 case 'R':
212 // Output most significant register of a 64-bit general register pair
213
214 // FIXME note: these three operand modifiers will require, to some extent,
215 // adding a paired GPR64 register class. Initial investigation suggests that
216 // assertions are hit unless it has a type and is made legal for that type
217 // in ISelLowering. After that step is made, the number of modifications
218 // needed explodes (operation legality, calling conventions, stores, reg
219 // copies ...).
220 llvm_unreachable("FIXME: Unimplemented register pairs");
221 case 'b':
222 // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
223 return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
224 AArch64::FPR8RegClass, O);
225 case 'h':
226 // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
227 return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
228 AArch64::FPR16RegClass, O);
229 case 's':
230 // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
231 return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
232 AArch64::FPR32RegClass, O);
233 case 'd':
234 // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
235 return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
236 AArch64::FPR64RegClass, O);
237 case 'q':
238 // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
239 return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
240 AArch64::FPR128RegClass, O);
241 case 'A':
242 // Output symbolic address with appropriate relocation modifier (also
243 // suitable for ADRP).
244 return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
245 case 'L':
246 // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
247 // modifier.
248 return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
249 case 'G':
250 // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
251 // modifier (currently only for TLS local exec).
252 return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
253 }
254
255
256 }
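For reference, the modifiers implemented above correspond to the usual GNU inline assembly operand modifiers for this architecture. A hedged example follows; it assumes a front-end already targeting AArch64 and is not part of this patch.

// Illustrative only: 'w' prints the 32-bit name of a general register,
// 'x' the 64-bit name, and 'd' the 64-bit FP/SIMD scalar register name.
static inline int add32(int a, int b) {
  int r;
  asm("add %w0, %w1, %w2" : "=r"(r) : "r"(a), "r"(b)); // emits e.g. "add w0, w1, w2"
  return r;
}

static inline double fmov64(double v) {
  double r;
  asm("fmov %d0, %d1" : "=w"(r) : "w"(v));             // "w" constraint selects an FP/SIMD register
  return r;
}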
257
258 bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
259 unsigned OpNum,
260 unsigned AsmVariant,
261 const char *ExtraCode,
262 raw_ostream &O) {
263 // Currently both the memory constraints (m and Q) behave the same and amount
264 // to the address as a single register. In future, we may allow "m" to provide
265 // both a base and an offset.
266 const MachineOperand &MO = MI->getOperand(OpNum);
267 assert(MO.isReg() && "unexpected inline assembly memory operand");
268 O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
269 return false;
270 }
271
272 void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
273 raw_ostream &OS) {
274 unsigned NOps = MI->getNumOperands();
275 assert(NOps==4);
276 OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
277 // cast away const; DIetc do not take const operands for some reason.
278 DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
279 OS << V.getName();
280 OS << " <- ";
281 // Frame address. Currently handles register +- offset only.
282 assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
283 OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg());
284 OS << '+' << MI->getOperand(1).getImm();
285 OS << ']';
286 OS << "+" << MI->getOperand(NOps - 2).getImm();
287 }
288
289
290 #include "AArch64GenMCPseudoLowering.inc"
291
292 void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
293 // Do any auto-generated pseudo lowerings.
294 if (emitPseudoExpansionLowering(OutStreamer, MI))
295 return;
296
297 switch (MI->getOpcode()) {
298 case AArch64::CONSTPOOL_ENTRY: {
299 unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
300 unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
301
302 OutStreamer.EmitLabel(GetCPISymbol(LabelId));
303
304 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
305 if (MCPE.isMachineConstantPoolEntry())
306 EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
307 else
308 EmitGlobalConstant(MCPE.Val.ConstVal);
309
310 return;
311 }
312 case AArch64::DBG_VALUE: {
313 if (isVerbose() && OutStreamer.hasRawTextSupport()) {
314 SmallString<128> TmpStr;
315 raw_svector_ostream OS(TmpStr);
316 PrintDebugValueComment(MI, OS);
317 OutStreamer.EmitRawText(StringRef(OS.str()));
318 }
319 return;
320 }
321 }
322
323 MCInst TmpInst;
324 LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
325 OutStreamer.EmitInstruction(TmpInst);
326 }
327
328 void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
329 if (Subtarget->isTargetELF()) {
330 const TargetLoweringObjectFileELF &TLOFELF =
331 static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
332
333 MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
334
335 // Output stubs for external and common global variables.
336 MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
337 if (!Stubs.empty()) {
338 OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
339 const DataLayout *TD = TM.getDataLayout();
340
341 for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
342 OutStreamer.EmitLabel(Stubs[i].first);
343 OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
344 TD->getPointerSize(0), 0);
345 }
346 Stubs.clear();
347 }
348 }
349 }
350
351 bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
352 MCP = MF.getConstantPool();
353 return AsmPrinter::runOnMachineFunction(MF);
354 }
355
356 // Force static initialization.
357 extern "C" void LLVMInitializeAArch64AsmPrinter() {
358 RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target);
359 }
360
0 // AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // AArch64 Assembly printer class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_AARCH64ASMPRINTER_H
14 #define LLVM_AARCH64ASMPRINTER_H
15
16 #include "AArch64.h"
17 #include "AArch64TargetMachine.h"
18 #include "llvm/CodeGen/AsmPrinter.h"
19 #include "llvm/MC/MCStreamer.h"
20 #include "llvm/Support/Compiler.h"
21
22 namespace llvm {
23
24 class MCOperand;
25
26 class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
27
28 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
29 /// make the right decision when printing asm code for different targets.
30 const AArch64Subtarget *Subtarget;
31 const MachineConstantPool *MCP;
32
33 // emitPseudoExpansionLowering - tblgen'erated.
34 bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
35 const MachineInstr *MI);
36
37 public:
38 explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
39 : AsmPrinter(TM, Streamer) {
40 Subtarget = &TM.getSubtarget<AArch64Subtarget>();
41 }
42
43 bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
44
45 MCOperand lowerSymbolOperand(const MachineOperand &MO,
46 const MCSymbol *Sym) const;
47
48 void EmitInstruction(const MachineInstr *MI);
49 void EmitEndOfAsmFile(Module &M);
50
51 bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
52 unsigned AsmVariant, const char *ExtraCode,
53 raw_ostream &O);
54 bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
55 unsigned AsmVariant, const char *ExtraCode,
56 raw_ostream &O);
57
58 void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
59
60 /// printSymbolicAddress - Given some kind of reasonably bare symbolic
61 /// reference, print out the appropriate asm string to represent it. If
62 /// appropriate, a relocation-specifier will be produced, composed of a
63 /// general class derived from the MO parameter and an instruction-specific
64 /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
65 /// given.
66 bool printSymbolicAddress(const MachineOperand &MO,
67 bool PrintImmediatePrefix,
68 StringRef Suffix, raw_ostream &O);
69
70 MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
71
72 virtual const char *getPassName() const {
73 return "AArch64 Assembly Printer";
74 }
75
76 /// A no-op on AArch64 because we emit our constant pool entries inline with
77 /// the function.
78 virtual void EmitConstantPool() {}
79
80 virtual bool runOnMachineFunction(MachineFunction &MF);
81 };
82 } // end namespace llvm
83
84 #endif
0 //==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // This describes the calling conventions for AArch64 architecture.
9 //===----------------------------------------------------------------------===//
10
11
12 // The AArch64 Procedure Call Standard is unfortunately specified at a slightly
13 // higher level of abstraction than LLVM's target interface presents. In
14 // particular, it refers (like other ABIs, in fact) directly to
15 // structs. However, generic LLVM code takes the liberty of lowering structure
16 // arguments to the component fields before we see them.
17 //
18 // As a result, the obvious direct map from LLVM IR to PCS concepts can't be
19 // implemented, so the goals of this calling convention are, in decreasing
20 // priority order:
21 // 1. Expose *some* way to express the concepts required to implement the
22 // generic PCS from a front-end.
23 // 2. Provide a sane ABI for pure LLVM.
24 // 3. Follow the generic PCS as closely as is naturally possible.
25 //
26 // The suggested front-end implementation of PCS features is:
27 // * Integer, float and vector arguments of all sizes which end up in
28 // registers are passed and returned via the natural LLVM type.
29 // * Structure arguments with size <= 16 bytes are passed and returned in
30 // registers as similar integer or composite types. For example:
31 // [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
32 // * HFAs in registers follow rules similar to small structs: appropriate
33 // composite types.
34 // * Structure arguments with size > 16 bytes are passed via a pointer,
35 // handled completely by the front-end.
36 // * Structure return values > 16 bytes via an sret pointer argument.
37 // * Other stack-based arguments (not large structs) are passed using byval
38 // pointers. Padding arguments are added beforehand to guarantee a large
39 // struct doesn't later use integer registers.
40 //
41 // N.b. this means that it is the front-end's responsibility (if it cares about
42 // PCS compliance) to check whether enough registers are available for an
43 // argument when deciding how to pass it.
44
45 class CCIfAlign<string Align, CCAction A>:
46 CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
47
48 def CC_A64_APCS : CallingConv<[
49 // SRet is an LLVM-specific concept, so it takes precedence over general ABI
50 // concerns. However, this rule will be used by C/C++ frontends to implement
51 // structure return.
52 CCIfSRet<CCAssignToReg<[X8]>>,
53
54 // Put ByVal arguments directly on the stack. Minimum size and alignment of a
55 // slot is 64-bit.
56 CCIfByVal<CCPassByVal<8, 8>>,
57
58 // Canonicalise the various types that live in different floating-point
59 // registers. This makes sense because the PCS does not distinguish Short
60 // Vectors and Floating-point types.
61 CCIfType<[v2i8], CCBitConvertToType<f16>>,
62 CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
63 CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
64 CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
65 CCBitConvertToType<f128>>,
66
67 // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
68 // Floating-point or Short Vector Type and the NSRN is less than 8, then the
69 // argument is allocated to the least significant bits of register
70 // v[NSRN]. The NSRN is incremented by one. The argument has now been
71 // allocated."
72 CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
73 CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
74 CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
75 CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
76
77 // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
78 // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
79 // argument is allocated to SIMD and Floating-point registers (with one
80 // register per element of the HFA). The NSRN is incremented by the number of
81 // registers used. The argument has now been allocated."
82 //
83 // N.b. As above, this rule is the responsibility of the front-end.
84
85 // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
86 // the argument is rounded up to the nearest multiple of 8 bytes."
87 //
88 // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
89 // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
90 // Alignment of the Argument's type."
91 //
92 // It is expected that these will be satisfied by adding dummy arguments to
93 // the prototype.
94
95 // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
96 // type then the size of the argument is set to 8 bytes. The effect is as if
97 // the argument had been copied to the least significant bits of a 64-bit
98 // register and the remaining bits filled with unspecified values."
99 CCIfType<[f16, f32], CCPromoteToType<f64>>,
100
101 // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
102 // precision Floating-point or Short Vector Type, then the argument is copied
103 // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
104 // argument. The argument has now been allocated."
105 CCIfType<[f64], CCAssignToStack<8, 8>>,
106 CCIfType<[f128], CCAssignToStack<16, 16>>,
107
108 // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
109 // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
110 // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
111 // one. The argument has now been allocated."
112
113 // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
114 // represented as two i64s, the first one being split. If we delayed this
115 // operation C.8 would never be reached.
116 CCIfType<[i64],
117 CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
118
119 // Note: the promotion also implements C.14.
120 CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
121
122 // And now the real implementation of C.7
123 CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
124
125 // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
126 // up to the next even number."
127 //
128 // "C.9: If the argument is an Integral Type, the size of the argument is
129 // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
130 // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
131 // memory representation of the argument. The NGRN is incremented by two. The
132 // argument has now been allocated."
133 //
134 // Subtlety here: what if alignment is 16 but it is not an integral type? All
135 // floating-point types have been allocated already, which leaves composite
136 // types: this is why a front-end may need to produce i128 for a struct <= 16
137 // bytes.
138
139 // PCS: "C.10 If the argument is a Composite Type and the size in double-words
140 // of the argument is not more than 8 minus NGRN, then the argument is copied
141 // into consecutive general-purpose registers, starting at x[NGRN]. The
142 // argument is passed as though it had been loaded into the registers from a
143 // double-word aligned address with an appropriate sequence of LDR
144 // instructions loading consecutive registers from memory (the contents of any
145 // unused parts of the registers are unspecified by this standard). The NGRN
146 // is incremented by the number of registers used. The argument has now been
147 // allocated."
148 //
149 // Another one that's the responsibility of the front-end (sigh).
150
151 // PCS: "C.11: The NGRN is set to 8."
152 CCCustom<"CC_AArch64NoMoreRegs">,
153
154 // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
155 // Alignment of the argument's type."
156 //
157 // PCS: "C.13: If the argument is a composite type then the argument is copied
158 // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
159 // argument. The argument has now been allocated."
160 //
161 // Note that the effect of this corresponds to a memcpy rather than register
162 // stores so that the struct ends up correctly addressable at the adjusted
163 // NSAA.
164
165 // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
166 // of the argument is set to 8 bytes. The effect is as if the argument was
167 // copied to the least significant bits of a 64-bit register and the remaining
168 // bits filled with unspecified values."
169 //
170 // Integer types were widened above. Floating-point and composite types have
171 // already been allocated completely. Nothing to do.
172
173 // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
174 // is incremented by the size of the argument. The argument has now been
175 // allocated."
176 CCIfType<[i64], CCIfSplit>>,
177 CCIfType<[i64], CCAssignToStack<8, 8>>
178
179 ]>;
180
181 // According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
182 // of vector registers (8-15) are callee-saved. The order here is picked up
183 // by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
184 // stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
185 // [sp-16], ...
186 def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
187 (sequence "D%u", 15, 8))>;
188
189
190 // TLS descriptor calls are extremely restricted in their changes, to allow
191 // optimisations in the (hopefully) more common fast path where no real action
192 // is needed. They actually have to preserve all registers, except for the
193 // unavoidable X30 and the return register X0.
194 def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
195 (sequence "Q%u", 31, 0))>;
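The comments at the top of this file place the burden of PCS-compliant structure passing on the front-end. As a concrete illustration of the 16-byte boundary they describe (the code below is hypothetical and not part of this patch):

#include <cstdint>

struct Small { int64_t a, b; };   // 16 bytes: per the rules above, a front-end
                                  // passes and returns this in registers, as a
                                  // [2 x i64]-style composite.

struct Big { int64_t a, b, c; };  // 24 bytes: larger than 16 bytes, so a front-end
                                  // passes a pointer to a caller-made copy and
                                  // returns it through an sret pointer.

int64_t sum_small(Small s) { return s.a + s.b; }       // fields arrive in registers
int64_t sum_big(Big b)     { return b.a + b.b + b.c; } // the struct arrives via a pointer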
0 //===-- AArch64ConstantIslandPass.cpp - AArch64 constant islands ----------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that splits the constant pool up into 'islands'
10 // which are scattered throughout the function. This is required due to the
11 // limited pc-relative displacements that AArch64 has.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #define DEBUG_TYPE "aarch64-cp-islands"
16 #include "AArch64.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "AArch64MachineFunctionInfo.h"
21 #include "MCTargetDesc/AArch64BaseInfo.h"
22 #include "llvm/CodeGen/MachineConstantPool.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineJumpTableInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/Target/TargetMachine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/Format.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/Support/CommandLine.h"
38 #include <algorithm>
39 using namespace llvm;
40
41 STATISTIC(NumCPEs, "Number of constpool entries");
42 STATISTIC(NumSplit, "Number of uncond branches inserted");
43 STATISTIC(NumCBrFixed, "Number of cond branches fixed");
44
45 // FIXME: This option should be removed once it has received sufficient testing.
46 static cl::opt
47 AlignConstantIslands("aarch64-align-constant-islands", cl::Hidden,
48 cl::init(true), cl::desc("Align constant islands in code"));
49
50 /// Return the worst case padding that could result from unknown offset bits.
51 /// This does not include alignment padding caused by known offset bits.
52 ///
53 /// @param LogAlign log2(alignment)
54 /// @param KnownBits Number of known low offset bits.
55 static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
56 if (KnownBits < LogAlign)
57 return (1u << LogAlign) - (1u << KnownBits);
58 return 0;
59 }
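A worked instance of the formula above, with values chosen only for illustration:

#include <cassert>

int main() {
  // The successor block wants 8-byte alignment (LogAlign == 3) but only the low
  // bit of the offset is known (KnownBits == 1). The offset is then some
  // multiple of 2, so up to 8 - 2 == 6 bytes of padding may be needed.
  unsigned LogAlign = 3, KnownBits = 1;
  unsigned Worst = (1u << LogAlign) - (1u << KnownBits);
  assert(Worst == 6);
  return 0;
}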
60
61 namespace {
62 /// Due to limited PC-relative displacements, AArch64 requires constant pool
63 /// entries to be scattered among the instructions inside a function. To do
64 /// this, it completely ignores the normal LLVM constant pool; instead, it
65 /// places constants wherever it feels like with special instructions.
66 ///
67 /// The terminology used in this pass includes:
68 /// Islands - Clumps of constants placed in the function.
69 /// Water - Potential places where an island could be formed.
70 /// CPE - A constant pool entry that has been placed somewhere, which
71 /// tracks a list of users.
72 class AArch64ConstantIslands : public MachineFunctionPass {
73 /// Information about the offset and size of a single basic block.
74 struct BasicBlockInfo {
75 /// Distance from the beginning of the function to the beginning of this
76 /// basic block.
77 ///
78 /// Offsets are computed assuming worst case padding before an aligned
79 /// block. This means that subtracting basic block offsets always gives a
80 /// conservative estimate of the real distance which may be smaller.
81 ///
82 /// Because worst case padding is used, the computed offset of an aligned
83 /// block may not actually be aligned.
84 unsigned Offset;
85
86 /// Size of the basic block in bytes. If the block contains inline
87 /// assembly, this is a worst case estimate.
88 ///
89 /// The size does not include any alignment padding whether from the
90 /// beginning of the block, or from an aligned jump table at the end.
91 unsigned Size;
92
93 /// The number of low bits in Offset that are known to be exact. The
94 /// remaining bits of Offset are an upper bound.
95 uint8_t KnownBits;
96
97 /// When non-zero, the block contains instructions (inline asm) of unknown
98 /// size. The real size may be smaller than Size bytes by a multiple of 1
99 /// << Unalign.
100 uint8_t Unalign;
101
102 BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
103
104 /// Compute the number of known offset bits internally to this block.
105 /// This number should be used to predict worst case padding when
106 /// splitting the block.
107 unsigned internalKnownBits() const {
108 unsigned Bits = Unalign ? Unalign : KnownBits;
109 // If the block size isn't a multiple of the known bits, assume the
110 // worst case padding.
111 if (Size & ((1u << Bits) - 1))
112 Bits = CountTrailingZeros_32(Size);
113 return Bits;
114 }
115
116 /// Compute the offset immediately following this block. If LogAlign is
117 /// specified, return the offset the successor block will get if it has
118 /// this alignment.
119 unsigned postOffset(unsigned LogAlign = 0) const {
120 unsigned PO = Offset + Size;
121 if (!LogAlign)
122 return PO;
123 // Add alignment padding from the terminator.
124 return PO + UnknownPadding(LogAlign, internalKnownBits());
125 }
126
127 /// Compute the number of known low bits of postOffset. If this block
128 /// contains inline asm, the number of known bits drops to the
129 /// instruction alignment. An aligned terminator may increase the number
130 /// of known bits.
131 /// If LogAlign is given, also consider the alignment of the next block.
132 unsigned postKnownBits(unsigned LogAlign = 0) const {
133 return std::max(LogAlign, internalKnownBits());
134 }
135 };
136
137 std::vector<BasicBlockInfo> BBInfo;
138
139 /// A sorted list of basic blocks where islands could be placed (i.e. blocks
140 /// that don't fall through to the following block, due to a return,
141 /// unreachable, or unconditional branch).
142 std::vector<MachineBasicBlock*> WaterList;
143
144 /// The subset of WaterList that was created since the previous iteration by
145 /// inserting unconditional branches.
146 SmallSet<MachineBasicBlock*, 4> NewWaterList;
147
148 typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
149
150 /// One user of a constant pool, keeping the machine instruction pointer,
151 /// the constant pool being referenced, and the number of bits used by the
152 /// instruction for displacement. The HighWaterMark records the highest
153 /// basic block where a new CPEntry can be placed. To ensure this pass
154 /// terminates, the CP entries are initially placed at the end of the
155 /// function and then move monotonically to lower addresses. The exception
156 /// to this rule is when the current CP entry for a particular CPUser is out
157 /// of range, but there is another CP entry for the same constant value in
158 /// range. We want to use the existing in-range CP entry, but if it later
159 /// moves out of range, the search for new water should resume where it left
160 /// off. The HighWaterMark is used to record that point.
161 struct CPUser {
162 MachineInstr *MI;
163 MachineInstr *CPEMI;
164 MachineBasicBlock *HighWaterMark;
165 private:
166 unsigned OffsetBits;
167 public:
168 CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned offsetbits)
169 : MI(mi), CPEMI(cpemi), OffsetBits(offsetbits) {
170 HighWaterMark = CPEMI->getParent();
171 }
172 /// Returns the number of bits used to specify the offset.
173 unsigned getOffsetBits() const {
174 return OffsetBits;
175 }
176
177 /// Returns the maximum positive displacement possible from this CPUser
178 /// (essentially INT_MAX * 4).
179 unsigned getMaxPosDisp() const {
180 return (1 << (OffsetBits - 1)) - 1;
181 }
182 };
183
184 /// Keep track of all of the machine instructions that use various constant
185 /// pools and their max displacement.
186 std::vector<CPUser> CPUsers;
187
188 /// One per constant pool entry, keeping the machine instruction pointer,
189 /// the constpool index, and the number of CPUser's which reference this
190 /// entry.
191 struct CPEntry {
192 MachineInstr *CPEMI;
193 unsigned CPI;
194 unsigned RefCount;
195 CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
196 : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
197 };
198
199 /// Keep track of all of the constant pool entry machine instructions. For
200 /// each original constpool index (i.e. those that existed upon entry to
201 /// this pass), it keeps a vector of entries. Original elements are cloned
202 /// as we go along; the clones are put in the vector of the original
203 /// element, but have distinct CPIs.
204 std::vector<std::vector<CPEntry> > CPEntries;
205
206 /// One per immediate branch, keeping the machine instruction pointer,
207 /// conditional or unconditional, the max displacement, and (if IsCond is
208 /// true) the corresponding inverted branch opcode.
209 struct ImmBranch {
210 MachineInstr *MI;
211 unsigned OffsetBits : 31;
212 bool IsCond : 1;
213 ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
214 : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
215 };
216
217 /// Keep track of all the immediate branch instructions.
218 ///
219 std::vector<ImmBranch> ImmBranches;
220
221 MachineFunction *MF;
222 MachineConstantPool *MCP;
223 const AArch64InstrInfo *TII;
224 const AArch64Subtarget *STI;
225 AArch64MachineFunctionInfo *AFI;
226 public:
227 static char ID;
228 AArch64ConstantIslands() : MachineFunctionPass(ID) {}
229
230 virtual bool runOnMachineFunction(MachineFunction &MF);
231
232 virtual const char *getPassName() const {
233 return "AArch64 constant island placement pass";
234 }
235
236 private:
237 void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
238 CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
239 unsigned getCPELogAlign(const MachineInstr *CPEMI);
240 void scanFunctionJumpTables();
241 void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
242 MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
243 void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
244 void adjustBBOffsetsAfter(MachineBasicBlock *BB);
245 bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
246 int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
247 bool findAvailableWater(CPUser &U, unsigned UserOffset,
248 water_iterator &WaterIter);
249 void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
250 MachineBasicBlock *&NewMBB);
251 bool handleConstantPoolUser(unsigned CPUserIndex);
252 void removeDeadCPEMI(MachineInstr *CPEMI);
253 bool removeUnusedCPEntries();
254 bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
255 MachineInstr *CPEMI, unsigned OffsetBits,
256 bool DoDump = false);
257 bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
258 CPUser &U, unsigned &Growth);
259 bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
260 unsigned OffsetBits);
261 bool fixupImmediateBr(ImmBranch &Br);
262 bool fixupConditionalBr(ImmBranch &Br);
263
264 void computeBlockSize(MachineBasicBlock *MBB);
265 unsigned getOffsetOf(MachineInstr *MI) const;
266 unsigned getUserOffset(CPUser&) const;
267 void dumpBBs();
268 void verify();
269
270 bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
271 unsigned BitsAvailable);
272 bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
273 const CPUser &U) {
274 return isOffsetInRange(UserOffset, TrialOffset, U.getOffsetBits());
275 }
276 };
277 char AArch64ConstantIslands::ID = 0;
278 }
279
280 /// check BBOffsets, BBSizes, alignment of islands
281 void AArch64ConstantIslands::verify() {
282 #ifndef NDEBUG
283 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
284 MBBI != E; ++MBBI) {
285 MachineBasicBlock *MBB = MBBI;
286 unsigned MBBId = MBB->getNumber();
287 assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
288 }
289 DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
290 for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
291 CPUser &U = CPUsers[i];
292 unsigned UserOffset = getUserOffset(U);
293 // Verify offset using the real max displacement without the safety
294 // adjustment.
295 if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getOffsetBits(),
296 /* DoDump = */ true)) {
297 DEBUG(dbgs() << "OK\n");
298 continue;
299 }
300 DEBUG(dbgs() << "Out of range.\n");
301 dumpBBs();
302 DEBUG(MF->dump());
303 llvm_unreachable("Constant pool entry out of range!");
304 }
305 #endif
306 }
307
308 /// print block size and offset information - debugging
309 void AArch64ConstantIslands::dumpBBs() {
310 DEBUG({
311 for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
312 const BasicBlockInfo &BBI = BBInfo[J];
313 dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
314 << " kb=" << unsigned(BBI.KnownBits)
315 << " ua=" << unsigned(BBI.Unalign)
316 << format(" size=%#x\n", BBInfo[J].Size);
317 }
318 });
319 }
320
321 /// Returns an instance of the constpool island pass.
322 FunctionPass *llvm::createAArch64ConstantIslandPass() {
323 return new AArch64ConstantIslands();
324 }
325
326 bool AArch64ConstantIslands::runOnMachineFunction(MachineFunction &mf) {
327 MF = &mf;
328 MCP = mf.getConstantPool();
329
330 DEBUG(dbgs() << "***** AArch64ConstantIslands: "
331 << MCP->getConstants().size() << " CP entries, aligned to "
332 << MCP->getConstantPoolAlignment() << " bytes *****\n");
333
334 TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
335 AFI = MF->getInfo<AArch64MachineFunctionInfo>();
336 STI = &MF->getTarget().getSubtarget<AArch64Subtarget>();
337
338 // This pass invalidates liveness information when it splits basic blocks.
339 MF->getRegInfo().invalidateLiveness();
340
341 // Renumber all of the machine basic blocks in the function, guaranteeing that
342 // the numbers agree with the position of the block in the function.
343 MF->RenumberBlocks();
344
345 // Perform the initial placement of the constant pool entries. To start with,
346 // we put them all at the end of the function.
347 std::vector<MachineInstr*> CPEMIs;
348 if (!MCP->isEmpty())
349 doInitialPlacement(CPEMIs);
350
351 /// The next UID to take is the first unused one.
352 AFI->initPICLabelUId(CPEMIs.size());
353
354 // Do the initial scan of the function, building up information about the
355 // sizes of each block, the location of all the water, and finding all of the
356 // constant pool users.
357 initializeFunctionInfo(CPEMIs);
358 CPEMIs.clear();
359 DEBUG(dumpBBs());
360
361
362 /// Remove dead constant pool entries.
363 bool MadeChange = removeUnusedCPEntries();
364
365 // Iteratively place constant pool entries and fix up branches until there
366 // is no change.
367 unsigned NoCPIters = 0, NoBRIters = 0;
368 while (true) {
369 DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
370 bool CPChange = false;
371 for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
372 CPChange |= handleConstantPoolUser(i);
373 if (CPChange && ++NoCPIters > 30)
374 report_fatal_error("Constant Island pass failed to converge!");
375 DEBUG(dumpBBs());
376
377 // Clear NewWaterList now. If we split a block for branches, it should
378 // appear as "new water" for the next iteration of constant pool placement.
379 NewWaterList.clear();
380
381 DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
382 bool BRChange = false;
383 for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
384 BRChange |= fixupImmediateBr(ImmBranches[i]);
385 if (BRChange && ++NoBRIters > 30)
386 report_fatal_error("Branch Fix Up pass failed to converge!");
387 DEBUG(dumpBBs());
388
389 if (!CPChange && !BRChange)
390 break;
391 MadeChange = true;
392 }
393
394 // After a while, this might be made debug-only, but it is not expensive.
395 verify();
396
397 DEBUG(dbgs() << '\n'; dumpBBs());
398
399 BBInfo.clear();
400 WaterList.clear();
401 CPUsers.clear();
402 CPEntries.clear();
403 ImmBranches.clear();
404
405 return MadeChange;
406 }
407
408 /// Perform the initial placement of the constant pool entries. To start with,
409 /// we put them all at the end of the function.
410 void
411 AArch64ConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
412 // Create the basic block to hold the CPE's.
413 MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
414 MF->push_back(BB);
415
416 // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
417 unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
418
419 // Mark the basic block as required by the const-pool.
420 // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
421 BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
422
423 // The function needs to be as aligned as the basic blocks. The linker may
424 // move functions around based on their alignment.
425 MF->ensureAlignment(BB->getAlignment());
426
427 // Order the entries in BB by descending alignment. That ensures correct
428 // alignment of all entries as long as BB is sufficiently aligned. Keep
429 // track of the insertion point for each alignment. We are going to bucket
430 // sort the entries as they are created.
431 SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
432
433 // Add all of the constants from the constant pool to the end block, use an
434 // identity mapping of CPI's to CPE's.
435 const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
436
437 const DataLayout &TD = *MF->getTarget().getDataLayout();
438 for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
439 unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
440 assert(Size >= 4 && "Too small constant pool entry");
441 unsigned Align = CPs[i].getAlignment();
442 assert(isPowerOf2_32(Align) && "Invalid alignment");
443 // Verify that all constant pool entries are a multiple of their alignment.
444 // If not, we would have to pad them out so that instructions stay aligned.
445 assert((Size % Align) == 0 && "CP Entry not multiple of 4 bytes!");
446
447 // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
448 unsigned LogAlign = Log2_32(Align);
449 MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
450 MachineInstr *CPEMI =
451 BuildMI(*BB, InsAt, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY))
452 .addImm(i).addConstantPoolIndex(i).addImm(Size);
453 CPEMIs.push_back(CPEMI);
454
455 // Ensure that future entries with higher alignment get inserted before
456 // CPEMI. This is bucket sort with iterators.
457 for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
458 if (InsPoint[a] == InsAt)
459 InsPoint[a] = CPEMI;
460
461 // Add a new CPEntry, but no corresponding CPUser yet.
462 std::vector<CPEntry> CPEs;
463 CPEs.push_back(CPEntry(CPEMI, i));
464 CPEntries.push_back(CPEs);
465 ++NumCPEs;
466 DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
467 << Size << ", align = " << Align <<'\n');
468 }
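  // At this point the entries in BB are in descending alignment order. For
  // example, if constants arrive with alignments 4, 16 and 8 (in that
  // order), the bucket sort above lays them out as 16, 8, 4: whenever an
  // entry is inserted, every insertion point for a larger alignment that
  // coincided with it is pulled back to just before the new entry.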
469 DEBUG(BB->dump());
470 }
471
472 /// Return true if the specified basic block can fallthrough into the block
473 /// immediately after it.
474 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
475 // Get the next machine basic block in the function.
476 MachineFunction::iterator MBBI = MBB;
477 // Can't fall off end of function.
478 if (llvm::next(MBBI) == MBB->getParent()->end())
479 return false;
480
481 MachineBasicBlock *NextBB = llvm::next(MBBI);
482 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
483 E = MBB->succ_end(); I != E; ++I)
484 if (*I == NextBB)
485 return true;
486
487 return false;
488 }
489
490 /// Given the constpool index and CONSTPOOL_ENTRY MI, look up the corresponding
491 /// CPEntry.
492 AArch64ConstantIslands::CPEntry
493 *AArch64ConstantIslands::findConstPoolEntry(unsigned CPI,
494 const MachineInstr *CPEMI) {
495 std::vector<CPEntry> &CPEs = CPEntries[CPI];
496 // Number of entries per constpool index should be small, just do a
497 // linear search.
498 for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
499 if (CPEs[i].CPEMI == CPEMI)
500 return &CPEs[i];
501 }
502 return NULL;
503 }
504
505 /// Returns the required alignment of the constant pool entry represented by
506 /// CPEMI. Alignment is measured in log2(bytes) units.
507 unsigned AArch64ConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
508 assert(CPEMI && CPEMI->getOpcode() == AArch64::CONSTPOOL_ENTRY);
509
510 // Everything is 4-byte aligned unless AlignConstantIslands is set.
511 if (!AlignConstantIslands)
512 return 2;
513
514 unsigned CPI = CPEMI->getOperand(1).getIndex();
515 assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
516 unsigned Align = MCP->getConstants()[CPI].getAlignment();
517 assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
518 return Log2_32(Align);
519 }
520
521 /// Do the initial scan of the function, building up information about the sizes
522 /// of each block, the location of all the water, and finding all of the
523 /// constant pool users.
524 void AArch64ConstantIslands::
525 initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
526 BBInfo.clear();
527 BBInfo.resize(MF->getNumBlockIDs());
528
529 // First thing, compute the size of all basic blocks, and see if the function
530 // has any inline assembly in it. If so, we have to be conservative about
531 // alignment assumptions, as we don't know for sure the size of any
532 // instructions in the inline assembly.
533 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
534 computeBlockSize(I);
535
536 // The known bits of the entry block offset are determined by the function
537 // alignment.
538 BBInfo.front().KnownBits = MF->getAlignment();
539
540 // Compute block offsets and known bits.
541 adjustBBOffsetsAfter(MF->begin());
542
543 // Now go back through the instructions and build up our data structures.
544 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
545 MBBI != E; ++MBBI) {
546 MachineBasicBlock &MBB = *MBBI;
547
548 // If this block doesn't fall through into the next MBB, then this is
550 // 'water' where a constant pool island could be placed.
550 if (!BBHasFallthrough(&MBB))
551 WaterList.push_back(&MBB);
552
553 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
554 I != E; ++I) {
555 if (I->isDebugValue())
556 continue;
557
558 int Opc = I->getOpcode();
559 if (I->isBranch()) {
560 bool IsCond = false;
561
562 // The offsets encoded in instructions here scale by the instruction
563 // size (4 bytes), effectively increasing their range by 2 bits.
564 unsigned Bits = 0;
565 switch (Opc) {
566 default:
567 continue; // Ignore other JT branches
568 case AArch64::TBZxii:
569 case AArch64::TBZwii:
570 case AArch64::TBNZxii:
571 case AArch64::TBNZwii:
572 IsCond = true;
573 Bits = 14 + 2;
574 break;
575 case AArch64::Bcc:
576 case AArch64::CBZx:
577 case AArch64::CBZw:
578 case AArch64::CBNZx:
579 case AArch64::CBNZw:
580 IsCond = true;
581 Bits = 19 + 2;
582 break;
583 case AArch64::Bimm:
584 Bits = 26 + 2;
585 break;
586 }
587
588 // Record this immediate branch.
589 ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
590 }
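        // Rough arithmetic behind the Bits values above: Bcc/CBZ/CBNZ encode
        // a 19-bit signed word offset (about +/-1MiB in bytes), TBZ/TBNZ a
        // 14-bit one (about +/-32KiB) and B a 26-bit one (about +/-128MiB);
        // recording the width as "bits + 2" folds in the 4-byte scaling so
        // isBBInRange() can test the byte displacement directly with isIntN.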
591
592 if (Opc == AArch64::CONSTPOOL_ENTRY)
593 continue;
594
595 // Scan the instructions for constant pool operands.
596 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
597 if (I->getOperand(op).isCPI()) {
598 // We found one. The addressing mode tells us the max displacement
599 // from the PC that this instruction permits.
600
601 // The offsets encoded in instructions here scale by the instruction
602 // size (4 bytes), effectively increasing their range by 2 bits.
603 unsigned Bits = 0;
604
605 switch (Opc) {
606 default:
607 llvm_unreachable("Unknown addressing mode for CP reference!");
608
609 case AArch64::LDRw_lit:
610 case AArch64::LDRx_lit:
611 case AArch64::LDRs_lit:
612 case AArch64::LDRd_lit:
613 case AArch64::LDRq_lit:
614 case AArch64::LDRSWx_lit:
615 case AArch64::PRFM_lit:
616 Bits = 19 + 2;
617 }
618
619 // Remember that this is a user of a CP entry.
620 unsigned CPI = I->getOperand(op).getIndex();
621 MachineInstr *CPEMI = CPEMIs[CPI];
622 CPUsers.push_back(CPUser(I, CPEMI, Bits));
623
624 // Increment corresponding CPEntry reference count.
625 CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
626 assert(CPE && "Cannot find a corresponding CPEntry!");
627 CPE->RefCount++;
628
629 // Instructions can only use one CP entry, don't bother scanning the
630 // rest of the operands.
631 break;
632 }
633 }
634 }
635 }
636
637 /// Compute the size and some alignment information for MBB. This function
638 /// updates BBInfo directly.
639 void AArch64ConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
640 BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
641 BBI.Size = 0;
642 BBI.Unalign = 0;
643
644 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
645 ++I) {
646 BBI.Size += TII->getInstSizeInBytes(*I);
648 // For inline asm, getInstSizeInBytes returns a conservative estimate.
648 // The actual size may be smaller, but still a multiple of the instr size.
649 if (I->isInlineAsm())
650 BBI.Unalign = 2;
651 }
652 }
653
654 /// Return the current offset of the specified machine instruction from the
655 /// start of the function. This offset changes as stuff is moved around inside
656 /// the function.
657 unsigned AArch64ConstantIslands::getOffsetOf(MachineInstr *MI) const {
658 MachineBasicBlock *MBB = MI->getParent();
659
660 // The offset is composed of two things: the sum of the sizes of all MBB's
661 // before this instruction's block, and the offset from the start of the block
662 // it is in.
663 unsigned Offset = BBInfo[MBB->getNumber()].Offset;
664
665 // Sum instructions before MI in MBB.
666 for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
667 assert(I != MBB->end() && "Didn't find MI in its own basic block?");
668 Offset += TII->getInstSizeInBytes(*I);
669 }
670 return Offset;
671 }
672
673 /// Little predicate function to sort the WaterList by MBB ID.
674 static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
675 const MachineBasicBlock *RHS) {
676 return LHS->getNumber() < RHS->getNumber();
677 }
678
679 /// When a block is newly inserted into the machine function, it upsets all of
680 /// the block numbers. Renumber the blocks and update the arrays that parallel
681 /// this numbering.
682 void AArch64ConstantIslands::
683 updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
684 // Renumber the MBB's to keep them consecutive.
685 NewBB->getParent()->RenumberBlocks(NewBB);
686
687 // Insert an entry into BBInfo to align it properly with the (newly
688 // renumbered) block numbers.
689 BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
690
691 // Next, update WaterList. Specifically, we need to add NewMBB as having
692 // available water after it.
693 water_iterator IP =
694 std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
695 CompareMBBNumbers);
696 WaterList.insert(IP, NewBB);
697 }
698
699
700 /// Split the basic block containing MI into two blocks, which are joined by
701 /// an unconditional branch. Update data structures and renumber blocks to
702 /// account for this change and return the newly created block.
703 MachineBasicBlock *
704 AArch64ConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
705 MachineBasicBlock *OrigBB = MI->getParent();
706
707 // Create a new MBB for the code after the OrigBB.
708 MachineBasicBlock *NewBB =
709 MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
710 MachineFunction::iterator MBBI = OrigBB; ++MBBI;
711 MF->insert(MBBI, NewBB);
712
713 // Splice the instructions starting with MI over to NewBB.
714 NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
715
716 // Add an unconditional branch from OrigBB to NewBB.
717 // Note the new unconditional branch is not being recorded.
718 // There doesn't seem to be meaningful DebugInfo available; this doesn't
719 // correspond to anything in the source.
720 BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
721 ++NumSplit;
722
723 // Update the CFG. All succs of OrigBB are now succs of NewBB.
724 NewBB->transferSuccessors(OrigBB);
725
726 // OrigBB branches to NewBB.
727 OrigBB->addSuccessor(NewBB);
728
729 // Update internal data structures to account for the newly inserted MBB.
730 // This is almost the same as updateForInsertedWaterBlock, except that
731 // the Water goes after OrigBB, not NewBB.
732 MF->RenumberBlocks(NewBB);
733
734 // Insert an entry into BBInfo to align it properly with the (newly
735 // renumbered) block numbers.
736 BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
737
738 // Next, update WaterList. Specifically, we need to add OrigMBB as having
739 // available water after it (but not if it's already there, which happens
740 // when splitting before a conditional branch that is followed by an
741 // unconditional branch - in that case we want to insert NewBB).
742 water_iterator IP =
743 std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
744 CompareMBBNumbers);
745 MachineBasicBlock* WaterBB = *IP;
746 if (WaterBB == OrigBB)
747 WaterList.insert(llvm::next(IP), NewBB);
748 else
749 WaterList.insert(IP, OrigBB);
750 NewWaterList.insert(OrigBB);
751
752 // Figure out how large the OrigBB is. As the first half of the original
753 // block, it cannot contain a tablejump. The size includes
754 // the new jump we added. (It should be possible to do this without
755 // recounting everything, but it's very confusing, and this is rarely
756 // executed.)
757 computeBlockSize(OrigBB);
758
759 // Figure out how large the NewMBB is. As the second half of the original
760 // block, it may contain a tablejump.
761 computeBlockSize(NewBB);
762
763 // All BBOffsets following these blocks must be modified.
764 adjustBBOffsetsAfter(OrigBB);
765
766 return NewBB;
767 }
768
769 /// Compute the offset of U.MI as seen by the hardware displacement computation.
770 unsigned AArch64ConstantIslands::getUserOffset(CPUser &U) const {
771 return getOffsetOf(U.MI);
772 }
773
774 /// Checks whether UserOffset (the location of a constant pool reference) is
775 /// within OffsetBits of TrialOffset (a proposed location of a constant pool
776 /// entry).
777 bool AArch64ConstantIslands::isOffsetInRange(unsigned UserOffset,
778 unsigned TrialOffset,
779 unsigned OffsetBits) {
780 return isIntN(OffsetBits, static_cast<int64_t>(TrialOffset) - UserOffset);
781 }
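// Worked example: for a load-literal user with OffsetBits == 21 the check
// above accepts signed byte distances in [-1048576, 1048575]; since every
// offset in this pass is a multiple of 4, that matches the hardware's
// roughly +/-1MiB literal reach.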
782
783 /// Returns true if a CPE placed after the specified Water (a basic block) will
784 /// be in range for the specific MI.
785 ///
786 /// Compute how much the function will grow by inserting a CPE after Water.
787 bool AArch64ConstantIslands::isWaterInRange(unsigned UserOffset,
788 MachineBasicBlock* Water, CPUser &U,
789 unsigned &Growth) {
790 unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
791 unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
792 unsigned NextBlockOffset, NextBlockAlignment;
793 MachineFunction::const_iterator NextBlock = Water;
794 if (++NextBlock == MF->end()) {
795 NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
796 NextBlockAlignment = 0;
797 } else {
798 NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
799 NextBlockAlignment = NextBlock->getAlignment();
800 }
801 unsigned Size = U.CPEMI->getOperand(2).getImm();
802 unsigned CPEEnd = CPEOffset + Size;
803
804 // The CPE may be able to hide in the alignment padding before the next
805 // block. It may also cause more padding to be required if it is more aligned
806 // than the next block.
807 if (CPEEnd > NextBlockOffset) {
808 Growth = CPEEnd - NextBlockOffset;
809 // Compute the padding that would go at the end of the CPE to align the next
810 // block.
811 Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
812
813 // If the CPE is to be inserted before the instruction, that will raise
814 // the offset of the instruction. Also account for unknown alignment padding
815 // in blocks between CPE and the user.
816 if (CPEOffset < UserOffset)
817 UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
818 } else
819 // CPE fits in existing padding.
820 Growth = 0;
821
822 return isOffsetInRange(UserOffset, CPEOffset, U);
823 }
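// Rough example of the growth computation above, ignoring unknown-alignment
// padding: if the water block ends at 0x100, the next block starts at 0x100
// with 4-byte alignment, and the CPE is 8 bytes with 8-byte alignment, then
// CPEOffset == 0x100, CPEEnd == 0x108 and Growth == 8 (no extra padding is
// needed, as 0x108 is already 4-byte aligned).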
824
825 /// Returns true if the distance between specific MI and specific ConstPool
826 /// entry instruction can fit in MI's displacement field.
827 bool AArch64ConstantIslands::isCPEntryInRange(MachineInstr *MI,
828 unsigned UserOffset,
829 MachineInstr *CPEMI,
830 unsigned OffsetBits, bool DoDump) {
831 unsigned CPEOffset = getOffsetOf(CPEMI);
832
833 if (DoDump) {
834 DEBUG({
835 unsigned Block = MI->getParent()->getNumber();
836 const BasicBlockInfo &BBI = BBInfo[Block];
837 dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
838 << " bits available=" << OffsetBits
839 << format(" insn address=%#x", UserOffset)
840 << " in BB#" << Block << ": "
841 << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
842 << format("CPE address=%#x offset=%+d: ", CPEOffset,
843 int(CPEOffset-UserOffset));
844 });
845 }
846
847 return isOffsetInRange(UserOffset, CPEOffset, OffsetBits);
848 }
849
850 #ifndef NDEBUG
851 /// Return true if the specified basic block's only predecessor unconditionally
852 /// branches to its only successor.
853 static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
854 if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
855 return false;
856
857 MachineBasicBlock *Succ = *MBB->succ_begin();
858 MachineBasicBlock *Pred = *MBB->pred_begin();
859 MachineInstr *PredMI = &Pred->back();
860 if (PredMI->getOpcode() == AArch64::Bimm)
861 return PredMI->getOperand(0).getMBB() == Succ;
862 return false;
863 }
864 #endif // NDEBUG
865
866 void AArch64ConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
867 unsigned BBNum = BB->getNumber();
868 for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
869 // Get the offset and known bits at the end of the layout predecessor.
870 // Include the alignment of the current block.
871 unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
872 unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
873 unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
874
875 // This is where block i begins. Stop if the offset is already correct,
876 // and we have updated 2 blocks. This is the maximum number of blocks
877 // changed before calling this function.
878 if (i > BBNum + 2 &&
879 BBInfo[i].Offset == Offset &&
880 BBInfo[i].KnownBits == KnownBits)
881 break;
882
883 BBInfo[i].Offset = Offset;
884 BBInfo[i].KnownBits = KnownBits;
885 }
886 }
887
888 /// Find the constant pool entry with index CPI and instruction CPEMI, and
889 /// decrement its refcount. If the refcount becomes 0 remove the entry and
890 /// instruction. Returns true if we removed the entry, false if we didn't.
891 bool AArch64ConstantIslands::decrementCPEReferenceCount(unsigned CPI,
892 MachineInstr *CPEMI) {
893 // Find the old entry. Eliminate it if it is no longer used.
894 CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
895 assert(CPE && "Unexpected!");
896 if (--CPE->RefCount == 0) {
897 removeDeadCPEMI(CPEMI);
898 CPE->CPEMI = NULL;
899 --NumCPEs;
900 return true;
901 }
902 return false;
903 }
904
905 /// See if the currently referenced CPE is in range; if not, see if an in-range
906 /// clone of the CPE is in range, and if so, change the data structures so the
907 /// user references the clone. Returns:
908 /// 0 = no existing entry found
909 /// 1 = entry found, and there were no code insertions or deletions
910 /// 2 = entry found, and there were code insertions or deletions
911 int AArch64ConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
912 {
913 MachineInstr *UserMI = U.MI;
914 MachineInstr *CPEMI = U.CPEMI;
915
916 // Check to see if the CPE is already in-range.
917 if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getOffsetBits(), true)) {
918 DEBUG(dbgs() << "In range\n");
919 return 1;
920 }
921
922 // No. Look for previously created clones of the CPE that are in range.
923 unsigned CPI = CPEMI->getOperand(1).getIndex();
924 std::vector<CPEntry> &CPEs = CPEntries[CPI];
925 for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
926 // We already tried this one
927 if (CPEs[i].CPEMI == CPEMI)
928 continue;
929 // Removing CPEs can leave empty entries, skip
930 if (CPEs[i].CPEMI == NULL)
931 continue;
932 if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getOffsetBits())) {
933 DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
934 << CPEs[i].CPI << "\n");
935 // Point the CPUser node to the replacement
936 U.CPEMI = CPEs[i].CPEMI;
937 // Change the CPI in the instruction operand to refer to the clone.
938 for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
939 if (UserMI->getOperand(j).isCPI()) {
940 UserMI->getOperand(j).setIndex(CPEs[i].CPI);
941 break;
942 }
943 // Adjust the refcount of the clone...
944 CPEs[i].RefCount++;
945 // ...and the original. If we didn't remove the old entry, none of the
946 // addresses changed, so we don't need another pass.
947 return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
948 }
949 }
950 return 0;
951 }
952
953 /// Look for an existing entry in the WaterList in which we can place the CPE
954 /// referenced from U so it's within range of U's MI. Returns true if found,
955 /// false if not. If it returns true, WaterIter is set to the WaterList
956 /// entry. To ensure that this pass terminates, the CPE location for a
957 /// particular CPUser is only allowed to move to a lower address, so search
958 /// backward from the end of the list and prefer the first water that is in
959 /// range.
960 bool AArch64ConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
961 water_iterator &WaterIter) {
962 if (WaterList.empty())
963 return false;
964
965 unsigned BestGrowth = ~0u;
966 for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
967 --IP) {
968 MachineBasicBlock* WaterBB = *IP;
969 // Check if water is in range and is either at a lower address than the
970 // current "high water mark" or a new water block that was created since
971 // the previous iteration by inserting an unconditional branch. In the
972 // latter case, we want to allow resetting the high water mark back to
973 // this new water since we haven't seen it before. Inserting branches
974 // should be relatively uncommon and when it does happen, we want to be
975 // sure to take advantage of it for all the CPEs near that block, so that
976 // we don't insert more branches than necessary.
977 unsigned Growth;
978 if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
979 (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
980 NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
981 // This is the least amount of required padding seen so far.
982 BestGrowth = Growth;
983 WaterIter = IP;
984 DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
985 << " Growth=" << Growth << '\n');
986
987 // Keep looking unless it is perfect.
988 if (BestGrowth == 0)
989 return true;
990 }
991 if (IP == B)
992 break;
993 }
994 return BestGrowth != ~0u;
995 }
996
997 /// No existing WaterList entry will work for CPUsers[CPUserIndex], so create a
998 /// place to put the CPE. The end of the block is used if in range, and the
999 /// conditional branch munged so control flow is correct. Otherwise the block
1000 /// is split to create a hole with an unconditional branch around it. In either
1001 /// case NewMBB is set to a block following which the new island can be inserted
1002 /// (the WaterList is not adjusted).
1003 void AArch64ConstantIslands::createNewWater(unsigned CPUserIndex,
1004 unsigned UserOffset,
1005 MachineBasicBlock *&NewMBB) {
1006 CPUser &U = CPUsers[CPUserIndex];
1007 MachineInstr *UserMI = U.MI;
1008 MachineInstr *CPEMI = U.CPEMI;
1009 unsigned CPELogAlign = getCPELogAlign(CPEMI);
1010 MachineBasicBlock *UserMBB = UserMI->getParent();
1011 const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
1012
1013 // If the block does not end in an unconditional branch already, and if the
1014 // end of the block is within range, make new water there.
1015 if (BBHasFallthrough(UserMBB)) {
1016 // Size of branch to insert.
1017 unsigned InstrSize = 4;
1018 // Compute the offset where the CPE will begin.
1019 unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + InstrSize;
1020
1021 if (isOffsetInRange(UserOffset, CPEOffset, U)) {
1022 DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
1023 << format(", expected CPE offset %#x\n", CPEOffset));
1024 NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
1025 // Add an unconditional branch from UserMBB to fallthrough block. Record
1026 // it for branch lengthening; this new branch will not get out of range,
1027 // but if the preceding conditional branch is out of range, the targets
1028 // will be exchanged, and the altered branch may be out of range, so the
1029 // machinery has to know about it.
1030 BuildMI(UserMBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewMBB);
1031
1032 // 26 bits written down, specifying a multiple of 4.
1033 unsigned OffsetBits = 26 + 2;
1034 ImmBranches.push_back(ImmBranch(&UserMBB->back(), OffsetBits, false));
1035 BBInfo[UserMBB->getNumber()].Size += InstrSize;
1036 adjustBBOffsetsAfter(UserMBB);
1037 return;
1038 }
1039 }
1040
1041 // What a big block. Find a place within the block to split it. We make a
1042 // first guess, then walk through the instructions between the one currently
1043 // being looked at and the possible insertion point, and make sure any other
1044 // instructions that reference CPEs will be able to use the same island area;
1045 // if not, we back up the insertion point.
1046
1047 // Try to split the block so it's fully aligned. Compute the latest split
1048 // point where we can add a 4-byte branch instruction, and then align to
1049 // LogAlign which is the largest possible alignment in the function.
1050 unsigned LogAlign = MF->getAlignment();
1051 assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
1052 unsigned KnownBits = UserBBI.internalKnownBits();
1053 unsigned UPad = UnknownPadding(LogAlign, KnownBits);
1054 unsigned BaseInsertOffset = UserOffset + U.getMaxPosDisp() - UPad;
1055 DEBUG(dbgs() << format("Split in middle of big block before %#x",
1056 BaseInsertOffset));
1057
1058 // The 4 in the following is for the unconditional branch we'll be inserting
1059 // Alignment of the island is handled inside isOffsetInRange.
1060 BaseInsertOffset -= 4;
1061
1062 DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
1063 << " la=" << LogAlign
1064 << " kb=" << KnownBits
1065 << " up=" << UPad << '\n');
1066
1067 // This could point off the end of the block if we've already got constant
1068 // pool entries following this block; only the last one is in the water list.
1069 // Back past any possible branches (allow for a conditional and a maximally
1070 // long unconditional).
1071 if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
1072 BaseInsertOffset = UserBBI.postOffset() - UPad - 8;
1073 DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
1074 }
1075 unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
1076 CPEMI->getOperand(2).getImm();
1077 MachineBasicBlock::iterator MI = UserMI;
1078 ++MI;
1079 unsigned CPUIndex = CPUserIndex+1;
1080 unsigned NumCPUsers = CPUsers.size();
1081 for (unsigned Offset = UserOffset+TII->getInstSizeInBytes(*UserMI);
1082 Offset < BaseInsertOffset;
1083 Offset += TII->getInstSizeInBytes(*MI),
1084 MI = llvm::next(MI)) {
1085 assert(MI != UserMBB->end() && "Fell off end of block");
1086 if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
1087 CPUser &U = CPUsers[CPUIndex];
1088 if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
1089 // Shift insertion point by one unit of alignment so it is within reach.
1090 BaseInsertOffset -= 1u << LogAlign;
1091 EndInsertOffset -= 1u << LogAlign;
1092 }
1093 // This is overly conservative, as we don't account for CPEMIs being
1094 // reused within the block, but it doesn't matter much. Also assume CPEs
1095 // are added in order with alignment padding. We may eventually be able
1096 // to pack the aligned CPEs better.
1097 EndInsertOffset += U.CPEMI->getOperand(2).getImm();
1098 CPUIndex++;
1099 }
1100 }
1101
1102 --MI;
1103 NewMBB = splitBlockBeforeInstr(MI);
1104 }
1105
1106 /// Analyze the specified user, checking to see if it is out-of-range. If so,
1107 /// pick up the constant pool value and move it some place in-range. Return
1108 /// true if we changed any addresses, false otherwise.
1109 bool AArch64ConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
1110 CPUser &U = CPUsers[CPUserIndex];
1111 MachineInstr *UserMI = U.MI;
1112 MachineInstr *CPEMI = U.CPEMI;
1113 unsigned CPI = CPEMI->getOperand(1).getIndex();
1114 unsigned Size = CPEMI->getOperand(2).getImm();
1115 // Compute this only once, it's expensive.
1116 unsigned UserOffset = getUserOffset(U);
1117
1118 // See if the current entry is within range, or there is a clone of it
1119 // in range.
1120 int result = findInRangeCPEntry(U, UserOffset);
1121 if (result==1) return false;
1122 else if (result==2) return true;
1123
1124 // No existing clone of this CPE is within range.
1125 // We will be generating a new clone. Get a UID for it.
1126 unsigned ID = AFI->createPICLabelUId();
1127
1128 // Look for water where we can place this CPE.
1129 MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
1130 MachineBasicBlock *NewMBB;
1131 water_iterator IP;
1132 if (findAvailableWater(U, UserOffset, IP)) {
1133 DEBUG(dbgs() << "Found water in range\n");
1134 MachineBasicBlock *WaterBB = *IP;
1135
1136 // If the original WaterList entry was "new water" on this iteration,
1137 // propagate that to the new island. This is just keeping NewWaterList
1138 // updated to match the WaterList, which will be updated below.
1139 if (NewWaterList.count(WaterBB)) {
1140 NewWaterList.erase(WaterBB);
1141 NewWaterList.insert(NewIsland);
1142 }
1143 // The new CPE goes before the following block (NewMBB).
1144 NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
1145
1146 } else {
1147 // No water found.
1148 DEBUG(dbgs() << "No water found\n");
1149 createNewWater(CPUserIndex, UserOffset, NewMBB);
1150
1151 // splitBlockBeforeInstr adds to WaterList, which is important when it is
1152 // called while handling branches so that the water will be seen on the
1153 // next iteration for constant pools, but in this context, we don't want
1154 // it. Check for this so it will be removed from the WaterList.
1155 // Also remove any entry from NewWaterList.
1156 MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
1157 IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
1158 if (IP != WaterList.end())
1159 NewWaterList.erase(WaterBB);
1160
1161 // We are adding new water. Update NewWaterList.
1162 NewWaterList.insert(NewIsland);
1163 }
1164
1165 // Remove the original WaterList entry; we want subsequent insertions in
1166 // this vicinity to go after the one we're about to insert. This
1167 // considerably reduces the number of times we have to move the same CPE
1168 // more than once and is also important to ensure the algorithm terminates.
1169 if (IP != WaterList.end())
1170 WaterList.erase(IP);
1171
1172 // Okay, we know we can put an island before NewMBB now, do it!
1173 MF->insert(NewMBB, NewIsland);
1174
1175 // Update internal data structures to account for the newly inserted MBB.
1176 updateForInsertedWaterBlock(NewIsland);
1177
1178 // Decrement the old entry, and remove it if refcount becomes 0.
1179 decrementCPEReferenceCount(CPI, CPEMI);
1180
1181 // Now that we have an island to add the CPE to, clone the original CPE and
1182 // add it to the island.
1183 U.HighWaterMark = NewIsland;
1184 U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(AArch64::CONSTPOOL_ENTRY))
1185 .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
1186 CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
1187 ++NumCPEs;
1188
1189 // Mark the basic block as aligned as required by the const-pool entry.
1190 NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
1191
1192 // Increase the size of the island block to account for the new entry.
1193 BBInfo[NewIsland->getNumber()].Size += Size;
1194 adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
1195
1196 // Finally, change the CPI in the instruction operand to be ID.
1197 for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
1198 if (UserMI->getOperand(i).isCPI()) {
1199 UserMI->getOperand(i).setIndex(ID);
1200 break;
1201 }
1202
1203 DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
1204 << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
1205
1206 return true;
1207 }
1208
1209 /// Remove a dead constant pool entry instruction. Update sizes and offsets of
1210 /// impacted basic blocks.
1211 void AArch64ConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
1212 MachineBasicBlock *CPEBB = CPEMI->getParent();
1213 unsigned Size = CPEMI->getOperand(2).getImm();
1214 CPEMI->eraseFromParent();
1215 BBInfo[CPEBB->getNumber()].Size -= Size;
1216 // All succeeding offsets have the current size value added in, fix this.
1217 if (CPEBB->empty()) {
1218 BBInfo[CPEBB->getNumber()].Size = 0;
1219
1220 // This block no longer needs to be aligned.
1221 CPEBB->setAlignment(0);
1222 } else
1223 // Entries are sorted by descending alignment, so realign from the front.
1224 CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
1225
1226 adjustBBOffsetsAfter(CPEBB);
1227 // An island has only one predecessor BB and one successor BB. Check if
1228 // this BB's predecessor jumps directly to this BB's successor. This
1229 // shouldn't happen currently.
1230 assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
1231 // FIXME: remove the empty blocks after all the work is done?
1232 }
1233
1234 /// Remove constant pool entries whose refcounts are zero.
1235 bool AArch64ConstantIslands::removeUnusedCPEntries() {
1236 bool MadeChange = false;
1237 for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
1238 std::vector<CPEntry> &CPEs = CPEntries[i];
1239 for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
1240 if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
1241 removeDeadCPEMI(CPEs[j].CPEMI);
1242 CPEs[j].CPEMI = NULL;
1243 MadeChange = true;
1244 }
1245 }
1246 }
1247 return MadeChange;
1248 }
1249
1250 /// Returns true if the distance between specific MI and specific BB can fit in
1251 /// MI's displacement field.
1252 bool AArch64ConstantIslands::isBBInRange(MachineInstr *MI,
1253 MachineBasicBlock *DestBB,
1254 unsigned OffsetBits) {
1255 int64_t BrOffset = getOffsetOf(MI);
1256 int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
1257
1258 DEBUG(dbgs() << "Branch to destination BB#" << DestBB->getNumber()
1259 << " from BB#" << MI->getParent()->getNumber()
1260 << " bits available=" << OffsetBits
1261 << " from " << getOffsetOf(MI) << " to " << DestOffset
1262 << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
1263
1264 return isIntN(OffsetBits, DestOffset - BrOffset);
1265 }
1266
1267 /// Fix up an immediate branch whose destination is too far away to fit in its
1268 /// displacement field.
1269 bool AArch64ConstantIslands::fixupImmediateBr(ImmBranch &Br) {
1270 MachineInstr *MI = Br.MI;
1271 MachineBasicBlock *DestBB = 0;
1272 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1273 if (MI->getOperand(i).isMBB()) {
1274 DestBB = MI->getOperand(i).getMBB();
1275 break;
1276 }
1277 }
1278 assert(DestBB && "Branch with no destination BB?");
1279
1280 // Check to see if the DestBB is already in-range.
1281 if (isBBInRange(MI, DestBB, Br.OffsetBits))
1282 return false;
1283
1284 assert(Br.IsCond && "Only conditional branches should need fixup");
1285 return fixupConditionalBr(Br);
1286 }
1287
1288 /// Fix up a conditional branch whose destination is too far away to fit in its
1289 /// displacement field. It is converted to an inverse conditional branch + an
1290 /// unconditional branch to the destination.
1291 bool
1292 AArch64ConstantIslands::fixupConditionalBr(ImmBranch &Br) {
1293 MachineInstr *MI = Br.MI;
1294 MachineBasicBlock *MBB = MI->getParent();
1295 unsigned CondBrMBBOperand = 0;
1296
1297 // The general idea is to add an unconditional branch to the destination and
1298 // invert the conditional branch to jump over it. Complications occur around
1299 // fallthrough and unreachable ends to the block.
1300 // b.lt L1
1301 // =>
1302 // b.ge L2
1303 // b L1
1304 // L2:
1305
1306 // First we invert the conditional branch, by creating a replacement if
1307 // necessary. This if statement contains all the special handling of different
1308 // branch types.
1309 if (MI->getOpcode() == AArch64::Bcc) {
1310 // The basic block is operand number 1 for Bcc
1311 CondBrMBBOperand = 1;
1312
1313 A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
1314 CC = A64InvertCondCode(CC);
1315 MI->getOperand(0).setImm(CC);
1316 } else {
1317 MachineInstrBuilder InvertedMI;
1318 int InvertedOpcode;
1319 switch (MI->getOpcode()) {
1320 default: llvm_unreachable("Unknown branch type");
1321 case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
1322 case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
1323 case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
1324 case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
1325 case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
1326 case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
1327 case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
1328 case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
1329 }
1330
1331 InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
1332 for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
1333 InvertedMI.addOperand(MI->getOperand(i));
1334 if (MI->getOperand(i).isMBB())
1335 CondBrMBBOperand = i;
1336 }
1337
1338 MI->eraseFromParent();
1339 MI = Br.MI = InvertedMI;
1340 }
1341
1342 // If the branch is at the end of its MBB and that has a fall-through block,
1343 // direct the updated conditional branch to the fall-through
1344 // block. Otherwise, split the MBB before the next instruction.
1345 MachineInstr *BMI = &MBB->back();
1346 bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
1347
1348 ++NumCBrFixed;
1349 if (BMI != MI) {
1350 if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
1351 BMI->getOpcode() == AArch64::Bimm) {
1352 // Last MI in the BB is an unconditional branch. We can swap destinations:
1353 // b.eq L1 (temporarily b.ne L1 after first change)
1354 // b L2
1355 // =>
1356 // b.ne L2
1357 // b L1
1358 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
1359 if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
1360 DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
1361 << *BMI);
1362 MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
1363 BMI->getOperand(0).setMBB(DestBB);
1364 MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
1365 return true;
1366 }
1367 }
1368 }
1369
1370 if (NeedSplit) {
1371 MachineBasicBlock::iterator MBBI = MI; ++MBBI;
1372 splitBlockBeforeInstr(MBBI);
1373 // No need for the branch to the next block. We're adding an unconditional
1374 // branch to the destination.
1375 int delta = TII->getInstSizeInBytes(MBB->back());
1376 BBInfo[MBB->getNumber()].Size -= delta;
1377 MBB->back().eraseFromParent();
1378 // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
1379 }
1380
1381 // After splitting and removing the unconditional branch from the original BB,
1382 // the structure is now:
1383 // oldbb:
1384 // [things]
1385 // b.invertedCC L1
1386 // splitbb/fallthroughbb:
1387 // [old b L2/real continuation]
1388 //
1389 // We now have to change the conditional branch to point to splitbb and add an
1390 // unconditional branch after it to L1, giving the final structure:
1391 // oldbb:
1392 // [things]
1393 // b.invertedCC splitbb
1394 // b L1
1395 // splitbb/fallthroughbb:
1396 // [old b L2/real continuation]
1397 MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
1398
1399 DEBUG(dbgs() << " Insert B to BB#"
1400 << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
1401 << " also invert condition and change dest. to BB#"
1402 << NextBB->getNumber() << "\n");
1403
1404 // Insert a new unconditional branch and fixup the destination of the
1405 // conditional one. Also update the ImmBranch as well as adding a new entry
1406 // for the new branch.
1407 BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
1408 .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
1409 MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
1410
1411 BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
1412
1413 // 26 bits written down in Bimm, specifying a multiple of 4.
1414 unsigned OffsetBits = 26 + 2;
1415 ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
1416
1417 adjustBBOffsetsAfter(MBB);
1418 return true;
1419 }
0 //===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AArch64.h"
14 #include "AArch64FrameLowering.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64InstrInfo.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineMemOperand.h"
21 #include "llvm/CodeGen/MachineModuleInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/MC/MachineLocation.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/ErrorHandling.h"
28
29 using namespace llvm;
30
31 void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
32 uint64_t &Initial,
33 uint64_t &Residual) const {
34 // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
35 // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
36 // 0x1f8, but stack adjustment should always be a multiple of 16.
37 if (Total <= 0x1f0) {
38 Initial = Total;
39 Residual = 0;
40 } else {
41 Initial = 0x1f0;
42 Residual = Total - Initial;
43 }
44 }
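// Illustrative behaviour of splitSPAdjustments: a 0x180-byte frame is left
// whole (Initial == 0x180, Residual == 0), while a 0x230-byte frame becomes
// Initial == 0x1f0 and Residual == 0x40, keeping the callee-save STP/LDP
// offsets within their 7-bit scaled immediate range.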
45
46 void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
47 AArch64MachineFunctionInfo *FuncInfo =
48 MF.getInfo<AArch64MachineFunctionInfo>();
49 MachineBasicBlock &MBB = MF.front();
50 MachineBasicBlock::iterator MBBI = MBB.begin();
51 MachineFrameInfo *MFI = MF.getFrameInfo();
52 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
53 DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
54
55 MachineModuleInfo &MMI = MF.getMMI();
56 std::vector<MachineMove> &Moves = MMI.getFrameMoves();
57 bool NeedsFrameMoves = MMI.hasDebugInfo()
58 || MF.getFunction()->needsUnwindTableEntry();
59
60 uint64_t NumInitialBytes, NumResidualBytes;
61
62 // Currently we expect the stack to be laid out by
63 // sub sp, sp, #initial
64 // stp x29, x30, [sp, #offset]
65 // ...
66 // str xxx, [sp, #offset]
67 // sub sp, sp, #rest (possibly via extra instructions).
68 if (MFI->getCalleeSavedInfo().size()) {
69 // If there are callee-saved registers, we want to store them efficiently as
70 // a block, and virtual base assignment happens too early to do it for us, so
71 // we adjust the stack in two phases: first just for callee-saved fiddling,
72 // then to allocate the rest of the frame.
73 splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
74 } else {
75 // If there aren't any callee-saved registers, two-phase adjustment is
76 // inefficient. It's more efficient to adjust with NumInitialBytes too
77 // because when we're in a "callee pops argument space" situation, that pop
78 // must be tacked onto Initial for correctness.
79 NumInitialBytes = MFI->getStackSize();
80 NumResidualBytes = 0;
81 }
82
83 // Tell everyone else how much adjustment we're expecting them to use. In
84 // particular if an adjustment is required for a tail call the epilogue could
85 // have a different view of things.
86 FuncInfo->setInitialStackAdjust(NumInitialBytes);
87
88 emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
89 MachineInstr::FrameSetup);
90
91 if (NeedsFrameMoves && NumInitialBytes) {
92 // We emit this update even if the CFA is set from a frame pointer later so
93 // that the CFA is valid in the interim.
94 MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
95 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
96 .addSym(SPLabel);
97
98 MachineLocation Dst(MachineLocation::VirtualFP);
99 MachineLocation Src(AArch64::XSP, NumInitialBytes);
100 Moves.push_back(MachineMove(SPLabel, Dst, Src));
101 }
102
103 // Otherwise we need to set the frame pointer and/or add a second stack
104 // adjustment.
105
106 bool FPNeedsSetting = hasFP(MF);
107 for (; MBBI != MBB.end(); ++MBBI) {
108 // Note that this search makes strong assumptions about the operation used
109 // to store the frame-pointer: it must be "STP x29, x30, ...". This could
110 // change in future, but until then there's no point in implementing
111 // more generic, untestable cases.
112 if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
113 && MBBI->getOperand(0).getReg() == AArch64::X29) {
114 int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
115 FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
116
117 ++MBBI;
118 emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
119 AArch64::X29,
120 NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
121 MachineInstr::FrameSetup);
122
123 // The offset adjustment used when emitting debugging locations relative
124 // to whatever frame base is set. AArch64 uses the default frame base (FP
125 // or SP) and this adjusts the calculations to be correct.
126 MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
127 - MFI->getStackSize());
128
129 if (NeedsFrameMoves) {
130 MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
131 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
132 .addSym(FPLabel);
133 MachineLocation Dst(MachineLocation::VirtualFP);
134 MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
135 Moves.push_back(MachineMove(FPLabel, Dst, Src));
136 }
137
138 FPNeedsSetting = false;
139 }
140
141 if (!MBBI->getFlag(MachineInstr::FrameSetup))
142 break;
143 }
144
145 assert(!FPNeedsSetting && "Frame pointer couldn't be set");
146
147 emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
148 MachineInstr::FrameSetup);
149
150 // Now we emit the rest of the frame setup information, if necessary: we've
151 // already noted the FP and initial SP moves so we're left with the prologue's
152 // final SP update and callee-saved register locations.
153 if (!NeedsFrameMoves)
154 return;
155
156 // Reuse the label if appropriate, so create it in this outer scope.
157 MCSymbol *CSLabel = 0;
158
159 // The rest of the stack adjustment
160 if (!hasFP(MF) && NumResidualBytes) {
161 CSLabel = MMI.getContext().CreateTempSymbol();
162 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
163 .addSym(CSLabel);
164
165 MachineLocation Dst(MachineLocation::VirtualFP);
166 MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
167 Moves.push_back(MachineMove(CSLabel, Dst, Src));
168 }
169
170 // And any callee-saved registers (it's fine to leave them to the end here,
171 // because the old values are still valid at this point).
172 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
173 if (CSI.size()) {
174 if (!CSLabel) {
175 CSLabel = MMI.getContext().CreateTempSymbol();
176 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
177 .addSym(CSLabel);
178 }
179
180 for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
181 E = CSI.end(); I != E; ++I) {
182 MachineLocation Dst(MachineLocation::VirtualFP, MFI->getObjectOffset(I->getFrameIdx()));
183 MachineLocation Src(I->getReg());
184 Moves.push_back(MachineMove(CSLabel, Dst, Src));
185 }
186 }
187 }
188
189 void
190 AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
191 MachineBasicBlock &MBB) const {
192 AArch64MachineFunctionInfo *FuncInfo =
193 MF.getInfo<AArch64MachineFunctionInfo>();
194
195 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
196 DebugLoc DL = MBBI->getDebugLoc();
197 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
198 MachineFrameInfo &MFI = *MF.getFrameInfo();
199 unsigned RetOpcode = MBBI->getOpcode();
200
201 // Initial and residual are named for consistency with the prologue. Note that
202 // in the epilogue, the residual adjustment is executed first.
203 uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
204 uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
205 uint64_t ArgumentPopSize = 0;
206 if (RetOpcode == AArch64::TC_RETURNdi ||
207 RetOpcode == AArch64::TC_RETURNxi) {
208 MachineOperand &JumpTarget = MBBI->getOperand(0);
209 MachineOperand &StackAdjust = MBBI->getOperand(1);
210
211 MachineInstrBuilder MIB;
212 if (RetOpcode == AArch64::TC_RETURNdi) {
213 MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
214 if (JumpTarget.isGlobal()) {
215 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
216 JumpTarget.getTargetFlags());
217 } else {
218 assert(JumpTarget.isSymbol() && "unexpected tail call destination");
219 MIB.addExternalSymbol(JumpTarget.getSymbolName(),
220 JumpTarget.getTargetFlags());
221 }
222 } else {
223 assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
224 && "Unexpected tail call");
225
226 MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
227 MIB.addReg(JumpTarget.getReg(), RegState::Kill);
228 }
229
230 // Add the extra operands onto the new tail call instruction even though
231 // they're not used directly (so that liveness is tracked properly, etc.).
232 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
233 MIB->addOperand(MBBI->getOperand(i));
234
235
236 // Delete the pseudo instruction TC_RETURN.
237 MachineInstr *NewMI = prior(MBBI);
238 MBB.erase(MBBI);
239 MBBI = NewMI;
240
241 // For a tail-call in a callee-pops-arguments environment, some or all of
242 // the stack may actually be in use for the call's arguments; this is
243 // calculated during LowerCall and consumed here...
244 ArgumentPopSize = StackAdjust.getImm();
245 } else {
246 // ... otherwise the amount to pop is *all* of the argument space,
247 // conveniently stored in the MachineFunctionInfo by
248 // LowerFormalArguments. This will, of course, be zero for the C calling
249 // convention.
250 ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
251 }
252
253 assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
254 && "refusing to adjust stack by misaligned amt");
255
256 // We may need to address callee-saved registers differently, so find out the
257 // bound on the frame indices.
258 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
259 int MinCSFI = 0;
260 int MaxCSFI = -1;
261
262 if (CSI.size()) {
263 MinCSFI = CSI[0].getFrameIdx();
264 MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
265 }
266
267 // The "residual" stack update comes first from this direction and guarantees
268 // that SP is NumInitialBytes below its value on function entry, either by a
269 // direct update or restoring it from the frame pointer.
270 if (NumInitialBytes + ArgumentPopSize != 0) {
271 emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
272 NumInitialBytes + ArgumentPopSize);
273 --MBBI;
274 }
275
276
277 // MBBI now points to the instruction just past the last callee-saved
278 // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
279 // otherwise).
280
281 // Now we need to find out where to put the bulk of the stack adjustment
282 MachineBasicBlock::iterator FirstEpilogue = MBBI;
283 while (MBBI != MBB.begin()) {
284 --MBBI;
285
286 unsigned FrameOp;
287 for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
288 if (MBBI->getOperand(FrameOp).isFI())
289 break;
290 }
291
292 // If this instruction doesn't have a frame index we've reached the end of
293 // the callee-save restoration.
294 if (FrameOp == MBBI->getNumOperands())
295 break;
296
297 // Likewise if it *is* a local reference, but not to a callee-saved object.
298 int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
299 if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
300 break;
301
302 FirstEpilogue = MBBI;
303 }
304
305 if (MF.getFrameInfo()->hasVarSizedObjects()) {
306 int64_t StaticFrameBase;
307 StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
308 emitRegUpdate(MBB, FirstEpilogue, DL, TII,
309 AArch64::XSP, AArch64::X29, AArch64::NoRegister,
310 StaticFrameBase);
311 } else {
312 emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
313 }
314 }
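
The epilogue mirrors the prologue's split in reverse: the residual add runs first, and the initial add (plus any argument space the callee pops) runs last, immediately before the return. A throwaway check of the bookkeeping, with invented figures:

#include <cassert>
#include <cstdint>

int main() {
  // Invented figures: a 96-byte frame split 32 (initial) + 64 (residual),
  // plus 16 bytes of argument space the callee pops.
  uint64_t StackSize = 96, NumInitialBytes = 32, ArgumentPopSize = 16;
  uint64_t NumResidualBytes = StackSize - NumInitialBytes;

  int64_t SP = -(int64_t)StackSize;        // SP relative to function entry
  SP += NumResidualBytes;                  // residual add (first in the epilogue)
  SP += NumInitialBytes + ArgumentPopSize; // final add, just before the return
  assert(SP == (int64_t)ArgumentPopSize);  // SP ends ArgumentPopSize above entry
  return 0;
}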
315
316 int64_t
317 AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
318 int FrameIndex,
319 unsigned &FrameReg,
320 int SPAdj,
321 bool IsCalleeSaveOp) const {
322 AArch64MachineFunctionInfo *FuncInfo =
323 MF.getInfo<AArch64MachineFunctionInfo>();
324 MachineFrameInfo *MFI = MF.getFrameInfo();
325
326 int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
327
328 assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
329 && "callee-saved register in unexpected place");
330
331 // If the frame for this function is particularly large, we adjust the stack
332 // in two phases which means the callee-save related operations see a
333 // different (intermediate) stack size.
334 int64_t FrameRegPos;
335 if (IsCalleeSaveOp) {
336 FrameReg = AArch64::XSP;
337 FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
338 } else if (useFPForAddressing(MF)) {
339 // Have to use the frame pointer since we have no idea where SP is.
340 FrameReg = AArch64::X29;
341 FrameRegPos = FuncInfo->getFramePointerOffset();
342 } else {
343 FrameReg = AArch64::XSP;
344 FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
345 }
346
347 return TopOfFrameOffset - FrameRegPos;
348 }
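
To make the sign conventions in resolveFrameIndexReference concrete, the toy program below works one hypothetical case by hand (all numbers invented): an object at top-of-frame offset -16 in a 48-byte frame, addressed from SP with no extra SP adjustment, resolves to an offset of 32 from SP.

#include <cassert>
#include <cstdint>

int main() {
  int64_t TopOfFrameOffset = -16;           // hypothetical object offset
  int64_t StackSize = 48;                   // hypothetical frame size
  int64_t SPAdj = 0;
  int64_t FrameRegPos = -StackSize + SPAdj; // the SP-relative case above
  assert(TopOfFrameOffset - FrameRegPos == 32);
  return 0;
}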
349
350 /// Estimate and return the size of the frame.
351 static unsigned estimateStackSize(MachineFunction &MF) {
352 // FIXME: Make generic? Really consider after upstreaming. This code is now
353 // shared between PEI, ARM *and* here.
354 const MachineFrameInfo *MFI = MF.getFrameInfo();
355 const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
356 const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
357 unsigned MaxAlign = MFI->getMaxAlignment();
358 int Offset = 0;
359
360 // This code is very, very similar to PEI::calculateFrameObjectOffsets().
361 // It really should be refactored to share code. Until then, changes
362 // should keep in mind that there's tight coupling between the two.
363
364 for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
365 int FixedOff = -MFI->getObjectOffset(i);
366 if (FixedOff > Offset) Offset = FixedOff;
367 }
368 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
369 if (MFI->isDeadObjectIndex(i))
370 continue;
371 Offset += MFI->getObjectSize(i);
372 unsigned Align = MFI->getObjectAlignment(i);
373 // Adjust to alignment boundary
374 Offset = (Offset+Align-1)/Align*Align;
375
376 MaxAlign = std::max(Align, MaxAlign);
377 }
378
379 if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
380 Offset += MFI->getMaxCallFrameSize();
381
382 // Round up the size to a multiple of the alignment. If the function has
383 // any calls or alloca's, align to the target's StackAlignment value to
384 // ensure that the callee's frame or the alloca data is suitably aligned;
385 // otherwise, for leaf functions, align to the TransientStackAlignment
386 // value.
387 unsigned StackAlign;
388 if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
389 (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
390 StackAlign = TFI->getStackAlignment();
391 else
392 StackAlign = TFI->getTransientStackAlignment();
393
394 // If the frame pointer is eliminated, all frame offsets will be relative to
395 // SP not FP. Align to MaxAlign so this works.
396 StackAlign = std::max(StackAlign, MaxAlign);
397 unsigned AlignMask = StackAlign - 1;
398 Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
399
400 return (unsigned)Offset;
401 }
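
The two align-up idioms in estimateStackSize are easy to get off by one, so here is a quick throwaway check (arbitrary numbers, nothing taken from a real frame):

#include <cassert>
#include <cstdint>

int main() {
  // (Offset + Align - 1) / Align * Align rounds 37 up to 40 for Align == 8.
  uint64_t Offset = 37, Align = 8;
  assert((Offset + Align - 1) / Align * Align == 40);

  // The mask form does the same for power-of-two alignments: 37 -> 48 for 16.
  uint64_t StackAlign = 16, AlignMask = StackAlign - 1;
  assert(((Offset + AlignMask) & ~AlignMask) == 48);
  return 0;
}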
402
403 void
404 AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
405 RegScavenger *RS) const {
406 const AArch64RegisterInfo *RegInfo =
407 static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
408 MachineFrameInfo *MFI = MF.getFrameInfo();
409 const AArch64InstrInfo &TII =
410 *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
411
412 if (hasFP(MF)) {
413 MF.getRegInfo().setPhysRegUsed(AArch64::X29);
414 MF.getRegInfo().setPhysRegUsed(AArch64::X30);
415 }
416
417 // If addressing of local variables is going to be more complicated than
418 // shoving a base register and an offset into the instruction, then we may
419 // well need to scavenge registers. We should either specifically add a
420 // callee-saved register for this purpose or allocate an extra spill slot.
421
422 bool BigStack =
423 (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
424 || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
425 || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
426
427 if (!BigStack)
428 return;
429
430 // We certainly need some slack space for the scavenger, preferably an extra
431 // register.
432 const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
433 uint16_t ExtraReg = AArch64::NoRegister;
434
435 for (unsigned i = 0; CSRegs[i]; ++i) {
436 if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
437 !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
438 ExtraReg = CSRegs[i];
439 break;
440 }
441 }
442
443 if (ExtraReg != 0) {
444 MF.getRegInfo().setPhysRegUsed(ExtraReg);
445 } else {
446 // Create a stack slot for scavenging purposes. PrologEpilogInserter
447 // helpfully places it near either SP or FP for us, to avoid an
448 // infinite regress during scavenging.
449 const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
450 RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
451 RC->getAlignment(),
452 false));
453 }
454 }
455
456 bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
457 unsigned Reg) const {
458 // If @llvm.returnaddress is called then it will refer to X30 by some means;
459 // the prologue store does not kill the register.
460 if (Reg == AArch64::X30) {
461 if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
462 && MBB.getParent()->getRegInfo().isLiveIn(Reg))
463 return false;
464 }
465
466 // In all other cases, physical registers are dead after they've been saved
467 // but live at the beginning of the prologue block.
468 MBB.addLiveIn(Reg);
469 return true;
470 }
471
472 void
473 AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
474 MachineBasicBlock::iterator MBBI,
475 const std::vector<CalleeSavedInfo> &CSI,
476 const TargetRegisterInfo *TRI,
477 LoadStoreMethod PossClasses[],
478 unsigned NumClasses) const {
479 DebugLoc DL = MBB.findDebugLoc(MBBI);
480 MachineFunction &MF = *MBB.getParent();
481 MachineFrameInfo &MFI = *MF.getFrameInfo();
482 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
483
484 // A certain amount of implicit contract is present here. The actual stack
485 // offsets haven't been allocated officially yet, so for strictly correct code
486 // we rely on the fact that the elements of CSI are allocated in order
487 // starting at SP, purely as dictated by size and alignment. In practice since
488 // this function handles the only accesses to those slots it's not quite so
489 // important.
490 //
491 // We have also ordered the Callee-saved register list in AArch64CallingConv
492 // so that the above scheme puts registers in order: in particular we want
493 // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
494 for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
495 unsigned Reg = CSI[i].getReg();
496
497 // First we need to find out which register class the register belongs to so
498 // that we can use the correct load/store instructions.
499 unsigned ClassIdx;
500 for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
501 if (PossClasses[ClassIdx].RegClass->contains(Reg))
502 break;
503 }
504 assert(ClassIdx != NumClasses
505 && "Asked to store register in unexpected class");
506 const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
507
508 // Now we need to decide whether it's possible to emit a paired instruction:
509 // for this we want the next register to be in the same class.
510 MachineInstrBuilder NewMI;
511 bool Pair = false;
512 if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
513 Pair = true;
514 unsigned StLow = 0, StHigh = 0;
515 if (isPrologue) {
516 // Most of these registers will be live-in to the MBB and killed by our
517 // store, though there are exceptions (see determinePrologueDeath).
518 StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
519 StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
520 } else {
521 StLow = RegState::Define;
522 StHigh = RegState::Define;
523 }
524
525 NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
526 .addReg(CSI[i+1].getReg(), StLow)
527 .addReg(CSI[i].getReg(), StHigh);
528
529 // If it's a paired op, we've consumed two registers
530 ++i;
531 } else {
532 unsigned State;
533 if (isPrologue) {
534 State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
535 } else {
536 State = RegState::Define;
537 }
538
539 NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].SingleOpcode))
540 .addReg(CSI[i].getReg(), State);
541 }
542
543 // Note that the FrameIdx refers to the second register in a pair: it will
544 // be allocated the smaller numeric address and so is the one an LDP/STP
545 // address must use.
546 int FrameIdx = CSI[i].getFrameIdx();
547 MachineMemOperand::MemOperandFlags Flags;
548 Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
549 MachineMemOperand *MMO =
550 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
551 Flags,
552 Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
553 MFI.getObjectAlignment(FrameIdx));
554
555 NewMI.addFrameIndex(FrameIdx)
556 .addImm(0) // address-register offset
557 .addMemOperand(MMO);
558
559 if (isPrologue)
560 NewMI.setMIFlags(MachineInstr::FrameSetup);
561
562 // For aesthetic reasons, during an epilogue we want to emit complementary
563 // operations to the prologue, but in the opposite order. So we still
564 // iterate through the CalleeSavedInfo list in order, but we put the
565 // instructions successively earlier in the MBB.
566 if (!isPrologue)
567 --MBBI;
568 }
569 }
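
The pairing walk is the subtle part of emitFrameMemOps. The standalone sketch below reproduces only that walk over an invented callee-saved order (the register names and the one-bit class tag are illustrative assumptions, not the backend's data structures): consecutive same-class entries collapse into one paired operation, everything else is emitted singly.

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Hypothetical callee-saved order; the bool stands in for the register
  // class test (true ~ GPR64, false ~ FPR64).
  std::vector<std::pair<const char *, bool>> CSI = {
      {"x29", true}, {"x30", true}, {"x21", true}, {"d8", false}, {"d9", false}};

  for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
    if (i + 1 < e && CSI[i].second == CSI[i + 1].second) {
      // CSI[i+1] is listed first, matching the StLow/StHigh ordering above.
      std::printf("pair:   %s, %s\n", CSI[i + 1].first, CSI[i].first);
      ++i; // a paired op consumes two registers
    } else {
      std::printf("single: %s\n", CSI[i].first);
    }
  }
  return 0;
}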
570
571 bool
572 AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
573 MachineBasicBlock::iterator MBBI,
574 const std::vector<CalleeSavedInfo> &CSI,
575 const TargetRegisterInfo *TRI) const {
576 if (CSI.empty())
577 return false;
578
579 static LoadStoreMethod PossibleClasses[] = {
580 {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
581 {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
582 };
583 unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
584
585 emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
586 PossibleClasses, NumClasses);
587
588 return true;
589 }
590
591 bool
592 AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
593 MachineBasicBlock::iterator MBBI,
594 const std::vector<CalleeSavedInfo> &CSI,
595 const TargetRegisterInfo *TRI) const {
596
597 if (CSI.empty())
598 return false;
599
600 static LoadStoreMethod PossibleClasses[] = {
601 {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
602 {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
603 };
604 unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
605
606 emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
607 PossibleClasses, NumClasses);
608
609 return true;
610 }
611
612 bool
613 AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
614 const MachineFrameInfo *MFI = MF.getFrameInfo();
615 const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
616
617 // This is a question of ABI compliance. The AArch64 PCS gives various options
618 // for conformance, and even at the most stringent level more or less permits
619 // elimination for leaf functions because there's no loss of functionality
620 // (for debugging etc.).
621 if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
622 return true;
623
624 // The following are hard-limits: incorrect code will be generated if we try
625 // to omit the frame.
626 return (RI->needsStackRealignment(MF) ||
627 MFI->hasVarSizedObjects() ||
628 MFI->isFrameAddressTaken());
629 }
630
631 bool
632 AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
633 return MF.getFrameInfo()->hasVarSizedObjects();
634 }
635
636 bool
637 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
638 const MachineFrameInfo *MFI = MF.getFrameInfo();
639
640 // Of the various reasons for having a frame pointer, it's actually only
641 // variable-sized objects that prevent reservation of a call frame.
642 return !(hasFP(MF) && MFI->hasVarSizedObjects());
643 }
0 //==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the AArch64 implementation of TargetFrameLowering.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_AARCH64_FRAMEINFO_H
14 #define LLVM_AARCH64_FRAMEINFO_H
15
16 #include "AArch64Subtarget.h"
17 #include "llvm/Target/TargetFrameLowering.h"
18
19 namespace llvm {
20 class AArch64Subtarget;
21
22 class AArch64FrameLowering : public TargetFrameLowering {
23 private:
24 // In order to unify the spilling and restoring of callee-saved registers into
25 // emitFrameMemOps, we need to be able to specify which instructions to use
26 // for the relevant memory operations on each register class. An array of the
27 // following struct is populated and passed in to achieve this.
28 struct LoadStoreMethod {
29 const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
30
31 // The preferred instruction.
32 unsigned PairOpcode; // E.g. LSPair64_STR
33
34 // Sometimes only a single register can be handled at once.
35 unsigned SingleOpcode; // E.g. LS64_STR
36 };
37 protected:
38 const AArch64Subtarget &STI;
39
40 public:
41 explicit AArch64FrameLowering(const AArch64Subtarget &sti)
42 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
43 STI(sti) {
44 }
45
46 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
47 /// the function.
48 virtual void emitPrologue(MachineFunction &MF) const;
49 virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
50
51 /// Decides how much stack adjustment to perform in each phase of the prologue
52 /// and epilogue.
53 void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
54 uint64_t &Residual) const;
55
56 int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
57 unsigned &FrameReg, int SPAdj,
58 bool IsCalleeSaveOp) const;
59
60 virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
61 RegScavenger *RS) const;
62
63 virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
64 MachineBasicBlock::iterator MI,
65 const std::vector<CalleeSavedInfo> &CSI,
66 const TargetRegisterInfo *TRI) const;
67 virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
68 MachineBasicBlock::iterator MI,
69 const std::vector<CalleeSavedInfo> &CSI,
70 const TargetRegisterInfo *TRI) const;
71
72 /// If the register is X30 (i.e. LR) and the return address is used in the
73 /// function then the callee-save store doesn't actually kill the register,
74 /// otherwise it does.
75 bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
76
77 /// This function emits the loads or stores required during prologue and
78 /// epilogue as efficiently as possible.
79 ///
80 /// The operations involved in setting up and tearing down the frame are
81 /// similar enough to warrant a shared function, particularly as discrepancies
82 /// between the two would be disastrous.
83 void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB,
84 MachineBasicBlock::iterator MI,
85 const std::vector<CalleeSavedInfo> &CSI,
86 const TargetRegisterInfo *TRI,
87 LoadStoreMethod PossibleClasses[],
88 unsigned NumClasses) const;
89
90
91 virtual bool hasFP(const MachineFunction &MF) const;
92
93 virtual bool useFPForAddressing(const MachineFunction &MF) const;
94
95 /// On AArch64, call frames are reserved unless there are variable-sized objects.
96 virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
97
98 };
99
100 } // End llvm namespace
101
102 #endif
0 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//