llvm.org GIT mirror llvm / af303d5 tools / llvm-ld / llvm-ld.cpp
af303d5

Tree @af303d5 (Download .tar.gz)

llvm-ld.cpp @af303d5raw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
//===- llvm-ld.cpp - LLVM 'ld' compatible linker --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This utility is intended to be compatible with GCC, and follows standard
// system 'ld' conventions.  As such, the default output file is ./a.out.
// Additionally, this program outputs a shell script that is used to invoke LLI
// to execute the program.  In this manner, the generated executable (a.out for
// example), is directly executable, whereas the bytecode file actually lives in
// the a.out.bc file generated by this program.  Also, Force is on by default.
//
// Note that if someone (or a script) deletes the executable program generated,
// the .bc file will be left around.  Considering that this is a temporary hack,
// I'm not too worried about this.
//
//===----------------------------------------------------------------------===//

#include "llvm/LinkAllVMCore.h"
#include "llvm/Linker.h"
#include "llvm/System/Program.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Bytecode/Reader.h"
#include "llvm/Bytecode/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetMachineRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/System/Signals.h"
#include <fstream>
#include <iostream>
#include <memory>

using namespace llvm;

// Input/Output Options
static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
  cl::desc("<input bytecode files>"));

static cl::opt<std::string> OutputFilename("o", cl::init("a.out"),
  cl::desc("Override output filename"),
  cl::value_desc("filename"));

static cl::opt<bool> Verbose("v",
  cl::desc("Print information about actions taken"));

static cl::list<std::string> LibPaths("L", cl::Prefix,
  cl::desc("Specify a library search path"),
  cl::value_desc("directory"));

static cl::list<std::string> Libraries("l", cl::Prefix,
  cl::desc("Specify libraries to link to"),
  cl::value_desc("library prefix"));

static cl::opt<bool> LinkAsLibrary("link-as-library",
  cl::desc("Link the .bc files together as a library, not an executable"));

static cl::alias Relink("r", cl::aliasopt(LinkAsLibrary),
  cl::desc("Alias for -link-as-library"));

static cl::opt<const TargetMachineRegistry::Entry*, false, TargetNameParser>
  MachineArch("march", cl::desc("Architecture to generate assembly for:"));

static cl::opt<bool> Native("native",
  cl::desc("Generate a native binary instead of a shell script"));

static cl::opt<bool>NativeCBE("native-cbe",
  cl::desc("Generate a native binary with the C backend and GCC"));

static cl::opt<bool>DisableCompression("disable-compression",cl::init(false),
  cl::desc("Disable writing of compressed bytecode files"));

static cl::list<std::string> PostLinkOpts("post-link-opts",
  cl::value_desc("path"),
  cl::desc("Run one or more optimization programs after linking"));

static cl::list<std::string> XLinker("Xlinker", cl::value_desc("option"),
  cl::desc("Pass options to the system linker"));

// Compatibility options that are ignored but supported by LD
static cl::opt<std::string> CO3("soname", cl::Hidden,
  cl::desc("Compatibility option: ignored"));

static cl::opt<std::string> CO4("version-script", cl::Hidden,
  cl::desc("Compatibility option: ignored"));

static cl::opt<bool> CO5("eh-frame-hdr", cl::Hidden,
  cl::desc("Compatibility option: ignored"));

static  cl::opt<std::string> CO6("h", cl::Hidden,
  cl::desc("Compatibility option: ignored"));


/// This is just for convenience so it doesn't have to be passed around
/// everywhere.
static std::string progname;

/// PrintAndReturn - Prints a message to standard error and returns true.
///
/// Inputs:
///  progname - The name of the program (i.e. argv[0]).
///  Message  - The message to print to standard error.
///
static int PrintAndReturn(const std::string &Message) {
  std::cerr << progname << ": " << Message << "\n";
  return 1;
}

/// CopyEnv - This function takes an array of environment variables and makes a
/// copy of it.  This copy can then be manipulated any way the caller likes
/// without affecting the process's real environment.
///
/// Inputs:
///  envp - An array of C strings containing an environment.
///
/// Return value:
///  NULL - An error occurred.
///
///  Otherwise, a pointer to a new array of C strings is returned.  Every string
///  in the array is a duplicate of the one in the original array (i.e. we do
///  not copy the char *'s from one array to another).
///
static char ** CopyEnv(char ** const envp) {
  // Count the number of entries in the old list;
  unsigned entries;   // The number of entries in the old environment list
  for (entries = 0; envp[entries] != NULL; entries++)
    /*empty*/;

  // Add one more entry for the NULL pointer that ends the list.
  ++entries;

  // If there are no entries at all, just return NULL.
  if (entries == 0)
    return NULL;

  // Allocate a new environment list.
  char **newenv = new char* [entries];
  if ((newenv = new char* [entries]) == NULL)
    return NULL;

  // Make a copy of the list.  Don't forget the NULL that ends the list.
  entries = 0;
  while (envp[entries] != NULL) {
    newenv[entries] = new char[strlen (envp[entries]) + 1];
    strcpy (newenv[entries], envp[entries]);
    ++entries;
  }
  newenv[entries] = NULL;

  return newenv;
}


/// RemoveEnv - Remove the specified environment variable from the environment
/// array.
///
/// Inputs:
///  name - The name of the variable to remove.  It cannot be NULL.
///  envp - The array of environment variables.  It cannot be NULL.
///
/// Notes:
///  This is mainly done because functions to remove items from the environment
///  are not available across all platforms.  In particular, Solaris does not
///  seem to have an unsetenv() function or a setenv() function (or they are
///  undocumented if they do exist).
///
static void RemoveEnv(const char * name, char ** const envp) {
  for (unsigned index=0; envp[index] != NULL; index++) {
    // Find the first equals sign in the array and make it an EOS character.
    char *p = strchr (envp[index], '=');
    if (p == NULL)
      continue;
    else
      *p = '\0';

    // Compare the two strings.  If they are equal, zap this string.
    // Otherwise, restore it.
    if (!strcmp(name, envp[index]))
      *envp[index] = '\0';
    else
      *p = '=';
  }

  return;
}

/// GenerateBytecode - generates a bytecode file from the module provided
void GenerateBytecode(Module* M, const std::string& FileName) {

  // Create the output file.
  std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
                               std::ios::binary;
  std::ofstream Out(FileName.c_str(), io_mode);
  if (!Out.good()) {
    PrintAndReturn("error opening '" + FileName + "' for writing!");
    return;
  }

  // Ensure that the bytecode file gets removed from the disk if we get a
  // terminating signal.
  sys::RemoveFileOnSignal(sys::Path(FileName));

  // Write it out
  WriteBytecodeToFile(M, Out, !DisableCompression);

  // Close the bytecode file.
  Out.close();
}

/// GenerateAssembly - generates a native assembly language source file from the
/// specified bytecode file.
///
/// Inputs:
///  InputFilename  - The name of the output bytecode file.
///  OutputFilename - The name of the file to generate.
///  llc            - The pathname to use for LLC.
///  envp           - The environment to use when running LLC.
///
/// Return non-zero value on error.
///
static int GenerateAssembly(const std::string &OutputFilename,
                            const std::string &InputFilename,
                            const sys::Path &llc) {
  // Run LLC to convert the bytecode file into assembly code.
  std::vector<const char*> args;
  args.push_back(llc.c_str());
  args.push_back("-f");
  args.push_back("-o");
  args.push_back(OutputFilename.c_str());
  args.push_back(InputFilename.c_str());
  args.push_back(0);

  return sys::Program::ExecuteAndWait(llc,&args[0]);
}

/// GenerateAssembly - generates a native assembly language source file from the
/// specified bytecode file.
static int GenerateCFile(const std::string &OutputFile,
                         const std::string &InputFile,
                         const sys::Path &llc) {
  // Run LLC to convert the bytecode file into C.
  std::vector<const char*> args;
  args.push_back(llc.c_str());
  args.push_back("-march=c");
  args.push_back("-f");
  args.push_back("-o");
  args.push_back(OutputFile.c_str());
  args.push_back(InputFile.c_str());
  args.push_back(0);
  return sys::Program::ExecuteAndWait(llc, &args[0]);
}

/// GenerateNative - generates a native assembly language source file from the
/// specified assembly source file.
///
/// Inputs:
///  InputFilename  - The name of the output bytecode file.
///  OutputFilename - The name of the file to generate.
///  Libraries      - The list of libraries with which to link.
///  LibPaths       - The list of directories in which to find libraries.
///  gcc            - The pathname to use for GGC.
///  envp           - A copy of the process's current environment.
///
/// Outputs:
///  None.
///
/// Returns non-zero value on error.
///
static int GenerateNative(const std::string &OutputFilename,
                          const std::string &InputFilename,
                          const std::vector<std::string> &Libraries,
                          const sys::Path &gcc, char ** const envp) {
  // Remove these environment variables from the environment of the
  // programs that we will execute.  It appears that GCC sets these
  // environment variables so that the programs it uses can configure
  // themselves identically.
  //
  // However, when we invoke GCC below, we want it to use its normal
  // configuration.  Hence, we must sanitize its environment.
  char ** clean_env = CopyEnv(envp);
  if (clean_env == NULL)
    return 1;
  RemoveEnv("LIBRARY_PATH", clean_env);
  RemoveEnv("COLLECT_GCC_OPTIONS", clean_env);
  RemoveEnv("GCC_EXEC_PREFIX", clean_env);
  RemoveEnv("COMPILER_PATH", clean_env);
  RemoveEnv("COLLECT_GCC", clean_env);


  // Run GCC to assemble and link the program into native code.
  //
  // Note:
  //  We can't just assemble and link the file with the system assembler
  //  and linker because we don't know where to put the _start symbol.
  //  GCC mysteriously knows how to do it.
  std::vector<const char*> args;
  args.push_back(gcc.c_str());
  args.push_back("-fno-strict-aliasing");
  args.push_back("-O3");
  args.push_back("-o");
  args.push_back(OutputFilename.c_str());
  args.push_back(InputFilename.c_str());

  // Add in the library paths
  for (unsigned index = 0; index < LibPaths.size(); index++) {
    args.push_back("-L");
    args.push_back(LibPaths[index].c_str());
  }

  // Add the requested options
  for (unsigned index = 0; index < XLinker.size(); index++) {
    args.push_back(XLinker[index].c_str());
    args.push_back(Libraries[index].c_str());
  }

  // Add in the libraries to link.
  for (unsigned index = 0; index < Libraries.size(); index++)
    if (Libraries[index] != "crtend") {
      args.push_back("-l");
      args.push_back(Libraries[index].c_str());
    }

  args.push_back(0);

  // Run the compiler to assembly and link together the program.
  int R = sys::Program::ExecuteAndWait(gcc, &args[0], (const char**)clean_env);
  delete [] clean_env;
  return R;
}

/// EmitShellScript - Output the wrapper file that invokes the JIT on the LLVM
/// bytecode file for the program.
static void EmitShellScript(char **argv) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // Windows doesn't support #!/bin/sh style shell scripts in .exe files.  To
  // support windows systems, we copy the llvm-stub.exe executable from the
  // build tree to the destination file.
  sys::Path llvmstub = FindExecutable("llvm-stub.exe", argv[0]);
  if (llvmstub.isEmpty()) {
    std::cerr << "Could not find llvm-stub.exe executable!\n";
    exit(1);
  }
  sys::CopyFile(sys::Path(OutputFilename), llvmstub);
  return;
#endif

  // Output the script to start the program...
  std::ofstream Out2(OutputFilename.c_str());
  if (!Out2.good())
    exit(PrintAndReturn("error opening '" + OutputFilename + "' for writing!"));

  Out2 << "#!/bin/sh\n";
  // Allow user to setenv LLVMINTERP if lli is not in their PATH.
  Out2 << "lli=${LLVMINTERP-lli}\n";
  Out2 << "exec $lli \\\n";
  // gcc accepts -l<lib> and implicitly searches /lib and /usr/lib.
  LibPaths.push_back("/lib");
  LibPaths.push_back("/usr/lib");
  LibPaths.push_back("/usr/X11R6/lib");
  // We don't need to link in libc! In fact, /usr/lib/libc.so may not be a
  // shared object at all! See RH 8: plain text.
  std::vector<std::string>::iterator libc =
    std::find(Libraries.begin(), Libraries.end(), "c");
  if (libc != Libraries.end()) Libraries.erase(libc);
  // List all the shared object (native) libraries this executable will need
  // on the command line, so that we don't have to do this manually!
  for (std::vector<std::string>::iterator i = Libraries.begin(),
         e = Libraries.end(); i != e; ++i) {
    sys::Path FullLibraryPath = sys::Path::FindLibrary(*i);
    if (!FullLibraryPath.isEmpty() && FullLibraryPath.isDynamicLibrary())
      Out2 << "    -load=" << FullLibraryPath.toString() << " \\\n";
  }
  Out2 << "    $0.bc ${1+\"$@\"}\n";
  Out2.close();
}

// BuildLinkItems -- This function generates a LinkItemList for the LinkItems
// linker function by combining the Files and Libraries in the order they were
// declared on the command line.
static void BuildLinkItems(
  Linker::ItemList& Items,
  const cl::list<std::string>& Files,
  const cl::list<std::string>& Libraries) {

  // Build the list of linkage items for LinkItems.

  cl::list<std::string>::const_iterator fileIt = Files.begin();
  cl::list<std::string>::const_iterator libIt  = Libraries.begin();

  int libPos = -1, filePos = -1;
  while ( libIt != Libraries.end() || fileIt != Files.end() ) {
    if (libIt != Libraries.end())
      libPos = Libraries.getPosition(libIt - Libraries.begin());
    else
      libPos = -1;
    if (fileIt != Files.end())
      filePos = Files.getPosition(fileIt - Files.begin());
    else
      filePos = -1;

    if (filePos != -1 && (libPos == -1 || filePos < libPos)) {
      // Add a source file
      Items.push_back(std::make_pair(*fileIt++, false));
    } else if (libPos != -1 && (filePos == -1 || libPos < filePos)) {
      // Add a library
      Items.push_back(std::make_pair(*libIt++, true));
    }
  }
}

// Rightly this should go in a header file but it just seems such a waste.
namespace llvm {
extern void Optimize(Module*);
}

int main(int argc, char **argv, char **envp) {
  try {
    // Initial global variable above for convenience printing of program name.
    progname = sys::Path(argv[0]).getBasename();
    Linker TheLinker(progname, OutputFilename, Verbose);

    // Parse the command line options
    cl::ParseCommandLineOptions(argc, argv, " llvm linker\n");
    sys::PrintStackTraceOnErrorSignal();

    // Set up the library paths for the Linker
    TheLinker.addPaths(LibPaths);
    TheLinker.addSystemPaths();

    // Remove any consecutive duplicates of the same library...
    Libraries.erase(std::unique(Libraries.begin(), Libraries.end()),
                    Libraries.end());

    if (LinkAsLibrary) {
      std::vector<sys::Path> Files;
      for (unsigned i = 0; i < InputFilenames.size(); ++i )
        Files.push_back(sys::Path(InputFilenames[i]));
      if (TheLinker.LinkInFiles(Files))
        return 1; // Error already printed

      // The libraries aren't linked in but are noted as "dependent" in the
      // module.
      for (cl::list<std::string>::const_iterator I = Libraries.begin(),
           E = Libraries.end(); I != E ; ++I) {
        TheLinker.getModule()->addLibrary(*I);
      }
    } else {
      // Build a list of the items from our command line
      Linker::ItemList Items;
      Linker::ItemList NativeItems;
      BuildLinkItems(Items, InputFilenames, Libraries);

      // Link all the items together
      if (TheLinker.LinkInItems(Items,NativeItems) )
        return 1;
    }

    std::auto_ptr<Module> Composite(TheLinker.releaseModule());

    // Optimize the module
    Optimize(Composite.get());

    // Generate the bytecode for the optimized module.
    std::string RealBytecodeOutput = OutputFilename;
    if (!LinkAsLibrary) RealBytecodeOutput += ".bc";
    GenerateBytecode(Composite.get(), RealBytecodeOutput);

    // If we are not linking a library, generate either a native executable
    // or a JIT shell script, depending upon what the user wants.
    if (!LinkAsLibrary) {
      // If the user wants to run a post-link optimization, run it now.
      if (!PostLinkOpts.empty()) {
        std::vector<std::string> opts = PostLinkOpts;
        for (std::vector<std::string>::iterator I = opts.begin(),
             E = opts.end(); I != E; ++I) {
          sys::Path prog(*I);
          if (!prog.canExecute()) {
            prog = sys::Program::FindProgramByName(*I);
            if (prog.isEmpty())
              return PrintAndReturn(std::string("Optimization program '") + *I +
                "' is not found or not executable.");
          }
          // Get the program arguments
          sys::Path tmp_output("opt_result");
          if (!tmp_output.createTemporaryFileOnDisk()) {
            return PrintAndReturn(
              "Can't create temporary file for post-link optimization");
          }
          const char* args[4];
          args[0] = I->c_str();
          args[1] = RealBytecodeOutput.c_str();
          args[2] = tmp_output.c_str();
          args[3] = 0;
          if (0 == sys::Program::ExecuteAndWait(prog, args)) {
            if (tmp_output.isBytecodeFile()) {
              sys::Path target(RealBytecodeOutput);
              target.eraseFromDisk();
              tmp_output.renamePathOnDisk(target);
            } else
              return PrintAndReturn(
                "Post-link optimization output is not bytecode");
          }
        }
      }

      // If the user wants to generate a native executable, compile it from the
      // bytecode file.
      //
      // Otherwise, create a script that will run the bytecode through the JIT.
      if (Native) {
        // Name of the Assembly Language output file
        sys::Path AssemblyFile ( OutputFilename);
        AssemblyFile.appendSuffix("s");

        // Mark the output files for removal if we get an interrupt.
        sys::RemoveFileOnSignal(AssemblyFile);
        sys::RemoveFileOnSignal(sys::Path(OutputFilename));

        // Determine the locations of the llc and gcc programs.
        sys::Path llc = FindExecutable("llc", argv[0]);
        if (llc.isEmpty())
          return PrintAndReturn("Failed to find llc");

        sys::Path gcc = FindExecutable("gcc", argv[0]);
        if (gcc.isEmpty())
          return PrintAndReturn("Failed to find gcc");

        // Generate an assembly language file for the bytecode.
        if (Verbose) std::cout << "Generating Assembly Code\n";
        GenerateAssembly(AssemblyFile.toString(), RealBytecodeOutput, llc);
        if (Verbose) std::cout << "Generating Native Code\n";
        GenerateNative(OutputFilename, AssemblyFile.toString(), Libraries,
                       gcc, envp);

        // Remove the assembly language file.
        AssemblyFile.eraseFromDisk();
      } else if (NativeCBE) {
        sys::Path CFile (OutputFilename);
        CFile.appendSuffix("cbe.c");

        // Mark the output files for removal if we get an interrupt.
        sys::RemoveFileOnSignal(CFile);
        sys::RemoveFileOnSignal(sys::Path(OutputFilename));

        // Determine the locations of the llc and gcc programs.
        sys::Path llc = FindExecutable("llc", argv[0]);
        if (llc.isEmpty())
          return PrintAndReturn("Failed to find llc");

        sys::Path gcc = FindExecutable("gcc", argv[0]);
        if (gcc.isEmpty())
          return PrintAndReturn("Failed to find gcc");

        // Generate an assembly language file for the bytecode.
        if (Verbose) std::cout << "Generating Assembly Code\n";
        GenerateCFile(CFile.toString(), RealBytecodeOutput, llc);
        if (Verbose) std::cout << "Generating Native Code\n";
        GenerateNative(OutputFilename, CFile.toString(), Libraries, gcc, envp);

        // Remove the assembly language file.
        CFile.eraseFromDisk();

      } else {
        EmitShellScript(argv);
      }

      // Make the script executable...
      sys::Path(OutputFilename).makeExecutableOnDisk();

      // Make the bytecode file readable and directly executable in LLEE as well
      sys::Path(RealBytecodeOutput).makeExecutableOnDisk();
      sys::Path(RealBytecodeOutput).makeReadableOnDisk();
    }

    return 0;
  } catch (const std::string& msg) {
    std::cerr << argv[0] << ": " << msg << "\n";
  } catch (...) {
    std::cerr << argv[0] << ": Unexpected unknown exception occurred.\n";
  }
  return 1;
}