llvm.org GIT mirror: llvm / 0095054
Initial revision
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@2 91177308-0d34-0410-b5e6-96231b3b80d8
Chris Lattner, 18 years ago
145 changed files with 19,198 additions and 0 deletions.
LEVEL = .
DIRS = lib tools

include $(LEVEL)/Makefile.common

test :: all
	cd test; $(MAKE)
# Makefile.common
#
# This file is included by all of the LLVM makefiles. This file defines common
# rules to do things like compile a .cpp file or generate dependency info.
# These are platform dependent, so this is the file used to specify these
# system dependent operations.
#
# The following functionality may be set by setting incoming variables:
#
# 1. LEVEL - The level of the current subdirectory from the top of the
#    MagicStats view. This level should be expressed as a path, for
#    example, ../.. for two levels deep.
#
# 2. DIRS - A list of subdirectories to be built. Fake targets are set up
#    so that each of the targets "all", "install", and "clean" builds
#    the subdirectories before the local target.
#
# 3. Source - If specified, this sets the source code filenames. If this
#    is not set, it defaults to all of the .cpp, .c, .y, and .l files
#    in the current directory.
#

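# As a sketch of how these variables fit together, a subdirectory makefile
# that builds a library could look like the following (the directory depth
# and library name here are illustrative, not part of this revision):
#
#   LEVEL = ../..
#   LIBRARYNAME = example
#   include $(LEVEL)/Makefile.common
#
# Everything else (object lists, Debug/Release rules, dependency generation)
# is inherited from this file.
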
# Default Rule:
all ::

# Default for install is to at least build everything...
install ::

#--------------------------------------------------------------------
# Installation configuration options...
#--------------------------------------------------------------------

#BinInstDir=/usr/local/bin
#LibInstDir=/usr/local/lib/xxx
#DocInstDir=/usr/doc/xxx

#---------------------------------------------------------
# Compilation options...
#---------------------------------------------------------

# Add -L options to the link command lines...
LibPathsO = -L $(LEVEL)/lib/VMCore/Release \
            -L $(LEVEL)/lib/Assembly/Parser/Release \
            -L $(LEVEL)/lib/Assembly/Writer/Release \
            -L $(LEVEL)/lib/Analysis/Release \
            -L $(LEVEL)/lib/Bytecode/Writer/Release \
            -L $(LEVEL)/lib/Bytecode/Reader/Release \
            -L $(LEVEL)/lib/Optimizations/Release

# Same paths with Release rewritten to Debug by a substitution reference.
LibPathsG = $(LibPathsO:Release=Debug)

# Enable this for profiling support with 'gprof'
#Prof = -pg

# TODO: Get rid of exceptions! : -fno-exceptions -fno-rtti
CompileCommonOpts = $(Prof) -Wall -Winline -W -Wwrite-strings -Wno-unused -I$(LEVEL)/include

# Compile a file, don't link...
Compile  = $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $(CompileCommonOpts)
CompileG = $(Compile) -g -D_DEBUG
# Add this for DebugMalloc: -fno-defer-pop
CompileO = $(Compile) -O3 -DNDEBUG -finline-functions -felide-constructors -fnonnull-objects -freg-struct-return -fshort-enums

# Link final executable
Link  = $(CXX) $(Prof)
LinkG = $(Link) -g $(LibPathsG)
LinkO = $(Link) -O3 $(LibPathsO)

# Create a .so file from a .cpp file...
#MakeSO = $(CXX) -shared $(Prof)
MakeSO  = $(CXX) -G $(Prof)
MakeSOG = $(MakeSO) -g
MakeSOO = $(MakeSO) -O3

# Create dependency file from CPP file, send to stdout.
Depend = $(CXX) -MM -I$(LEVEL)/include $(CPPFLAGS)

# Archive a bunch of .o files into a .a file...
AR = ar cq

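# As a rough sketch of how these compose (assuming CXX is g++ and Prof is
# unset; the file name Foo.cpp is illustrative), a debug compile expands to
# approximately:
#
#   g++ -c $(CPPFLAGS) $(CXXFLAGS) -Wall -Winline -W -Wwrite-strings \
#       -Wno-unused -I$(LEVEL)/include -g -D_DEBUG Foo.cpp -o Debug/Foo.o
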
#----------------------------------------------------------

# Source includes all of the cpp files, and objects are derived from the
# source files...
ifndef Source
Source = $(wildcard *.cpp *.c *.y *.l)
endif
Objs = $(sort $(addsuffix .o,$(basename $(Source))))
ObjectsO = $(addprefix Release/,$(Objs))
ObjectsG = $(addprefix Debug/,$(Objs))

#---------------------------------------------------------
# Handle the DIRS option
#---------------------------------------------------------

ifdef DIRS  # Only do this if we're using DIRS!

all     :: $(addsuffix /.makeall    , $(DIRS))
install :: $(addsuffix /.makeinstall, $(DIRS))
clean   :: $(addsuffix /.makeclean  , $(DIRS))

%/.makeall %/.makeclean %/.makeinstall:
	cd $(@D); $(MAKE) $(subst $(@D)/.make,,$@)
endif
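
# To make the pattern rule above concrete: with DIRS = lib tools (as in the
# top-level makefile), 'all' depends on lib/.makeall and tools/.makeall.
# Building lib/.makeall matches the pattern rule, so $(@D) is 'lib' and the
# $(subst) strips 'lib/.make' from the target name, leaving just 'all':
#
#   cd lib; $(MAKE) all
#
# The same single recipe therefore serves all three fake targets.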

#---------------------------------------------------------
# Handle the LIBRARYNAME option - used when building libs...
#---------------------------------------------------------

ifdef LIBRARYNAME
LIBNAME_O := Release/lib$(LIBRARYNAME).so
LIBNAME_G := Debug/lib$(LIBRARYNAME).so

all:: $(LIBNAME_G)
#$(LIBNAME_O)
# TODO: Enable optimized builds

$(LIBNAME_O): $(ObjectsO) $(LibSubDirs) Release/.dir
	@echo ======= Linking $(LIBRARYNAME) release library =======
	$(MakeSOO) -o $@ $(ObjectsO) $(LibSubDirs) $(LibLinkOpts)

$(LIBNAME_G): $(ObjectsG) $(LibSubDirs) Debug/.dir
	@echo ======= Linking $(LIBRARYNAME) debug library =======
	$(MakeSOG) -o $@ $(ObjectsG) $(LibSubDirs) $(LibLinkOpts)

endif


#---------------------------------------------------------

# Create dependencies for the cpp files...
Depend/%.d: %.cpp Depend/.dir
	$(Depend) $< | sed 's|$*\.o *|Release/& Debug/& Depend/$(@F)|g' > $@

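# A sketch of what the sed rewrite does (the file names are illustrative):
# for Foo.cpp, the compiler's -MM output is a line like
#
#   Foo.o: Foo.cpp Foo.h
#
# and the substitution replaces the 'Foo.o' target with all three derived
# targets, producing
#
#   Release/Foo.o Debug/Foo.o Depend/Foo.d: Foo.cpp Foo.h
#
# so both object flavors and the .d file itself are refreshed whenever a
# header changes.
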
# Create .o files in the ObjectFiles directory from the .cpp files...
Release/%.o: %.cpp Release/.dir Depend/.dir
	$(CompileO) $< -o $@

Debug/%.o: %.cpp Debug/.dir Depend/.dir
	$(CompileG) $< -o $@

# Create a .cpp source file from a flex input file... this uses sed to cut down
# on the warnings emitted by GCC...
%.cpp: %.l
	flex -t $< | sed '/^find_rule/d' | sed 's/void yyunput/inline void yyunput/' | sed 's/void \*yy_flex_realloc/inline void *yy_flex_realloc/' > $@

# Rule for building the bison parsers...

%.cpp %.h : %.y
	bison -d -p $(<:%Parser.y=%) $(basename $@).y
	mv -f $(basename $@).tab.c $(basename $@).cpp
	mv -f $(basename $@).tab.h $(basename $@).h

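# A worked example of the bison rule (the grammar name is hypothetical):
# for a grammar llvmAsmParser.y, the substitution $(<:%Parser.y=%) strips
# the 'Parser.y' suffix, so bison runs as
#
#   bison -d -p llvmAsm llvmAsmParser.y
#
# prefixing the generated symbols with 'llvmAsm' instead of 'yy', and the
# two mv commands rename the emitted .tab.c/.tab.h pair to
# llvmAsmParser.cpp and llvmAsmParser.h.
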
# To create the directories... (the .dir file records when the directory
# was made, giving the pattern rule a stable timestamp to depend on)
%/.dir:
	mkdir -p $(@D)
	@date > $@

# Clean does not remove the output files... just the temporaries
clean::
	rm -rf Debug Release Depend
	rm -f core *.o *.d *.so *~ *.flc

# If dependencies were generated for the file that included this file,
# include the dependencies now...
#
SourceDepend = $(addsuffix .d,$(addprefix Depend/,$(basename $(Source))))
ifneq ($(SourceDepend),)
include $(SourceDepend)
endif
* Provide a pass that eliminates critical edges from the CFG
* Provide a print hook to print out xvcg format files for vis
* I need to provide an option to the bytecode loader to ignore memory
  dependence edges. Instead, the VM would just treat memory operations
  (load, store, getfield, putfield, call) as pinned instructions.
* I need to have a way to prevent taking the address of a constant pool
  reference. You should only be able to take the address of a variable.
  Maybe taking the address of a constant copies it? What about virtual
  function tables? Maybe a const pointer would be better...
* Structures should be accessed something like this: ubyte is ok. Limits
  structure size to 256 members. This can be fixed later by either:
    1. adding a variant that takes ushort
    2. splitting structures into nested structures each of half size
  %f = loadfield *{int, {float}} Str, ubyte 1, ubyte 0
  storefield float %f, *{int, {float}} Str, ubyte 1, ubyte 0
* I'm noticing that I write a lot of code that looks like this (dtor material here):
  ConstPool.dropAllReferences();
  ConstPool.delete_all();
  ConstPool.setParent(0);
  ~ConstPool

* Need a way to attach bytecode block info at various levels of asm code.
* Rename "ConstantPool" to "ConstPool"
* Maybe ConstantPool objects should keep themselves sorted as things are
  inserted.
* Need to be able to inflate recursive types. %x = { *%x }, %x = %x ()
* Recognize and save comments in assembly and bytecode format
* Encode line number table in bytecode (like #line), optional table

* Encode negative relative offsets in the bytecode file

* Implement switch to switch on a constant pool array of type:
  [{ label, int }] or [label] (lookup vs index switch)
* Apparently bison has a %pure_parser option. Maybe useful for Assembly/Parser

* Implement a header file that can read either assembly or bytecode, implement
  a writer that can output either based on what is read with this reader..
* Implement the following derived types:
    * structure/record: { int %foo, int %bar} or { %foo = int, int }
    * pointer: int *
    * "packed format", like this: [4 x sbyte]: Packed SIMD datatype
* Maybe 'tailcall' also?
* It might be nice to support enumerations of some sort... especially for use
  as a compiler IR
* Include a method level bytecode block that defines a mapping between values
  and registers that defines a minimally register allocated code. This can
  make me finally address how to encode extensions in assembly.
* Bytecode reader should use extensions that may or may not be linked into the
  application to read blocks. Thus an easy way to ignore symbol table info
  would be to not link in that reader into the app.
Date: Sat, 18 Nov 2000 09:19:35 -0600 (CST)
From: Vikram Adve
To: Chris Lattner
Subject: a few thoughts

I've been mulling over the virtual machine problem and I had some
thoughts about some things for us to think about and discuss:

1. We need to be clear on our goals for the VM. Do we want to emphasize
portability and safety like the Java VM? Or shall we focus on the
architecture interface first (i.e., consider the code generation and
processor issues), since the architecture interface question is also
important for portable Java-type VMs?

This is important because the audiences for these two goals are very
different. Architects and many compiler people care much more about
the second question. The Java compiler and OS community care much more
about the first one.

Also, while the architecture interface question is important for
Java-type VMs, the design constraints are very different.


2. Design issues to consider (an initial list that we should continue
to modify). Note that I'm not trying to suggest actual solutions here,
but just various directions we can pursue:

a. A single-assignment VM, which we've both already been thinking about.

b. A strongly-typed VM. One question is do we need the types to be
explicitly declared or should they be inferred by the dynamic compiler?

c. How do we get more high-level information into the VM while keeping
to a low-level VM design?

   o Explicit array references as operands? An alternative is
     to have just an array type, and let the index computations be
     separate 3-operand instructions.

   o Explicit instructions to handle aliasing, e.g.:
     -- an instruction to say "I speculate that these two values are not
        aliased, but check at runtime", like speculative execution in
        EPIC?
     -- or an instruction to check whether two values are aliased and
        execute different code depending on the answer, somewhat like
        predicated code in EPIC

   o (This one is a difficult but powerful idea.)
     A "thread-id" field on every instruction that allows the static
     compiler to generate a set of parallel threads, and then have
     the runtime compiler and hardware do what they please with it.
     This has very powerful uses, but thread-id on every instruction
     is expensive in terms of instruction size and code size.
     We would need to compactly encode it somehow.

     Also, this will require some reading on at least two other
     projects:
     -- Multiscalar architecture from Wisconsin
     -- Simultaneous multithreading architecture from Washington

   o Or forget all this and stick to a traditional instruction set?


BTW, on an unrelated note, after the meeting yesterday, I did remember
that you had suggested doing instruction scheduling on SSA form instead
of a dependence DAG earlier in the semester. When we talked about
it yesterday, I didn't remember where the idea had come from but I
remembered later. Just giving credit where it's due...

Perhaps you can save the above as a file under RCS so you and I can
continue to expand on this.

--Vikram

Date: Sun, 19 Nov 2000 16:23:57 -0600 (CST)
From: Chris Lattner
To: Vikram Adve
Subject: Re: a few thoughts

Okay... here are a few of my thoughts on this (it's good to know that we
think so alike!):

> 1. We need to be clear on our goals for the VM. Do we want to emphasize
> portability and safety like the Java VM? Or shall we focus on the
> architecture interface first (i.e., consider the code generation and
> processor issues), since the architecture interface question is also
> important for portable Java-type VMs?

I foresee the architecture looking kinda like this: (which is completely
subject to change)

1. The VM code is NOT guaranteed safe in a Java sense. Doing so makes it
   basically impossible to support C-like languages. Besides that,
   certifying a register based language as safe at run time would be a
   pretty expensive operation to have to do. Additionally, we would like
   to be able to statically eliminate many bounds checks in Java
   programs... for example.

2. Instead, we can do the following (eventually):
   * Java bytecode is used as our "safe" representation (to avoid
     reinventing something that we don't add much value to). When the
     user chooses to execute Java bytecodes directly (i.e., not
     precompiled) the runtime compiler can do some very simple
     transformations (JIT style) to convert it into valid input for our
     VM. Performance is not wonderful, but it works right.
   * The file is scheduled to be compiled (rigorously) at a later
     time. This could be done by some background process or by a second
     processor in the system during idle time or something...
   * To keep things "safe", i.e., to enforce a sandbox on Java/foreign code,
     we could sign the generated VM code with a host specific private
     key. Then before the code is executed/loaded, we can check to see if
     the trusted compiler generated the code. This would be much quicker
     than having to validate consistency (especially if bounds checks have
     been removed, for example)

> This is important because the audiences for these two goals are very
> different. Architects and many compiler people care much more about
> the second question. The Java compiler and OS community care much more
> about the first one.

3. By focusing on a more low level virtual machine, we have much more room
   for value add. The nice safe "sandbox" VM can be provided as a layer
   on top of it. It also lets us focus on the more interesting
   compiler-related projects.

> 2. Design issues to consider (an initial list that we should continue
> to modify). Note that I'm not trying to suggest actual solutions here,
> but just various directions we can pursue:

Understood. :)

> a. A single-assignment VM, which we've both already been thinking
> about.

Yup, I think that this makes a lot of sense. I am still intrigued,
however, by the prospect of a minimally allocated VM representation... I
think that it could have definite advantages for certain applications
(think very small machines, like PDAs). I don't, however, think that our
initial implementations should focus on this. :)

Here are some other auxiliary goals that I think we should consider:

1. Primary goal: Support a high performance dynamic compilation
   system. This means that we have an "ideal" division of labor between
   the runtime and static compilers. Of course, the other goals of the
   system somewhat reduce the importance of this point (e.g. portability
   reduces performance, but hopefully not much)
2. Portability to different processors. Since we are most familiar with
   x86 and Solaris, I think that these two are excellent candidates when
   we get that far...
3. Support for all languages & styles of programming (general purpose
   VM). This is the point that disallows Java style bytecodes, where all
   array refs are checked for bounds, etc...
4. Support linking between different language families. For example, call
   C functions directly from Java without using the nasty/slow/gross JNI
   layer. This involves several subpoints:
   A. Support for languages that require garbage collectors and integration
      with languages that don't. As a base point, we could insist on
      always using a conservative GC, but implement free as a noop, for
      example.

> b. A strongly-typed VM. One question is do we need the types to be
> explicitly declared or should they be inferred by the dynamic
> compiler?

   B. This is kind of similar to another idea that I have: make OOP
      constructs (virtual function tables, class hierarchies, etc) explicit
      in the VM representation. I believe that the number of additional
      constructs would be fairly low, but would give us lots of important
      information... something else that would/could be important is to
      have exceptions as first class types so that they would be handled in
      a uniform way for the entire VM... so that C functions can call Java
      functions for example...

> c. How do we get more high-level information into the VM while keeping
> to a low-level VM design?
>    o Explicit array references as operands? An alternative is
>      to have just an array type, and let the index computations be
>      separate 3-operand instructions.

   C. In the model I was thinking of (subject to change of course), we
      would just have an array type (distinct from the pointer
      types). This would allow us to have arbitrarily complex index
      expressions, while still distinguishing "load" from "Array load",
      for example. Perhaps also, switch jump tables would be first class
      types as well? This would allow better reasoning about the program.

5. Support dynamic loading of code from various sources. Already
   mentioned above was the example of loading Java bytecodes, but we want
   to support dynamic loading of VM code as well. This makes the job of
   the runtime compiler much more interesting: it can do interprocedural
   optimizations that the static compiler can't do, because it doesn't
   have all of the required information (for example, inlining from
   shared libraries, etc...)

6. Define a set of generally useful annotations to add to the VM
   representation. For example, a function can be analyzed to see if it
   has any side effects when run... also, the MOD/REF sets could be
   calculated, etc... we would have to determine what is reasonable. This
   would generally be used to make IP optimizations cheaper for the
   runtime compiler...

> o Explicit instructions to handle aliasing, e.g.:
> -- an instruction to say "I speculate that these two values are not
>    aliased, but check at runtime", like speculative execution in
>    EPIC?
> -- or an instruction to check whether two values are aliased and
>    execute different code depending on the answer, somewhat like
>    predicated code in EPIC

These are also very good points... if this can be determined at compile
time. I think that an EPIC style of representation (not the instruction
packing, just the information presented) could be a very interesting model
to use... more later...

> o (This one is a difficult but powerful idea.)
>   A "thread-id" field on every instruction that allows the static
>   compiler to generate a set of parallel threads, and then have
>   the runtime compiler and hardware do what they please with it.
>   This has very powerful uses, but thread-id on every instruction
>   is expensive in terms of instruction size and code size.
>   We would need to compactly encode it somehow.

Yes yes yes! :) I think it would be *VERY* useful to include this kind
of information (which EPIC architectures *implicitly* encode). The trend
that we are seeing supports this greatly:

1. Commodity processors are getting massive SIMD support:
   * Intel/AMD MMX/MMX2
   * AMD's 3DNow!
   * Intel's SSE/SSE2
   * Sun's VIS
2. SMP is becoming much more common, especially in the server space.
3. Multiple processors on a die are right around the corner.

If nothing else, not designing this in would severely limit our future
expansion of the project...

> Also, this will require some reading on at least two other
> projects:
> -- Multiscalar architecture from Wisconsin
> -- Simultaneous multithreading architecture from Washington
>
> o Or forget all this and stick to a traditional instruction set?

Heh... :) Well, from a pure research point of view, it is almost more
attractive to go with the most extreme/different ISA possible. On one axis
you get safety and conservatism, and on the other you get degree of
influence that the results have. Of course the problem with pure research
is that oftentimes there is no concrete product of the research... :)

> BTW, on an unrelated note, after the meeting yesterday, I did remember
> that you had suggested doing instruction scheduling on SSA form instead
> of a dependence DAG earlier in the semester. When we talked about
> it yesterday, I didn't remember where the idea had come from but I
> remembered later. Just giving credit where it's due...

:) Thanks.

> Perhaps you can save the above as a file under RCS so you and I can
> continue to expand on this.

I think it makes sense to do so when we get our ideas more formalized and
bounce them back and forth a couple of times... then I'll do a more formal
writeup of our goals and ideas. Obviously our first implementation will
not want to do all of the stuff that I pointed out above... but we will
want to design the project so that we do not artificially limit ourselves
at some time in the future...

Anyways, let me know what you think about these ideas... and if they sound
reasonable...

-Chris

From: Chris Lattner [mailto:sabre@nondot.org]
Sent: Wednesday, December 06, 2000 6:41 PM
To: Vikram S. Adve
Subject: Additional idea with respect to encoding

Here's another idea with respect to keeping the common case instruction
size down (less than 32 bits ideally):

Instead of encoding an instruction to operate on two register numbers,
have it operate on two negative offsets based on the current register
number. Therefore, instead of using:

   r57 = add r55, r56    (r57 is the implicit dest register, of course)

We could use:

   r57 = add -2, -1

My guess is that most SSA references are to recent values (especially if
they correspond to expressions like (x+y*z+p*q/ ...)), so the negative
numbers would tend to stay small, even at the end of the procedure (where
the implicit register destination number could be quite large). Of course
the negative sign is redundant, so you would be storing small integers
almost all of the time, and 5-6 bits worth of register number would be
plenty for most cases...

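To make the offsets concrete, here is a slightly longer sketch (the
register numbers are illustrative, following the notation above):

   r10 = mul r8, r9     encoded as:  mul -2, -1
   r11 = add r7, r10    encoded as:  add -4, -1
   r12 = sub r11, r8    encoded as:  sub -1, -4

Each operand is just the distance back from the implicit destination to
the defining register, so chained expressions mostly produce -1s and -2s
no matter how large the register numbers grow.
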
What do you think?

-Chris

SUMMARY
-------

We met to discuss the LLVM instruction format and bytecode representation:

ISSUES RESOLVED
---------------

1. We decided that we shall use a flat namespace to represent our
   variables in SSA form, as opposed to having a two dimensional namespace
   of the original variable and the SSA instance subscript.

   ARGUMENT AGAINST:
   * A two dimensional namespace would be valuable when doing alias
     analysis because the extra information can help limit the scope of
     analysis.

   ARGUMENT FOR:
   * Including this information would require that all users of the LLVM
     bytecode would have to parse and handle it. This would slow down the
     common case and inflate the instruction representation with another
     infinite variable space.

   REASONING:
   * It was decided that, because original variable sources could be
     reconstructed from SSA form in linear time, it would be an
     unjustified expense for the common case to include the extra
     information for one optimization. Alias analysis itself is typically
     greater than linear in asymptotic complexity, so this extra analysis
     would not affect the runtime of the optimization in a significant
     way. Additionally, this would be an unlikely optimization to do at
     runtime.


IDEAS TO CONSIDER
-----------------

1. Including dominator information in the LLVM bytecode
   representation. This is one example of an analysis result that may be
   packaged with the bytecodes themselves. As a conceptual implementation
   idea, we could include an immediate dominator number for each basic block
   in the LLVM bytecode program. Basic blocks could be numbered according
   to the order of occurrence in the bytecode representation (a small
   worked example follows this list).

2. Including loop header and body information. This would facilitate
   detection of intervals and natural loops.
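
As a sketch of idea 1 (the numbering here is illustrative, not a decided
encoding): for a method with four basic blocks, numbered 0-3 in order of
occurrence, an immediate dominator table of

   block:  0  1  2  3
   idom :  -  0  0  1

says that block 0 (the entry) immediately dominates blocks 1 and 2, and
block 1 immediately dominates block 3, so a consumer could rebuild the
whole dominator tree without rerunning the analysis.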

UNRESOLVED ISSUES
-----------------

1. Will oSUIF provide enough of an infrastructure to support the research
   that we will be doing? We know that it has less than stellar
   performance, but hope that this will be of little importance for our
   static compiler. This could affect us if we decided to do some IP
   research. Also we do not yet understand the level of exception support
   currently implemented.

2. Should we consider the requirements of a direct hardware implementation
   of the LLVM when we design it? If so, several design issues should
   have their priorities shifted. The other option is to focus on a
   software layer interpreting the LLVM in all cases.

3. Should we use some form of packetized format to improve forward
   compatibility? For example, we could design the system to encode a
   packet type and length field before analysis information, to allow a
   runtime to skip information that it didn't understand in a bytecode
   stream (see the layout sketch after this list). The obvious benefit
   would be for compatibility; the drawback is that it would tend to
   splinter the 'standard' LLVM definition.

4. Should we use fixed length instructions or variable length
   instructions? Fetching variable length instructions is expensive (for
   either hardware or software based LLVM runtimes), but we have several
   'infinite' spaces that instructions operate in (SSA register numbers,
   type spaces, or packet length [if packets were implemented]). Several
   options were mentioned including:
   A. Using 16 or 32 bit numbers, which would be 'big enough'
   B. A scheme similar to how UTF-8 works, to encode infinite numbers
      while keeping small numbers small (sketched after this list).
   C. Use something similar to Huffman encoding, so that the most common
      numbers are the smallest.
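
Two sketches for the items above (the field widths and byte values are
illustrative, nothing here was decided):

For the packetized format of issue 3, each block of analysis information
could carry a small header:

   | packet type (16 bits) | payload length (32 bits) | payload ... |

A reader that does not recognize the packet type just skips 'length'
bytes and keeps going, so older runtimes tolerate newer analysis sections.

For option 4B, a UTF-8-like scheme could use the high bit of each byte as
a "more bytes follow" flag, leaving 7 payload bits per byte:

   90  -> 0x5A        (one byte: the value fits in 7 bits)
   300 -> 0xAC 0x02   (two bytes: low 7 bits first, continuation bit set)

Small SSA register numbers and type ids stay at one byte while the number
space remains unbounded.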

-Chris

Date: Wed, 31 Jan 2001 12:04:33 -0600
From: Vikram S. Adve
To: Chris Lattner
Subject: another thought

I have a budding idea about making LLVM a little more ambitious: a
customizable runtime system that can be used to implement language-specific
virtual machines for many different languages. E.g., a C vm, a C++ vm, a
Java vm, a Lisp vm, ...

The idea would be that LLVM would provide a standard set of runtime features
(some low-level like standard assembly instructions with code generation and
static and runtime optimization; some higher-level like type-safety and
perhaps a garbage collection library). Each language vm would select the
runtime features needed for that language, extending or customizing them as
needed. Most of the machine-dependent code-generation and optimization
features as well as low-level machine-independent optimizations (like PRE)
could be provided by LLVM and should be sufficient for any language,
simplifying the language compiler. (This would also help interoperability
between languages.) Also, some or most of the higher-level
machine-independent features like type-safety and access safety should be
reusable by different languages, with minor extensions. The language
compiler could then focus on language-specific analyses and optimizations.

The risk is that this sounds like a universal IR -- something that the
compiler community has tried and failed to develop for decades, and is
universally skeptical about. No matter what we say, we won't be able to
convince anyone that we have a universal IR that will work. We need to
think about whether LLVM is different or if it has something novel that might
convince people. E.g., the idea of providing a package of separable
features that different languages select from. Also, using SSA with or
without type-safety as the intermediate representation.

One interesting starting point would be to discuss how a JVM would be
implemented on top of LLVM a bit more. That might give us clues on how to
structure LLVM to support one or more language VMs.

--Vikram

Date: Tue, 6 Feb 2001 20:27:37 -0600 (CST)
From: Chris Lattner
To: Vikram S. Adve
Subject: Type notation debate...

This is the way that I am currently planning on implementing types:

Primitive Types:
   type ::= void|bool|sbyte|ubyte|short|ushort|int|uint|long|ulong

Method:
   typelist ::= typelisth | /*empty*/
   typelisth ::= type | typelisth ',' type
   type ::= type (typelist)

Arrays (without and with size):
   type ::= '[' type ']' | '[' INT ',' type ']'

Pointer:
   type ::= type '*'

Structure:
   type ::= '{' typelist '}'

Packed:
   type ::= '<' INT ',' type '>'

Simple examples:

   [[ %4, int ]]            - array of (array of 4 (int))
   [ { int, int } ]         - Array of structure
   [ < %4, int > ]          - Array of 128 bit SIMD packets
   int (int, [[ %4, int ]]) - Method taking an int and a 2-D array, returning int

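As a quick walkthrough of how the grammar yields the second example, each
step below applies one production from the rules above:

   type
   -> '[' type ']'               (array without size)
   -> '[' '{' typelist '}' ']'   (structure)
   -> [ { int, int } ]           (typelist expands to 'int , int')
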

Okay before you comment, please look at:

http://www.research.att.com/~bs/devXinterview.html

Search for "In another interview, you defined the C declarator syntax as
an experiment that failed. However, this syntactic construct has been
around for 27 years and perhaps more; why do you consider it problematic
(except for its cumbersome syntax)?" and read that response for me. :)

Now with this syntax, his example would be represented as:

   [ %10, bool (int, int) * ] *

vs

   bool (*(*)[10])(int, int)

in C.

Basically, my argument for this type construction system is that it is
VERY simple to use and understand (although it IS different than C, it is
very simple and straightforward, which C is NOT). In fact, I would assert
that most programmers TODAY do not understand pointers to member
functions, and have to look up an example when they have to write them.

In my opinion, it is critically important to have clear and concise type
specifications, because types are going to be all over the programs.

Let me know your thoughts on this. :)

-Chris

Date: Thu, 8 Feb 2001 08:42:04 -0600
From: Vikram S. Adve
To: Chris Lattner
Subject: RE: Type notation debate...

Chris,

> Okay before you comment, please look at:
>
> http://www.research.att.com/~bs/devXinterview.html

I read this argument. Even before that, I was already in agreement with you
and him that the C declarator syntax is difficult and confusing.

But in fact, if you read the entire answer carefully, he came to the same
conclusion I do: that you have to go with familiar syntax over logical
syntax because familiarity is such a strong force:

   "However, familiarity is a strong force. To compare, in English, we live
   more or less happily with the absurd rules for "to be" (am, are, is, been,
   was, were, ...) and all attempts to simplify are treated with contempt or
   (preferably) humor. It be a curious world and it always beed."

> Basically, my argument for this type construction system is that it is
> VERY simple to use and understand (although it IS different than C, it is
> very simple and straightforward, which C is NOT). In fact, I would assert
> that most programmers TODAY do not understand pointers to member
> functions, and have to look up an example when they have to write them.

Again, I don't disagree with this at all. But to some extent this
particular problem is inherently difficult. Your syntax for the above
example may be easier for you to read because this is the way you have been
thinking about it. Honestly, I don't find it much easier than the C syntax.
In either case, I would have to look up an example to write pointers to
member functions.

But pointers to member functions are nowhere near as common as arrays. And
the old array syntax:

   type [ int, int, ...]

is just much more familiar and clear to people than anything new you
introduce, no matter how logical it is. Introducing a new syntax that may
make function pointers easier but makes arrays much more difficult seems
very risky to me.

> In my opinion, it is critically important to have clear and concise type
> specifications, because types are going to be all over the programs.

I absolutely agree. But the question is, what is more clear and concise?
The syntax programmers are used to out of years of experience or a new
syntax that they have never seen that has a more logical structure. I think
the answer is the former. Sometimes, you have to give up a better idea
because you can't overcome sociological barriers to it. Qwerty keyboards
and Windows are two classic examples of bad technology that are difficult to
root out.

P.S. Also, while I agree that most of your syntax is more logical, there is
one part that isn't:

   Arrays (without and with size):
   type ::= '[' type ']' | '[' INT ',' type ']'

The array-with-size form lists the dimensions and the type in a single list.
That is just too confusing:

   [10, 40, int]

This seems to be a 3-D array where the third dimension is something strange.
It is too confusing to have a list of 3 things, some of which are dimensions
and one is a type. Either of the following would be better:

   array [10, 40] of int
or
   int [10, 40]

--Vikram

Date: Thu, 8 Feb 2001 14:31:05 -0600 (CST)
From: Chris Lattner
To: Vikram S. Adve
Subject: RE: Type notation debate...

> Arrays (without and with size):
> type ::= '[' type ']' | '[' INT ',' type ']'
>
> The array-with-size form lists the dimensions and the type in a single list.
> That is just too confusing:
>
> [10, 40, int]
> This seems to be a 3-D array where the third dimension is something strange.
> It is too confusing to have a list of 3 things, some of which are dimensions
> and one is a type.

The above grammar indicates that there is only one integer parameter, i.e.,
the upper bound. The lower bound is always implied to be zero, for
several reasons:

* As a low level VM, we want to expose addressing computations
  explicitly. Since the lower bound must always be known in a high level
  language statically, the language front end can do the translation
  automatically.
* This fits more closely with what Java needs, i.e., what we need in the
  short term. Java arrays are always zero based.

If a two element list is too confusing, I would recommend an alternate
syntax of:

   type ::= '[' type ']' | '[' INT 'x' type ']'

For example:
   [12 x int]
   [12x int]
   [ 12 x [ 4x int ]]

Which is syntactically nicer, and more explicit.

> Either of the following would be better:
> array [10, 40] of int

I considered this approach for arrays in general (i.e. array of int / array
of 12 int), but found that it made declarations WAY too long. Remember
that because of the nature of LLVM, you get a lot of types strewn all over
the program, and using the 'typedef' like facility is not a wonderful
option, because then types aren't explicit anymore.

I find this email interesting, because you contradict the previous email
you sent, where you recommended that we stick to C syntax....

-Chris

> But in fact, if you read the entire answer carefully, he came to the same
> conclusion I do: that you have to go with familiar syntax over logical
> syntax because familiarity is such a strong force:
>    "However, familiarity is a strong force. To compare, in English, we live
>    more or less happily with the absurd rules for "to be" (am, are, is, been,
>    was, were, ...) and all attempts to simplify are treated with contempt or
>    (preferably) humor. It be a curious world and it always beed."

Although you have to remember that his situation was considerably
different than ours. He was in a position where he was designing a high
level language that had to be COMPATIBLE with C. Our language is such
that a new person would have to learn the new, different, syntax
anyways. Making them learn about the type system does not seem like much
of a stretch from learning the opcodes and how SSA form works, and how
everything ties together...

> > Basically, my argument for this type construction system is that it is
> > VERY simple to use and understand (although it IS different than C, it is
> > very simple and straightforward, which C is NOT). In fact, I would assert
> > that most programmers TODAY do not understand pointers to member
> > functions, and have to look up an example when they have to write them.

> Again, I don't disagree with this at all. But to some extent this
> particular problem is inherently difficult. Your syntax for the above
> example may be easier for you to read because this is the way you have been
> thinking about it. Honestly, I don't find it much easier than the C syntax.
> In either case, I would have to look up an example to write pointers to
> member functions.

I would argue that because the lexical structure of the language is self
consistent, any person who spent a significant amount of time programming
in LLVM directly would understand how to do it without looking it up in a
manual. The reason this does not work for C is because you rarely have to
declare these pointers, and the syntax is inconsistent with the method
declaration and calling syntax.

> But pointers to member functions are nowhere near as common as arrays.

Very true. If you're implementing an object oriented language, however,
remember that you have to do all the pointer to member function stuff
yourself.... so every time you invoke a virtual method one is involved
(instead of having C++ hide it for you behind "syntactic sugar").

> And the old array syntax:
> type [ int, int, ...]
> is just much more familiar and clear to people than anything new you
> introduce, no matter how logical it is.

Erm... excuse me but how is this the "old array syntax"? If you are
arguing for consistency with C, you should be asking for 'type int []',
which is significantly different than the above (besides, the above
introduces a new operator and duplicates information
needlessly). Basically what I am suggesting is exactly the above without
the fluff. So instead of:

   type [ int, int, ...]

you use:

   type [ int ]

> Introducing a new syntax that may
> make function pointers easier but makes arrays much more difficult seems
> very risky to me.

This is not about function pointers. This is about consistency in the
type system, and consistency with the rest of the language. The point
above does not make arrays any more difficult to use, and makes the
structure of types much more obvious than the "C way".

> > In my opinion, it is critically important to have clear and concise type
> > specifications, because types are going to be all over the programs.
>
> I absolutely agree. But the question is, what is more clear and concise?
> The syntax programmers are used to out of years of experience or a new
> syntax that they have never seen that has a more logical structure. I think
> the answer is the former. Sometimes, you have to give up a better idea
> because you can't overcome sociological barriers to it. Qwerty keyboards
> and Windows are two classic examples of bad technology that are difficult to
> root out.

Very true, but you seem to be advocating a completely different type
system than C has, one that doesn't offer the advantages of clear
structure that the system I recommended does... so you seem to not have a
problem with changing this, just with what I change it to. :)

-Chris

Ok, here are my comments and suggestions about the LLVM instruction set.
We should discuss some now, but can discuss many of them later, when we
revisit synchronization, type inference, and other issues.
(We have discussed some of the comments already.)


o We should consider eliminating the type annotation in cases where it is
  essentially obvious from the instruction type, e.g., in br, it is obvious
  that the first arg should be a bool and the other args should be labels:

      br bool <cond>, label <iftrue>, label <iffalse>

  I think your point was that making all types explicit improves clarity
  and readability. I agree to some extent, but it also comes at the cost
  of verbosity. And when the types are obvious from people's experience
  (e.g., in the br instruction), it doesn't seem to help as much.


o On reflection, I really like your idea of having the two different switch
  types (even though they encode implementation techniques rather than
  semantics). It should simplify building the CFG and my guess is it could
  enable some significant optimizations, though we should think about which.


o In the lookup-indirect form of the switch, is there a reason not to make
  the val-type uint? Most HLL switch statements (including Java and C++)
  require that anyway. And it would also make the val-type uniform
  in the two forms of the switch.

  I did see the switch-on-bool examples and, while cute, we can just use
  the branch instructions in that particular case.


o I agree with your comment that we don't need 'neg'.


o There's a trade-off with the cast instruction:
  + it avoids having to define all the upcasts and downcasts that are
    valid for the operands of each instruction (you probably have thought
    of other benefits also)
  - it could make the bytecode significantly larger because there could
    be a lot of cast operations


o Making the second arg to 'shl' a ubyte seems good enough to me.
  255 positions seems adequate for several generations of machines
  and is more compact than uint.


o I still have some major concerns about including malloc and free in the
  language (either as builtin functions or instructions). LLVM must be
  able to represent code from many different languages. Languages such as
  C, C++, Java, and Fortran 90 would not be able to use our malloc anyway
  because each of them will want to provide a library implementation of it.

  This gets even worse when code from different languages is linked
  into a single executable (which is fairly common in large apps).
  Having a single malloc would just not suffice, and instead would simply
  complicate the picture further because it adds an extra variant in
  addition to the one each language provides.

  Instead, providing a default library version of malloc and free
  (and perhaps a malloc_gc with garbage collection instead of free)
  would make a good implementation available to anyone who wants it.

  I don't recall all your arguments in favor so let's discuss this again,
  and soon.


o 'alloca' on the other hand sounds like a good idea, and the
  implementation seems fairly language-independent so it doesn't have the
  problems with malloc listed above.


o About indirect call:
  Your option #2 sounded good to me. I'm not sure I understand your
  concern about an explicit 'icall' instruction?


o A pair of important synchronization instr'ns to think about:
    load-linked
    store-conditional


o Other classes of instructions that are valuable for pipeline performance:
    conditional-move
    predicated instructions


o I believe tail calls are relatively easy to identify; do you know why
  .NET has a tailcall instruction?


o I agree that we need a static data space. Otherwise, emulating global
  data gets unnecessarily complex.


o About explicit parallelism:

  We once talked about adding a symbolic thread-id field to each
  instruction. (It could be optional so single-threaded codes are
  not penalized.) This could map well to multi-threaded architectures
  while providing easy ILP for single-threaded ones. But it is probably
  too radical an idea to include in a base version of LLVM. Instead, it
  could be a great topic for a separate study.

  What is the semantics of the IA64 stop bit?


o And finally, another thought about the syntax for arrays :-)

  Although this syntax:
      array [10, 40] of int
  is verbose, it will be used only in the human-readable assembly code so
  size should not matter. I think we should consider it because I find it
  to be the clearest syntax. It could even make arrays of function
  pointers somewhat readable.

From: Chris Lattner
To: "Vikram S. Adve"
Subject: Re: LLVM Feedback

I've included your feedback in the /home/vadve/lattner/llvm/docs directory
so that it will live in CVS eventually with the rest of LLVM. I've
significantly updated the documentation to reflect the changes you
suggested, as specified below:

> We should consider eliminating the type annotation in cases where it is
> essentially obvious from the instruction type:
>     br bool <cond>, label <iftrue>, label <iffalse>
> I think your point was that making all types explicit improves clarity
> and readability. I agree to some extent, but it also comes at the
> cost of verbosity. And when the types are obvious from people's
> experience (e.g., in the br instruction), it doesn't seem to help as
> much.

Very true. We should discuss this more, but my reasoning is more of a
consistency argument. There are VERY few instructions that can have all
of the types eliminated, and doing so when available unnecessarily makes
the language more difficult to handle. Especially when you see 'int
%this' and 'bool %that' all over the place, I think it would be
disorienting to see:

   br %predicate, %iftrue, %iffalse

for branches. Even just typing that once gives me the creeps. ;) Like I
said, we should probably discuss this further in person...

> On reflection, I really like your idea of having the two different
> switch types (even though they encode implementation techniques rather
> than semantics). It should simplify building the CFG and my guess is it
> could enable some significant optimizations, though we should think
> about which.

Great. I added a note to the switch section commenting on how the VM
should just use the instruction type as a hint, and that the
implementation may choose alternate representations (such as predicated
branches).

> In the lookup-indirect form of the switch, is there a reason not to
> make the val-type uint?

No. This was something I was debating for a while, and didn't really feel
strongly about either way. It is common to switch on other types in HLLs
(for example signed ints are particularly common), but in this case, all
that will be added is an additional 'cast' instruction. I removed that
from the spec.

> I agree with your comment that we don't need 'neg'

Removed.

> There's a trade-off with the cast instruction:
> + it avoids having to define all the upcasts and downcasts that are
>   valid for the operands of each instruction (you probably have
>   thought of other benefits also)
> - it could make the bytecode significantly larger because there could
>   be a lot of cast operations

+ You NEED casts to represent things like:
    void foo(float);
    ...
    int x;
    ...
    foo(x);
  in a language like C. Even in a Java like language, you need upcasts
  and some way to implement dynamic downcasts.
+ Not all forms of instructions take every type (for example you can't
  shift by a floating point number of bits), thus SOME programs will need
  implicit casts.

To be efficient and to avoid your '-' point above, we just have to be
careful to specify that the instructions shall operate on all common
types, therefore casting should be relatively uncommon. For example all
of the arithmetic operations work on almost all data types.

> Making the second arg to 'shl' a ubyte seems good enough to me.
> 255 positions seems adequate for several generations of machines

Okay, that comment is removed.

> and is more compact than uint.

No, it isn't. Remember that the bytecode encoding saves value slots into
the bytecode instructions themselves, not constant values. This is
another case where we may introduce more cast instructions (but we will
also reduce the number of opcode variants that must be supported by a
virtual machine). Because most shifts are by constant values, I don't
think that we'll have to cast many shifts. :)

> I still have some major concerns about including malloc and free in the
> language (either as builtin functions or instructions).

Agreed. How about this proposal:

malloc/free are either built in functions or actual opcodes. They provide
all of the type safety that the document would indicate, blah blah
blah. :)
100
101 Now, because of all of the excellent points that you raised, an
102 implementation may want to override the default malloc/free behavior of
103 the program. To do this, they simply implement a "malloc" and
104 "free" function. The virtual machine will then be defined to use the user
105 defined malloc/free function (which return/take void*'s, not type'd
106 pointers like the builtin function would) if one is available, otherwise
107 fall back on a system malloc/free.
108
109 Does this sound like a good compromise? It would give us all of the
110 typesafety/elegance in the language while still allowing the user to do
111 all the cool stuff they want to...
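
A sketch of what I mean (the concrete malloc/free syntax isn't pinned
down yet, so treat all of the details here as illustrative only):

    %p = malloc int, uint 100                ; builtin form: yields int *

    ; A program that wants control just defines its own pair, and the VM
    ; routes the builtin malloc/free through these instead:
    void * "malloc"(uint %numbytes) { ... }
    void "free"(void * %ptr) { ... }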
112
113 > 'alloca' on the other hand sounds like a good idea, and the
114 > implementation seems fairly language-independent so it doesn't have the
115 > problems with malloc listed above.
116
117 Okay, once we get the above stuff figured out, I'll put it all in the
118 spec.
119
120 > About indirect call:
121 > Your option #2 sounded good to me. I'm not sure I understand your
122 > concern about an explicit 'icall' instruction?
123
124 I worry too much. :) The other alternative has been removed. 'icall' is
125 now up in the instruction list next to 'call'.
126
127 > I believe tail calls are relatively easy to identify; do you know why
128 > .NET has a tailcall instruction?
129
130 Although I am just guessing, I believe it probably has to do with the fact
131 that they want languages like Haskell and lisp to be efficiently runnable
132 on their VM. Of course this means that the VM MUST implement tail calls
133 'correctly', or else life will suck. :) I would put this into a future
134 feature bin, because it could be pretty handy...
135
136 > A pair of important synchronization instr'ns to think about:
137 > load-linked
138 > store-conditional
139
140 What is 'load-linked'? I think that (at least for now) I should add these
141 to the 'possible extensions' section, because they are not immediately
142 needed...
143
144 > Other classes of instructions that are valuable for pipeline
145 > performance:
146 > conditional-move
147 > predicated instructions
148
149 Conditional move is effectively a special case of a predicated
150 instruction... and I think that all predicated instructions can possibly
151 be implemented later in LLVM. It would significantly change things, and
152 it doesn't seem to be very necessary right now. It would seem to
153 complicate flow control analysis a LOT in the virtual machine. I would
154 tend to prefer that a predicated architecture like IA64 convert from a
155 "basic block" representation to a predicated rep as part of its dynamic
156 compilation phase. Also, if a basic block contains ONLY a move, then
157 that can be trivially translated into a conditional move...
158
159 > I agree that we need a static data space. Otherwise, emulating global
160 > data gets unnecessarily complex.
161
162 Definitely. Also a later item though. :)
163
164 > We once talked about adding a symbolic thread-id field to each
165 > ..
166 > Instead, it could be a great topic for a separate study.
167
168 Agreed. :)
169
170 > What is the semantics of the IA64 stop bit?
171
172 Basically, the IA64 writes instructions like this:
173 mov ...
174 add ...
175 sub ...
176 op xxx
177 op xxx
178 ;;
179 mov ...
180 add ...
181 sub ...
182 op xxx
183 op xxx
184 ;;
185
186 Where the ;; delimits a group of instructions with no dependencies between
187 them, which can all be executed concurrently (to the limits of the
188 available functional units). The ;; gets translated into a bit set in one
189 of the opcodes.
190
191 The advantage of this representation is that you don't have to do some
192 kind of 'thread id scheduling' pass that specifies ahead of time how
193 many threads to use, and the representation doesn't have a per-instruction
194 overhead...
195
196 > And finally, another thought about the syntax for arrays :-)
197 > Although this syntax:
198 > array of
199 > is verbose, it will be used only in the human-readable assembly code so
200 > size should not matter. I think we should consider it because I find it
201 > to be the clearest syntax. It could even make arrays of function
202 > pointers somewhat readable.
203
204 My only comment will be to give you an example of why this is a bad
205 idea. :)
206
207 Here is an example of using the switch statement (with my recommended
208 syntax):
209
210 switch uint %val, label %otherwise,
211 [%3 x {uint, label}] [ { uint %57, label %l1 },
212 { uint %20, label %l2 },
213 { uint %14, label %l3 } ]
214
215 Here it is with the syntax you are proposing:
216
217 switch uint %val, label %otherwise,
218 array %3 of {uint, label}
219 array of {uint, label}
220 { uint %57, label %l1 },
221 { uint %20, label %l2 },
222 { uint %14, label %l3 }
223
224 Which is ambiguous and very verbose. It would be possible to specify
225 constants with [] brackets as in my syntax, which would look like this:
226
227 switch uint %val, label %otherwise,
228 array %3 of {uint, label} [ { uint %57, label %l1 },
229 { uint %20, label %l2 },
230 { uint %14, label %l3 } ]
231
232 But then the syntax is inconsistent between type definition and constant
233 definition (why do []'s enclose the constants but not the types??).
234
235 Anyways, I'm sure that there is much debate still to be had over
236 this... :)
237
238 -Chris
239
240 http://www.nondot.org/~sabre/os/
241 http://www.nondot.org/MagicStats/
242 http://korbit.sourceforge.net/
243
244
0 Date: Tue, 13 Feb 2001 13:29:52 -0600 (CST)
1 From: Chris Lattner
2 To: Vikram S. Adve
3 Subject: LLVM Concerns...
4
5
6 I've updated the documentation to include load store and allocation
7 instructions (please take a look and let me know if I'm on the right
8 track):
9
10 file:/home/vadve/lattner/llvm/docs/LangRef.html#memoryops
11
12 I have a couple of concerns I would like to bring up:
13
14 1. Reference types
15 Right now, I've spec'd out the language to have a pointer type, which
16 works fine for lots of stuff... except that Java really has
17 references: constrained pointers that cannot be manipulated: added to,
18 subtracted from, moved, etc... Do we want to have a type like this? It
19 could be very nice for analysis (pointer always points to the start of
20 an object, etc...) and more closely matches Java semantics. The
21 pointer type would be kept for C++ like semantics. Through analysis,
22 C++ pointers could be promoted to references in the LLVM
23 representation.
24
25 2. Our "implicit" memory references in assembly language:
26 After thinking about it, this model has two problems:
27 A. If you do pointer analysis and realize that two stores are
28 independent and can share the same memory source object, there is
29 no way to represent this in either the bytecode or assembly.
30 B. When parsing assembly/bytecode, we effectively have to do a full
31 SSA generation/PHI node insertion pass to build the dependencies
32 when we don't want the "pinned" representation. This is not
33 cool.
34 I'm tempted to make memory references explicit in both the assembly and
35 bytecode to get around this... what do you think?
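
To make the tradeoff concrete, the explicit form would read roughly like
this (illustrative syntax only), with each memory access a visible,
reorderable instruction instead of an implicit dependence:

    %v = load int * %p
    store int %v, int * %q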
36
37 -Chris
38
0 Date: Tue, 13 Feb 2001 18:25:42 -0600
1 From: Vikram S. Adve
2 To: Chris Lattner
3 Subject: RE: LLVM Concerns...
4
5 > 1. Reference types
6 > Right now, I've spec'd out the language to have a pointer type, which
7 > works fine for lots of stuff... except that Java really has
8 > references: constrained pointers that cannot be manipulated: added and
9 > subtracted, moved, etc... Do we want to have a type like this? It
10 > could be very nice for analysis (pointer always points to the start of
11 > an object, etc...) and more closely matches Java semantics. The
12 > pointer type would be kept for C++ like semantics. Through analysis,
13 > C++ pointers could be promoted to references in the LLVM
14 > representation.
15
16
17 You're right, having references would be useful. Even for C++ the *static*
18 compiler could generate references instead of pointers with fairly
19 straightforward analysis. Let's include a reference type for now. But I'm
20 also really concerned that LLVM is becoming big and complex and (perhaps)
21 too high-level. After we get some initial performance results, we may have
22 a clearer idea of what our goals should be and we should revisit this
23 question then.
24
25 > 2. Our "implicit" memory references in assembly language:
26 > After thinking about it, this model has two problems:
27 > A. If you do pointer analysis and realize that two stores are
28 > independent and can share the same memory source object,
29
30 not sure what you meant by "share the same memory source object"
31
32 > there is
33 > no way to represent this in either the bytecode or assembly.
34 > B. When parsing assembly/bytecode, we effectively have to do a full
35 > SSA generation/PHI node insertion pass to build the dependencies
36 > when we don't want the "pinned" representation. This is not
37 > cool.
38
39 I understand the concern. But again, let's focus on the performance first
40 and then look at the language design issues. E.g., it would be good to know
41 how big the bytecode files are before expanding them further. I am pretty
42 keen to explore the implications of LLVM for mobile devices. Both bytecode
43 size and power consumption are important to consider there.
44
45 --Vikram
46
0 By Chris:
1
2 LLVM has been designed with two primary goals in mind. First we strive to enable the best possible division of labor between static and dynamic compilers, and second, we need a flexible and powerful interface between these two complementary stages of compilation. We feel that providing a solution to these two goals will yield an excellent solution to the performance problem faced by modern architectures and programming languages.
3
4 A key insight into current compiler and runtime systems is that a compiler may fall anywhere in a "continuum of compilation" to do its job. On one side, scripting languages statically compile nothing and dynamically compile (or equivalently, interpret) everything. At the other extreme, traditional static compilers process everything statically and nothing dynamically. These approaches have typically been seen as a tradeoff between performance and portability. On a deeper level, however, there are two reasons that optimal system performance may be obtained by a system somewhere in between these two extremes: dynamic application behavior and social constraints.
5
6 From a technical perspective, pure static compilation cannot ever give optimal performance in all cases, because applications have varying dynamic behavior that the static compiler cannot take into consideration. Even compilers that support profile guided optimization generate poor code in the real world, because using such optimization tunes the application to one particular usage pattern, whereas real programs (as opposed to benchmarks) often have several different usage patterns.
7
8 On a social level, static compilation is a very shortsighted solution to the performance problem. Instruction set architectures (ISAs) continuously evolve, and each implementation of an ISA (a processor) must choose a set of tradeoffs that make sense in the market context that it is designed for. With every new processor introduced, the vendor faces two fundamental problems: First, there is a lag time between when a processor is introduced and when compilers generate quality code for the architecture. Secondly, even when compilers catch up to the new architecture there is often a large body of legacy code that was compiled for previous generations and will not or cannot be upgraded. Thus a large percentage of code running on a processor may be compiled quite sub-optimally for the current characteristics of the dynamic execution environment.
9
10 For these reasons, LLVM has been designed from the beginning as a long-term solution to these problems. Its design allows the large body of platform independent, static, program optimizations currently in compilers to be reused unchanged in their current form. It also provides important static type information to enable powerful dynamic and link time optimizations to be performed quickly and efficiently. This combination enables an increase in effective system performance for real world environments.
11
0 Meeting notes: Implementation idea: Exception Handling in C++/Java
1
2 The 5/18/01 meeting discussed ideas for implementing exceptions in LLVM.
3 We decided that the best solution requires a set of library calls provided by
4 the VM, as well as an extension to the LLVM function invocation syntax.
5
6 The LLVM function invocation instruction currently looks like this (ignoring
7 types):
8
9 call func(arg1, arg2, arg3)
10
11 The extension discussed today adds an optional "with" clause that
12 associates a label with the call site. The new syntax looks like this:
13
14 call func(arg1, arg2, arg3) with funcCleanup
15
16 This funcCleanup label always stays tightly associated with the call site (being
17 encoded directly into the call opcode itself), and should be used whenever
18 there is cleanup work that needs to be done for the current function if
19 an exception is thrown by func (or if we are in a try block).
20
21 To support this, the VM/Runtime provide the following simple library
22 functions (all syntax in this document is very abstract):
23
24 typedef struct { something } %frame;
25 The VM must export a "frame type", which is an opaque structure used to
26 implement different types of stack walking that may be used by various
27 language runtime libraries. We imagine that it would be typical to
28 represent a frame with a PC and frame pointer pair, although that is not
29 required.
30
31 %frame getStackCurrentFrame();
32 Get a frame object for the current function. Note that if the current
33 function was inlined into its caller, the "current" frame will belong to
34 the "caller".
35
36 bool isFirstFrame(%frame f);
37 Returns true if the specified frame is the top level (first activated) frame
38 for this thread. For the main thread, this corresponds to the main()
39 function, for a spawned thread, it corresponds to the thread function.
40
41 %frame getNextFrame(%frame f);
42 Return the previous frame on the stack. This function is undefined if f
43 satisfies the predicate isFirstFrame(f).
44
45 Label *getFrameLabel(%frame f);
46 If a label was associated with f (as discussed below), this function returns
47 it. Otherwise, it returns a null pointer.
48
49 doNonLocalBranch(Label *L);
50 At this point, it is not clear whether this should be a function or
51 intrinsic. It should probably be an intrinsic in LLVM, but we'll deal with
52 this issue later.
53
54
55 Here is a motivating example that illustrates how these facilities could be
56 used to implement the C++ exception model:
57
58 void TestFunction(...) {
59 A a; B b;
60 foo(); // Any function call may throw
61 bar();
62 C c;
63
64 try {
65 D d;
66 baz();
67 } catch (int) {
68 ...int Stuff...
69 // execution continues after the try block: the exception is consumed
70 } catch (double) {
71 ...double stuff...
72 throw; // Exception is propagated
73 }
74 }
75
76 This function would compile to approximately the following code (heavy
77 pseudo code follows):
78
79 Func:
80 %a = alloca A
81 A::A(%a) // These ctors & dtors could throw, but we ignore this
82 %b = alloca B // minor detail for this example
83 B::B(%b)
84
85 call foo() with fooCleanup // An exception in foo is propagated to fooCleanup
86 call bar() with barCleanup // An exception in bar is propagated to barCleanup
87
88 %c = alloca C
89 C::C(c)
90 %d = alloca D
91 D::D(d)
92 call baz() with bazCleanup // An exception in baz is propagated to bazCleanup
93 d->~D();
94 EndTry: // This label corresponds to the end of the try block
95 c->~C() // These could also throw, these are also ignored
96 b->~B()
97 a->~A()
98 return
99
100 Note that this is a very straightforward and literal translation: exactly
101 what we want for zero cost (when unused) exception handling. Especially on
102 platforms with many registers (e.g., the IA64) setjmp/longjmp style exception
103 handling is *very* impractical. Also, the "with" clauses describe the
104 control flow paths explicitly so that analysis is not adversely affected.
105
106 The foo/barCleanup labels are implemented as:
107
108 TryCleanup: // Executed if an exception escapes the try block
109 c->~C()
110 barCleanup: // Executed if an exception escapes from bar()
111 // fall through
112 fooCleanup: // Executed if an exception escapes from foo()
113 b->~B()
114 a->~A()
115 Exception *E = getThreadLocalException()
116 call throw(E) // Implemented by the C++ runtime, described below
117
118 Which does the work one would expect. getThreadLocalException is a function
119 implemented by the C++ support library. It returns the current exception
120 object for the current thread. Note that we do not attempt to recycle the
121 shutdown code from before, because performance of the mainline code is
122 critically important. Also, obviously fooCleanup and barCleanup may be
123 merged and one of them eliminated. This just shows how the code generator
124 would most likely emit code.
125
126 The bazCleanup label is more interesting. Because the exception may be caught
127 by the try block, we must dispatch to its handler... but it does not exist
128 on the call stack (it does not have a VM Call->Label mapping installed), so
129 we must dispatch statically with a goto. The bazCleanup label thus appears as:
130
131 bazCleanup:
132 d->~D(); // destruct D as it goes out of scope when entering catch clauses
133 goto TryHandler
134
135 In general, TryHandler is not the same as bazCleanup, because multiple
136 function calls could be made from the try block. In this case, trivial
137 optimization could merge the two basic blocks. TryHandler is the code
138 that actually determines the type of exception, based on the Exception object
139 itself. For this discussion, assume that the exception object contains *at
140 least*:
141
142 1. A pointer to the RTTI info for the contained object
143 2. A pointer to the dtor for the contained object
144 3. The contained object itself
145
146 Note that it is necessary to maintain #1 & #2 in the exception object itself
147 because objects without virtual function tables may be thrown (as in this
148 example). Assuming this, TryHandler would look something like this:
149
150 TryHandler:
151 Exception *E = getThreadLocalException();
152 switch (E->RTTIType) {
153 case IntRTTIInfo:
154 ...int Stuff... // The action to perform from the catch block
155 break;
156 case DoubleRTTIInfo:
157 ...double Stuff... // The action to perform from the catch block
158 goto TryCleanup // This catch block rethrows the exception
159 break; // Redundant, eliminated by the optimizer
160 default:
161 goto TryCleanup // Exception not caught, rethrow
162 }
163
164 // Exception was consumed
165 if (E->dtor)
166 E->dtor(E->object) // Invoke the dtor on the object if it exists
167 goto EndTry // Continue mainline code...
168
169 And that is all there is to it.
170
171 The throw(E) function would then be implemented like this (which may be
172 inlined into the caller through standard optimization):
173
174 function throw(Exception *E) {
175 // Get the start of the stack trace...
176 %frame %f = call getStackCurrentFrame()
177
178 // Get the label information that corresponds to it
179 label * %L = call getFrameLabel(%f)
180 while (%L == 0 && !isFirstFrame(%f)) {
181 // Loop until a cleanup handler is found
182 %f = call getNextFrame(%f)
183 %L = call getFrameLabel(%f)
184 }
185
186 if (%L != 0) {
187 call setThreadLocalException(E) // Allow handlers access to this...
188 call doNonLocalBranch(%L)
189 }
190 // No handler found!
191 call BlowUp() // Ends up calling the terminate() method in use
192 }
193
194 That's a brief rundown of how C++ exception handling could be implemented in
195 llvm. Java would be very similar, except it only uses destructors to unlock
196 synchronized blocks, not to destroy data. Also, it uses two stack walks: a
197 nondestructive walk that builds a stack trace, then a destructive walk that
198 unwinds the stack as shown here.
199
200 It would be trivial to get exception interoperability between C++ and Java.
201
0 Date: Sat, 19 May 2001 19:09:13 -0500 (CDT)
1 From: Chris Lattner
2 To: Vikram S. Adve
3 Subject: RE: Meeting writeup
4
5 > I read it through and it looks great!
6
7 Thanks!
8
9 > The finally clause in Java may need more thought. The code for this clause
10 > is like a subroutine because it needs to be entered from many points (end of
11 > try block and beginning of each catch block), and then needs to *return to
12 > the place from where the code was entered*. That's why JVM has the
13 > jsr/jsr_w instruction.
14
15 Hrm... I guess that is an implementation decision. It can either be
16 modelled as a subroutine (as java bytecodes do), which is really
17 gross... or it can be modelled as code duplication (emitted once inline,
18 then once in the exception path). Because this could, at worst,
19 slightly less than double the amount of code in a function (i.e., it is
20 bounded), I don't think this is a big deal. One of the really nice things
21 about the LLVM representation is that it still allows for runtime code
22 generation for exception paths (exception paths are not compiled until
23 needed). Obviously a static compiler couldn't do this though. :)
24
25 In this case, only one copy of the code would be compiled... until the
26 other one is needed on demand. Also this strategy fits with the "zero
27 cost" exception model... the standard case is not burdened with extra
28 branches or "call"s.
29
30 > I suppose you could save the return address in a particular register
31 > (specific to this finally block), jump to the finally block, and then at the
32 > end of the finally block, jump back indirectly through this register. It
33 > will complicate building the CFG but I suppose that can be handled. It is
34 > also unsafe in terms of checking where control returns (which is I suppose
35 > why the JVM doesn't use this).
36
37 I think that a code duplication method would be cleaner, and would avoid
38 the caveats that you mention. Also, it does not slow down the normal case
39 with an indirect branch...
40
41 Like everything, we can probably defer a final decision until later. :)
42
43 -Chris
44
0 Date: Fri, 1 Jun 2001 16:38:17 -0500 (CDT)
1 From: Chris Lattner
2 To: Vikram S. Adve
3 Subject: Interesting: GCC passes
4
5
6 Take a look at this document (which describes the order of optimizations
7 that GCC performs):
8
9 http://gcc.gnu.org/onlinedocs/gcc_17.html
10
11 The rundown is that after RTL generation, the following happens:
12
13 1. [t] jump optimization (jumps to jumps, etc)
14 2. [t] Delete unreachable code
15 3. Compute live ranges for CSE
16 4. [t] Jump threading (jumps to jumps with identical or inverse conditions)
17 5. [t] CSE
18 6. *** Conversion to SSA
19 7. [t] SSA Based DCE
20 8. *** Conversion to LLVM
21 9. UnSSA
22 10. GCSE
23 11. LICM
24 12. Strength Reduction
25 13. Loop unrolling
26 14. [t] CSE
27 15. [t] DCE
28 16. Instruction combination, register movement, scheduling... etc.
29
30 I've marked optimizations with a [t] to indicate things that I believe to
31 be relatively trivial to implement in LLVM itself. The time consuming
32 things to reimplement would be SSA based PRE, Strength reduction & loop
33 unrolling... these would be the major things we would miss out on if we
34 did LLVM creation from tree code [inlining and other high level
35 optimizations are done on the tree representation].
36
37 Given the lack of "strong" optimizations that would take a long time to
38 reimplement, I am leaning a bit more towards creating LLVM from the tree
39 code. Especially given that SGI has GPL'd their compiler, including many
40 SSA based optimizations that could be adapted (besides the fact that their
41 code looks MUCH nicer than GCC :)
42
43 Even if we choose to do LLVM code emission from RTL, we will almost
44 certainly want to move LLVM emission from step 8 down until at least CSE
45 has been rerun... which causes me to wonder if the SSA generation code
46 will still work (due to global variable dependencies and stuff). I assume
47 that it can be made to work, but might be a little more involved than we
48 would like.
49
50 I'm continuing to look at the Tree -> RTL code. It is pretty gross
51 because they do some of the translation a statement at a time, and some
52 of it a function at a time... I'm not quite clear why and how the
53 distinction is drawn, but it does not appear that there is a wonderful
54 place to attach extra info.
55
56 Anyways, I'm proceeding with the RTL -> LLVM conversion phase for now. We
57 can talk about this more on Monday.
58
59 Wouldn't it be nice if there were an obvious decision to be made? :)
60
61 -Chris
62
0 Date: Fri, 1 Jun 2001 17:08:44 -0500 (CDT)
1 From: Chris Lattner
2 To: Vikram S. Adve
3 Subject: RE: Interesting: GCC passes
4
5 > That is very interesting. I agree that some of these could be done on LLVM
6 > at link-time, but it is the extra time required that concerns me. Link-time
7 > optimization is severely time-constrained.
8
9 If we were to reimplement any of these optimizations, I assume that we
10 could do them a translation unit at a time, just as GCC does now. This
11 would lead to a pipeline like this:
12
13 Static optimizations, xlation unit at a time:
14 .c --GCC--> .llvm --llvmopt--> .llvm
15
16 Link time optimizations:
17 .llvm --llvm-ld--> .llvm --llvm-link-opt--> .llvm
18
19 Of course, many optimizations could be shared between llvmopt and
20 llvm-link-opt, but they wouldn't need to be shared... Thus compile time
21 could be faster, because we are using a "smarter" IR (SSA based).
22
23 > BTW, about SGI, "borrowing" SSA-based optimizations from one compiler and
24 > putting it into another is not necessarily easier than re-doing it.
25 > Optimization code is usually heavily tied in to the specific IR they use.
26
27 Understood. The only reason that I brought this up is because SGI's IR is
28 more similar to LLVM than it is different in many respects (SSA based,
29 relatively low level, etc), and could be easily adapted. Also their
30 optimizations are written in C++ and are actually somewhat
31 structured... of course it would be no walk in the park, but it would be
32 much less time consuming to adapt, say, SSA-PRE than to rewrite it.
33
34 > But your larger point is valid that adding SSA based optimizations is
35 > feasible and should be fun. (Again, link time cost is the issue.)
36
37 Assuming linktime cost wasn't an issue, the question is:
38 Does using GCC's backend buy us anything?
39
40 > It also occurs to me that GCC is probably doing quite a bit of back-end
41 > optimization (step 16 in your list). Do you have a breakdown of that?
42
43 Not really. The irritating part of GCC is that it mixes it all up and
44 doesn't have a clean separation of concerns. A lot of the "back end
45 optimization" happens right along with other data optimizations (i.e., CSE
46 of machine specific things).
47
48 As far as REAL back end optimizations go, it looks something like this:
49
50 1. Instruction combination: try to make CISCy instructions, if available
51 2. Register movement: try to get registers in the right places for the
52 architecture to avoid register to register moves. For example, try to get
53 the first argument of a function to naturally land in %o0 for sparc.
54 3. Instruction scheduling: 'nuff said :)
55 4. Register class preferencing: ??
56 5. Local register allocation
57 6. Global register allocation
58 7. Spilling
59 8. Local regalloc
60 9. Jump optimization
61 10. Delay slot scheduling
62 11. Branch shortening for CISC machines
63 12. Instruction selection & peephole optimization
64 13. Debug info output
65
66 But none of this would be usable for LLVM anyways, unless we were using
67 GCC as a static compiler.
68
69 -Chris
70
0
1 llvm Assembly Language Reference Manual
2
  • Abstract
  • Introduction
  • Identifiers
  • Type System
      • Primitive Types
      • Type Classifications
      • Derived Types
          • Array Type
          • Method Type
          • Pointer Type
          • Structure Type
          • Packed Type
  • High Level Structure
      • Module Structure
      • Method Structure
  • Instruction Reference
      • Terminator Instructions
          • 'ret' Instruction
          • 'br' Instruction
          • 'switch' Instruction
          • 'call .. with' Instruction
      • Unary Operations
          • 'not' Instruction
          • 'cast .. to' Instruction
      • Binary Operations
          • 'add' Instruction
          • 'sub' Instruction
          • 'mul' Instruction
          • 'div' Instruction
          • 'rem' Instruction
          • 'setcc' Instructions
      • Bitwise Binary Operations
          • 'and' Instruction
          • 'or' Instruction
          • 'xor' Instruction
          • 'shl' Instruction
          • 'shr' Instruction
      • Memory Access Operations
          • 'malloc' Instruction
          • 'free' Instruction
          • 'alloca' Instruction
          • 'load' Instruction
          • 'store' Instruction
          • 'getfieldptr' Instruction
      • Other Operations
          • 'call' Instruction
          • 'icall' Instruction
          • 'phi' Instruction
  • Builtin Functions
  • TODO List
      • Exception Handling Instructions
      • Synchronization Instructions
  • Possible Extensions
      • 'tailcall' Instruction
      • Global Variables
      • Explicit Parallelism
  • Related Work

    97 Abstract
    98
    99
    100
    101
    102 This document describes the LLVM assembly language IR/VM. LLVM is an SSA
    103 based representation that attempts to be a useful midlevel IR by providing
    104 type safety, low level operations, flexibility, and the capability to
    105 represent 'all' high level languages cleanly.
    106
    107
    108
    109
    110
    111
    112
    113 Introduction
    114
    115
    116
    117 The LLVM is designed to exhibit a dual nature: on one hand, it is a useful compiler IR, on the other hand, it is a bytecode representation for dynamic compilation. We contend that this is a natural and good thing, making LLVM a natural form of communication between different compiler phases, and also between a static and dynamic compiler.

    118
    119 This dual nature leads to three different representations of LLVM (the human readable assembly representation, the compact bytecode representation, and the in memory, pointer based, representation). This document describes the human readable representation and notation.

    120
121 The LLVM representation aims to be light-weight and low-level while being expressive, type safe, and extensible at the same time. It aims to be a "universal IR" of sorts, by being at a low enough level that high level ideas may be cleanly mapped to it. By providing type safety, LLVM can be used as the target of optimizations: for example, through pointer analysis, it can be proven that a C automatic variable is never accessed outside of the current function... allowing it to be promoted to a simple SSA value instead of a memory location.
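
As an illustration of that last point, a C automatic variable is first lowered through memory, and may then be promoted (a sketch, using the 'alloca', 'load', and 'store' instructions described later in this document):

    %x = alloca int                ; the C automatic variable 'x'
    store int 0, int * %x          ; x = 0
    %v = load int * %x             ; if %x provably never escapes, this
                                   ; load is just the SSA value 'int 0'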

    122
    123
    124


    Well Formedness

    125
    126 It is important to note that this document describes 'well formed' llvm assembly language. There is a difference between what the parser accepts and what is considered 'well formed'. For example, the following instruction is syntactically okay, but not well formed:

    127
    128
    
                      
                    
    129 %x = add int 1, %x
    130
    131
132 ...because only a phi node may refer to itself. The LLVM API provides a verification function ('verify') that may be used to verify that a whole module or a single method is well formed. It is useful for validating that an optimization pass performed a well formed transformation of the code.

    133
    134
    135 Describe the typesetting conventions here.
    136
    137
    138
    139
    140 Identifiers
    141
    142
    143
    144 LLVM uses three different forms of identifiers, for different purposes:

    145
    146
    147
• Numeric constants are represented as you would expect: 12, -3, 123.421, etc.
  • 148
  • Named values are represented as a string of characters with a '%' prefix. For example, %foo, %DivisionByZero, %a.really.long.identifier. The actual regular expression used is '%[a-zA-Z$._][a-zA-Z$._0-9]*'.
  • 149
  • Unnamed values are represented as an unsigned numeric value with a '%' prefix. For example, %12, %2, %44.
  • 150

    151
152 LLVM requires that values start with a '%' sign for two reasons: Compilers don't need to worry about name clashes with reserved words, and the set of reserved words may be expanded in the future without penalty. Additionally, unnamed identifiers allow a compiler to quickly come up with a temporary variable without having to avoid symbol table conflicts.

    153
    154 Reserved words in LLVM are very similar to reserved words in other languages. There are keywords for different opcodes ('add', 'cast', 'ret', etc...), for primitive type names ('void', 'uint', etc...), and others. These reserved words cannot conflict with variable names, because none of them may start with a '%' character.

    155
    156 Here is an example of LLVM code to multiply the integer variable '%X' by 8:

    157
    158 The easy way:
    159
    
                      
                    
    160 %result = mul int %X, 8
    161
    162
    163 After strength reduction:
    164
    
                      
                    
    165 %result = shl int %X, ubyte 3
    166
    167
    168 And the hard way:
    169
    
                      
                    
    170 add int %X, %X ; yields {int}:%0
    171 add int %0, %0 ; yields {int}:%1
    172 %result = add int %1, %1
    173
    174
    175 This last way of multiplying %X by 8 illustrates several important lexical features of LLVM:

    176
    177
    178
  • Comments are delimited with a ';' and go until the end of line.
  • 179
  • Unnamed temporaries are created when the result of a computation is not assigned to a named value.
  • 180
  • Unnamed temporaries are numbered sequentially
  • 181

    182
183 ...and it also shows a convention that we follow in this document. When demonstrating instructions, we will follow an instruction with a comment that defines the type and name of the value produced. Comments are shown in italic text.

    184
    185
    186
    187
    188
    189 Type System
    190
    191
    192
    193 The LLVM type system is important to the overall usefulness of the language and VM runtime. By being strongly typed, a number of optimizations may be performed on the IR directly, without having to do extra analysis to derive types. A strong type system also makes it easier to comprehend generated code and assists with safety concerns.

    194
195 The assembly language form for the type system was heavily influenced by the type problems in the C language [1].

    196
    197
    198
    199
    200
      
    201 Primitive Types
    202
    203
204 The primitive types are the fundamental building blocks of the LLVM system. The current set of primitive types are as follows:

    205
    206
    207
    208
    209
    void No value
    210
    ubyte Unsigned 8 bit value
    211
ushort Unsigned 16 bit value
    212
    uint Unsigned 32 bit value
    213
    ulong Unsigned 64 bit value
    214
    float 32 bit floating point value
    215
    label Branch destination
    216
    217
    218
    219
    220
    221
    bool True or False value
    222
    sbyte Signed 8 bit value
    223
    short Signed 16 bit value
    224
    int Signed 32 bit value
    225
    long Signed 64 bit value
    226
double 64 bit floating point value
    227
    lock Recursive mutex value
    228
    229
    230

    231
    232
    233
    234
    235


    Type Classifications

    236
    237 These different primitive types fall into a few useful classifications:

    238
    239
    240
signed          sbyte, short, int, long, float, double
241
unsigned        ubyte, ushort, uint, ulong
242
integral        ubyte, sbyte, ushort, short, uint, int, ulong, long
243
floating point  float, double
244
first class     bool, ubyte, sbyte, ushort, short, uint, int, ulong, long, float, double, lock
245

    246
    247
    248
    249
    250
    251
    252
      
    253 Derived Types
    254
    255
    256 The real power in LLVM comes from the derived types in the system. This is what allows a programmer to represent arrays, methods, pointers, and other useful types. Note that these derived types may be recursive: For example, it is possible to have a two dimensional array.

    257
    258
    259
    260
    261


    Array Type

    262
    263
    Overview:
    264
    265 The array type is a very simple derived type. It arranges elements sequentially in memory. There are two different forms of the array type:

    266
    267
    268
  • Fixed size array type:
• 269 The simplest form of the array type has a size hard coded in as part of the type. Thus the following are three distinct types:

    270
    271 [40 x int ]: Array of 40 integer values.
    272 [41 x int ]: Array of 41 integer values.
    273 [40 x uint]: Array of 40 unsigned integer values.

    274
    275 Fixed sized arrays are very useful for compiler optimization passes and for representing analysis results. Additionally, multidimensional arrays must have fixed sizes for all dimensions except the outer-most dimension.

    276
    277
  • Dynamically sized array type:
  • 278 The dynamically sized arrays are very similar to the fixed size arrays, except that the size of the array is calculated at runtime by the virtual machine. This is useful for representing generic methods that take any size array as an argument, or when representing Java style arrays.
    279

    280
    281 Here are some examples of multidimensional arrays:

    282
    283
    284
[3 x [4 x int]]: 3x4 array of integer values.
    285
    [[10 x int]]: Nx10 array of integer values.
    286
    [2 x [3 x [4 x uint]]]: 2x3x4 array of unsigned integer values.
    287
    288
    289
    290
    291
    292
    293


    Method Type

    294
    295
    Overview:
    296
297 The method type can be thought of as a method signature. It consists of a return type and a list of formal parameter types. Method types are usually used to build virtual function tables (which are structures of pointers to methods) and to make indirect method calls.

    298
    299
    Syntax:
    300
    
                      
                    
    301 <returntype> (<parameter list>)
    302
    303
304 Where '<parameter list>' is a comma separated list of type specifiers.

    305
    306
    Examples:
    307
    308
    309
    int (int): method taking an int, returning an int
    310
    float (int, int *) *: Pointer to a method that takes an int and a pointer to int, returning float.
    311
    312
    313
    314
    315
    316
    317


    Structure Type

    318
    319
    Overview:
    320
    321 The structure type is used to represent a collection of data members together in memory. Although the runtime is allowed to lay out the data members any way that it would like, they are guaranteed to be "close" to each other.

    322
323 Structures are accessed using 'load' and 'store' by getting a pointer to a field with the 'getfieldptr' instruction.

    324
    325
    Syntax:
    326
    
                      
                    
    327 { <type list> }
    328
    329
    330
    331
    Examples:
    332
    333
    { int, int, int }: a triple of three int values
    334
    { float, int (int *) * }: A pair, where the first element is a float and the second element is a pointer to a method that takes an int, returning an int.
    335
    336
    337
    338
    339


    Pointer Type

    340
    341
    342
    343


    Packed Type

    344
    345 Mention/decide that packed types work with saturation or not. Maybe have a packed+saturated type in addition to just a packed type.

    346
    347 Packed types should be 'nonsaturated' because standard data types are not saturated. Maybe have a saturated packed type?

    348
    349
    350
    351
    352 High Level Structure
    353
    354
    355
    356
    357
    358
      
    359 Module Structure
    360
    361
    362
    363 talk about the elements of a module: constant pool and method list.

    364
    365
    366
    367
      
    368 Method Structure
    369
    370
    371
    372 talk about the constant pool

373 talk about how labels delineate basic blocks

    374 talk about how basic blocks end with terminators

    375
    376
    377
    378
    379 Instruction Reference
    380
    381
    382
    383 List all of the instructions, list valid types that they accept. Tell what they
    384 do and stuff also.
    385
    386
    387
      
    388 Terminator Instructions
    389
    390
    391
    392
    393 As was mentioned previously, every basic block in
    394 a program ends with a "Terminator" instruction. Additionally, all terminators yield a 'void' value: they produce control flow, not values.

    395
    396 There are three different terminator instructions: the 'ret' instruction, the 'br' instruction, and the 'switch' instruction.

    397
    398
    399
    400


    'ret' Instruction

    401
    402
    Syntax:
    403
    
                      
                    
    404 ret <type> <value> ; Return a value from a non-void method
    405 ret void ; Return from void method
    406
    407
    408
    Overview:
    409 The 'ret' instruction is used to return control flow (and optionally a value) from a method, back to the caller.

    410
411 There are two forms of the 'ret' instruction: one that returns a value and then causes control flow, and one that just causes control flow to occur.

    412
    413
    Arguments:
    414 The 'ret' instruction may return any 'first class' type. Notice that a method is not well formed if there exists a 'ret' instruction inside of the method that returns a value that does not match the return type of the method.

    415
    416
    Semantics:
417 When the 'ret' instruction is executed, control flow returns back to the calling method's context. If the instruction returns a value, that value shall be propagated into the calling method's data space.

    418
    419
    Example:
    420
    
                      
                    
    421 ret int 5 ; Return an integer value of 5
    422 ret void ; Return from a void method
    423
    424
    425
    426
    427


    'br' Instruction

    428
    429
    Syntax:
    430
    
                      
                    
    431 br bool <cond>, label <iftrue>, label <iffalse>
    432 br label <dest> ; Unconditional branch
    433
    434
    435
    Overview:
436 The 'br' instruction is used to cause control flow to transfer to a different basic block in the current method. There are two forms of this instruction, corresponding to a conditional branch and an unconditional branch. The 'br' instruction is a (useful) special case of the 'switch' instruction.

    437
    438
    Arguments:
    439
    440 The conditional branch form of the 'br' instruction shall take a single 'bool' value and two 'label' values. The unconditional form of the 'br' instruction takes a single 'label' value as a target.

    441
    442
    Semantics:
    443
    444 Upon execution of a conditional 'br' instruction, the 'bool' argument is evaluated. If the value is true, control flows to the 'iftrue' 'label' argument. If "cond" is false, control flows to the 'iffalse' 'label' argument.

    445
    446
    Example:
    447
    
                      
                    
    448 Test:
    449 %cond = seteq int %a, %b
    450 br bool %cond, label %IfEqual, label %IfUnequal
    451 IfEqual:
    452 ret bool true
    453 IfUnequal:
    454 ret bool false
    455
    456
    457
    458
    459


    'switch' Instruction

    460
    461
    Syntax:
    462
    
                      
                    
    463 ; Definitions for lookup indirect branch
    464 %switchtype = type [<anysize> x { uint, label }]
    465
    466 ; Lookup indirect branch
    467 switch uint <value>, label <defaultdest>, %switchtype <switchtable>
    468
    469 ; Indexed indirect branch
    470 switch uint <idxvalue>, label <defaultdest>, [<anysize> x label] <desttable>
    471
    472
    473
    Overview:
    474 The 'switch' instruction is used to transfer control flow to one of several different places. It is a simple generalization of the 'br' instruction, and supports a strict superset of its functionality.

    475
476 The 'switch' statement supports two different styles of indirect branching: lookup branching and indexed branching. Lookup branching is generally useful if the values to switch on are spread far apart, whereas indexed branching is useful if the values to switch on are generally dense.

    477
478 The two different forms of the 'switch' statement are simple hints to the underlying virtual machine implementation. For example, a virtual machine may choose to implement a small indirect branch table as a series of predicated comparisons, if that is faster for the target architecture.

    479
    480
    Arguments:
    481 The lookup form of the 'switch' instruction uses three parameters: a 'uint' comparison value 'value', a default 'label' destination, and a sized array of pairs of comparison value constants and 'label's. The sized array must be a constant value.

    482
483 The indexed form of the 'switch' instruction uses three parameters: a 'uint' index value, a default 'label', and a sized array of 'label's. The 'desttable' array must be a constant array.
    484
    485
    Semantics:
    486
    487 The lookup style switch statement specifies a table of values and destinations. When the 'switch' instruction is executed, this table is searched for the given value. If the value is found, the corresponding destination is branched to.

    488 The index branch form simply looks up a label element directly in a table and branches to it.

    489
490 In either case, the compiler knows the static size of the array, because it is provided as part of the constant value's type.

    491
    492
    Example:
    493
    
                      
                    
    494 ; Emulate a conditional br instruction
    495 %Val = cast bool %value to uint
    496 switch uint %Val, label %truedest, [1 x label] [label %falsedest ]
    497
    498 ; Emulate an unconditional br instruction
    499 switch uint 0, label %dest, [ 0 x label] [ ]
    500
    501 ; Implement a jump table using the constant pool:
    502 void "testmeth"(int %arg0)
    503 %switchdests = [3 x label] [ label %onzero, label %onone, label %ontwo ]
    504 {
    505 ...
    506 switch uint %val, label %otherwise, [3 x label] %switchdests...
    507 ...
    508 }
    509
510 ; Implement the equivalent jump table directly:
    511 switch uint %val, label %otherwise, [3 x label] [ label %onzero,
    512 label %onone,
    513 label %ontwo ]
    514
    515
    516
    517
    518
    519
    520


    'call .. with' Instruction

    521
    522
    Syntax:
    523
    
                      
                    
    524 <result> = call <method ty> %<method name>(<method args>) with label <break label>
    525
    526
    527
    Overview:
528 The 'call .. with' instruction is used to cause control flow to transfer to a specified method, with the possibility of control flow transfer to the 'break label' label, in addition to the possibility of fallthrough to the next basic block. The 'call' instruction is closely related, but guarantees that control flow either never returns from the invoked method, or that it returns to the instruction succeeding the 'call' instruction.

    529
    530 TODO: icall .. with needs to be defined as well for an indirect call.

    531
    532
    Arguments:
    533
    534 This instruction requires several arguments:

    535
    536
  • 'method ty': shall be the signature of the named method being invoked. This must be a method type.
  • 537
  • 'method name': method name to be invoked.
  • 538
  • 'method args': argument list whose types match the method signature argument types.
  • 539
  • 'break label': a label that specifies the break label associated with this call.
  • 540
    541
    542
    Semantics:
    543
544 This instruction is designed to operate as a standard 'call' instruction in most regards. The primary difference is that it associates a label with the method invocation that may be accessed via the runtime library provided by the execution environment. This instruction is used in languages with destructors to ensure that proper cleanup is performed in the case of either a longjmp or a thrown exception. Additionally, this is important for the implementation of 'catch' clauses in high-level languages that support them.

    545
    546 For a more comprehensive explanation of this instruction look in the llvm/docs/2001-05-18-ExceptionHandling.txt document.
    547
    548
    Example:
    549
    
                      
                    
    550 %retval = call int (int) %Test(int 15) with label %TestCleanup ; {int}:retval set
    551
    552
    553
    554
    555
    556
      
    557 Unary Operations
    558
    559
560 Unary operators are used to do a simple operation on a single value.

    561
    562 There are two different unary operators: the 'not' instruction and the 'cast' instruction.

    563
    564
    565
    566


    'not' Instruction

    567
    568
    Syntax:
    569
    
                      
                    
    570 <result> = not <ty> <var> ; yields {ty}:result
    571
    572
    573
    Overview:
    574 The 'not' instruction returns the logical inverse of its operand.

    575
    576
    Arguments:
577 The single argument to 'not' must be of integral type.

    578
    579
    580
    Semantics:
581 The 'not' instruction returns the logical inverse of its integral operand.

    582
583 Note that the 'not' instruction is not defined on the 'bool' type. To invert a boolean value, the recommended method is to use:

    584
    585
    
                      
                    
    586 <result> = xor bool true, <var> ; yields {bool}:result
    587
    588
    589
    Example:
    590
    
                      
                    
    591 %x = not int 1 ; {int}:x is now equal to 0
    592 %x = not bool true ; {bool}:x is now equal to false
    593
    594
    595
    596
    597
    598


    'cast .. to' Instruction

    599
    600

    TODO

    601
    602
    603 Talk about what is considered true or false for integrals.
    604
    605
    606
    607
    Syntax:
    608
    
                      
                    
    609
    610
    611
    Overview:
    612
    613
    614
    Arguments:
    615
    616
    617
    Semantics:
    618
    619
    620
    Example:
    621
    
                      
                    
    622
    623
    624
    625
    626
      
    627 Binary Operations
    628
    629
630 Binary operators are used to do most of the computation in a program. They require two operands, execute an operation on them, and produce a single value. The result value of a binary operator is not necessarily the same type as its operands.

    631
    632 There are several different binary operators:

    633
    634
    635
    636


    'add' Instruction

    637
    638
    Syntax:
    639
    
                      
                    
    640 <result> = add <ty> <var1>, <var2> ; yields {ty}:result
    641
    642
    643
    Overview:
    644 The 'add' instruction returns the sum of its two operands.

    645
    646
    Arguments:
    647 The two arguments to the 'add' instruction must be either integral or floating point values. Both arguments must have identical types.

    648
    649
    Semantics:
    650 ...

    651
    652
    Example:
    653
    
                      
                    
    654 <result> = add int 4, %var ; yields {int}:result = 4 + %var
    655
    656
    657
    658
    659


    'sub' Instruction

    660
    661
    Syntax:
    662
    
                      
                    
    663 <result> = sub <ty> <var1>, <var2> ; yields {ty}:result
    664
    665
    666
    Overview:
    667 The 'sub' instruction returns the difference of its two operands.

    668
669 Note that the 'sub' instruction is also the canonical way to represent a 'neg' operation.

    670
    671
    Arguments:
    672 The two arguments to the 'sub' instruction must be either integral or floating point values. Both arguments must have identical types.

    673
    674
    Semantics:
    675 ...

    676
    677
    Example:
    678
    
                      
                    
    679 <result> = sub int 4, %var ; yields {int}:result = 4 - %var
680 <result> = sub int 0, %var ; yields {int}:result = -%var
    681
    682
    683
    684


    'mul' Instruction

    685
    686
    Syntax:
    687
    
                      
                    
    688 <result> = mul <ty> <var1>, <var2> ; yields {ty}:result
    689
    690
    691
    Overview:
    692 The 'mul' instruction returns the product of its two operands.

    693
    694
    Arguments:
    695 The two arguments to the 'mul' instruction must be either integral or floating point values. Both arguments must have identical types.

    696
    697
    Semantics:
    698 ...

699 There is no signed vs unsigned multiplication. The appropriate action is taken based on the type of the operands.

    700
    701
    702
    Example:
    703
    
                      
                    
    704 <result> = mul int 4, %var ; yields {int}:result = 4 * %var
    705
    706
    707
    708
    709


    'div' Instruction

    710
    711
    Syntax:
    712
    
                      
                    
    713 <result> = div <ty> <var1>, <var2> ; yields {ty}:result
    714
    715
    716
    Overview:
    717 The 'div' instruction returns the quotient of its two operands.

    718
    719
    Arguments:
    720 The two arguments to the 'div' instruction must be either integral or floating point values. Both arguments must have identical types.

    721
    722
    Semantics:
    723 ...

    724
    725
    Example:
    726
    
                      
                    
    727 <result> = div int 4, %var ; yields {int}:result = 4 / %var
    728
    729
    730
    731
    732


    'rem' Instruction

    733
    734
    Syntax:
    735
    
                      
                    
    736 <result> = rem <ty> <var1>, <var2> ; yields {ty}:result
    737
    738
    739
    Overview:
    740 The 'rem' instruction returns the remainder from the division of its two operands.

    741
    742
    Arguments:
    743 The two arguments to the 'rem' instruction must be either integral or floating point values. Both arguments must have identical types.

    744
    745
    Semantics:
    746 TODO: remainder or modulus?

    747 ...

    748
    749
    Example:
    750
    
                      
                    
    751 <result> = rem int 4, %var ; yields {int}:result = 4 % %var
    752
    753
    754
    755
    756


    'setcc' Instructions

    757
    758
    Syntax:
    759
    
                      
                    
    760 <result> = seteq <ty> <var1>, <var2> ; yields {bool}:result
    761 <result> = setne <ty> <var1>, <var2> ; yields {bool}:result
    762 <result> = setlt <ty> <var1>, <var2> ; yields {bool}:result
    763 <result> = setgt <ty> <var1>, <var2> ; yields {bool}:result
    764 <result> = setle <ty> <var1>, <var2> ; yields {bool}:result
    765 <result> = setge <ty> <var1>, <var2> ; yields {bool}:result
    766
    767
    768
    Overview:
    769 The 'setcc' family of instructions returns a boolean value based on a comparison of their two operands.

    770
    771
    Arguments:
    772 The two arguments to the 'setcc' instructions must be of first class or derived type (it is not possible to compare 'label's or 'void' values). Both arguments must have identical types.

    773
    774 The 'setlt', 'setgt', 'setle', and 'setge' instructions do not operate on 'bool' typed arguments.

    775
    776
    Semantics:
    777 The 'seteq' instruction yields a true 'bool' value if both operands are equal.
    778 The 'setne' instruction yields a true 'bool' value if both operands are unequal.
    779 The 'setlt' instruction yields a true 'bool' value if the first operand is less than the second operand.
    780 The 'setgt' instruction yields a true 'bool' value if the first operand is greater than the second operand.
    781 The 'setle' instruction yields a true 'bool' value if the first operand is less than or equal to the second operand.
    782 The 'setge' instruction yields a true 'bool' value if the first operand is greater than or equal to the second operand.

    783
    784
    Example:
    785
    786 <result> = seteq int 4, 5 ; yields {bool}:result = false
    787 <result> = setne float 4, 5 ; yields {bool}:result = true
    788 <result> = setlt uint 4, 5 ; yields {bool}:result = true
    789 <result> = setgt sbyte 4, 5 ; yields {bool}:result = false
    790 <result> = setle sbyte 4, 5 ; yields {bool}:result = true
    791 <result> = setge sbyte 4, 5 ; yields {bool}:result = false
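
    Since only the four relational forms exclude 'bool' operands, equality tests on 'bool' values are presumably still legal; a hedged example:

    <result> = seteq bool true, false ; yields {bool}:result = false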
    792
    793
    794
    795
    796
    797
    798 Bitwise Binary Operations
    799
    800
    801 Bitwise binary operators are used to do various forms of bit-twiddling in a program. They are generally very efficient instructions, and other instructions can commonly be strength-reduced into them. They require two operands, execute an operation on them, and produce a single value. The resulting value of a bitwise binary operator is always the same type as its first operand.
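
    For example, a multiplication by a power of two can be strength-reduced into a shift (an illustrative transformation, not a rule the compiler must apply):

    %tmp = mul int %X, 8 ; yields {int}:tmp = %X * 8
    ; ...can be rewritten as:
    %tmp = shl int %X, ubyte 3 ; yields the same value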

    802
    803
    804


    'and' Instruction

    805
    806
    Syntax:
    807
    808 <result> = and <ty> <var1>, <var2> ; yields {ty}:result
    809
    810
    811
    Overview:
    812 The 'and' instruction returns the bitwise logical and of its two operands.

    813
    814
    Arguments:
    815 The two arguments to the 'and' instruction must be either integral or bool values. Both arguments must have identical types.

    816
    817
    818
    Semantics:
    819 ...

    820
    821
    822
    Example:
    823
    824 <result> = and int 4, %var ; yields {int}:result = 4 & %var
    825 <result> = and int 15, 40 ; yields {int}:result = 8
    826 <result> = and int 4, 8 ; yields {int}:result = 0
    827
    828
    829
    830
    831
    832


    'or' Instruction

    833
    834
    Syntax:
    835
    836 <result> = or <ty> <var1>, <var2> ; yields {ty}:result
    837
    838
    839
    Overview:
    840 The 'or' instruction returns the bitwise logical inclusive or of its two operands.

    841
    842
    Arguments:
    843 The two arguments to the 'or' instruction must be either integral or bool values. Both arguments must have identical types.

    844
    845
    846
    Semantics:
    847 ...

    848
    849
    850
    Example:
    851
    852 <result> = or int 4, %var ; yields {int}:result = 4 | %var
    853 <result> = or int 15, 40 ; yields {int}:result = 47
    854 <result> = or int 4, 8 ; yields {int}:result = 12
    855
    856
    857
    858
    859


    'xor' Instruction

    860
    861
    Syntax:
    862
    863 <result> = xor <ty> <var1>, <var2> ; yields {ty}:result
    864
    865
    866
    Overview:
    867 The 'xor' instruction returns the bitwise logical exclusive or of its two operands.

    868
    869
    Arguments:
    870 The two arguments to the 'xor' instruction must be either integral or bool values. Both arguments must have identical types.

    871
    872
    873
    Semantics:
    874 ...

    875
    876
    877
    Example:
    878
    879 <result> = xor int 4, %var ; yields {int}:result = 4 ^ %var
    880 <result> = xor int 15, 40 ; yields {int}:result = 39
    881 <result> = xor int 4, 8 ; yields {int}:result = 12
    882
    883
    884
    885
    886


    'shl' Instruction

    887
    888
    Syntax:
    889
    890 <result> = shl <ty> <var1>, ubyte <var2> ; yields {ty}:result
    891
    892
    893
    Overview:
    894 The 'shl' instruction returns the first operand shifted to the left a specified number of bits.
    895
    896
    Arguments:
    897 The first argument to the 'shl' instruction must be of integral type. The second argument must be of 'ubyte' type.

    898
    899
    Semantics:
    900 ... 0 bits are shifted into the emptied bit positions...

    901
    902
    903
    Example:
    904
    905 <result> = shl int 4, ubyte %var ; yields {int}:result = 4 << %var
    906 <result> = shl int 4, ubyte 2 ; yields {int}:result = 16
    907 <result> = shl int 1, ubyte 10 ; yields {int}:result = 1024
    908
    909
    910
    911
    912


    'shr' Instruction

    913
    914
    915
    Syntax:
    916
    917 <result> = shr <ty> <var1>, ubyte <var2> ; yields {ty}:result
    918
    919
    920
    Overview:
    921 The 'shr' instruction returns the first operand shifted to the right a specified number of bits.
    922
    923
    Arguments:
    924 The first argument to the 'shr' instruction must be of integral type. The second argument must be of 'ubyte' type.

    925
    926
    Semantics:
    927 ... if the first argument is of a signed type, the most significant bit is duplicated in the newly freed bit positions (an arithmetic shift). If the first argument is unsigned, zeros fill the empty positions (a logical shift)...

    928
    929
    Example:
    930
    931 <result> = shr int 4, ubyte %var ; yields {int}:result = 4 >> %var
    932 <result> = shr int 4, ubyte 1 ; yields {int}:result = 2
    933 <result> = shr int 4, ubyte 2 ; yields {int}:result = 1
    934 <result> = shr int 4, ubyte 3 ; yields {int}:result = 0
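
    The signed/unsigned distinction above only becomes visible for negative values; a hedged illustration, assuming the 32-bit two's complement 'int' and 'uint' used throughout this document:

    <result> = shr int -2, ubyte 1 ; sign bit duplicated: yields {int}:result = -1
    <result> = shr uint 4294967294, ubyte 1 ; zero fill: yields {uint}:result = 2147483647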
    935
    936
    937
    938
    939
    940
    941
    942
    943 Memory Access Operations
    944
    945
    946 Accessing memory in SSA form is, well, sticky at best. This section describes how to read and write memory in LLVM.

    947
    948
    949
    950


    'malloc' Instruction

    951
    952
    Syntax:
    953
    954 <result> = malloc <type> ; yields {type*}:result
    955 <result> = malloc [<type>], uint <NumElements> ; yields {[type] *}:result
    956
    957
    958
    Overview:
    959 The 'malloc' instruction allocates memory from the system heap and returns a pointer to it.

    960
    961
    Arguments:
    962
    963 There are two forms of the 'malloc' instruction, one for allocating a variable of a fixed type, and one for allocating an array. The array form is used to allocate an array whose upper bound is not known until run time. If the upper bound is known at compile time, it is recommended that the first form be used with a sized array type.

    964
    965 'type' may be any type except for an unsized array type.

    966
    967
    Semantics:
    968 Memory is allocated, and a pointer to it is returned.

    969
    970
    Example:
    971
    972 %array = malloc [4 x ubyte] ; yields {[4 x ubyte]*}:array
    973
    974 %size = add uint 2, 2 ; yields {uint}:size = uint 4
    975 %array1 = malloc [ubyte], uint 4 ; yields {[ubyte]*}:array1
    976 %array2 = malloc [ubyte], uint %size ; yields {[ubyte]*}:array2
    977
    978
    979
    980
    981


    'free' Instruction

    982
    983
    Syntax:
    984
    985 free <type> <value> ; yields {void}
    986
    987
    988
    989
    Overview:
    990 The 'free' instruction returns memory to the system heap, to be reallocated in the future.

    991
    992
    993
    Arguments:
    994
    995 'value' shall be a pointer to a block of memory that was allocated with the 'malloc' instruction.

    996
    997
    998
    Semantics:
    999 After this instruction executes, the memory is available to be reallocated. The contents of the memory pointed to by 'value' are undefined after this instruction.

    1000
    1001
    1002
    Example:
    1003
    1004 %array = malloc [4 x ubyte] ; yields {[4 x ubyte]*}:array
    1005 free [4 x ubyte]* %array
    1006
    1007
    1008
    1009
    1010


    'alloca' Instruction

    1011
    1012
    Syntax:
    1013
    1014 <result> = alloca <type> ; yields {type*}:result
    1015 <result> = alloca [<type>], uint <NumElements> ; yields {[type] *}:result
    1016
    1017
    1018
    Overview:
    1019
    1020 The 'alloca' instruction allocates memory on the stack frame of the currently executing procedure; the memory is live as long as the method does not return.

    1021
    1022
    Arguments:
    1023 There are two forms of the 'alloca' instruction, one for allocating a variable of a fixed type, and one for allocating an array. The array form is used to allocate an array whose upper bound is not known until run time. If the upper bound is known at compile time, it is recommended that the first form be used with a sized array type.

    1024
    1025 'type' may be any type except for an unsized array type.

    1026
    1027 Note that a virtual machine may generate more efficient native code for a method if all of the fixed size 'alloca' instructions live in the first basic block of that method.
    1028
    1029
    1030
    Semantics:
    1031 Memory is allocated, and a pointer to it is returned. 'alloca'd memory is automatically released when the method returns. The 'alloca' instruction is how variable spills shall be implemented.

    1032
    1033
    Example:
    1034
    1035 %ptr = alloca int ; yields {int*}:ptr
    1036 %ptr = alloca [int], uint 4 ; yields {[int]*}:ptr
    1037
    1038
    1039
    1040
    1041


    'load' Instruction

    1042
    1043
    Syntax:
    1044
    1045 <result> = load <ty>* <pointer> ; yields {ty}:result
    1046 <result> = load <ty>* <arrayptr>, uint <idx> ; yields {ty}:result
    1047
    1048
    1049
    Overview:
    1050 The 'load' instruction is used to read from memory.

    1051
    1052
    Arguments:
    1053
    1054 There are two forms of the 'load' instruction: one for reading from a general pointer, and one for reading from a pointer to an array.

    1055
    1056 In the first form, '<ty>' may be any pointer type. If it is a pointer to an array, the first (zeroth) element is read. In the second form, '<ty>' must be a pointer to an array. No bounds checking is performed on array reads.

    1057
    1058
    1059
    Semantics:
    1060 ...
    1061
    1062
    Examples:
    1063
    1064 %ptr = alloca int ; yields {int*}:ptr
    1065 store int* %ptr, int 3 ; yields {void}
    1066 %val = load int* %ptr ; yields {int}:val = int 3
    1067
    1068 %array = malloc [4 x ubyte] ; yields {[4 x ubyte]*}:array
    1069 store [4 x ubyte]* %array, uint 3, ubyte 124 ; yields {void}
    1071 %val = load [4 x ubyte]* %array, uint 3 ; yields {ubyte}:val = ubyte 124
    1072
    1073
    1074
    1075
    1076
    1077
    1078
    1079


    'store' Instruction

    1080
    1081
    Syntax:
    1082
    1083 store <ty>* <pointer>, <ty> <value> ; yields {void}
    1084 store <ty>* <arrayptr>, uint <idx>, <ty> <value> ; yields {void}
    1085
    1086
    1087
    Overview:
    1088 The 'store' instruction is used to write to memory.

    1089
    1090
    1091
    Arguments:
    1092 There are two forms of the 'store' instruction: one for writing through a general pointer, and one for writing through a pointer to an array.

    1093
    1094 In the first form, '<ty>' may be any pointer type. If it is a pointer to an array, the first (zeroth) element is written to. In the second form, '<ty>' must be a pointer to an array. No bounds checking is performed on array writes.

    1095
    1096
    1097
    Semantics:
    1098 ...
    1099
    1100
    Example:
    1101
    1102 %ptr = alloca int ; yields {int*}:ptr
    1103 store int* %ptr, int 3 ; yields {void}
    1104 %val = load int* %ptr ; yields {int}:val = int 3
    1105
    1106 %array = malloc [4 x ubyte] ; yields {[4 x ubyte]*}:array
    1107 store [4 x ubyte]* %array, uint 3, ubyte 124 ; yields {void}
    1109 %val = load [4 x ubyte]* %array, uint 3 ; yields {ubyte}:val = ubyte 124
    1110
    1111
    1112
    1113
    1114
    1115
    1116


    'getfieldptr' Instruction

    1117
    1118
    Syntax:
    1119
    1120
    1121
    1122
    1123
    Overview:
    1124
    1125 'getfieldptr' takes a structure pointer and an unsigned byte. It returns a pointer, of the correct type, to the specified field. At the implementation level, this would be compiled down to the addition of a constant integer offset to the pointer.
    1126
    1127
    Arguments:
    1128
    1129
    1130
    Semantics:
    1131
    1132
    1133
    Example:
    1134
    1135
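    The syntax and example are still blank in this draft. Purely as a speculative sketch, by analogy with the other instructions (structure types themselves are still on the TODO list below, so nothing here is defined yet):

    <result> = getfieldptr <ty>* <ptr>, ubyte <field> ; hypothetical: yields a pointer to the <field>'th field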
    1136
    1137
    1138
    1139
    1140
    1141
    1142 Other Operations
    1143
    1144
    1145 The instructions in this category are the "miscellaneous" instructions that defy better classification.

    1146
    1147
    1148
    1149


    'call' Instruction

    1150
    1151
    Syntax:
    1152
    1153
    1154
    1155
    1156
    Overview:
    1157
    1158
    1159
    Arguments:
    1160
    1161
    1162
    Semantics:
    1163
    1164
    1165
    Example:
    1166
    1167 %retval = call int %test(int %argc)
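
    The descriptive sections above are still to be written in this draft. Reading the existing example (an interpretation, not a definition), the pieces are the return type, the callee, and a parenthesized list of typed actual arguments; a second hedged example in the same form:

    %x = call int %getValue() ; hypothetical no-argument call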
    1168
    1169
    1170
    1171


    'icall' Instruction

    1172
    1173 Indirect calls are desperately needed to implement virtual function tables (C++, Java) and function pointers (C, C++, ...).

    1174
    1175 A new instruction, 'icall' or similar, should be introduced to represent an indirect call.

    1176
    1177 Example:
    1178
    1179 %retval = icall int %funcptr(int %arg1) ; yields {int}:%retval
    1180
    1181
    1182
    1183
    1184
    1185


    'phi' Instruction

    1186
    1187
    Syntax:
    1188
    1189
    1190
    1191
    Overview:
    1192
    1193
    1194
    Arguments:
    1195
    1196
    1197
    Semantics:
    1198
    1199
    1200
    Example:
    1201
    1202
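    These sections are still blank in this draft. Purely as a hedged placeholder (the syntax shown is an assumption, not the defined form): a 'phi' node would select one incoming value per predecessor basic block of the block it lives in, for example:

    %result = phi int [ %valA, %blockA ], [ %valB, %blockB ] ; speculative syntax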
    1203
    1204
    1205
    1206
    1207 Builtin Functions
    1208
    1209
    1210 Notice: Preliminary idea!

    1211
    1212 Builtin functions are very similar to normal functions, except that they are defined by the implementation. Invocations of these functions look much like method invocations, except that the syntax is a little less verbose.

    1213
    1214 Builtin functions are useful to implement semi-high level ideas like a 'min' or 'max' operation that can have important properties when doing program analysis. For example:
    1215
    1216
    1217
  • Some optimizations can make use of identities defined over the functions; for example, a parallelizing compiler could make use of 'min' identities to parallelize a loop.
  • Builtin functions would have polymorphic types, where normal method calls may only have a single type.
  • Builtin functions would be known to not have side effects, simplifying analysis over straight method calls.
  • The syntax of the builtins is cleaner than the syntax of the 'call' instruction (a very minor point).
    1226
    1227
    1228 Because these invocations are explicit in the representation, the runtime can choose to implement these builtin functions any way that it wants, including:
    1229
    1230
    1231
  • Inlining the code directly into the invocation.
  • Implementing the functions in some sort of Runtime class, converting the invocation into a standard method call.
  • Implementing the functions in some sort of Runtime class, and performing standard inlining optimizations on it.
    1236
    1237
    1238 Note that these builtins do not use quoted identifiers: the name of the builtin effectively becomes an identifier in the language.

    1239
    1240 Example:
    1241
    1242 ; Example of a normal method call
    1243 %maximum = call int %maximum(int %arg1, int %arg2) ; yields {int}:%maximum
    1244
    1245 ; Examples of potential builtin functions
    1246 %max = max(int %arg1, int %arg2) ; yields {int}:%max
    1247 %min = min(int %arg1, int %arg2) ; yields {int}:%min
    1248 %sin = sin(double %arg) ; yields {double}:%sin
    1249 %cos = cos(double %arg) ; yields {double}:%cos
    1250
    1251 ; Show that builtins are polymorphic, like instructions
    1252 %max = max(float %arg1, float %arg2) ; yields {float}:%max
    1253 %cos = cos(float %arg) ; yields {float}:%cos
    1254
    1255
    1256 The 'maximum' vs 'max' example illustrates the difference in calling semantics between a 'call' instruction and a builtin function invocation. Notice that the 'maximum' example assumes that the method is defined locally to the caller.

    1257
    1258
    1259
    1260
    1261
    1262
    1263 TODO List
    1264
    1265
    1266
    1267 This list of random topics includes things that will need to be addressed before LLVM may be used to implement a Java-like language. Right now, it is pretty much useless for any language, given the unavailability of structure types.

    1268
    1269
    1270


    Synchronization Instructions

    1271
    1272 We will need some form of synchronization instruction to be able to implement stuff in Java well. The way I currently envision doing this is to introduce a 'lock' type, and then add two operations (either builtins or instructions) to lock and unlock the lock.
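
    A loudly hypothetical sketch of how that might look, reusing the builtin syntax above (the 'lock' type and both operation names are assumptions that exist nowhere else in this document):

    %mutex = alloca lock ; hypothetical 'lock' type, stack-allocated
    lock(lock* %mutex) ; hypothetical builtin: acquire
    ; ... critical section ...
    unlock(lock* %mutex) ; hypothetical builtin: release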

    1273
    1274
    1275
    1276
    1277 Possible Extensions
    1278
    1279
    1280
    1281 These extensions are distinct from the TODO list, as they are mostly "interesting" ideas that could be implemented in the future by someone so motivated. They are not directly required to get Java-like languages working.

    1282
    1283
    1284


    'tailcall' Instruction

    1285
    1286 This could be useful. Who knows. '.net' does it, but is the optimization really worth the extra hassle? Using strong typing would make this trivial to implement, and a runtime could always fall back to converting it down into a normal 'call' instruction.

    1287
    1288
    1289
    1290


    Global Variables

    1291
    1292 In order to represent programs written in languages like C, we need to be able to support variables at the module (global) scope. Perhaps they should even be written outside of the module definition. Maybe global functions should be handled like this as well.

    1293
    1294
    1295
    1296


    Explicit Parallelism

    1297
    1298 With the rise of massively parallel architectures (like the IA64 architecture, multithreaded CPU cores, and SIMD data sets), it is becoming increasingly important to extract all of the ILP possible from a code stream. It would be interesting to research encoding methods that can explicitly represent this. One straightforward way to do this would be to introduce a "stop" instruction that is equivalent to the IA64 stop bit.

    1299
    1300
    1301
    1302
    1303
    1304 Related Work
    1305
    1306
    1307
    1308
    1309 Codesigned virtual machines.

    1310
    1311
    1312
    1313
    SafeTSA
    1314
    Description here

    1315
    1316
    1317
    Java
    1318
    Description here

    1319
    1320
    1321
    Microsoft .net
    1322
    Description here

    1323
    1324
    1325
    GNU RTL Intermediate Representation
    1326
    Description here

    1327
    1328
    1329
    IA64 Architecture & Instruction Set
    1330
    Description here

    1331
    1332
    1333
    MMIX Instruction Set
    1334
    Description here

    1335
    1336
    1337
    "Interview With Bjarne Stroustrup"
    1338
    This interview influenced the design and thought process behind LLVM in several ways, most notably the way that derived types are written in text format. See the question that starts with "you defined the C declarator syntax as an experiment that failed".

    1339
    1340
    1341
    1342


    Vectorized Architectures

    1343
    1344
    1345
    1346
    Intel MMX, MMX2, SSE, SSE2
    1347
    Description here

    1348
    1349
    1350
    AMD 3Dnow!, 3Dnow! 2
    1351
    Description here

    1352
    1353
    1354
    Sun VIS ISA
    1355
    Description here

    1356
    1357
    1358
    1359
    1360 more...
    1361
    1362
    1363
    1364
    1365
    1366
    1367
    1368
    1369
    Chris Lattner
    1370
    1371
    1372 Last modified: Thu May 31 17:36:39 CDT 2001
    1373
    1374
    1375
    0 #!/bin/sh
    1 # This script prints out some of the source files that are useful when
    2 # editing. I use it like this: xemacs `./getsomesrcs.sh` &
    3 ./getsrcs.sh | grep -v Assembly | grep -v Byte | grep -v \\.ll | grep -v tools | grep -v Makefile | grep -v Opt | grep -v CommandLi | grep -v String | grep -v DataType
    4
    0 #!/bin/sh
    1 # This is useful because it prints out all of the source files. Useful for
    2 # greps.
    3 find . -name \*.\[chyl\]\* | grep -v Lexer.cpp | grep -v llvmAsmParser.cpp | grep -v llvmAsmParser.h | grep -v '~$' | grep -v '\.ll$' | grep -v test | grep -v .flc
    0 //===-- llvm/Analysis/ModuleAnalyzer.h - Module analysis driver --*- C++ -*-==//
    1 //
    2 // This class provides a nice interface to traverse a module in a predictable
    3 // way. This is used by the AssemblyWriter, BytecodeWriter, and SlotCalculator
    4 // to do analysis of a module.
    5 //