Compiling Linux kernel as an LLVM bc file

We needed to perform data structure analysis (DSA) on the Linux kernel and its complex subsystems for the LCDs project. However, the mainline Linux kernel is not (yet) compilable with LLVM/Clang. But there is a tree maintained here that has detailed instructions on how to build the kernel using clang. The resulting kernel may not be runnable on an actual system, but for doing DSA it is sufficient. DSA requires bitcode files of all the modules that need to be analyzed. There is a nice tool, wllvm, for generating bitcode files during compilation, and it is now available as a pip package.

Prerequisites

Prepare sources

Clone the tree and download the makefile (note the lowercase ‘m’) into it.

git clone git://git.linuxfoundation.org/llvmlinux/kernel.git linux-llvm
wget http://buildbot.llvm.linuxfoundation.org/makefile -P ./linux-llvm

To make DSA easier and more meaningful, the optimization level has to be set to 0. I used ccflags-y += -O1 (in the respective Makefiles) for those files that contain inline assembly and other assembly quirks that fail to compile with -O0. The patch also replaces __compiletime_assert in include/linux/compiler.h with an empty stub, since the original definition relies on the optimizer to remove the call to the error function. I used a very minimal config; for that config, the patch given below was enough to keep -O0 happy. If you decide to pull in more modules, you might have to patch more Makefiles (basically wherever clang is unable to compile with -O0).

diff --git a/Makefile b/Makefile
index c9aa4c0..5ad0866 100644
--- a/Makefile
+++ b/Makefile
@@ -629,7 +629,7 @@ ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= $(call cc-option,-Oz,-Os)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
 else
-KBUILD_CFLAGS	+= -O2
+KBUILD_CFLAGS	+= -O0
 endif
 
 # Tell gcc to never replace conditional load with a non-conditional one
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index e785b42..af78249 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for the ia32 kernel emulation subsystem.
 #
-
+ccflags-y += -O1
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
 obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5d4502c..7df5837 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,8 @@ extra-y                := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
+ccflags-y += -O1
+
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
diff --git a/drivers/idle/Makefile b/drivers/idle/Makefile
index 23d295c..f17a349 100644
--- a/drivers/idle/Makefile
+++ b/drivers/idle/Makefile
@@ -1,3 +1,4 @@
+ccflags-y += -O1
 obj-$(CONFIG_I7300_IDLE)			+= i7300_idle.o
 obj-$(CONFIG_INTEL_IDLE)			+= intel_idle.o
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 33063f8..bc5aff1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -412,6 +412,12 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __compiletime_error_fallback(condition) do { } while (0)
 #endif
 
+
+#define __compiletime_assert(condition, msg, prefix, suffix)		\
+	do { \
+	} while(0)
+
+#if 0
 #define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
 		bool __cond = !(condition);				\
@@ -420,7 +426,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 			prefix ## suffix();				\
 		__compiletime_error_fallback(__cond);			\
 	} while (0)
-
+#endif
 #define _compiletime_assert(condition, msg, prefix, suffix) \
 	__compiletime_assert(condition, msg, prefix, suffix)
 

Adjust makefile

To build llvm-linux with the whole-program-llvm tool, set both HOSTCC and CC to wllvm in the makefile (note the lowercase ‘m’: this is the downloaded makefile, not the kernel's top-level Makefile).

# Choose to use wllvm as your compiler
HOSTCC          := wllvm
CC              := wllvm

Export LLVM_COMPILER variable

export LLVM_COMPILER=clang

Build

cd linux-llvm && make -j12

When make is invoked, an LLVM bitcode file is generated for each object (.o) file. After the build, you should see the bc files that were generated by the wllvm wrapper.

Below are some files from my build tree. You may or may not see the same set of files.

./security/.min_addr.o.bc
./security/.commoncap.o.bc
./security/.device_cgroup.o.bc
./arch/x86/vdso/.vgetcpu.o.bc
./arch/x86/vdso/.vdso2c.o.bc
./arch/x86/vdso/.vdso-image-64.o.bc
./arch/x86/vdso/.vclock_gettime.o.bc
./arch/x86/vdso/.vdso-image-32-syscall.o.bc
./arch/x86/vdso/.vdso-image-32-sysenter.o.bc
./arch/x86/vdso/vdso32/.vclock_gettime.o.bc
./arch/x86/vdso/.vma.o.bc
......

Kernel modules as .bc file

For analyzing kernel modules, one may need to pack a set of bitcode files as a single .bc file. llvm-link comes in handy for this task.

llvm-link -o module.bc file1.bc file2.bc

vmlinux.bc file

Once the build is complete, use extract-bc (part of the whole-program-llvm tools) to extract all the .bc files from the vmlinux image into a single monstrous vmlinux.bc file.

extract-bc ./vmlinux