IR API(五)——使用LLVM提供的C接口和IRBuilder来生成LLVM IR常用方法总结

可以转载，请注明出处！

文章目录

自动生成IR有以下几种方式：

1、通过c++直接使用Instructions.h文件中的命令来生成IR。这个现在很少有人采用，基本都是利用2、3结合。
2、使用llvm提供的c接口来生成IR，c接口官方文档
3、使用IRBuilder来生成IR ，irbuilder官方文档
这里有一个官网的编码指导文档，与上面两个链接结合着学习：API编程指导文档

重要类的介绍：

官网都有对这些的解释：http://llvm.org/docs/ProgrammersManual.html#the-core-llvm-class-hierarchy-reference

API使用总结

一、类型定义

基本类型

基本类型定义常用的方式有三种：第一种是通过Type类，如Type::getInt32Ty(context)；第二种是通过IRBuilder中封装的方法，如builder.getInt32Ty()，我一般用第一种，因为第二种只是把第一种的方法封装了一层；第三种是使用每种类型对应封装类的get方法，如IntegerType::get(context, 32)。

// int (i32). Type::getInt32Ty returns a pointer to the uniqued type object,
// so the variable has to be declared as IntegerType*.
IntegerType* type_i32 = Type::getInt32Ty(context);
// Same type spelled through IntegerType::get with an explicit bit width.
// It gets its own name so that both forms can live in one scope.
IntegerType* type_i32_by_width = IntegerType::get(context, 32);

Type::getInt64Ty(context);	// long
Type::getFloatTy(context);	// float
Type::getDoubleTy(context);	// double
Type::getInt8Ty(context);	// char
Type::getVoidTy(context);	// void

指针类型

指针类型的创建方式和基本类型大同小异，只不过凡是指针都是要指向一个类型，包括基本类型、数组、结构体等。

【注】i8*对应C/C++的char*和void*，在使用时需要注意一下，有时候将函数指针、数组指针等转成i8*，此时的i8*相当于void*，而不是char*。

// int* / int& (pointer and reference to int). For any other pointee,
// replace Type::getInt32Ty(context) with the desired type.
PointerType* int_pointer = Type::getInt32PtrTy(context);
// Same type through the generic factory: (pointee type, address space 0).
// Named separately so both forms can be declared in one scope.
PointerType* int_pointer_generic = PointerType::get(Type::getInt32Ty(context), 0);

// char* and void* both correspond to i8*.
PointerType* char_pointer = PointerType::get(IntegerType::get(mod->getContext(), 8), 0);

数组类型

数组类型Type类并没有提供快捷的创建方法，一般都是使用ArrayType类的get方法，第一个参数表元素类型，第二个表元素个数。

//长度为4的整型数组，int[4]
ArrayType* array_type = ArrayType::get(Type::getInt32Ty(context), 4);

结构体类型

结构体类型创建完之后只是一个类型，并不是一个结构体变量。

// C-side aggregate used as the running example; the LLVM struct type built
// afterwards lists its members in this same order.
struct Number {
	int precision;
	int scale;
	double value;
	char* name;
};

/*
 * Build the LLVM struct type that corresponds to the C struct Number.
 */
// Reuse the struct named "Number" if the current module already has one.
StructType *structType = mod->getTypeByName("Number");
if (!structType) {
	// Not present in this module yet: create a named struct without a body.
	structType = StructType::create(context, "Number");
}
// Member types, in declaration order: int, int, double, char*.
std::vector<Type*> elements;
elements.push_back(Type::getInt32Ty(context));
elements.push_back(Type::getInt32Ty(context));
elements.push_back(Type::getDoubleTy(context));
elements.push_back(PointerType::get(Type::getInt8Ty(context), 0));
// A struct body can be set only once. When the lookup returned a type that
// already has its body, calling setBody again would trip LLVM's assertion.
if (structType->isOpaque()) {
	structType->setBody(elements);
}

二、常量定义

基本类型常量

常量只是一个字面量或者字面值，不能直接用来操作，必须依附于其对应的类型的变量或者指针。

整型

// int 10. For long, use a 64-bit type / bit width instead of 32.
Constant* con_10 = ConstantInt::get(Type::getInt32Ty(context), 10);
// The same value built from an APInt(bit width, decimal string, radix).
ConstantInt* con_10_ap = ConstantInt::get(context, APInt(32, StringRef("10"), 10));

// float 11.11 -- floating-point constants go through ConstantFP, not ConstantInt.
Constant* con_11f = ConstantFP::get(Type::getFloatTy(context), 11.11);
// The same value built from an APFloat holding a single-precision float.
ConstantFP* con_11f_ap = ConstantFP::get(context, APFloat(11.11f));

// char 'a' (code 97) as an i8.
Constant* con_a = ConstantInt::get(Type::getInt8Ty(context), 97);
ConstantInt* con_a_ap = ConstantInt::get(context, APInt(8, StringRef("97"), 10));

字符串常量

字符串常量用ConstantDataArray类的getString方法来定义。ConstantDataArray是一个常量数组（即里面存放的元素是常量），元素类型可以是1/2/4/8-byte的整型常量或float/double常量。字符串是由字符（char）数组构成，字符（char）在ir中对应的类型是i8，所以可以以此来构建字符串常量。

【注】clang将C中的字符串编译成ir时都是按全局常量来处理的，但我们编译的时候不这么用，看后面存取值部分关于字符串的操作

// "hello world" as an i8 array constant; the last argument requests a trailing NUL.
Constant* const_string =
		ConstantDataArray::getString(context, StringRef("hello world"), /*AddNull=*/true);

数组常量

数组常量使用ConstantArray类来定义，下面用例定义一个一维数组常量。如果要用二维数组，先创建几个一维数组，再把一维数组常量作为另一个一维数组常量的元素来使用，就构成了二维数组常量。

Constant* con_1 = ConstantInt::get(Type::getInt32Ty(context), 1);
Constant* con_2 = ConstantInt::get(Type::getInt32Ty(context), 2);
Constant* con_3 = ConstantInt::get(Type::getInt32Ty(context), 3);
Constant* con_4 = ConstantInt::get(Type::getInt32Ty(context), 4);
std::vector<Constant*> const_array_elems;
const_array_elems.push_back(con_1);
const_array_elems.push_back(con_2);
const_array_elems.push_back(con_3);
const_array_elems.push_back(con_4);
/**
 *array_type是数组常量的类型，需要自己定义，参照上面数组类型的定义 
 *二维或多维数组常量的类型的元素类型不再是基本类型，而是数组类型，有点绕，捋一捋
 */
Constant* const_array = ConstantArray::get(array_type, const_array_elems);

结构体常量

结构体常量的定义用ConstantStruct定义，其构建需要用到结构体类型和每个结构体元素的常量。

//结构体类型
StructType *structType = StructType::create(context, "Number");
std::vector<Type*> elements;	//添加结构体元素
elements.push_back(Type::getInt32Ty(context));
elements.push_back(Type::getInt32Ty(context));
elements.push_back(Type::getDoubleTy(context));
structType->setBody(elements);

//每个元素对应的常量
std::vector<Constant*> ele_vec;
ele_vec.push_back(ConstantInt::get(Type::getInt32Ty(context), 1));
ele_vec.push_back(ConstantInt::get(Type::getInt32Ty(context), 2));	
ele_vec.push_back(ConstantFP::get(Type::getDoubleTy(context), 10.0));

//构建结构体常量
Constant *struct_value = ConstantStruct::get(structType, ele_vec);

可以用结构体常量直接对alloca出来的结构体类型的变量进行store操作，即不需要GEP取出每个元素的alloca地址一一store。

// Reserve a stack slot of the struct type ...
Value* alloca_struct = builder.CreateAlloca(structType, /*ArraySize=*/nullptr);
// ... and fill the whole aggregate with a single store of the struct constant.
builder.CreateStore(struct_value, alloca_struct);

三、函数定义

定义一个函数的步骤可以简单分为五步：返回值类型 - 参数类型 - 前两者构成函数类型 - 根据函数类型声明函数 - 获取函数参数(如果有参数)，定义函数体；

如果对一个函数只完成前面四个步骤，没有最后一步函数体的定义，相当于只声明了一个函数，只有最后一步完成才是定义了一个函数。所以，如果只是声明一个函数，只需要完成前四个步骤，不定义函数体也是OK的。

什么时候需要定义一个函数，什么时候需要声明一个函数？

当你想用llvm ir的语法实现一个函数的功能的时候，就需要定义函数
当你想用外部函数，比如C中定义的函数，只需声明，然后在执行前将声明的函数与外部函数映射到一块就可以，llvm ir 调用C函数

定义一个函数，以int max(int a, int b){...}为例

// Return type
Type *returnType = Type::getInt32Ty(context);

// Parameter types
SmallVector<Type *, 2> functionArgs;
functionArgs.push_back(Type::getInt32Ty(context));
functionArgs.push_back(Type::getInt32Ty(context));

// Return type + parameter types together form the function type.
FunctionType *max_type = FunctionType::get(returnType, functionArgs, /*isVarArg*/ false);

/*
 * Alternative way to build the same function type from a C++ signature using
 * TypeBuilder (see TypeBuilder.h under /usr/local/include/llvm/IR).
 * Use either this or the FunctionType::get form above; it is given a separate
 * name here so that both can be shown without a redeclaration.
 */
FunctionType *max_type_alt = TypeBuilder<int(int, int), false>::get(context);

// cast<> converts a base-class pointer/reference to a derived class; see
// http://llvm.org/docs/ProgrammersManual.html#the-c-standard-template-library
// getOrInsertFunction inserts the function into the module.
Function *max_fun = cast<Function>(module->getOrInsertFunction("max", max_type));

// Pick up the formal parameters through the function's argument iterator.
// &*it yields the argument's address without depending on an implicit
// iterator-to-pointer conversion.
Function::arg_iterator argsIT = max_fun->arg_begin();
Value *arg_a = &*argsIT++;	// first parameter
arg_a->setName("a");
Value *arg_b = &*argsIT++;	// second parameter
arg_b->setName("b");

/*
	Function body goes here.
	arg_a and arg_b are the incoming i32 values themselves, not addresses: use
	them directly as operands. To load/store through a variable, first create
	an alloca and store the argument into it.
*/

四、调用函数

函数调用一般都是采用call指令，如果有异常处理机制，就需要invoke指令。

//构造实参列表
Value* arg1_value = ConstantInt::get(Type::getInt32Ty(context), 10);
Value* arg2_value = ConstantInt::get(Type::getInt32Ty(context), 10);
std::vector<Value*> putsargs;
putsargs.push_back(arg1_value);
putsargs.push_back(arg2_value);

//注意builder所在基本块的位置
builder.SetInsertPoint(entry_mian);

// 调用函数max
Value *ret_value = builder.CreateCall(max_fun, putsargs);

五、基本块的定义

常用控制语句的基本块定义，在使用时注意以下几点：

基本块一旦创建，就会存在，且必须要有一个终结指令（terminator，如br、ret）来结束这个块
如果使用同一个name创建基本块，生成的ir block会在name的后面加个数字
在做codeGen时，块的逻辑并不受块创建的顺序影响

if{
    
    
}
else{
    
    
}
// BasicBlock::Create arguments: (1) context, (2) block name,
// (3) the Function* the block is added to, (4) passed as null here.
BasicBlock* entry         = BasicBlock::Create(context, "entry",   function, nullptr);
BasicBlock* label_if_then = BasicBlock::Create(context, "if.then", function, nullptr);
BasicBlock* label_if_else = BasicBlock::Create(context, "if.else", function, nullptr);
BasicBlock* label_if_end  = BasicBlock::Create(context, "if.end",  function, nullptr);

for{
    
    
}
// Blocks for a for loop: entry, condition, body, increment and exit.
BasicBlock* entry          = BasicBlock::Create(context, "entry",    function, nullptr);
BasicBlock* label_for_cond = BasicBlock::Create(context, "for.cond", function, nullptr);
BasicBlock* label_for_body = BasicBlock::Create(context, "for.body", function, nullptr);
BasicBlock* label_for_inc  = BasicBlock::Create(context, "for.inc",  function, nullptr);
BasicBlock* label_for_end  = BasicBlock::Create(context, "for.end",  function, nullptr);

while{
    
    
}
// Blocks for a while loop: entry, condition, body and exit.
BasicBlock* entry            = BasicBlock::Create(context, "entry",      function, nullptr);
BasicBlock* label_while_cond = BasicBlock::Create(context, "while.cond", function, nullptr);
BasicBlock* label_while_body = BasicBlock::Create(context, "while.body", function, nullptr);
BasicBlock* label_while_end  = BasicBlock::Create(context, "while.end",  function, nullptr);

六、存值、取值操作

基本类型存取值

存值取值都是针对alloca出来的地址进行的。

// float f1 = 1.25
llvm::Type* f1_type = llvm::Type::getFloatTy(context);
llvm::Constant* con_1 = llvm::ConstantFP::get(f1_type, 1.25);	// the constant value
llvm::Value* f1_alloca = builder.CreateAlloca(f1_type);	// stack slot for f1
builder.CreateStore(con_1, f1_alloca);	// write the value
llvm::Value* f1_load = builder.CreateLoad(f1_alloca);	// read it back

字符串存取值操作

通过getType()方法，可以获取常量类型，该方法在Value类中。

// String constant: "hello world" plus the trailing NUL, i.e. [12 x i8].
Constant *str_const = ConstantDataArray::getString(context, "hello world");
// One stack slot of the array type, so str_alloc has type [12 x i8]*.
// No ArraySize operand is passed: that operand is an element count, so
// supplying sizeof(type) would reserve that many whole arrays, not bytes.
AllocaInst *str_alloc = builder.CreateAlloca(str_const->getType());
builder.CreateStore(str_const, str_alloc);	// write the string
LoadInst *loadVal = builder.CreateLoad(str_alloc);	// read it back

数组、结构体存取值

数组类型变量的存取值同结构体一样，都是取出每个元素的地址，然后分别对每个元素操作。

// int array[4] = {5,6,7,8};
llvm::Constant* con_5 = llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 5);
llvm::Constant* con_6 = llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 6);
llvm::Constant* con_7 = llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 7);
llvm::Constant* con_8 = llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 8);
// For a struct, this would be the struct type instead.
llvm::Type* arrayType = llvm::ArrayType::get(llvm::Type::getInt32Ty(context), 4);
llvm::Value* array = builder.CreateAlloca(arrayType);	// stack storage for the array

// Store: GEP indices (0, i) address element i of the alloca'd array.
builder.CreateStore(con_5, builder.CreateConstGEP2_32(arrayType, array, 0, 0));
builder.CreateStore(con_6, builder.CreateConstGEP2_32(arrayType, array, 0, 1));
builder.CreateStore(con_7, builder.CreateConstGEP2_32(arrayType, array, 0, 2));
builder.CreateStore(con_8, builder.CreateConstGEP2_32(arrayType, array, 0, 3));

// Element access with Value* indices. con_0 is an i32 constant 0 and i_load is
// an i32 index with one of the values 0..3; both are defined outside this
// snippet (the surrounding loop is omitted).
llvm::Value *Idxs[] = {con_0, i_load};
// The source element type is passed explicitly, matching the
// CreateConstGEP2_32 calls above. The result is the element's address; a load
// from it is still needed to read the value.
llvm::Value* array_i = builder.CreateGEP(arrayType, array, Idxs);
/* Note the difference between CreateGEP and CreateConstGEP2_32: there are
   several ways to emit a GEP for an element address, and it is worth knowing
   all of them. */

结构体也可以直接对结构体地址操作，数组还没有试过，估计也可以。

StructType *structType = StructType::create(context, "Number");
std::vector<Type*> elements;	//添加结构体元素
elements.push_back(Type::getInt32Ty(context));
elements.push_back(Type::getInt32Ty(context));
elements.push_back(Type::getDoubleTy(context));
structType->setBody(elements);

std::vector<Constant*> ele_vec;
ele_vec.push_back(ConstantInt::get(Type::getInt32Ty(context), 1));
ele_vec.push_back(ConstantInt::get(Type::getInt32Ty(context), 2));	
ele_vec.push_back(ConstantFP::get(Type::getDoubleTy(context), 10.0));
Constant *struct_value = ConstantStruct::get(structType, ele_vec);

Value* alloca_struct = builder.CreateAlloca(structType);

builder.CreateStore(struct_value, alloca_struct);

七、从函数中获取一个指定名称的基本块

	llvm::Function *catchFun = NULL;
	llvm::StringRef destName("lpad");
	llvm::BasicBlock *bb = NULL;
	for (llvm::Function::iterator iter = catchFun->getBasicBlockList().begin();
			iter != catchFun->getBasicBlockList().end(); iter++) {
    
    
		llvm::StringRef currentName = iter->getName();
		if (currentName == destName) {
    
    
			bb = cast<llvm::BasicBlock>(iter);
			break;
		}
	}