lua指令集与虚拟机

lu_int32 Instruction 指令结构介绍与解析方法

----------------------目录-----------------------

(a)lu_int32 指令类型
(b)Instruction 指令结构
(c)OpMode 操作码类型
(d)MAXARG limit限制
(e)GET_/SET_OPCODE 操作
(f)操作码分类校验
(g)OpCode 指令枚举
(h)OpArgMask
(i)Opmode 指令解析方法

(j)OP顺序调整加密

---------------------------------------------------

(a)lu_int32 指令类型

关于它的定义：typedef unsigned LUA_INT32 lu_int32;

LUA_INT32 的具体类型由 LUAI_BITSINT 的值决定：通过设置 LUAI_BITSINT，可以控制它是 int 类型还是 long 类型

(b)Instruction 指令结构

在lopcodes.h中有相关于指令的介绍

前6bits（低位）：所有指令的opcode

之后会存在

A：8 bits

B：9 bits

C：9 bits

或者

Ax：26 bits（A、B、C组合在一起）

Bx：18 bits（B和C组合在一起）

sBx：18 bits（有符号的Bx）

(c)OpMode 操作码类型

{iABC,iABx,iAsBx,iAx}这个是代表组合的类型

#define SIZE_C 9
#define SIZE_B 9
#define SIZE_Bx (SIZE_C + SIZE_B)
#define SIZE_A 8
#define SIZE_Ax (SIZE_C + SIZE_B + SIZE_A)

#define SIZE_OP 6

#define POS_OP 0
#define POS_A (POS_OP + SIZE_OP)
#define POS_C (POS_A + SIZE_A)
#define POS_B (POS_C + SIZE_C)

#define POS_Bx POS_C

#define POS_Ax POS_A

上述定义说明了 Instruction 的相关结构：各个字段的位宽（SIZE_*）以及它们在指令中的起始位置（POS_*）

(d)MAXARG limit限制

系列参数定义，定义所有指令参数的限制条件，这里不做详细的介绍了，可以参考lopcodes.h中相关定义学习

(e)GET_/SET_OPCODE 操作

这一部分是为了获取、设置OPCode和相对应的参数，也就是指令集解析操作，在lopcodes.h GET_OPCODE之后是相应的指令定义

MASK1 制作1的掩码，协助下方操作获取相应的数值

MASK0 制作0的掩码，协助下方操作设置相应的数值

GET_OPCODE 获取opcode

SET_OPCODE 设置opcode

getarg 获取参数方法，协助下方获取参数操作

setarg 设置参数方法，协助下方设置参数操作

GETARG_A

SETARG_A

……

这一部分就是相关的参数操作定义了

CREATE_ABC

这一部分是用来创建各个mode组合的指令，创建好之后就可以设置参数了！

(f)操作码分类校验

* opmode校验

{iABC,iABx,iAsBx,iAx}OpMode分为这四个种类

主要是用来给指令分类别，好对应相应的解析执行操作

这部分的讲解请参见(i)部分

* RK校验

由于lua中会需要将常量加载到寄存器中，这个时候我们就需要校验一下参数是常量的索引还是寄存器的索引

lua源码中，在操作码枚举声明后的注释也说明了所有操作码对应的操作：

其中：

R()表示这是一个寄存器索引

RK()表示这有可能是一个寄存器索引，也可能是一个常量索引，由参数的高位来决定是哪一种

Kst()表示这是一个常量索引

BITRK 如果得到的是1 表示常量，0 表示寄存器

ISK() 检测是不是constant，也就是是不是常量

INDEXK() 获取常量的index

MAXINDEXRK() 最大数值（limit）

RKASK 把一个常量的索引标记成RK类型，也就是对高位表示进行处理

NO_REG 这里表示没有用到这个参数

(g)OpCode 指令枚举

typedef enum {
/*----------------------------------------------------------------------
name args description
------------------------------------------------------------------------*/
OP_MOVE,/* A B R(A) := R(B)*/
OP_LOADK,/* A Bx R(A) := Kst(Bx)*/
OP_LOADKX,/* A R(A) := Kst(extra arg)*/
OP_LOADBOOL,/*A B C R(A) := (Bool)B; if (C) pc++*/
OP_LOADNIL,/* A B R(A), R(A+1), ..., R(A+B) := nil*/
OP_GETUPVAL,/*A B R(A) := UpValue[B]*/

OP_GETTABUP,/*A B C R(A) := UpValue[B][RK(C)]*/
OP_GETTABLE,/*A B C R(A) := R(B)[RK(C)]*/

OP_SETTABUP,/*A B C UpValue[A][RK(B)] := RK(C)*/
OP_SETUPVAL,/*A B UpValue[B] := R(A)*/
OP_SETTABLE,/*A B C R(A)[RK(B)] := RK(C)*/

OP_NEWTABLE,/*A B C R(A) := {} (size = B,C)*/

OP_SELF,/* A B C R(A+1) := R(B); R(A) := R(B)[RK(C)]*/

OP_ADD,/* A B C R(A) := RK(B) + RK(C)*/
OP_SUB,/* A B C R(A) := RK(B) - RK(C)*/
OP_MUL,/* A B C R(A) := RK(B) * RK(C)*/
OP_DIV,/* A B C R(A) := RK(B) / RK(C)*/
OP_MOD,/* A B C R(A) := RK(B) % RK(C)*/
OP_POW,/* A B C R(A) := RK(B) ^ RK(C)*/
OP_UNM,/* A B R(A) := -R(B)*/
OP_NOT,/* A B R(A) := not R(B)*/
OP_LEN,/* A B R(A) := length of R(B)*/

OP_CONCAT,/* A B C R(A) := R(B).. ... ..R(C)*/

OP_JMP,/* A sBx pc+=sBx; if (A) close all upvalues >= R(A) + 1*/
OP_EQ,/* A B C if ((RK(B) == RK(C)) ~= A) then pc++*/
OP_LT,/* A B C if ((RK(B) < RK(C)) ~= A) then pc++*/
OP_LE,/* A B C if ((RK(B) <= RK(C)) ~= A) then pc++*/

OP_TEST,/* A C if not (R(A) <=> C) then pc++*/
OP_TESTSET,/* A B C if (R(B) <=> C) then R(A) := R(B) else pc++*/

OP_CALL,/* A B C R(A), ... ,R(A+C-2) := R(A)(R(A+1), ... ,R(A+B-1)) */
OP_TAILCALL,/*A B C return R(A)(R(A+1), ... ,R(A+B-1))*/
OP_RETURN,/* A B return R(A), ... ,R(A+B-2)(see note) */

OP_FORLOOP,/* A sBx R(A)+=R(A+2);
if R(A) <?= R(A+1) then { pc+=sBx; R(A+3)=R(A) }*/
OP_FORPREP,/* A sBx R(A)-=R(A+2); pc+=sBx*/

OP_TFORCALL,/*A C R(A+3), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2));*/
OP_TFORLOOP,/*A sBx if R(A+1) ~= nil then { R(A)=R(A+1); pc += sBx }*/

OP_SETLIST,/* A B C R(A)[(C-1)*FPF+i] := R(A+i), 1 <= i <= B*/

OP_CLOSURE,/* A Bx R(A) := closure(KPROTO[Bx])*/

OP_VARARG,/* A B R(A), R(A+1), ..., R(A+B-2) = vararg*/

OP_EXTRAARG/* Ax extra (larger) argument for previous opcode*/
} OpCode;

#define NUM_OPCODES (cast(int, OP_EXTRAARG) + 1)

这里定义了指令的数量。调整上面枚举中各操作码的顺序，可以改变每个opcode对应的数值，做到一定的操作码混淆，有一定的加密功能

(h)OpArgMask

enum OpArgMask {
OpArgN, /* 没有用到的参数 */
OpArgU, /* 在使用的参数 */
OpArgR, /* 寄存器参数或者跳转偏移 */
OpArgK /* 常量或者RK（寄存器/常量） */
};

这个是用来协助操作码解析的，分类参数类型，保证最高效率解析指令

(i)Opmode指令解析方法

LUAI_DDEF const lu_byte luaP_opmodes[NUM_OPCODES] = {
/* T A B C mode opcode*/
opmode(0, 1, OpArgR, OpArgN, iABC) /* OP_MOVE */
,opmode(0, 1, OpArgK, OpArgN, iABx) /* OP_LOADK */
,opmode(0, 1, OpArgN, OpArgN, iABx) /* OP_LOADKX */
,opmode(0, 1, OpArgU, OpArgU, iABC) /* OP_LOADBOOL */
,opmode(0, 1, OpArgU, OpArgN, iABC) /* OP_LOADNIL */
,opmode(0, 1, OpArgU, OpArgN, iABC) /* OP_GETUPVAL */
,opmode(0, 1, OpArgU, OpArgK, iABC) /* OP_GETTABUP */
,opmode(0, 1, OpArgR, OpArgK, iABC) /* OP_GETTABLE */
,opmode(0, 0, OpArgK, OpArgK, iABC) /* OP_SETTABUP */
,opmode(0, 0, OpArgU, OpArgN, iABC) /* OP_SETUPVAL */
,opmode(0, 0, OpArgK, OpArgK, iABC) /* OP_SETTABLE */
,opmode(0, 1, OpArgU, OpArgU, iABC) /* OP_NEWTABLE */
,opmode(0, 1, OpArgR, OpArgK, iABC) /* OP_SELF */
,opmode(0, 1, OpArgK, OpArgK, iABC) /* OP_ADD */
,opmode(0, 1, OpArgK, OpArgK, iABC) /* OP_SUB */
,opmode(0, 1, OpArgK, OpArgK, iABC) /* OP_MUL */
,opmode(0, 1, OpArgK, OpArgK, iABC) /* OP_DIV */
,opmode(0, 1, OpArgK, OpArgK, iABC) /* OP_MOD */
,opmode(0, 1, OpArgK, OpArgK, iABC) /* OP_POW */
,opmode(0, 1, OpArgR, OpArgN, iABC) /* OP_UNM */
,opmode(0, 1, OpArgR, OpArgN, iABC) /* OP_NOT */
,opmode(0, 1, OpArgR, OpArgN, iABC) /* OP_LEN */
,opmode(0, 1, OpArgR, OpArgR, iABC) /* OP_CONCAT */
,opmode(0, 0, OpArgR, OpArgN, iAsBx) /* OP_JMP */
,opmode(1, 0, OpArgK, OpArgK, iABC) /* OP_EQ */
,opmode(1, 0, OpArgK, OpArgK, iABC) /* OP_LT */
,opmode(1, 0, OpArgK, OpArgK, iABC) /* OP_LE */
,opmode(1, 0, OpArgN, OpArgU, iABC) /* OP_TEST */
,opmode(1, 1, OpArgR, OpArgU, iABC) /* OP_TESTSET */
,opmode(0, 1, OpArgU, OpArgU, iABC) /* OP_CALL */
,opmode(0, 1, OpArgU, OpArgU, iABC) /* OP_TAILCALL */
,opmode(0, 0, OpArgU, OpArgN, iABC) /* OP_RETURN */
,opmode(0, 1, OpArgR, OpArgN, iAsBx) /* OP_FORLOOP */
,opmode(0, 1, OpArgR, OpArgN, iAsBx) /* OP_FORPREP */
,opmode(0, 0, OpArgN, OpArgU, iABC) /* OP_TFORCALL */
,opmode(0, 1, OpArgR, OpArgN, iAsBx) /* OP_TFORLOOP */
,opmode(0, 0, OpArgU, OpArgU, iABC) /* OP_SETLIST */
,opmode(0, 1, OpArgU, OpArgN, iABx) /* OP_CLOSURE */
,opmode(0, 1, OpArgU, OpArgN, iABC) /* OP_VARARG */
,opmode(0, 0, OpArgU, OpArgU, iAx) /* OP_EXTRAARG */
};

这里给出了全部操作指令的模式定义，其中 opmode 宏的定义如下：

#define opmode(t,a,b,c,m) (((t)<<7) | ((a)<<6) | ((b)<<4) | ((c)<<2) | (m))

在这里针对opmode调整编码顺序，例如t,a,b,c的顺序或者偏移量，可以起到一定的加密作用

接下来，通过

getOpMode 获取指令Mode，通过OpMode处理获取

getBMode 获取B的类型，通过OpArgMask处理获取

getCMode 获取C 的类型，通过OpArgMask处理获取

getAMode

testAMode 第六位（lua里面默认的设置，上面的a），用来判断当前指令是否会修改寄存器A

testTMode 第七位，用来判断当前指令是否涉及一次条件跳转。增加这个标记可以用来检测分支指令这一个大的类别，这样简化了指令集：当遇到跳转指令的时候，可以回到前一条指令来看看它是否是条件跳转

#define LFIELDS_PER_FLUSH 50

(j)op顺序调整加密

需要调整的位置

lopcodes.h 中的 typedef enum {...} OpCode

lopcodes.c LUAI_DDEF const char *const luaP_opnames[NUM_OPCODES+1],这里也需要调整顺序；同一文件中的 luaP_opmodes[NUM_OPCODES] 数组同样按操作码下标索引，也需要与枚举保持相同的顺序