Crawler reverse engineering - AST deobfuscation

Table of contents

Online Deobfuscation Tool

Obfuscation techniques

The meaning of AST

Common deobfuscation passes


Online Deobfuscation Tool

Obfuscation techniques

Web pages implement their logic in JavaScript (js), which has the following characteristics:

  • js runs on the client and must be loaded and run in the client's browser
  • The js code is open and transparent, and the browser can obtain the running js source code

For these two reasons, js code is extremely insecure: anyone can read, analyze, copy, or even tamper with it. Various obfuscation techniques have appeared in response:

  • Variable obfuscation randomizes meaningful variable names, method names, and constant names into meaningless garbled strings, reducing code readability, such as converting them into single characters or hexadecimal strings.

  • String obfuscation moves strings into an array and stores them in encoded or encrypted form (for example MD5 or Base64), so that no plaintext strings appear in the code and a global string search cannot be used to locate the entry point.

  • Attribute encryption encrypts and transforms the attributes of JavaScript objects to hide the calling relationship between codes.

  • Control flow flattening disrupts the original code execution flow and function call relationship of the function, making the code logic chaotic.

  • Dead-code injection randomly inserts useless dead code and dead functions into the code to clutter it further.

  • Debugging protection relies on debugger features: it checks the current running environment and adds forced debugger statements, making it difficult to run the JavaScript code smoothly in debug mode.

  • Polymorphic mutation makes the JavaScript code mutate automatically every time it is called, turning it into code that looks completely different from before. The functionality is unchanged and only the form of the code mutates, which prevents the code from being dynamically analyzed and debugged.

  • Domain locking ensures that the JavaScript code can only be executed under the specified domain name.

  • Anti-formatting: if the JavaScript code is formatted (beautified), it can no longer be executed and causes the browser to freeze.

  • Special encodings completely encode JavaScript into unreadable code, such as emoji, special presentation content, and so on.

Is there a way to restore obfuscated code to readable code? The answer is yes: by working on its AST. URL: AST explorer

An AST (Abstract Syntax Tree) is a tree representation of source code. Through a series of operations on the tree's nodes, we can modify the code programmatically, efficiently and accurately. ASTs are not unique to any one programming language; almost all programming languages have a syntax tree.

The meaning of AST

For crawler engineers, an AST cannot help you find the specific location of encrypted parameters, but you can use it to deobfuscate the obfuscated code and substitute the result in the browser, so that you can easily find the logic that generates the encrypted parameters.

Deobfuscation with the Babel library

Install: npm install @babel/core --save-dev

Babel is a JavaScript compiler, and can also be described as a parsing library. It has many built-in methods for analyzing JavaScript code. We can use Babel to convert js code into an AST, then add, delete, modify and inspect nodes, and finally convert the tree back into JavaScript code. Babel contains many packages, APIs and optional parameters for each method; in actual use you should consult the official documentation. URL: Document

In reverse deobfuscation, the following functional packages of Babel are mainly used

  • @babel/core: the Babel compiler itself; provides the Babel compilation API;
  • @babel/parser: parse the JavaScript code into an AST syntax tree;
  • @babel/traverse: Traverse and modify each node of the AST syntax tree;
  • @babel/generator: Restore AST to JavaScript code;
  • @babel/types: checks and validates node types, builds new AST nodes, etc.

Common deobfuscation passes

const generator = require('@babel/generator').default    // Turns an AST back into JavaScript code
const parser = require("@babel/parser");     // Parses source code into a syntax tree
const traverse = require("@babel/traverse");  // Operates on the syntax tree
const types = require("@babel/types");  // Checks/validates node types, builds new AST nodes, etc.
var fs =require("fs");  // File access (reading the input file)

// Read the obfuscated input and parse it into an AST.
// Both names are declared explicitly; the original assigned to an undeclared
// `js`, which creates an implicit global and throws in strict mode.
const js = fs.readFileSync('mfw.js',{encoding:'utf-8'});
const ast = parser.parse(js);
/**
 * Asynchronously writes `code` to the path held in `file_out`.
 *
 * `file_out` is not declared inside this function; it has to be defined by the
 * surrounding script before this function is called.
 *
 * @param {string} code - Text to write to the output file.
 */
function writeFile(code) {
    console.log("Write start\\n");
    fs.writeFile(file_out, code, function (err) {
        if (err) {
            return console.error(err);
        }
        // Report completion only after the asynchronous write has succeeded;
        // logging it right after scheduling the write was premature and also
        // printed "finish" when the write failed.
        console.log("Write finish\\n");
    });
}


/**
 * Inlines array-table lookups inside the first top-level statement.
 *
 * Expected shape of `body[0]`: an expression statement whose expression has an
 * `argument` that is a call of a function, e.g.
 * `!function (a, b) { ... }([...], [...])`. For every parameter that receives
 * an array literal, member accesses `param[<numeric literal>]` inside that
 * statement are replaced by the string or number literal stored at that index.
 *
 * Programs that do not have this shape are left untouched instead of throwing.
 * Matching is done by identifier name only; scopes are not consulted.
 */
const visitor1 = {
    Program(path) {
        const firstStatement = path.get('body.0');
        // An empty program has no first statement at all.
        if (!firstStatement || !types.isExpressionStatement(firstStatement.node)) return;

        const call = firstStatement.node.expression.argument;
        if (!types.isCallExpression(call) || !types.isFunction(call.callee)) return;

        // Parameter name -> elements of the array literal passed for it.
        // Parameters without a matching array argument are simply skipped.
        const tables = new Map();
        call.callee.params.forEach((param, index) => {
            const passed = call.arguments[index];
            if (types.isIdentifier(param) && types.isArrayExpression(passed)) {
                tables.set(param.name, passed.elements);
            }
        });
        if (tables.size === 0) return;

        // A single traversal serves every parameter (the lookups are independent).
        firstStatement.traverse({
            MemberExpression(memberPath) {
                const memberNode = memberPath.node;
                const { object, property } = memberNode;
                if (!types.isIdentifier(object) || !types.isNumericLiteral(property)) return;

                const elements = tables.get(object.name);
                if (elements === undefined) return;

                // Never turn a write target (`a[0] = x`, `a[0]++`, `delete a[0]`)
                // into a literal; that would produce invalid code.
                const parent = memberPath.parent;
                const isWriteTarget = parent != null && (
                    (parent.type === 'AssignmentExpression' && parent.left === memberNode) ||
                    parent.type === 'UpdateExpression' ||
                    (parent.type === 'UnaryExpression' && parent.operator === 'delete')
                );
                if (isWriteTarget) return;

                // Holes, out-of-range indexes and elements without a `value` are left alone.
                const element = elements[property.value];
                if (element == null || element.value == null) return;

                switch (typeof element.value) {
                    case "string":
                        memberPath.replaceWith(types.stringLiteral(element.value));
                        break;
                    case "number":
                        memberPath.replaceWith(types.numericLiteral(element.value));
                        break;
                }
            }
        });
    }
};
/**
 * Inlines variables that are initialised with a literal and never reassigned.
 *
 * For `var x = <literal>` where the binding of `x` has no constant violations,
 * every reference to `x` is replaced by a copy of the literal. Declarators
 * whose target is not a plain identifier, or for which the scope has no
 * binding, are skipped. The declaration itself is intentionally kept.
 */
const visitor2 = {
    VariableDeclarator(path) {
        const { id, init } = path.node;
        // Destructuring targets have no single name to look up.
        if (!types.isIdentifier(id) || !types.isLiteral(init)) return;

        const binding = path.scope.getBinding(id.name);
        if (!binding || binding.constantViolations.length !== 0) return;

        for (const referencePath of binding.referencePaths) {
            // Each use gets its own clone so that no node object is shared
            // between several places in the tree.
            referencePath.replaceInline(types.cloneNode(init));
        }
    }
};
/**
 * Replaces `path` with a literal node holding `value`.
 *
 * Only booleans, numbers and strings are handled; for any other kind of value
 * (including `undefined`) the path is left unchanged.
 *
 * Declared as a function instead of being assigned to an undeclared name,
 * which created an implicit global and throws in strict mode.
 *
 * @param {object} path - Path whose node is replaced (needs `replaceWith`).
 * @param {*} value - Value to turn into a literal node.
 */
function replaceliteral(path, value) {
    switch (typeof value) {
        case 'boolean':
            path.replaceWith(types.booleanLiteral(value));
            break;
        case 'number':
            path.replaceWith(types.numericLiteral(value));
            break;
        case 'string':
            path.replaceWith(types.stringLiteral(value));
            break;
        default:
            break;
    }
}
// Constant-folding pass: on entering a unary, binary, call or conditional
// expression, asks the path to evaluate itself and hands the resulting `value`
// to `replaceliteral`, which decides whether a literal is substituted.
const visitor3 = {
    "UnaryExpression|BinaryExpression|CallExpression|ConditionalExpression": {
        enter(path) {
            const evaluation = path.evaluate();
            replaceliteral(path, evaluation.value);
        },
    },
};
/**
 * Pre-computes direct calls to named function declarations.
 *
 * For each function declaration whose source does not contain "try",
 * "random" or "Date" and whose binding in the parent scope is never
 * reassigned, the function is materialised with `eval`. Every direct call
 * `name(<literals>)` is then executed, and the call is replaced by the
 * resulting string or number literal. Calls with no arguments, with any
 * non-literal argument, or whose result is neither a string nor a number are
 * left untouched. The declaration itself is kept.
 *
 * NOTE(security): this runs code taken from the input file through `eval`.
 * Only use it on input you are prepared to execute.
 */
const visitor4 = {
    FunctionDeclaration(path) {
        const { id } = path.node;
        // An anonymous declaration (`export default function () {}`) has no name to resolve.
        if (!id) return;

        const code = path.toString();
        if (["try", "random", "Date"].some((word) => code.includes(word))) {
            return;
        }

        const parentScope = path.scope.parent;
        const binding = parentScope ? parentScope.getBinding(id.name) : undefined;
        if (!binding || binding.constantViolations.length > 0) {
            return;
        }

        let helper;
        try {
            // Parenthesised so the declaration is evaluated as an expression
            // that yields the function object itself.
            helper = eval("(" + code + ")");
        } catch (e) {
            // The function cannot be materialised here; nothing to pre-compute.
            return;
        }

        for (const referencePath of binding.referencePaths) {
            // Only direct calls `name(...)` qualify; a reference that is merely
            // passed around or used as an argument is not a call of the helper.
            const callPath = referencePath.parentPath;
            if (!callPath || !callPath.isCallExpression() || callPath.node.callee !== referencePath.node) {
                continue;
            }

            const argPaths = callPath.get('arguments');
            // The original compared the boolean flags against the string "false",
            // so calls with non-literal arguments were never filtered out.
            if (argPaths.length === 0 || !argPaths.every((arg) => arg.isLiteral())) {
                continue;
            }

            try {
                const argValues = argPaths.map((arg) => eval("(" + arg.toString() + ")"));
                const value = helper(...argValues);
                // Any other kind of result just skips this call site; it no longer
                // aborts processing of the remaining references.
                switch (typeof value) {
                    case "string":
                        callPath.replaceWith(types.stringLiteral(value));
                        break;
                    case "number":
                        callPath.replaceWith(types.numericLiteral(value));
                        break;
                }
            } catch (e) {
                // Best effort: a helper that throws for these arguments leaves the call as it is.
            }
        }
    }
};
// Removes the `extra` property from every string and numeric literal node.
// NOTE(review): presumably this makes the generated output use the node's
// `value` rather than the original raw text (e.g. hex escapes) - confirm
// against @babel/generator.
const visitor5 = {
    "StringLiteral|NumericLiteral"({ node }) {
        delete node.extra;
    },
};
/**
 * Pre-computes calls whose arguments are all literals.
 *
 * The source text of such a call is executed with `eval` (unless it contains
 * "Time"); when the result is a string, number or boolean the call is replaced
 * by the matching literal. Calls that throw, whose callee cannot be resolved
 * from here, or that yield any other kind of value are left untouched.
 *
 * Every argument has to be a literal. Checking only the first one allowed the
 * remaining arguments to resolve against this function's own local variables.
 *
 * NOTE(security): this executes calls taken from the input file through a
 * direct `eval`, including any side effects they have, and the callee is
 * resolved in this function's scope. Only use it on input you are prepared to
 * execute.
 */
const visitor6 = {
    CallExpression(path) {
        const callArgs = path.node.arguments;
        if (callArgs.length === 0) return;
        if (!callArgs.every((arg) => types.isLiteral(arg))) return;

        const code = path.toString();
        if (code.includes("Time")) return;

        let value;
        try {
            // Parenthesised so the text is always evaluated as an expression.
            value = eval("(" + code + ")");
        } catch (e) {
            // Unresolvable callee or a call that throws: keep the original call.
            return;
        }

        switch (typeof value) {
            case "string":
                path.replaceWith(types.stringLiteral(value));
                break;
            case "number":
                path.replaceWith(types.numericLiteral(value));
                break;
            case "boolean":
                path.replaceWith(types.booleanLiteral(value));
                break;
        }
    }
};


// Run the passes in order; each one works on the tree left by the previous pass.
traverse.default(ast,visitor1);
traverse.default(ast,visitor2);
traverse.default(ast,visitor3);
traverse.default(ast,visitor4);
traverse.default(ast,visitor5);
traverse.default(ast,visitor6);


// Regenerate source text without comments. The options object is passed
// directly; the original `opts = {...}` assigned an undeclared variable
// (an implicit global) inside the call.
const {code} = generator(ast, {"comments":false}, js);

// Report a failed write instead of silently ignoring the error.
fs.writeFile('mfw_decode.js', code, (err)=>{
    if (err) {
        console.error(err);
    }
});



Guess you like

Origin blog.csdn.net/xmx_000/article/details/131152456