Fishhook原理(源码解读)

用到技术
利用dyld相关接口,我们可以注册image装载的监听方法:
extern void _dyld_register_func_for_add_image(void (*func)(const struct mach_header* mh, intptr_t vmaddr_slide));
调用_dyld_register_func_for_add_image注册监听方法后,当前已经装载的image(动态库等)会立刻触发回调,
之后的image会在装载的时候触发回调。
dyld在装载的时候,会对符号进行bind,而fishhook则会在回调函数中进行rebind。

hook过程
1.先找到SEG_LINKEDIT加载命令
2.获取它加载后在内存中的基址 linkedit_base
3.通过基址找到三个表基址 间接符号表 符号表 字符串表
4.找到要 hook 的函数符号
5.先保存,再修改为指向我们自己的符号
fishhook.h

#ifndef fishhook_h
#define fishhook_h

#include <stddef.h>
#include <stdint.h>
#include <mach-o/nlist.h>

#if !defined(FISHHOOK_EXPORT)
#define FISHHOOK_VISIBILITY __attribute__((visibility("hidden")))
#else
#define FISHHOOK_VISIBILITY __attribute__((visibility("default")))
#endif

#ifdef __cplusplus
extern "C" {
#endif //__cplusplus

/*
 * Describes one symbol to rebind: which symbol to look for, what to point it
 * at, and where to store the pointer it had before.
 */
struct rebinding {
  const char *name; // name to match; compared against the symbol-table name with its first character skipped
  void *replacement;// pointer written into every matching symbol-pointer slot
  void **replaced;  // if non-NULL, receives the slot's previous value (usually kept so the replacement can call the original)
};
// Two sections in the __DATA segment are involved in dynamic symbol binding:
//
// __nl_symbol_ptr holds the non-lazily bound symbols; they are bound when the
//                 Mach-O image is loaded.
// __la_symbol_ptr holds the lazily bound symbols (functions); they are bound
//                 by dyld_stub_binder the first time they are called, which is
//                 why dyld_stub_binder shows up among the non-lazily bound
//                 symbols of every Mach-O image.
//
// Rebinds, by hand, every symbol listed in `rebindings`.
//   rebindings      array of rebinding descriptors (the array is copied)
//   rebindings_nel  number of elements in `rebindings`
// Returns 0 on success, or -1 if the internal copy could not be allocated.
FISHHOOK_VISIBILITY
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);

/*
 * Rebinds as above, but only in the specified image.
 *
 *   header          points to the image's mach-o header
 *   slide           the image's slide offset
 *   rebindings      array of rebinding descriptors
 *   rebindings_nel  number of elements in `rebindings`
 *
 * The rebindings are applied once to this image and are not remembered for
 * images loaded later. Returns 0 on success, or -1 if the temporary copy of
 * `rebindings` could not be allocated.
 */
FISHHOOK_VISIBILITY
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel);

#ifdef __cplusplus
}
#endif //__cplusplus

#endif //fishhook_h

fishhook.c


//用到技术
//利用dyld相关接口,我们可以注册image装载的监听方法:
//extern void _dyld_register_func_for_add_image(void (*func)(const struct mach_header* mh, intptr_t vmaddr_slide));
//调用_dyld_register_func_for_add_image注册监听方法后,当前已经装载的image(动态库等)会立刻触发回调,
//之后的image会在装载的时候触发回调。
//dyld在装载的时候,会对符号进行bind,而fishhook则会在回调函数中进行rebind。

//hook过程
//1.先找到SEG_LINKEDIT加载命令
//2.获取它加载后在内存中的基址 linkedit_base
//3.通过基址找到三个表基址  间接符号表  符号表  字符串表
//4.找到要 hook 的函数符号
//5.先保存,再修改为指向我们自己的符号
#include "fishhook.h"
#include <dlfcn.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
// Pick the 64-bit or 32-bit flavour of the Mach-O structures once, so the rest
// of the file can use a single set of names regardless of pointer width.
#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct section_64 section_t;
typedef struct nlist_64 nlist_t;
// Load-command id of a segment command for this architecture.
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct section section_t;
typedef struct nlist nlist_t;
// Load-command id of a segment command for this architecture.
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
#endif

// Fallback definition for headers that do not provide SEG_DATA_CONST.
#ifndef SEG_DATA_CONST
#define SEG_DATA_CONST  "__DATA_CONST"
#endif
// One node of the singly linked list that stores the symbols to hook (the
// arguments passed to rebind_symbols). Every call prepends one node to the
// front of the list.
struct rebindings_entry {
  struct rebinding *rebindings;   // heap-allocated copy of the caller's rebinding array
  size_t rebindings_nel;          // number of elements in `rebindings`
  struct rebindings_entry *next;  // next (older) node, or NULL at the end of the list
};
// Head of the global rebinding list; NULL until rebind_symbols succeeds once.
static struct rebindings_entry *_rebindings_head;
/*
 * Pushes a new node holding a private copy of `rebindings` onto the front of
 * the list at `*rebindings_head`.
 *
 *   rebindings_head  in/out: current head; updated to the new node on success
 *   rebindings       array to copy (may be unused when nel is 0)
 *   nel              number of elements in `rebindings`
 *
 * Returns 0 on success. Returns -1 and leaves `*rebindings_head` untouched if
 * the byte size would overflow or memory cannot be allocated.
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  // Refuse element counts whose total byte size does not fit in size_t.
  if (nel > SIZE_MAX / sizeof(struct rebinding)) {
    return -1;
  }
  const size_t bytes = sizeof(struct rebinding) * nel;

  // Allocate the list node itself.
  struct rebindings_entry *new_entry = malloc(sizeof *new_entry);
  if (!new_entry) {
    return -1;
  }

  // Allocate the array the node owns. malloc(0) is allowed to return NULL, so
  // a NULL result only counts as a failure when elements were requested.
  new_entry->rebindings = malloc(bytes);
  if (!new_entry->rebindings && nel != 0) {
    free(new_entry);
    return -1;
  }

  // Copy the caller's array so the caller may release or reuse its own.
  if (nel != 0) {
    memcpy(new_entry->rebindings, rebindings, bytes);
  }
  new_entry->rebindings_nel = nel;

  // Link the node in front of the current head and make it the new head.
  new_entry->next = *rebindings_head;
  *rebindings_head = new_entry;
  return 0;
}
/*
 * Rebinds the symbol pointers stored in one symbol-pointer section.
 *
 *   rebindings       list of rebinding arrays to apply, searched front to back
 *   section          the symbol-pointer section to patch
 *   slide            the image's slide: the offset added to the addresses
 *                    recorded in the file to obtain addresses in memory
 *   symtab           symbol table (linkedit_base + symtab_cmd->symoff)
 *   strtab           string table (linkedit_base + symtab_cmd->stroff)
 *   indirect_symtab  indirect symbol table
 *                    (linkedit_base + dysymtab_cmd->indirectsymoff)
 *
 * For each pointer slot in the section, the slot's symbol name is looked up
 * through indirect symbol table -> symbol table -> string table. The first
 * rebinding whose name matches wins: the slot's current value is saved through
 * `replaced` (when requested) and the slot is overwritten with `replacement`.
 */
static void perform_rebinding_with_section(
        struct rebindings_entry *rebindings,
        section_t *section,
        intptr_t slide,
        nlist_t *symtab,
        char *strtab,
        uint32_t *indirect_symtab) {
  // section->reserved1 is used as this section's starting index into the
  // indirect symbol table; entry i there is the symbol-table index of the
  // section's i-th pointer slot.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  // In-memory address of the section's pointer slots (recorded address + slide).
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  // section->size is in bytes; each slot is one pointer wide.
  const size_t slot_count = (size_t)(section->size / sizeof(void *));

  for (size_t i = 0; i < slot_count; i++) {
    const uint32_t symtab_index = indirect_symbol_indices[i];
    // Entries flagged absolute and/or local carry no symbol-table index.
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    // n_strx is the symbol name's offset within the string table.
    const uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    const char *symbol_name = strtab + strtab_offset;
    // The comparison below skips the name's first character, so a name needs
    // at least two characters to be able to match anything.
    if (symbol_name[0] == '\0' || symbol_name[1] == '\0') {
      continue;
    }

    // Walk every list node and every rebinding in it; stop at the first match.
    int rebound = 0;
    for (struct rebindings_entry *cur = rebindings; cur && !rebound; cur = cur->next) {
      for (size_t j = 0; j < cur->rebindings_nel; j++) {
        struct rebinding *candidate = &cur->rebindings[j];
        if (strcmp(&symbol_name[1], candidate->name) != 0) {
          continue;
        }
        // Save the current target only when the caller asked for it and the
        // slot does not already point at the replacement; otherwise a repeated
        // pass would overwrite the saved original with the replacement itself.
        if (candidate->replaced != NULL &&
            indirect_symbol_bindings[i] != candidate->replacement) {
          *(candidate->replaced) = indirect_symbol_bindings[i];
        }
        // NOTE(review): the caller also passes sections from __DATA_CONST; if
        // that segment is mapped read-only at this point this store would
        // fault. Confirm whether page protection must be changed first.
        indirect_symbol_bindings[i] = candidate->replacement;
        rebound = 1;
        break;
      }
    }
  }
}

static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command* symtab_cmd = NULL;
  struct dysymtab_command* dysymtab_cmd = NULL;
    //SEG_LINKEDIT这个段非常重要
    //Indirect Symbol Table、Symbol Table、String Table的地址都要基于它获取
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  //第一次遍历找出三个表的基址
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    //找到SEG_LINKEDIT段
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
    //struct symtab_command {
    //    uint32_t  cmd;        /* LC_SYMTAB */
    //    uint32_t  cmdsize;    /* sizeof(struct symtab_command) */
    //    uint32_t  symoff;     /* symbol table offset */
    //    uint32_t  nsyms;      /* number of symbol table entries */
    //    uint32_t  stroff;     /* string table offset */
    //    uint32_t  strsize;    /* string table size in bytes */
    //};
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      //重要字段extrefsymoff   file offset to the indirect symbol table
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

//因为地址空间加载随机化的缘故,系统在加载程序时,会在其原有的地址空间上进行偏移操作,
//而这个 slide 正是偏移的大小,所以 linkedit_base 代表的是程序被加载后的基地址
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  //linkedit_base+symtab_cmd->symoff是Symbol Table的位置
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  //linkedit_base+symtab_cmd->stroff是String Table的位置  算的是文件中的基址
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

  //获取indriect table的数据(uint32_t类型的数组)
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
  //加载命令基址
  cur = (uintptr_t)header + sizeof(mach_header_t);
  //再一次遍历
  //遍历 all 段(加载命令)
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
      //下一个 cmd
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
        //找到DATA和DATA_CONST segment
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      //遍历 all节
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
          //找到__nl_symbol_ptr和__la_symbol_ptr这两个section
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
            //进行section中的symbol rebind
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
            //进行section中的symbol rebind
            //symtab_cmd = symtab_command
            //nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
            //char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
            //dysymtab_cmd=dysymtab_command
            //uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
            //slide 为RVA
            //uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}
// Callback registered with _dyld_register_func_for_add_image; per the notes at
// the top of this file it is invoked for images that are already loaded and
// for each image loaded afterwards.
// `slide` is related to ASLR (address space layout randomization): it is the
// randomized offset the image was loaded at.
// Applies every rebinding in the global list to the given image.
static void _rebind_symbols_for_image(const struct mach_header *header,
                                      intptr_t slide) {
    rebind_symbols_for_image(_rebindings_head, header, slide);
}
/*
 * Rebinds symbols in one specific image only.
 *
 *   header          the image's Mach-O header
 *   slide           the image's slide offset
 *   rebindings      array of rebinding descriptors
 *   rebindings_nel  number of elements in `rebindings`
 *
 * The rebindings are copied into a temporary one-node list that is released
 * before returning, so they are not added to the global list.
 * Returns 0 on success, or -1 if the temporary copy could not be allocated
 * (in which case the image is left untouched).
 */
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel) {
  struct rebindings_entry *rebindings_head = NULL;
  int retval = prepend_rebindings(&rebindings_head, rebindings, rebindings_nel);
  if (retval < 0) {
    // Nothing was linked into the list, so there is nothing to apply or free.
    return retval;
  }

  rebind_symbols_for_image(rebindings_head, (const struct mach_header *) header, slide);

  // Release the temporary node and the array it owns.
  free(rebindings_head->rebindings);
  free(rebindings_head);
  return retval;
}
/*
 * Rebinds symbols by hand in every image.
 *
 *   rebindings      array of rebinding descriptors (copied into the global list)
 *   rebindings_nel  number of elements in `rebindings`
 *
 * Returns 0 on success, or -1 if the copy could not be allocated.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // The new node has no successor only on the very first successful call.
  const int is_first_call = (_rebindings_head->next == NULL);
  if (is_first_call) {
    // Register the image-load callback. Per the notes at the top of this
    // file, images that are already loaded trigger the callback right away
    // and later images trigger it as they load; dyld binds symbols while
    // loading and fishhook rebinds them from inside the callback.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // The callback is already registered, so apply the list to the images that
  // are loaded right now; this is what allows hooking after startup.
  const uint32_t image_count = _dyld_image_count();
  for (uint32_t idx = 0; idx < image_count; idx++) {
    _rebind_symbols_for_image(_dyld_get_image_header(idx), _dyld_get_image_vmaddr_slide(idx));
  }
  return status;
}

参考:
https://www.jianshu.com/p/4fa4dd917682
https://blog.csdn.net/Hello_Hwc/article/details/78444203?locationNum=3&fps=1
https://www.aliyun.com/jiaocheng/356052.html

猜你喜欢

转载自blog.51cto.com/haidragon/2142616
今日推荐