fishhook原理

iOS使用fishhook

iOS开发中有时会需要交换两个方法的实现,也就是Method Swizzle,这种功能依赖Objective-C的动态特性实现。对于没有动态性的编程语言比如C语言来说,能不能交换两个方法的实现呢?fishhook开源库可以帮助我们交换两个C函数实现,看下它的基本使用方法。

引入fishhook源码到项目中,在项目中写入以下代码


// Receives the address of the original NSLog implementation; it is filled in
// through the `replaced` field of the rebinding passed to rebind_symbols.
static void(*sysNSLog)(NSString *format, ...);

/// Replacement for NSLog that prefixes every message with "NSLog modified, ".
///
/// @param format printf-style format string, followed by its variadic arguments.
void myNSLog(NSString *format, ...){
    // Expand the caller's format together with its variadic arguments first.
    // Forwarding only `format` would drop the arguments and make the original
    // implementation read garbage for every format specifier.
    NSString *message = nil;
    if (format != nil) {
        va_list args;
        va_start(args, format);
        message = [[NSString alloc] initWithFormat:format arguments:args];
        va_end(args);
    }

    // Call the original implementation with a constant format string so that
    // any '%' left in the expanded text is not interpreted a second time.
    sysNSLog(@"%@", [NSString stringWithFormat:@"NSLog modified, %@", message]);
}


/// Demonstrates hooking NSLog with fishhook: logs once before the rebinding
/// and once after it, so the two outputs can be compared.
- (void)viewDidLoad {
    [super viewDidLoad];

    // Issued before rebind_symbols is called.
    NSLog(@"nslog test");

    struct rebinding nslog;
    nslog.name = "NSLog";                // name of the function to replace
    nslog.replacement = myNSLog;         // address of the new implementation
    nslog.replaced = (void *)&sysNSLog;  // pointer that receives the replaced function

    struct rebinding arr[1] = {nslog};
    rebind_symbols(arr, 1);

    // Issued after rebind_symbols has been called.
    NSLog(@"nslog test");
}

每个被替换的函数name,replaced需要赋值,新函数地址存放在replacement字段。将这些信息组装成结构体,并生成结构体数组调用fishhook的rebind_symbols完成绑定。

fishhook原理

在了解fishhook原理之前,需要了解Mach-O文件,可以看这篇文章。
Mach-O文件介绍

我们从fishhook源码入手,先看入口函数。

/*
 * Entry point: records the requested rebindings at the head of the global
 * list and applies them to images.
 *
 * Returns the result of prepend_rebindings; a negative result is returned
 * immediately without touching any image.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next) {
    // Not the first call: walk the images that are currently loaded and
    // rebind each of them directly.
    const uint32_t image_count = _dyld_image_count();
    uint32_t image_index = 0;
    while (image_index < image_count) {
      _rebind_symbols_for_image(_dyld_get_image_header(image_index),
                                _dyld_get_image_vmaddr_slide(image_index));
      image_index++;
    }
  } else {
    // First call: register the add-image callback. The callback takes effect
    // for images that are already loaded as well as for images loaded later.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
  }
  return status;
}

rebind_symbols函数调用了prepend_rebindings,prepend_rebindings函数内部把即将hook的函数添加到_rebindings_head这个链表里面来。rebind_symbols首先判断_rebindings_head->next是否为空,为空的话代表rebind_symbols是首次调用,这个时候有可能出现部分image还没有加载的情况,如果只遍历现有的image列表,就获取不到这些尚未加载的image,所以要注册image加载的回调,回调函数是_rebind_symbols_for_image。这个回调对已经加载的image也会生效,同时可以保证后续的image加载时会重新调用_rebind_symbols_for_image完成hook。如果不是首次调用,回调已经注册过,只需要遍历当前已加载的image逐个处理。

可以看到fishhook是遍历所有的image,逐个尝试去hook。在每个循环里面实际调用了rebind_symbols_for_image这个函数。

/*
 * Applies the given rebindings to a single loaded image.
 *
 * Walks the image's load commands to locate the symbol table, the string
 * table and the indirect symbol table, then hands every lazy / non-lazy
 * symbol pointer section of the data segments to
 * perform_rebinding_with_section.
 *
 * rebindings: list of rebinding entries to apply.
 * header:     Mach-O header of the image.
 * slide:      slide of the image, added to the addresses stored in the file.
 */
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  // Validate the image: give up if dladdr cannot resolve the header address.
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command* symtab_cmd = NULL;
  struct dysymtab_command* dysymtab_cmd = NULL;

  // First pass over the load commands: find the __LINKEDIT segment,
  // symtab_cmd and dysymtab_cmd.
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      // Must be stored in dysymtab_cmd: storing it in symtab_cmd would leave
      // dysymtab_cmd NULL and make the check below always return early.
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

  // Nothing to rebind without all three commands, or when the image has no
  // indirect symbols.
  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

  // Base address: adding a file offset taken from the symtab / dysymtab
  // commands to this value yields the in-memory address of that data.
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  // Base plus the symbol table offset gives the symbol table address.
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  // symtab_cmd also stores the offset of the string table.
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

  // Indirect symbol table; its entries are indices into the symbol table.
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  // Second pass: rewind to the first load command and look at the data segments.
  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      // Only the SEG_DATA and SEG_DATA_CONST segments are of interest.
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        // The section_t records follow the segment command directly.
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}

rebind_symbols_for_image函数先用dladdr校验image,dladdr可以获取image的基本信息,包括image的地址和名称。rebind_symbols_for_image的核心目的在于获取符号表symtab的地址,和动态符号表indirect_symtab地址,以及字符串表strtab的地址。

我们知道load_command类似于DATA段的索引,如果要找到三个表的地址,需要找到对应的command。通过遍历commands,可以找到symtab_cmd和dysymtab_cmd;字符串表没有单独的command,它的偏移量保存在symtab_cmd的stroff字段中,所以字符串表的地址可以通过symtab_cmd间接得出。找到command之后,就可以通过计算得出三个表的地址。

在这段函数中,还有两个变量需要注意,一个是slide,一个是sect。slide是系统生成的随机偏移量,用来随机化程序的执行地址,提高安全性。sect是符号表或者动态符号表对应的数据段section,它保存了section的大小、地址、偏移量等信息。

最后执行的函数是perform_rebinding_with_section,这里执行了替换函数实现的操作。

/*
 * Rebinds the pointer slots of one symbol pointer section.
 *
 * For every slot the symbol name is looked up through the indirect symbol
 * table, the symbol table and the string table. When the name matches a
 * requested rebinding, the current slot value is optionally saved through
 * `replaced` and the slot is overwritten with `replacement`.
 *
 * rebindings:      list of rebinding entries to match against.
 * section:         the symbol pointer section to process.
 * slide:           slide of the image, added to section->addr.
 * symtab:          symbol table of the image.
 * strtab:          string table of the image.
 * indirect_symtab: indirect symbol table of the image.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // This section's entries in the indirect symbol table start at reserved1.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  // The section's pointer slots in memory; each holds a symbol's current address.
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  const size_t slot_count = section->size / sizeof(void *);

  for (uint slot = 0; slot < slot_count; slot++) {
    // Index into the symbol table for this slot.
    const uint32_t symtab_index = indirect_symbol_indices[slot];
    const bool is_special_entry =
        symtab_index == INDIRECT_SYMBOL_ABS ||
        symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS);
    if (is_special_entry) {
      continue;
    }

    // The symbol table entry gives the offset of the name in the string table.
    const char *symbol_name = strtab + symtab[symtab_index].n_un.n_strx;
    // The comparison below skips the first character of the name
    // (presumably the '_' prefix of C symbols), so names shorter than two
    // characters can never match.
    if (!(symbol_name[0] && symbol_name[1])) {
      continue;
    }

    for (struct rebindings_entry *entry = rebindings; entry; entry = entry->next) {
      for (uint j = 0; j < entry->rebindings_nel; j++) {
        if (strcmp(&symbol_name[1], entry->rebindings[j].name) != 0) {
          continue;
        }

        // Hand the current binding back to the caller, unless the slot
        // already points at the replacement.
        if (entry->rebindings[j].replaced != NULL &&
            indirect_symbol_bindings[slot] != entry->rebindings[j].replacement) {
          *(entry->rebindings[j].replaced) = indirect_symbol_bindings[slot];
        }

        // Make the section writable; the slot is only written when that succeeded.
        kern_return_t err = vm_protect (mach_task_self (), (uintptr_t)indirect_symbol_bindings, section->size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
        if (err == KERN_SUCCESS) {
          // Replace the function implementation.
          indirect_symbol_bindings[slot] = entry->rebindings[j].replacement;
        }
        // The first matching rebinding wins; move on to the next slot.
        goto next_slot;
      }
    }
  next_slot:;
  }
}

查找符号的过程大致是先找到动态符号表的索引,根据这个索引获取符号表的索引,根据符号表的索引在字符串表中获取符号的名称信息。如果字符串表中的名称和待替换实现的函数名称匹配,会发生函数地址的替换,从而完成了hook过程。

猜你喜欢

转载自blog.csdn.net/u011608357/article/details/127593199
今日推荐