这篇主要是介绍 Facebook 的开源库 fishhook 的原理和源码实现,需要了解 Mach-O 的相关知识,最好先阅读 Mach-O 文件探索,两篇结合来看效果更佳。
使用
首先写一个 demo 来使用 fishhook,这个 demo 来 hook 系统函数 printf
,让它始终打印 damon
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| #import <stdio.h> #import "fishhook.h"
static int (*original_printf)(const char * __restrict, ...);
int new_printf(const char * __restrict s) { original_printf("damon"); return 0; }
int main(int argc, char * argv[]) { struct rebinding printf_rebinding = { "printf", new_printf, (void *)&original_printf }; rebind_symbols((struct rebinding[1]){ printf_rebinding }, 1); printf("123"); return 0; }
|
在 demo 中,我们使用了 rebinding
结构体:
1 2 3 4 5
| /*
 * One rebinding request.
 *   name        - symbol name to look for; it is compared with each
 *                 symbol-table name after that name's first character
 *                 has been skipped.
 *   replacement - pointer written into every matching symbol-pointer slot.
 *   replaced    - when non-NULL, receives the pointer that the slot held
 *                 before it was overwritten.
 */
struct rebinding { const char *name; void *replacement; void **replaced; };
|
然后调用了 rebind_symbols
函数,这个函数的第一个参数是 rebinding
结构体数组,第二个参数是数组的长度。
代码实现
rebind_symbols
看一下 rebind_symbols
的方法实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| /*
 * Adds `rebindings_nel` entries from `rebindings` to the front of the
 * rebinding list and applies the list to loaded images.
 *
 * Returns the value produced by prepend_rebindings; a negative value is
 * returned immediately without touching any image.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  /*
   * The list has a single node, i.e. this is the first successful call:
   * register the add-image callback (dyld's documentation states that it is
   * also invoked for images that are already loaded).
   */
  if (!_rebindings_head->next) {
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  /* Callback already registered earlier: walk the current images by hand. */
  const uint32_t image_count = _dyld_image_count();
  for (uint32_t idx = 0; idx < image_count; idx++) {
    _rebind_symbols_for_image(_dyld_get_image_header(idx),
                              _dyld_get_image_vmaddr_slide(idx));
  }
  return status;
}
|
rebind_symbols_for_image
触发回调后,会调用 _rebind_symbols_for_image
来进行重绑定,看一下实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
| /*
 * Adapter with the (header, slide) parameter list that is passed to
 * _dyld_register_func_for_add_image; it forwards both arguments to
 * rebind_symbols_for_image together with the _rebindings_head list.
 */
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) { rebind_symbols_for_image(_rebindings_head, header, slide); }
static void rebind_symbols_for_image(struct rebindings_entry *rebindings, const struct mach_header *header, intptr_t slide) {
segment_command_t *cur_seg_cmd; segment_command_t *linkedit_segment = NULL; struct symtab_command* symtab_cmd = NULL; struct dysymtab_command* dysymtab_cmd = NULL;
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) { linkedit_segment = cur_seg_cmd; }
}
else if (cur_seg_cmd->cmd == LC_SYMTAB) { symtab_cmd = (struct symtab_command*)cur_seg_cmd; }
else if (cur_seg_cmd->cmd == LC_DYSYMTAB) { dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd; }
}
if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment || !dysymtab_cmd->nindirectsyms) { return; }
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 && strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) { continue; }
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) { perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); }
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) { perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); }
} } } }
|
- 声明变量,cur_seg_cmd、linkedit_segment、symtab_cmd、dysymtab_cmd
- 遍历 Load Command 给以上变量赋值,再通过 linkedit_segment 计算出基址,得到符号表、字符串表、间接符号表的地址。为什么要通过 linkedit_segment 来计算?因为
LC_SYMTAB
和 LC_DYSYMTAB
中所记录的 Offset 都是文件偏移,而这些表的数据都位于 __LINKEDIT 段内,所以要用 slide + vmaddr - fileoff 得到基址,再加上各自的 Offset 换算成内存地址
- 然后再次遍历 Load Command,目的是找到 lazy symbol 和 non-lazy symbol section,然后执行
perform_rebinding_with_section
函数,这个函数就是重绑定的核心实现。
重绑定的核心实现
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
| /*
 * Rewrites the symbol-pointer slots of one section.
 *
 * rebindings      - linked list of rebinding groups, searched front to back.
 * section         - a lazy or non-lazy symbol-pointer section; reserved1 is
 *                   used as the section's starting index in the indirect
 *                   symbol table.
 * slide           - added to section->addr to locate the slots in memory.
 * symtab, strtab  - symbol table and string table of the image.
 * indirect_symtab - indirect symbol table of the image.
 *
 * For each slot the symbol name is looked up and compared, with its first
 * character skipped (presumably the leading '_' of C symbols), against the
 * requested names. Only the first matching rebinding is applied to a slot,
 * so an entry nearer the front of the list takes precedence over later
 * entries for the same symbol.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);

  for (uint32_t i = 0; i < section->size / sizeof(void *); i++) {
    uint32_t symtab_index = indirect_symbol_indices[i];
    /* These marker values are not indices into the symbol table. */
    if (symtab_index == INDIRECT_SYMBOL_ABS ||
        symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    char *symbol_name = strtab + strtab_offset;

    /*
     * The comparison below starts at symbol_name[1]; skip names shorter than
     * two characters so that it never reads past the string terminator.
     */
    if (symbol_name[0] == '\0' || symbol_name[1] == '\0') {
      continue;
    }

    int rebound = 0;
    for (struct rebindings_entry *cur = rebindings; cur && !rebound; cur = cur->next) {
      for (size_t j = 0; j < cur->rebindings_nel; j++) {
        if (strcmp(&symbol_name[1], cur->rebindings[j].name) != 0) {
          continue;
        }
        /*
         * Save the previous pointer unless the slot already holds the
         * replacement, which would make the "original" point at the hook.
         */
        if (cur->rebindings[j].replaced != NULL &&
            indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
          *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
        }
        indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
        /* Stop here: later entries must not override this one. */
        rebound = 1;
        break;
      }
    }
  }
}
|
总结
HOOK 的原理就是把 __DATA.__la_symbol_ptr
和 __DATA.__nl_symbol_ptr
两个 section 对应函数的值替换成新函数的地址。fishhook 实现了如何查找 section 对应的符号名,然后匹配替换。fishhook 能 hook 的原因主要还是因为 PIC 的特性:对外部符号的调用要经过 __DATA 段中可写的符号指针间接跳转。也正因如此,fishhook 不能 hook 内部符号:内部符号的调用不经过这些指针,而是直接指向 __TEXT
代码段上的地址,这个段的数据是不可写的。