iOS RE 4 beginners 3 - fishhook

关于

Fishhook 是 Facebook 提供的一个轻量级 hook 库:它利用 Mach-O 文件的惰性加载原理,通过修改懒加载和非懒加载两个符号指针表中的指针,达到 hook C 函数的目的。理解这个工具并熟悉其流程,也有助于更好地理解 Mach-O 文件格式 :)

原理图如下:

fishhook

源码阅读

核心其实就是 rebind_symbols 这个接口;另一个接口 rebind_symbols_image 只对指定的 Mach-O 镜像中的符号进行 rebind,所以从 rebind_symbols 函数看起就行了。

1
2
FISHHOOK_VISIBILITY
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);

简单看下关键的调用路径:

1
2
3
4
rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);
_rebind_symbols_for_image(_dyld_get_image_header(i), _dyld_get_image_vmaddr_slide(i));
rebind_symbols_for_image(_rebindings_head, header, slide);
perform_rebinding_with_section(...)

_rebindings_head 指向一个需要重绑定的符号的单向链表:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/* One hook request: the symbol to look for and what to bind it to. */
struct rebinding {
  /* Symbol name to match; it is compared against the symbol-table name
   * with that name's first character skipped. */
  const char *name;
  /* Pointer written into the matching symbol-pointer slot. */
  void *replacement;
  /* When non-NULL, receives the value the slot held before the rebind. */
  void **replaced;
};

/* One node of the singly linked list of rebinding batches. */
struct rebindings_entry {
  /* Array of rebinding requests held by this node. */
  struct rebinding *rebindings;
  /* Number of elements in `rebindings`. */
  size_t rebindings_nel;
  /* Next node in the list, or NULL at the end. */
  struct rebindings_entry *next;
};

// Head of the linked list of rebinding batches that are applied to each image.
static struct rebindings_entry *_rebindings_head;
// --- Excerpt from the body of rebind_symbols_for_image(rebindings, header, slide);
// --- the function header and closing brace are not part of this excerpt.
segment_command_t *cur_seg_cmd;
segment_command_t *linkedit_segment = NULL;
struct symtab_command* symtab_cmd = NULL;
struct dysymtab_command* dysymtab_cmd = NULL;

// The load commands start right after the Mach-O header.
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t); // cur now points at the first load command
// First pass over the load commands: remember the three commands needed below.
// Each step advances cur by the size of the command just visited.
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
// Segment command: keep it only if it is the SEG_LINKEDIT segment.
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
// Symbol table command.
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
// Dynamic symbol table command.
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}

// Nothing to rebind if any of the three commands is missing or the image
// has no indirect symbols.
if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
!dysymtab_cmd->nindirectsyms) {
return;
}

// Code elided by the article. linkedit_base, symtab and strtab are used below,
// so they are presumably computed in the elided part -- verify against the full source.
...

// Get indirect symbol table (array of uint32_t indices into symbol table)
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

// Second pass over the load commands: visit the sections of the SEG_DATA and
// SEG_DATA_CONST segments.
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
// Skip every segment that is neither SEG_DATA nor SEG_DATA_CONST.
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
// The section headers are read from directly after the segment command.
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect =
(section_t *)(cur + sizeof(segment_command_t)) + j;
// Lazy symbol pointer sections are rebound ...
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
// ... and so are non-lazy symbol pointer sections.
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
/*
 * Rebind the symbol pointers of a single symbol-pointer section.
 *
 * For every pointer slot in the section, the slot's symbol name is resolved
 * through the indirect symbol table, the symbol table and the string table,
 * and compared against every requested rebinding. On the first match the slot
 * is overwritten with the replacement; the previous value is first saved
 * through `replaced` when the caller supplied one.
 *
 * rebindings      head of the list of rebinding batches to apply
 * section         the symbol-pointer section to patch
 * slide           added to section->addr to get the section's address in memory
 * symtab          symbol table (array of nlist entries)
 * strtab          string table holding the symbol names
 * indirect_symtab indirect symbol table (indices into symtab)
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // Sections of the SEG_DATA_CONST segment need their protection changed
  // before they can be written.
  const bool isDataConst = strcmp(section->segname, SEG_DATA_CONST) == 0;

  // reserved1 of a symbol-pointer section is the index in the indirect symbol
  // table at which this section's entries start, so this points at the entry
  // that belongs to the section's first slot.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  // In-memory address of the section's array of symbol pointers.
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);

  vm_prot_t oldProtection = VM_PROT_READ;
  // Make the section writable, remembering the old protection for later.
  if (isDataConst) {
    // NOTE(review): the protection is queried for `rebindings`, not for the
    // section being patched -- confirm this matches get_protection's contract.
    oldProtection = get_protection(rebindings);
    mprotect(indirect_symbol_bindings, section->size, PROT_READ | PROT_WRITE);
  }

  // Walk every pointer-sized slot of the section.
  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    uint32_t symtab_index = indirect_symbol_indices[i];
    // Absolute and local entries have no symbol-table entry to look up.
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    // Symbol-table entry -> offset into the string table -> symbol name.
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    char *symbol_name = strtab + strtab_offset;
    // The comparison below skips the first character of the name (e.g. "_open"
    // is matched against "open"), so the name must have at least two characters.
    bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];
    struct rebindings_entry *cur = rebindings;
    while (cur) {
      for (uint j = 0; j < cur->rebindings_nel; j++) {
        if (symbol_name_longer_than_1 &&
            strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
          // Save the current pointer for the caller, unless the slot already
          // holds the replacement (the saved pointer is never the hook itself).
          if (cur->rebindings[j].replaced != NULL &&
              indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
            *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
          }
          // The actual hook: point the slot at the replacement.
          indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
          // First match wins; move on to the next slot.
          goto symbol_loop;
        }
      }
      cur = cur->next;
    }
  symbol_loop:;
  }

  // Restore the protection recorded before patching, translating the
  // VM_PROT_* bits into the PROT_* bits that mprotect expects.
  if (isDataConst) {
    int protection = 0;
    if (oldProtection & VM_PROT_READ) {
      protection |= PROT_READ;
    }
    if (oldProtection & VM_PROT_WRITE) {
      protection |= PROT_WRITE;
    }
    if (oldProtection & VM_PROT_EXECUTE) {
      protection |= PROT_EXEC;
    }
    mprotect(indirect_symbol_bindings, section->size, protection);
  }
}

调试

直接拿官方的demo编译出来调试分析流程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
~/study/ios_re_link/fishhook  cat main.c
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdarg.h>
#include "fishhook.h"

// Receives the pointer previously bound to close(); main() passes its address
// to rebind_symbols() as the `replaced` slot of the "close" rebinding.
static int (*orig_close)(int);
// Same for open(): filled through the `replaced` slot of the "open" rebinding.
static int (*orig_open)(const char *, int, ...);

// Replacement for close(): logs the descriptor, then forwards the call to the
// saved original and returns its result.
int my_close(int fd) {
  // "\n" (not "\\n"): end the log line with a real newline instead of
  // printing a literal backslash followed by 'n'.
  printf("Calling real close(%d)\n", fd);
  return orig_close(fd);
}

// Replacement for open(): logs the arguments, then forwards the call to the
// saved original and returns its result. The optional third argument (mode)
// is read only when O_CREAT is set; otherwise 0 is forwarded.
int my_open(const char *path, int oflag, ...) {
  mode_t mode = 0;

  if ((oflag & O_CREAT) != 0) {
    // mode only applies to O_CREAT
    va_list ap;
    va_start(ap, oflag);
    // A mode_t passed through "..." is read back as int here.
    mode = va_arg(ap, int);
    va_end(ap);
    printf("Calling real open('%s', %d, %d)\n", path, oflag, mode);
    return orig_open(path, oflag, mode);
  } else {
    printf("Calling real open('%s', %d)\n", path, oflag);
    return orig_open(path, oflag, mode);
  }
}

// Demo entry point: installs the close/open hooks, then opens this program's
// own binary and prints its first four bytes.
// Returns 0 on success and 1 when the binary cannot be opened or read.
int main(int argc, char * argv[])
{
  // Wait for one character on stdin before hooking.
  // NOTE(review): presumably here to leave time to attach a debugger -- confirm.
  getchar();
  rebind_symbols((struct rebinding[2]){{"close", my_close, (void *)&orig_close}, {"open", my_open, (void *)&orig_open}}, 2);

  // Open our own binary and print out first 4 bytes (which is the same
  // for all Mach-O binaries on a given architecture)
  int fd = open(argv[0], O_RDONLY);
  if (fd < 0) {
    perror("open");
    return 1;
  }

  uint32_t magic_number = 0;
  if (read(fd, &magic_number, sizeof(magic_number)) != (ssize_t)sizeof(magic_number)) {
    fprintf(stderr, "could not read %zu bytes from %s\n", sizeof(magic_number), argv[0]);
    close(fd);
    return 1;
  }
  printf("Mach-O Magic Number: %x \n", magic_number);
  close(fd);

  return 0;
}
~/study/ios_re_link/fishhook  cat Makefile
# None of the targets names a file it produces, so mark them all phony.
.PHONY: all push clean

# Build main.c together with fishhook.c for arm64 iOS, then sign the result
# with the entitlements file.
all:
	xcrun -sdk iphoneos clang main.c fishhook.c -o main -target arm64-apple-ios12.2
	codesign -s "A64593A4DDFA3557CCEFF47FC8E688DCD3E6E455" --entitlements entitlements.xml -f main

# Copy the built binary to /tmp on the remote host.
push:
	scp main root@10.2.5.0:/tmp

# Remove the built binary; -f keeps this from failing when it is already gone.
clean:
	rm -f main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
* thread #1, queue = 'com.apple.main-thread', stop reason = step over
frame #0: 0x0000000100a3f6ac main`rebind_symbols_for_image(rebindings=0x00000001012005b0, header=0x0000000100a38000, slide=10715136) at fishhook.c:187:8
184 }
185 }
186
-> 187 if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
188 !dysymtab_cmd->nindirectsyms) {
189 return;
190 }
Target 0: (main) stopped.
(lldb) po symtab_cmd
0x0000000100a38440

(lldb) po dysymtab_cmd
0x0000000100a38458

(lldb) po linkedit_segment
0x0000000100a383c8

(lldb)

然后找到 LC_SEGMENT_64 中的 __DATA(以及 __DATA_CONST)段,处理其中 S_LAZY_SYMBOL_POINTERS 和 S_NON_LAZY_SYMBOL_POINTERS 两种类型的 section:

1
2
3
4
5
6
7
8
9
10
11
(lldb) n
Process 2046 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = step over
frame #0: 0x0000000100a3f828 main`rebind_symbols_for_image(rebindings=0x00000001012005b0, header=0x0000000100a38000, slide=10715136) at fishhook.c:215:42
212 perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
213 }
214 if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
-> 215 perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
216 }
217 }
218 }

seg

这里为了调试,重点关注 S_LAZY_SYMBOL_POINTERS 的处理

首先在rebind之前查看open符号

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
(lldb) image lookup -va 0x0000000100a3fee0
Address: main[0x0000000100007ee0] (main.__TEXT.__stub_helper + 180)
Summary:
Module: file = "/private/var/tmp/main", arch = "arm64"

//....
Process 2046 resuming
Process 2046 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 4.1
frame #0: 0x0000000100a3fbc8 main`perform_rebinding_with_section(rebindings=0x00000001012005b0, section=0x0000000100a382d8, slide=10715136, symtab=0x0000000100a44210, strtab=" ", indirect_symtab=0x0000000100a44780) at fishhook.c:135:46
132 strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
133 if (cur->rebindings[j].replaced != NULL &&
134 indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
-> 135 *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
136 }
137 indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
138 goto symbol_loop;
Target 0: (main) stopped.
(lldb) p symbol_name
(char *) $20 = 0x0000000100a44937 "_open"
(lldb)

首先备份了原函数地址,确保 hook 后可以通过 orig_open 调用到原本的函数。

1
2
3
4
5
6
7
8
9
10
11
(lldb) p i
(uint) $24 = 13
(lldb) po indirect_symbol_bindings[13]
0x0000000100a3fee0

(lldb) image lookup -va 0x0000000100a3fee0
Address: main[0x0000000100007ee0] (main.__TEXT.__stub_helper + 180)
Summary:
Module: file = "/private/var/tmp/main", arch = "arm64"

(lldb)

之后找到函数指针,完成替换

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
Process 2046 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = step over
frame #0: 0x0000000100a3fbfc main`perform_rebinding_with_section(rebindings=0x00000001012005b0, section=0x0000000100a382d8, slide=10715136, symtab=0x0000000100a44210, strtab=" ", indirect_symtab=0x0000000100a44780) at fishhook.c:137:41
134 indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
135 *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
136 }
-> 137 indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
138 goto symbol_loop;
139 }
140 }
Target 0: (main) stopped.
(lldb) image list |grep main
[ 0] EAE1AE51-465A-32E0-8B3F-195FE2480F4F 0x0000000100a38000 /private/var/tmp/main
/System/Volumes/Data/Users/muhe/study/ios_re_link/fishhook/main.dSYM/Contents/Resources/DWARF/main(0x0000000100a38000)
(lldb)
(lldb) x/20gx indirect_symbol_bindings
0x100a40018: 0x00000001d8642a68 0x0000000100a3fe50
0x100a40028: 0x0000000100a3fe5c 0x0000000100a3fe68
0x100a40038: 0x0000000100a3fe74 0x00000001d8581374
0x100a40048: 0x0000000100a3f224 0x00000001d8581694
0x100a40058: 0x0000000100a3fea4 0x00000001d860ae30
0x100a40068: 0x00000001d871060c 0x00000001d873dd30
0x100a40078: 0x0000000100a3fed4 0x0000000100a3f270
0x100a40088: 0x0000000100a3feec 0x0000000100a3fef8
0x100a40098: 0x00000001d873dfd0 0x0000000100a3ff10
0x100a400a8: 0x0000000100d04498 0x0000000100a3ff72
(lldb) x/gx 0x100a40078+8
0x100a40080: 0x0000000100a3f270
(lldb)

---
>>> hex(0x100a40080-0x0000000100a38000)
'0x8080'
>>>

func_ptr

hook 之后,如果再调用原本的函数(例如 close),会走什么流程?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
0x100a3f448 <+176>: bl     0x100a3fd9c               ; symbol stub for: close
-> 0x100a3f44c <+180>: adrp x8, 1
0x100a3f450 <+184>: ldr x8, [x8]
0x100a3f454 <+188>: ldr x8, [x8]
0x100a3f458 <+192>: ldur x10, [x29, #-0x8]
0x100a3f45c <+196>: subs x8, x8, x10
0x100a3f460 <+200>: b.ne 0x100a3f478 ; <+224> at main.c
0x100a3f464 <+204>: mov w8, #0x0
0x100a3f468 <+208>: mov x0, x8
0x100a3f46c <+212>: ldp x29, x30, [sp, #0x70]
0x100a3f470 <+216>: add sp, sp, #0x80 ; =0x80
0x100a3f474 <+220>: ret
0x100a3f478 <+224>: bl 0x100a3fd60 ; symbol stub for: __stack_chk_fail
(lldb) dis -a 0x100a3fd9c
main`close:
0x100a3fd9c <+0>: nop
0x100a3fda0 <+4>: ldr x16, #0x2a8 ; (void *)0x0000000100a3f224: my_close at /Users/muhe/study/ios_re_link/fishhook/main.c:10
0x100a3fda4 <+8>: br x16

引用

https://github.com/facebook/fishhook