Last time we introduced how the function call stack works: on every function call the LR and FP registers are pushed onto the stack, so by following the chain of stack frames one frame at a time we can recover the whole function call stack.
We start from `_STRUCT_MCONTEXT machineContext`: its `__ss` member holds the thread state, from which we can read the LR, FP and SP registers.
// Declare the machine context first; its __ss member will hold the thread's register state
_STRUCT_MCONTEXT machineContext;
Copy the code
/// Copies the register state of `thread` into `machineContext->__ss`.
/// @param thread The thread whose register state is requested.
/// @param machineContext Destination context; only its `__ss` member is written.
/// @return true when thread_get_state reports KERN_SUCCESS, false otherwise.
bool fillThradStateContext(thread_t thread, _STRUCT_MCONTEXT *machineContext){
    // Passed by address to thread_get_state, starting at JY_THREAD_STATE_COUNT
    mach_msg_type_number_t stateCount = JY_THREAD_STATE_COUNT;
    thread_state_t stateBuffer = (thread_state_t)&machineContext->__ss;
    if (thread_get_state(thread, JY_THREAD_STATE, stateBuffer, &stateCount) != KERN_SUCCESS) {
        return false;
    }
    return true;
}
Copy the code
Get the corresponding registers LR, PC, FP
// PC register: the current instruction address of the thread
const uintptr_t pcRegister = machineContext.__ss.JY_INSTRUCTION_ADDRESS;
if (pcRegister == 0) {
return @"Fail to get pc address";
}
// LR register: holds the return address of the current function.
// It seeds the stack-frame walk that follows.
uintptr_t lrRegister;
#if defined(__i386__) || defined(__x86_64__)
// The x86 branch does not read __lr and uses 0 instead
lrRegister = 0;
#else
lrRegister = machineContext.__ss.__lr;
#endif
// FP register: the frame pointer, the starting point of the stack-frame walk
const uintptr_t fpRegister = machineContext.__ss.JY_FRAME_POINTER;
Copy the code
And then we recursively get the function stack
// Buffer that collects at most StackMaxDepth addresses
uintptr_t backtraceBuffer[StackMaxDepth];
int i = 0;
// The first entry is the PC: the address the thread is currently executing
backtraceBuffer[i++] = pcRegister;
// Seed the walk with the current frame pointer and link register
JYStackFrame frame = {(void *)fpRegister, lrRegister};
vm_size_t len = sizeof(frame);
// Walk the chain of saved frame pointers. Each iteration records the return
// address of the current frame and then loads the caller's {fp, lr} pair.
while (frame.fp && i < StackMaxDepth) {
    // The seed LR is 0 on x86; a zero return address is not a real frame, so skip it
    if (frame.lr != 0) {
        backtraceBuffer[i++] = frame.lr;
    }
    // Overwrite `frame` with the len bytes stored at the current frame pointer
    bool flag = readFPMemory(frame.fp, &frame, len);
    if (!flag || frame.fp == 0 || frame.lr == 0) {
        break;
    }
}
readFPMemory reads len (16) bytes of memory starting at the address held in fp. A stack frame stores the saved FP first and the LR right after it: the first 8 bytes are the caller's FP, and the next 8 bytes are the LR
/// Copies `len` bytes starting at address `fp` in the current task into `dst`.
/// @param fp Source address to read from.
/// @param dst Destination buffer; must have room for `len` bytes.
/// @param len Number of bytes to copy.
/// @return true when vm_read_overwrite reports KERN_SUCCESS, false otherwise.
bool readFPMemory(const void *fp, const void *dst, const vm_size_t len)
{
    // Out-parameter required by vm_read_overwrite; its value is not inspected here
    vm_size_t copiedSize = 0;
    const vm_address_t source = (vm_address_t)fp;
    const vm_address_t destination = (vm_address_t)dst;
    if (vm_read_overwrite(mach_task_self(), source, len, destination, &copiedSize) != KERN_SUCCESS) {
        return false;
    }
    return true;
}
Copy the code
Now that we have walked the stack and collected the return addresses, it is time to restore the symbols for them
// All return addresses (LRs) are collected; resolve them to symbols and build the report string
restoreSymbol(backtraceBuffer,i,thread).copy;
Copy the code
Define structs for recording symbolic information, etc
/// Symbol information resolved for one backtrace address.
typedef struct{
    uint64_t address;        // Entry address of the matched symbol (its n_value)
    uint64_t offset;         // Distance from the symbol's entry address to the backtrace address
    const char * symbol;     // Symbol name
    const char * machOName;  // Name of the Mach-O image that contains the symbol
} JYFuncInfo;
// Fixed-capacity list of resolved symbols for one call stack
typedef struct{
JYFuncInfo *stacks; // Array of resolved entries
int allocLenght; // Number of entries allocated for stacks (sic: the field name is spelled "Lenght")
int length; // Number of entries filled in so far
} JYCallStackInfo;
Copy the code
Now to restore, start by doing some initialization
/// Resolves every address in `backtraceBuffer` to a symbol and renders the result as text.
/// @param backtraceBuffer Addresses collected while walking the stack.
/// @param length Number of valid entries in `backtraceBuffer`.
/// @param thread Thread the backtrace belongs to; only used in the report header.
/// @return The formatted call stack, or a short error message when an allocation fails.
NSString * restoreSymbol(uintptr_t *backtraceBuffer, int length ,thread_t thread){
    JYCallStackInfo *csInfo = malloc(sizeof(JYCallStackInfo));
    if (csInfo == NULL) {
        return @"fail to malloc";
    }
    csInfo->length = 0;
    // Clamp so that a negative count can never turn into a huge allocation size
    csInfo->allocLenght = length > 0 ? length : 0;
    // Ask for at least one element so that a NULL result always means failure
    size_t capacity = csInfo->allocLenght > 0 ? (size_t)csInfo->allocLenght : 1;
    csInfo->stacks = (JYFuncInfo *)calloc(capacity, sizeof(JYFuncInfo));
    if (csInfo->stacks == NULL) {
        // Release the container as well; otherwise it leaks on this path
        free(csInfo);
        return @"error";
    }
    callStackOfSymbol(backtraceBuffer, csInfo->allocLenght, csInfo);
    NSMutableString *strM = [NSMutableString stringWithFormat:@"\n 🔥🔥🔥JYCallStack of thread: %u 🔥🔥🔥\n", thread];
    for (int j = 0; j < csInfo->length; j++) {
        // "%@" is the object specifier; the source had a stray space inside it
        [strM appendFormat:@"%@", formatFuncInfo(csInfo->stacks[j])];
    }
    freeMemory(csInfo);
    return strM.copy;
}
Copy the code
Go to the key code callStackOfSymbol(backtraceBuffer, length, csInfo);
/// Resolves each backtrace address to a symbol and appends the result to `csInfo`.
/// @param backtraceBuffer Addresses collected while walking the stack.
/// @param length Number of valid entries in `backtraceBuffer`.
/// @param csInfo Destination for the resolved symbols.
void callStackOfSymbol(uintptr_t *backtraceBuffer, int length ,JYCallStackInfo *csInfo){
    // Process every collected address in order
    for (int i = 0; i < length; i++) {
        // Find the image this address belongs to
        JYMachHeader *machHeader = getLrInMach(backtraceBuffer[i]);
        if (machHeader) {
            // Look the address up in that image's symbol table
            findSymbolInMach(backtraceBuffer[i], machHeader, csInfo);
        }
    }
}
First of all, apart from the first entry, which is the PC value, every backtraceBuffer[i] is a value taken from the LR register, i.e. a return address
So the first thing to know is that several images are loaded at runtime. We need to enumerate all of them, because each image has its own ASLR slide, name, and so on
/// Fills the global `machHeaderArr` with the header, name and ASLR slide of every
/// image currently known to dyld. On allocation failure `machHeaderArr` is left
/// NULL (container) or marked empty (entries).
void getMachHeader(void){
    // Allocate the container
    machHeaderArr = (JYMachHeaderArr *)malloc(sizeof(JYMachHeaderArr));
    if (machHeaderArr == NULL) {
        return;
    }
    // _dyld_image_count returns the number of images
    machHeaderArr->allocLength = _dyld_image_count();
    // One entry per image
    machHeaderArr->array = (JYMachHeader *)malloc(sizeof(JYMachHeader) * machHeaderArr->allocLength);
    if (machHeaderArr->array == NULL) {
        // Mark the cache as empty so that readers iterate over zero entries
        machHeaderArr->allocLength = 0;
        return;
    }
    for (uint32_t i = 0; i < machHeaderArr->allocLength; i++) {
        JYMachHeader *machHeader = &machHeaderArr->array[i];
        // Mach-O header of the image
        machHeader->header = _dyld_get_image_header(i);
        // Name of the image
        machHeader->name = _dyld_get_image_name(i);
        // Slide (ASLR offset) of the image: the difference between the address it
        // was loaded at and the address recorded in the file
        machHeader->slide = _dyld_get_image_vmaddr_slide(i);
    }
}
Now that all the images are saved in machHeaderArr, we can start finding out which image a given instruction address belongs to
/// Finds the image that contains the address `lr`.
/// @param lr A runtime address taken from the backtrace buffer.
/// @return The cached entry of the containing image, or NULL when no image matches
///         or the image cache could not be built.
JYMachHeader *getLrInMach(uintptr_t lr)
{
    if (!machHeaderArr) {
        // First use: collect all images into machHeaderArr
        getMachHeader();
    }
    if (!machHeaderArr) {
        // The cache could not be built; nothing to search
        return NULL;
    }
    // Check every image for a segment that contains the address
    for (uint32_t i = 0; i < machHeaderArr->allocLength; i++) {
        JYMachHeader *machHeader = &machHeaderArr->array[i];
        // Remove this image's slide so the address is comparable with the
        // vmaddr values stored in its load commands
        if (backtraceBufferItemInMach(lr - machHeader->slide, machHeader->header)) {
            return machHeader;
        }
    }
    return NULL;
}
Copy the code
We pass the image's header to backtraceBufferItemInMach. There we first compute the address of the Load Commands, which are laid out right after the Mach-O header, and then iterate over them:
/// Reports whether an un-slid address falls inside one of the segments of an image.
/// @param slideLR The address with the image's ASLR slide already subtracted.
/// @param header Mach-O header of the image to inspect.
/// @return true when some LC_SEGMENT_64 / LC_SEGMENT command covers `slideLR`.
bool backtraceBufferItemInMach(uintptr_t slideLR, const struct mach_header *header)
{
    if (header == NULL) {
        return false;
    }
    // The load commands start right after the header. The header size depends on
    // the image's word size; anything that is not a 32-bit magic is treated as 64-bit.
    const bool is32Bit = (header->magic == MH_MAGIC || header->magic == MH_CIGAM);
    uintptr_t cur = is32Bit
        ? (uintptr_t)(header + 1)
        : (uintptr_t)(((const struct mach_header_64 *)header) + 1);
    // ncmds is the number of load commands
    for (uint32_t i = 0; i < header->ncmds; i++) {
        const struct load_command *command = (const struct load_command *)cur;
        if (command->cmd == LC_SEGMENT_64) {
            const struct segment_command_64 *segment = (const struct segment_command_64 *)command;
            const uintptr_t start = (uintptr_t)segment->vmaddr;
            const uintptr_t end = start + (uintptr_t)segment->vmsize;
            // The segment covers [start, end): `end` itself is the first byte past it
            if (slideLR >= start && slideLR < end) {
                return true;
            }
        } else if (command->cmd == LC_SEGMENT) {
            // 32-bit images describe their segments with segment_command
            const struct segment_command *segment = (const struct segment_command *)command;
            const uintptr_t start = (uintptr_t)segment->vmaddr;
            const uintptr_t end = start + (uintptr_t)segment->vmsize;
            if (slideLR >= start && slideLR < end) {
                return true;
            }
        }
        // Load commands are stored back to back; advance to the next one
        cur = cur + command->cmdsize;
    }
    return false;
}
Copy the code
Once we know which image the instruction is in, we go back to callStackOfSymbol. The next step is to look up our symbol inside that image with findSymbolInMach(backtraceBuffer[i], machHeader, csInfo); this is also the most critical step
First of all, we need to understand the structure of Mach-O, the relationship between the Symbol Table and the String Table, and the roles of the LC_SYMTAB load command and the __LINKEDIT segment. There are many explanations of the principles online that you can study on your own
The __LINKEDIT section contains raw data used by the dynamic linker, such as symbols, strings, and relocation entries.
LC_SYMTAB describes the position of the string table and symbol table in __LINKEDIT
First we get Load Commands via the image header, and then we loop to find LC_SYMTAB and __LINKEDIT segments
The quantities involved are:
lr = the address of LR taken from our backtraceBuffer (the real, runtime address)
seg_linkedit->vmaddr = the virtual address of __LINKEDIT
seg_linkedit->fileoff = the file offset of __LINKEDIT
(uintptr_t)machHeader->slide = the ASLR slide
Offset address of LR = real address of LR - ASLR slide
Get the __LINKEDIT base address:
__LINKEDIT base address = ASLR slide + __LINKEDIT virtual address - __LINKEDIT file offset
Symbol table real address = __LINKEDIT base address + symoff
String table real address = __LINKEDIT base address + stroff
The real address of LR is just an instruction address, so it must be greater than or equal to the entry address of the function that contains it, and that entry address is the n_value of the corresponding symbol. We therefore iterate over all symbol table entries, compare each n_value with the offset address of LR, and keep the entry with the smallest non-negative distance: the function entry closest to (and not above) LR is the most accurate match.
symbolTable[best].n_un.n_strx is the offset of the symbol name inside the string table, so
symbol name = stringTable + symbolTable[best].n_un.n_strx
/// Finds the symbol that contains `lr` inside the image described by `machHeader`
/// and appends it to `csInfo`.
///
/// Nothing is appended when the image has no __LINKEDIT segment or LC_SYMTAB
/// command, when no symbol starts at or below the address, or when `csInfo` is full.
/// Only the 64-bit layout (mach_header_64 / LC_SEGMENT_64 / nlist_64) is handled.
///
/// @param lr A runtime address taken from the backtrace buffer.
/// @param machHeader Cached entry of the image that contains `lr`.
/// @param csInfo Destination for the resolved symbol.
void findSymbolInMach(uintptr_t lr, JYMachHeader * machHeader, JYCallStackInfo * csInfo){
    if (!machHeader || !csInfo) {
        return;
    }
    // The __LINKEDIT segment contains raw data used by the dynamic linker, such as
    // symbols, strings, and relocation entries
    struct segment_command_64 *seg_linkedit = NULL;
    // LC_SYMTAB describes where the symbol and string tables sit inside __LINKEDIT
    struct symtab_command *sym_command = NULL;
    // Mach-O header of the image
    const struct mach_header *header = machHeader->header;
    // The load commands start right after the (64-bit) header
    uintptr_t cur = (uintptr_t)(((struct mach_header_64 *)header) + 1);
    // Scan the load commands for the __LINKEDIT segment and the LC_SYMTAB command
    for (uint32_t i = 0; i < header->ncmds; i++) {
        struct load_command *command = (struct load_command *)cur;
        if (command->cmd == LC_SEGMENT_64) {
            struct segment_command_64 *segmentCommand = (struct segment_command_64 *)command;
            // segname is a fixed 16-byte field, so bound the comparison to its size
            if (strncmp(segmentCommand->segname, SEG_LINKEDIT, sizeof(segmentCommand->segname)) == 0) {
                seg_linkedit = segmentCommand;
            }
        } else if (command->cmd == LC_SYMTAB) {
            // The symbol table holds each symbol's address and the position of its
            // name in the string table
            sym_command = (struct symtab_command *)command;
        }
        // Load commands are stored back to back; advance to the next one
        cur = cur + command->cmdsize;
    }
    if (!seg_linkedit || !sym_command) {
        return;
    }
    // __LINKEDIT base = ASLR slide + __LINKEDIT virtual address - __LINKEDIT file offset
    uintptr_t linkedit_base = (uintptr_t)machHeader->slide + seg_linkedit->vmaddr - seg_linkedit->fileoff;
    // Symbol table address = __LINKEDIT base + symoff
    struct nlist_64 *symbolTable = (struct nlist_64 *)(linkedit_base + sym_command->symoff);
    // String table address = __LINKEDIT base + stroff
    const uintptr_t stringTable = linkedit_base + sym_command->stroff;
    // Remove the slide so the address is comparable with the n_value fields
    uintptr_t slideLR = lr - machHeader->slide;
    uint64_t offset = UINT64_MAX;
    int best = -1;
    // nsyms is the number of symbol table entries
    for (uint32_t i = 0; i < sym_command->nsyms; i++) {
        const uint64_t symbolAddress = symbolTable[i].n_value;
        // Skip entries without an address and symbols that start after the
        // instruction: neither can be the function that contains it
        if (symbolAddress == 0 || slideLR < symbolAddress) {
            continue;
        }
        // Keep the symbol whose entry address is closest below the instruction
        const uint64_t distance = slideLR - symbolAddress;
        if (distance <= offset) {
            offset = distance;
            best = (int)i;
        }
    }
    if (best < 0) {
        return;
    }
    // Never write past the entries that were allocated for this call stack
    if (csInfo->stacks == NULL || csInfo->length >= csInfo->allocLenght) {
        return;
    }
    JYFuncInfo *funcInfo = &csInfo->stacks[csInfo->length++];
    funcInfo->machOName = machHeader->name;
    funcInfo->address = symbolTable[best].n_value;
    funcInfo->offset = offset;
    // n_un.n_strx is the offset of the symbol's name inside the string table
    funcInfo->symbol = (const char *)(stringTable + symbolTable[best].n_un.n_strx);
    // Drop the leading underscore by advancing the pointer one character
    if (*funcInfo->symbol == '_') {
        funcInfo->symbol++;
    }
    if (funcInfo->machOName == NULL) {
        funcInfo->machOName = "";
    }
}
Finally, after some finishing touches, every collected instruction address is mapped back to its function entry and its symbol is recovered. This is the whole principle behind BSBacktraceLogger, written by an expert in the community