Last time we introduced how the function call stack works: every function call pushes the LR and FP registers onto the stack, so by following the saved frame pointers from one stack frame to the next we can recover the whole function call stack.

The thread's registers are read into a _STRUCT_MCONTEXT machineContext; its __ss member holds the thread state, from which we can get the LR, FP, and SP registers.

// Declare the machine-context structure first; this line only declares it — its __ss member is populated later by fillThradStateContext()
 _STRUCT_MCONTEXT machineContext;
Copy the code
/// Captures the register state of `thread` into `machineContext->__ss`.
///
/// JY_THREAD_STATE and JY_THREAD_STATE_COUNT are macros defined outside this
/// snippet (presumably the per-architecture thread-state flavor and its word
/// count — confirm against their definition).
///
/// @param thread          The thread whose state is queried.
/// @param machineContext  Destination context; only its `__ss` member is written.
/// @return true when thread_get_state() reports KERN_SUCCESS, false otherwise.
bool fillThradStateContext(thread_t thread, _STRUCT_MCONTEXT *machineContext){
    // thread_get_state() takes the buffer capacity in and hands the written count back out.
    mach_msg_type_number_t stateCount = JY_THREAD_STATE_COUNT;
    thread_state_t stateBuffer = (thread_state_t)&machineContext->__ss;

    if (thread_get_state(thread, JY_THREAD_STATE, stateBuffer, &stateCount) != KERN_SUCCESS) {
        return false;
    }
    return true;
}
Copy the code

Get the corresponding registers LR, PC, FP

 // Program counter: the address the thread was executing when its state was captured.
 // JY_INSTRUCTION_ADDRESS is a macro defined outside this snippet — presumably the per-architecture field name; confirm.
    const uintptr_t pcRegister = machineContext.__ss.JY_INSTRUCTION_ADDRESS;
    if (pcRegister == 0) {
        // Without a PC there is no first backtrace entry, so bail out with an error string.
        return @"Fail to get pc address";
    }
    
    // Link register: the return address of the current function. It seeds the
    // first stack frame that the backtrace walk starts from.
    uintptr_t lrRegister;
#if defined(__i386__) || defined(__x86_64__)
    // No __lr field is read on x86 builds; 0 is used as a placeholder.
    lrRegister =  0;
#else
    lrRegister =  machineContext.__ss.__lr;
#endif
     
    // Frame pointer: the address the frame walk reads the first saved frame record from.
    // JY_FRAME_POINTER is a macro defined outside this snippet — presumably the per-architecture field name; confirm.
    const uintptr_t fpRegister = machineContext.__ss.JY_FRAME_POINTER;
    
Copy the code

And then we recursively get the function stack

// Buffer that receives at most StackMaxDepth addresses
    uintptr_t backtraceBuffer[StackMaxDepth];
    int i = 0;
    // The first entry of the buffer is the PC, i.e. the address currently being executed
    backtraceBuffer[i++] = pcRegister;

    // Seed the walk with the live registers: the current frame pointer and link register.
    // The initializer order assumes JYStackFrame is laid out as { fp, lr }.
    JYStackFrame frame = {(void *)fpRegister, lrRegister};
    vm_size_t len = sizeof(frame);

    // Walk the chain of frames (a loop, not recursion): record the return address of the
    // current frame, then load the caller's saved { fp, lr } pair from the memory fp points at.
    while (frame.fp && i < StackMaxDepth) {
        backtraceBuffer[i++] = frame.lr;
        bool flag = readFPMemory(frame.fp, &frame, len);
        // Stop when the read fails or the chain ends (null frame pointer or return address).
        if (!flag || frame.fp == 0 || frame.lr == 0) {
            break;
        }
    }

readFPMemory reads len (16) bytes of memory starting at the address held in fp. At that address the stack stores the saved FP in the first 8 bytes, followed by the saved LR in the next 8 bytes.

/// Copies `len` bytes from address `fp` in the current task into `dst`.
///
/// The copy goes through vm_read_overwrite(), so an unreadable source address
/// is reported as a failure return instead of faulting the caller.
///
/// @param fp   Source address (a frame pointer taken from the thread being walked).
/// @param dst  Destination buffer of at least `len` bytes. It is declared const in
///             the signature but IS written to; the signature is kept for callers.
/// @param len  Number of bytes to copy.
/// @return true only when the kernel reports success AND all `len` bytes were copied.
bool readFPMemory(const void *fp, const void *dst, const vm_size_t len)
{
    // A null source or destination can never be copied; fail without a kernel call.
    if (fp == NULL || dst == NULL) {
        return false;
    }

    vm_size_t bytesCopied = 0;
    kern_return_t kr = vm_read_overwrite(mach_task_self(), (vm_address_t)fp, len, (vm_address_t)dst, &bytesCopied);

    // A short copy would leave the caller's frame half-updated, so treat it as a failure.
    return kr == KERN_SUCCESS && bytesCopied == len;
}
Copy the code

Now that we have walked the stack and collected the addresses, it is time to restore the symbols for them.

// Symbolicate the i collected addresses (the PC followed by the LR values) and take a copy of the resulting string
restoreSymbol(backtraceBuffer,i,thread).copy;
Copy the code

Define structs for recording symbolic information, etc

/// One symbolicated stack entry.
typedef struct{
    uint64_t address;        // Address of the matched symbol (its n_value in the symbol table)
    uint64_t offset;         // Distance from the symbol's address to the looked-up instruction address
    const char * symbol;     // Symbol name; points into the image's string table
    const char * machOName;  // Name of the Mach-O image the symbol was found in
} JYFuncInfo;

/// A fixed-capacity list of symbolicated stack entries.
typedef struct{
    JYFuncInfo *stacks;  // Storage for the entries
    int allocLenght;     // Capacity of `stacks`, in entries (spelling kept: other code uses this field name)
    int length;          // Number of entries filled in so far
} JYCallStackInfo;
Copy the code

Now to restore, start by doing some initialization

/// Symbolicates a collected backtrace and renders it as text.
///
/// @param backtraceBuffer  Instruction addresses to symbolicate.
/// @param length           Number of entries in `backtraceBuffer`; also used as the
///                         capacity of the temporary symbol list.
/// @param thread           Thread the backtrace belongs to; only printed in the header line.
/// @return A header line followed by one formatted entry per symbol found, or a short
///         error string when a temporary allocation fails.
NSString * restoreSymbol(uintptr_t *backtraceBuffer, int length ,thread_t thread){

    JYCallStackInfo * csInfo = malloc(sizeof(JYCallStackInfo));
    if (csInfo == NULL) {
        return @"fail to malloc";
    }
    csInfo->length = 0;
    csInfo->allocLenght = length;
    csInfo->stacks = (JYFuncInfo *)malloc(sizeof(JYFuncInfo) * csInfo->allocLenght);
    if (csInfo->stacks == NULL) {
        // The container was already allocated; release it so this error path does not leak.
        free(csInfo);
        return @"error";
    }

    // Fills csInfo->stacks and bumps csInfo->length for every address that resolves to a symbol.
    callStackOfSymbol(backtraceBuffer, length, csInfo);

    NSMutableString *strM = [NSMutableString stringWithFormat:@"\n 🔥🔥🔥JYCallStack of thread: %u 🔥🔥🔥\n", thread];
    for (int j = 0; j < csInfo->length; j++) {
        [strM appendFormat:@"%@", formatFuncInfo(csInfo->stacks[j])];
    }

    // The text has been built; the temporary symbol list is no longer needed.
    freeMemory(csInfo);
    return strM.copy;
}
Copy the code

Go to the key code callStackOfSymbol(backtraceBuffer, length, csInfo);

/// Resolves every collected address to a symbol and appends the results to `csInfo`.
///
/// @param backtraceBuffer  Instruction addresses to symbolicate.
/// @param length           Number of entries in `backtraceBuffer`.
/// @param csInfo           Receives one entry per address that could be resolved.
void callStackOfSymbol(uintptr_t *backtraceBuffer, int length ,JYCallStackInfo *csInfo){
    if (backtraceBuffer == NULL || csInfo == NULL) {
        return;
    }

    // Walk the collected addresses and try to recover a symbol for each one
    for (int i = 0; i<length; i++) {
        // Find the image that contains this address
        JYMachHeader * machHeader = getLrInMach(backtraceBuffer[i]);
        if (machHeader) {
            // Look the address up in that image's symbol table
            findSymbolInMach(backtraceBuffer[i],machHeader,csInfo);
        }
    }
}

First of all, every backtraceBuffer[i] is a value taken from the LR register (a return address), except for the first entry, which is the PC.

So the first thing we need to know is, at runtime, there are multiple images, and we need to get all of them, because images have ASLR, name, etc

/// Snapshots every image currently loaded by dyld into the global `machHeaderArr`.
///
/// For each image the header pointer, the image name and the ASLR slide are recorded.
/// If the container cannot be allocated, `machHeaderArr` is left untouched; if the
/// entry array cannot be allocated, the list is published with zero entries.
void getMachHeader(void){
    // Allocate the container
    JYMachHeaderArr *images = (JYMachHeaderArr *)malloc(sizeof(JYMachHeaderArr));
    if (images == NULL) {
        return;
    }

    // _dyld_image_count() returns the number of images currently loaded
    images->allocLength = _dyld_image_count();

    // Allocate one entry per image
    images->array = (JYMachHeader *)malloc(sizeof(JYMachHeader) * images->allocLength);
    if (images->array == NULL) {
        // No storage: publish an empty list so loops over it do nothing.
        images->allocLength = 0;
    }

    for (uint32_t i = 0; i < images->allocLength; i++) {
        JYMachHeader *machHeader = &images->array[i];

        // Header of the image
        machHeader->header = _dyld_get_image_header(i);

        // Name of the image
        machHeader->name = _dyld_get_image_name(i);

        // Slide of the image: the offset between the addresses recorded in the
        // Mach-O file and the addresses the image is actually loaded at
        machHeader->slide = _dyld_get_image_vmaddr_slide(i);
    }

    // Publish the list only once it is fully populated.
    machHeaderArr = images;
}

Now that we have all the images saved in machHeaderArr, we can start to find out which image an instruction address belongs to.

/// Finds the loaded image whose segments contain the address `lr`.
///
/// The global image list is built lazily on the first call.
///
/// @param lr  A runtime instruction address (slide included).
/// @return The matching entry of `machHeaderArr`, or NULL when no image contains
///         the address or the image list is unavailable.
JYMachHeader *getLrInMach(uintptr_t lr)
{
    if (!machHeaderArr) {
        // First use: collect all loaded images into machHeaderArr
        getMachHeader();
    }
    // Building the list can fail; there is nothing to search in that case.
    if (!machHeaderArr || !machHeaderArr->array) {
        return NULL;
    }

    // Check every image for a segment that contains the address
    for (uint32_t i = 0; i < machHeaderArr->allocLength; i++) {
        JYMachHeader *machHeader = &machHeaderArr->array[i];
        if (machHeader->header == NULL) {
            continue;
        }

        // Segment addresses in the header do not include the slide, so remove it before comparing
        if (backtraceBufferItemInMach(lr-machHeader->slide, machHeader->header)) {
            return machHeader;
        }
    }
    return NULL;
}
Copy the code

We pass the image header to the backtraceBufferItemInMach function. It first computes the address of the Load Commands, which start right after the header (the structure of the Load Commands is shown below), and then traverses the Load Commands:

/// Tells whether an un-slid address falls inside one of an image's segments.
///
/// Only 64-bit images are handled: the load commands are located by skipping a
/// `struct mach_header_64`, and only LC_SEGMENT_64 commands are inspected.
///
/// @param slideLR  Instruction address with the image's slide already subtracted.
/// @param header   Mach-O header of the image to test.
/// @return true when some LC_SEGMENT_64 covers [vmaddr, vmaddr + vmsize) around
///         `slideLR`; false otherwise, including for a NULL or non-64-bit header.
bool backtraceBufferItemInMach(uintptr_t slideLR, const struct mach_header *header)
{
    // Skipping a mach_header_64 only lands on the load commands of a 64-bit image.
    // TODO: support 32-bit images (struct mach_header + LC_SEGMENT / struct segment_command).
    if (header == NULL || header->magic != MH_MAGIC_64) {
        return false;
    }

    // The load commands start immediately after the header
    uintptr_t cur = (uintptr_t)(((struct mach_header_64*)header) + 1);

    // ncmds is the number of load commands
    for (uint32_t i = 0; i < header->ncmds; i++) {
        struct load_command *command = (struct load_command *)cur;

        if (command->cmd == LC_SEGMENT_64) {
            struct segment_command_64 *segmentCommand = (struct segment_command_64 *)command;

            uintptr_t start = segmentCommand->vmaddr;
            // One past the last byte of the segment, hence the strict '<' below
            uintptr_t end = segmentCommand->vmaddr + segmentCommand->vmsize;

            if (slideLR >= start && slideLR < end) {
                return true;
            }
        }

        // Load commands are laid out back to back; cmdsize is the stride to the next one
        cur = cur + command->cmdsize;
    }
    return false;
}
Copy the code

Having found which image the instruction is in, we go back to callStackOfSymbol. The next step is to look up our symbol inside that image with findSymbolInMach(backtraceBuffer[i], machHeader, csInfo); this is also the most critical step.

First of all, we need to understand the structure of Mach-O, the relationship between the symbol table and the string table, and the roles of the LC_SYMTAB load command and the __LINKEDIT segment. There are many explanations of the principles online that you can study on your own.

The __LINKEDIT segment contains raw data used by the dynamic linker, such as symbols, strings, and relocation entries.

LC_SYMTAB describes the position of the string table and symbol table in __LINKEDIT

First we get Load Commands via the image header, and then we loop to find LC_SYMTAB and __LINKEDIT segments

The address of LR in our backtraceBuffer

  • seg_linkedit->vmaddr = LINKEDIT Virtual address
  • seg_linkedit->fileoff= The file address of LINKEDIT
  • (uintptr_t)machHeader->slide = ASLR
  • Offset address of LR = LR real address − ASLR

Get the __LINKEDIT base address

Segment loaded into memory base address = ASLR + LINKEDIT virtual address - LINKEDIT file address

Real address of the symbol table = __LINKEDIT base address + symoff (the symbol table's offset). The LR value is just an instruction address inside some function, so it is always greater than or equal to that function's entry address, i.e. the n_value of the corresponding symbol. We therefore iterate over every symbol-table entry, compare each n_value with the LR offset address, and keep the entry whose n_value is not above the LR offset address and is closest to it (the smallest distance); that entry is the most accurate match for the function containing the instruction.

Address of the symbol name = stringTable + symbolTable[best].n_un.n_strx, where n_un.n_strx is the offset of the name within the string table.

Get the symbol name

/// Looks up the symbol that contains `lr` in one image and appends it to `csInfo`.
///
/// The image's load commands are scanned for the __LINKEDIT segment and the
/// LC_SYMTAB command; the symbol table is then searched for the entry with the
/// highest address that is still at or below the un-slid `lr`.
///
/// @param lr          Runtime instruction address (slide included).
/// @param machHeader  Image that contains `lr`; its header is read as a 64-bit Mach-O header.
/// @param csInfo      Result list. One entry is appended when a symbol is found;
///                    nothing is written when the list is already full.
void findSymbolInMach(uintptr_t lr, JYMachHeader * machHeader, JYCallStackInfo * csInfo){

    if (!machHeader || !machHeader->header || !csInfo) {
        return;
    }
    // Never write past the capacity the caller allocated.
    if (csInfo->stacks == NULL || csInfo->length >= csInfo->allocLenght) {
        return;
    }

    // The __LINKEDIT segment contains raw data used by the dynamic linker,
    // such as symbols, strings, and relocation entries.
    struct segment_command_64 * seg_linkedit = NULL;

    // The LC_SYMTAB command describes where the symbol table and the string table are located.
    struct symtab_command * sym_command = NULL;

    // Mach-O header of the image
    const struct mach_header * header = machHeader->header;

    // The load commands start immediately after the (64-bit) header
    uintptr_t cur = (uintptr_t)(((struct mach_header_64*)header) + 1);

    // Scan the load commands for the __LINKEDIT segment and the LC_SYMTAB command
    for (uint32_t i = 0; i<header->ncmds; i++) {
        struct load_command * command = (struct load_command*)cur;

        if (command->cmd == LC_SEGMENT_64) {
            struct segment_command_64 * segmentCommand = (struct segment_command_64 *)command;

            // segname is a fixed-size field, so bound the comparison to its size
            if (strncmp(segmentCommand->segname, SEG_LINKEDIT, sizeof(segmentCommand->segname)) == 0) {
                seg_linkedit = segmentCommand;
            }
        } else if (command->cmd == LC_SYMTAB) {
            /* LC_SYMTAB describes the position of the string and symbol tables in __LINKEDIT.
               Each symbol-table entry holds the symbol's address and the position of its
               name (the function name) in the string table. */
            sym_command = (struct symtab_command*)command;
        }

        // Load commands are laid out back to back; cmdsize is the stride to the next one
        cur = cur + command->cmdsize;
    }

    // Both pieces are needed to read the symbol table
    if (!seg_linkedit || !sym_command) {
        return;
    }

    // Base to which offsets are added: slide + __LINKEDIT vmaddr - __LINKEDIT fileoff
    uintptr_t linkedit_base = (uintptr_t)machHeader->slide + seg_linkedit->vmaddr - seg_linkedit->fileoff;

    // Symbol table in memory = base + symoff
    struct nlist_64 *symbolTable = (struct nlist_64 *)(linkedit_base + sym_command->symoff);

    // String table in memory = base + stroff
    const uintptr_t stringTable = linkedit_base + sym_command->stroff;

    // Symbol addresses in the table do not include the slide, so remove it from lr
    uintptr_t slideLR = lr - machHeader->slide;

    uint64_t offset = UINT64_MAX;
    int best = -1;

    // nsyms is the number of symbol-table entries
    for (uint32_t i = 0; i < sym_command->nsyms; i++) {
        const uint64_t symbolAddress = symbolTable[i].n_value;

        // n_value == 0 marks a symbol with no address in this image (e.g. an undefined
        // external); a symbol above the address cannot contain it either.
        if (symbolAddress == 0 || symbolAddress > slideLR) {
            continue;
        }

        // Keep the symbol closest below the address (later entries win ties)
        uint64_t distance = slideLR - symbolAddress;
        if (distance <= offset) {
            offset = distance;
            best = (int)i;
        }
    }

    if (best >= 0) {
        JYFuncInfo *funcInfo = &csInfo->stacks[csInfo->length++];
        funcInfo->machOName = machHeader->name;
        funcInfo->address = symbolTable[best].n_value;
        funcInfo->offset = offset;

        // n_un.n_strx is the offset of the symbol's name inside the string table
        funcInfo->symbol = (char *)(stringTable + symbolTable[best].n_un.n_strx);

        // Drop the leading underscore by advancing the pointer one character
        if (*funcInfo->symbol == '_')
        {
            funcInfo->symbol++;
        }
        if (funcInfo->machOName == NULL) {
            funcInfo->machOName = "";
        }
    }
}

Finally, there is some finishing work: every collected instruction address has been matched to its function entry and its symbol restored. That is the whole principle behind BSBacktraceLogger, written by an expert developer.