preface
Main thread time is an important indicator of App performance. The main thread is blocked and immediately causes a lag in user action, which is the most immediate response, so it’s a performance point we have to focus on.
Detection scheme
Instrument – Time Profiler
The Time Profiler template uses the Time Profiler tool to perform low-overhead, time-based sampling on processes running on the system CPU to display the application’s use of multi-core CPU and threads.
Over time, the more efficient it is to use multiple cores and threads, the better your App will perform.
If you are not familiar with it, refer to the official document Track CPU Core and Thread Use
You need to enable the generation of dSYM symbol files before using the Time Profiler, otherwise you will only see system function calls.
Debug mode. By default, dSYM symbol files are not generated.
You need to enable dSYM file generation for Debug in Build Setting > Debug Infomation Format.
Then start Xcode to build the current project.
Open Instrument, select the Time Profiler template, and start recording.
The Time Profiler records the tree of function calls in each thread, making it easier to locate which piece of code is causing the thread to block.
Double-click on this record to see the source code for the code
OK, so that’s it, Time Profiler, it’s almost all UI, it’s easy to use.
So what’s the downside of Time Profiler
- When detecting, Xcode must be supported.
- When the real machine is tested, it must be online.
- Unable to implement custom output content.
Self-made testing tool
Although Time Profiler is easy to use, it also has limitations. At this Time, it must be what everyone wants to build a set of detection tools.
The function of this detection tool can be seen in the Time Profiler
- Relationship tree for function calls
- The statistics are filtered through the configuration
- Capture as many functions as possible
- Format the output statistics
- Statistical data is logged and uploaded to the server
In the beginning, I also started from the Method Swizzle idea to calculate the time of UIViewController and UIView. Unfortunately, the granularity of statistics is too coarse to actually use.
The hook objc_msgSend function would be a better choice. After I consulted the source code of OBJC4, I found that it involves hooks to THE C library, overwrites the implementation of objc_msgSend in assembly language, and local storage of threads.
Fortunately, with the development of iOS, there have been many great gods to provide us with wheels. For example, I found The wheels of Mr. Dai Ming for secondary encapsulation.
My project is not finished yet, but the basic principle of implementation is based on the design of Mr. Dai Ming. You can refer to his project to build your own statistical system.
Reference project address: GCDFetchFeed
The hook of C library uses FaceBook’s Fishhook, which I won’t introduce much, because everyone should be familiar with it.
This encapsulation involves files:
SMCallTrace.h SMCallTrace.m SMCallTraceCore.c SMCallTraceCore.h
I’ve commented on the main implementation of the Hook objc_msgSend method to help you understand how it works.
//
// GHObjcMsgSendHook.c
// CommercialVehiclePlatform
//
// Created by JunhuaShao on 2019/3/17.
// Copyright © 2019 Junhuashao. All rights reserved.
//
/ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * objc_msgSend Hook source code: https://github.com/ming1016/GCDFetchFeed thread-local storage: https://blog.csdn.net/vevenlcf/article/details/77882985 ******************************************************** */
#import "GHObjcMsgSendHook.h"
// This Hook only supports arm64 architecture
#ifdef __aarch64__
//#import <objc/runtime.h>
//#import <sys/time.h>
//
//#import <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <objc/message.h>
#include <objc/runtime.h>
#include <dispatch/dispatch.h>
#include <pthread.h>
#import "fishhook.h"
/** Configuration */
// Call the log tool switch
static bool _call_record_enabled = true;
// Set the minimum time threshold, expressed in microseconds
static uint64_t _min_time_cost = 1000; //us
// Set the maximum call depth threshold
static int _max_call_depth = 3;
/** iVar */
// Replace the original objc_msgSend
__unused static id (*orig_objc_msgSend)(id, SEL, ...);
// The key used to associate with the calling thread, initialized when the tool is started
static pthread_key_t _thread_key;
// The formatting time record
static GHCallRecord *_ghCallRecords;
// The formatted time record occupies space
static int _ghRecordAlloc;
// Number of time consuming records to format
static int _ghRecordNum;
/** hook function call record */
typedef struct {
// Use object_getClass to get a Class
id self;
// NSStringFromClass gives you the class name
Class cls;
// we can get the method name from NSStringFromSelector
SEL cmd;
// us call time (microseconds)
uint64_t time;
// Link register is used to specify the address of the next function
uintptr_t lr;
} thread_call_record;
/** Stack of function calls in thread */
typedef struct {
// The current call record of the hook function
thread_call_record *stack;
// The current storage space size
int allocated_length;
// The current record number
int index;
// Whether it is on the main thread
bool is_main_thread;
} thread_call_stack;
/** get stack of function calls in thread @return stack of function calls */
static inline thread_call_stack * get_thread_call_stack(a) {
/** int pthread_setspecific (pthread_key_t key, const void *value) is used to store a copy of value in a data structure and associate it with the calling thread and key. The parameter value usually points to a block of memory allocated by the caller. When the thread terminates, this pointer is passed as an argument to the destructor function associated with the key. void *pthread_getspecific (pthread_key_t key); When a thread is created, all thread-local storage variables are initialized to NULL. Pthread_getspecific () must be called the first time such a variable is used to verify that it is associated with the corresponding key. If it is not, you can allocate a block of memory and store a pointer to it through pthread_setspecific(). * /
thread_call_stack *cs = (thread_call_stack *)pthread_getspecific(_thread_key);
if (cs == NULL) {
// Make room for the function call stack
cs = (thread_call_stack *)malloc(sizeof(thread_call_stack));
// Set the size of the hook function to 128 records
cs->stack = (thread_call_record *)calloc(128.sizeof(thread_call_record));
// The initial current storage space is 64 records size
cs->allocated_length = 64;
// Initialize the sequence number
cs->index = - 1;
/** int pthread_main_np(void); If on the main thread, a non-zero result */ is returned
cs->is_main_thread = pthread_main_np();
// Associate the call stack with the thread
pthread_setspecific(_thread_key, cs);
}
return cs;
}
static void release_thread_call_stack(void *ptr) {
thread_call_stack *cs = (thread_call_stack *)ptr;
if(! cs)return;
// Release the call stack
if (cs->stack) free(cs->stack);
free(cs);
}
static inline void push_call_record(id _self, Class _cls, SEL _cmd, uintptr_t lr) {
// Get the call stack associated with the current thread
thread_call_stack *cs = get_thread_call_stack();
if (cs) {
// Add one
int nextIndex = (++cs->index);
// If the serial number exceeds the current storage space size
if (nextIndex >= cs->allocated_length) {
// Increase the current storage space by 64 records
cs->allocated_length += 64;
// Reallocate the call stack space for the pointer to the current storage space.
cs->stack = (thread_call_record *)realloc(cs->stack, cs->allocated_length * sizeof(thread_call_record));
}
// Obtain the memory address corresponding to the current sequence number and create a new record
thread_call_record *newRecord = &cs->stack[nextIndex];
// Record the calling object
newRecord->self = _self;
// Record the call to class
newRecord->cls = _cls;
// Record the calling function
newRecord->cmd = _cmd;
// Record the address of the calling function
newRecord->lr = lr;
/** The current thread is the main thread and the call logging function is enabled. The goal is to count only the main thread time */
if (cs->is_main_thread && _call_record_enabled) {
/** * __darwin_time_t tv_sec; //seconds __darwin_suseconds_t tv_usec; //and microseconds tv_sec indicates the number of seconds from Epoch to struct timeval. Tv_usec indicates the number of microseconds. I'm using high precision here, so I add the two, and I take the nearest 100 seconds */
struct timeval now;
// Get the current time
gettimeofday(&now, NULL);
newRecord->time = (now.tv_sec % 100) * 1000000+ now.tv_usec; }}}static inline uintptr_t pop_call_record(a) {
// Get the current call stack
thread_call_stack *cs = get_thread_call_stack();
// Number of the current call record
int curIndex = cs->index;
// The parent function call record number
int nextIndex = cs->index--;
// Get the parent function call record, off the stack
thread_call_record *pRecord = &cs->stack[nextIndex];
// This is also the main thread, and the recording function is enabled
if (cs->is_main_thread && _call_record_enabled) {
// Get the current time
struct timeval now;
gettimeofday(&now, NULL);
uint64_t time = (now.tv_sec % 100) * 1000000 + now.tv_usec;
// If the current time is less than the last time recorded, then carry, add 100 seconds here
if (time < pRecord->time) {
time += 100 * 1000000;
}
// Obtain time
uint64_t cost = time - pRecord->time;
// If the time is greater than the time threshold and the call depth is less than the maximum depth, the call is recorded. Here we call sequence number, which is depth
if (cost > _min_time_cost && cs->index < _max_call_depth) {
// Initialize the formatting time record
if(! _ghCallRecords) {// Create 1024 records
_ghRecordAlloc = 1024;
_ghCallRecords = (GHCallRecord *)malloc(sizeof(GHCallRecord)*_ghRecordAlloc);
}
// Add one to the number of records
_ghRecordNum++;
// If the number of current records is greater than the size of the memory, allocate memory for the pointer again, and the size is 1024 more than the original
if (_ghRecordNum >= _ghRecordAlloc) {
_ghRecordAlloc += 1024;
_ghCallRecords = (GHCallRecord *)realloc(_ghCallRecords, sizeof(GHCallRecord) * _ghRecordAlloc);
}
// Get the address corresponding to the current page number and create a formatting record.
GHCallRecord *log = &_ghCallRecords[_ghRecordNum - 1];
// Save the call to class
log->cls = pRecord->cls;
// Save the call depth
log->depth = curIndex;
// Save the calling method
log->sel = pRecord->cmd;
// Save time
log->time = cost; }}// Return the address of the next function called
return pRecord->lr;
}
void hook_before_objc_msgSend(id self, SEL _cmd, uintptr_t lr)
{
// The function call record is pushed to the stack
push_call_record(self, object_getClass(self), _cmd, lr);
}
uintptr_t hook_after_objc_msgSend() {
// The function call is logged out of the stack
return pop_call_record();
}
// replacement objc_msgSend (arm64)
// https://blog.nelhage.com/2010/10/amd64-and-va_arg/
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
// https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallin gConventions.html
#define call(b, value) \
__asm volatile ("stp x8, x9, [sp, #-16]! \n"); \
__asm volatile ("mov x12, %0\n": :"r"(value)); \
__asm volatile ("ldp x8, x9, [sp], #16\n"); \
__asm volatile (#b " x12\n");
#define save() \
__asm volatile ( \
"stp x8, x9, [sp, #-16]! \n" \
"stp x6, x7, [sp, #-16]! \n" \
"stp x4, x5, [sp, #-16]! \n" \
"stp x2, x3, [sp, #-16]! \n" \
"stp x0, x1, [sp, #-16]! \n");
#define load() \
__asm volatile ( \
"ldp x0, x1, [sp], #16\n" \
"ldp x2, x3, [sp], #16\n" \
"ldp x4, x5, [sp], #16\n" \
"ldp x6, x7, [sp], #16\n" \
"ldp x8, x9, [sp], #16\n" );
#define link(b, value) \
__asm volatile ("stp x8, lr, [sp, #-16]! \n"); \
__asm volatile ("sub sp, sp, #16\n"); \
call(b, value); \
__asm volatile ("add sp, sp, #16\n"); \
__asm volatile ("ldp x8, lr, [sp], #16\n");
#define ret() __asm volatile ("ret\n");
__attribute__((__naked__))
static void hook_Objc_msgSend(a) {
// Save parameters.
save();
__asm volatile ("mov x2, lr\n");
__asm volatile ("mov x3, x4\n");
// Call our before_objc_msgSend.
call(blr, &hook_before_objc_msgSend);
// Load parameters.
load();
// Call through to the original objc_msgSend.
call(blr, orig_objc_msgSend);
// Save original objc_msgSend return value.
save();
// Call our after_objc_msgSend.
call(blr, &hook_after_objc_msgSend);
// restore lr
__asm volatile ("mov lr, x0\n");
// Load original objc_msgSend return value.
load();
// return
ret();
}
void ghAnalyerStart(a) {
_call_record_enabled = true;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
pthread_key_create(&_thread_key, &release_thread_call_stack);
rebind_symbols((struct rebinding[6]){
{"objc_msgSend", (void *)hook_Objc_msgSend, (void **)&orig_objc_msgSend},
}, 1);
});
}
void ghAnalyerStop(a) {
_call_record_enabled = false;
}
void ghSetMinTimeCallCost(uint64_t us) {
_min_time_cost = us;
}
void ghSetMaxCallDepth(int depth) {
_max_call_depth = depth;
}
GHCallRecord *ghGetCallRecords(int *num) {
if (num) {
*num = _ghRecordNum;
}
return _ghCallRecords;
}
void ghClearCallRecords(a) {
if (_ghCallRecords) {
free(_ghCallRecords);
_ghCallRecords = NULL;
}
_ghRecordNum = 0;
}
#else
void ghAnalyerStart(a) {}
void ghAnalyerStop(a) {}
void ghSetMinTimeCallCost(uint64_t us) {}void ghSetMaxCallDepth(int depth) {}GHCallRecord *ghGetCallRecords(int *num) {
if (num) {
*num = 0;
}
return NULL;
}
void ghClearCallRecords(a) {}
#endif
Macro MethodTableLookup // Push frame SignLR STP FP, lr, [sp, #-16]! mov fp, sp // save parameter registers: x0.. x8, q0.. q7 sub sp, sp, #(10*8 + 8*16) stp q0, q1, [sp, #(0*16)] stp q2, q3, [sp, #(2*16)] stp q4, q5, [sp, #(4*16)] stp q6, q7, [sp, #(6*16)] stp x0, x1, [sp, #(8*16+0*8)] stp x2, x3, [sp, #(8*16+2*8)] stp x4, x5, [sp, #(8*16+4*8)] stp x6, x7, [sp, #(8*16+6*8)] str x8, [sp, #(8*16+8*8)] // receiver and selector already in x0 and x1 mov x2, x16 bl __class_lookupMethodAndLoadCache3 // IMP in x0 mov x17, x0 // restore registers and return ldp q0, q1, [sp, #(0*16)] ldp q2, q3, [sp, #(2*16)] ldp q4, q5, [sp, #(4*16)] ldp q6, q7, [sp, #(6*16)] ldp x0, x1, [sp, #(8*16+0*8)] ldp x2, x3, [sp, #(8*16+2*8)] ldp x4, x5, [sp, #(8*16+4*8)] ldp x6, x7, [sp, #(8*16+6*8)] ldr x8, [sp, #(8*16+8*8)] mov sp, fp ldp fp, lr, [sp], #16 AuthenticateLR .endmacro */
Copy the code