preface
The source code of this paper is libdispatch-1173.40.5, which mainly analyzes the specific implementation principle of commonly used DISPATCH API. The dispatch_object_s data structure is used for subsequent analysis
struct dispatch_object_s {
const struct dispatch_object_vtable_s *do_vtable.int volatile do_ref_cnt// Reference countint volatile do_xref_cnt, // External reference count, the object memory is freed only when both are 0struct dispatch_object_s *volatile do_next; // Next do
struct dispatch_queue_s *do_targetq; // Target queue
void *do_ctxt; / / context
void *do_finalizer; // Call the function when it is destroyed
unsigned int do_suspend_cnt; //suspend suspends the count
};
Copy the code
Where do_vtable contains the object type and function pointer;
dispatch_object_t
“Dispatch_object_t” is a union of “dispatch_object_t” for all data structures in the union.
typedef union {
struct _os_object_s* _os_obj;
struct dispatch_object_s* _do; / / the object structure
struct dispatch_continuation_s* _dc; // The dispatch_aync block is encapsulated into this data structure
struct dispatch_queue_s* _dq; / / the queue
struct dispatch_queue_attr_s* _dqa; // Queue attributes
struct dispatch_group_s* _dg; // Group operation
struct dispatch_source_s* _ds; / / the source structure
struct dispatch_mach_s* _dm;
struct dispatch_mach_msg_s* _dmsg;
struct dispatch_timer_aggregate_s* _dta;
struct dispatch_source_attr_s* _dsa; / / the source attribute
struct dispatch_semaphore_s* _dsema; / / semaphore
struct dispatch_data_s* _ddata;
struct dispatch_io_s* _dchannel;
struct dispatch_operation_s* _doperation;
struct dispatch_disk_s* _ddisk;
} dispatch_object_t __attribute__((__transparent_union__));
Copy the code
dispatch_continuation_s
struct dispatch_continuation_s {
struct dispatch_object_s *volatile do_next; // Next task
dispatch_function_t dc_func; // The method of execution
void *dc_ctxt; // Method context
void *dc_data; // Related data
void *dc_other // Other information
}
Copy the code
Dispatch_continuation_s is the structure of the task in dispatch_continuation_s. Incoming blocks are queued as this structure object.
dispatch_queue_s
struct dispatch_queue_s {
struct dispatch_queue_s *do_targetq; // Target queue, which eventually points to a system's default queue
struct dispatch_object_s *volatile dq_items_head; // Queue header
struct dispatch_object_s *volatile dq_items_tail; // The end of the queue
unsigned long dq_serialnum; // Queue number
const char *dq_label; / / the queue name
dispatch_priority_t dq_priority; / / priority
dispatch_priority_t volatile dq_override; // Whether it is overwritten
uint16_t dq_width; // The number of tasks that can be executed concurrently
dispatch_queue_t dq_specific_q; // Special queue
uint32_t dq_side_suspend_cnt; // The number of paused tasks
const struct queue_vtable_s *do_vtable { // Some function Pointers to the queue
unsigned long const do_type; // Queue types, such as DISPATCH_QUEUE_CONCURRENT_TYPE, DISPATCH_QUEUE_SERIAL_TYPE, DISPATCH_QUEUE_GLOBAL_ROOT_TYPE...
const char *const do_kind; // Queue type, for example: "serial-queue", "concurrent-queue", "global-queue", "main-queue", "runloop-queue"" Mgr-queue "...
void (*const do_dispose)(/*params*/); // Destroy the queue
void (*const do_suspend)(/*params*/); // Pause the queue
void (*const do_resume)(/*params*/); // Restore the queue
void (*const do_invoke)(/*params*/); // Start processing the queue
void (*const do_wakeup)(/*params*/); // Wake up the queue
void (*const do_set_targetq)(/*params*/); // Set target queue
};
}
Copy the code
Dispatch_queue_s is the structure of the queue. In its DO_vtable, there are many Pointers to functions corresponding to the queue operation methods. There should be some macros to call these methods in the queue. For example, the do_dispose method should have a macro dx_Dispose:
#define dx_dispose(queue) &(queue)->do_vtable->_os_obj_vtable->do_dispose(queue)
Copy the code
Understand the relationship between queues and threads
dispatch_once
The source code is as follows:
void
dispatch_once_f(dispatch_once_t *val, void *ctxt, dispatch_function_t func)
{
dispatch_once_gate_t l = (dispatch_once_gate_t)val;
#if! DISPATCH_ONCE_INLINE_FASTPATH || DISPATCH_ONCE_USE_QUIESCENT_COUNTER
// atomically get l->dgo_once
uintptr_t v = os_atomic_load(&l->dgo_once, acquire);
// Check if the above value is DLOCK_ONCE_DONE(most likely yes, indicating that func has been assigned), if yes, return directly
if (likely(v == DLOCK_ONCE_DONE)) {
return;
}
#if DISPATCH_ONCE_USE_QUIESCENT_COUNTER
// Different decision forms
if (likely(DISPATCH_ONCE_IS_GEN(v))) {
return _dispatch_once_mark_done_if_quiesced(l, v);
}
#endif
#endif
// Atomicity determines whether an assignment has been made
if (_dispatch_once_gate_tryenter(l)) {
return _dispatch_once_callout(l, ctxt, func);
}
// The thread blocks waiting for dispatch_function_t func to complete
return _dispatch_once_wait(l);
}
static inline bool
_dispatch_once_gate_tryenter(dispatch_once_gate_t l)
{
// if l->dgo_once equals DLOCK_ONCE_UNLOCKED,
// If so, assign the thread ID
return os_atomic_cmpxchg(&l->dgo_once, DLOCK_ONCE_UNLOCKED,
(uintptr_t)_dispatch_lock_value_for_self(), relaxed);
}
static void
_dispatch_once_callout(dispatch_once_gate_t l, void *ctxt,
dispatch_function_t func)
{
// Call func in block
_dispatch_client_callout(ctxt, func);
// Broadcast to wake up all waiting threads
_dispatch_once_gate_broadcast(l);
}
Copy the code
The general process is as follows (copied from others’ drawings) :
GCD dispatch_once
queue
The main apis used by queue are as follows:
dispatch_queue_main_t
dispatch_get_main_queue(void);
dispatch_queue_global_t
dispatch_get_global_queue(long identifier, unsigned long flags);
dispatch_queue_t
dispatch_queue_create(const char *_Nullable label, dispatch_queue_attr_t _Nullable attr);
Copy the code
dispatch_queue_create
The pseudocode is as follows:
dispatch_queue_t
dispatch_queue_create(const char *label, dispatch_queue_attr_t attr)
{
return _dispatch_lane_create_with_target(label, attr,
DISPATCH_TARGET_QUEUE_DEFAULT, true);
}
//_dispatch_lane_create_with_target
static dispatch_queue_t
_dispatch_lane_create_with_target(const char *label, dispatch_queue_attr_t dqa,
dispatch_queue_t tq, bool legacy)
{
//Step 1: Normalize arguments (qos, overcommit, tq)
dispatch_queue_attr_info_t dqai = _dispatch_queue_attr_to_info(dqa);// Returns an empty attribute object for serial queues (DQA is NULL), and a default initialization value for parallel queues
Overcommit whether new threads need to be created. Tq target queue is DISPATCH_TARGET_QUEUE_DEFAULT by default
if(! tq) { tq = _dispatch_root_queues[2 * (qos - 1) + overcommit];// If the destination queue is not specified, it is obtained from the root queue array
}
Initialize the queue Initialize the queue Initialize the queue
// Apply for memory space
dispatch_lane_t dq = _dispatch_object_alloc(vtable,
sizeof(struct dispatch_lane_s));
// Initialize the serial number from 17, the rest is as follows:
// skip zero
// 1-main_q // main queue
// 2 -mgr_q // Manage the queue
// 3 -mgr_root_q // The target queue of the management queue
/ / 4,5,6,7,8,9,10,11,12,13,14,15 - global the queues, global queue has set up a file in the root queue array initialization, respectively according to different qos specified priority queue
// 17 - workloop_fallback_q
// we use 'xadd' on Intel, so the initial value == next assigned
dq->dq_serialnum = os_atomic_inc_orig(&_dispatch_queue_serial_numbers, relaxed);
// Number of concurrent queues (1 for serial queues and DISPATCH_QUEUE_WIDTH_MAX for parallel queues)
dq->dq_width = dqai.dqai_concurrent ? DISPATCH_QUEUE_WIDTH_MAX : 1;
dq->dq_label = label;// Specify a name
dq->dq_priority = _dispatch_priority_make((dispatch_qos_t)dqai.dqai_qos,
dqai.dqai_relpri);// Specify the priority
dq->do_targetq = tq;// Specify the destination queue
return dq._dq;
}
// Allocated global queue, specifying queues of different qos classes
struct dispatch_queue_global_s _dispatch_root_queues[] = {
// Initialize the property. _DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE,0,
.dq_label = "com.apple.root.maintenance-qos",
.dq_serialnum = 4,
),
_DISPATCH_ROOT_QUEUE_ENTRY(MAINTENANCE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
.dq_label = "com.apple.root.maintenance-qos.overcommit",
.dq_serialnum = 5,
),
_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, 0,
.dq_label = "com.apple.root.background-qos",
.dq_serialnum = 6,
),
_DISPATCH_ROOT_QUEUE_ENTRY(BACKGROUND, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
.dq_label = "com.apple.root.background-qos.overcommit",
.dq_serialnum = 7,
),
_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, 0,
.dq_label = "com.apple.root.utility-qos",
.dq_serialnum = 8,
),
_DISPATCH_ROOT_QUEUE_ENTRY(UTILITY, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
.dq_label = "com.apple.root.utility-qos.overcommit",
.dq_serialnum = 9,
),
_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT, DISPATCH_PRIORITY_FLAG_FALLBACK,
.dq_label = "com.apple.root.default-qos",
.dq_serialnum = 10,
),
_DISPATCH_ROOT_QUEUE_ENTRY(DEFAULT,
DISPATCH_PRIORITY_FLAG_FALLBACK | DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
.dq_label = "com.apple.root.default-qos.overcommit",
.dq_serialnum = 11,
),
_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, 0,
.dq_label = "com.apple.root.user-initiated-qos",
.dq_serialnum = 12,
),
_DISPATCH_ROOT_QUEUE_ENTRY(USER_INITIATED, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
.dq_label = "com.apple.root.user-initiated-qos.overcommit",
.dq_serialnum = 13,
),
_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, 0,
.dq_label = "com.apple.root.user-interactive-qos",
.dq_serialnum = 14,
),
_DISPATCH_ROOT_QUEUE_ENTRY(USER_INTERACTIVE, DISPATCH_PRIORITY_FLAG_OVERCOMMIT,
.dq_label = "com.apple.root.user-interactive-qos.overcommit",
.dq_serialnum = 15,),};Copy the code
Dispatch_queue_create creates a queue and initializes its parameters, including its name, priority, number of concurrent requests, sequence number, and destination queue.
dispatch_get_global_queue
// The core function
static inline dispatch_queue_global_t
_dispatch_get_root_queue(dispatch_qos_t qos, bool overcommit)
{
if (unlikely(qos < DISPATCH_QOS_MIN || qos > DISPATCH_QOS_MAX)) {
DISPATCH_CLIENT_CRASH(qos, "Corrupted priority");
}
return &_dispatch_root_queues[2 * (qos - 1) + overcommit];
}
Copy the code
Queues are initialized with _dispatch_root_queues based on qos and overcommit.
dispatch_get_main_queue
// Core code
struct dispatch_queue_static_s _dispatch_main_q = {
DISPATCH_GLOBAL_OBJECT_HEADER(queue_main),
#if! DISPATCH_USE_RESOLVERS
.do_targetq = _dispatch_get_default_queue(true),
#endif
.dq_state = DISPATCH_QUEUE_STATE_INIT_VALUE(1) |
DISPATCH_QUEUE_ROLE_BASE_ANON,
.dq_label = "com.apple.main-thread",
.dq_atomic_flags = DQF_THREAD_BOUND | DQF_WIDTH(1),
.dq_serialnum = 1};Copy the code
The main queue is _dispatch_main_q queue, whose queue name is com.apple.main-thread, width is 1, serial number is 1.
async
void dispatch_async(dispatch_queue_t dq, dispatch_block_t work)
{
// Allocate dispatch_CONTINUATION memory
dispatch_continuation_t dc = _dispatch_continuation_alloc();
uintptr_t dc_flags = DC_FLAG_CONSUME;
dispatch_qos_t qos;
// Initialize the primary copy(work) to avoid the block being destroyed before execution;
/ / specify the dc - > dc_ctxt = work;
//dc->func=_dispatch_call_block_and_release; Used to release a block after it completes execution
qos = _dispatch_continuation_init(dc, dq, work, 0, dc_flags);
//_dispatch_continuation_async core functions are dX_push function Pointers saved in do_vtable
_dispatch_continuation_async(dq, dc, qos, dc->dc_flags);
}
static inline void
_dispatch_continuation_async(dispatch_queue_class_t dqu,
dispatch_continuation_t dc, dispatch_qos_t qos, uintptr_t dc_flags)
{
#if DISPATCH_INTROSPECTION
if(! (dc_flags & DC_FLAG_NO_INTROSPECTION)) { _dispatch_trace_item_push(dqu, dc); }#else
(void)dc_flags;
#endif
return dx_push(dqu._dq, dc, qos);// The core is the dq_push pointer stored in do_vtable
}
Copy the code
The pointer to the dx_push function is specified when init.c is initialized as follows:
//workloop class instance vtable
_dispatch_workloop_push
/ / queue_serial queue_runloop, source and channel, the Mach serial/runloop/source/channel/Mach
_dispatch_lane_push
//queue_concurrent concurrent queue
_dispatch_lane_concurrent_push
//queue_global,queue_pthread_root Global queue and root thread
_dispatch_root_queue_push
//queue_mgr manages queues
_dispatch_mgr_queue_push
/ / queue_main the home team
_dispatch_main_queue_push
Copy the code
The following analyzes common primary and global queues:
void _dispatch_main_queue_push(dispatch_queue_main_t dq, dispatch_object_t dou,
dispatch_qos_t qos)
{
/ / pseudo code
_disaptch_queue_push();//push to main queue
dx_wakeup()// Call the dq_wakeup function in do_vtable to wakeup the queue and execute
}
Copy the code
Call stack for MAC (ios has some differences) :
dx_wakeup
_dispatch_main_queue_wakeup
_dispatch_runloop_queue_wakeup
_dispatch_runloop_queue_poke
_dispatch_runloop_queue_handle_init// Dispatch_once_f dispatch_once_f dispatch_once_f dispatch_once_f dispatch_once_f dispatch_once_f dispatch_once_f dispatch_once_f
mach_port_construct// Build a thread-Mach port
_dispatch_runloop_queue_set_handle
dq->do_ctxt = (void(*)uintptr_t)handle
_dispatch_send_wakeup_runloop_thread// Wake up the runloop through the Mach port built above
Copy the code
The main thread runloop call stack looks like this:
push
mach port
_dispatch_send_wakeup_runloop_thread
mach port
runloop
runloop
libdispatch.dylib
_dispatch_main_queue_callback_4CF
block
Global Queue call stack:
_dispatch_root_queue_push
_dispatch_root_queue_push_override
_dispatch_root_queue_push_inlineOs_mpsc_push_list Atomic push to the queue
_dispatch_root_queue_poke
_dispatch_root_queue_poke_slow
_pthread_workqueue_addthreads// Add the task to the work queue
Copy the code
The work queue call thread stack looks like this:
global quque
push
root queue
pop
Other queue types can be traced through the source code and call stack analysis, as long as you understand the working principle, the source code needs to deal with various situations of the queue logic is more complex;
Sync
Dispatch_sync Join global queue analysis:
dispatch_sync(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
NSLog(@"global queue sync fired");
});
Copy the code
The dispatch_sync call stack is as follows:
dispatch_sync
_dispatch_sync_f
_dispatch_sync_f_inline
_dispatch_barrier_sync_f// Serial queue
_dispatch_queue_try_acquire_barrier_sync// Attempts to lock the queue. If the lock fails, the queue is being scheduled
_dispatch_sync_f_slow
_dispatch_sync_recurse// Nested calls with multiple target queues are recursive calls
_dispatch_lane_barrier_sync_invoke_and_complete// If none of the above occurs, the queue triggers the call immediately
_dispatch_sync_f_slow// Concurrent queue
_dispatch_sync_function_invoke
_dispatch_client_callout
__main_block_invoke
Copy the code
Execute tasks synchronously. If a task is being executed or the current queue is scheduled, the system locks the task (semaphore or KEvent) until the task is executed. Otherwise, the system directly executes the task to ensure synchronous execution.
The little knowledge
__builtin_expect
GCC introduced to optimize compiler instructions, avoid instruction jump, improve CPU efficiency;
By introducing pipeline into CPU, CPU efficiency can be improved. More simply, allowing the CPU to pre-fetch the next instruction provides CPU efficiency. As shown below:
It can be seen that CPU flow money can reduce the CPU waiting time for instructions, thus improving the EFFICIENCY of the CPU. If there is a jump instruction, the pre-fetched instruction is useless. When executing the current instruction, the CPU fetches the next instruction of the current instruction from memory. After executing the current instruction, the CPU discovers that instead of executing the next instruction, it is executing the instruction at offset. The CPU can only refetch instructions at offset from memory. Therefore, jump instructions reduce pipeline efficiency, and thus CPU efficiency. In summary, skip statements should be avoided when writing programs. So how do you avoid jump statements? The answer is **builtin_expect. This instruction was introduced by GCC to “allow the programmer to tell the compiler which branch is most likely to be executed.” The command is written as: **builtin_expect(EXP, N). The probability that e to the N is equal to N is high. The __builtin_expect directive is generally used to encapsulate it as likely and Unlikely macros, which are commonly used in kernel programming. These macros are written as follows:
#define likely(x) __builtin_expect(!! (x), 1) //x is likely to be true #define unlikely(x) __builtin_expect(!! (x), 0) //x is probably false Copy the code
Just be clear:
ifLikely (value) // equivalent toif(value) if(unlikely(value)) // Is equivalent toif(value) Copy the code
However, statements following if are more likely to be executed with likely(), and statements following else are more likely to be executed with Unlikely (). In this way, the compiler can reduce the performance penalty of instruction jumps by following the most likely code right behind it during compilation.
Builtin_expect instructions
GCC __builtin_expect
os_atomic_cmpxchg
The p variable is equivalent to the PTR pointer of atomic_t type used to obtain the value of the current memory access restriction rule M, which is used to compare the old value E, and assign a new value V if it is equal.
#define os_atomic_cmpxchg(p, e, v, m) \
({ _os_atomic_basetypeof(p) _r = (e); \
atomic_compare_exchange_strong_explicit(_os_atomic_c11_atomic(p), \
&_r, v, memory_order_##m, memory_order_relaxed); })
typedef enum memory_order {
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_consume = __ATOMIC_CONSUME,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
Copy the code
For comparison, see atomic_cmpxchg:
static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
Copy the code
Atomic_cmpxchg is used to compare the value of the Atmoic variable and replace it with the new value if it is equal to the value in memory. The PTR argument points to an atomic_t variable; The parameter old represents the value compared to memory; New represents the value of the substitution.
Atomic_cmpxchg source code analysis
Linux atomic_cmpxchg()/Atomic_read()/Atomic_set()/Atomic_add()/Atomic_sub()/atomi
std::memory_order
LLDB debugging
Use breakpoint matching and mirror matching to find function symbols, you can view all link library function symbols and break points, follow up the function call stack;
Breakpoint set -r -n _dispatch_ // Image lookup -r -n _dispatch_ libdispatch.dylib // Specify a matching function to find the libdispatch.dylib dynamic libraryCopy the code
LLDB supports Python script parsing, and you can import Python scripts for advanced debugging.
Refenrence
Dispatch
IOS Multithreading summary
Dig up the libDispatch source code
The threading principle of GCD