As the previous section showed, the cache insert path is reached from objc_msgSend, so let's examine the objc_msgSend flow. Some of the code quoted below is not on the path analyzed earlier; if you prefer, you can skip straight to the summary.

Assembly analysis of objc_msgSend

The demo code

        Person *p  = [Person alloc];
        [p sayHappy];
Copy the code

`[p sayHappy];` calls objc_msgSend, which performs the message lookup; in essence, message lookup is the process of finding an imp from a sel. The arm64 implementation lives in objc-msg-arm64.s:

        // Entry point of objc_msgSend (arm64).
	ENTRY _objc_msgSend
	UNWIND _objc_msgSend, NoFrame

	// p0 is the receiver of the message (p in the demo above).
	// One signed compare against zero serves both the nil check and the
	// tagged-pointer check.
	cmp	p0, #0			// nil check and tagged pointer check
#if SUPPORT_TAGGED_POINTERS
	// nil or tagged pointer: go to LNilOrTagged
	b.le	LNilOrTagged		// (MSB tagged pointer looks negative)
#else
	// no tagged pointers: only nil needs handling, go to LReturnZero
	b.eq	LReturnZero
#endif
	// x0 holds the address of the object; its first word is the isa
	ldr	p13, [x0]		// p13 = isa
	// derive the class from the isa
	GetClassFromIsa_p16 p13, 1, x0	// p16 = class
	// calls imp or objc_msgSend_uncached
	CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached
Copy the code

Let’s look at GetClassFromIsa_p16

// Called here with needs_auth = 1 and auth_address = x0 (the object address the isa was loaded from),
// so the ExtractISA path is taken.
// GetClassFromIsa_p16 src, needs_auth, auth_address
//   src          - register holding the raw isa
//   needs_auth   - 0 if src is already a usable class pointer, otherwise
//                  the class bits must be extracted from the isa
//   auth_address - passed through to ExtractISA
// Result: p16 = class. The indexed-isa path also uses x10 as scratch.
.macro GetClassFromIsa_p16 src, needs_auth, auth_address /* note: auth_address is not required if !needs_auth */

#if SUPPORT_INDEXED_ISA
	// Indexed isa
	mov	p16, \src			// optimistically set dst = src
	tbz	p16, #ISA_INDEX_IS_NPI_BIT, 1f	// done if not non-pointer isa
	// isa in p16 is indexed
	adrp	x10, _objc_indexed_classes@PAGE
	add	x10, x10, _objc_indexed_classes@PAGEOFF
	ubfx	p16, p16, #ISA_INDEX_SHIFT, #ISA_INDEX_BITS  // extract index
	ldr	p16, [x10, p16, UXTP #PTRSHIFT]	// load class from array
1:

#elif __LP64__
.if \needs_auth == 0 // _cache_getImp takes an authed class already
	mov	p16, \src
.else
	// 64-bit packed isa
	ExtractISA p16, \src, \auth_address
.endif
#else
	// 32-bit raw isa
	mov	p16, \src

#endif

.endmacro



// ExtractISA dst, src, auth_address
// Invoked from GetClassFromIsa_p16 with $0 = p16 and $1 = the isa;
// the third argument is the address the isa was loaded from (x0 at the
// objc_msgSend call site) and is not used by this variant.
// Result: $0 = isa & ISA_MASK.
.macro ExtractISA
	and    $0, $1, #ISA_MASK
.endmacro

Copy the code

As we have seen, GetClassFromIsa_p16 obtains the class from the isa. Why the class? Because, as the earlier analysis showed, cache_t lives only in the class.

Next, look at CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached:

// Invocation analyzed here:
//   CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached
//   Mode             = NORMAL
//   Function         = _objc_msgSend
//   MissLabelDynamic = __objc_msgSend_uncached
//
// On entry: p1 = SEL (_cmd), p16 = class.
// Registers written in this excerpt: x15, p9, p10, p11, p12, p13, p17.
.macro CacheLookup Mode, Function, MissLabelDynamic, MissLabelConstant
	mov	x15, x16			// stash the original isa
	// p1 = SEL, p16 = isa
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
	ldr	p10, [x16, #CACHE]				// p10 = mask|buckets
	lsr	p11, p10, #48			// p11 = mask
	and	p10, p10, #0xffffffffffff	// p10 = buckets
	and	w12, w1, w11			// x12 = _cmd & mask
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
	ldr	p11, [x16, #CACHE]			// p11 = mask|buckets
#if CONFIG_USE_PREOPT_CACHES
#if __has_feature(ptrauth_calls)
	tbnz	p11, #0, LLookupPreopt\Function
	and	p10, p11, #0x0000ffffffffffff	// p10 = buckets
#else
	// Path followed in this article: keep the low 48 bits of the cache
	// word and clear bit 0; bit 0 of p11 is then tested separately.
	and	p10, p11, #0x0000fffffffffffe	// p10 = buckets
	tbnz	p11, #0, LLookupPreopt\Function
#endif
	// Compute the hashed index.
	eor	p12, p1, p1, LSR #7
	and	p12, p12, p11, LSR #48		// x12 = (_cmd ^ (_cmd >> 7)) & mask
#else
	and	p10, p11, #0x0000ffffffffffff	// p10 = buckets
	and	p12, p1, p11, LSR #48		// x12 = _cmd & mask
#endif // CONFIG_USE_PREOPT_CACHES
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
	ldr	p11, [x16, #CACHE]				// p11 = mask|buckets
	and	p10, p11, #~0xf			// p10 = buckets
	and	p11, p11, #0xf			// p11 = maskShift
	mov	p12, #0xffff
	lsr	p11, p12, p11			// p11 = mask = 0xffff >> p11
	and	p12, p1, p11			// x12 = _cmd & mask
#else
#error Unsupported cache mask storage for ARM64.
#endif

	add	p13, p10, p12, LSL #(1+PTRSHIFT)
						// p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))

						// do {
1:	ldp	p17, p9, [x13], #-BUCKET_SIZE	//     {imp, sel} = *bucket--
	cmp	p9, p1				//     if (sel != _cmd) {
	b.ne	3f				//         scan more
						//     } else {
2:	CacheHit \Mode				// hit:    call or return imp
						//     }
3:	cbz	p9, \MissLabelDynamic		//     if (sel == 0) goto Miss;
	cmp	p13, p10			// } while (bucket >= buckets)
	b.hs	1b
	// (excerpt: the macro continues with the wrap-around scan)
Copy the code

On an arm64 device, `#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16` applies, so execution follows the branch that starts with `ldr p11, [x16, #CACHE]`.

#define CACHE            (2 * __SIZEOF_POINTER__)
#define CLASS            __SIZEOF_POINTER__
Copy the code

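Since CACHE is 2 * __SIZEOF_POINTER__ (16 on a 64-bit target), `ldr p11, [x16, #CACHE]` is equivalent to `ldr p11, [x16, #16]`: it loads the word at offset 16 from the class, i.e. the first word of its cache_t (mask|buckets), into p11.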

Copy the code


and	p10, p11, #0x0000fffffffffffe	// p10 = buckets
tbnz	p11, #0, LLookupPreopt\Function
Copy the code

p10 = buckets: `cache & 0x0000fffffffffffe` clears the high 16 bits (which hold the mask) and bit 0, leaving the buckets pointer. If bit 0 of p11 is not 0, execution jumps to LLookupPreopt instead.

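// bucket address = buckets + (index << (1+PTRSHIFT)), where index = (_cmd & mask)
p12 is the index and p10 is the base address of the buckets array. Shifting the index left by 1+PTRSHIFT (4 here, i.e. multiplying by 16) gives the actual byte offset in memory; the result, p13, is the address of the bucket at that index.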

// p13 = address of the bucket at the hashed index (p10 = buckets, p12 = index)
add	p13, p10, p12, LSL #(1+PTRSHIFT)
						// p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))

						// do {
	// Load imp into p17 and sel into p9, then step p13 back by one bucket.
1:	ldp	p17, p9, [x13], #-BUCKET_SIZE	//     {imp, sel} = *bucket--
	// Compare the bucket's sel with p1 (_cmd).
	cmp	p9, p1				//     if (sel != _cmd) {
	// Not equal: skip the hit path and jump forward to label 3.
	b.ne	3f				//         scan more
						//     } else {
	// Equal: cache hit.
2:	CacheHit \Mode				// hit:    call or return imp
						//     }
	// An empty bucket (sel == 0) ends the search: go to the miss label.
3:	cbz	p9, \MissLabelDynamic		//     if (sel == 0) goto Miss;
	// While p13 has not moved below the first bucket, loop back to label 1;
	// otherwise fall through to the wrap-around scan below.
	cmp	p13, p10			// } while (bucket >= buckets)
	b.hs	1b

	// Wrap-around: point p13 at the last bucket. Only one of the three
	// variants is assembled, depending on CACHE_MASK_STORAGE.
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
	add	p13, p10, w11, UXTW #(1+PTRSHIFT)
						// p13 = buckets + (mask << 1+PTRSHIFT)
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
	add	p13, p10, p11, LSR #(48 - (1+PTRSHIFT))
						// p13 = buckets + (mask << 1+PTRSHIFT)
						// see comment about maskZeroBits
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
	add	p13, p10, p11, LSL #(1+PTRSHIFT)
						// p13 = buckets + (mask << 1+PTRSHIFT)
#else
#error Unsupported cache mask storage for ARM64.
#endif
	add	p12, p10, p12, LSL #(1+PTRSHIFT)
						// p12 = first probed bucket

	// Second loop: scan backwards from the last bucket down to the
	// first probed bucket.
						// do {
4:	ldp	p17, p9, [x13], #-BUCKET_SIZE	//     {imp, sel} = *bucket--
	cmp	p9, p1				//     if (sel == _cmd)
	b.eq	2b				//         goto hit
	cmp	p9, #0				// } while (sel != 0 &&
	ccmp	p13, p12, #0, ne		//     bucket > first_probed)
	b.hi	4b
	// Nothing found: go to the miss label.
	b	\MissLabelDynamic
Copy the code

If the cache lookup misses, control goes to MissLabelDynamic, which is the __objc_msgSend_uncached argument passed when CacheLookup was invoked. So next we look at __objc_msgSend_uncached:

       STATIC_ENTRY __objc_msgSend_uncached
	UNWIND __objc_msgSend_uncached, FrameWithNoSaves

	// Reached from CacheLookup on a cache miss.
	// Out-of-band p15 is the class to search

	// Slow path: MethodTableLookup calls lookUpImpOrForward and leaves
	// the resulting IMP in x17 ...
	MethodTableLookup
	// ... which is then branched to.
	TailCallFunctionPointer x17

	END_ENTRY __objc_msgSend_uncached
Copy the code

Look at the TailCallFunctionPointer

// TailCallFunctionPointer reg
// Branches (without link) to the function pointer held in the register
// given as $0.
.macro TailCallFunctionPointer
	// $0 = function pointer value
	br	$0
.endmacro
Copy the code

It is just a branch (`br`) to the function pointer, so the focus should be on MethodTableLookup:

// MethodTableLookup
// Slow-path lookup: calls lookUpImpOrForward(obj, sel, cls, behavior).
// On entry: x0 = receiver, x1 = selector, x16 = class to search.
// On exit:  x17 = IMP returned by lookUpImpOrForward.
.macro MethodTableLookup

	// Save registers around the call; `bl` overwrites the link register
	// and the callee may clobber the message arguments.
	SAVE_REGS MSGSEND

	// lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
	// receiver and selector already in x0 and x1
	mov	x2, x16				// x2 = cls
	mov	x3, #3				// x3 = LOOKUP_INITIALIZE | LOOKUP_RESOLVER
	bl	_lookUpImpOrForward

	// IMP in x0
	mov	x17, x0

	RESTORE_REGS MSGSEND

.endmacro


Copy the code

Here we see the call to _lookUpImpOrForward. A global search shows that it is no longer implemented in assembly: the next stage of the lookup, lookUpImpOrForward, is found in objc-runtime-new.mm.


The walkthrough above is quite messy, so here is a summary that keeps only the relevant parts:

ENTRY _objc_msgSend
// (the receiver is not nil)
// Get the class corresponding to the receiver
ldr	p13, [x0]		// p13 = isa
GetClassFromIsa_p16 p13, 1, x0	// p16 = class
// Start the cache lookup
CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached
// Load the cache word at offset CACHE (16 bytes) from the class
ldr	p11, [x16, #CACHE]			// p11 = mask|buckets
// buckets = low 48 bits of the cache word, with bit 0 cleared
and	p10, p11, #0x0000fffffffffffe	// p10 = buckets
// Compute the index
eor	p12, p1, p1, LSR #7
and	p12, p12, p11, LSR #48		// x12 = (_cmd ^ (_cmd >> 7)) & mask
// Get the bucket at that index
add	p13, p10, p12, LSL #(1+PTRSHIFT)
						// p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))
 // First loop: scan backwards from that bucket towards the first bucket
						// do {
1:	ldp	p17, p9, [x13], #-BUCKET_SIZE	//     {imp, sel} = *bucket--
	cmp	p9, p1				//     if (sel != _cmd) {
	b.ne	3f				//         scan more
						//     } else {
2:	CacheHit \Mode				// hit:    call or return imp
						//     }
3:	cbz	p9, \MissLabelDynamic		//     if (sel == 0) goto Miss;
	cmp	p13, p10			// } while (bucket >= buckets)
	b.hs	1b
  // Not found yet: move p13 to the last bucket
  add	p13, p10, p11, LSR #(48 - (1+PTRSHIFT))
						// p13 = buckets + (mask << 1+PTRSHIFT)

add	p12, p10, p12, LSL #(1+PTRSHIFT)
						// p12 = first probed bucket
// Second loop: scan backwards from the last bucket down to the first probed bucket
						// do {
4:	ldp	p17, p9, [x13], #-BUCKET_SIZE	//     {imp, sel} = *bucket--
	cmp	p9, p1				//     if (sel == _cmd)
	b.eq	2b				//         goto hit
	cmp	p9, #0				// } while (sel != 0 &&
	ccmp	p13, p12, #0, ne		//     bucket > first_probed)
	b.hi	4b
// Miss: branch to __objc_msgSend_uncached, which runs MethodTableLookup
	b	\MissLabelDynamic
 // __objc_msgSend_uncached
 //   -> MethodTableLookup
 //   -> lookUpImpOrForward
 bl	_lookUpImpOrForward
Copy the code


1. Looking for a bucket
// Extract buckets from the low 48 bits of the cache word
and	p10, p11, #0x0000fffffffffffe	// p10 = buckets
Copy the code

_bucketsAndMaybeMask holds a buckets_t pointer in its low 48 bits, which is why the assembly masks it with #0x0000fffffffffffe:

    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
    // _maybeMask is unused, the mask is stored in the top 16 bits.

    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;

    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_bucketsAndMaybeMask` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;

    // The largest mask value we can store (16 bits: 64 - maskShift).
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;

    // The mask applied to `_bucketsAndMaybeMask` to retrieve the buckets pointer.
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS,
            "Bucket field doesn't have enough bits for arbitrary pointers.");
Copy the code
2. Finding the index
// compute the index
eor	p12, p1, p1, LSR #7
and	p12, p12, p11, LSR #48		// x12 = (_cmd ^ (_cmd >> 7)) & mask
Copy the code

This corresponds to `mask_t begin = cache_hash(sel, m);` on the insert side; cache_hash is the algorithm that computes the index:

// Maps a selector to a starting index in the bucket array.
// sel  - selector being looked up or inserted
// mask - capacity - 1; the result is always within [0, mask]
// Mirrors the assembly: (_cmd ^ (_cmd >> 7)) & mask.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
    uintptr_t value = (uintptr_t)sel;
    value ^= value >> 7;
    return (mask_t)(value & mask);
}
Copy the code
3. Walk through the process
   bucket_t *b = buckets();
    mask_t m = capacity - 1; / / 4-1 = 3
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;
 do {
        if (fastpath(b[i].sel() == 0)) {
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return; }}while(fastpath((i = cache_next(i, m)) ! = begin));Copy the code

Take a look at the source code for cache_next

#elif __arm64__
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
Copy the code

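Example: suppose mask = 7 and i = 3 initially. After the first call to cache_next, i = 2, and it keeps decrementing. When i reaches 0, the next call wraps i around to mask (7). It then continues decrementing until i == 3 again, the starting index, at which point the loop ends.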