Look at the work recently, push inside ah micro signalbgwx7788
struct objc_class : objc_object {
// Class ISA; / / 8
Class superclass; / / 8
cache_t cache; // 16 // formerly cache pointer and vtable
class_data_bits_t bits; // class_rw_t * plus custom RR /alloc flags attribute method protocol //8. }Copy the code
The previous articles in this series examined objc_class's isa, superclass, and bits. This article examines cache_t.
The offset of cache_t within objc_class is 8 + 8 = 16 bytes (0x10 in hexadecimal), so cache_t lives at the class's base address + 0x10.
0x01 – cache_t
What’s stored in it?
Let’s take a look at the cache_t structure type and see what’s stored in it.
truct cache_t {
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED
// explicit_atomic displays atomicity. The purpose is to ensure thread safety when adding or deleting changes
Struct bucket_t * _buckets;
// bucket_t contains sel and IMP, which are stored in different order depending on the schema
// cache_t reads _buckets with buckets()
explicit_atomic<struct bucket_t* > _buckets; / / 8
explicit_atomic<mask_t> _mask; / / 4
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
explicit_atomic<uintptr_t> _maskAndBuckets; // Buckets and mask are stored together to save memory
mask_t _mask_unused;
// How much the mask is shifted by.
static constexpr uintptr_t maskShift = 48;
// Additional bits after the mask which must be zero. msgSend
// takes advantage of these additional bits to construct the value
// `mask << 4` from `_maskAndBuckets` in a single instruction.
static constexpr uintptr_t maskZeroBits = 4;
// The largest mask value we can store.
static constexpr uintptr_t maxMask = ((uintptr_t)1< < (64 - maskShift)) - 1;
// The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
// Ensure we have enough bits for the buckets pointer.
static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
// _maskAndBuckets stores the mask shift in the low 4 bits, and
// the buckets pointer in the remainder of the value. The mask
// shift is the value where (0xffff >> shift) produces the correct
// mask. This is equal to 16 - log2(cache_size).
explicit_atomic<uintptr_t> _maskAndBuckets;
mask_t _mask_unused;
static constexpr uintptr_t maskBits = 4;
static constexpr uintptr_t maskMask = (1 << maskBits) - 1;
static constexpr uintptr_t bucketsMask = ~maskMask;
#else
#error Unknown cache mask storage type.
#endif
#if __LP64__
uint16_t _flags; / / 2
#endif
uint16_t _occupied; / / 2
public:
static bucket_t *emptyBuckets(a);
struct bucket_t *buckets(a);
mask_t mask(a);
mask_t occupied(a);
//
void incrementOccupied(a);
void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
void initializeToEmpty(a);
unsigned capacity(a);
bool isConstantEmptyCache(a);
bool canBeFreed(a); . }Copy the code
Let's start with these macros, which select the cache layout per platform:

- CACHE_MASK_STORAGE_OUTLINED — simulator or macOS
- CACHE_MASK_STORAGE_HIGH_16 — 64-bit real devices
- CACHE_MASK_STORAGE_LOW_4 — non-64-bit real devices

These are platform-specific macros:
// Cache mask storage schemes (from objc-config.h, objc4-781).
#define CACHE_MASK_STORAGE_OUTLINED 1
#define CACHE_MASK_STORAGE_HIGH_16 2
#define CACHE_MASK_STORAGE_LOW_4 3

// The compiler selects the storage scheme at compile time per platform.
#if defined(__arm64__) && __LP64__        // 64-bit real device
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_HIGH_16
#elif defined(__arm64__) && !__LP64__     // non-64-bit real device
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_LOW_4
#else                                     // everything else: simulator / macOS
#define CACHE_MASK_STORAGE CACHE_MASK_STORAGE_OUTLINED
#endif
The bucket_t structure type:
struct bucket_t {
private:
// IMP-first is better for arm64e ptrauth and no worse for arm64.
// SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__ / / real machine
// Protection is also added here via explicit_atomic
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else / / the real machine
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
#endif
public:
inline SEL sel(a) const { return _sel.load(memory_order::memory_order_relaxed); } / / for sel
/ / for IMP
inline IMP imp(Class cls) const {
uintptr_t imp = _imp.load(memory_order::memory_order_relaxed);
if(! imp)return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
SEL sel = _sel.load(memory_order::memory_order_relaxed);
return (IMP)
ptrauth_auth_and_resign((const void *)imp,
ptrauth_key_process_dependent_code,
modifierForSEL(sel, cls),
ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif}}Copy the code
cache_t stores bucket_t entries, and each bucket_t holds a sel and an imp — in other words, the sel-imp pairs of called methods are what gets cached in cache_t.

0x02 – Verifying what cache_t stores via LLDB
Now let’s verify the above analysis against the source environment
// A simple class with four methods, used to watch cache_t fill up.
@interface Person : NSObject
- (void)sayHello;
- (void)sayCode;
- (void)say1;
- (void)say2;
@end

@implementation Person // FIX: the class name was missing after @implementation
- (void)sayHello
{
    NSLog(@"%s", __func__);
}
- (void)sayCode
{
    NSLog(@"%s", __func__);
}
- (void)say1
{
    NSLog(@"%s", __func__);
}
- (void)say2
{
    NSLog(@"%s", __func__);
}
@end
These four methods are invoked to validate the code cache_t through the simple Person class above
Set a breakpoint at the first method call, so that the Person object's method cache is still empty before any method runs, then print the class's base address in the Debug area. Adding 0x10 to the base address gives the memory address of cache_t.
The cache_t address is translated to print its contents, which are empty because no method has been called, such as _occupied = 0
Use cache_t's `struct bucket_t *buckets()` method to obtain the bucket array, then call bucket_t's `sel()` and `imp(Class)` methods to read the cached sel and imp.
From the above analysis, method caching is performed every time a method is called.
So now we go through a breakpoint and execute the first method
⚠️ `buckets()` returns an array, so you can use subscripting (`[]`) or pointer arithmetic (`+ 1`) to access the individual bucket_t values.
So we’ve got the first cached method in the cache, and we’re going to look at MachOView and see if this is the sayHello method that we’re calling?
The method address we see in MachOView is exactly the same as the address we printed out, so make sure it’s the sayHello method we called.
0x03 – Testing what cache_t stores outside the objc4-781 source environment
First, open a common project and copy the source code type as follows:
typedef uint32_t mask_t;  // x86_64 & arm64 asm are less efficient with 16-bits

// Mirror of bucket_t (macOS / simulator layout: SEL first, IMP second).
struct test_bucket_t {
    SEL _sel;
    IMP _imp;
};

// Mirror of cache_t (CACHE_MASK_STORAGE_OUTLINED layout).
struct test_cache_t {
    struct test_bucket_t *_buckets; // 8
    mask_t _mask;                   // 4
    uint16_t _flags;                // 2
    uint16_t _occupied;             // 2
};

// Mirror of objc_class. NOTE: in the real source `isa` is inherited from
// objc_object; since there is no parent struct here, it must be written out.
struct test_objc_class {
    Class isa;                 // 8
    Class superclass;          // 8
    struct test_cache_t cache; // 16 -- formerly cache pointer and vtable
};

int main(int argc, const char *argv[]) {
    @autoreleasepool {
        Person *p = [Person alloc];
        Class pClass = [Person class];
        // NOTE(review): this listing calls say3/say4, but the Person class
        // shown earlier declares sayHello/sayCode/say1/say2 -- the class must
        // declare all four methods called here.
        [p say1];
        [p say2];
        [p say3];
        [p say4];
        struct test_objc_class *tepClass = (__bridge struct test_objc_class *)(pClass);
        NSLog(@"%hu - %u", tepClass->cache._occupied, tepClass->cache._mask);
        // NOTE(review): this scans only _mask (= capacity - 1) slots, so the
        // final bucket is never printed; use `i <= _mask` to cover all slots.
        for (mask_t i = 0; i < tepClass->cache._mask; i++) {
            struct test_bucket_t bucket = tepClass->cache._buckets[i];
            NSLog(@"%@ - %p", NSStringFromSelector(bucket._sel), bucket._imp);
        }
    }
    return 0;
}
❤️ Interested readers can try this themselves — it is more intuitive than exploring in LLDB.
Calls the print of a method once.
Call the print of the method four times
Looking at the printed output, some questions arise:

- What do the printed values `2 - 7` stand for? That is, what exactly are `occupied` and `mask`?
- Why is the printed output different after calling one method versus calling three methods?
- Why do some buckets appear to be "lost"? For example, after four method calls, only `say3` and `say4` are printed.
0x04 – cache_t insertion in practice 🧍‍♀️
With some of these questions in mind, take a look at cache_t’s source code and see if you can get an answer? In the above example, finding only the occupied increment causes the mask to change, and in cache_t only incrementOccupied() increases the occupied increment by one.
void cache_t::incrementOccupied(a)
{
_occupied++;
}
Copy the code
Follow the line again to see where this function is used to find the core function of the day void cache_t::insert
/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
INIT_CACHE_SIZE_LOG2 = 2,
INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2), // 1 << 2 = 4 buckets initially
MAX_CACHE_SIZE_LOG2 = 16,
MAX_CACHE_SIZE = (1 << MAX_CACHE_SIZE_LOG2), // 1 << 16 = 65536 buckets at most
};
// Insert a sel/imp pair into the cache, growing (and discarding) the bucket
// array when it passes 3/4 occupancy. The runtime (or cache) lock must be held.
ALWAYS_INLINE
void cache_t::insert(Class cls, SEL sel, IMP imp, id receiver)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    ASSERT(sel != 0 && cls->isInitialized());

    // Use the cache as-is if it is less than 3/4 full.
    mask_t newOccupied = occupied() + 1; // occupied() is 0 before the first insert, so newOccupied = 1
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) { // slow path: still the shared read-only empty cache
        // Cache is read-only. Replace it.
        if (!capacity) capacity = INIT_CACHE_SIZE; // initial capacity: 4 (1 << 2)
        reallocate(oldCapacity, capacity, /* freeOld */false); // allocate the first real bucket array
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
        // Cache is less than 3/4 full. Use it as-is.
        // Below the 3/4 threshold there is nothing to do.
    }
    else {
        // Over 3/4 full: double the capacity (e.g. 4 -> 8), capped at the max.
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        // There was an old cache but it is full; reallocate and free the old buckets.
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();           // current bucket array
    mask_t m = capacity - 1;           // the mask is always capacity - 1
    mask_t begin = cache_hash(sel, m); // hash sel to its starting index
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot because the
    // minimum size is 4 and we resized at 3/4 full.
    do {
        if (fastpath(b[i].sel() == 0)) { // empty slot at this index: cache sel/imp here
            incrementOccupied();
            b[i].set<Atomic, Encoded>(sel, imp, cls);
            return;
        }
        if (b[i].sel() == sel) { // occupied slot: is it already the same sel?
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin)); // collision: probe the next index until we wrap around

    cache_t::bad_cache(receiver, (SEL)sel, cls);
}
Void cache_t::insert inserts methods into the cache
The above snippet mostly does the following:

- First it reads `_occupied`, the number of cache slots used so far.
- Based on that occupied count it chooses the appropriate branch.
- Finally it stores the `imp` and `sel` into a bucket.

Note that `occupied` changes in more situations than explicit method calls:

- As soon as an object is created, calling `init` changes `occupied`.
- Property `get` and `set` accessors also change `occupied`.

Based on `occupied`, `insert` decides what to do:

- On the first execution the cache does not exist yet, so a cache with a default capacity of 4 is created:
if (slowpath(isConstantEmptyCache())) { ; // A low probability event: "Occupied ()" is 0
// Cache is read-only. Replace it.
if(! capacity) capacity = INIT_CACHE_SIZE;// Initialize 4 (capacity = 1<<2)
reallocate(oldCapacity, capacity, /* freeOld */false); // Initialize create to open up memory space
}
Copy the code
- If the cache usage is less than three quarters, no processing is done
if (fastpath(newOccupied + CACHE_END_MARKER <= capacity / 4 * 3)) {
// Cache is less than 3/4 full. Use it as-is.
// Do not expand within the range of less than three quarters
// Do nothing in range
}
Copy the code
- If the cache capacity exceeds the threshold, expand the cache capacity and re-create the cache capacity
else {
// If the number exceeds three quarters, the capacity will be expanded
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; 2*4=8
if (capacity > MAX_CACHE_SIZE) { //
capacity = MAX_CACHE_SIZE;
}
// If the cache has been stored before, it has an old cache, but it is full, and starts to reallocate memory
reallocate(oldCapacity, capacity, true);
}
Copy the code
// Replace the bucket array with a fresh, empty one of newCapacity slots.
// Old entries are NOT copied over; if freeOld, the old array is queued for
// garbage collection.
ALWAYS_INLINE
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets(); // may still be the shared empty buckets on first call
    bucket_t *newBuckets = allocateBuckets(newCapacity); // allocate newCapacity slots

    // Cache's old contents are not propagated.
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this
    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1); // store buckets and capacity-1 as the mask

    if (freeOld) {
        cache_collect_free(oldBuckets, oldCapacity);
    }
}

// Allocate a zeroed bucket array of newCapacity slots plus an end marker.
bucket_t *allocateBuckets(mask_t newCapacity)
{
    // Allocate one extra bucket to mark the end of the list.
    // This can't overflow mask_t because newCapacity is a power of 2.
    bucket_t *newBuckets = (bucket_t *)
        calloc(cache_t::bytesForCapacity(newCapacity), 1);

    bucket_t *end = cache_t::endMarker(newBuckets, newCapacity);

#if __arm__
    // End marker's sel is 1 and imp points BEFORE the first bucket.
    // This saves an instruction in objc_msgSend.
    end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)(newBuckets - 1), nil);
#else
    // End marker's sel is 1 and imp points to the first bucket.
    end->set<NotAtomic, Raw>((SEL)(uintptr_t)1, (IMP)newBuckets, nil);
#endif

    if (PrintCaches) recordNewCache(newCapacity);

    return newBuckets;
}
- `allocateBuckets` requests storage for `newCapacity` buckets from the system.
- `setBucketsAndMask` stores the newly allocated buckets in the cache; it performs different operations depending on the platform architecture.
- On a real (arm64) device, buckets and mask are packed into a single field and `occupied` is reset to zero:
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask) { uintptr_t buckets = (uintptr_t)newBuckets; uintptr_t mask = (uintptr_t)newMask; ASSERT(buckets <= bucketsMask); ASSERT(mask <= maxMask); _maskAndBuckets.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, std::memory_order_relaxed); _occupied = 0; } Copy the code
- On the simulator or macOS, buckets and mask are stored normally in separate fields, and `occupied` is reset to zero:
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask) { // objc_msgSend uses mask and buckets with no locks. // It is safe for objc_msgSend to see new buckets but old mask. // (It will get a cache miss but not overrun the buckets' bounds). // It is unsafe for objc_msgSend to see old buckets and new mask. // Therefore we write new buckets, wait a lot, then write new mask. // objc_msgSend reads mask first, then buckets. #ifdef __arm__ // ensure other threads see buckets contents before buckets pointer mega_barrier(); _buckets.store(newBuckets, memory_order::memory_order_relaxed); // ensure other threads see new buckets before new mask mega_barrier(); _mask.store(newMask, memory_order::memory_order_relaxed); _occupied = 0; #elif __x86_64__ || i386 // ensure other threads see buckets contents before buckets pointer _buckets.store(newBuckets, memory_order::memory_order_release); // ensure other threads see new buckets before new mask _mask.store(newMask, memory_order::memory_order_release); _occupied = 0; // Set it to 0 #else #error Don't know how to do setBucketsAndMask on this architecture. #endif } Copy the code
- If there are old buckets that need to be freed:
if (freeOld) {
cache_collect_free(oldBuckets, oldCapacity);
}
/***********************************************************************
* cache_collect_free.  Add the specified malloc'd memory to the list
* of them to free at some later point.
* size is used for the collection threshold. It does not have to be
* precisely the block's size.
* Cache locks: cacheUpdateLock must be held by the caller.
**********************************************************************/
static void cache_collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    if (PrintCaches) recordDeadCache(capacity);

    _garbage_make_room();                                     // make sure the garbage table has room
    garbage_byte_size += cache_t::bytesForCapacity(capacity);
    garbage_refs[garbage_count++] = data;                     // queue the old buckets for later freeing
    cache_collect(false);                                     // collect garbage: free dead caches when safe
}
// Ensure the garbage-collection table has room for one more entry:
// create it on first use, double it when it fills up.
static void _garbage_make_room(void)
{
    static int first = 1;

    // Create the collection table the first time it is needed
    if (first)
    {
        first = 0;
        garbage_refs = (bucket_t **)malloc(INIT_GARBAGE_COUNT * sizeof(void *));
        garbage_max = INIT_GARBAGE_COUNT;
    }
    // Double the table if it is full
    else if (garbage_count == garbage_max)
    {
        garbage_refs = (bucket_t **)realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
        garbage_max *= 2; // record the doubled capacity
    }
}
- If old buckets exist, `cache_collect_free` is called to release the `oldCapacity` worth of memory.
- On the first call, the garbage-collection buffer must be created.
- On subsequent calls, if the buffer is full, its capacity is doubled from the current size.
- The `cache_collect` method then performs garbage collection, cleaning up the old buckets.
Storing sel and imp
// Class points to cache. SEL is key. Cache buckets store SEL+IMP.
// Caches are never built in the dyld shared cache.

// Hash a selector into a bucket index: (sel & mask) always lands in
// [0, capacity-1] because mask == capacity - 1 and capacity is a power of 2.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
    return (mask_t)(uintptr_t)sel & mask;
}
Index generation relies on `cache_hash(sel, m)`. After computing the index, insertion handles three cases:

- If the slot at the computed index is empty (`sel() == 0`), the sel-imp pair is stored there and `occupied` is incremented:
if (fastpath(b[i].sel() == 0)) { incrementOccupied(); // b[i].set<Atomic, Encoded>(sel, imp, cls); return; } Copy the code
- If the slot at the current index already holds a value, check whether it is the same sel; if so, another thread already cached this entry, and we return directly:
if (b[i].sel() == sel) { // The entry was added to the cache by some other thread // before we grabbed the cacheUpdateLock. return; } Copy the code
- If neither condition holds, the `cache_next(i, m)` collision-resolution function recomputes the index and the comparison repeats:
#if __arm__ || __x86_64__ || __i386__ // objc_msgSend has few registers available. // Cache scan increments and wraps at special end-marking bucket. #define CACHE_END_MARKER 1 static inline mask_t cache_next(mask_t i, mask_t mask) { return (i+1) & mask; // (the current hash subscript +1) &mask, re-hash, Decrements. No end marker. // Cache scan decrements needed. #define CACHE_END_MARKER 0 static inline mask_t cache_next(mask_t i, mask_t mask) { return i ? i-1 : mask; Sel-imp} #else #error unknown architecture #endifCopy the code
At this point, I think cache_t is coming to an end, and with this analysis, I think I have answers to the questions I raised above,
-
_mask equals capacity-1, and is also mask data used to calculate hash subscripts in hash algorithms and hash collision algorithms
-
_occupied is the amount of sel-IMP occupied by buckets.
-
The init initialization method also causes changes to occupied.
-
Both attribute methods and method calls affect changes to Occupied.
-
- Why do `occupied` and `mask` change as method calls increase?
As methods are called, buckets expands its capacity to hold the growing cache, which changes both `occupied` and `mask`; see the expansion analysis above for details.
-
Why is buckets missing?
During capacity expansion, the previous memory will be released and re-applied for memory, so the previous cache content will not exist.
On arm64 real devices the largest storable mask value is `((uintptr_t)1 << 16) - 1`, since the mask occupies the high 16 bits of `_maskAndBuckets`; on the simulator the mask is kept in its own separate field.