We all know that a Category's structure is determined at compile time, that its content is attached to the class and metaclass at run time, and that member variables cannot be added in a Category. So the questions are: 1. What steps does this process involve? 2. How is the content attached to a Class or MetaClass? 3. Why can't something so useful add member variables?


Implementation principle

First of all, let's look at what happens to a Category at compile time. We have a test class and a category on it:

// TYObject.h
// Minimal test class with no members of its own.
@interface TYObject : NSObject
@end

// TYObject+Test.h
// Category on TYObject used as the running example: two properties plus an
// instance method and a class method that share the selector name `test`.
@interface TYObject (Test)
@property (strong, nonatomic) NSString *testStr;
@property (assign, nonatomic) NSInteger testInt;
- (void)test;
+ (void)test;
@end
Copy the code

Use the clang rewriter to convert it to C++ code: xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc TYObject+Test.m

TYObject+Test.cpp

// Compile-time record describing one category, as emitted by the rewriter.
struct _category_t {
	const char *name; // Name of the class
	struct _class_t *cls; // Class the category belongs to
	const struct _method_list_t *instance_methods; // List of instance methods
	const struct _method_list_t *class_methods; // List of class methods
	const struct _protocol_list_t *protocols; // Protocol list
	const struct _prop_list_t *properties; // Property list
};

static struct _category_t _OBJC_The $_CATEGORY_TYObject_The $_Test __attribute__ ((used.section(" __DATA, __objc_const"))) = 
{
	"TYObject".0.// &OBJC_CLASS_$_TYObject,
	(const struct _method_list_t *)&_OBJC_$_CATEGORY_INSTANCE_METHODS_TYObject_$_Test,
	(const struct _method_list_t *)&_OBJC_$_CATEGORY_CLASS_METHODS_TYObject_$_Test,
	0,
	(const struct _prop_list_t *)&_OBJC_$_PROP_LIST_TYObject_$_Test,
};

// List of instance methods
static struct/ * _method_list_t* / {
	unsigned int entsize;  // sizeof(struct _objc_method)
	unsigned int method_count;
	struct _objc_method method_list[1].
} _OBJC_$_CATEGORY_INSTANCE_METHODS_TYObject_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) = {
	sizeof(_objc_method),
	1,
	{{(struct objc_selector *)"test"."v16@0:8", (void *)_I_TYObject_Test_test}}
};

// List of class methods
static struct/ * _method_list_t* / {
	unsigned int entsize;  // sizeof(struct _objc_method)
	unsigned int method_count;
	struct _objc_method method_list[1].
} _OBJC_$_CATEGORY_CLASS_METHODS_TYObject_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) = {
	sizeof(_objc_method),
	1,
	{{(struct objc_selector *)"test"."v16@0:8", (void *)_C_TYObject_Test_test}}
};

// Attribute list
static struct/ * _prop_list_t* / {
	unsigned int entsize;  // sizeof(struct _prop_t)
	unsigned int count_of_properties;
	struct _prop_t prop_list[2].
} _OBJC_$_PROP_LIST_TYObject_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) = {
	sizeof(_prop_t),
	2,
	{{"testStr"."T@\"NSString\",&,N"},
	{"testInt"."Tq,N"}}};Copy the code

As the C++ code shows, at compile time a category is only emitted as a _category_t structure; it is not merged into the class.

How the _category_t content is attached to the Class or MetaClass can be seen in Apple's open-source ObjC runtime.

Start with the objc-os.mm file, which contains the runtime's entry point. (Look for the comments I added.)

// Initialization at runtime
// Runs the init steps below once, then registers the image callbacks.
void _objc_init(void)
{
    // Guard: only the first call gets past this point.
    static bool initialized = false;
    if (initialized) return;
    initialized = true;
    
    // fixme defer initialization until an objc-using image is found?
    environ_init();
    tls_init();
    static_init();
    lock_init();
    exception_init();
    
    // Register map_images / load_images / unmap_image as callbacks.
    // Note: "image" here means a binary module, not a picture.
    _dyld_objc_notify_register(&map_images, load_images, unmap_image);
}

// Callback registered in _objc_init: takes runtimeLock through a
// mutex_locker_t and forwards all arguments to map_images_nolock.
void map_images(unsigned count, const char * const paths[],
           const struct mach_header * const mhdrs[])
{
    mutex_locker_t guard(runtimeLock);

    // The interesting part is inside the _nolock variant; keep reading there.
    map_images_nolock(count, paths, mhdrs);
}

// Excerpt (the "// ..." markers stand for omitted code): passes the header
// list on to _read_images when there is at least one header.
void 
map_images_nolock(unsigned mhCount, const char * const mhPaths[],
                  const struct mach_header * const mhdrs[])
{
    // ...
    
    // NOTE(review): hCount, hList, totalClasses and unoptimizedTotalClasses are
    // presumably set up in the omitted code above - not visible in this excerpt.
    if (hCount > 0) {
    // The relevant logic is here, load the module
        _read_images(hList, hCount, totalClasses, unoptimizedTotalClasses);
    }
    
    // ...
}

// Excerpt of _read_images showing only the "discover categories" phase.
// For each category found in each header, it registers the category with its
// target class and/or metaclass and, when that class is already realized,
// rebuilds the class's method lists through remethodizeClass.
void _read_images(header_info **hList, uint32_t hCount, int totalClasses, int unoptimizedTotalClasses)
{
    // ... 
    
    // Discover categories. (Category)
    
    // ... 
    
    for (EACH_HEADER) {
        // Get an array of categories
        category_t **catlist = 
            _getObjc2CategoryList(hi, &count);
        bool hasClassProperties = hi->info()->hasCategoryClassProperties();

        for (i = 0; i < count; i++) {
            category_t *cat = catlist[i];
            Class cls = remapClass(cat->cls);

            if (!cls) {
                // Category's target class is missing (probably weak-linked).
                // Disavow any knowledge of this category.
                catlist[i] = nil;
                if (PrintConnecting) {
                    _objc_inform("CLASS: IGNORING category \?\?\?(%s) %p with "
                                 "missing weak-linked target class", 
                                 cat->name, cat);
                }
                continue;
            }

            // Process this category. 
            // First, register the category with its target class. 
            // Then, rebuild the class's method lists (etc) if 
            // the class is realized. 
            bool classExists = NO;
            if (cat->instanceMethods ||  cat->protocols  
                ||  cat->instanceProperties) 
            {
                addUnattachedCategoryForClass(cat, cls, hi);
                if (cls->isRealized()) {
                    // Rebuild the method structure of the class object
                    remethodizeClass(cls);
                    classExists = YES;
                }
                if (PrintConnecting) {
                    _objc_inform("CLASS: found category -%s(%s) %s", 
                                 cls->nameForLogging(), cat->name, 
                                 classExists ? "on existing class" : "");
                }
            }

            if (cat->classMethods  ||  cat->protocols  
                ||  (hasClassProperties && cat->_classProperties)) 
            {
                // Class-side content is registered against the metaclass (cls->ISA()).
                addUnattachedCategoryForClass(cat, cls->ISA(), hi);
                if (cls->ISA()->isRealized()) {
                    // Rebuild the method structure of the metaclass object
                    remethodizeClass(cls->ISA());
                }
                if (PrintConnecting) {
                    _objc_inform("CLASS: found category +%s(%s)", 
                                 cls->nameForLogging(), cat->name);
                }
            }
        }
    }

    ts.log("IMAGE TIMES: discover categories");
    
    // ... 
}

The remethodizeClass() function:
// Attaches any categories still registered as unattached for `cls`
// (a class or a metaclass). Asserts that runtimeLock is held.
static void remethodizeClass(Class cls)
{
    runtimeLock.assertLocked();

    const bool isMeta = cls->isMetaClass();

    // Re-methodizing: check for more categories
    category_list *pending = unattachedCategoriesForClass(cls, false/*not realizing*/);
    if (!pending) return;

    if (PrintConnecting) {
        _objc_inform("CLASS: attaching categories to class '%s' %s", 
                     cls->nameForLogging(), isMeta ? "(meta)" : "");
    }

    // The main logic is here: attach the categories.
    //   cls:     the class (or metaclass) object
    //   pending: the list of categories to attach to it
    attachCategories(cls, pending, true /*flush caches*/);
    free(pending);
}
// The real add step is here
// cls = [TYObject class]
// cats = [category_t(TYObject+Test), category_t(TYObject+Other)]
//
// Collects the method, property and protocol lists of every category in
// `cats` and attaches them to the class's rw data (cls->data()).
// When `flush_caches` is true and at least one method list was attached,
// flushCaches(cls) is called afterwards.
static void 
attachCategories(Class cls, category_list *cats, bool flush_caches)
{
    if (!cats) return;
    if (PrintReplacedMethods) printReplacements(cls, cats);

    bool isMeta = cls->isMetaClass();

    // fixme rearrange to remove these intermediate allocations

    // Method array (two-dimensional: [[method_t, method_t], [method_t, method_t]])
    method_list_t **mlists = (method_list_t **)malloc(cats->count * sizeof(*mlists));
    // Property array (two-dimensional)
    property_list_t **proplists = (property_list_t **)malloc(cats->count * sizeof(*proplists));
    // Protocol array (two-dimensional)
    protocol_list_t **protolists = (protocol_list_t **)malloc(cats->count * sizeof(*protolists));

    // Count backwards through cats to get newest categories first
    int mcount = 0;
    int propcount = 0;
    int protocount = 0;
    int i = cats->count;
    bool fromBundle = NO;

    // This reverse traversal matters: it decides the order in which the
    // categories' lists are collected, so the last entry in cats comes first.
    while (i--) {
        // Take the categories from last to first
        auto& entry = cats->list[i];
        // Pick the instance methods or the class methods depending on isMeta
        method_list_t *mlist = entry.cat->methodsForMeta(isMeta);
        if (mlist) {
            // Store this category's method list in mlists (allocated above):
            // mlists = [category method list, category method list, ...]
            mlists[mcount++] = mlist;
            fromBundle |= entry.hi->isBundle();
        }

        // Same logic as the method array
        property_list_t *proplist = 
            entry.cat->propertiesForMeta(isMeta, entry.hi);
        if (proplist) {
            proplists[propcount++] = proplist;
        }

        // Same logic as the method array
        protocol_list_t *protolist = entry.cat->protocols;
        if (protolist) {
            protolists[protocount++] = protolist;
        }
    }

    // Get the class_rw_t of the class:
    // where the class's own methods, properties, etc. are stored
    auto rw = cls->data();

    prepareMethodLists(cls, mlists, mcount, NO, fromBundle);

    // Attach mlists (the methods of all categories) to the class's method list
    rw->methods.attachLists(mlists, mcount);
    free(mlists);
    if (flush_caches  &&  mcount > 0) flushCaches(cls);

    // Attach proplists (the properties of all categories) to the class's property list
    rw->properties.attachLists(proplists, propcount);
    free(proplists);

    // Attach protolists (the protocols of all categories) to the class's protocol list
    rw->protocols.attachLists(protolists, protocount);
    free(protolists);
}

// Here is an example of using methods.attachLists
// Internal implementation that adds the new lists to the class object.
// The added lists are always placed in FRONT of the lists already present.
void attachLists(List* const * addedLists, uint32_t addedCount) {
    if (addedCount == 0) return;

    if (hasArray()) {
        // many lists -> many lists
        uint32_t oldCount = array()->count;
        uint32_t newCount = oldCount + addedCount;

        // From here on is the logic that merges the lists.
        // Reallocate the array so it can hold newCount entries.
        setArray((array_t *)realloc(array(), array_t::byteSize(newCount)));
        array()->count = newCount;

        // array()->lists holds the original lists.
        // Shift them back by addedCount slots (memmove: the ranges overlap).
        memmove(array()->lists + addedCount,
                array()->lists,
                oldCount * sizeof(array()->lists[0]));

        // addedLists holds the lists of all categories.
        // Copy its addedCount entries into the freed slots at the front.
        memcpy(array()->lists,
               addedLists,
               addedCount * sizeof(array()->lists[0]));

        // Consequence: if a category method has the same name as a method of
        // the class itself, the category's list now sits in front of the
        // class's own list, so a front-to-back search reaches the category's
        // method first.
    }
    else if (!list  &&  addedCount == 1) {
        // 0 lists -> 1 list
        list = addedLists[0];
    } 
    else {
        // 1 list -> many lists
        List* oldList = list;
        uint32_t oldCount = oldList ? 1 : 0;
        uint32_t newCount = oldCount + addedCount;
        setArray((array_t *)malloc(array_t::byteSize(newCount)));
        array()->count = newCount;
        // The single old list goes after the added ones.
        if (oldList) array()->lists[addedCount] = oldList;
        memcpy(array()->lists, addedLists, 
               addedCount * sizeof(array()->lists[0]));
    }
}

At this point, the logic of additional information ends.

Conclusion:

After the categories' method arrays have been attached to the class object, the method list in the class object is structured like this:

method list = [
    methods of the category compiled later   = [method_t, method_t],
    methods of the category compiled earlier = [method_t, method_t],
    methods originally in the class          = [method_t, method_t]
]

The compile order can be checked under TARGETS -> Build Phases -> Compile Sources (files higher in the list are compiled first, files lower in the list are compiled later).

Member variables

About not being able to add member variables:

As the category_t structure shows, it has fields for methods, protocols and properties but none for a member-variable list, so a category has nowhere to put member variables. If you declare one in a category anyway, the compiler reports: Instance variables may not be placed in categories

So why did Apple design it this way? I think it can be understood from two angles: the memory layout of an OC object, and the time at which categories are loaded.

The underlying implementation of an OC instance object is a struct whose members occupy contiguous memory, so its layout is determined at compile time and cannot be modified afterwards. We can verify this by adding an int member variable to our test class and inspecting the generated code.

// Test class carrying a single public int instance variable, so that the
// generated struct can be inspected.
@interface TYObject : NSObject {
@public
    int _age;
}
@end
Copy the code

Used in the main function

// Creates a TYObject and writes 10 into its public _age ivar.
int main(int argc, char * argv[]) {
    @autoreleasepool {
        
        TYObject *obj = [[TYObject alloc] init];
        // Direct ivar write through ->, not through an accessor method.
        obj->_age = 10;
        
    }
    return 0;
}
Copy the code

Convert to c++ code as

// Declared here only; main adds this value to the object's address to reach _age.
extern "C" unsigned long OBJC_IVAR_$_TYObject$_age;
// Generated struct for TYObject: the NSObject part first, then _age.
struct TYObject_IMPL {
	struct NSObject_IMPL NSObject_IVARS;
	int _age;
};

int main(int argc, char * argv[]) {
    /* @autoreleasepool */ { __AtAutoreleasePool __autoreleasepool; 

        TYObject *obj = ((TYObject *(*)(id, SEL))(void *)objc_msgSend)((id)((TYObject *(*)(id, SEL))(void *)objc_msgSend)((id)objc_getClass("TYObject"), sel_registerName("alloc")), sel_registerName("init")); (* (int((*)char *)obj + OBJC_IVAR_$_TYObject$_age)) = 10;

    }
    return 0;
}
Copy the code

Note the assignment (*(int *)((char *)obj + OBJC_IVAR_$_TYObject$_age)) = 10; OBJC_IVAR_$_TYObject$_age is an unsigned long that holds the byte offset of _age inside the object. On a 64-bit system that offset is 8, because the 8-byte isa pointer inherited from NSObject comes first. The code takes the address of obj, adds this offset to land exactly on the _age member variable, and assigns through that address.

This shows that an OC instance's member variables live inside that struct, and that the struct's contents are already fixed at compile time. The category_t content, by contrast, is attached to the class at run time, and changing the memory layout of objects at run time would be dangerous.

We can also see from the ObjC source that the ivars list lives inside the class_ro_t structure and is declared const:

// Data fields of class_ro_t as shown in this excerpt. The member-variable list
// (ivars) is stored here behind a pointer-to-const.
struct class_ro_t {
    uint32_t flags;
    uint32_t instanceStart;
    uint32_t instanceSize;
#ifdef __LP64__
    uint32_t reserved;
#endif

    const uint8_t * ivarLayout;
    
    const char * name;
    method_list_t * baseMethodList;
    protocol_list_t * baseProtocols;
    const ivar_list_t * ivars; // List of member variables

    const uint8_t * weakIvarLayout;
    property_list_t *baseProperties;
};
Copy the code

So for these reasons, you can’t add member variables to a category