背景

crash 监控发现有大量的新增崩溃,堆栈如下

0	libsystem_platform.dylib	__os_unfair_lock_corruption_abort()
1	libsystem_platform.dylib	__os_unfair_lock_lock_slow()
2	Foundation	__NSSetBoolValueAndNotify()

分析堆栈

__os_unfair_lock_corruption_abort

log 翻译:lock 已损坏

/*
 * Reports a corrupt os_unfair_lock through the libplatform client-crash macro.
 *
 * current: the lock value observed by the caller; it is forwarded to the
 *          crash macro together with the message "os_unfair_lock is corrupt".
 *
 * The return type is spelled out explicitly: the quoted snippet had dropped
 * it, which would make this an implicit-int function.
 * NOTE(review): upstream presumably also marks this noinline/noreturn — confirm
 * against the libplatform sources before relying on it.
 */
void
_os_unfair_lock_corruption_abort(os_ulock_value_t current)
{
	__LIBPLATFORM_CLIENT_CRASH__(current, "os_unfair_lock is corrupt");
}

__os_unfair_lock_lock_slow

在这个方法里面,__ulock_wait 返回 EOWNERDEAD 时会调用 corruption abort 方法。

int ret = __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO | options,
				l, current, 0);
if (unlikely(ret < 0)) {
  switch (-ret) {
    case EINTR:
    case EFAULT:
      continue;
    case EOWNERDEAD:
      _os_unfair_lock_corruption_abort(current);
      break;
    default:
      __LIBPLATFORM_INTERNAL_CRASH__(-ret, "ulock_wait failure");
  }
}

EOWNERDEAD 的定义

#define EOWNERDEAD 105 /* Previous owner died */

到这里猜测是 lock 的 owner 已经是野指针了,继续向下看。

__NSSetBoolValueAndNotify

Google 了一下,这个方法是在 KVO 里面修改属性的时候调用的,伪代码如下:

// Decompiler pseudo-code (not compilable) of the setter that KVO installs for
// an observed BOOL property.
// NOTE(review): arg0/arg1/arg2 appear to be the receiver, the setter selector
// and the new value — they are forwarded in that order to the original IMP below.
int __NSSetBoolValueAndNotify(int arg0, int arg1, int arg2) {
    // Prologue: reserve the frame and spill callee-saved registers.
    r31 = r31 - 0x90;
    var_30 = r24;
    stack[-56] = r23;
    var_20 = r22;
    stack[-40] = r21;
    var_10 = r20;
    stack[-24] = r19;
    saved_fp = r29;
    stack[-8] = r30;
    r20 = arg2;
    r21 = arg1;
    r19 = arg0;
    // Fetch the receiver's class, then the extra bytes stored after that class.
    r0 = object_getClass(arg0);
    r0 = object_getIndexedIvars(r0); // Key call for understanding this crash. The pseudo-code differs from the disassembly here: in the disassembly the argument passed on is r0 + 0x20
    r23 = r0;
    os_unfair_recursive_lock_lock_with_options(); // crash site; the decompiler dropped the arguments
    // Look up r21 in the dictionary stored at offset 0x18 and copy the result.
    CFDictionaryGetValue(*(r23 + 0x18), r21);
    r22 = _objc_msgSend$copyWithZone:();
    os_unfair_recursive_lock_unlock();
    // The byte at offset 0x28 selects between explicit will/did notifications
    // around the original setter and the block-based _changeValueForKey: path.
    if (*(int8_t *)(r23 + 0x28) != 0x0) {
            _objc_msgSend$willChangeValueForKey:();
            // Call the implementation that the class stored at offset 0 provides for r21.
            (class_getMethodImplementation(*r23, r21))(r19, r21, r20);
            _objc_msgSend$didChangeValueForKey:();
    }
    else {
            _objc_msgSend$_changeValueForKey:key:key:usingBlock:();
    }
    // Release the copied value, then compare the saved word and call
    // __stack_chk_fail on mismatch.
    var_38 = **qword_9590e8;
    r0 = objc_release_x22();
    if (**qword_9590e8 != var_38) {
            r0 = __stack_chk_fail();
    }
    return r0;
}

os_unfair_recursive_lock_lock_with_options

崩溃调用栈中间还有这一层的内联调用 os_unfair_recursive_lock_lock_with_options。这里的 lock owner 有个比较赋值的操作,如果 oul_value 等于 OS_LOCK_NO_OWNER 则赋值 self 然后 return。崩溃时这里继续向下执行了,那这里的 oul_value 的取值只能是 lock->oul_value。到这里猜测崩溃的原因是 lock->oul_value 野指针了。

/*
 * Acquires a recursive unfair lock.
 *
 * lock:    the recursive lock; its embedded unfair lock is lock->ourl_lock and
 *          its recursion counter is lock->ourl_count.
 * options: forwarded unchanged to the slow path.
 */
void
os_unfair_recursive_lock_lock_with_options(os_unfair_recursive_lock_t lock,
		os_unfair_lock_options_t options)
{
	os_lock_owner_t cur, self = _os_lock_owner_get_self();
	_os_unfair_lock_t l = (_os_unfair_lock_t)&lock->ourl_lock;
	/* Fast path: compare-exchange oul_value from OS_LOCK_NO_OWNER to self
	 * (acquire). NOTE(review): presumably the observed value is written to
	 * `cur` on failure — confirm against os_atomic_cmpxchgv2o. */
	if (likely(os_atomic_cmpxchgv2o(l, oul_value,
			OS_LOCK_NO_OWNER, self, &cur, acquire))) {
		return;
	}
	/* Already owned by the calling thread: only bump the recursion count. */
	if (OS_ULOCK_OWNER(cur) == self) {
		lock->ourl_count++;
		return;
	}
	/* Owned by someone else (or holding an unexpected value): take the slow path. */
	return _os_unfair_lock_lock_slow(l, self, options);
}
/*
 * Returns the owner token of the calling thread, read directly from the
 * thread-specific-data slot __TSD_MACH_THREAD_SELF.
 */
OS_ALWAYS_INLINE OS_CONST
static inline os_lock_owner_t
_os_lock_owner_get_self(void)
{
	os_lock_owner_t self;
	self = (os_lock_owner_t)_os_tsd_get_direct(__TSD_MACH_THREAD_SELF);
	return self;
}

object_getIndexedIvars

__NSSetBoolValueAndNotify 里面获取 lock 的方法,这个函数非常关键。

/**
 * Returns a pointer to any extra bytes allocated with an instance given object.
 * 
 * @param obj An Objective-C object.
 * 
 * @return A pointer to any extra bytes allocated with \e obj. If \e obj was
 *   not allocated with any extra bytes, then dereferencing the returned pointer is undefined.
 * 
 * @note This function returns a pointer to any extra bytes allocated with the instance
 *  (as specified by \c class_createInstance with extraBytes>0). This memory follows the
 *  object's ordinary ivars, but may not be adjacent to the last ivar.
 * @note The returned pointer is guaranteed to be pointer-size aligned, even if the area following
 *  the object's last ivar is less aligned than that. Alignment greater than pointer-size is never
 *  guaranteed, even if the area following the object's last ivar is more aligned than that.
 * @note In a garbage-collected environment, the memory is scanned conservatively.
 */
/**
 * Returns a pointer immediately after the instance variables declared in an
 * object.  This is a pointer to the storage specified with the extraBytes
 * parameter given when allocating an object.
 */
void *object_getIndexedIvars(id obj)
{
    uint8_t *base = (uint8_t *)obj;
    // Tagged pointers and nil: return nil.
    if (_objc_isTaggedPointerOrNil(obj)) return nil;
    // Ordinary instance: the extra bytes start at the aligned instance size of its class.
    if (!obj->isClass()) return base + obj->ISA()->alignedInstanceSize();
    // obj is itself a class from here on.
    Class cls = (Class)obj;
    // Non-Swift class: the extra bytes start right after the objc_class structure.
    if (!cls->isAnySwift()) return base + sizeof(objc_class);
    // Swift class: rebase by classAddressOffset, then skip the word-aligned classSize.
    swift_class_t *swcls = (swift_class_t *)cls;
    return base - swcls->classAddressOffset + word_align(swcls->classSize);
}

上层调用 __NSSetBoolValueAndNotify 里边:

r0 = object_getClass(arg0),arg0 是实例对象,r0 是类对象,由于这里是个 KVO 的调用,那正常情况下 r0 是 NSKVONotifying_xxx。

对于 KVO 类,object_getIndexedIvars 返回的地址是 (uint8_t *)obj + sizeof(objc_class)。根据函数的注释,这个地址指向创建类时附在类空间后 extraBytes 大小的一块内存。

debug 调试

object_getIndexedIvars

__NSSetBoolValueAndNotify 下的调用

99% 的 iOS 开发都不知道的 KVO 崩溃

object_getIndexedIvars 入参是 NSKVONotifying_KVObject,object_getClass 获取的是 KVO Class。

objc_allocateClassPair

动态创建 KVO 类的方法。

 thread #8, queue = 'com.apple.root.default-qos', stop reason = breakpoint 1.1
  * frame #0: 0x000000018143a088 libobjc.A.dylib`objc_allocateClassPair
    frame #1: 0x000000018259cd94 Foundation`_NSKVONotifyingCreateInfoWithOriginalClass + 152
    frame #2: 0x00000001825b8fd0 Foundation`_NSKeyValueContainerClassGetNotifyingInfo + 56
    frame #3: 0x000000018254b7dc Foundation`-[NSKeyValueUnnestedProperty _isaForAutonotifying] + 44
    frame #4: 0x000000018254b504 Foundation`-[NSKeyValueUnnestedProperty isaForAutonotifying] + 88
    frame #5: 0x000000018254b32c Foundation`-[NSObject(NSKeyValueObserverRegistration) _addObserver:forProperty:options:context:] + 404
    frame #6: 0x000000018254b054 Foundation`-[NSObject(NSKeyValueObserverRegistration) addObserver:forKeyPath:options:context:] + 136
    frame #7: 0x00000001040d1860 Test`__29-[ViewController viewDidLoad]_block_invoke(.block_descriptor=0x0000000282a55170) at ViewController.m:28:13
    frame #8: 0x00000001043d05a8 libdispatch.dylib`_dispatch_call_block_and_release + 32
    frame #9: 0x00000001043d205c libdispatch.dylib`_dispatch_client_callout + 20
    frame #10: 0x00000001043d4b94 libdispatch.dylib`_dispatch_queue_override_invoke + 1052
    frame #11: 0x00000001043e6478 libdispatch.dylib`_dispatch_root_queue_drain + 408
    frame #12: 0x00000001043e6e74 libdispatch.dylib`_dispatch_worker_thread2 + 196
    frame #13: 0x00000001d515fdbc libsystem_pthread.dylib`_pthread_wqthread + 228

_NSKVONotifyingCreateInfoWithOriginalClass

objc_allocateClassPair 的上层调用。allocate 之前的 context 中 w2 是个固定值 0x30,即创建 KVO Class 时入参 extraBytes 的大小是 0x30

    0x18259cd78 <+124>: mov    x1, x21
    0x18259cd7c <+128>: mov    x2, x22
    0x18259cd80 <+132>: bl     0x188097080
    0x18259cd84 <+136>: mov    x0, x20
    0x18259cd88 <+140>: mov    x1, x19
    0x18259cd8c <+144>: mov    w2, #0x30
    0x18259cd90 <+148>: bl     0x1880961f0 // objc_allocateClassPair 
    0x18259cd94 <+152>: cbz    x0, 0x18259ce24           ; <+296>
    0x18259cd98 <+156>: mov    x21, x0
    0x18259cd9c <+160>: bl     0x188096410 // objc_registerClassPair
    0x18259cda0 <+164>: mov    x0, x19
    0x18259cda4 <+168>: bl     0x182b45f44               ; symbol stub for: free
    0x18259cda8 <+172>: mov    x0, x21
    0x18259cdac <+176>: bl     0x1880967e0 // object_getIndexedIvars
    0x18259cdb0 <+180>: mov    x19, x0
    0x18259cdb4 <+184>: stp    x20, x21, [x0]

_NSKVONotifyingCreateInfoWithOriginalClass+184 处将 x20 和 x21 写入 [x0],此时 x0 指向的是大小为 extraBytes 的内存,打印 x20 和 x21 的值

x20 = 0x00000001117caa10 (void *)0x00000001117caa38: KVObject(向上回溯这个值取自 _NSKVONotifyingCreateInfoWithOriginalClass 的入参 x0)

x21 NSKVONotifying_KVObject

根据这里可以看出 object_getIndexedIvars 返回的地址,依次存储了 KVObject(origin Class) 和 NSKVONotifying_KVObject(KVO Class)。

查看 _NSKVONotifyingCreateInfoWithOriginalClass 的伪代码,对 [x0] 有 5 次写入的操作,而且最终这个方法返回的是 x0 的地址。

// Decompiler pseudo-code (not compilable) of the routine that creates the
// "NSKVONotifying_<name>" class for an original class (incoming r0) and
// initialises the 0x30 extra bytes allocated with it. Returns the pointer to
// those extra bytes, or 0 when the class pair could not be allocated.
function __NSKVONotifyingCreateInfoWithOriginalClass {
    // Prologue: reserve the frame and spill callee-saved registers.
    r31 = r31 - 0x50;
    stack[32] = r22;
    stack[40] = r21;
    stack[48] = r20;
    stack[56] = r19;
    stack[64] = r29;
    stack[72] = r30;
    r20 = r0;
    if (*(int8_t *)0x993e78 != 0x0) {
            os_unfair_lock_assert_owner(0x993e7c);
    }
    // Build the new class name: "NSKVONotifying_" followed by the original name.
    // NOTE(review): r21 as the strlcat source looks like a decompiler artifact
    // (presumably the class_getName result) — confirm against the disassembly.
    r0 = class_getName(r20);
    r22 = strlen(r0) + 0x10;
    r0 = malloc(r22);
    r19 = r0;
    strlcpy(r0, "NSKVONotifying_", r22);
    strlcat(r19, r21, r22);
    // Allocate the class pair with 0x30 extra bytes.
    r0 = objc_allocateClassPair(r20, r19, 0x30);
    if (r0 != 0x0) {
            objc_registerClassPair(r0);
            free(r19);
            // From here on r19 points at the extra bytes of the new class.
            r0 = object_getIndexedIvars(r21);
            r19 = r0;
            *(int128_t *)r0 = r20; // write #1: Class
            *(int128_t *)(r0 + 0x8) = r21; // write #2: Class
            *(r19 + 0x10) = CFSetCreateMutable(0x0, 0x0, *qword_9592d8); // write #3: CFSet
            *(int128_t *)(r19 + 0x18) = CFDictionaryCreateMutable(0x0, 0x0, 0x0, *qword_959598); // write #4: CFDictionary
            *(int128_t *)(r19 + 0x20) = 0x0; // write #5: zero value
            if (*qword_9fc560 != -0x1) {
                    dispatch_once(0x9fc560, 0x8eaf98);
            }
            // Compute the flag byte stored at offset 0x28 by comparing the class's
            // willChangeValueForKey: / didChangeValueForKey: implementations with
            // two cached values. NOTE(review): the decompiled condition is garbled
            // (CPU_FLAGS & NE); presumably the flag is 1 when either one differs.
            if (class_getMethodImplementation(*r19, @selector(willChangeValueForKey:)) != *qword_9fc568) {
                    r8 = 0x1;
            }
            else {
                    r0 = *r19;
                    r0 = class_getMethodImplementation(r0, @selector(didChangeValueForKey:));
                    r8 = *qword_9fc570;
                    if (r0 != r8) {
                            r8 = *qword_9fc570;
                            if (CPU_FLAGS & NE) {
                                    r8 = 0x1;
                            }
                    }
            }
            *(int8_t *)(r19 + 0x28) = r8;
            // Install implementations for _isKVOA, dealloc and class on the new class.
            _NSKVONotifyingSetMethodImplementation(r19, @selector(_isKVOA), 0x44fab4, 0x0);
            _NSKVONotifyingSetMethodImplementation(r19, @selector(dealloc), 0x44fabc, 0x0);
            _NSKVONotifyingSetMethodImplementation(r19, @selector(class), 0x44fd2c, 0x0);
    }
    else {
            // Allocation failed: log an error, free the name buffer and return 0.
            if (*qword_9fc558 != -0x1) {
                    dispatch_once(0x9fc558, 0x8eaf78);
            }
            if (os_log_type_enabled(*0x9fc550, 0x10) != 0x0) {
                    _os_log_error_impl(0x0, *0x9fc550, 0x10, "KVO failed to allocate class pair for name %s, automatic key-value observing will not work for this class", &stack[0], 0xc);
            }
            free(r19);
            r19 = 0x0;
    }
    // Epilogue check; __stack_chk_fail is called on mismatch.
    if (**qword_9590e8 == **qword_9590e8) {
            r0 = r19;
    }
    else {
            r0 = __stack_chk_fail();
    }
    return r0;
}

_NSKVONotifyingCreateInfoWithOriginalClass 的上层调用,入参是 [x19, #0x8],返回值写入 [x19, #0x28]

    0x1825b8fc0 <+40>: ldr    x0, [x19, #0x28]
    0x1825b8fc4 <+44>: b      0x1825b8fd4               ; <+60>
    0x1825b8fc8 <+48>: ldr    x0, [x19, #0x8]
->  0x1825b8fcc <+52>: bl     0x18259ccfc               ; _NSKVONotifyingCreateInfoWithOriginalClass
    0x1825b8fd0 <+56>: str    x0, [x19, #0x28]
    0x1825b8fd4 <+60>: ldp    x29, x30, [sp, #0x10]
    0x1825b8fd8 <+64>: ldp    x20, x19, [sp], #0x20

打印 x19 是一个 NSKeyValueContainerClass 类型的实例对象,这个对象所属类的 ivars layout

ivars 0x99f3c0 __OBJC_$_INSTANCE_VARIABLES_NSKeyValueContainerClass
            entsize   32
            count     5
            offset    0x9e6048 _OBJC_IVAR_$_NSKeyValueContainerClass._originalClass 8
            name      0x90bd27 _originalClass
            type      0x929ae6 #
            alignment 3
            size      8
            offset    0x9e6050 _OBJC_IVAR_$_NSKeyValueContainerClass._cachedObservationInfoImplementation 16
            name      0x90bd36 _cachedObservationInfoImplementation
            type      0x92bb88 ^?
            alignment 3
            size      8
            offset    0x9e6058 _OBJC_IVAR_$_NSKeyValueContainerClass._cachedSetObservationInfoImplementation 24
            name      0x90bd5b _cachedSetObservationInfoImplementation
            type      0x92bb88 ^?
            alignment 3
            size      8
            offset    0x9e6060 _OBJC_IVAR_$_NSKeyValueContainerClass._cachedSetObservationInfoTakesAnObject 32
            name      0x90bd83 _cachedSetObservationInfoTakesAnObject
            type      0x92a01a B
            alignment 0
            size      1
            offset    0x9e6068 _OBJC_IVAR_$_NSKeyValueContainerClass._notifyingInfo 40
            name      0x90bdaa _notifyingInfo
            type      0x92bdd7 ^{?=##^{__CFSet}^{__CFDictionary}{os_unfair_recursive_lock_s={os_unfair_lock_s=I}I}B}
            alignment 3
            size      8

offset 0x8 name:_originalClass type:Class

offset 0x28 name:_notifyingInfo type:struct

_notifyingInfo 结构体

{
  Class,                        // 0x00
  Class,                        // 0x08
  __CFSet *,                    // 0x10
  __CFDictionary *,             // 0x18
  os_unfair_recursive_lock_s,   // 0x20
  BOOL                          // 0x28
}

type encoding:

https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/ObjCRuntimeGuide/Articles/ocrtTypeEncodings.html

从 context 可以看出 _NSKVONotifyingCreateInfoWithOriginalClass 这个方法的入参是 _OBJC_IVAR__NSKeyValueContainerClass._originalClass,返回值 x0 是 _OBJC_IVAR__NSKeyValueContainerClass._notifyingInfo。5 次对 [x0] 的写入是在初始化 _notifyingInfo。

崩溃时的 context:

    0x1825231f0 <+56>:  bl     0x1880967c0 // object_getClass
    0x1825231f4 <+60>:  bl     0x1880967e0 // object_getIndexedIvars
    0x1825231f8 <+64>:  mov    x23, x0 // x0 == _notifyingInfo
    0x1825231fc <+68>:  add    x24, x0, #0x20 // x24 == os_unfair_recursive_lock_s
    0x182523200 <+72>:  mov    x0, x24
    0x182523204 <+76>:  mov    w1, #0x0
    0x182523208 <+80>:  bl     0x188096910 // os_unfair_recursive_lock_lock_with_options crash 调用栈

调用 object_getClass 获取 Class,调用 object_getIndexedIvars 获取到 _notifyingInfo,_notifyingInfo + 偏移量 0x20 获取 os_unfair_recursive_lock_s,崩溃的原因是这把锁的 owner 损坏了,lock 也是一个结构体,owner 也是根据 offset 获取的。

结论

从崩溃的上下文来看,最可能出问题的是获取 _notifyingInfo,由于只有 KVO Class 才能获取到 _notifyingInfo 这个结构体,如果在调用 __NSSetBoolValueAndNotify 的过程中,监听在其它线程被移除,此时 object_getClass 取到的不是 KVO Class,那后续再根据 offset 去取 lock,这个时候就有可能发生上述崩溃。

线下暴力复现验证了上述猜测。

/// Brute-force repro of the crash: one block repeatedly adds and removes a KVO
/// observer for @"value" while another concurrently flips the observed
/// property, both on the default-priority global queue. The two blocks are
/// deliberately left unsynchronised — the race is the point.
- (void)start {
    static const int kIterationCount = 100000;
    // Same bits as the literal 0x7: New (0x1) | Old (0x2) | Initial (0x4).
    const NSKeyValueObservingOptions options = NSKeyValueObservingOptionNew |
                                               NSKeyValueObservingOptionOld |
                                               NSKeyValueObservingOptionInitial;
    dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
    KVObject *target = [[KVObject alloc] init];

    // Registration churn: swaps the observed object's class back and forth.
    dispatch_async(queue, ^{
        int remaining = kIterationCount;
        while (remaining-- > 0) {
            [target addObserver:self forKeyPath:@"value" options:options context:NULL];
            [target removeObserver:self forKeyPath:@"value"];
        }
    });

    // Setter churn: goes through the KVO-installed setter while it exists.
    dispatch_async(queue, ^{
        int remaining = kIterationCount;
        while (remaining-- > 0) {
            target.value = YES;
            target.value = NO;
        }
    });
}
/// KVO callback, intentionally empty: the repro only needs the observer to be registered.
- (void)observeValueForKeyPath:(NSString *)keyPath ofObject:(id)object change:(NSDictionary<NSKeyValueChangeKey,id> *)change context:(void *)context {}

解决这个问题的思路就是保证线程安全,我们在线上断点找到了 removeObserver 的代码,将 removeObserver 和触发监听的代码放在了同一个串行队列。当然如果 removeObserver 在 dealloc 里面,理论上也不会出现这类问题。

__NSSetxxxValueAndNotify 系列方法都有可能会触发这个崩溃,类似的问题可以按照相同的思路解决。

00000000004e05cd t __NSSetBoolValueAndNotify
00000000004e0707 t __NSSetCharValueAndNotify
00000000004e097b t __NSSetDoubleValueAndNotify
00000000004e0abc t __NSSetFloatValueAndNotify
00000000004e0bfd t __NSSetIntValueAndNotify
00000000004e10e7 t __NSSetLongLongValueAndNotify
00000000004e0e6f t __NSSetLongValueAndNotify
00000000004e0491 t __NSSetObjectValueAndNotify
00000000004e15d5 t __NSSetPointValueAndNotify
00000000004e1734 t __NSSetRangeValueAndNotify
00000000004e188a t __NSSetRectValueAndNotify
00000000004e135f t __NSSetShortValueAndNotify
00000000004e19e8 t __NSSetSizeValueAndNotify
00000000004e0841 t __NSSetUnsignedCharValueAndNotify
00000000004e0d36 t __NSSetUnsignedIntValueAndNotify
00000000004e1223 t __NSSetUnsignedLongLongValueAndNotify
00000000004e0fab t __NSSetUnsignedLongValueAndNotify
00000000004e149a t __NSSetUnsignedShortValueAndNotify
00000000004de834 t __NSSetValueAndNotifyForKeyInIvar