这是一个介绍 Android 渠道日志体系的系列文章:

本文依据 AOSP android-10.0.0_r41 版本解说

1. SocketListener 读取 App 发送的日志信息

上一篇,咱们讲到 SocketListener 收到 App 发送过出来的日志信息,接着会调用 onDataAvailable() 来处理收到的数据:

// system/core/logd/LogListener.cpp
bool LogListener::onDataAvailable(SocketClient* cli) {
    //prctl 体系调用用于设置进程相关的一些东西。这儿运用 PR_SET_NAME 设置了线程的姓名为 logd.writer
    static bool name_set;
    if (!name_set) {
        prctl(PR_SET_NAME, "logd.writer");
        name_set = true;
    }
    // + 1 to ensure null terminator if MAX_PAYLOAD buffer is received
    char buffer[sizeof_log_id_t + sizeof(uint16_t) + sizeof(log_time) +
                LOGGER_ENTRY_MAX_PAYLOAD + 1];
    struct iovec iov = { buffer, sizeof(buffer) - 1 };
    alignas(4) char control[CMSG_SPACE(sizeof(struct ucred))];
    struct msghdr hdr = {
        nullptr, 0, &iov, 1, control, sizeof(control), 0,
    };
    int socket = cli->getSocket();
    // 接下来运用 recvmsg 从 socket 里读取数据
    // To clear the entire buffer is secure/safe, but this contributes to 1.68%
    // overhead under logging load. We are safe because we check counts, but
    // still need to clear null terminator
    // memset(buffer, 0, sizeof(buffer));
    ssize_t n = recvmsg(socket, &hdr, 0);
    if (n <= (ssize_t)(sizeof(android_log_header_t))) {
        return false;
    }
    buffer[n]  = 0;
    // 因为咱们创立 socket 时打开了 SO_PASSCRED 选项,这儿咱们能够读取一个用于表示客户端身份的 struct ucred
    struct ucred* cred = nullptr;
    struct cmsghdr* cmsg = CMSG_FIRSTHDR(&hdr);
    while (cmsg != nullptr) {
        if (cmsg->cmsg_level == SOL_SOCKET &&
            cmsg->cmsg_type == SCM_CREDENTIALS) {
            cred = (struct ucred*)CMSG_DATA(cmsg);
            break;
        }
        cmsg = CMSG_NXTHDR(&hdr, cmsg);
    }
    struct ucred fake_cred;
    if (cred == nullptr) {
        cred = &fake_cred;
        cred->pid = 0;
        cred->uid = DEFAULT_OVERFLOWUID;
    }
    if (cred->uid == AID_LOGD) {
        // ignore log messages we send to ourself.
        // Such log messages are often generated by libraries we depend on
        // which use standard Android logging.
        return false;
    }
    // 假如 struct ucred 内部有一些特殊值,做独自处理
    android_log_header_t* header =
        reinterpret_cast<android_log_header_t*>(buffer);
    log_id_t logId = static_cast<log_id_t>(header->id);
    if (/* logId < LOG_ID_MIN || */ logId >= LOG_ID_MAX ||
        logId == LOG_ID_KERNEL) {
        return false;
    }
    if ((logId == LOG_ID_SECURITY) &&
        (!__android_log_security() ||
         !clientHasLogCredentials(cred->uid, cred->gid, cred->pid))) {
        return false;
    }
    // Check credential validity, acquire corrected details if not supplied.
    if (cred->pid == 0) {
        cred->pid = logbuf ? logbuf->tidToPid(header->tid)
                           : android::tidToPid(header->tid);
        if (cred->pid == getpid()) {
            // We expect that /proc/<tid>/ is accessible to self even without
            // readproc group, so that we will always drop messages that come
            // from any of our logd threads and their library calls.
            return false;  // ignore self
        }
    }
    if (cred->uid == DEFAULT_OVERFLOWUID) {
        uid_t uid =
            logbuf ? logbuf->pidToUid(cred->pid) : android::pidToUid(cred->pid);
        if (uid == AID_LOGD) {
            uid = logbuf ? logbuf->pidToUid(header->tid)
                         : android::pidToUid(cred->pid);
        }
        if (uid != AID_LOGD) cred->uid = uid;
    }
    char* msg = ((char*)buffer) + sizeof(android_log_header_t);
    n -= sizeof(android_log_header_t);
    // NB: hdr.msg_flags & MSG_TRUNC is not tested, silently passing a
    // truncated message to the logs.
    // 调用 logbuf->log 将数据写入 LogBuffer
    if (logbuf != nullptr) {
        int res = logbuf->log(
            logId, header->realtime, cred->uid, cred->pid, header->tid, msg,
            ((size_t)n <= UINT16_MAX) ? (uint16_t)n : UINT16_MAX);
        // 此刻可能有客户端在等待读取数据,于是也调用 reader->notifyNewLog()
        if (res > 0 && reader != nullptr) {
            reader->notifyNewLog(static_cast<log_mask_t>(1 << logId));
        }
    }
    return true;
}

函数主要流程如下:

  • prctl 体系调用用于设置进程相关的一些东西。这儿运用 PR_SET_NAME 设置了线程的姓名为 logd.writer
  • 因为咱们创立 socket 时打开了 SO_PASSCRED 选项,这儿咱们能够读取一个用于表示客户端身份的 struct ucred
  • 接下来,假如 struct ucred 内部有一些特殊值,做独自处理
  • 调用 logbuf->log 将数据写入 LogBuffer
  • 此刻可能有客户端在等待读取数据,于是也调用 reader->notifyNewLog()

2. 写入 LogBuffer

log 的写入分为 4 个进程:

  • 依据日志的内容对数据做调整,数据不对,就直接 return
  • 运用一个状态机去除重复的 log
  • 写入 log
  • 假如需求,删去一些 log 以避免 log 数据过多

接下来分步检查源码:

依据 tag 和优先级判别该 log 是否能够写入

// system/core/logd/LogBuffer.cpp
int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
                   pid_t tid, const char* msg, uint16_t len) {
    if (log_id >= LOG_ID_MAX) {
        return -EINVAL;
    }
    // Slip the time by 1 nsec if the incoming lands on xxxxxx000 ns.
    // This prevents any chance that an outside source can request an
    // exact entry with time specified in ms or us precision.
    if ((realtime.tv_nsec % 1000) == 0) ++realtime.tv_nsec;
    // LogBufferElement 用于保存一条日志信息
    LogBufferElement* elem =
        new LogBufferElement(log_id, realtime, uid, pid, tid, msg, len);
    // 依据数据内容,调整数据
    if (log_id != LOG_ID_SECURITY) {
        int prio = ANDROID_LOG_INFO;
        const char* tag = nullptr;
        size_t tag_len = 0;
        if (log_id == LOG_ID_EVENTS || log_id == LOG_ID_STATS) {
            tag = tagToName(elem->getTag());
            if (tag) {
                tag_len = strlen(tag);
            }
        } else {
            prio = *msg;
            tag = msg + 1;
            tag_len = strnlen(tag, len - 1);
        }
        if (!__android_log_is_loggable_len(prio, tag, tag_len,
                                           ANDROID_LOG_VERBOSE)) {
            // Log traffic received to total
            wrlock();
            stats.addTotal(elem);
            unlock();
            delete elem;
            return -EACCES;
        }
    }
    // ......
}

运用一个状态机去除重复的 log

int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
                   pid_t tid, const char* msg, uint16_t len) {
    //......
    wrlock();
    LogBufferElement* currentLast = lastLoggedElements[log_id];
    if (currentLast) {
        LogBufferElement* dropped = droppedElements[log_id];
        uint16_t count = dropped ? dropped->getDropped() : 0;
        //
        // State Init
        //     incoming:
        //         dropped = nullptr
        //         currentLast = nullptr;
        //         elem = incoming message
        //     outgoing:
        //         dropped = nullptr -> State 0
        //         currentLast = copy of elem
        //         log elem
        // State 0
        //     incoming:
        //         count = 0
        //         dropped = nullptr
        //         currentLast = copy of last message
        //         elem = incoming message
        //     outgoing: if match != DIFFERENT
        //         dropped = copy of first identical message -> State 1
        //         currentLast = reference to elem
        //     break: if match == DIFFERENT
        //         dropped = nullptr -> State 0
        //         delete copy of last message (incoming currentLast)
        //         currentLast = copy of elem
        //         log elem
        // State 1
        //     incoming:
        //         count = 0
        //         dropped = copy of first identical message
        //         currentLast = reference to last held-back incoming
        //                       message
        //         elem = incoming message
        //     outgoing: if match == SAME
        //         delete copy of first identical message (dropped)
        //         dropped = reference to last held-back incoming
        //                   message set to chatty count of 1 -> State 2
        //         currentLast = reference to elem
        //     outgoing: if match == SAME_LIBLOG
        //         dropped = copy of first identical message -> State 1
        //         take sum of currentLast and elem
        //         if sum overflows:
        //             log currentLast
        //             currentLast = reference to elem
        //         else
        //             delete currentLast
        //             currentLast = reference to elem, sum liblog.
        //     break: if match == DIFFERENT
        //         delete dropped
        //         dropped = nullptr -> State 0
        //         log reference to last held-back (currentLast)
        //         currentLast = copy of elem
        //         log elem
        // State 2
        //     incoming:
        //         count = chatty count
        //         dropped = chatty message holding count
        //         currentLast = reference to last held-back incoming
        //                       message.
        //         dropped = chatty message holding count
        //         elem = incoming message
        //     outgoing: if match != DIFFERENT
        //         delete chatty message holding count
        //         dropped = reference to last held-back incoming
        //                   message, set to chatty count + 1
        //         currentLast = reference to elem
        //     break: if match == DIFFERENT
        //         log dropped (chatty message)
        //         dropped = nullptr -> State 0
        //         log reference to last held-back (currentLast)
        //         currentLast = copy of elem
        //         log elem
        //
        enum match_type match = identical(elem, currentLast);
        if (match != DIFFERENT) {
            if (dropped) {
                // Sum up liblog tag messages?
                if ((count == 0) /* at Pass 1 */ && (match == SAME_LIBLOG)) {
                    android_log_event_int_t* event =
                        reinterpret_cast<android_log_event_int_t*>(
                            const_cast<char*>(currentLast->getMsg()));
                    //
                    // To unit test, differentiate with something like:
                    //    event->header.tag = htole32(CHATTY_LOG_TAG);
                    // here, then instead of delete currentLast below,
                    // log(currentLast) to see the incremental sums form.
                    //
                    uint32_t swab = event->payload.data;
                    unsigned long long total = htole32(swab);
                    event = reinterpret_cast<android_log_event_int_t*>(
                        const_cast<char*>(elem->getMsg()));
                    swab = event->payload.data;
                    lastLoggedElements[LOG_ID_EVENTS] = elem;
                    total += htole32(swab);
                    // check for overflow
                    if (total >= UINT32_MAX) {
                        log(currentLast);
                        unlock();
                        return len;
                    }
                    stats.addTotal(currentLast);
                    delete currentLast;
                    swab = total;
                    event->payload.data = htole32(swab);
                    unlock();
                    return len;
                }
                if (count == USHRT_MAX) {
                    log(dropped);
                    count = 1;
                } else {
                    delete dropped;
                    ++count;
                }
            }
            if (count) {
                stats.addTotal(currentLast);
                currentLast->setDropped(count);
            }
            droppedElements[log_id] = currentLast;
            lastLoggedElements[log_id] = elem;
            unlock();
            return len;
        }
        if (dropped) {         // State 1 or 2
            if (count) {       // State 2
                log(dropped);  // report chatty
            } else {           // State 1
                delete dropped;
            }
            droppedElements[log_id] = nullptr;
            log(currentLast);  // report last message in the series
        } else {               // State 0
            delete currentLast;
        }
    }
    lastLoggedElements[log_id] = new LogBufferElement(*elem);
    // .....

这儿运用一个状态机来去除重复的 log,知道流程即可,有兴趣的同学能够参考源码中的注释自行剖析。

写入 log

int LogBuffer::log(log_id_t log_id, log_time realtime, uid_t uid, pid_t pid,
                   pid_t tid, const char* msg, uint16_t len) {
    //......
    log(elem);
    unlock();
    return len;
}

这儿调用另一个重载 log:

// assumes LogBuffer::wrlock() held, owns elem, look after garbage collection
void LogBuffer::log(LogBufferElement* elem) {
    // cap on how far back we will sort in-place, otherwise append
    static uint32_t too_far_back = 5;  // five seconds
    // Insert elements in time sorted order if possible
    //  NB: if end is region locked, place element at end of list
    LogBufferElementCollection::iterator it = mLogElements.end();
    LogBufferElementCollection::iterator last = it;
    if (__predict_true(it != mLogElements.begin())) --it;
    if (__predict_false(it == mLogElements.begin()) ||
        __predict_true((*it)->getRealTime() <= elem->getRealTime()) ||
        __predict_false((((*it)->getRealTime().tv_sec - too_far_back) >
                         elem->getRealTime().tv_sec) &&
                        (elem->getLogId() != LOG_ID_KERNEL) &&
                        ((*it)->getLogId() != LOG_ID_KERNEL))) {
        mLogElements.push_back(elem);
    } else {
        log_time end(log_time::EPOCH);
        bool end_set = false;
        bool end_always = false;
        LogTimeEntry::rdlock();
        LastLogTimes::iterator times = mTimes.begin();
        while (times != mTimes.end()) {
            LogTimeEntry* entry = times->get();
            if (!entry->mNonBlock) {
                end_always = true;
                break;
            }
            // it passing mEnd is blocked by the following checks.
            if (!end_set || (end <= entry->mEnd)) {
                end = entry->mEnd;
                end_set = true;
            }
            times++;
        }
        if (end_always || (end_set && (end > (*it)->getRealTime()))) {
            mLogElements.push_back(elem);
        } else {
            // should be short as timestamps are localized near end()
            do {
                last = it;
                if (__predict_false(it == mLogElements.begin())) {
                    break;
                }
                --it;
            } while (((*it)->getRealTime() > elem->getRealTime()) &&
                     (!end_set || (end <= (*it)->getRealTime())));
            mLogElements.insert(last, elem);
        }
        LogTimeEntry::unlock();
    }
    stats.add(elem);
    maybePrune(elem->getLogId());
}

__predict_true 和 __predict_false 用来提示编译器对应的判别很可能是 true/false,类似于 Linux 内核的 likely/unlikely。假如判别正确,能够得到很大的功能提升。

客户端运用 Unix-domain socket 来写入 log,读出来的时分,很可能就现已按时间顺序排好,这个时分,只需求把 LogBufferElement 刺进列表结尾就能够了。

假如很不幸的,上面的判别失利了,就只能从后往前遍历列表,找到一个适宜的方位来刺进 LogBufferElement。这个进程类似于刺进排序。

假如需求,删去一些 log 以避免 log 数据过多

void LogBuffer::log(LogBufferElement* elem) 在最终会调用 maybePrune,依据 log 总量判别是否需求删去一些 log。

// system/core/logd/LogBuffer.cpp
// Prune at most 10% of the log entries or maxPrune, whichever is less.
//
// LogBuffer::wrlock() must be held when this function is called.
void LogBuffer::maybePrune(log_id_t id) {
    size_t sizes = stats.sizes(id);
    unsigned long maxSize = log_buffer_size(id);
    if (sizes > maxSize) {
        size_t sizeOver = sizes - ((maxSize * 9) / 10);
        size_t elements = stats.realElements(id);
        size_t minElements = elements / 100;
        if (minElements < minPrune) {
            minElements = minPrune;
        }
        unsigned long pruneRows = elements * sizeOver / sizes;
        if (pruneRows < minElements) {
            pruneRows = minElements;
        }
        if (pruneRows > maxPrune) {
            pruneRows = maxPrune;
        }
        prune(id, pruneRows);
    }
}

假如 id 类型的 log 超过了总量约束,就删去 10% 的 log。在这个前提下,所删去的 log 调试大于 minElements 和 minPrune,小于 maxPrune。其中,minElements 是一切 id 类型的 log 的总条数的百分之一。

当每条 log 都很大、log 总量又很小,约束最小值能够避免总是需求除掉旧 log。假如 log 每条很小,约束最大数目能够避免删去过多的 log。

实践的删去作业由 prune 方法完结,这个咱们会在冗余日志处理进程剖析日志部分来解说。

参考资料