背景

应用中,因后台过度使用资源而导致的退出占比在 2% 左右。

源码查找(基于 Android 13)

观察日志,大部分退出都输出了以下格式的日志。

“excessive cpu 8020 during 300091 dur=1235468 limit=2”

查找源码,该日志出自 ActivityManagerService 的 updateAppProcessCpuTimeLPr 方法:

/**
 * Compares the CPU time a process has accumulated since the previously stored value against
 * {@code cpuLimit} and, when the check reports excessive usage, schedules a kill of the process.
 *
 * <p>The whole body runs while holding {@code mAppProfiler.mProfilerLock}.
 *
 * @param uptimeSince forwarded to {@code checkExcessivePowerUsageLPr}; printed after
 *                    "during" in the kill reason
 * @param doCpuKills  forwarded to {@code checkExcessivePowerUsageLPr}
 * @param checkDur    only used in the kill reason, printed as "dur="
 * @param cpuLimit    forwarded to {@code checkExcessivePowerUsageLPr}; printed as "limit="
 * @param app         the process record whose profile is inspected and which may be killed
 */
private void updateAppProcessCpuTimeLPr(final long uptimeSince, final boolean doCpuKills,
        final long checkDur, final int cpuLimit, final ProcessRecord app) {
    synchronized (mAppProfiler.mProfilerLock) {
        final ProcessProfileRecord profile = app.mProfile;
        final long curCpuTime = profile.mCurCpuTime.get();
        final long lastCpuTime = profile.mLastCpuTime.get();
        // Only compare when the stored previous value is positive.
        if (lastCpuTime > 0) {
            final long cpuTimeUsed = curCpuTime - lastCpuTime;
            if (checkExcessivePowerUsageLPr(uptimeSince, doCpuKills, cpuTimeUsed,
                        app.processName, app.toShortString(), cpuLimit, app)) {
                // The kill is deferred via mHandler.post and takes the
                // ActivityManagerService lock when it runs. The reason string built here is
                // the "excessive cpu ... during ... dur=... limit=..." text.
                mHandler.post(() -> {
                    synchronized (ActivityManagerService.this) {
                        app.killLocked("excessive cpu " + cpuTimeUsed + " during "
                                + uptimeSince + " dur=" + checkDur + " limit=" + cpuLimit,
                                ApplicationExitInfo.REASON_EXCESSIVE_RESOURCE_USAGE,
                                ApplicationExitInfo.SUBREASON_EXCESSIVE_CPU,
                                true);
                    }
                });
                profile.reportExcessiveCpu();
            }
        }
        // Always store the current value so the next call measures from this point.
        profile.mLastCpuTime.set(curCpuTime);
    }
}

堆栈调试

触发检测的堆栈。默认每 5 分钟检测一次。

    at com.android.server.am.ActivityManagerService.updateAppProcessCpuTimeLPr(ActivityManagerService.java:15240)
        at com.android.server.am.ActivityManagerService.lambda$checkExcessivePowerUsage$20(ActivityManagerService.java:15224)
        at com.android.server.am.ActivityManagerService.$r8$lambda$vSwcjZLInwE40j-EAbD7kDO2Uwo(Unknown Source:0)
        at com.android.server.am.ActivityManagerService$$ExternalSyntheticLambda12.accept(Unknown Source:13)
        at com.android.server.am.ProcessList.forEachLruProcessesLOSP(ProcessList.java:3809)
        at com.android.server.am.ActivityManagerService.checkExcessivePowerUsage(ActivityManagerService.java:15206)
        at com.android.server.am.ActivityManagerService.-$$Nest$mcheckExcessivePowerUsage(Unknown Source:0)
        at com.android.server.am.ActivityManagerService$MainHandler.handleMessage(ActivityManagerService.java:1767)
        at android.os.Handler.dispatchMessage(Handler.java:106)
        at android.os.Looper.loopOnce(Looper.java:201)
        at android.os.Looper.loop(Looper.java:288)
        at android.os.HandlerThread.run(HandlerThread.java:67)
        at com.android.server.ServiceThread.run(ServiceThread.java:44)

关键检测逻辑分析

按照堆栈,主要分析以下两个方法。

/**
 * Walks the LRU process list and, for every process that has a thread and whose set process
 * state is {@code PROCESS_STATE_HOME} or higher, picks a CPU limit and runs the excessive-CPU
 * check on it (and on its phantom processes when that monitoring is enabled).
 *
 * <p>Calls {@code updateCpuStatsNow()} first, then does the walk while holding
 * {@code mProcLock}.
 */
private void checkExcessivePowerUsage() {
    updateCpuStatsNow();
    final boolean monitorPhantomProcs = mSystemReady && FeatureFlagUtils.isEnabled(mContext,
            SETTINGS_ENABLE_MONITOR_PHANTOM_PROCS);
    synchronized (mProcLock) {
        // Kills are disabled while mLastPowerCheckUptime is still 0.
        final boolean doCpuKills = mLastPowerCheckUptime != 0;
        final long curUptime = SystemClock.uptimeMillis();
        // Difference between this check and the previously stored check uptime.
        final long uptimeSince = curUptime - mLastPowerCheckUptime;
        mLastPowerCheckUptime = curUptime;
        mProcessList.forEachLruProcessesLOSP(false, app -> {
            if (app.getThread() == null) {
                return;
            }
            if (app.mState.getSetProcState() >= ActivityManager.PROCESS_STATE_HOME) {
                int cpuLimit;
                // Difference between now and the value of getWhenUnimportant().
                long checkDur = curUptime - app.mState.getWhenUnimportant();
                // Tier selection: the limit constant changes as checkDur crosses 1x, 2x and
                // 3x POWER_CHECK_INTERVAL.
                if (checkDur <= mConstants.POWER_CHECK_INTERVAL) {
                    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_1;
                } else if (checkDur <= (mConstants.POWER_CHECK_INTERVAL * 2)
                        || app.mState.getSetProcState() <= ActivityManager.PROCESS_STATE_HOME) {
                    // Combined with the enclosing ">= PROCESS_STATE_HOME" test, the second
                    // operand holds only when the state equals PROCESS_STATE_HOME, so such a
                    // process never moves past this tier.
                    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_2;
                } else if (checkDur <= (mConstants.POWER_CHECK_INTERVAL * 3)) {
                    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_3;
                } else {
                    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_4;
                }
                updateAppProcessCpuTimeLPr(uptimeSince, doCpuKills, checkDur, cpuLimit, app);
                if (monitorPhantomProcs) {
                    // Also check the phantom processes if there is any
                    updatePhantomProcessCpuTimeLPr(
                            uptimeSince, doCpuKills, checkDur, cpuLimit, app);
                }
            }
        });
    }
}
/**
 * Decides whether a process used too much CPU over the last check interval and, if so,
 * reports it to battery stats and to {@code FrameworkStatsLog}.
 *
 * @param uptimeSince length of the interval being judged; nothing is evaluated unless it is
 *                    greater than 0
 * @param doCpuKills  when {@code false} the method always returns {@code false}
 * @param cputimeUsed CPU time attributed to the process over the interval
 * @param processName process name written to the stats log
 * @param description text used only in the debug log line
 * @param cpuLimit    threshold compared against {@code (cputimeUsed * 100) / uptimeSince}
 * @param app         process record supplying the uid, process name and package list
 * @return {@code true} when {@code doCpuKills} is set, {@code uptimeSince > 0} and the
 *         integer percentage is at least {@code cpuLimit}; {@code false} otherwise
 */
private boolean checkExcessivePowerUsageLPr(final long uptimeSince, boolean doCpuKills,
        final long cputimeUsed, final String processName, final String description,
        final int cpuLimit, final ProcessRecord app) {
    // Debug-only log line; uses floating-point division, unlike the kill check below.
    if (DEBUG_POWER && (uptimeSince > 0)) {
        StringBuilder sb = new StringBuilder(128);
        sb.append("CPU for ");
        sb.append(description);
        sb.append(": over ");
        TimeUtils.formatDuration(uptimeSince, sb);
        sb.append(" used ");
        TimeUtils.formatDuration(cputimeUsed, sb);
        sb.append(" (");
        sb.append((cputimeUsed * 100.0) / uptimeSince);
        sb.append("%)");
        Slog.i(TAG_POWER, sb.toString());
    }
    // If the process has used too much CPU over the last duration, the
    // user probably doesn't want this, so kill!
    if (doCpuKills && uptimeSince > 0) {
        // Integer (long) division: the percentage is truncated before the comparison.
        if (((cputimeUsed * 100) / uptimeSince) >= cpuLimit) {
            mBatteryStatsService.reportExcessiveCpu(app.info.uid, app.processName,
                    uptimeSince, cputimeUsed);
            // One stats-log entry per package-process-stats holder of the process.
            app.getPkgList().forEachPackageProcessStats(holder -> {
                final ProcessState state = holder.state;
                FrameworkStatsLog.write(
                        FrameworkStatsLog.EXCESSIVE_CPU_USAGE_REPORTED,
                        app.info.uid,
                        processName,
                        state != null ? state.getPackage() : app.info.packageName,
                        holder.appVersion);
            });
            return true;
        }
    }
    return false;
}

有几个关键的字段

((cputimeUsed * 100) / uptimeSince) >= cpuLimit 这个条件成立后会触发杀进程。

cputimeUsed:两次检查之间进程的 CPU 使用时长。

uptimeSince:距离上次检查的时间间隔。

// Excerpt from checkExcessivePowerUsage(): uptimeSince is the current uptime minus the uptime
// stored by the previous check, which is then overwritten with the current value.
final long curUptime = SystemClock.uptimeMillis();
final long uptimeSince = curUptime - mLastPowerCheckUptime;
mLastPowerCheckUptime = curUptime;

cpuLimit

  1. 会检测 app 被设置 mWhenUnimportant 的时间,这个 important 和之前文章讲的进程优先级相关。当 ProcState < PROCESS_STATE_SERVICE 时就会被设置。
  2. 当前时间减去 mWhenUnimportant 得到 checkDur,然后按照 checkDur 计算 cpuLimit。

查看代码,计算规则是这样的:POWER_CHECK_INTERVAL 默认是 5 分钟。checkDur 不超过 10 分钟时 cpuLimit 为 25(进程状态恰为 PROCESS_STATE_HOME 时也始终为 25),不超过 15 分钟时 cpuLimit 为 10,否则就是 2。

总结:cputimeUsed 越小越不容易触发;uptimeSince 越大越不容易触发;cpuLimit 越大越不容易触发。也就是说,CPU 使用时长要小,距离上次检查的时间间隔要长,进程处于不重要状态的持续时间要短。

// Default values for the four CPU-limit tiers.
// NOTE(review): presumably these back mConstants.POWER_CHECK_MAX_CPU_1..4 used below —
// confirm against ActivityManagerConstants.
private static final int DEFAULT_POWER_CHECK_MAX_CPU_1 = 25;
private static final int DEFAULT_POWER_CHECK_MAX_CPU_2 = 25;
private static final int DEFAULT_POWER_CHECK_MAX_CPU_3 = 10;
private static final int DEFAULT_POWER_CHECK_MAX_CPU_4 = 2;
// Excerpt from checkExcessivePowerUsage(): choose the tier from checkDur, i.e. the current
// uptime minus app.mState.getWhenUnimportant().
int cpuLimit;
long checkDur = curUptime - app.mState.getWhenUnimportant();
if (checkDur <= mConstants.POWER_CHECK_INTERVAL) {
    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_1;
} else if (checkDur <= (mConstants.POWER_CHECK_INTERVAL * 2)
        || app.mState.getSetProcState() <= ActivityManager.PROCESS_STATE_HOME) {
    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_2;
} else if (checkDur <= (mConstants.POWER_CHECK_INTERVAL * 3)) {
    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_3;
} else {
    cpuLimit = mConstants.POWER_CHECK_MAX_CPU_4;
}

cputimeUsed 是怎么计算的

// Excerpt from updateAppProcessCpuTimeLPr(): the CPU time used is the difference between the
// current and the previously stored CPU time of the process profile.
final long curCpuTime = profile.mCurCpuTime.get();
final long lastCpuTime = profile.mLastCpuTime.get();
final long cpuTimeUsed = curCpuTime - lastCpuTime;

在 ProcessProfileRecord 类中,mCurCpuTime 和 mLastCpuTime 的差值可以用来计算进程在两次记录之间消耗的 CPU 时间,这个时间差值可以用于计算进程的 CPU 利用率等性能指标。另外,通过比较 mCurCpuTime 和 mLastCpuTime 的值,可以检测进程是否正在占用 CPU 资源。

mCurCpuTime 和 mLastCpuTime 这两个值是怎么计算的呢?

checkExcessivePowerUsage 被调用后,首先会调用 updateCpuStatsNow 方法,其中会用 ProcessCpuTracker 类去更新 CPU 使用信息。ProcessCpuTracker 在 ANR 分析文章中已经是老常客了。

简单来说,就是通过读取 /proc/<pid>/stat 文件,解析该文件中的各个字段,来更新进程的 CPU 时间信息,包括 mCurCpuTime 和 mLastCpuTime 等成员变量。

**再回过头看一下 excessive cpu 8020 during 300091 dur=1235468 limit=2 的意思。**

**距离上次 CPU 检测 300091 ms;进程被设置为不重要已经 1235468 ms,超过了 POWER_CHECK_INTERVAL * 3(默认 15 分钟),因此 cpuLimit 为 2。**

**这段时间内 CPU 消耗了 8020 ms。**

**(8020*100)/300091 按整数除法结果为 2,2 >= 2 成立,于是触发了杀进程。**

如何避免

  1. 避免在后台过多使用 CPU。按照 AOSP 13 的逻辑,当进程为非重要进程时:

    1. 10 分钟内要满足 (a*100)/300000<25,也就是 5 分钟内 CPU 使用少于 75s
    2. 10 到 15 分钟内要满足 (a*100)/300000<10,也就是 5 分钟内 CPU 使用少于 30s
    3. 超过 15 分钟后要满足 (a*100)/300000<2,也就是 5 分钟内 CPU 使用少于 6s
  2. 提高进程优先级。至于怎么提升进程优先级,又是另外一个话题了。

    1. 理论上 app.mState.getSetProcState() < ActivityManager.PROCESS_STATE_HOME 就不会去进行杀进程。
    2. enum ProcessStateEnum {
          /** @hide Not a real process state. */
          UNKNOWN = -1,
          /** @hide Process is a persistent system process. */
          PERSISTENT = 0,
          /** @hide Process is a persistent system process and is doing UI. */
          PERSISTENT_UI = 1,
          /** @hide Process is hosting the current top activities.  Note that this covers
           * all activities that are visible to the user. */
          TOP = 2,
          /** @hide Process is bound to a TOP app. */
          BOUND_TOP = 3,
          /** @hide Process is hosting a foreground service. */
          FOREGROUND_SERVICE = 4,
          /** @hide Process is hosting a foreground service due to a system binding. */
          BOUND_FOREGROUND_SERVICE = 5,
          /** @hide Process is important to the user, and something they are aware of. */
          IMPORTANT_FOREGROUND = 6,
          /** @hide Process is important to the user, but not something they are aware of. */
          IMPORTANT_BACKGROUND = 7,
          /** @hide Process is in the background transient so we will try to keep running. */
          TRANSIENT_BACKGROUND = 8,
          /** @hide Process is in the background running a backup/restore operation. */
          BACKUP = 9,
          /** @hide Process is in the background running a service.  Unlike oom_adj, this level
           * is used for both the normal running in background state and the executing
           * operations state. */
          SERVICE = 10,
          /** @hide Process is in the background running a receiver.   Note that from the
           * perspective of oom_adj, receivers run at a higher foreground level, but for our
           * prioritization here that is not necessary and putting them below services means
           * many fewer changes in some process states as they receive broadcasts. */
          RECEIVER = 11,
          /** @hide Same as {@link #PROCESS_STATE_TOP} but while device is sleeping. */
          TOP_SLEEPING = 12,
          /** @hide Process is in the background, but it can't restore its state so we want
           * to try to avoid killing it. */
          HEAVY_WEIGHT = 13,
          /** @hide Process is in the background but hosts the home activity. */
          HOME = 14,
          /** @hide Process is in the background but hosts the last shown activity. */
          LAST_ACTIVITY = 15,