在上篇文章中,介绍了ANR产生的原因、ANR的分类以及ANR问题的分析。本篇文章接下来将从源码的角度来分析ANR产生的过程,首先介绍InputDispatcher Timeout产生的过程。在ANR产生时,最终都会调用到appNotResponding()方法,该方法在Android 7.0以前定义在ActivityManagerService.java类中,在Android 7.0中定义在AppErrors.java类中,本文将以Android 7.0源码来分析ANR的产生过程。首先来分析appNotResponding()方法,在该方法中将记录一些ANR的信息到event、system、trace日志文件中,并发送应用未响应Dialog给系统显示。ANR日志分析将在下一个小节中描述,appNotResponding()在
frameworks/base/services/core/java/com/android/server/am/AppErrors.java文件中实现:
/*
 * Records an ANR and asks the UI to report it.
 *
 * In order, this method: writes an AM_ANR entry to the event log, builds a text report
 * and writes it to the system log, dumps stack traces through
 * ActivityManagerService.dumpStackTraces(), hands the details to addErrorToDropBox(),
 * and finally posts SHOW_NOT_RESPONDING_UI_MSG to the UI handler.
 *
 * app         - process record of the process that is not responding
 * activity    - activity involved in the ANR, may be null
 * parent      - parent activity, may be null; its process is also dumped when it differs
 * aboveSystem - forwarded to the UI message as arg1 (1 when true, 0 otherwise)
 * annotation  - reason text; written to the event log and to the report as "Reason"
 */
final void appNotResponding(ProcessRecord app, ActivityRecord activity,
ActivityRecord parent, boolean aboveSystem, final String annotation) {
// Pid collections that are later handed to dumpStackTraces()
ArrayList firstPids = new ArrayList(5);
SparseArray lastPids = new SparseArray(20);
...........
// Timestamp taken when ANR handling starts; passed to printCurrentState() below
long anrTime = SystemClock.uptimeMillis();
if (ActivityManagerService.MONITOR_CPU_USAGE) {
// First refresh of the CPU statistics
mService.updateCpuStatsNow();
}
// Unless configured otherwise, swallow ANRs in background processes & kill the process.
boolean showBackground = Settings.Secure.getInt(mContext.getContentResolver(),
Settings.Secure.ANR_SHOW_BACKGROUND, 0) != 0;
boolean isSilentANR;
// While holding the service lock: mark the process, write the event log entry
// and collect the pids whose stacks should be dumped
synchronized (mService) {
.........
app.notResponding = true;
// 1. Write the ANR to the event log
EventLog.writeEvent(EventLogTags.AM_ANR, app.userId, app.pid,
app.processName, app.info.flags, annotation);
// The ANR process itself is always the first pid to dump
firstPids.add(app.pid);
// Silent when background ANRs are not shown, the process reports itself as not
// interesting to the user, and the process is not MY_PID
isSilentANR = !showBackground && !app.isInterestingToUserLocked() && app.pid != MY_PID;
if (!isSilentANR) {
int parentPid = app.pid;
if (parent != null && parent.app != null && parent.app.pid > 0) {
parentPid = parent.app.pid;
}
if (parentPid != app.pid) firstPids.add(parentPid);// add the parent's pid when it is a different process
if (MY_PID != app.pid && MY_PID != parentPid) firstPids.add(MY_PID);// add MY_PID unless already present (presumably the system server's own pid - TODO confirm)
// Walk the LRU process list from the end: persistent processes go to firstPids,
// all other running processes go to lastPids
for (int i = mService.mLruProcesses.size() - 1; i >= 0; i--) {
ProcessRecord r = mService.mLruProcesses.get(i);
if (r != null && r.thread != null) {
int pid = r.pid;
if (pid > 0 && pid != app.pid && pid != parentPid && pid != MY_PID) {
if (r.persistent) {
firstPids.add(pid);
if (DEBUG_ANR) Slog.i(TAG, "Adding persistent proc: " + r);
} else {
lastPids.put(pid, Boolean.TRUE);
if (DEBUG_ANR) Slog.i(TAG, "Adding ANR proc: " + r);
}
}
}
}
}
}
// 2. Build the report text that is written to the system log further down
StringBuilder info = new StringBuilder();
info.setLength(0);
info.append("ANR in ").append(app.processName);
if (activity != null && activity.shortComponentName != null) {
info.append(" (").append(activity.shortComponentName).append(")");
}
info.append("\n");
info.append("PID: ").append(app.pid).append("\n");
if (annotation != null) {
info.append("Reason: ").append(annotation).append("\n");
}
if (parent != null && parent != activity) {
info.append("Parent: ").append(parent.shortComponentName).append("\n");
}
ProcessCpuTracker processCpuTracker = new ProcessCpuTracker(true);
String[] nativeProcs = NATIVE_STACKS_OF_INTEREST;
// 3. Dump stack traces to the traces file
File tracesFile = null;
if (isSilentANR) {
// Silent ANR: no CPU tracker and no native processes are passed
tracesFile = mService.dumpStackTraces(true, firstPids, null, lastPids,
null);
} else {
// Regular ANR: dump through AMS with the local CPU tracker and the native processes of interest
tracesFile = mService.dumpStackTraces(true, firstPids, processCpuTracker, lastPids,
nativeProcs);
}
String cpuInfo = null;
if (ActivityManagerService.MONITOR_CPU_USAGE) {
// Second refresh of the CPU statistics
mService.updateCpuStatsNow();
synchronized (mService.mProcessCpuTracker) {
// State reported by the service-wide CPU tracker for anrTime
cpuInfo = mService.mProcessCpuTracker.printCurrentState(anrTime);
}
info.append(processCpuTracker.printCurrentLoad());
info.append(cpuInfo);
}
// State reported by the local CPU tracker created above
info.append(processCpuTracker.printCurrentState(anrTime));
// Write the assembled report to the system log at error level
Slog.e(TAG, info.toString());
if (tracesFile == null) {
// There is no trace file, so dump (only) the alleged culprit's threads to the log
// by sending SIGNAL_QUIT to the ANR process
Process.sendSignal(app.pid, Process.SIGNAL_QUIT);
}
// Hand the ANR details (including cpuInfo and the traces file) to DropBox
mService.addErrorToDropBox("anr", app, app.processName, activity, parent, annotation,
cpuInfo, tracesFile, null);
........
synchronized (mService) {
........
// Mark the app as not responding, passing along the short reason and the full report
makeAppNotRespondingLocked(app,
activity != null ? activity.shortComponentName : null,
annotation != null ? "ANR " + annotation : "ANR",
info.toString());
// 4. Ask the UI handler to show the "application not responding" dialog
Message msg = Message.obtain();
HashMap map = new HashMap();
msg.what = ActivityManagerService.SHOW_NOT_RESPONDING_UI_MSG;
msg.obj = map;
msg.arg1 = aboveSystem ? 1 : 0;
map.put("app", app);
if (activity != null) {
map.put("activity", activity);
}
mService.mUiHandler.sendMessage(msg);
}
}
从上面可以看到appNotResponding()方法主要做了4件事情:
1.记录ANR信息到event日志中;
2.记录ANR信息到system日志中,里面包含了ANR发生时的CPU状态信息;
3.记录ANR信息到trace日志中,里面包含了最近运行进程的方法调用堆栈信息;
4.通知系统显示应用未响应的Dialog;
前文介绍的4种ANR类型最终都会调用到appNotResponding()方法,接下来分别介绍这4种ANR是如何产生的,以及如何最终调用到AppErrors的appNotResponding()方法。
一、 InputDispatcher Timeout
当输入事件例如触摸、点击事件,在5s内没有响应的话,则会产生一个ANR消息。InputDispatcher Timeout的整体流程如下:
应用接收到输入事件时,会向系统注册该输入事件对应的输入通道、输入窗口等信息,InputDispatcher会监听这些注册的输入事件。当输入事件执行完成了,就会向系统发送处理完成finish消息。如果在5s内,没有收到输入事件结束finish消息,则InputDispatcher发送ANR通知。InputDispatcher注册输入通道信息的函数如下:
frameworks/native/services/inputflinger/InputDispatcher.cpp
/*
 * Registers an input channel with the dispatcher and starts watching the
 * channel's file descriptor on mLooper.
 *
 * A Connection is created for the channel and stored in mConnectionsByFd keyed by
 * the fd; when monitor is true the channel is also pushed onto mMonitoringChannels.
 * Returns OK on the path shown here.
 */
status_t InputDispatcher::registerInputChannel(const sp& inputChannel,
const sp& inputWindowHandle, bool monitor) {
{ // acquire lock
AutoMutex _l(mLock);
...
// Create the connection object for this channel
sp connection = new Connection(inputChannel, inputWindowHandle, monitor);
// File descriptor of the channel
int fd = inputChannel->getFd();
// Index the connection by its fd
mConnectionsByFd.add(fd, connection);
if (monitor) {
mMonitoringChannels.push(inputChannel);
}
// Watch the fd for ALOOPER_EVENT_INPUT; handleReceiveCallback is invoked with this dispatcher as data
mLooper->addFd(fd, 0, ALOOPER_EVENT_INPUT, handleReceiveCallback, this);
} // release lock
// Wake the looper because some connections have changed.
mLooper->wake();
return OK;
}
mLooper是Native层的Looper,在system/core/libutils/Looper.cpp文件中实现。其addFd()方法的主要作用是把fd通过epoll_ctl注册到epoll中,并将对应的请求保存到请求集合mRequests中。
// Adds or updates the watch request for a file descriptor.
//
// A Request is filled in from the arguments and given the next sequence number.
// If the fd is not yet present in mRequests it is added to the epoll set
// (EPOLL_CTL_ADD) and stored; otherwise the epoll registration is modified
// (EPOLL_CTL_MOD) and the stored request is replaced.
// Returns 1 on the path shown here.
int Looper::addFd(int fd, int ident, int events, const sp& callback, void* data) {
.......
{ // acquire lock
AutoMutex _l(mLock);
Request request;
request.fd = fd;
request.ident = ident;
request.events = events;
request.seq = mNextRequestSeq++;
request.callback = callback;
request.data = data;
if (mNextRequestSeq == -1) mNextRequestSeq = 0; // reserve sequence number -1
struct epoll_event eventItem;
request.initEventItem(&eventItem);
ssize_t requestIndex = mRequests.indexOfKey(fd);
// A negative index means this fd has not been registered before
if (requestIndex < 0) {
int epollResult = epoll_ctl(mEpollFd, EPOLL_CTL_ADD, fd, & eventItem);
........
// Store the new request keyed by fd
mRequests.add(fd, request);
} else {// fd already registered: modify the existing registration
int epollResult = epoll_ctl(mEpollFd, EPOLL_CTL_MOD, fd, & eventItem);
.....
// Replace the stored request with the new one
mRequests.replaceValueAt(requestIndex, request);
}
} // release lock
return 1;
}
当开始处理请求队列中的输入事件请求时,会回调该输入事件注册的回调函数handleReceiveCallback。
/*
 * Looper callback registered for each input channel fd.
 *
 * data is the InputDispatcher that registered the fd. When the events contain
 * neither ALOOPER_EVENT_ERROR nor ALOOPER_EVENT_HANGUP, finished signals are
 * received from the connection and each one ends the matching dispatch cycle.
 * Returns 1 when at least one signal was handled and the final status is
 * WOULD_BLOCK; on the remaining path shown here the channel is unregistered
 * and 0 is returned, which removes the callback.
 */
int InputDispatcher::handleReceiveCallback(int fd, int events, void* data) {
InputDispatcher* d = static_cast(data);
{ // acquire lock
AutoMutex _l(d->mLock);
ssize_t connectionIndex = d->mConnectionsByFd.indexOfKey(fd);
....
bool notify;
sp connection = d->mConnectionsByFd.valueAt(connectionIndex);
if (!(events & (ALOOPER_EVENT_ERROR | ALOOPER_EVENT_HANGUP))) {
......
nsecs_t currentTime = now();
bool gotOne = false;
status_t status;
// Keep receiving finished signals until receiveFinishedSignal reports a non-zero status
for (;;) {
uint32_t seq;
bool handled;
// Try to receive the next finished signal from this connection
status = connection->inputPublisher.receiveFinishedSignal(&seq, &handled);
if (status) {
break;
}
// End the dispatch cycle identified by seq
d->finishDispatchCycleLocked(currentTime, connection, seq, handled);
gotOne = true;
}
if (gotOne) {
d->runCommandsLockedInterruptible();
if (status == WOULD_BLOCK) {
// Keep the callback registered
return 1;
}
}
......
} else {
........
}
// Unregister the channel.
d->unregisterInputChannelLocked(connection->inputChannel, notify);
return 0; // remove the callback
} // release lock
}
当开始处理输入事件后,会循环等待输入事件finish信号。如果接收到输入事件的finish信号后,则结束当前事件的分发过程。如果在5s内,还未收到输入事件的finish信号,则会调用InputDispatcher的handleTargetsNotReadyLocked()方法,发送ANR通知。
1.handleTargetsNotReadyLocked方法(InputDispatcher.cpp)
handleTargetsNotReadyLocked方法主要的工作是获取超时时间,然后设置输入事件等待的原因、等待开始的时间以及等待的超时时间。如果当前时间已达到或超过等待的超时时间,则说明发生了超时,调用onANRLocked发送ANR通知。
// Handles the case where the target of an input event is not ready.
//
// When the wait cause is not yet INPUT_TARGET_WAIT_CAUSE_APPLICATION_NOT_READY,
// the wait is started: the timeout is read from the window handle, else from the
// application handle, else DEFAULT_INPUT_DISPATCHING_TIMEOUT is used, and the
// wait start / timeout times are recorded.
// Returns INPUT_EVENT_INJECTION_TIMED_OUT when the wait has already been marked
// expired. When currentTime has reached mInputTargetWaitTimeoutTime, onANRLocked()
// is called, *nextWakeupTime is set to LONG_LONG_MIN and
// INPUT_EVENT_INJECTION_PENDING is returned.
int32_t InputDispatcher::handleTargetsNotReadyLocked(nsecs_t currentTime,
const EventEntry* entry,
const sp& applicationHandle,
const sp& windowHandle,
nsecs_t* nextWakeupTime, const char* reason) {
if (applicationHandle == NULL && windowHandle == NULL) {
........
} else {
if (mInputTargetWaitCause != INPUT_TARGET_WAIT_CAUSE_APPLICATION_NOT_READY) {
.......
nsecs_t timeout;
// Pick the dispatching timeout: window first, then application, then the default
if (windowHandle != NULL) {
timeout = windowHandle->getDispatchingTimeout(DEFAULT_INPUT_DISPATCHING_TIMEOUT);
} else if (applicationHandle != NULL) {
timeout = applicationHandle->getDispatchingTimeout(
DEFAULT_INPUT_DISPATCHING_TIMEOUT);
} else {
timeout = DEFAULT_INPUT_DISPATCHING_TIMEOUT;
}
// Record why we are waiting, when the wait started and when it times out
mInputTargetWaitCause = INPUT_TARGET_WAIT_CAUSE_APPLICATION_NOT_READY;
mInputTargetWaitStartTime = currentTime;
mInputTargetWaitTimeoutTime = currentTime + timeout;
mInputTargetWaitTimeoutExpired = false;
mInputTargetWaitApplicationHandle.clear();
if (windowHandle != NULL) {
mInputTargetWaitApplicationHandle = windowHandle->inputApplicationHandle;
}
if (mInputTargetWaitApplicationHandle == NULL && applicationHandle != NULL) {
mInputTargetWaitApplicationHandle = applicationHandle;
}
}
}
if (mInputTargetWaitTimeoutExpired) {
return INPUT_EVENT_INJECTION_TIMED_OUT;
}
// The current time has reached or passed the recorded timeout time
if (currentTime >= mInputTargetWaitTimeoutTime) {
onANRLocked(currentTime, applicationHandle, windowHandle,
entry->eventTime, mInputTargetWaitStartTime, reason);
*nextWakeupTime = LONG_LONG_MIN;
return INPUT_EVENT_INJECTION_PENDING;
} else {
........
}
}
2.onANRLocked方法(InputDispatcher.cpp)
在onANRLocked()方法中,主要是记录ANR发生时的状态,并将ANR日志输出到main日志中。
// Logs an ANR, captures the dispatcher state into mLastANRState and posts a
// command entry for doNotifyANRLockedInterruptible.
//
// currentTime, eventTime and waitStartTime are nsecs_t values; the differences
// are multiplied by 0.000001 and printed with an "ms" suffix.
void InputDispatcher::onANRLocked(
nsecs_t currentTime, const sp& applicationHandle,
const sp& windowHandle,
nsecs_t eventTime, nsecs_t waitStartTime, const char* reason) {
float dispatchLatency = (currentTime - eventTime) * 0.000001f;
float waitDuration = (currentTime - waitStartTime) * 0.000001f;
// Log the ANR at info level
ALOGI("Application is not responding: %s. "
"It has been %0.1fms since event, %0.1fms since wait started. Reason: %s",
getApplicationWindowLabelLocked(applicationHandle, windowHandle).string(),
dispatchLatency, waitDuration, reason);
// Capture the state at the time of the ANR, stamped with the local wall-clock time
time_t t = time(NULL);
struct tm tm;
localtime_r(&t, &tm);
char timestr[64];
strftime(timestr, sizeof(timestr), "%F %T", &tm);
mLastANRState.clear();
mLastANRState.append(INDENT "ANR:\n");
mLastANRState.appendFormat(INDENT2 "Time: %s\n", timestr);
mLastANRState.appendFormat(INDENT2 "Window: %s\n",
getApplicationWindowLabelLocked(applicationHandle, windowHandle).string());
mLastANRState.appendFormat(INDENT2 "DispatchLatency: %0.1fms\n", dispatchLatency);
mLastANRState.appendFormat(INDENT2 "WaitDuration: %0.1fms\n", waitDuration);
mLastANRState.appendFormat(INDENT2 "Reason: %s\n", reason);
dumpDispatchStateLocked(mLastANRState);
// Post the notify-ANR command and attach the handles and reason it needs
CommandEntry* commandEntry = postCommandLocked(
& InputDispatcher::doNotifyANRLockedInterruptible);
commandEntry->inputApplicationHandle = applicationHandle;
commandEntry->inputWindowHandle = windowHandle;
commandEntry->reason = reason;
}
3.doNotifyANRLockedInterruptible方法(InputDispatcher.cpp)
doNotifyANRLockedInterruptible方法主要工作是从系统框架层获取新的timeout,并发送ANR消息到框架层,最后恢复输入事件状态,并用新的timeout开始下一个输入事件的超时处理。
// Command handler that reports an ANR to the policy and then resumes dispatching.
//
// mLock is released around the call into mPolicy->notifyANR() and re-acquired
// afterwards. The timeout returned by the policy is passed to
// resumeAfterTargetsNotReadyTimeoutLocked() together with the window's input
// channel, or NULL when the command has no window handle.
void InputDispatcher::doNotifyANRLockedInterruptible(
CommandEntry* commandEntry) {
mLock.unlock();
// Report the ANR to the policy; the return value is the new timeout
nsecs_t newTimeout = mPolicy->notifyANR(
commandEntry->inputApplicationHandle, commandEntry->inputWindowHandle,
commandEntry->reason);
mLock.lock();
// Resume after the timeout using the value returned by the policy
resumeAfterTargetsNotReadyTimeoutLocked(newTimeout,
commandEntry->inputWindowHandle != NULL
? commandEntry->inputWindowHandle->getInputChannel() : NULL);
}
4.notifyANR方法(InputManagerService.java)
在InputDispatcher.cpp中调用doNotifyANRLockedInterruptible()方法后,最终会调用到framework中的InputManagerService.java类中的notifyANR方法。notifyANR方法调用InputMonitor类中的notifyANR方法。
frameworks/base/services/core/java/com/android/server/input/InputManagerService.java
/*
 * Native callback: reports that input event dispatching hit an ANR.
 * Forwards all three arguments to mWindowManagerCallbacks.notifyANR() and
 * returns its result unchanged.
 */
private long notifyANR(InputApplicationHandle inputApplicationHandle,
InputWindowHandle inputWindowHandle, String reason) {
return mWindowManagerCallbacks.notifyANR(
inputApplicationHandle, inputWindowHandle, reason);
}
5.notifyANR方法(InputMonitor.java)
notifyANR方法返回一个超时时间,并通知窗口管理器,发生了ANR。
frameworks/base/services/core/java/com/android/server/wm/InputMonitor.java
/*
 * Notifies the window manager that an application is not responding.
 *
 * Returns a new timeout in nanoseconds to keep waiting, or 0 to abort dispatching.
 * When an app window token with an appToken is available, the decision is taken by
 * appToken.keyDispatchingTimedOut(); otherwise, when only a window state is known,
 * it is taken by the activity manager's inputDispatchingTimedOut() using the pid
 * of the window's session.
 */
public long notifyANR(InputApplicationHandle inputApplicationHandle,
InputWindowHandle inputWindowHandle, String reason) {
AppWindowToken appWindowToken = null;
WindowState windowState = null;
boolean aboveSystem = false;
synchronized (mService.mWindowMap) {
if (inputWindowHandle != null) {
// Resolve the WindowState behind the input window handle
windowState = (WindowState) inputWindowHandle.windowState;
if (windowState != null) {
appWindowToken = windowState.mAppToken;
}
}
// Fall back to the token carried by the application handle
if (appWindowToken == null && inputApplicationHandle != null) {
appWindowToken = (AppWindowToken)inputApplicationHandle.appWindowToken;
}
if (windowState != null) {
Slog.i(TAG_WM, "Input event dispatching timed out "
+ "sending to " + windowState.mAttrs.getTitle()
+ ". Reason: " + reason);
int systemAlertLayer = mService.mPolicy.windowTypeToLayerLw(
WindowManager.LayoutParams.TYPE_SYSTEM_ALERT);
// True when the window's base layer is above the system alert layer
aboveSystem = windowState.mBaseLayer > systemAlertLayer;
} else if (appWindowToken != null) {
Slog.i(TAG_WM, "Input event dispatching timed out "
+ "sending to application " + appWindowToken.stringName
+ ". Reason: " + reason);
} else {
Slog.i(TAG_WM, "Input event dispatching timed out "
+ ". Reason: " + reason);
}
// Save the ANR state in the window manager service
mService.saveANRStateLocked(appWindowToken, windowState, reason);
}
if (appWindowToken != null && appWindowToken.appToken != null) {
try {
// Notify the activity manager about the timeout and let it decide whether
// to abort dispatching or keep waiting.
boolean abort = appWindowToken.appToken.keyDispatchingTimedOut(reason);
if (! abort) {
// The activity manager declined to abort dispatching.
// Wait a bit longer and timeout again later.
return appWindowToken.inputDispatchingTimeoutNanos;
}
} catch (RemoteException ex) {
// NOTE(review): exception is ignored here, so execution falls through to "return 0"
}
} else if (windowState != null) {
try {
// Notify the activity manager about the timeout and let it decide whether
// to abort dispatching or keep waiting.
long timeout = ActivityManagerNative.getDefault().inputDispatchingTimedOut(
windowState.mSession.mPid, aboveSystem, reason);
if (timeout >= 0) {
// The activity manager declined to abort dispatching; wait for the returned
// timeout, converted to nanoseconds.
return timeout * 1000000L; // nanoseconds
}
} catch (RemoteException ex) {
// NOTE(review): exception is ignored here, so execution falls through to "return 0"
}
}
// Returning 0 aborts dispatching of the event
return 0; // abort dispatching
}
6.inputDispatchingTimedOut方法(ActivityManagerService.java)
该方法首先通过getInputDispatchingTimeoutLocked方法获取超时时间,然后调用另一个重载的inputDispatchingTimedOut方法通知AMS输入事件分发超时了,发送ANR通知。
/*
 * Entry point used to report an input dispatching timeout for a pid.
 *
 * Throws SecurityException when the caller does not hold the FILTER_EVENTS
 * permission. Looks up the ProcessRecord for pid, reads its dispatching timeout
 * and delegates to the ProcessRecord overload of inputDispatchingTimedOut().
 * Returns -1 when that overload returns false, otherwise the timeout obtained
 * from getInputDispatchingTimeoutLocked().
 */
public long inputDispatchingTimedOut(int pid, final boolean aboveSystem, String reason) {
if (checkCallingPermission(android.Manifest.permission.FILTER_EVENTS)
!= PackageManager.PERMISSION_GRANTED) {
throw new SecurityException("Requires permission "
+ android.Manifest.permission.FILTER_EVENTS);
}
ProcessRecord proc;
long timeout;
synchronized (this) {
synchronized (mPidsSelfLocked) {
// May be null when no process record is registered for this pid
proc = mPidsSelfLocked.get(pid);
}
timeout = getInputDispatchingTimeoutLocked(proc);
}
// No activity or parent is known on this path, so both are passed as null
if (!inputDispatchingTimedOut(proc, null, null, aboveSystem, reason)) {
return -1;
}
return timeout;
}
7.getInputDispatchingTimeoutLocked方法(ActivityManagerService.java)
获取输入事件超时时间,这个时间在KEY_DISPATCHING_TIMEOUT中定义,为5s。
/*
 * Returns the input dispatching timeout for a process.
 * A non-null process that has an instrumentation class or uses a wrapper gets
 * INSTRUMENTATION_KEY_DISPATCHING_TIMEOUT; every other case, including a null
 * process, gets KEY_DISPATCHING_TIMEOUT.
 */
public static long getInputDispatchingTimeoutLocked(ProcessRecord r) {
if (r != null && (r.instrumentationClass != null || r.usingWrapper)) {
return INSTRUMENTATION_KEY_DISPATCHING_TIMEOUT;
}
return KEY_DISPATCHING_TIMEOUT;
}
// Default key dispatching timeout: 5 seconds, expressed in milliseconds
static final int KEY_DISPATCHING_TIMEOUT = 5*1000;
8.inputDispatchingTimedOut方法(ActivityManagerService.java)
发送ANR通知给AppErrors类统一处理。AppErrors在appNotResponding方法中统一处理ANR消息,在前面的小节中已经介绍了。
/*
 * Handles an input dispatching timeout.
 *
 * Builds the annotation "Input dispatching timed out", with " (reason)" appended
 * when reason is non-null. When proc is non-null, a Runnable is posted to mHandler
 * that calls mAppErrors.appNotResponding() with that annotation.
 * Returns true on the path shown here; per the original comment the return value
 * indicates whether dispatching should be aborted or continued.
 */
public boolean inputDispatchingTimedOut(final ProcessRecord proc,
final ActivityRecord activity, final ActivityRecord parent,
final boolean aboveSystem, String reason) {
.....
final String annotation;
if (reason == null) {
annotation = "Input dispatching timed out";
} else {
annotation = "Input dispatching timed out (" + reason + ")";
}
if (proc != null) {
......
// Run the ANR handling on mHandler rather than inline in this call
mHandler.post(new Runnable() {
@Override
public void run() {
// Hand the ANR over to AppErrors
mAppErrors.appNotResponding(proc, activity, parent, aboveSystem, annotation);
}
});
}
return true;
}
至此,已经介绍完了InputDispatcher Timeout产生的整个流程,可以看到InputDispatcher的超时时间为5s。接下来分析Broadcast Timeout产生的流程。