[Android6.0] 数据业务重试机制

Android 6.0 Framework telephony中数据业务连接错误处理一般分3种情况:

1. SETUP_DATA_CALL 时返回错误

2. Modem上报DATA_CALL_LIST包含错误码或者连接中断

3. 一段时间内没有上下行数据(TX/RX)


下面具体来看每种情况的处理。

1. SETUP_DATA_CALL失败

DataConnection在收到SETUP_DATA_CALL结果后,用Message通知DcTracker处理:
  1. protected void onDataSetupComplete(AsyncResult ar) {
  2. if (ar.exception == null) {
  3. //链接成功
  4. } else{
  5. ...
  6. //标记permanent fail的次数,会影响后面onDataSetupCompleteError的判断
  7. if (isPermanentFail(cause)) apnContext.decWaitingApnsPermFailCount();
  8. apnContext.removeWaitingApn(apnContext.getApnSetting()); //从waiting列表中移除已经失败的APN
  9. onDataSetupCompleteError(ar);//继续处理错误
  10. ...
  11. }
  12. }

处理Error的逻辑:
1. 如果apnContext中的所有waiting APN都失败了,且不是每个都发生permanent fail(永久性错误),则设置delay并重新发起这次连接
2. 如果apnContext中仍有没有尝试的waiting APN,则设置delay并尝试用下一个APN去连接
  1. /**
  2. * Error has occurred during the SETUP {aka bringUP} request and the DCT
  3. * should either try the next waiting APN or start over from the
  4. * beginning if the list is empty. Between each SETUP request there will
  5. * be a delay defined by {@link #getApnDelay()}.
  6. */
  7. @Override
  8. protected void onDataSetupCompleteError(AsyncResult ar) {
  9. String reason = ""; // NOTE(review): not read anywhere in this method as shown
  10. ApnContext apnContext = getValidApnContext(ar, "onDataSetupCompleteError");
  11. if (apnContext == null) return;
  12. // Waiting list is empty: every APN has already been tried
  13. if (apnContext.getWaitingApns().isEmpty()) {
  14. apnContext.setState(DctConstants.State.FAILED);// mark the apnContext as FAILED
  15. mPhone.notifyDataConnection(Phone.REASON_APN_FAILED, apnContext.getApnType());
  16. // Drop the DataConnection reference held by this apnContext
  17. apnContext.setDataConnectionAc( null);
  18. // A perm-fail count of 0 is treated as "all APNs failed permanently": do not retry
  19. if (apnContext.getWaitingApnsPermFailCount() == 0) {
  20. if (DBG) {
  21. log( "onDataSetupCompleteError: All APN's had permanent failures, stop retrying");
  22. }
  23. } else { // otherwise schedule a retry after the APN delay
  24. int delay = getApnDelay(Phone.REASON_APN_FAILED);
  25. if (DBG) {
  26. log( "onDataSetupCompleteError: Not all APN's had permanent failures delay="
  27. + delay);
  28. }
  29. startAlarmForRestartTrySetup(delay, apnContext);
  30. }
  31. } else { // untried APNs remain in the waiting list: move on to the next one
  32. if (DBG) log( "onDataSetupCompleteError: Try next APN");
  33. apnContext.setState(DctConstants.State.SCANNING);
  34. // Wait a bit before trying the next APN, so that
  35. // we're not tying up the RIL command channel
  36. startAlarmForReconnect(getApnDelay(Phone.REASON_APN_FAILED), apnContext); // try the next APN after the delay
  37. }
  38. }

附:ApnContext的所有状态

  1. /**
  2. * IDLE: ready to start data connection setup, default state
  3. * CONNECTING: startPppd() has been issued but has not finished yet
  4. * SCANNING: data connection failed with one apn but other apns are available
  5. * ready to start data connection on other apns (before INITING)
  6. * CONNECTED: IP connection is set up
  7. * DISCONNECTING: Connection.disconnect() has been called, but PDP
  8. * context is not yet deactivated
  9. * FAILED: data connection failed for all apn settings
  10. * RETRYING: data connection failed but we're going to retry.
  11. *
  12. * getDataConnectionState() maps State to DataState
  13. * FAILED or IDLE : DISCONNECTED
  14. * RETRYING or CONNECTING or SCANNING: CONNECTING
  15. * CONNECTED : CONNECTED or DISCONNECTING (NOTE(review): this line reads reversed relative to the two above - confirm)
  16. */
  17. public enum State {
  18. IDLE, // default state; reported as DISCONNECTED
  19. CONNECTING, // setup issued, not finished; reported as CONNECTING
  20. SCANNING, // one apn failed, others still available; reported as CONNECTING
  21. CONNECTED, // IP connection is up
  22. DISCONNECTING, // disconnect requested, PDP context not yet deactivated
  23. FAILED, // all apn settings failed; reported as DISCONNECTED
  24. RETRYING // failed but a retry is planned; reported as CONNECTING
  25. }


2. 连接中断

DcController监听RIL_UNSOL_DATA_CALL_LIST_CHANGED消息,获得每一个数据连接的更新:

  1. mPhone.mCi.registerForDataNetworkStateChanged(getHandler(),
  2. DataConnection.EVENT_DATA_STATE_CHANGED, null);

RIL上报DATA_CALL_LIST_CHANGED时会带上当前的Modem中的DataCall list,DcController将此dataCall list和上层的active list做对比:

1. 已经丢失 及 断开 的连接将会重试

2. 发生变化 和 发生永久错误的连接则需要清除

  1. private void onDataStateChanged(ArrayList<DataCallResponse> dcsList) {
  2. // Create hashmap of cid to DataCallResponse
  3. HashMap<Integer, DataCallResponse> dataCallResponseListByCid =
  4. new HashMap<Integer, DataCallResponse>();
  5. for (DataCallResponse dcs : dcsList) {
  6. dataCallResponseListByCid.put(dcs.cid, dcs);
  7. }
  8. //如果上报的dcsList中并没有找到对应的active的链接,则默认连接丢失并加入重试List
  9. ArrayList<DataConnection> dcsToRetry = new ArrayList<DataConnection>();
  10. for (DataConnection dc : mDcListActiveByCid.values()) {
  11. if (dataCallResponseListByCid.get(dc.mCid) == null) {
  12. if (DBG) log( "onDataStateChanged: add to retry dc=" + dc);
  13. dcsToRetry.add(dc);
  14. }
  15. }
  16. // Find which connections have changed state and send a notification or cleanup
  17. // and any that are in active need to be retried.
  18. ArrayList<ApnContext> apnsToCleanup = new ArrayList<ApnContext>();
  19. boolean isAnyDataCallDormant = false;
  20. boolean isAnyDataCallActive = false;
  21. for (DataCallResponse newState : dcsList) {
  22. DataConnection dc = mDcListActiveByCid.get(newState.cid);
  23. //不在Active MAP中的连接,表明这个连接还没同步到上层,会有其他地方处理。
  24. if (dc == null) {
  25. // UNSOL_DATA_CALL_LIST_CHANGED arrived before SETUP_DATA_CALL completed.
  26. loge( "onDataStateChanged: no associated DC yet, ignore");
  27. continue;
  28. }
  29. if (dc.mApnContexts.size() == 0) {
  30. if (DBG) loge( "onDataStateChanged: no connected apns, ignore");
  31. } else {
  32. // Determine if the connection/apnContext should be cleaned up
  33. // or just a notification should be sent out.
  34. if (newState.active == DATA_CONNECTION_ACTIVE_PH_LINK_INACTIVE) {
  35. //连接INACTIVE,按照错误类型区分处理
  36. DcFailCause failCause = DcFailCause.fromInt(newState.status);
  37. if (failCause.isRestartRadioFail()) {
  38. //恢复需要重启radio
  39. mDct.sendRestartRadio();
  40. } else if (mDct.isPermanentFail(failCause)) {
  41. //链接发生不可恢复的错误,需要Cleanup
  42. apnsToCleanup.addAll(dc.mApnContexts.keySet());
  43. } else {
  44. for (ApnContext apnContext : dc.mApnContexts.keySet()) {
  45. if (apnContext.isEnabled()) {
  46. //apn是enabled状态,重试
  47. dcsToRetry.add(dc);
  48. break;
  49. } else {
  50. //apn已经disabled,需要cleanup
  51. apnsToCleanup.add(apnContext);
  52. }
  53. }
  54. }
  55. } else {
  56. //LinkProperty发生变化
  57. UpdateLinkPropertyResult result = dc.updateLinkProperty(newState);
  58. if (result.oldLp.equals(result.newLp)) {
  59. if (DBG) log( "onDataStateChanged: no change");
  60. } else {
  61. //判断interface是否一致
  62. if (result.oldLp.isIdenticalInterfaceName(result.newLp)) {
  63. if (! result.oldLp.isIdenticalDnses(result.newLp) ||
  64. ! result.oldLp.isIdenticalRoutes(result.newLp) ||
  65. ! result.oldLp.isIdenticalHttpProxy(result.newLp) ||
  66. ! result.oldLp.isIdenticalAddresses(result.newLp)) {
  67. // If the same address type was removed and
  68. // added we need to cleanup
  69. CompareResult<LinkAddress> car =
  70. result.oldLp.compareAddresses(result.newLp);
  71. if (DBG) {
  72. log( "onDataStateChanged: oldLp=" + result.oldLp +
  73. " newLp=" + result.newLp + " car=" + car);
  74. }
  75. boolean needToClean = false;
  76. //如果address发生变化,需要清除这个old connection
  77. for (LinkAddress added : car.added) {
  78. for (LinkAddress removed : car.removed) {
  79. if (NetworkUtils.addressTypeMatches(
  80. removed.getAddress(),
  81. added.getAddress())) {
  82. needToClean = true;
  83. break;
  84. }
  85. }
  86. }
  87. if (needToClean) {
  88. apnsToCleanup.addAll(dc.mApnContexts.keySet());
  89. } else {
  90. if (DBG) log( "onDataStateChanged: simple change");
  91. //其他的LP变化,只做notify
  92. for (ApnContext apnContext : dc.mApnContexts.keySet()) {
  93. mPhone.notifyDataConnection(
  94. PhoneConstants.REASON_LINK_PROPERTIES_CHANGED,
  95. apnContext.getApnType());
  96. }
  97. }
  98. } else {
  99. if (DBG) {
  100. log( "onDataStateChanged: no changes");
  101. }
  102. }
  103. } else {
  104. //interface发生改变,cleanUp这个old connection
  105. apnsToCleanup.addAll(dc.mApnContexts.keySet());
  106. if (DBG) {
  107. log( "onDataStateChanged: interface change, cleanup apns="
  108. + dc.mApnContexts);
  109. }
  110. }
  111. }
  112. }
  113. }
  114. ...
  115. }
  116. ...
  117. //清除链接
  118. for (ApnContext apnContext : apnsToCleanup) {
  119. mDct.sendCleanUpConnection( true, apnContext);
  120. }
  121. //通知DataConnection链接丢失,需要发起重连
  122. for (DataConnection dc : dcsToRetry) {
  123. dc.sendMessage(DataConnection.EVENT_LOST_CONNECTION, dc.mTag);
  124. }
  125. }
  126. }

DataConnection ActiveState在收到LOST_CONNECTION消息后:

1. 如果重试次数没有达到上限,则设置定时重试,并切换到RetryingState

2. 如果不需要重试,则切换到Inactive状态,并可能通知DcTracker处理(onDataSetupCompleteError,可看第一种情况)

  1. case EVENT_LOST_CONNECTION: {
  2. if (DBG) {
  3. log( "DcActiveState EVENT_LOST_CONNECTION dc=" + DataConnection. this);
  4. }
  5. if (mRetryManager.isRetryNeeded()) {
  6. // We're going to retry
  7. int delayMillis = mRetryManager.getRetryTimer();
  8. //重试
  9. mDcRetryAlarmController.startRetryAlarm(EVENT_RETRY_CONNECTION, mTag,
  10. delayMillis);
  11. transitionTo(mRetryingState);
  12. } else {
  13. mInactiveState.setEnterNotificationParams(DcFailCause.LOST_CONNECTION);
  14. transitionTo(mInactiveState);
  15. }
  16. retVal = HANDLED;
  17. break;
  18. }


RetryingState 收到RETRY消息后,发起连接并切换到ActivatingState

  1. case EVENT_RETRY_CONNECTION: {
  2. if (msg.arg1 == mTag) {
  3. mRetryManager.increaseRetryCount(); //计数
  4. onConnect(mConnectionParams); //开始连接
  5. transitionTo(mActivatingState); //切换到Activating State
  6. } else {
  7. if (DBG) {
  8. log( "DcRetryingState stale EVENT_RETRY_CONNECTION"
  9. + " tag:" + msg.arg1 + " != mTag:" + mTag);
  10. }
  11. }
  12. retVal = HANDLED;
  13. break;
  14. }

RetryManager负责重试相关的计数:

  1. public boolean isRetryNeeded() { // Reports whether another retry attempt should be made
  2. boolean retVal = mRetryForever || (mRetryCount < mCurMaxRetryCount); // retry forever, or retry count still below the current maximum
  3. if (DBG) log( "isRetryNeeded: " + retVal);
  4. return retVal;
  5. }

3. 一段时间内持续没有接收到新的数据包

在Data完成连接后,DcTracker会定时检查TX/RX的更新,如果持续只有TX而没有RX,且累计发送的包数(mSentSinceLastRecv)达到设置的上限值,就会触发Recovery动作。



首先来看方法onDataStallAlarm,它由Alarm定时触发,执行这些操作:

更新TX/RX数据 -> 判断是否需要Recover并执行 -> 重新设置Alarm来触发下一次检查

  1. protected void onDataStallAlarm(int tag) { // Data-stall check: refresh counters, request recovery if needed, re-arm the alarm
  2. if (mDataStallAlarmTag != tag) { // tag mismatch: ignore this alarm
  3. if (DBG) {
  4. log( "onDataStallAlarm: ignore, tag=" + tag + " expecting " + mDataStallAlarmTag);
  5. }
  6. return;
  7. }
  8. // Refresh mSentSinceLastRecv from the latest TX/RX packet counters
  9. updateDataStallInfo();
  10. // Falls back to NUMBER_SENT_PACKETS_OF_HANG if the setting is absent (the article states the default is 10 - TODO confirm)
  11. int hangWatchdogTrigger = Settings.Global.getInt(mResolver,
  12. Settings.Global.PDP_WATCHDOG_TRIGGER_PACKET_COUNT,
  13. NUMBER_SENT_PACKETS_OF_HANG);
  14. boolean suspectedStall = DATA_STALL_NOT_SUSPECTED;
  15. if (mSentSinceLastRecv >= hangWatchdogTrigger) {
  16. // Packets sent since the last receive reached the watchdog threshold: request recovery
  17. suspectedStall = DATA_STALL_SUSPECTED;
  18. sendMessage(obtainMessage(DctConstants.EVENT_DO_RECOVERY));
  19. } else {
  20. if (VDBG_STALL) {
  21. log( "onDataStallAlarm: tag=" + tag + " Sent " + String.valueOf(mSentSinceLastRecv) +
  22. " pkts since last received, < watchdogTrigger=" + hangWatchdogTrigger);
  23. }
  24. }
  25. // Re-arm the alarm; presumably this schedules the next run of onDataStallAlarm - confirm in startDataStallAlarm
  26. startDataStallAlarm(suspectedStall);
  27. }


updateDataStallInfo()负责计数,处理分3种情况:

1. 有TX 也有RX  -> 正常,重置计数和Recovery action(Recovery action后面会写到)

2. 有TX没有RX -> 异常,累计TX数据

3. 没有TX 只有RX  -> 正常,重置计数和Recovery action

  1. private void updateDataStallInfo() { // Updates mSentSinceLastRecv and the recovery action from the TX/RX packet deltas
  2. long sent, received;
  3. TxRxSum preTxRxSum = new TxRxSum(mDataStallTxRxSum); // copy of the previous counters
  4. mDataStallTxRxSum.updateTxRxSum();
  5. sent = mDataStallTxRxSum.txPkts - preTxRxSum.txPkts; // packets sent since the previous check
  6. received = mDataStallTxRxSum.rxPkts - preTxRxSum.rxPkts; // packets received since the previous check
  7. // Both directions saw traffic: reset the counter and the recovery action
  8. if ( sent > 0 && received > 0 ) {
  9. if (VDBG_STALL) log( "updateDataStallInfo: IN/OUT");
  10. mSentSinceLastRecv = 0;
  11. putRecoveryAction(RecoveryAction.GET_DATA_CALL_LIST);
  12. } else if (sent > 0 && received == 0) {
  13. // Sent but nothing received: accumulate only while the phone state is idle, otherwise reset
  14. if (isPhoneStateIdle()) {
  15. mSentSinceLastRecv += sent;
  16. } else {
  17. mSentSinceLastRecv = 0;
  18. }
  19. // Next branch: nothing sent but something received - reset the counter and the recovery action
  20. } else if (sent == 0 && received > 0) {
  21. if (VDBG_STALL) log( "updateDataStallInfo: IN");
  22. mSentSinceLastRecv = 0;
  23. putRecoveryAction(RecoveryAction.GET_DATA_CALL_LIST);
  24. } else { // no other combination changes any state
  25. if (VDBG_STALL) log( "updateDataStallInfo: NONE");
  26. }
  27. }

TX/RX数据由TrafficStats提供的静态方法获得,是native层方法统计所有Mobile的iface后返回的数据:

  1. public void updateTxRxSum() { // Overwrites both counters with the current TrafficStats values
  2. this.txPkts = TrafficStats.getMobileTcpTxPackets(); // mobile TCP packets transmitted, per the method name
  3. this.rxPkts = TrafficStats.getMobileTcpRxPackets(); // mobile TCP packets received, per the method name
  4. }

最后看下doRecovery方法如何执行恢复数据。

doRecovery方法中有5种不同的Recovery action对应着各自的处理:
1. 向Modem主动查询DATA CALL LIST
2. 清除现有的数据连接
3. 重新驻网
4. 重启Radio
5. 深度重启Radio(根据高通的注释,这个操作涉及到RIL的设计)

如果一种方法执行之后,连接依然有问题,则执行下一种恢复方法,顺序类似于循环链表,直到恢复正常后updateDataStallInfo()将Action重置:

  1. protected void doRecovery() { // Runs the current recovery action, then stores the action to use next time
  2. if (getOverallState() == DctConstants.State.CONNECTED) { // nothing is done unless the overall state is CONNECTED
  3. // Go through a series of recovery steps, each action transitions to the next action
  4. int recoveryAction = getRecoveryAction();
  5. switch (recoveryAction) {
  6. case RecoveryAction.GET_DATA_CALL_LIST: // step 1: query the data call list
  7. mPhone.mCi.getDataCallList(obtainMessage(DctConstants.EVENT_DATA_STATE_CHANGED));
  8. putRecoveryAction(RecoveryAction.CLEANUP);
  9. break;
  10. case RecoveryAction.CLEANUP: // step 2: clean up all connections
  11. cleanUpAllConnections(Phone.REASON_PDP_RESET);
  12. putRecoveryAction(RecoveryAction.REREGISTER);
  13. break;
  14. case RecoveryAction.REREGISTER: // step 3: re-register on the network
  15. mPhone.getServiceStateTracker().reRegisterNetwork( null);
  16. putRecoveryAction(RecoveryAction.RADIO_RESTART);
  17. break;
  18. case RecoveryAction.RADIO_RESTART: // step 4: restart the radio
  19. putRecoveryAction(RecoveryAction.RADIO_RESTART_WITH_PROP);
  20. restartRadio();
  21. break;
  22. case RecoveryAction.RADIO_RESTART_WITH_PROP: // step 5: set the reset property, then restart the radio
  23. // This is in case radio restart has not recovered the data.
  24. // It will set an additional "gsm.radioreset" property to tell
  25. // RIL or system to take further action.
  26. // The implementation of hard reset recovery action is up to OEM product.
  27. // Once RADIO_RESET property is consumed, it is expected to set back
  28. // to false by RIL.
  29. EventLog.writeEvent(EventLogTags.DATA_STALL_RECOVERY_RADIO_RESTART_WITH_PROP, - 1);
  30. if (DBG) log( "restarting radio with gsm.radioreset to true");
  31. SystemProperties.set(RADIO_RESET_PROPERTY, "true");
  32. // give 1 sec so property change can be notified.
  33. try {
  34. Thread.sleep( 1000);
  35. } catch (InterruptedException e) {} // NOTE(review): interrupt is swallowed and the interrupt flag is not restored
  36. restartRadio();
  37. putRecoveryAction(RecoveryAction.GET_DATA_CALL_LIST); // wrap around to the first action
  38. break;
  39. default:
  40. throw new RuntimeException( "doRecovery: Invalid recoveryAction=" +
  41. recoveryAction);
  42. }
  43. mSentSinceLastRecv = 0; // reset the sent-without-receive counter after any action
  44. }
  45. }

猜你喜欢

转载自blog.csdn.net/zhgeliang/article/details/81034684