Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091(CAM_MctBus) 解决方法总结

--------- beginning of crash
01-02 09:14:39.508   775  5091 F libc    : Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus)
01-02 09:14:39.762 28465 28465 F DEBUG   : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
01-02 09:14:39.762 28465 28465 F DEBUG   : Build fingerprint: 'Android/Aiolos/msm8953_64:7.1.2/Aiolos/20180811_1106:userdebug/test-keys'
01-02 09:14:39.762 28465 28465 F DEBUG   : Revision: '0'
01-02 09:14:39.762 28465 28465 F DEBUG   : ABI: 'arm'
01-02 09:14:39.763 28465 28465 F DEBUG   : pid: 775, tid: 5091, name: CAM_MctBus  >>> /system/bin/mm-qcamera-daemon <<<
01-02 09:14:39.763 28465 28465 F DEBUG   : signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xf4f17000
01-02 09:14:39.763 28465 28465 F DEBUG   :     r0 f4f17000  r1 00000003  r2 00008318  r3 ebdedac0
01-02 09:14:39.763 28465 28465 F DEBUG   :     r4 f08de1e0  r5 00000001  r6 ebdf5dd8  r7 f08de224
01-02 09:14:39.763 28465 28465 F DEBUG   :     r8 00000001  r9 e992d540  sl f1548800  fp 00000003
01-02 09:14:39.763 28465 28465 F DEBUG   :     ip f4255df4  sp ea8037e8  lr f423ed8f  pc f423c166  cpsr a0070030
01-02 09:14:39.781 28465 28465 F DEBUG   : 
01-02 09:14:39.781 28465 28465 F DEBUG   : backtrace:
01-02 09:14:39.782 28465 28465 F DEBUG   :     #00 pc 00006166  /system/vendor/lib/libmmcamera2_mct.so
01-02 09:14:39.782 28465 28465 F DEBUG   :     #01 pc 00046fe3  /system/lib/libc.so (_ZL15__pthread_startPv+22)
01-02 09:14:39.782 28465 28465 F DEBUG   :     #02 pc 00019ced  /system/lib/libc.so (__start_thread+6)

根据kernel log

[ 2804.384904] pgd = ffffffc059f28000
[ 2804.389559] [f4f17000] *pgd=00000000d9dda003, *pud=00000000d9dda003

以及反汇编

00006166                 LDR             R0, [R0]

我们可以得到是 0xf4f17000 这个指针地址的取值出现了错误。

根据反汇编我们可以定位到函数的位置
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/mct/controller/mct_controller.c

static boolean mct_controller_handle_SOF_proc(
  mct_pipeline_t *pipeline, mct_bus_msg_t *bus_msg)
{

 CLOGI(CAM_MCT_MODULE, "(sofdelay, curr_sofdelay) = (%d, %d) kptr %u",
    pipeline->sof_delay, current_sof_delay, *(int *)isp_sof_bus_msg->kernel_sof_ptr);
}

我们可以通过打印信息验证这一点

01-02 09:14:39.301   775 28447 E mm-camera-iface2: isp_sof.frame_id=3, f_src=1, mmap_addr=0xf4f17000
01-02 09:14:39.301   775 28447 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0
01-02 09:14:39.301   775  5091 E mm-camera: <MCT   ><ERROR> 1026: mct_controller_handle_SOF_proc: Metadata stream not present
01-02 09:14:39.301   775  5091 I mm-camera: <MCT   >< INFO> 1040: mct_controller_handle_SOF_proc: (sofdelay, curr_sofdelay) = (-2, -3) kptr 0xf4f17000

这个地址 0xf4f17000 就是 isp_sof_bus_msg->kernel_sof_ptr 的值,而 Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus) 中也是这个地址。
下面我们分析这个地址的来源,根据分析我们可以得到地址来源 ,iface_axi_open iface_thread_start iface_axi_handle_sof_event 三个函数列出如下,我们可以从中得知
isp_sof_bus_msg->kernel_sof_ptr 的创建和传递:
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi.c

static int iface_axi_open(iface_axi_hw_t *axi_hw, char *dev_name)
{
  int rc = 0;

  switch (axi_hw->hw_state) {
  case IFACE_AXI_HW_STATE_INVALID: {
    axi_hw->fd = open(dev_name, O_RDWR | O_NONBLOCK);

    if (axi_hw->fd <= 0) {
      CDBG_ERROR("%s: cannot open '%s'\n", __func__, dev_name);
      axi_hw->fd = 0;
      return -1;
    }

    /* hw opened, set state to idle */
    axi_hw->hw_state = IFACE_AXI_HW_STATE_DEV_OPEN;
  }
    break;

  default: {
    /* cannot open twice, nop */
    rc = -EAGAIN;
  }
    break;
  }

  if (rc == 0) {
    /* fork the hw thread to poll on vfe subdev and pipe */
    rc = iface_thread_start(&axi_hw->thread_poll, axi_hw, axi_hw->fd);
  }

  if(rc == 0) {
    /* fork the hw thread for stream on/off */
    rc = iface_sem_thread_start(&axi_hw->thread_stream, axi_hw);
  }

  return rc;
}

代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi_thread.c

int iface_thread_start(iface_thread_t *thread_data, void *hw_ptr, int poll_fd)
{

 axi_hw->mmap_addr =
    mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
      MAP_SHARED, poll_fd, 0);
}

以及
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi.c

/** iface_axi_handle_sof_event:
 *
 *  Description:
 *  this funciton is called when SOF event is received from
 *  kernel
 *
 *    @isp_hw: isp hw
 *    @isp_event_data: isp event data
 *
 *  Returns 0 for success and negative error on failure
 **/
static int iface_axi_handle_sof_event(
    iface_axi_hw_t *axi_hw,
    struct msm_isp_event_data *sof,
    enum msm_vfe_input_src input_src)
{
  if (axi_hw->mmap_addr != MAP_FAILED) {
    isp_sof.kernel_sof_ptr = axi_hw->mmap_addr;
    ksofid = *(uint32_t *)axi_hw->mmap_addr;
  } else {
    isp_sof.kernel_sof_ptr = NULL;
  }
}

这里的 axi_hw->fd =open(“/dev/v4l-subdev13”)对应着的设备路径如下
==kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp.c==
大致我们可以分析状况如下:
vendor 还在处理数据,但是mmap的地址已经被释放了,现在我们来验证这一想法。

下面我们摘录出log的异常地方便于分析

01-02 09:14:39.463   775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 2 ksofid: 0
01-02 09:14:39.463   775  5091 E mm-camera: <MCT   ><ERROR> 1026: mct_controller_handle_SOF_proc: Metadata stream not present
01-02 09:14:39.464   775  5091 I mm-camera: <MCT   >< INFO> 1040: mct_controller_handle_SOF_proc: (sofdelay, curr_sofdelay) = (-1, -2) kptr 0xf4f17000
01-02 09:14:39.464   775  5091 E mm-camera-iface2: iface_post_control_sof_to_thread: E, session id = 3, user stream id = 15
01-02 09:14:39.471   775  5089 I mm-camera: <MCT   >< INFO> 3674: mct_pipeline_process_set: command=8000009
01-02 09:14:39.471   775  5089 I mm-camera: <MCT   >< INFO> 3812: mct_pipeline_process_set:  STREAM-OFF on stream 0x30001 stream type=8
01-02 09:14:39.479   775  5089 E mm-camera: <ISP   ><ERROR> 1006: isp_handler_control_streamoff: stream_off 2 2 ide 30001
01-02 09:14:39.480   775  5089 E mm-camera: <ISP   ><ERROR> 1729: isp_util_forward_event_to_internal_pipeline: isp pipeline not support this stream identity 30001
01-02 09:14:39.480   775  5089 E mm-camera: <ISP   ><ERROR> 1729: isp_util_forward_event_to_internal_pipeline: isp pipeline not support this stream identity 30001
01-02 09:14:39.481   775  5089 I mm-camera: <IFACE >< INFO> 1622: iface_streamoff_to_thread: iface_streamoff_to_thread: E, session id = 3, user stream id = 1
01-02 09:14:39.481   775  5089 E mm-camera-iface2: iface_session_sem_thread_execute: E
01-02 09:14:39.481   775  5072 E mm-camera-iface2: iface:streamoff
01-02 09:14:39.481   775  5072 I mm-camera: <IFACE >< INFO> 2216: iface_streamoff: E, session_id 3 mct_stream_id = 1
01-02 09:14:39.481   775  5072 E mm-camera-iface2: iface_decide_session_hw_stream: no Session based hw streams to streamon, *num_hw_streams = 0
01-02 09:14:39.482   775 28463 E mm-camera-iface2: iface_axi_cfg_stream: E, start_flag = 0, sessionid = 3
01-02 09:14:39.482   775 28463 E mm-camera-iface2: iface_axi_cfg_stream:1617: Cmd (0) axi stream 0x205 on vfe 0
01-02 09:14:39.483   775 28462 E mm-camera-iface2: isp_sof.frame_id=3, f_src=1, mmap_addr=0xf4f17000
01-02 09:14:39.483   775 28463 E mm-camera-iface2: iface_axi_unreg_buf: Revmoe bufq in kernel with hw stream 0x1, use stream 1
01-02 09:14:39.483   775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0
01-02 09:14:39.484   775 28463 E mm-camera-iface2: iface_axi_unreg_buf: Unregister buf session_id = 3, stream_id = 1, buf_handle = 0x340a0000
01-02 09:14:39.484   775 28463 E mm-camera-iface2: iface_axi_cfg_stats_stream_stop: stats_cmd: enable  = 0, num of stats stream = 0
01-02 09:14:39.484   775 28463 E mm-camera-iface2: iface_axi_cfg_stats_stream_stop: no stats stream to START/STOP, stats cmd.num_streams = 0
01-02 09:14:39.486   775  5072 E mm-camera-iface2: iface_streamoff: hw_stream id 1 mct_stream_id 1 user_stream_id 1
01-02 09:14:39.486   775  5072 I mm-camera: <IFACE >< INFO> 2406: iface_streamoff: session_id = 3, active_streams = 0
01-02 09:14:39.486   775  5072 I mm-camera: <IFACE >< INFO> 2425: iface_streamoff: session id = 0x3,Delta between vfe irqs 0 sec 0 usec
01-02 09:14:39.486   775  5072 E mm-camera-iface2: iface_streamoff: hw_stream 0xf106777c type 0 axi_path 5 id 1
01-02 09:14:39.486   775  5072 I mm-camera: <IFACE >< INFO> 10046: iface_util_release_resource: camif_cnt= 0, rdi_cnt= 0, used mask 2
01-02 09:14:39.486   775  5072 E mm-camera-iface2: iface_destroy_hw: E, hw_idx 0
01-02 09:14:39.487   775  5072 E mm-camera-iface2: iface_destroy_hw: Destroy axi hw on VFE :0
--------- beginning of crash
01-02 09:14:39.508   775  5091 F libc    : Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus)

由log可以看出 ==iface_axi_handle_sof_event== 这个事件还在处理中,根据函数注释我们可以得知
this funciton is called when SOF event is received from kernel

01-02 09:14:39.483   775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0

但是在此之前,根据时间戳发现却已经执行了 streamoff

01-02 09:14:39.481   775  5072 E mm-camera-iface2: iface:streamoff

现在我们把整个函数整理如下
代码路径
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi_thread.c

/** iface_thread_main_loop
 *
 * DESCRIPTION: main loop to proc cmd or poll v4l2 event
 *
 **/
static void *iface_thread_main_loop(void *data)
{
  int rc = 0, i;
  int timeout;
  int thread_exit = 0;
  iface_thread_t *thread_data = (iface_thread_t *)data;
  iface_axi_hw_t *axi_hw = (iface_axi_hw_t *)thread_data->hw_ptr;

  timeout = thread_data->poll_timeoutms;
  /* wake up the creater first */
  sem_post(&thread_data->sig_sem);
  while(!thread_exit) {
    for(i = 0; i < thread_data->num_fds; i++)
      thread_data->poll_fds[i].events = POLLIN|POLLRDNORM|POLLPRI;

    rc = poll(thread_data->poll_fds, thread_data->num_fds, timeout);
    if(rc > 0) {
      if ((thread_data->poll_fds[0].revents & POLLIN) &&
        (thread_data->poll_fds[0].revents & POLLRDNORM)) {
        /* if we have data on pipe, we only process pipe in this iteration */
        rc = iface_thread_proc_cmd(axi_hw, thread_data, &thread_exit, TRUE);
      } else {
        if ((thread_data->poll_fds[1].revents & POLLPRI) ||
          (thread_data->poll_fds[1].revents & POLLIN) ||
          (thread_data->poll_fds[1].revents & POLLRDNORM)) {
          /* if we have data on subdev */
          iface_axi_proc_subdev_event(axi_hw, thread_data);
        }
      }
    } else {
      CDBG_ERROR("error: poll() failed error =%s %d",
        strerror(errno), errno);
    }
  }
  if (thread_data->pipe_fds[0] > 0) {
    close(thread_data->pipe_fds[0]);
    thread_data->pipe_fds[0] = 0;
  }
  if (thread_data->pipe_fds[1] > 0) {
    close(thread_data->pipe_fds[1]);
    thread_data->pipe_fds[1] = 0;
  }

  return NULL;
}

iface_axi_proc_subdev_event—->iface_axi_handle_sof_event(axi_hw, sof, input_src);
—->ISP_EVENT_SOF—->iface_axi_handle_sof_event
而 ISP_EVENT_SOF 的事件来源是 msm_isp40.c ,代表着 kernel 接收到 sensor 的帧。

代码路径
:kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp40.c

            case VFE_RAW_0:
            case VFE_RAW_1:
            case VFE_RAW_2:
                msm_isp_notify(vfe_dev, ISP_EVENT_SOF, i, ts);

或者

    msm_isp_notify(vfe_dev, ISP_EVENT_SOF, VFE_PIX_0, ts);

这里可以验证我们的猜想,kernel的事件确实还通过v4l2上传到vendor去处理。
现在我们回到kernel的log

[ 2801.881016] msm_isp_open_node open_cnt 0
[ 2801.886397] msm_isp_open_node: HW Version: 0x10090000
[ 2801.892568] msm_isp_open_node open_cnt 1 exit 111111
[ 2801.963456] msm_isp_close_node E open_cnt 1
[ 2801.966810] msm_isp_close_node open_cnt 1 unregister page fault handler
[ 2801.976748] msm_isp_close_node X open_cnt 0 EXIT 1111111
[ 2804.384904] pgd = ffffffc059f28000
[ 2804.389559] [f4f17000] *pgd=00000000d9dda003, *pud=00000000d9dda003

也可以佐证我们的想法,已经执行完 msm_isp_close_node 函数才出现 pgd = ffffffc059f28000 这个取地址的错误。
我们可以打印如下函数中的 munmap(axi_hw->mmap_addr, PAGE_SIZE) 来验证
代码路径
:kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp40.c

/** iface_axi_destroy_thread
 *
 *    @thread: information about the thread to be destroyed
 *
 *  Destroys isp hw thread
 **/
static int iface_axi_destroy_thread(iface_thread_t *thread)
{
  int len;
  uint32_t cmd = IFACE_THREAD_CMD_DESTROY;
  iface_axi_hw_t *axi_hw;

  axi_hw = (iface_axi_hw_t *)thread->hw_ptr;

  pthread_mutex_lock(&thread->cmd_mutex);

  if (axi_hw->mmap_addr != MAP_FAILED) {
    if (munmap(axi_hw->mmap_addr, PAGE_SIZE))
      perror("munmap");
  }

  if (thread->return_code == -EPIPE) {
    CDBG_ERROR("%s: Pipe read error\n", __func__);
    pthread_mutex_unlock(&thread->cmd_mutex);
    return -EPIPE;
  }

  len = write(thread->pipe_fds[1], &cmd, sizeof(cmd));
  if (len != sizeof(cmd)) {
    /* we got error here */
    pthread_mutex_unlock(&thread->cmd_mutex);
    CDBG_ERROR("%s: Pipe write error\n", __func__);
    return -EPIPE;
  }
  /* we do not wait on sem but join to wait the thread to die. */
  if (pthread_join(thread->pid, NULL) != 0)
    CDBG("%s: pthread dead already\n", __func__);

  sem_destroy(&thread->sig_sem);

  pthread_mutex_destroy(&thread->busy_mutex);
  pthread_mutex_unlock(&thread->cmd_mutex);
  pthread_mutex_destroy(&thread->cmd_mutex);

  return 0;
}

猜你喜欢

转载自blog.csdn.net/huifeidedabian/article/details/82051778