Android性能优化之Thread native层源码分析(InternalError/Out of memory)

近期处理Bugly上的OOM问题,很多发生在Thread创建、启动的过程中。虽然最后分析出是32位进程4G虚拟地址空间不足导致,但还是梳理一下Thread从Java层到native层的创建源码,以及该过程中可能抛出的异常InternalError/Out of memory。

Thread报错堆栈:
在这里插入图片描述

Java线程创建到启动过程

从Thread.start()-> c++层CreateNativeThread()->JNIEnvExt::Create()创建JniEnv ->c++层pthread_create()—> __allocate_thread()分配线程栈内存->Linux层clone()拷贝新线程-> 反射调用Thread.run()

源码分析
Java层Thread#start():
在这里插入图片描述

接着来到c++层:

http://aospxref.com/android-7.1.2_r39/xref/art/runtime/native/java_lang_Thread.cc

/art/runtime/native/java_lang_Thread.cc

// Native counterpart reached from Java Thread#start() (per the call chain described in this
// article). Forwards the Java Thread object, the requested stack size and the daemon flag to
// Thread::CreateNativeThread().
static void Thread_nativeCreate(JNIEnv* env, jclass, jobject java_thread, jlong stack_size,jboolean daemon) {
    
    
    // ... (elided in this excerpt) According to the article, the zygote process is in some cases
    // not allowed to create threads, and an InternalError is thrown there.
    // Otherwise continue into the native thread-creation routine:
    Thread::CreateNativeThread(env, java_thread, stack_size, daemon == JNI_TRUE);
}

http://aospxref.com/android-7.1.2_r39/xref/art/runtime/thread.cc

/art/runtime/thread.cc

// Creates the native thread that backs a Java Thread object.
//
// env        - JNIEnv of the calling (parent) thread.
// java_peer  - the Java Thread object being started; must not be null.
// stack_size - stack size requested by the Java layer (adjusted by FixStackSize()).
// is_daemon  - whether the new thread is a daemon thread.
//
// Failure modes visible in this excerpt:
//   * runtime is shutting down            -> java.lang.InternalError
//   * JNIEnvExt::Create() returns nullptr -> OutOfMemoryError("Could not allocate JNI Env")
//   * pthread_create() returns non-zero   -> OutOfMemoryError("pthread_create (...) failed: ...")
void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
    
    
    CHECK(java_peer != nullptr);
    Thread* self = static_cast<JNIEnvExt*>(env)->self;

    // If the runtime is already shutting down, no thread is created and an InternalError is
    // thrown below. The shutdown state is read while holding runtime_shutdown_lock_.
    Runtime* runtime = Runtime::Current();
    bool thread_start_during_shutdown = false;
    {
    
    
      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
      if (runtime->IsShuttingDownLocked()) {
    
    
        thread_start_during_shutdown = true;
      } else {
    
    
        runtime->StartThreadBirth();
      }
    }
    if (thread_start_during_shutdown) {
    
    
      ScopedLocalRef<jclass> error_class(env, env->FindClass("java/lang/InternalError"));
      env->ThrowNew(error_class.get(), "Thread starting during runtime shutdown");
      return;
    }
    Thread* child_thread = new Thread(is_daemon);  // C++ Thread object that corresponds to the Java Thread
    child_thread->tlsPtr_.jpeer = env->NewGlobalRef(java_peer);  // hold the Java Thread through a JNI global reference
    stack_size = FixStackSize(stack_size);  // final thread STACK size (not heap); the article computes 1040 KB for the default case
  
    // Store the address of the native Thread object in the Java Thread's nativePeer field.
    // (This links the Java object to its native peer; it is not a thread-group registration.)
    env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
                      reinterpret_cast<jlong>(child_thread));
  
    // Create the JNIEnvExt for the new thread before starting it. Create() may return nullptr,
    // which is reported as OutOfMemoryError further down.
    std::unique_ptr<JNIEnvExt> child_jni_env_ext(
        JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM()));
  
 
    int pthread_create_result = 0;
    if (child_jni_env_ext.get() != nullptr) {
    
    // The JNIEnvExt for the new thread was created successfully.
      pthread_t new_pthread;
      pthread_attr_t attr;
      child_thread->tlsPtr_.tmp_jni_env = child_jni_env_ext.get();  // hand the env pointer to the native Thread object
      CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), "new thread");
      CHECK_PTHREAD_CALL(pthread_attr_setdetachstate, (&attr, PTHREAD_CREATE_DETACHED),
                         "PTHREAD_CREATE_DETACHED");
      CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), stack_size);
      // Actually create the thread. Arguments: (1) out-param receiving the thread id,
      // (2) thread attributes (detached state and stack size set above), (3) the thread's
      // start routine, (4) the argument passed to that start routine.
      pthread_create_result = pthread_create(&new_pthread,
                                             &attr,
                                             Thread::CreateCallback,
                                             child_thread);
      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), "new thread");
  
      if (pthread_create_result == 0) {
    
        // 0 means the native thread was created successfully (it does NOT mean that
        // Thread#run() has finished). release() makes the unique_ptr give up ownership so the
        // JNIEnvExt is not destroyed when this function returns; presumably the child thread
        // takes it over through tmp_jni_env.
        child_jni_env_ext.release();
        return;  // success: skip the failure clean-up below
      }
    }
  
    // Failure path (no JNIEnvExt, or pthread_create failed): undo what was set up above.
    env->DeleteGlobalRef(child_thread->tlsPtr_.jpeer);  // drop the global reference to the Java Thread
    child_thread->tlsPtr_.jpeer = nullptr;
    delete child_thread;  // destroy the native Thread object
    child_thread = nullptr;
    // Clear the nativePeer field so the Java Thread no longer points at the deleted object.
    env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);

    // Report the failure to Java as an OutOfMemoryError whose message says which step failed.
    {
    
    
      std::string msg(child_jni_env_ext.get() == nullptr ?
          "Could not allocate JNI Env" :  // JNIEnvExt creation failed
          StringPrintf("pthread_create (%s stack) failed: %s",
                                   PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
      ScopedObjectAccess soa(env);
      soa.Self()->ThrowOutOfMemoryError(msg.c_str());  // surfaces in Java as OutOfMemoryError
    }
}

通过FixStackSize()计算出线程的栈内存大小(注意是栈而不是堆),默认情况下栈内存=1024K(1M)+8K+8K=1040K

static size_t FixStackSize(size_t stack_size) {
    
     //参数是java层中thread 的stack_size默认0
    if (stack_size == 0) {
    
    
         // GetDefaultStackSize 是启动art时命令行的 "-Xss=" 参数, Android 中没有该参数,因此为0.
      stack_size = Runtime::Current()->GetDefaultStackSize();
    }
    // bionic pthread 默认栈大小是 1M
    stack_size += 1 * MB;
    //...
    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
    
    
       //8k
      stack_size += GetStackOverflowReservedBytes(kRuntimeISA);
    } else {
    
    
      8k+8K
      stack_size += Thread::kStackOverflowImplicitCheckSize +
          GetStackOverflowReservedBytes(kRuntimeISA);
    }
    //...
    return stack_size;
  }

查看创建JniEnv过程:
http://aospxref.com/android-7.1.2_r39/xref/art/runtime/jni_env_ext.cc

/art/runtime/jni_env_ext.cc

// Factory for a thread's JNIEnvExt.
// Returns the newly constructed object when CheckLocalsValid() accepts it; otherwise returns
// nullptr, and the unique_ptr destroys the object on the way out. A nullptr result is what
// Thread::CreateNativeThread() reports as "Could not allocate JNI Env".
JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) {
    
    
    std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in));
    if (CheckLocalsValid(ret.get())) {
    
    
      // Validation passed: hand ownership to the caller.
      return ret.release();
    }
    return nullptr;
}

// Constructs the JNI environment of one thread: records the owning Thread and JavaVMExt and
// sets up the local-reference table (`locals`) and the `monitors` table with their
// initial/maximum sizes.
JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in)
        : self(self_in),
        vm(vm_in),
        local_ref_cookie(IRT_FIRST_SEGMENT),
        locals(kLocalsInitial, kLocalsMax, kLocal, false),
        check_jni(false),
        runtime_deleted(false),
        critical(0),
        monitors("monitors", kMonitorsInitial, kMonitorsMax) {
    
    
    functions = unchecked_functions = GetJniNativeInterface();  // JNI function table returned by GetJniNativeInterface()
    if (vm->IsCheckJniEnabled()) {
    
    
      // Propagate the VM-level CheckJNI setting to this environment.
      SetCheckJniEnabled(true);
    }
}

查看pthread的创建线程过程:

http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/bionic/pthread_create.cpp
/bionic/libc/bionic/pthread_create.cpp

// bionic implementation of pthread_create() (excerpt; several parts are elided with "//...").
// Returns 0 on success or an errno-style error code on failure. It never throws: the Java
// OutOfMemoryError is raised by the ART caller when the result is non-zero.
int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
                    void* (*start_routine)(void*), void* arg) {
    
    

    pthread_internal_t* thread = NULL;
    void* child_stack = NULL;
    // Allocate the new thread's STACK (not heap); `thread` and `child_stack` are filled in
    // through the out-parameters.
    int result = __allocate_thread(&thread_attr, &thread, &child_stack);
    if (result != 0) {
    
    
     return result;  // allocation failed: propagate the error code to the caller
    }
    //....  
    int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
        CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
    // clone() creates the new task. With these flags (CLONE_VM, CLONE_FILES, CLONE_SIGHAND,
    // CLONE_THREAD, ...) it shares the caller's address space, file descriptors and signal
    // handlers, i.e. it is a thread of the current process rather than a new process.
    int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid));
    if (rc == -1) {
    
    
      // Save errno right away; this saved value is what gets returned.
      int clone_errno = errno;
      if (thread->mmap_size != 0) {
    
    
        // clone failed: unmap the stack region recorded in thread->attr.stack_base.
        munmap(thread->attr.stack_base, thread->mmap_size);
      }
      // Log a warning ("clone failed") when the thread could not be created.
      // NOTE(review): the message formats the current errno rather than the saved clone_errno.
      __libc_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s", strerror(errno));
      return clone_errno;
    }
    //...
    return 0;
}

接下来看下__allocate_thread()是如何分配线程的栈内存

// Allocates the memory region for a new thread (excerpt; the remainder is elided).
// When the caller supplied no stack (attr->stack_base == NULL), a region is mapped that is
// large enough for the stack plus the pthread_internal_t structure.
// Returns 0 on success, EAGAIN when the mapping could not be created.
static int __allocate_thread(pthread_attr_t* attr, pthread_internal_t** threadp, void** child_stack) {
    
    
    size_t mmap_size;
    uint8_t* stack_top;
    if (attr->stack_base == NULL) {
    
    
      // mmap_size = stack size + sizeof(pthread_internal_t), rounded up to PAGE_SIZE.
      mmap_size = BIONIC_ALIGN(attr->stack_size + sizeof(pthread_internal_t), PAGE_SIZE);
      // The guard size is rounded up to PAGE_SIZE as well.
      attr->guard_size = BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
      attr->stack_base = __create_thread_mapped_space(mmap_size, attr->guard_size);
      if (attr->stack_base == NULL) {
    
    
        return EAGAIN;  // creating the mapped space failed: return an error code
      }
      // The top of the stack is the end of the mapped region.
      stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + mmap_size;
    }
    //....
    return 0;
}

线程分配的mmap_size = 线程栈大小(1040K)+ 线程结构体pthread_internal_t的大小,再按PAGE_SIZE向上对齐。线程结构体pthread_internal_t保存了线程的tid、属性attr、mmap_size以及thread-local(TLS)数据等。

接下来看下__create_thread_mapped_space()

// Maps the memory region used for a new thread's stack.
// Returns the start of the mapping, or NULL (after logging a warning) when mmap() fails.
// The handling of stack_guard_size is in the part elided from this excerpt.
static void* __create_thread_mapped_space(size_t mmap_size, size_t stack_guard_size) {
    
    
    int prot = PROT_READ | PROT_WRITE;
    int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
    // MAP_PRIVATE | MAP_ANONYMOUS: a private anonymous mapping of mmap_size bytes (not backed
    // by a file, hence fd -1 and offset 0). It is not shared memory.
    void* space = mmap(NULL, mmap_size, prot, flags, -1, 0);
    if (space == MAP_FAILED) {
    
    
      // Source of the "couldn't allocate N-bytes mapped space" log line quoted in the article.
      __libc_format_log(ANDROID_LOG_WARN,
                        "libc","pthread_create failed: couldn't allocate %zu-bytes mapped space: %s", mmap_size, strerror(errno));
      return NULL;
    }
    //....
    return space;
}

这里和Bugly上的pthread_create failed: couldn't allocate 1085440-bytes mapped space: Out of memory 对应上了,即为线程栈申请(mmap)内存失败了,进程的虚拟地址空间不够了。

接下来看下,Linux 是如何创建新子进程,即创建线程。

先来了解下一些Linux中的概念

进程创建:

  • Linux 进程创建: 通过fork(),复制资源(包含代码段、数据段、堆、栈)给子进程,但两进程内存资源不共享;
  • Linux用户级别线程创建:通过pthread库中的pthread_create()创建线程,共享同个进程中的资源;
  • Linux内核线程创建: 通过kthread_create()

在Linux看来线程是一种进程间共享资源的方式,线程也可以看做跟其进程共享资源的进程。线程与进程的区别是是否共享资源。

http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/bionic/clone.cpp
/bionic/libc/bionic/clone.cpp

// bionic wrapper around the clone system call (excerpt: the code that extracts parent_tid,
// new_tls and child_tid from the variadic arguments, and that obtains `self` and `parent_pid`,
// is not shown here).
int clone(int (*fn)(void*), void* child_stack, int flags, void* arg, ...) {
    
    
    // The actual creation of the child task happens in __bionic_clone(); see the call chain
    // described in the article for what follows.
   int clone_result = __bionic_clone(flags, child_stack, parent_tid, new_tls, child_tid, fn, arg);
   // Back in the caller: store parent_pid as the cached pid.
   self->set_cached_pid(parent_pid);
   return clone_result;
}

pthread_create()->linux的clone()->sys_clone()->do_fork()->copy_process(),在这个过程中,会拷贝当前进程(比如主进程)的资源,
会检查进程数是否超出限制(即线程数是否超过最大值)、fd资源是否超过限制(在linux 中socket、file都是fd),并共享信号处理。

更多请阅读,http://gityuan.com/2017/08/05/linux-process-fork/

最后看下每个code对应的异常msg:
http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/bionic/strerror.cpp#36

/bionic/libc/bionic/strerror.cpp


// Maps an errno value to its message text.
// Known codes return the constant string found by __strerror_lookup(); any other value is
// formatted by strerror_r() into the buffer obtained from g_strerror_tls_buffer.
char* strerror(int error_number) {
    
    
    // Just return the original constant in the easy cases.
    char* result = const_cast<char*>(__strerror_lookup(error_number));
    if (result != nullptr) {
    
    
      return result;
    }
  
    // No constant message for this code: format one into the buffer and return it.
    result = g_strerror_tls_buffer.get();
    strerror_r(error_number, result, g_strerror_tls_buffer.size());
    return result;
}

http://aospxref.com/android-7.1.2_r39/xref/bionic/libc/include/sys/_errdefs.h
/bionic/libc/include/sys/_errdefs.h

// Selected entries of the errno table: symbolic name, numeric value, message text.
// ENOMEM's text "Out of memory" is the suffix seen in the Bugly log quoted in the article;
// EAGAIN ("Try again") is the code __allocate_thread() returns when the stack mapping fails.
__BIONIC_ERRDEF( EAGAIN         ,  11, "Try again" )
__BIONIC_ERRDEF( ENOMEM         ,  12, "Out of memory" )
__BIONIC_ERRDEF( EACCES         ,  13, "Permission denied" )
__BIONIC_ERRDEF( EMFILE         ,  24, "Too many open files" )

这里延伸点,Thread 异常捕捉处理器中:

  • 捕获到java 层异常时,不能再创建Thread,不然会抛出 InternalError:Thread starting during runtime shutdown。即异常上报的线程要提前创建。

  • 当因内存不足发生异常并进行异常上报时,如果使用OkHttp传输(会创建新线程),可能造成新的OOM 异常;

资料参考

  • http://gityuan.com/2016/09/24/android-thread/
  • https://blog.csdn.net/Tencent_Bugly/article/details/78542324

猜你喜欢

转载自blog.csdn.net/hexingen/article/details/131959505