kubernetes exec源码简析

1 概述:

1.1 环境

版本信息如下:
a、操作系统:centos 7.6
b、kubernetes版本:v1.15.0


1.2 exec原理概述

为进入目标pod的目标容器中执行命令(挂载标准输入和输出、标准错误的情景),kubectl exec访问kube-apiserver的connect接口(中间过程是通过http协议来握手,之后升级为spdy协议),kube-apiserver把请求转发至对应节点的kubelet进程,而kubelet进程此时是一个反向代理,再把请求转发至cri shim程序(kubelet进程中实现了docker shim),cri shim程序再调用容器运行时的exec接口。当cri是docker时,kubelet进程和docker shim可通过localhost网卡来通信。

补充说明:
1)v1.18版本开始,必须是通过kubelet代理客户端的streaming请求。


2 源码简析:

2.1 kube-apiserver侧

kube-apiserver是无状态的web服务,exec接口的注册:

// 注册http handler,重点看restfulConnectResource(...)。
func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storage, ws *restful.WebService) (*metav1.APIResource, error) {
		
		/*
			其他代码
		*/

		switch action.Verb {	
		case "CONNECT":
			for _, method := range connecter.ConnectMethods() {
				connectProducedObject := storageMeta.ProducesObject(method)
				if connectProducedObject == nil {
					connectProducedObject = "string"
				}
				doc := "connect " + method + " requests to " + kind
				if isSubresource {
					doc = "connect " + method + " requests to " + subresource + " of " + kind
				}
			
				// http handler,主要是restfulConnectResource(...)
				handler := metrics.InstrumentRouteFunc(action.Verb, group, version, resource, subresource, requestScope, metrics.APIServerComponent, restfulConnectResource(connecter, reqScope, admit, path, isSubresource))
				// 创建http路由
				route := ws.Method(method).Path(action.Path).
					To(handler).
					Doc(doc).
					Operation("connect" + strings.Title(strings.ToLower(method)) + namespaced + kind + strings.Title(subresource) + operationSuffix).
					Produces("*/*").
					Consumes("*/*").
					Writes(connectProducedObject)
				// 新增的http路由放入切片	
				routes = append(routes, route)
				
			}
		
		}
		
		for _, route := range routes {
			route.Metadata(ROUTE_META_GVK, metav1.GroupVersionKind{
				Group:   reqScope.Kind.Group,
				Version: reqScope.Kind.Version,
				Kind:    reqScope.Kind.Kind,
			})
			route.Metadata(ROUTE_META_ACTION, strings.ToLower(action.Verb)
			// 把http路由注册到ws对象
			ws.Route(route)
		}

		/*
			其他代码
		*/
}

// restfulConnectResource adapts the generic ConnectResource handler to the
// go-restful RouteFunction signature.
func restfulConnectResource(connecter rest.Connecter, scope handlers.RequestScope, admit admission.Interface, restPath string, isSubresource bool) restful.RouteFunction {
	return func(req *restful.Request, res *restful.Response) {
		h := handlers.ConnectResource(connecter, &scope, admit, restPath, isSubresource)
		h(res.ResponseWriter, req.Request)
	}
}

// ConnectResource returns an http.HandlerFunc that serves CONNECT-style
// subresource requests (exec/attach/portforward).
// NOTE: excerpt — elided code is marked with "other code".
func ConnectResource(connecter rest.Connecter, scope *RequestScope, admit admission.Interface, restPath string, isSubresource bool) http.HandlerFunc {
	return func(w http.ResponseWriter, req *http.Request) {
		/*
			other code
		*/
		requestInfo, _ := request.RequestInfoFrom(ctx)
		metrics.RecordLongRunning(req, requestInfo, metrics.APIServerComponent, func() {
			// For pod exec, connecter is an *ExecREST.
			// Its Connect(...) method returns the http.Handler that will
			// actually proxy the stream.
			handler, err := connecter.Connect(ctx, name, opts, &responder{scope: scope, req: req, w: w})
			if err != nil {
				scope.err(err, w, req)
				return
			}
			// Serve the kubectl exec request.
			handler.ServeHTTP(w, req)
		})
	}
}

// Connect returns an http.Handler for the pod "exec" subresource.
// It resolves the backend target (the kubelet's IP, port and URI) and hands
// the request to an upgrade-aware reverse proxy that copies the stream.
func (r *ExecREST) Connect(ctx context.Context, name string, opts runtime.Object, responder rest.Responder) (http.Handler, error) {
	options, ok := opts.(*api.PodExecOptions)
	if !ok {
		return nil, fmt.Errorf("invalid options object: %#v", opts)
	}

	// location is the HTTP URL of the target kubelet.
	location, transport, err := pod.ExecLocation(r.Store, r.KubeletConn, ctx, name, options)
	if err != nil {
		return nil, err
	}

	// The apiserver now acts as a reverse proxy toward the kubelet's
	// endpoint, copying the bidirectional stream.
	return newThrottledUpgradeAwareProxyHandler(location, transport, false, true, true, responder), nil
}
// newThrottledUpgradeAwareProxyHandler builds a *proxy.UpgradeAwareHandler
// pointed at location, configured for redirect interception (behind feature
// gates) and a per-connection bandwidth cap.
func newThrottledUpgradeAwareProxyHandler(location *url.URL, transport http.RoundTripper, wrapTransport, upgradeRequired, interceptRedirects bool, responder rest.Responder) *proxy.UpgradeAwareHandler {
	h := proxy.NewUpgradeAwareHandler(location, transport, wrapTransport, upgradeRequired, proxy.NewErrorResponder(responder))
	h.InterceptRedirects = interceptRedirects && utilfeature.DefaultFeatureGate.Enabled(genericfeatures.StreamingProxyRedirects)
	h.RequireSameHostRedirects = utilfeature.DefaultFeatureGate.Enabled(genericfeatures.ValidateProxyRedirects)
	h.MaxBytesPerSec = capabilities.Get().PerConnectionBandwidthLimitBytesPerSec
	return h
}

2.2 kubelet侧

kubelet除了启动很多协程做for循环,还会启动web服务,web服务中就包含了/exec接口。

// InstallDebuggingHandlers registers the container-debugging endpoints
// (/exec, /log, ...) on the kubelet's web server.
// All four /exec routes below are handled by s.getExec.
// NOTE: excerpt — elided code is marked with "other code".
func (s *Server) InstallDebuggingHandlers(criHandler http.Handler) {
	klog.Infof("Adding debug handlers to kubelet server.")

	ws := new(restful.WebService)
	/*
		other code
	*/

	ws = new(restful.WebService)
	ws.
		Path("/exec")
	// GET and POST are both accepted, with and without the pod UID segment.
	ws.Route(ws.GET("/{podNamespace}/{podID}/{containerName}").
		To(s.getExec).
		Operation("getExec"))
	ws.Route(ws.POST("/{podNamespace}/{podID}/{containerName}").
		To(s.getExec).
		Operation("getExec"))
	ws.Route(ws.GET("/{podNamespace}/{podID}/{uid}/{containerName}").
		To(s.getExec).
		Operation("getExec"))
	ws.Route(ws.POST("/{podNamespace}/{podID}/{uid}/{containerName}").
		To(s.getExec).
		Operation("getExec"))
	s.restfulCont.Add(ws)


	/*
		other code
	*/

	// NOTE(review): in the upstream source this second Add registers a
	// different WebService created in the elided code above; as excerpted it
	// would add the same ws twice — verify against the original file.
	s.restfulCont.Add(ws)

	if criHandler != nil {
		// Expose the CRI streaming handler under /cri/ when provided.
		s.restfulCont.Handle("/cri/", criHandler)
	}
}

kubelet是一个反向代理,代理来自kube-apiserver的streaming请求

// getExec serves GET/POST /exec/{podNamespace}/{podID}[/{uid}]/{containerName}.
// It resolves the pod, asks the CRI shim for a one-shot streaming URL, and
// either redirects the client to that URL or reverse-proxies the stream.
// NOTE: excerpt — elided validation code is marked below.
func (s *Server) getExec(request *restful.Request, response *restful.Response) {
	params := getExecRequestParams(request)
	streamOpts, err := remotecommandserver.NewOptions(request.Request)
	/*
		validation code
	*/
	pod, ok := s.host.GetPodByName(params.podNamespace, params.podName)
	/*
		validation code
	*/

	podFullName := kubecontainer.GetPodFullName(pod)
	// url is of the form 127.0.0.1:<port>/exec/{token} — the docker shim's
	// streaming endpoint.
	url, err := s.host.GetExec(podFullName, params.podUID, params.containerName, params.cmd, *streamOpts)
	/*
		validation code
	*/

	// Redirect the client straight to the streaming URL.
	if s.redirectContainerStreaming {
		http.Redirect(response.ResponseWriter, request.Request, url.String(), http.StatusFound)
		return
	}

	// Here the kubelet acts as a reverse proxy (think nginx/traefik); url is
	// the concrete backend instance behind it.
	// Since v1.18 the redirect branch above is gone and execution always
	// reaches this point: streaming requests are always proxied by kubelet.
	proxyStream(response.ResponseWriter, request.Request, url)
}

// proxyStream mirrors the kube-apiserver approach: build an
// UpgradeAwareHandler for url and let its ServeHTTP(w, r) copy the stream.
func proxyStream(w http.ResponseWriter, r *http.Request, url *url.URL) {
	h := proxy.NewUpgradeAwareHandler(url, nil /*transport*/, false /*wrapTransport*/, true /*upgradeRequired*/, &responder{})
	h.ServeHTTP(w, r)
}

2.3 UpgradeAwareHandler

这是一个工具包下的一个结构体,专门作为反向代理。


// ServeHTTP first tries to upgrade the connection; if the request is not an
// upgrade request it falls back to a plain single-host reverse proxy.
// NOTE: excerpt — elided code is marked with "other code".
func (h *UpgradeAwareHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {

	// tryUpgrade(...) handles long-lived, protocol-upgraded connections.
	// For kubectl exec the request asks for an upgrade, so this returns true.
	if h.tryUpgrade(w, req) {
		return
	}

	/*
		other code
	*/

	// Reaching here means the request is not a connection upgrade.

	newReq := req.WithContext(context.Background())
	newReq.Header = utilnet.CloneHeader(req.Header)
	if !h.UseRequestLocation {
		newReq.URL = &loc
	}

	// Classic reverse-proxy usage from net/http/httputil.
	proxy := httputil.NewSingleHostReverseProxy(&url.URL{Scheme: h.Location.Scheme, Host: h.Location.Host})
	proxy.Transport = h.Transport
	proxy.FlushInterval = h.FlushInterval
	proxy.ServeHTTP(w, newReq)
}

重点是看UpgradeAwareHandler对象的tryUpgrade(…)方法。tryUpgrade(…)方法里面核心的业务逻辑是拿着tcp连接做流拷贝。


// tryUpgrade handles protocol-upgrade requests: it dials the backend,
// hijacks the client connection, and spawns two goroutines that copy bytes
// in both directions until either side disconnects. Returns false
// immediately if the request is not an upgrade request.
// NOTE: excerpt — elided code is marked with "other code"/"validation code".
func (h *UpgradeAwareHandler) tryUpgrade(w http.ResponseWriter, req *http.Request) bool {

	// If the request does not ask for a protocol upgrade, bail out.
	if !httpstream.IsUpgradeRequest(req) {
		klog.V(6).Infof("Request was not an upgrade")
		return false
	}

	var (
		// backendConn is the connection from kubelet to the CRI shim.
		backendConn net.Conn
		rawResponse []byte
		err         error
	)

	location := *h.Location
	if h.UseRequestLocation {
		location = *req.URL
		location.Scheme = h.Location.Scheme
		location.Host = h.Location.Host
	}

	// Clone the request to keep the original protocol and headers; the URL
	// is replaced below.
	clone := utilnet.CloneRequest(req)

	utilnet.AppendForwardedForHeader(clone)
	if h.InterceptRedirects {
		backendConn, rawResponse, err = utilnet.ConnectWithRedirects(req.Method, &location, clone.Header, req.Body, utilnet.DialerFunc(h.DialForUpgrade), h.RequireSameHostRedirects)
	} else {
		// Dial the CRI shim directly.
		clone.URL = &location
		backendConn, err = h.DialForUpgrade(clone)
	}

	/*
		validation code
		NOTE(review): the elided code must check err before backendConn is
		used — the defer below dereferences it.
	*/
	defer backendConn.Close()

	// Read the backend's initial response; if it signals failure the
	// handler returns early (in elided code).
	backendHTTPResponse, headerBytes, err := getResponse(io.MultiReader(bytes.NewReader(rawResponse), backendConn))

	requestHijacker, ok := w.(http.Hijacker)
	/*
		validation code
	*/
	// Take over the raw TCP connection between the client and the kubelet.
	requestHijackedConn, _, err := requestHijacker.Hijack()
	/*
		validation code
	*/
	defer requestHijackedConn.Close()

	/*
		other code
	*/

	writerComplete := make(chan struct{})
	readerComplete := make(chan struct{})

	// Two goroutines copy the stream, one per direction.
	go func() {
		var writer io.WriteCloser
		if h.MaxBytesPerSec > 0 {
			writer = flowrate.NewWriter(backendConn, h.MaxBytesPerSec)
		} else {
			writer = backendConn
		}
		_, err := io.Copy(writer, requestHijackedConn)
		if err != nil && !strings.Contains(err.Error(), "use of closed network connection") {
			klog.Errorf("Error proxying data from client to backend: %v", err)
		}
		close(writerComplete)
	}()

	go func() {
		var reader io.ReadCloser
		if h.MaxBytesPerSec > 0 {
			reader = flowrate.NewReader(backendConn, h.MaxBytesPerSec)
		} else {
			reader = backendConn
		}

		// io.Copy blocks here until the command running in the container
		// exits and the backend closes its side of the connection.
		_, err := io.Copy(requestHijackedConn, reader)
		if err != nil && !strings.Contains(err.Error(), "use of closed network connection") {
			klog.Errorf("Error proxying data from backend to client: %v", err)
		}
		close(readerComplete)
	}()

	// Block until either copy goroutine finishes.
	select {
	case <-writerComplete:
	case <-readerComplete:
	}
	klog.V(6).Infof("Disconnecting from backend proxy %s\n  Headers: %v", &location, clone.Header)

	return true
}

kubelet访问docker shim,发送的升级协议的请求的格式:
协议是:SPDY/3.1
主机是:127.0.0.1:端口
uri是:/exec/{一个token}
(原文此处为抓包截图,展示上述升级协议请求的内容)


2.4 docker shim侧

docker shim是kubelet的一部分,在创建kubelet对象的时候,就会创建和启动docker shim。


// NewMainKubelet constructs the Kubelet object. When the container runtime
// is docker it also creates and starts the in-process docker shim (the CRI
// shim) together with its server.
// NOTE: excerpt — elided code is marked with "other code".
func NewMainKubelet(...) {

	/*
		other code
	*/


	// Build the kubelet object.
	klet := &Kubelet{
	...
	}

	switch containerRuntime {

	case kubetypes.DockerContainerRuntime:	// container runtime is docker
		// ds is the docker shim.
		ds, err := dockershim.NewDockerService(kubeDeps.DockerClientConfig, crOptions.PodSandboxImage, streamingConfig,
			&pluginSettings, runtimeCgroups, kubeCfg.CgroupDriver, crOptions.DockershimRootDirectory, !crOptions.RedirectContainerStreaming)
		if err != nil {
			return nil, err
		}
		if crOptions.RedirectContainerStreaming {
			// Wire ds in as the kubelet's criHandler.
			klet.criHandler = ds
		}

		// server is a wrapper around ds.
		server := dockerremote.NewDockerServer(remoteRuntimeEndpoint, ds)
		// Start the docker shim.
		if err := server.Start(); err != nil {
			return nil, err
		}

		/*
			other code
		*/
	}

	/*
		other code
	*/

	return klet, nil


}

docker shim中包含了stream server,stream server是它的一个属性。

// NewDockerService builds the dockerService (the docker shim). The stream
// server that serves /exec, /attach and /portforward is created here and
// stored as a field of the service.
// NOTE: excerpt — elided code is marked with "other code".
func NewDockerService(config *ClientConfig, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings,
	cgroupsName string, kubeCgroupDriver string, dockershimRootDir string, startLocalStreamingServer bool) (DockerService, error) {

	/*
		other code
	*/

	ds := &dockerService{
		client:          c,
		os:              kubecontainer.RealOS{},
		podSandboxImage: podSandboxImage,
		streamingRuntime: &streamingRuntime{
			client:      client,
			execHandler: &NativeExecHandler{},
		},
		containerManager:          cm.NewContainerManager(cgroupsName, client),
		checkpointManager:         checkpointManager,
		startLocalStreamingServer: startLocalStreamingServer,
		networkReady:              make(map[string]bool),
		containerCleanupInfos:     make(map[string]*containerCleanupInfo),
	}

	// Create the streaming server.
	if streamingConfig != nil {
		var err error
		// The streaming server is a field of the docker service.
		ds.streamingServer, err = streaming.NewServer(*streamingConfig, ds.streamingRuntime)
		if err != nil {
			return nil, err
		}
	}

	/*
		other code
	*/
	return ds, nil
}

stream server(一个web server)的构造方法如下,得知/exec接口的处理器是stream server的serveExec(…)方法。


// NewServer builds the dockershim streaming server and registers its /exec,
// /attach and /portforward routes; /exec is served by s.serveExec.
func NewServer(config Config, runtime Runtime) (Server, error) {
	s := &server{
		config:  config,
		runtime: &criAdapter{runtime},
		cache:   newRequestCache(),
	}

	// Derive a base URL from the listen address unless one was supplied.
	if s.config.BaseURL == nil {
		scheme := "http"
		if s.config.TLSConfig != nil {
			scheme = "https"
		}
		s.config.BaseURL = &url.URL{
			Scheme: scheme,
			Host:   s.config.Addr,
		}
	}

	ws := &restful.WebService{}
	endpoints := []struct {
		path    string
		handler restful.RouteFunction
	}{
		{"/exec/{token}", s.serveExec}, // handler for the /exec endpoint
		{"/attach/{token}", s.serveAttach},
		{"/portforward/{token}", s.servePortForward},
	}

	// Register every endpoint for both GET and POST under the base path.
	prefix := path.Dir(s.config.BaseURL.Path)
	for _, ep := range endpoints {
		for _, verb := range []string{"GET", "POST"} {
			ws.Route(ws.
				Method(verb).
				Path(path.Join(prefix, ep.path)).
				To(ep.handler))
		}
	}

	container := restful.NewContainer()
	container.Add(ws)
	s.handler = container
	s.server = &http.Server{
		Addr:      s.config.Addr,
		Handler:   s.handler,
		TLSConfig: s.config.TLSConfig,
	}

	return s, nil
}

2.5 docker shim中的stream server

stream server的/exec接口的处理器

// serveExec handles GET/POST /exec/{token}: it consumes the cached gRPC
// ExecRequest identified by the token and runs the command via ServeExec.
func (s *server) serveExec(req *restful.Request, resp *restful.Response) {
	token := req.PathParameter("token")

	// Pull the original gRPC request out of the cache by token.
	cached, ok := s.cache.Consume(token)
	if !ok {
		http.NotFound(resp.ResponseWriter, req.Request)
		return
	}
	exec, ok := cached.(*runtimeapi.ExecRequest)
	if !ok {
		http.NotFound(resp.ResponseWriter, req.Request)
		return
	}

	opts := &remotecommandserver.Options{
		Stdin:  exec.Stdin,
		Stdout: exec.Stdout,
		Stderr: exec.Stderr,
		TTY:    exec.Tty,
	}

	// ServeExec blocks until the exec session is torn down; the client's
	// terminal stays attached until it returns.
	remotecommandserver.ServeExec(
		resp.ResponseWriter,
		req.Request,
		s.runtime,
		"", // unused: podName
		"", // unused: podUID
		exec.ContainerId,
		exec.Cmd,
		opts,
		s.config.StreamIdleTimeout,
		s.config.StreamCreationTimeout,
		s.config.SupportedRemoteCommandProtocols)
}

静态方法ServeExec(…)


// ServeExec upgrades the request to a multiplexed streaming connection, runs
// cmd in the container through executor, and reports the command's exit
// status back over the error stream before closing the connection.
func ServeExec(w http.ResponseWriter, req *http.Request, executor Executor, podName string, uid types.UID, container string, cmd []string, streamOpts *Options, idleTimeout, streamCreationTimeout time.Duration, supportedProtocols []string) {
	// Negotiate the SPDY streams with the client; ctx bundles the connection
	// plus the stdin/stdout/stderr streams.
	ctx, ok := createStreams(req, w, streamOpts, supportedProtocols, idleTimeout, streamCreationTimeout)
	if !ok {
		// error is handled by createStreams
		return
	}
	defer ctx.conn.Close()

	// executor is implemented by criAdapter; ultimately this drives the
	// docker client against the docker daemon's exec endpoint.
	err := executor.ExecInContainer(podName, uid, container, cmd, ctx.stdinStream, ctx.stdoutStream, ctx.stderrStream, ctx.tty, ctx.resizeChan, 0)
	if err == nil {
		ctx.writeStatus(&apierrors.StatusError{ErrStatus: metav1.Status{
			Status: metav1.StatusSuccess,
		}})
		return
	}

	// The command ran but exited non-zero: relay the exit code.
	if exitErr, ok := err.(utilexec.ExitError); ok && exitErr.Exited() {
		rc := exitErr.ExitStatus()
		ctx.writeStatus(&apierrors.StatusError{ErrStatus: metav1.Status{
			Status: metav1.StatusFailure,
			Reason: remotecommandconsts.NonZeroExitCodeReason,
			Details: &metav1.StatusDetails{
				Causes: []metav1.StatusCause{
					{
						Type:    remotecommandconsts.ExitCodeCauseType,
						Message: fmt.Sprintf("%d", rc),
					},
				},
			},
			Message: fmt.Sprintf("command terminated with non-zero exit code: %v", exitErr),
		}})
		return
	}

	// Anything else is an internal error.
	err = fmt.Errorf("error executing command in container: %v", err)
	runtime.HandleError(err)
	ctx.writeStatus(apierrors.NewInternalError(err))
}

结构体criAdapter的ExecInContainer(…)方法本质是调用结构体streamingRuntime的属性execHandler的方法ExecInContainer(…)


// ExecInContainer satisfies the Executor interface by delegating to the
// wrapped Runtime's Exec.
func (a *criAdapter) ExecInContainer(podName string, podUID types.UID, container string, cmd []string, in io.Reader, out, err io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize, timeout time.Duration) error {
	/*
		a.Runtime is implemented by:
		type streamingRuntime struct {
			client      libdocker.Interface
			execHandler ExecHandler		// implemented by NativeExecHandler
		}
	*/

	// Delegates to streamingRuntime.Exec, which in turn calls
	// execHandler.ExecInContainer(...).
	return a.Runtime.Exec(container, cmd, in, out, err, tty, resize)
}

结构体streamingRuntime的属性execHandler的实现就是NativeExecHandler

/*
type streamingRuntime struct {
	client      libdocker.Interface
	execHandler ExecHandler 	 // implemented by NativeExecHandler
}
*/

// ExecInContainer runs cmd in the given container via the docker client —
// client.CreateExec(...) followed by client.StartExec(...) — then polls
// InspectExec to learn the command's exit code.
func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container *dockertypes.ContainerJSON, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize, timeout time.Duration) error {
	done := make(chan struct{})
	defer close(done)

	createOpts := dockertypes.ExecConfig{
		Cmd:          cmd,
		AttachStdin:  stdin != nil,
		AttachStdout: stdout != nil,
		AttachStderr: stderr != nil,
		Tty:          tty,
	}

	execObj, err := client.CreateExec(container.ID, createOpts)
	if err != nil {
		return fmt.Errorf("failed to exec in container - Exec setup failed - %v", err)
	}

	// Forward terminal-resize events to docker once the exec has started.
	execStarted := make(chan struct{})
	go func() {
		select {
		case <-execStarted:
			// client.StartExec has started the exec, so we can start resizing
		case <-done:
			// ExecInContainer has returned, so short-circuit
			return
		}

		kubecontainer.HandleResizing(resize, func(size remotecommand.TerminalSize) {
			client.ResizeExecTTY(execObj.ID, uint(size.Height), uint(size.Width))
		})
	}()

	startOpts := dockertypes.ExecStartCheck{Detach: false, Tty: tty}
	streamOpts := libdocker.StreamOptions{
		InputStream:  stdin,
		OutputStream: stdout,
		ErrorStream:  stderr,
		RawTerminal:  tty,
		ExecStarted:  execStarted,
	}

	// StartExec streams the session; it does not return until the command
	// running in the container has finished.
	err = client.StartExec(execObj.ID, startOpts, streamOpts)
	if err != nil {
		return err
	}

	// Even after this function returns, the user's terminal stays attached;
	// it is released only when the caller, remotecommandserver.ServeExec,
	// returns and the stdin/stdout/stderr streams are detached.


	// Poll the exec's status to decide whether to return nil or an error
	// reflecting a non-zero exit code.
	ticker := time.NewTicker(2 * time.Second)
	defer ticker.Stop()
	count := 0
	for {
		inspect, err2 := client.InspectExec(execObj.ID)
		if err2 != nil {
			return err2
		}

		if !inspect.Running {
			// Non-zero exit code: surface it as a dockerExitError.
			if inspect.ExitCode != 0 {
				err = &dockerExitError{inspect}
			}		
			break	// exit the polling loop
		}

		count++
		if count == 5 {
			klog.Errorf("Exec session %s in container %s terminated but process still running!", execObj.ID, container.ID)
			break
		}

		<-ticker.C
	}

	return err
}

当在容器中执行的指令执行完成,client.StartExec(…)进行返回,然后进入for循环探测该命令是否退出、退出码是不是0。(原文此处为截图)
说明:
但用户的xshell终端依然是处于卡住状态,直到上层方法remotecommandserver.ServeExec(…)退出。

3 总结:

kubectl exec经过两个反向代理(先是kube-apiserver,然后是kubelet),到达docker shim(虽然还是属于kubelet进程,kubelet和docker shim通过127.0.0.1通信)的stream server,stream server调用docker client访问docker daemon的/exec接口。

猜你喜欢

转载自blog.csdn.net/nangonghen/article/details/110411187