kubelet Source Code Analysis: Startup

Copyright notice: this is an original post by the author; do not repost without permission. https://blog.csdn.net/qq_21816375/article/details/81901272

The code analyzed in this post is from Kubernetes v1.11.0.

The kubelet's main job is to manage the pods scheduled onto its node and keep every pod on that node running properly.

Keeping pods running means maintaining many pod-related resources: pod configuration, network resources, disk resources, pod log collection, pod event collection, the image GC policy, the container GC policy, the node's PID limits, the container lifecycle, authorization modes, and so on. All of this is maintained and managed by the kubelet.

Below we analyze how the kubelet starts up and how, during startup, it brings up the tools (dependencies) needed for all the maintenance work listed above.

The kubelet startup entry point

k8s.io/kubernetes/cmd/kubelet/kubelet.go

package main

import (
    "fmt"
    "math/rand"
    "os"
    "time"

    "k8s.io/apiserver/pkg/server"
    "k8s.io/apiserver/pkg/util/logs"
    "k8s.io/kubernetes/cmd/kubelet/app"
    _ "k8s.io/kubernetes/pkg/client/metrics/prometheus" // for client metric registration
    _ "k8s.io/kubernetes/pkg/version/prometheus"        // for version metric registration
)

func main() {
    rand.Seed(time.Now().UTC().UnixNano())

    command := app.NewKubeletCommand(server.SetupSignalHandler())
    logs.InitLogs()
    defer logs.FlushLogs()

    if err := command.Execute(); err != nil {
        fmt.Fprintf(os.Stderr, "%v\n", err)
        os.Exit(1)
    }
}

app.NewKubeletCommand(server.SetupSignalHandler()) is where the real work happens: it validates the kubelet's startup flags, constructs the kubelet object, and wires up the services the kubelet needs to maintain pods.

The implementation is in k8s.io/kubernetes/cmd/kubelet/app/server.go:

// NewKubeletCommand creates a *cobra.Command object with default parameters
func NewKubeletCommand(stopCh <-chan struct{}) *cobra.Command {
    cleanFlagSet := pflag.NewFlagSet(componentKubelet, pflag.ContinueOnError)
    cleanFlagSet.SetNormalizeFunc(flag.WordSepNormalizeFunc)
    // set the default KubeletFlags values: docker settings, certificate paths, plugin directory, CIDR, etc.
    kubeletFlags := options.NewKubeletFlags()

    // generate the default kubelet configuration
    kubeletConfig, err := options.NewKubeletConfiguration()
    // programmer error
    if err != nil {
        glog.Fatal(err)
    }

    cmd := &cobra.Command{
        Use: componentKubelet,
        Long: `The kubelet is the primary "node agent" that runs on each
node. The kubelet works in terms of a PodSpec. A PodSpec is a YAML or JSON object
that describes a pod. The kubelet takes a set of PodSpecs that are provided through
various mechanisms (primarily through the apiserver) and ensures that the containers
described in those PodSpecs are running and healthy. The kubelet doesn't manage
containers which were not created by Kubernetes.

Other than from an PodSpec from the apiserver, there are three ways that a container
manifest can be provided to the Kubelet.

File: Path passed as a flag on the command line. Files under this path will be monitored
periodically for updates. The monitoring period is 20s by default and is configurable
via a flag.

HTTP endpoint: HTTP endpoint passed as a parameter on the command line. This endpoint
is checked every 20 seconds (also configurable with a flag).

HTTP server: The kubelet can also listen for HTTP and respond to a simple API
(underspec'd currently) to submit a new manifest.`,
        // The Kubelet has special flag parsing requirements to enforce flag precedence rules,
        // so we do all our parsing manually in Run, below.
        // DisableFlagParsing=true provides the full set of flags passed to the kubelet in the
        // `args` arg to Run, without Cobra's interference.
        DisableFlagParsing: true,
        Run: func(cmd *cobra.Command, args []string) {
            // initial flag parse, since we disable cobra's flag parsing
            if err := cleanFlagSet.Parse(args); err != nil {
                cmd.Usage()
                glog.Fatal(err)
            }

            // check if there are non-flag arguments in the command line
         ... 
         ...

            // short-circuit on verflag
            verflag.PrintAndExitIfRequested()
            utilflag.PrintFlags(cleanFlagSet)

            // set feature gates from initial flags-based config
            if err := utilfeature.DefaultFeatureGate.SetFromMap(kubeletConfig.FeatureGates); err != nil {
                glog.Fatal(err)
            }

            // validate the initial KubeletFlags
            if err := options.ValidateKubeletFlags(kubeletFlags); err != nil {
                glog.Fatal(err)
            }
            // the default infra (pause) image for pods; for a remote runtime this flag is ignored
            if kubeletFlags.ContainerRuntime == "remote" && cleanFlagSet.Changed("pod-infra-container-image") {
                glog.Warning("Warning: For remote container runtime, --pod-infra-container-image is ignored in kubelet, which should be set in that remote runtime instead")
            }

            // load kubelet config file, if provided
            if configFile := kubeletFlags.KubeletConfigFile; len(configFile) > 0 {
                kubeletConfig, err = loadConfigFile(configFile)
                if err != nil {
                    glog.Fatal(err)
                }
                // We must enforce flag precedence by re-parsing the command line into the new object.
                // This is necessary to preserve backwards-compatibility across binary upgrades.
                // See issue #56171 for more details.
                if err := kubeletConfigFlagPrecedence(kubeletConfig, args); err != nil {
                    glog.Fatal(err)
                }
                // update feature gates based on new config
                if err := utilfeature.DefaultFeatureGate.SetFromMap(kubeletConfig.FeatureGates); err != nil {
                    glog.Fatal(err)
                }
            }

            // We always validate the local configuration (command line + config file).
            // This is the default "last-known-good" config for dynamic config, and must always remain valid.
            if err := kubeletconfigvalidation.ValidateKubeletConfiguration(kubeletConfig); err != nil {
                glog.Fatal(err)
            }

            // use dynamic kubelet config, if enabled
            var kubeletConfigController *dynamickubeletconfig.Controller
            if dynamicConfigDir := kubeletFlags.DynamicConfigDir.Value(); len(dynamicConfigDir) > 0 {
                var dynamicKubeletConfig *kubeletconfiginternal.KubeletConfiguration
                dynamicKubeletConfig, kubeletConfigController, err = BootstrapKubeletConfigController(dynamicConfigDir,
                    func(kc *kubeletconfiginternal.KubeletConfiguration) error {
                        // Here, we enforce flag precedence inside the controller, prior to the controller's validation sequence,
                        // so that we get a complete validation at the same point where we can decide to reject dynamic config.
                        // This fixes the flag-precedence component of issue #63305.
                        // See issue #56171 for general details on flag precedence.
                        return kubeletConfigFlagPrecedence(kc, args)
                    })
                if err != nil {
                    glog.Fatal(err)
                }
                // If we should just use our existing, local config, the controller will return a nil config
                if dynamicKubeletConfig != nil {
                    kubeletConfig = dynamicKubeletConfig
                    // Note: flag precedence was already enforced in the controller, prior to validation,
                    // by our above transform function. Now we simply update feature gates from the new config.
                    if err := utilfeature.DefaultFeatureGate.SetFromMap(kubeletConfig.FeatureGates); err != nil {
                        glog.Fatal(err)
                    }
                }
            }

            // construct a KubeletServer from kubeletFlags and kubeletConfig
            kubeletServer := &options.KubeletServer{
                KubeletFlags:         *kubeletFlags,
                KubeletConfiguration: *kubeletConfig,
            }

            // use kubeletServer to construct the default KubeletDeps (the kubelet's injected dependencies)

            kubeletDeps, err := UnsecuredDependencies(kubeletServer)
            if err != nil {
                glog.Fatal(err)
            }

            // add the kubelet config controller to kubeletDeps
            kubeletDeps.KubeletConfigController = kubeletConfigController

            ...
            ...

            // run the kubelet; this Run call is the main entry point
            glog.V(5).Infof("KubeletConfiguration: %#v", kubeletServer.KubeletConfiguration)
            if err := Run(kubeletServer, kubeletDeps, stopCh); err != nil {
                glog.Fatal(err)
            }
        },
    }

    ...
    ...


    return cmd
}

Feature analysis

Dynamically updating the kubelet configuration

   ...
   ...
// use dynamic kubelet config, if enabled
            var kubeletConfigController *dynamickubeletconfig.Controller
            if dynamicConfigDir := kubeletFlags.DynamicConfigDir.Value(); len(dynamicConfigDir) > 0 {
                var dynamicKubeletConfig *kubeletconfiginternal.KubeletConfiguration
                dynamicKubeletConfig, kubeletConfigController, err = BootstrapKubeletConfigController(dynamicConfigDir,
                    func(kc *kubeletconfiginternal.KubeletConfiguration) error {
                        // Here, we enforce flag precedence inside the controller, prior to the controller's validation sequence,
                        // so that we get a complete validation at the same point where we can decide to reject dynamic config.
                        // This fixes the flag-precedence component of issue #63305.
                        // See issue #56171 for general details on flag precedence.
                        return kubeletConfigFlagPrecedence(kc, args)
                    })
                if err != nil {
                    glog.Fatal(err)
                }
                // If we should just use our existing, local config, the controller will return a nil config
                if dynamicKubeletConfig != nil {
                    kubeletConfig = dynamicKubeletConfig
                    // Note: flag precedence was already enforced in the controller, prior to validation,
                    // by our above transform function. Now we simply update feature gates from the new config.
                    if err := utilfeature.DefaultFeatureGate.SetFromMap(kubeletConfig.FeatureGates); err != nil {
                        glog.Fatal(err)
                    }
                }
            }
            ...
            ...

This feature sets up a watch on the kubelet's configuration: when the configuration changes, the kubelet reloads it. It uses the controller (Informer) pattern common throughout Kubernetes, watching a ConfigMap object.

// BootstrapKubeletConfigController constructs and bootstrap a configuration controller
func BootstrapKubeletConfigController(dynamicConfigDir string, transform dynamickubeletconfig.TransformFunc) (*kubeletconfiginternal.KubeletConfiguration, *dynamickubeletconfig.Controller, error) {
    if !utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
        return nil, nil, fmt.Errorf("failed to bootstrap Kubelet config controller, you must enable the DynamicKubeletConfig feature gate")
    }
    if len(dynamicConfigDir) == 0 {
        return nil, nil, fmt.Errorf("cannot bootstrap Kubelet config controller, --dynamic-config-dir was not provided")
    }

    // compute absolute path and bootstrap controller
    dir, err := filepath.Abs(dynamicConfigDir)
    if err != nil {
        return nil, nil, fmt.Errorf("failed to get absolute path for --dynamic-config-dir=%s", dynamicConfigDir)
    }
    // get the latest KubeletConfiguration checkpoint from disk, or return the default config if no valid checkpoints exist
    // watch the checkpointed configuration files on disk
    c := dynamickubeletconfig.NewController(dir, transform)
    kc, err := c.Bootstrap()
    if err != nil {
        return nil, nil, fmt.Errorf("failed to determine a valid configuration, error: %v", err)
    }
    return kc, c, nil
}

For the full implementation, see k8s.io/kubernetes/pkg/kubelet/kubeletconfig/controller.go.

Once dynamic config is enabled, the sync loops are started via kubeDeps.KubeletConfigController.StartSync(kubeDeps.KubeClient, kubeDeps.EventClient, string(nodeName)):

// If the kubelet config controller is available, and dynamic config is enabled, start the config and status sync loops
    if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) && len(s.DynamicConfigDir.Value()) > 0 &&
        kubeDeps.KubeletConfigController != nil && !standaloneMode && !s.RunOnce {
        if err := kubeDeps.KubeletConfigController.StartSync(kubeDeps.KubeClient, kubeDeps.EventClient, string(nodeName)); err != nil {
            return err
        }
    }
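Roughly speaking (in v1.11), the source of truth for dynamic config is the Node object's spec.configSource field, which points at a ConfigMap: StartSync watches it, the controller checkpoints each downloaded configuration under --dynamic-config-dir, and the kubelet exits so its process manager can restart it with the new config.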

Returning to the Run invoked from NewKubeletCommand: this function starts the various dependent services as well as the kubelet's listening ports.

The implementation is the Run function in k8s.io/kubernetes/cmd/kubelet/app/server.go:

func Run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies, stopCh <-chan struct{}) error {
    // To help debugging, immediately log version
    glog.Infof("Version: %+v", version.Get())
    if err := initForOS(s.KubeletFlags.WindowsService); err != nil {
        return fmt.Errorf("failed OS init: %v", err)
    }
    // the main startup function
    if err := run(s, kubeDeps, stopCh); err != nil {
        return fmt.Errorf("failed to run Kubelet: %v", err)
    }
    return nil
}

The implementation of run:

func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies, stopCh <-chan struct{}) (err error) {
    // Set global feature gates based on the value on the initial KubeletServer
    err = utilfeature.DefaultFeatureGate.SetFromMap(s.KubeletConfiguration.FeatureGates)
    if err != nil {
        return err
    }
    // validate the initial KubeletServer (we set feature gates first, because this validation depends on feature gates)
    if err := options.ValidateKubeletServer(s); err != nil {
        return err
    }

    // Obtain Kubelet Lock File
    if s.ExitOnLockContention && s.LockFilePath == "" {
        return errors.New("cannot exit on lock file contention: no lock file specified")
    }
    done := make(chan struct{})
    if s.LockFilePath != "" {
        glog.Infof("acquiring file lock on %q", s.LockFilePath)
        if err := flock.Acquire(s.LockFilePath); err != nil {
            return fmt.Errorf("unable to acquire file lock on %q: %v", s.LockFilePath, err)
        }
        if s.ExitOnLockContention {
            glog.Infof("watching for inotify events for: %v", s.LockFilePath)
            if err := watchForLockfileContention(s.LockFilePath, done); err != nil {
                return err
            }
        }
    }

    // load the dependent services if they were not provided
    if kubeDeps == nil {
        kubeDeps, err = UnsecuredDependencies(s)
        if err != nil {
            return err
        }
    }
   ...
   ...
    // if a bootstrap kubeconfig is provided and the kubeconfig file does not exist yet, the kubelet asks kube-apiserver to sign a client certificate at startup and writes it into the certificate directory; see the LoadClientCert function in k8s.io/kubernetes/pkg/kubelet/certificate/bootstrap/bootstrap.go


   if s.BootstrapKubeconfig != "" {
        if err := bootstrap.LoadClientCert(s.KubeConfig, s.BootstrapKubeconfig, s.CertDirectory, nodeName); err != nil {
            return err
        }
    }

Analysis of SetFromMap

err = utilfeature.DefaultFeatureGate.SetFromMap(s.KubeletConfiguration.FeatureGates)

SetFromMap is the configuration function that applies the kubelet's feature gates, i.e., which optional features are enabled.

// SetFromMap stores flag gates for known features from a map[string]bool or returns an error
func (f *featureGate) SetFromMap(m map[string]bool) error {
    f.lock.Lock()
    defer f.lock.Unlock()

    // Copy existing state
    known := map[Feature]FeatureSpec{}
    for k, v := range f.known.Load().(map[Feature]FeatureSpec) {
        known[k] = v
    }
    enabled := map[Feature]bool{}
    for k, v := range f.enabled.Load().(map[Feature]bool) {
        enabled[k] = v
    }

    for k, v := range m {
        k := Feature(k)
        _, ok := known[k]
        if !ok {
            return fmt.Errorf("unrecognized key: %s", k)
        }
        enabled[k] = v
        // Handle "special" features like "all alpha gates"
        if fn, found := f.special[k]; found {
            fn(known, enabled, v)
        }
    }

    // Persist changes
    f.known.Store(known)
    f.enabled.Store(enabled)

    glog.Infof("feature gates: %v", f.enabled)
    return nil
}
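The detail worth noticing in SetFromMap is the lock-free read path: writers take the mutex, copy the known/enabled maps, mutate the copies, and atomically publish them via atomic.Value, so readers of Enabled() never need a lock. Here is a minimal, self-contained sketch of that copy-on-write pattern (miniGate is a hypothetical type for illustration, not the real featureGate API):

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
)

// miniGate mimics featureGate's copy-on-write design: writers hold a
// mutex, copy the map, mutate the copy, and atomically publish it;
// readers only perform an atomic load.
type miniGate struct {
    lock    sync.Mutex
    enabled atomic.Value // holds a map[string]bool
}

func newMiniGate(defaults map[string]bool) *miniGate {
    g := &miniGate{}
    g.enabled.Store(defaults)
    return g
}

// SetFromMap applies overrides and rejects unknown keys, as the real
// function does.
func (g *miniGate) SetFromMap(m map[string]bool) error {
    g.lock.Lock()
    defer g.lock.Unlock()

    // Copy the existing state so concurrent readers never observe a
    // partially updated map.
    next := map[string]bool{}
    for k, v := range g.enabled.Load().(map[string]bool) {
        next[k] = v
    }
    for k, v := range m {
        if _, ok := next[k]; !ok {
            return fmt.Errorf("unrecognized key: %s", k)
        }
        next[k] = v
    }
    g.enabled.Store(next) // atomic publish
    return nil
}

func (g *miniGate) Enabled(key string) bool {
    return g.enabled.Load().(map[string]bool)[key]
}

func main() {
    g := newMiniGate(map[string]bool{"DynamicKubeletConfig": false})
    if err := g.SetFromMap(map[string]bool{"DynamicKubeletConfig": true}); err != nil {
        panic(err)
    }
    fmt.Println(g.Enabled("DynamicKubeletConfig")) // true
}

This also explains why the kubelet can safely call SetFromMap several times during startup (flags first, then the config file, then dynamic config) while other goroutines are already checking gates.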

Analysis of the UnsecuredDependencies startup

if kubeDeps == nil {
        kubeDeps, err = UnsecuredDependencies(s)
        if err != nil {
            return err
        }
    }

The implementation is at line 355 of k8s.io/kubernetes/cmd/kubelet/app/server.go.

This function builds the dockerClientConfig, the storage Mounter, the OOMAdjuster, the volume plugins from ProbeVolumePlugins, the dynamic plugin prober from GetDynamicPluginProber, the TLS options (tlsOptions), and more.


The main body of run

// if in standalone mode, indicate as much by setting all clients to nil
    if standaloneMode {
        kubeDeps.KubeClient = nil
        kubeDeps.ExternalKubeClient = nil
        kubeDeps.EventClient = nil
        kubeDeps.HeartbeatClient = nil
        glog.Warningf("standalone mode, no API client")
    } else if kubeDeps.KubeClient == nil || kubeDeps.ExternalKubeClient == nil || kubeDeps.EventClient == nil || kubeDeps.HeartbeatClient == nil {
        // initialize clients if not standalone mode and any of the clients are not provided
        var kubeClient clientset.Interface
        var eventClient v1core.EventsGetter
        var heartbeatClient v1core.CoreV1Interface
        var externalKubeClient clientset.Interface
     ...
     ...
     var clientCertificateManager certificate.Manager
        if s.RotateCertificates && utilfeature.DefaultFeatureGate.Enabled(features.RotateKubeletClientCertificate) {
            clientCertificateManager, err = kubeletcertificate.NewKubeletClientCertificateManager(s.CertDirectory, nodeName, clientConfig.CertData, clientConfig.KeyData, clientConfig.CertFile, clientConfig.KeyFile)
            if err != nil {
                return err
            }
        }

Starting the client certificate manager (clientCertificateManager)

...
...
var clientCertificateManager certificate.Manager
        if s.RotateCertificates && utilfeature.DefaultFeatureGate.Enabled(features.RotateKubeletClientCertificate) {
            clientCertificateManager, err = kubeletcertificate.NewKubeletClientCertificateManager(s.CertDirectory, nodeName, clientConfig.CertData, clientConfig.KeyData, clientConfig.CertFile, clientConfig.KeyFile)
            if err != nil {
                return err
            }
        }
...
...

The implementation is in k8s.io/kubernetes/pkg/kubelet/certificate/kubelet.go:

func NewKubeletClientCertificateManager(certDirectory string, nodeName types.NodeName, certData []byte, keyData []byte, certFile string, keyFile string) (certificate.Manager, error) {
    certificateStore, err := certificate.NewFileStore(
        "kubelet-client",
        certDirectory,
        certDirectory,
        certFile,
        keyFile)
    if err != nil {
        return nil, fmt.Errorf("failed to initialize client certificate store: %v", err)
    }
    var certificateExpiration = prometheus.NewGauge(
        prometheus.GaugeOpts{
            Namespace: metrics.KubeletSubsystem,
            Subsystem: "certificate_manager",
            Name:      "client_expiration_seconds",
            Help:      "Gauge of the lifetime of a certificate. The value is the date the certificate will expire in seconds since January 1, 1970 UTC.",
        },
    )
    // register the expiration metric with prometheus
    prometheus.MustRegister(certificateExpiration)

    m, err := certificate.NewManager(&certificate.Config{
        Template: &x509.CertificateRequest{
            Subject: pkix.Name{
                CommonName:   fmt.Sprintf("system:node:%s", nodeName),
                Organization: []string{"system:nodes"},
            },
        },
        Usages: []certificates.KeyUsage{
            // https://tools.ietf.org/html/rfc5280#section-4.2.1.3
            //
            // DigitalSignature allows the certificate to be used to verify
            // digital signatures including signatures used during TLS
            // negotiation.
            certificates.UsageDigitalSignature,
            // KeyEncipherment allows the cert/key pair to be used to encrypt
            // keys, including the symmetric keys negotiated during TLS setup
            // and used for data transfer..
            certificates.UsageKeyEncipherment,
            // ClientAuth allows the cert to be used by a TLS client to
            // authenticate itself to the TLS server.
            certificates.UsageClientAuth,
        },
        CertificateStore:        certificateStore,
        BootstrapCertificatePEM: certData,
        BootstrapKeyPEM:         keyData,
        CertificateExpiration:   certificateExpiration,
    })
    if err != nil {
        return nil, fmt.Errorf("failed to initialize client certificate manager: %v", err)
    }
    return m, nil
}

Creating the kubeClient that talks to kube-apiserver

...
...
    clientConfig, err := createAPIServerClientConfig(s)
        if err != nil {
            return fmt.Errorf("invalid kubeconfig: %v", err)
        }
...
...
// create the kubeClient for talking to kube-apiserver

kubeClient, err = clientset.NewForConfig(clientConfig)
        if err != nil {
            glog.Warningf("New kubeClient from clientConfig error: %v", err)
        } else if kubeClient.CertificatesV1beta1() != nil && clientCertificateManager != nil {
            glog.V(2).Info("Starting client certificate rotation.")
            clientCertificateManager.SetCertificateSigningRequestClient(kubeClient.CertificatesV1beta1().CertificateSigningRequests())
            clientCertificateManager.Start()
        }
    // create the external kube client
externalKubeClient, err = clientset.NewForConfig(clientConfig)
        if err != nil {
            glog.Warningf("New kubeClient from clientConfig error: %v", err)
        }

...
...

The implementation is at line 406 of k8s.io/kubernetes/vendor/k8s.io/client-go/kubernetes/clientset.go.


Creating the event client

 ...
        ...
        // rate limiting (QPS/Burst) is applied to the event client
// make a separate client for events
        eventClientConfig := *clientConfig
        eventClientConfig.QPS = float32(s.EventRecordQPS)
        eventClientConfig.Burst = int(s.EventBurst)
        eventClient, err = v1core.NewForConfig(&eventClientConfig)
        if err != nil {
            glog.Warningf("Failed to create API Server client for Events: %v", err)
        }
        ...
        ...

The implementation is in k8s.io/kubernetes/vendor/k8s.io/client-go/kubernetes/typed/core/v1/core_client.go.


Creating the heartbeat client

// make a separate client for heartbeat with throttling disabled and a timeout attached
        heartbeatClientConfig := *clientConfig
        heartbeatClientConfig.Timeout = s.KubeletConfiguration.NodeStatusUpdateFrequency.Duration
        heartbeatClientConfig.QPS = float32(-1)
        heartbeatClient, err = v1core.NewForConfig(&heartbeatClientConfig)
        if err != nil {
            glog.Warningf("Failed to create API Server client for heartbeat: %v", err)
        }

The implementation is in k8s.io/kubernetes/vendor/k8s.io/client-go/kubernetes/typed/core/v1/core_client.go.


Creating the cAdvisor client

...
...
if kubeDeps.CAdvisorInterface == nil {
        imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.ContainerRuntime, s.RemoteRuntimeEndpoint)
        kubeDeps.CAdvisorInterface, err = cadvisor.New(s.Address, uint(s.CAdvisorPort), imageFsInfoProvider, s.RootDirectory, cadvisor.UsingLegacyCadvisorStats(s.ContainerRuntime, s.RemoteRuntimeEndpoint))
        if err != nil {
            return err
        }
    }
...
...

Starting the container manager
This covers cgroup management, mount management, container runtime management, the cgroup driver, QoS (quality-of-service) enforcement, and more.

... 
   ...
if kubeDeps.ContainerManager == nil {
        if s.CgroupsPerQOS && s.CgroupRoot == "" {
            glog.Infof("--cgroups-per-qos enabled, but --cgroup-root was not specified.  defaulting to /")
            s.CgroupRoot = "/"
        }
        kubeReserved, err := parseResourceList(s.KubeReserved)
        if err != nil {
            return err
        }
        systemReserved, err := parseResourceList(s.SystemReserved)
        if err != nil {
            return err
        }
        var hardEvictionThresholds []evictionapi.Threshold
        // If the user requested to ignore eviction thresholds, then do not set valid values for hardEvictionThresholds here.
        if !s.ExperimentalNodeAllocatableIgnoreEvictionThreshold {
            hardEvictionThresholds, err = eviction.ParseThresholdConfig([]string{}, s.EvictionHard, nil, nil, nil)
            if err != nil {
                return err
            }
        }
        experimentalQOSReserved, err := cm.ParseQOSReserved(s.QOSReserved)
        if err != nil {
            return err
        }

        devicePluginEnabled := utilfeature.DefaultFeatureGate.Enabled(features.DevicePlugins)

        kubeDeps.ContainerManager, err = cm.NewContainerManager(
            kubeDeps.Mounter,
            kubeDeps.CAdvisorInterface,
            cm.NodeConfig{
                RuntimeCgroupsName:    s.RuntimeCgroups,
                SystemCgroupsName:     s.SystemCgroups,
                KubeletCgroupsName:    s.KubeletCgroups,
                ContainerRuntime:      s.ContainerRuntime,
                CgroupsPerQOS:         s.CgroupsPerQOS,
                CgroupRoot:            s.CgroupRoot,
                CgroupDriver:          s.CgroupDriver,
                KubeletRootDir:        s.RootDirectory,
                ProtectKernelDefaults: s.ProtectKernelDefaults,
                NodeAllocatableConfig: cm.NodeAllocatableConfig{
                    KubeReservedCgroupName:   s.KubeReservedCgroup,
                    SystemReservedCgroupName: s.SystemReservedCgroup,
                    EnforceNodeAllocatable:   sets.NewString(s.EnforceNodeAllocatable...),
                    KubeReserved:             kubeReserved,
                    SystemReserved:           systemReserved,
                    HardEvictionThresholds:   hardEvictionThresholds,
                },
                QOSReserved:                           *experimentalQOSReserved,
                ExperimentalCPUManagerPolicy:          s.CPUManagerPolicy,
                ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
                ExperimentalPodPidsLimit:              s.PodPidsLimit,
                EnforceCPULimits:                      s.CPUCFSQuota,
            },
            s.FailSwapOn,
            devicePluginEnabled,
            kubeDeps.Recorder)

        if err != nil {
            return err
        }
    }
    ...
    ...

Opening the health check port

if s.HealthzPort > 0 {
        healthz.DefaultHealthz()
        go wait.Until(func() {
            err := http.ListenAndServe(net.JoinHostPort(s.HealthzBindAddress, strconv.Itoa(int(s.HealthzPort))), nil)
            if err != nil {
                glog.Errorf("Starting health server failed: %v", err)
            }
        }, 5*time.Second, wait.NeverStop)
    }
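(By default HealthzBindAddress is 127.0.0.1 and HealthzPort is 10248, so this serves GET /healthz on 127.0.0.1:10248.)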

Until is a very useful helper: it runs the given function in a loop until the stop channel closes, and with wait.NeverStop as the channel, as above, it never exits.

func Until(f func(), period time.Duration, stopCh <-chan struct{}) {
    JitterUntil(f, period, 0.0, true, stopCh)
}
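As a quick illustration, here is a minimal runnable sketch of how wait.Until behaves (these helpers live in k8s.io/apimachinery/pkg/util/wait):

package main

import (
    "fmt"
    "time"

    "k8s.io/apimachinery/pkg/util/wait"
)

func main() {
    stopCh := make(chan struct{})

    // Run the function once per second until stopCh is closed. With
    // wait.NeverStop as the channel, as in the kubelet code above, the
    // loop never exits.
    go wait.Until(func() {
        fmt.Println("tick", time.Now().Format(time.RFC3339))
    }, time.Second, stopCh)

    time.Sleep(3 * time.Second)
    close(stopCh) // stops the loop
    time.Sleep(time.Second)
}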

Running the kubelet

if err := RunKubelet(&s.KubeletFlags, &s.KubeletConfiguration, kubeDeps, s.RunOnce); err != nil {
        return err
    }

The implementation is at line 890 of k8s.io/kubernetes/cmd/kubelet/app/server.go.

Execution steps

First it checks network, PID, and IPC resources.

The main calls are the following:

 ...
   ...
k, err := CreateAndInitKubelet(kubeCfg,
        kubeDeps,
        &kubeFlags.ContainerRuntimeOptions,
        kubeFlags.ContainerRuntime,
        kubeFlags.RuntimeCgroups,
        kubeFlags.HostnameOverride,
        kubeFlags.NodeIP,
        kubeFlags.ProviderID,
        kubeFlags.CloudProvider,
        kubeFlags.CertDirectory,
        kubeFlags.RootDirectory,
        kubeFlags.RegisterNode,
        kubeFlags.RegisterWithTaints,
        kubeFlags.AllowedUnsafeSysctls,
        kubeFlags.RemoteRuntimeEndpoint,
        kubeFlags.RemoteImageEndpoint,
        kubeFlags.ExperimentalMounterPath,
        kubeFlags.ExperimentalKernelMemcgNotification,
        kubeFlags.ExperimentalCheckNodeCapabilitiesBeforeMount,
        kubeFlags.ExperimentalNodeAllocatableIgnoreEvictionThreshold,
        kubeFlags.MinimumGCAge,
        kubeFlags.MaxPerPodContainerCount,
        kubeFlags.MaxContainerCount,
        kubeFlags.MasterServiceNamespace,
        kubeFlags.RegisterSchedulable,
        kubeFlags.NonMasqueradeCIDR,
        kubeFlags.KeepTerminatedPodVolumes,
        kubeFlags.NodeLabels,
        kubeFlags.SeccompProfileRoot,
        kubeFlags.BootstrapCheckpointPath,
        kubeFlags.NodeStatusMaxImages)
    if err != nil {
        return fmt.Errorf("failed to create kubelet: %v", err)
    }

    // NewMainKubelet should have set up a pod source config if one didn't exist
    // when the builder was run. This is just a precaution.
    if kubeDeps.PodConfig == nil {
        return fmt.Errorf("failed to create kubelet, pod source config was nil")
    }
    podCfg := kubeDeps.PodConfig

    rlimit.RlimitNumFiles(uint64(kubeCfg.MaxOpenFiles))

    // process pods and exit.
    if runOnce {
        if _, err := k.RunOnce(podCfg.Updates()); err != nil {
            return fmt.Errorf("runonce failed: %v", err)
        }
        glog.Infof("Started kubelet as runonce")
    } else {
        startKubelet(k, podCfg, kubeCfg, kubeDeps, kubeFlags.EnableServer)
        glog.Infof("Started kubelet")
    }
    return nil

First, look at the CreateAndInitKubelet function:

func CreateAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
    kubeDeps *kubelet.Dependencies,
    crOptions *config.ContainerRuntimeOptions,
    containerRuntime string,
    runtimeCgroups string,
    hostnameOverride string,
    nodeIP string,
    providerID string,
    cloudProvider string,
    certDirectory string,
    rootDirectory string,
    registerNode bool,
    registerWithTaints []api.Taint,
    allowedUnsafeSysctls []string,
    remoteRuntimeEndpoint string,
    remoteImageEndpoint string,
    experimentalMounterPath string,
    experimentalKernelMemcgNotification bool,
    experimentalCheckNodeCapabilitiesBeforeMount bool,
    experimentalNodeAllocatableIgnoreEvictionThreshold bool,
    minimumGCAge metav1.Duration,
    maxPerPodContainerCount int32,
    maxContainerCount int32,
    masterServiceNamespace string,
    registerSchedulable bool,
    nonMasqueradeCIDR string,
    keepTerminatedPodVolumes bool,
    nodeLabels map[string]string,
    seccompProfileRoot string,
    bootstrapCheckpointPath string,
    nodeStatusMaxImages int32) (k kubelet.Bootstrap, err error) {
    // TODO: block until all sources have delivered at least one update to the channel, or break the sync loop
    // up into "per source" synchronizations

    k, err = kubelet.NewMainKubelet(kubeCfg,
        kubeDeps,
        crOptions,
        containerRuntime,
        runtimeCgroups,
        hostnameOverride,
        nodeIP,
        providerID,
        cloudProvider,
        certDirectory,
        rootDirectory,
        registerNode,
        registerWithTaints,
        allowedUnsafeSysctls,
        remoteRuntimeEndpoint,
        remoteImageEndpoint,
        experimentalMounterPath,
        experimentalKernelMemcgNotification,
        experimentalCheckNodeCapabilitiesBeforeMount,
        experimentalNodeAllocatableIgnoreEvictionThreshold,
        minimumGCAge,
        maxPerPodContainerCount,
        maxContainerCount,
        masterServiceNamespace,
        registerSchedulable,
        nonMasqueradeCIDR,
        keepTerminatedPodVolumes,
        nodeLabels,
        seccompProfileRoot,
        bootstrapCheckpointPath,
        nodeStatusMaxImages)
    if err != nil {
        return nil, err
    }

    // announce the kubelet's "birth" (emit a startup event)
    k.BirthCry()
    // start the garbage collection services
    k.StartGarbageCollection()

    return k, nil
}

The implementation of k.BirthCry():

// BirthCry sends an event that the kubelet has started up.
func (kl *Kubelet) BirthCry() {
    // Make an event that kubelet restarted.
    kl.recorder.Eventf(kl.nodeRef, v1.EventTypeNormal, events.StartingKubelet, "Starting kubelet.")
}

The implementation of k.StartGarbageCollection():

// StartGarbageCollection starts garbage collection threads.
func (kl *Kubelet) StartGarbageCollection() {
    // start the container garbage collector
    loggedContainerGCFailure := false
    go wait.Until(func() {
        if err := kl.containerGC.GarbageCollect(); err != nil {
            glog.Errorf("Container garbage collection failed: %v", err)
            kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
            loggedContainerGCFailure = true
        } else {
            var vLevel glog.Level = 4
            if loggedContainerGCFailure {
                vLevel = 1
                loggedContainerGCFailure = false
            }

            glog.V(vLevel).Infof("Container garbage collection succeeded")
        }
    }, ContainerGCPeriod, wait.NeverStop)

    // start the image garbage collector
    stopChan := make(chan struct{})
    defer close(stopChan)
    // when the high threshold is set to 100, stub the image GC manager
    if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
        glog.V(2).Infof("ImageGCHighThresholdPercent is set 100, Disable image GC")
        go func() { stopChan <- struct{}{} }()
    }

    prevImageGCFailed := false
    go wait.Until(func() {
        if err := kl.imageManager.GarbageCollect(); err != nil {
            if prevImageGCFailed {
                glog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
                // Only create an event for repeated failures
                kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
            } else {
                glog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
            }
            prevImageGCFailed = true
        } else {
            var vLevel glog.Level = 4
            if prevImageGCFailed {
                vLevel = 1
                prevImageGCFailed = false
            }

            glog.V(vLevel).Infof("Image garbage collection succeeded")
        }
    }, ImageGCPeriod, stopChan)
}
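(In this release the loop periods are ContainerGCPeriod = 1 minute and ImageGCPeriod = 5 minutes.)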

Analysis of the startKubelet function

func startKubelet(k kubelet.Bootstrap, podCfg *config.PodConfig, kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps *kubelet.Dependencies, enableServer bool) {
    wg := sync.WaitGroup{}

    // start the kubelet
    wg.Add(1)
    go wait.Until(func() {
        wg.Done()
        k.Run(podCfg.Updates())
    }, 0, wait.NeverStop)

    // start the kubelet server
    // open the serving port (default 10250)

    if enableServer {
        wg.Add(1)
        go wait.Until(func() {
            wg.Done()
            k.ListenAndServe(net.ParseIP(kubeCfg.Address), uint(kubeCfg.Port), kubeDeps.TLSOptions, kubeDeps.Auth, kubeCfg.EnableDebuggingHandlers, kubeCfg.EnableContentionProfiling)
        }, 0, wait.NeverStop)
    }

    // open the read-only port (default 10255)

    if kubeCfg.ReadOnlyPort > 0 {
        wg.Add(1)
        go wait.Until(func() {
            wg.Done()
            k.ListenAndServeReadOnly(net.ParseIP(kubeCfg.Address), uint(kubeCfg.ReadOnlyPort))
        }, 0, wait.NeverStop)
    }
    wg.Wait()
}
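Note the pattern here: each goroutine calls wg.Done() at the start of its first iteration, so wg.Wait() only guarantees that every loop has been entered at least once. Since k.Run and the two ListenAndServe calls block indefinitely, the period of 0 in wait.Until only matters if one of them ever returns, in which case it is re-invoked immediately.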

Analysis of k.Run, at line 1356 of k8s.io/kubernetes/pkg/kubelet/kubelet.go:

// Run starts the kubelet reacting to config updates
func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
    // start the log server
    if kl.logServer == nil {
        kl.logServer = http.StripPrefix("/logs/", http.FileServer(http.Dir("/var/log/")))
    }
    if kl.kubeClient == nil {
        glog.Warning("No api server defined - no node status update will be sent.")
    }
    // initialize modules: volumes, data directories, container logs,
    // the image manager, the certificate manager, the OOM watcher,
    // and the resource analyzer
    if err := kl.initializeModules(); err != nil {
        kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.KubeletSetupFailed, err.Error())
        glog.Fatal(err)
    }

    // Start volume manager
    // start the volume manager
    go kl.volumeManager.Run(kl.sourcesReady, wait.NeverStop)

    if kl.kubeClient != nil {
        // Start syncing node status immediately, this may set up things the runtime needs to run.
        go wait.Until(kl.syncNodeStatus, kl.nodeStatusUpdateFrequency, wait.NeverStop)
    }
    go wait.Until(kl.updateRuntimeUp, 5*time.Second, wait.NeverStop)

    // start the network utility sync loop
    // Start loop to sync iptables util rules
    if kl.makeIPTablesUtilChains {
        go wait.Until(kl.syncNetworkUtil, 1*time.Minute, wait.NeverStop)
    }

    // start the pod-killing mechanism
    // Start a goroutine responsible for killing pods (that are not properly
    // handled by pod workers).
    go wait.Until(kl.podKiller, 1*time.Second, wait.NeverStop)

    // Start a goroutine responsible for checking limits in resolv.conf
    if kl.dnsConfigurer.ResolverConfig != "" {
        go wait.Until(func() { kl.dnsConfigurer.CheckLimitsForResolvConf() }, 30*time.Second, wait.NeverStop)
    }

    // Start component sync loops.
    kl.statusManager.Start() // status manager
    kl.probeManager.Start() // probe manager

    // Start the pod lifecycle event generator.
    kl.pleg.Start() // start the pod lifecycle event generator (PLEG)
    kl.syncLoop(updates, kl) // the main sync loop
}

The implementation of kl.syncLoop:

func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
    glog.Info("Starting kubelet main sync loop.")
    // The resyncTicker wakes up kubelet to checks if there are any pod workers
    // that need to be sync'd. A one-second period is sufficient because the
    // sync interval is defaulted to 10s.
    syncTicker := time.NewTicker(time.Second)
    defer syncTicker.Stop()
    housekeepingTicker := time.NewTicker(housekeepingPeriod)
    defer housekeepingTicker.Stop()
    plegCh := kl.pleg.Watch()
    const (
        base   = 100 * time.Millisecond
        max    = 5 * time.Second
        factor = 2
    )
    duration := base
    for {
        if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
            glog.Infof("skipping pod synchronization - %v", rs)
            // exponential backoff
            time.Sleep(duration)
            duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
            continue
        }
        // reset backoff if we have a success
        duration = base

        kl.syncLoopMonitor.Store(kl.clock.Now())
        if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
            break
        }
        kl.syncLoopMonitor.Store(kl.clock.Now())
    }
}

The implementation of the initializeModules function:

// initializeModules will initialize internal modules that do not require the container runtime to be up.
// Note that the modules here must not depend on modules that are not initialized here.
func (kl *Kubelet) initializeModules() error {
    // Prometheus metrics.
    metrics.Register(kl.runtimeCache, collectors.NewVolumeStatsCollector(kl))

    // Setup filesystem directories.
    if err := kl.setupDataDirs(); err != nil {
        return err
    }

    // If the container logs directory does not exist, create it.
    if _, err := os.Stat(ContainerLogsDir); err != nil {
        if err := kl.os.MkdirAll(ContainerLogsDir, 0755); err != nil {
            glog.Errorf("Failed to create directory %q: %v", ContainerLogsDir, err)
        }
    }

    // Start the image manager.
    kl.imageManager.Start()

    // Start the certificate manager if it was enabled.
    if kl.serverCertificateManager != nil {
        kl.serverCertificateManager.Start()
    }

    // Start out of memory watcher.
    if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
        return fmt.Errorf("Failed to start OOM watcher %v", err)
    }

    // Start resource analyzer
    kl.resourceAnalyzer.Start()

    return nil
}

The implementation of kl.updateRuntimeUp:

// updateRuntimeUp calls the container runtime status callback, initializing
// the runtime dependent modules when the container runtime first comes up,
// and returns an error if the status check fails.  If the status check is OK,
// update the container runtime uptime in the kubelet runtimeState.
func (kl *Kubelet) updateRuntimeUp() {
    s, err := kl.containerRuntime.Status()
    if err != nil {
        glog.Errorf("Container runtime sanity check failed: %v", err)
        return
    }
    if s == nil {
        glog.Errorf("Container runtime status is nil")
        return
    }
    // Periodically log the whole runtime status for debugging.
    // TODO(random-liu): Consider to send node event when optional
    // condition is unmet.
    glog.V(4).Infof("Container runtime status: %v", s)
    networkReady := s.GetRuntimeCondition(kubecontainer.NetworkReady)
    if networkReady == nil || !networkReady.Status {
        glog.Errorf("Container runtime network not ready: %v", networkReady)
        kl.runtimeState.setNetworkState(fmt.Errorf("runtime network not ready: %v", networkReady))
    } else {
        // Set nil if the container runtime network is ready.
        kl.runtimeState.setNetworkState(nil)
    }
    // TODO(random-liu): Add runtime error in runtimeState, and update it
    // when runtime is not ready, so that the information in RuntimeReady
    // condition will be propagated to NodeReady condition.
    runtimeReady := s.GetRuntimeCondition(kubecontainer.RuntimeReady)
    // If RuntimeReady is not set or is false, report an error.
    if runtimeReady == nil || !runtimeReady.Status {
        glog.Errorf("Container runtime not ready: %v", runtimeReady)
        return
    }
    kl.oneTimeInitializer.Do(kl.initializeRuntimeDependentModules)
    kl.runtimeState.setRuntimeSync(kl.clock.Now())
}
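kl.oneTimeInitializer is a sync.Once, so initializeRuntimeDependentModules runs exactly once, the first time the container runtime reports ready.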

Analysis of the kl.initializeRuntimeDependentModules function

// initializeRuntimeDependentModules will initialize internal modules that require the container runtime to be up.
// it starts the kubelet modules that depend on the container runtime
func (kl *Kubelet) initializeRuntimeDependentModules() {
    if err := kl.cadvisor.Start(); err != nil {
        // Fail kubelet and rely on the babysitter to retry starting kubelet.
        // TODO(random-liu): Add backoff logic in the babysitter
        glog.Fatalf("Failed to start cAdvisor %v", err)
    }

    // trigger on-demand stats collection once so that we have capacity information for ephemeral storage.
    // ignore any errors, since if stats collection is not successful, the container manager will fail to start below.
    kl.StatsProvider.GetCgroupStats("/", true)
    // Start container manager.
    node, err := kl.getNodeAnyWay()
    if err != nil {
        // Fail kubelet and rely on the babysitter to retry starting kubelet.
        glog.Fatalf("Kubelet failed to get node info: %v", err)
    }
    // containerManager must start after cAdvisor because it needs filesystem capacity information
    if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil {
        // Fail kubelet and rely on the babysitter to retry starting kubelet.
        glog.Fatalf("Failed to start ContainerManager %v", err)
    }
    // eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
    kl.evictionManager.Start(kl.StatsProvider, kl.GetActivePods, kl.podResourcesAreReclaimed, evictionMonitoringPeriod)

    // container log manager must start after container runtime is up to retrieve information from container runtime
    // and inform container to reopen log file after log rotation.
    kl.containerLogManager.Start()
    if kl.enablePluginsWatcher {
        // Adding Registration Callback function for CSI Driver
        kl.pluginWatcher.AddHandler("CSIPlugin", csi.RegistrationCallback)
        // Start the plugin watcher
        glog.V(4).Infof("starting watcher")
        if err := kl.pluginWatcher.Start(); err != nil {
            kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.KubeletSetupFailed, err.Error())
            glog.Fatalf("failed to start Plugin Watcher. err: %v", err)
        }
    }
}

command.Execute() is what drives the command-line interface.

Its implementation lives in:

k8s.io/kubernetes/vendor/github.com/spf13/cobra/command.go

// Execute uses the args (os.Args[1:] by default)
// and run through the command tree finding appropriate matches
// for commands and then corresponding flags.
func (c *Command) Execute() error {
    _, err := c.ExecuteC()
    return err
}
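For context, here is a minimal, self-contained sketch of the cobra pattern the kubelet relies on: DisableFlagParsing combined with manual pflag parsing inside Run, which is what lets it re-parse args later to enforce flag precedence (the demo command and flag names here are hypothetical):

package main

import (
    "fmt"
    "os"

    "github.com/spf13/cobra"
    "github.com/spf13/pflag"
)

func main() {
    fs := pflag.NewFlagSet("demo", pflag.ContinueOnError)
    name := fs.String("name", "world", "who to greet")

    cmd := &cobra.Command{
        Use: "demo",
        // Cobra hands us the raw args; we parse them ourselves, just
        // like NewKubeletCommand does with its cleanFlagSet.
        DisableFlagParsing: true,
        Run: func(cmd *cobra.Command, args []string) {
            if err := fs.Parse(args); err != nil {
                fmt.Fprintln(os.Stderr, err)
                os.Exit(1)
            }
            fmt.Println("hello,", *name)
        },
    }

    if err := cmd.Execute(); err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
}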

At this point, the kubelet has started completely.
Next, we will look at how each individual feature is implemented: OOM management, container garbage collection, image garbage collection, network management, the pod lifecycle, pod status management, runtime management, pod log collection, pod event collection, authorization modes, and so on.
