Analysis of kubelet source code: CNI plug-in processing flow

Overall introduction

The kubelet ships dockershim as its built-in CRI implementation: the kubelet talks to dockershim over the CRI gRPC interface, and the CNI plugin is invoked from inside the dockershim gRPC server.

kubelet -> CRI shim -> container runtime -> container

The sections below walk the call structure from the kubelet through the dockershim server down to CNI during Pod creation.
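
To make the kubelet -> CRI shim hop concrete, here is a minimal sketch (not kubelet code) of dialing the dockershim unix socket and issuing a CRI RPC. The cri-api import path and the socket path are assumptions and vary by Kubernetes version.

package main

import (
    "context"
    "fmt"
    "net"
    "time"

    "google.golang.org/grpc"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

func main() {
    // Assumed dockershim CRI endpoint: unix:///var/run/dockershim.sock
    conn, err := grpc.Dial("/var/run/dockershim.sock",
        grpc.WithInsecure(),
        grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
            return net.DialTimeout("unix", addr, timeout)
        }))
    if err != nil {
        panic(err)
    }
    defer conn.Close()

    // The same RuntimeService interface the kubelet uses for RunPodSandbox,
    // CreateContainer, etc.; Version is the simplest call to demonstrate.
    client := runtimeapi.NewRuntimeServiceClient(conn)
    resp, err := client.Version(context.Background(), &runtimeapi.VersionRequest{})
    if err != nil {
        panic(err)
    }
    fmt.Printf("container runtime: %s %s\n", resp.RuntimeName, resp.RuntimeVersion)
}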

CNI plugin initialization

If the kubelet is configured with Docker as its container runtime, it starts the dockershim gRPC server during initialization:

case kubetypes.DockerContainerRuntime:
    // Create and start the CRI shim running as a grpc server.
    streamingConfig := getStreamingConfig(kubeCfg, kubeDeps)
    ds, err := dockershim.NewDockerService(kubeDeps.DockerClientConfig, crOptions.PodSandboxImage, streamingConfig,
        &pluginSettings, runtimeCgroups, kubeCfg.CgroupDriver, crOptions.DockershimRootDirectory,
        crOptions.DockerDisableSharedPID)
    if err != nil {
        return nil, err
    }
    // For now, the CRI shim redirects the streaming requests to the
    // kubelet, which handles the requests using DockerService..
    klet.criHandler = ds

    // The unix socket for kubelet <-> dockershim communication.
    glog.V(5).Infof("RemoteRuntimeEndpoint: %q, RemoteImageEndpoint: %q",
        remoteRuntimeEndpoint,
        remoteImageEndpoint)
    glog.V(2).Infof("Starting the GRPC server for the docker CRI shim.")
    server := dockerremote.NewDockerServer(remoteRuntimeEndpoint, ds)
    if err := server.Start(); err != nil {
        return nil, err
    }

The CNI plugin is probed and initialized when the dockerService object is created:

cniPlugins := cni.ProbeNetworkPlugins(pluginSettings.PluginConfDir, pluginSettings.PluginBinDir)
cniPlugins = append(cniPlugins, kubenet.NewPlugin(pluginSettings.PluginBinDir))

When the CNI plugin is initialized, it scans pluginDir for the first CNI config file that satisfies the requirements, and then locates the corresponding CNI binary named by that config file.

func probeNetworkPluginsWithVendorCNIDirPrefix(pluginDir, binDir, vendorCNIDirPrefix string) []network.NetworkPlugin {
    if binDir == "" {
        binDir = DefaultCNIDir
    }
    plugin := &cniNetworkPlugin{
        defaultNetwork:     nil,
        loNetwork:          getLoNetwork(binDir, vendorCNIDirPrefix),
        execer:             utilexec.New(),
        pluginDir:          pluginDir,
        binDir:             binDir,
        vendorCNIDirPrefix: vendorCNIDirPrefix,
    }

    // sync NetworkConfig in best effort during probing.
    plugin.syncNetworkConfig()
    return []network.NetworkPlugin{plugin}
}
func (plugin *cniNetworkPlugin) syncNetworkConfig() {
    network, err := getDefaultCNINetwork(plugin.pluginDir, plugin.binDir, plugin.vendorCNIDirPrefix)
    if err != nil {
        glog.Warningf("Unable to update cni config: %s", err)
        return
    }
    plugin.setDefaultNetwork(network)
}
// Find files ending in .conf, .conflist, or .json under the plugin directory, sort them by
// file name, and return the first one whose contents form a valid CNI plugin configuration.
func getDefaultCNINetwork(pluginDir, binDir, vendorCNIDirPrefix string) (*cniNetwork, error) {
    if pluginDir == "" {
        pluginDir = DefaultNetDir
    }
    files, err := libcni.ConfFiles(pluginDir, []string{".conf", ".conflist", ".json"})
    switch {
    case err != nil:
        return nil, err
    case len(files) == 0:
        return nil, fmt.Errorf("No networks found in %s", pluginDir)
    }

    sort.Strings(files)
    for _, confFile := range files {
        var confList *libcni.NetworkConfigList
        if strings.HasSuffix(confFile, ".conflist") {
            confList, err = libcni.ConfListFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config list file %s: %v", confFile, err)
                continue
            }
        } else {
            conf, err := libcni.ConfFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config file %s: %v", confFile, err)
                continue
            }
            // Ensure the config has a "type" so we know what plugin to run.
            // Also catches the case where somebody put a conflist into a conf file.
            if conf.Network.Type == "" {
                glog.Warningf("Error loading CNI config file %s: no 'type'; perhaps this is a .conflist?", confFile)
                continue
            }

            confList, err = libcni.ConfListFromConf(conf)
            if err != nil {
                glog.Warningf("Error converting CNI config file %s to list: %v", confFile, err)
                continue
            }
        }
        if len(confList.Plugins) == 0 {
            glog.Warningf("CNI config list %s has no networks, skipping", confFile)
            continue
        }
        confType := confList.Plugins[0].Network.Type

        // Search for vendor-specific plugins as well as default plugins in the CNI codebase.
        vendorDir := vendorCNIDir(vendorCNIDirPrefix, confType)
        cninet := &libcni.CNIConfig{
            Path: []string{vendorDir, binDir},
        }
        network := &cniNetwork{name: confList.Name, NetworkConfig: confList, CNIConfig: cninet}
        return network, nil
    }
    return nil, fmt.Errorf("No valid networks found in %s", pluginDir)
}
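
For illustration, here is a minimal sketch of parsing a conflist the way getDefaultCNINetwork does. The file content below (a bridge + portmap chain named mynet) is hypothetical, and the snippet assumes the containernetworking libcni package vendored by this kubelet tree:

package main

import (
    "fmt"

    "github.com/containernetworking/cni/libcni"
)

func main() {
    // A hypothetical /etc/cni/net.d/10-mynet.conflist. The first plugin's "type"
    // must name a binary present in the CNI bin dir (e.g. /opt/cni/bin/bridge).
    raw := []byte(`{
      "cniVersion": "0.3.1",
      "name": "mynet",
      "plugins": [
        {"type": "bridge", "bridge": "cni0", "ipam": {"type": "host-local", "subnet": "10.22.0.0/16"}},
        {"type": "portmap", "capabilities": {"portMappings": true}}
      ]
    }`)

    confList, err := libcni.ConfListFromBytes(raw)
    if err != nil {
        panic(err)
    }
    // The same fields getDefaultCNINetwork reads: the list name and the first plugin's type.
    fmt.Println(confList.Name, confList.Plugins[0].Network.Type) // mynet bridge
}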

POD creation sources

Before analyzing the Pod creation flow, let's look at how the kubelet obtains Pod resources. In short, there are three Pod sources:

  • Listing and watching the apiserver for Pod updates — the most common source
  • Static Pods read from the manifest directory on local disk
  • Static Pods fetched from an HTTP URL

The latter two sources produce static Pods; so that they can still be managed through the apiserver, the kubelet creates mirror Pods for them in the apiserver.
// makePodSourceConfig provides the kubelet with its sources of pod update events. Three sources
// are currently supported: watching a URL, watching a directory, and watching the apiserver.
// Each source has its own channel.
func makePodSourceConfig(kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps *Dependencies, nodeName types.NodeName, bootstrapCheckpointPath string) (*config.PodConfig, error) {
    manifestURLHeader := make(http.Header)
    if len(kubeCfg.ManifestURLHeader) > 0 {
        for k, v := range kubeCfg.ManifestURLHeader {
            for i := range v {
                manifestURLHeader.Add(k, v[i])
            }
        }
    }

    // source of all configuration
    cfg := config.NewPodConfig(config.PodConfigNotificationIncremental, kubeDeps.Recorder)

    // define file config source
    if kubeCfg.PodManifestPath != "" {
        glog.Infof("Adding manifest path: %v", kubeCfg.PodManifestPath)
        config.NewSourceFile(kubeCfg.PodManifestPath, nodeName, kubeCfg.FileCheckFrequency.Duration, cfg.Channel(kubetypes.FileSource))
    }

    // define url config source
    if kubeCfg.ManifestURL != "" {
        glog.Infof("Adding manifest url %q with HTTP header %v", kubeCfg.ManifestURL, manifestURLHeader)
        config.NewSourceURL(kubeCfg.ManifestURL, manifestURLHeader, nodeName, kubeCfg.HTTPCheckFrequency.Duration, cfg.Channel(kubetypes.HTTPSource))
    }

    // Restore from the checkpoint path
    // NOTE: This MUST happen before creating the apiserver source
    // below, or the checkpoint would override the source of truth.

    var updatechannel chan<- interface{}
    if bootstrapCheckpointPath != "" {
        glog.Infof("Adding checkpoint path: %v", bootstrapCheckpointPath)
        updatechannel = cfg.Channel(kubetypes.ApiserverSource)
        err := cfg.Restore(bootstrapCheckpointPath, updatechannel)
        if err != nil {
            return nil, err
        }
    }

    if kubeDeps.KubeClient != nil {
        glog.Infof("Watching apiserver")
        if updatechannel == nil {
            updatechannel = cfg.Channel(kubetypes.ApiserverSource)
        }
        config.NewSourceApiserver(kubeDeps.KubeClient, nodeName, updatechannel)
    }
    return cfg, nil
}

Afterwards, every update from a Pod source sends an event to the PodConfig's update channel, which the kubelet's main sync loop consumes:

// syncLoop is the main loop for processing changes. It watches for changes from
// three channels (file, apiserver, and http) and creates a union of them. For
// any new change seen, will run a sync against desired state and running state. If
// no changes are seen to the configuration, will synchronize the last known desired
// state every sync-frequency seconds. Never returns.
func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
    glog.Info("Starting kubelet main sync loop.")
    // The resyncTicker wakes up kubelet to checks if there are any pod workers
    // that need to be sync'd. A one-second period is sufficient because the
    // sync interval is defaulted to 10s.
    syncTicker := time.NewTicker(time.Second)
    defer syncTicker.Stop()
    housekeepingTicker := time.NewTicker(housekeepingPeriod)
    defer housekeepingTicker.Stop()
    plegCh := kl.pleg.Watch()
    const (
        base   = 100 * time.Millisecond
        max    = 5 * time.Second
        factor = 2
    )
    duration := base
    for {
        if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
            glog.Infof("skipping pod synchronization - %v", rs)
            // exponential backoff
            time.Sleep(duration)
            duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
            continue
        }
        // reset backoff if we have a success
        duration = base

        kl.syncLoopMonitor.Store(kl.clock.Now())
        if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
            break
        }
        kl.syncLoopMonitor.Store(kl.clock.Now())
    }
}
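
The updates channel carries kubetypes.PodUpdate values. A rough sketch of one such event as syncLoopIteration (below) receives it, after the PodConfig merger has reduced raw source data to incremental changes; the field values here are illustrative:

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

func main() {
    pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "nginx", Namespace: "default"}}
    update := kubetypes.PodUpdate{
        Op:     kubetypes.ADD,             // dispatched to handler.HandlePodAdditions
        Source: kubetypes.ApiserverSource, // file and http sources use their own constants
        Pods:   []*v1.Pod{pod},
    }
    fmt.Printf("op=%v source=%s pods=%d\n", update.Op, update.Source, len(update.Pods))
}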

POD creation flow

func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handler SyncHandler,
    syncCh <-chan time.Time, housekeepingCh <-chan time.Time, plegCh <-chan *pleg.PodLifecycleEvent) bool {
    select {
    case u, open := <-configCh:
        // Update from a config source; dispatch it to the right handler
        // callback.
        if !open {
            glog.Errorf("Update channel is closed. Exiting the sync loop.")
            return false
        }

        switch u.Op {
        case kubetypes.ADD:
            glog.V(2).Infof("SyncLoop (ADD, %q): %q", u.Source, format.Pods(u.Pods))
            // After restarting, kubelet will get all existing pods through
            // ADD as if they are new pods. These pods will then go through the
            // admission process and *may* be rejected. This can be resolved
            // once we have checkpointing.
            handler.HandlePodAdditions(u.Pods)
        case kubetypes.UPDATE:
            glog.V(2).Infof("SyncLoop (UPDATE, %q): %q", u.Source, format.PodsWithDeletiontimestamps(u.Pods))
            handler.HandlePodUpdates(u.Pods)
        case kubetypes.REMOVE:
            glog.V(2).Infof("SyncLoop (REMOVE, %q): %q", u.Source, format.Pods(u.Pods))
            handler.HandlePodRemoves(u.Pods)
        case kubetypes.RECONCILE:
            glog.V(4).Infof("SyncLoop (RECONCILE, %q): %q", u.Source, format.Pods(u.Pods))
            handler.HandlePodReconcile(u.Pods)
        case kubetypes.DELETE:
            glog.V(2).Infof("SyncLoop (DELETE, %q): %q", u.Source, format.Pods(u.Pods))
            // DELETE is treated as a UPDATE because of graceful deletion.
            handler.HandlePodUpdates(u.Pods)
                ... ...
    return true
}

podWorkers assigns a goroutine to each Pod to carry out the actual work for a Pod update; each worker eventually calls:

result := kl.containerRuntime.SyncPod(pod, apiPodStatus, podStatus, pullSecrets, kl.backOff)

During SyncPod, createPodSandbox is called to create the pause container (the infrastructure container). Once the pause container exists, all application containers in the Pod share its network namespace. When the pause container is created, the following method is called to set up the Pod's network:

err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)

// To keep the network configuration alive even when an application container exits abnormally,
// a pause (infra) container is created to hold and share the network settings.
// The pause container is also called the infrastructure container.
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
// For docker, PodSandbox is implemented by a container holding the network
// namespace for the pod.
// Note: docker doesn't use LogDirectory (yet).
func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (id string, err error) {
    // Step 1: Pull the image for the sandbox.
        ... ...
    // Step 2: Create the sandbox container.
        ... ....

    // Step 3: Create Sandbox Checkpoint.
        ... ...

    // Step 4: Start the sandbox container.
        ... ...

    // Step 5: Setup networking for the sandbox.
    // All pod networking is setup by a CNI plugin discovered at startup time.
    // This plugin assigns the pod ip, sets up routes inside the sandbox,
    // creates interfaces etc. In theory, its jurisdiction ends with pod
    // sandbox networking, but it might insert iptables rules or open ports
    // on the host as well, to satisfy parts of the pod spec that aren't
    // recognized by the CNI standard yet.
    cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
    err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
    if err != nil {
        // TODO(random-liu): Do we need to teardown network here?
        if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
            glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
        }
    }
    return createResp.ID, err
}
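
As noted above, the application containers then share this sandbox's network. Docker supports joining another container's namespaces via the "container:<id>" mode, and dockershim sets up each application container's HostConfig along these lines. A minimal sketch, assuming the docker engine API types (the sandbox ID below is a placeholder):

package main

import (
    "fmt"

    "github.com/docker/docker/api/types/container"
)

func main() {
    // The pause (sandbox) container ID returned by RunPodSandbox above.
    sandboxID := "PAUSE_CONTAINER_ID"

    // Application containers join the sandbox's namespaces instead of getting
    // their own, so the interfaces and routes that CNI configured in the
    // sandbox's network namespace are visible to every container in the Pod.
    hc := &container.HostConfig{
        NetworkMode: container.NetworkMode("container:" + sandboxID),
        IpcMode:     container.IpcMode("container:" + sandboxID),
    }
    fmt.Println(hc.NetworkMode.IsContainer(), hc.NetworkMode.ConnectedContainer())
}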

SetUpPod -> addToNetwork -> cniNet.AddNetworkList: AddNetworkList executes the CNI binary according to the CNI spec, feeding it the contents of the CNI config file on stdin and passing podName, podNamespace, podSandboxID, and so on to the binary as environment variables.

func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations map[string]string) (cnitypes.Result, error) {
    rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath, annotations)
    if err != nil {
        glog.Errorf("Error adding network when building cni runtime conf: %v", err)
        return nil, err
    }

    netConf, cniNet := network.NetworkConfig, network.CNIConfig
    glog.V(4).Infof("About to add CNI network %v (type=%v)", netConf.Name, netConf.Plugins[0].Network.Type)
    res, err := cniNet.AddNetworkList(netConf, rt)
    if err != nil {
        glog.Errorf("Error adding network: %v", err)
        return nil, err
    }

    return res, nil
}
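
On the other side of that exec, the CNI binary sees the network configuration on stdin and the runtime parameters as CNI_* environment variables. The following is a toy sketch of a (hypothetical) plugin binary's view of the invocation, just to show where the values assembled by buildCNIRuntimeConf end up; a real plugin would also have to print a CNI result JSON on stdout:

package main

import (
    "fmt"
    "io/ioutil"
    "os"
)

func main() {
    // Environment set for the invocation:
    //   CNI_COMMAND     - ADD or DEL
    //   CNI_CONTAINERID - the pod sandbox (pause container) ID
    //   CNI_NETNS       - path to the sandbox's network namespace
    //   CNI_IFNAME      - interface name to create inside the sandbox (usually eth0)
    //   CNI_ARGS        - K8S_POD_NAMESPACE=...;K8S_POD_NAME=...;K8S_POD_INFRA_CONTAINER_ID=...
    //   CNI_PATH        - directories to search for chained plugin binaries
    for _, k := range []string{"CNI_COMMAND", "CNI_CONTAINERID", "CNI_NETNS", "CNI_IFNAME", "CNI_ARGS", "CNI_PATH"} {
        fmt.Fprintf(os.Stderr, "%s=%s\n", k, os.Getenv(k))
    }

    // The selected network configuration arrives on stdin.
    conf, _ := ioutil.ReadAll(os.Stdin)
    fmt.Fprintf(os.Stderr, "stdin config: %s\n", conf)
}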
