【kubernetes/k8s源码分析】operator controller-runtime源码分析

operator的原理

      kubernetes 自定义扩展资源注册到 controller-manager,通过 list / watch 的方式监听资源的变化,然后在生命周期内的各个环节做相应的协调处理。所谓的处理就是 operator 实现有状态应用的核心部分

     operator-sdk 和 Kubebuilder 是目前开发 operator 两种常用的 framework,生成相应的模板而已

kubebuilder 使用

    创建project

         在 ${GOPATH}/src 目录下创建目录,进入目录执行

         kubebuilder init --domain krome.io --license apache2 --owner "zzl"

         kubebuilder create api --group apps --version v1 --kind TestKind

     参考: https://github.com/kubernetes-sigs/kubebuilder

operator-sdk 使用

     operator-sdk new krome

     operator-sdk add api --api-version=apps.krome.io/v1 --kind=Statefulset

     operator-sdk add controller --api-version=apps.krome.io/v1 --kind=Statefulset

    更新并生成自定义资源的代码

        每当types.go文件有变更时,因为有一些代码文件依赖于types.go中定义的类型,所以它们需要重新生成。

  $ operator-sdk generate k8s

    kubebuilder 与 operator-sdk 都使用了 controller-runtime 框架

    实例化 Manager,定义在 controller-runtime/pkg/manager/manager.go 的 New 函数,GetConfigOrDie 函数如果指定了环境变量 KUBECONFIG,则从这个路径下连接 kubernetes,或者 in-cluster 模式,指定 QPS

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
    Scheme:             scheme,
    MetricsBindAddress: metricsAddr,
    Port:               9443,
    LeaderElection:     enableLeaderElection,
    LeaderElectionID:   "2b0c98f1.krome.io",
})


1. New 函数实例化 Manager 对象,实现了 Manager 接口

// New returns a new Manager for creating Controllers.
func New(config *rest.Config, options Options) (Manager, error) {
	// Initialize a rest.config if none was specified
	if config == nil {
		return nil, fmt.Errorf("must specify Config")
	}

	// Set default values for options fields
	options = setOptionsDefaults(options)

     1.1 setOptionsDefaults 函数

       这里设置默认的 cache,这个说一下,核心结构 InformersMap 

// InformersMap creates and caches Informers for (runtime.Object, schema.GroupVersionKind) pairs.
// It uses a standard parameter codec constructed based on the given generated Scheme.
type InformersMap struct {
	// The details of structured vs. unstructured objects are abstracted away
	// behind two specificInformersMap instances, one for each flavor.

	// structured is the specificInformersMap for the structured flavor.
	structured   *specificInformersMap
	// unstructured is the specificInformersMap for the unstructured flavor.
	unstructured *specificInformersMap

	// Scheme maps runtime.Objects to GroupVersionKinds.
	Scheme *runtime.Scheme
}

    1.1.1 对于 option 中的 provider,这个主要是创建事件广播器

// NewProvider builds a recorder.Provider around the supplied event broadcaster.
//
// It creates a Kubernetes clientset from config, points the broadcaster at an
// event sink backed by that clientset's core/v1 events client, and registers a
// watcher that writes every event to logger at verbosity level 1.
//
// An error is returned (wrapping the underlying cause) when the clientset
// cannot be constructed from config; in that case no provider is returned.
func NewProvider(config *rest.Config, scheme *runtime.Scheme, logger logr.Logger, broadcaster record.EventBroadcaster) (recorder.Provider, error) {
	cs, err := kubernetes.NewForConfig(config)
	if err != nil {
		return nil, fmt.Errorf("failed to init clientSet: %w", err)
	}

	prov := &provider{
		scheme:           scheme,
		logger:           logger,
		eventBroadcaster: broadcaster,
	}

	// Record events through the core/v1 events client of the new clientset.
	sink := &typedcorev1.EventSinkImpl{Interface: cs.CoreV1().Events("")}
	prov.eventBroadcaster.StartRecordingToSink(sink)

	// Mirror each broadcast event into the provider's logger.
	logEvent := func(ev *corev1.Event) {
		prov.logger.V(1).Info(ev.Type, "object", ev.InvolvedObject, "reason", ev.Reason, "message", ev.Message)
	}
	prov.eventBroadcaster.StartEventWatcher(logEvent)

	return prov, nil
}

    1.2 实例化 controllerManager 对象

      定义在 controller-runtime/pkg/manager/internal.go 中

return &controllerManager{
	config:                config,
	scheme:                options.Scheme,
	cache:                 cache,
	fieldIndexes:          cache,
	client:                writeObj,
	apiReader:             apiReader,
	recorderProvider:      recorderProvider,
	resourceLock:          resourceLock,
	mapper:                mapper,
	metricsListener:       metricsListener,
	internalStop:          stop,
	internalStopper:       stop,
	port:                  options.Port,
	host:                  options.Host,
	certDir:               options.CertDir,
	leaseDuration:         *options.LeaseDuration,
	renewDeadline:         *options.RenewDeadline,
	retryPeriod:           *options.RetryPeriod,
	healthProbeListener:   healthProbeListener,
	readinessEndpointName: options.ReadinessEndpointName,
	livenessEndpointName:  options.LivenessEndpointName,
}, nil

    启动 manager 

if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
    setupLog.Error(err, "problem running manager")
    os.Exit(1)
}

2. controllerManager 的 Start 方法

    serveMetrics 启动 http server,/metrics

func (cm *controllerManager) Start(stop <-chan struct{}) error {
	// join the passed-in stop channel as an upstream feeding into cm.internalStopper
	defer close(cm.internalStopper)

	// initialize this here so that we reset the signal channel state on every start
	cm.errSignal = &errSignaler{errSignal: make(chan struct{})}

	// Metrics should be served whether the controller is leader or not.
	// (If we don't serve metrics for non-leaders, prometheus will still scrape
	// the pod but will get a connection refused)
	if cm.metricsListener != nil {
		go cm.serveMetrics(cm.internalStop)
	}

	// Serve health probes
	if cm.healthProbeListener != nil {
		go cm.serveHealthProbes(cm.internalStop)
	}

     2.1 controller.New 函数

      定义在 controller-runtime/pkg/controller/controller.go 中

// New returns a new Controller registered with the Manager.  The Manager will ensure that shared Caches have
// been synced before the Controller is Started.
func New(name string, mgr manager.Manager, options Options) (Controller, error) {
	if options.Reconciler == nil {
		return nil, fmt.Errorf("must specify Reconciler")
	}

	if len(name) == 0 {
		return nil, fmt.Errorf("must specify Name for Controller")
	}

	if options.MaxConcurrentReconciles <= 0 {
		options.MaxConcurrentReconciles = 1
	}

     2.1.1 实例化 Controller 对象

      定义在 controller-runtime/pkg/internal/controller/controller.go 中

// Create controller with dependencies set
c := &controller.Controller{
	Do:       options.Reconciler,
	Cache:    mgr.GetCache(),
	Config:   mgr.GetConfig(),
	Scheme:   mgr.GetScheme(),
	Client:   mgr.GetClient(),
	Recorder: mgr.GetEventRecorderFor(name),
	MakeQueue: func() workqueue.RateLimitingInterface {
		return workqueue.NewNamedRateLimitingQueue(options.RateLimiter, name)
	},
	MaxConcurrentReconciles: options.MaxConcurrentReconciles,
	Name:                    name,
}

     2.1.2 manager 的 Add 方法

// Add injects the manager's dependencies into r (via SetFields) and registers
// it in one of the manager's two Runnable lists.
//
// A Runnable that implements LeaderElectionRunnable and reports that it does
// not need leader election goes onto the non-leader-election list; every other
// Runnable goes onto the leader-election list. If the matching group has
// already been started, r is started immediately in its own goroutine and any
// error from its Start is forwarded to the manager's error signaler.
//
// The whole operation runs under cm.mu. The only error returned is the one
// produced by SetFields; in that case r is not registered.
func (cm *controllerManager) Add(r Runnable) error {
	cm.mu.Lock()
	defer cm.mu.Unlock()

	// Dependency injection must succeed before the runnable is tracked.
	if err := cm.SetFields(r); err != nil {
		return err
	}

	// Runnables need leader election unless they explicitly opt out.
	needsLeader := true
	if le, ok := r.(LeaderElectionRunnable); ok {
		needsLeader = le.NeedLeaderElection()
	}

	var startNow bool
	if needsLeader {
		startNow = cm.startedLeader
		cm.leaderElectionRunnables = append(cm.leaderElectionRunnables, r)
	} else {
		startNow = cm.started
		cm.nonLeaderElectionRunnables = append(cm.nonLeaderElectionRunnables, r)
	}

	if !startNow {
		return nil
	}

	// The group this runnable belongs to is already running, so launch it now.
	go func() {
		err := r.Start(cm.internalStop)
		if err != nil {
			cm.errSignal.SignalError(err)
		}
	}()

	return nil
}

   

2 Controller 的 Start 方法

    每一个 controller 都有自己的 queue

// Start implements controller.Controller
func (c *Controller) Start(stop <-chan struct{}) error {
	// use an IIFE to get proper lock handling
	// but lock outside to get proper handling of the queue shutdown
	c.mu.Lock()

	c.Queue = c.MakeQueue()

     2.1 启动 worker,与原生 controller manager 雷同

       从队列取 key,调用 reconcileHandler 进行处理

// worker repeatedly calls processNextWorkItem and returns as soon as that
// call reports false.
func (c *Controller) worker() {
	for {
		if more := c.processNextWorkItem(); !more {
			return
		}
	}
}

// processNextWorkItem takes one item from c.Queue and passes it to
// reconcileHandler, returning whatever the handler returns. It returns false
// without calling the handler when the queue reports that it is shutting down.
func (c *Controller) processNextWorkItem() bool {
	item, quit := c.Queue.Get()
	if quit {
		// The queue has been shut down; tell the caller to stop.
		return false
	}

	// Done is deferred so the queue is always told this item has finished
	// processing, whatever the handler decides. Forget is a separate call made
	// by the handler when the item should not be retried with back-off.
	defer c.Queue.Done(item)

	keepGoing := c.reconcileHandler(item)
	return keepGoing
}

    2.2 reconcileHandler 函数

func (c *Controller) reconcileHandler(obj interface{}) bool {
	// Update metrics after processing each item
	reconcileStartTS := time.Now()
	defer func() {
		c.updateMetrics(time.Since(reconcileStartTS))
	}()

	var req reconcile.Request
	var ok bool
	if req, ok = obj.(reconcile.Request); !ok {
		// As the item in the workqueue is actually invalid, we call
		// Forget here else we'd go into a loop of attempting to
		// process a work item that is invalid.
		c.Queue.Forget(obj)
		log.Error(nil, "Queue item was not a Request",
			"controller", c.Name, "type", fmt.Sprintf("%T", obj), "value", obj)
		// Return true, don't take a break
		return true
	}

    2.3 调用 Reconcile 进行处理

     也就是自己实现控制器的 Reconcile 函数

// Invoke the configured Reconciler (c.Do) for this request and decide, from
// its result and error, whether and how the request goes back on the queue.
if result, err := c.Do.Reconcile(req); err != nil {
	// Reconcile failed: re-add the request through the rate limiter, log the
	// error, bump the error metrics, and return false.
	c.Queue.AddRateLimited(req)
	log.Error(err, "Reconciler error", "controller", c.Name, "request", req)
	ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
	ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "error").Inc()
	return false
} else if result.RequeueAfter > 0 {
	// The result.RequeueAfter request will be lost if it is returned
	// along with a non-nil error (the error branch above wins). This is
	// intended: we need to drive to stable reconcile loops before queuing
	// due to result.RequeueAfter.
	// Forget the item first, then re-add it after the requested delay.
	c.Queue.Forget(obj)
	c.Queue.AddAfter(req, result.RequeueAfter)
	ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue_after").Inc()
	return true
} else if result.Requeue {
	// Requeue requested without a delay: re-add through the rate limiter.
	c.Queue.AddRateLimited(req)
	ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue").Inc()
	return true
}

如果要生成zz_generated.defaults.go文件的话

a、在自定义的类型上,多添加一个tag, // +k8s:defaulter-gen=true

b、修改generate-groups.sh脚本,添加对应的模块,或者直接执行命令

// defaulter-gen --input-dirs krome/pkg/apis/apps/v1 --input-dirs krome/pkg/apis/apps
// -o $GOPATH/src --go-header-file boilerplate.go.txt
// -O zz_generated.defaults
// --extra-peer-dirs=k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/conversion,k8s.io/apimachinery/pkg/runtime

猜你喜欢

转载自blog.csdn.net/zhonglinzhang/article/details/105023103
今日推荐