【kubernetes/k8s源码分析】 kubernetes csi external-snapshotter 源码分析

Git Repository: https://github.com/kubernetes-csi/external-snapshotter

Status: Alpha

Latest stable release Branch Min CSI Version Max CSI Version Container Image Min K8s Version Max K8s Version Recommended K8s Version
external-snapshotter v1.2.0 release-1.2 v1.0.0 - quay.io/k8scsi/csi-snapshotter:v1.2.0 v1.13 - v1.14
external-snapshotter v1.0.1 release-1.0 v1.0.0 - quay.io/k8scsi/csi-snapshotter:v1.0.1 v1.13 - v1.13
external-snapshotter v0.4.1 release-0.4 v0.3.0 v0.3.0 quay.io/k8scsi/csi-snapshotter:v0.4.1 v1.10 - v1.10

     本文基于版本 v1.2.0 

启动命令:

   /csi-snapshotter --csi-address=/csi/csi.sock --connection-timeout=15s --leader-election=false

1. main 函数

    1.1 创建 CRD 资源

NAME SHORTNAMES APIGROUP NAMESPACED KIND
volumesnapshotclasses   snapshot.storage.k8s.io false VolumeSnapshotClass
volumesnapshotcontents   snapshot.storage.k8s.io false VolumeSnapshotContent
volumesnapshots   snapshot.storage.k8s.io true VolumeSnapshot
// CreateCRD creates CustomResourceDefinition
func CreateCRD(clientset apiextensionsclient.Interface) error {
	crd := &apiextensionsv1beta1.CustomResourceDefinition{
		ObjectMeta: metav1.ObjectMeta{
			Name: crdv1.VolumeSnapshotClassResourcePlural + "." + crdv1.GroupName,
		},
		Spec: apiextensionsv1beta1.CustomResourceDefinitionSpec{
			Group:   crdv1.GroupName,
			Version: crdv1.SchemeGroupVersion.Version,
			Scope:   apiextensionsv1beta1.ClusterScoped,
			Names: apiextensionsv1beta1.CustomResourceDefinitionNames{
				Plural: crdv1.VolumeSnapshotClassResourcePlural,
				Kind:   reflect.TypeOf(crdv1.VolumeSnapshotClass{}).Name(),
			},
			Subresources: &apiextensionsv1beta1.CustomResourceSubresources{
				Status: &apiextensionsv1beta1.CustomResourceSubresourceStatus{},
			},
		},
	}

    1.2 插件是否有能力 CREATE_DELETE_SNAPSHOT

// Find out if the driver supports create/delete snapshot.
supportsCreateSnapshot, err := supportsControllerCreateSnapshot(ctx, csiConn)
if err != nil {
	klog.Errorf("error determining if driver supports create/delete snapshot operations: %v", err)
	os.Exit(1)
}
if !supportsCreateSnapshot {
	klog.Errorf("CSI driver %s does not support ControllerCreateSnapshot", *snapshotterName)
	os.Exit(1)
}

    1.3 实例化 csiSnapshotController

     路径:pkg/controller/snapshot_controller_base.go,所有模式相同,比较简单,不再详解

     主要关注的资源有 pvc volumesnapshotclasses volumesnapshotcontents volumesnapshots

snapShotter := snapshotter.NewSnapshotter(csiConn)
ctrl := controller.NewCSISnapshotController(
	snapClient,
	kubeClient,
	*snapshotterName,
	factory.Snapshot().V1alpha1().VolumeSnapshots(),
	factory.Snapshot().V1alpha1().VolumeSnapshotContents(),
	factory.Snapshot().V1alpha1().VolumeSnapshotClasses(),
	coreFactory.Core().V1().PersistentVolumeClaims(),
	*createSnapshotContentRetryCount,
	*createSnapshotContentInterval,
	snapShotter,
	*csiTimeout,
	*resyncPeriod,
	*snapshotNamePrefix,
	*snapshotNameUUIDLength,
)

    1.4 Run 函数

     主要的两个函数为 snapshotWorker 与 contentWorker

// Run starts the snapshot controller and blocks until stopCh is closed.
//
// It first waits for the snapshot, content, class and PVC listers to report
// that they are synced (giving up with an error log if they do not), then
// calls initializeCaches with the snapshot and content listers, and finally
// starts `workers` goroutines each for snapshotWorker and contentWorker.
// Both work queues are shut down when Run returns.
func (ctrl *csiSnapshotController) Run(workers int, stopCh <-chan struct{}) {
	// Deferred calls run in reverse order: the shutdown log line first, then
	// the content queue, then the snapshot queue.
	defer ctrl.snapshotQueue.ShutDown()
	defer ctrl.contentQueue.ShutDown()

	klog.Infof("Starting CSI snapshotter")
	defer klog.Infof("Shutting CSI snapshotter")

	synced := cache.WaitForCacheSync(
		stopCh,
		ctrl.snapshotListerSynced,
		ctrl.contentListerSynced,
		ctrl.classListerSynced,
		ctrl.pvcListerSynced,
	)
	if !synced {
		klog.Errorf("Cannot sync caches")
		return
	}

	ctrl.initializeCaches(ctrl.snapshotLister, ctrl.contentLister)

	// One snapshot worker and one content worker per requested worker slot.
	for remaining := workers; remaining > 0; remaining-- {
		go wait.Until(ctrl.snapshotWorker, 0, stopCh)
		go wait.Until(ctrl.contentWorker, 0, stopCh)
	}

	// Park here until the caller asks the controller to stop.
	<-stopCh
}

    1.4.1 snapshotWorker 函数

     从 snapshotQueue 队列取出 key,如果对应的 VolumeSnapshot 仍存在于 informer 缓存中,则先经过 checkAndUpdateSnapshotClass 检查,再调用 updateSnapshot 进行处理

// snapshotWorker processes items from snapshotQueue. It must run only once,
// syncSnapshot is not assured to be reentrant.
func (ctrl *csiSnapshotController) snapshotWorker() {
	workFunc := func() bool {
		keyObj, quit := ctrl.snapshotQueue.Get()

		snapshot, err := ctrl.snapshotLister.VolumeSnapshots(namespace).Get(name)
		if err == nil {
			// The volume snapshot still exists in informer cache, the event must have
			// been add/update/sync
			newSnapshot, err := ctrl.checkAndUpdateSnapshotClass(snapshot)
			if err == nil {
				klog.V(5).Infof("passed checkAndUpdateSnapshotClass for snapshot %q", key)
				ctrl.updateSnapshot(newSnapshot)
			}
			return false
}

    1.4.1.1 updateSnapshot 函数

    如果是老版本的无需处理,新版本调用 syncSnapshot 函数处理,第2章节讲解

// updateSnapshot runs in worker thread and handles "snapshot added",
// "snapshot updated" and "periodic sync" events.
//
// The snapshot is first recorded through storeSnapshotUpdate; if that reports
// the object is not a new version, processing stops. Otherwise the snapshot is
// handed to syncSnapshot. Errors are logged here and never returned to the
// caller.
func (ctrl *csiSnapshotController) updateSnapshot(snapshot *crdv1.VolumeSnapshot) {
	// Store the new snapshot version in the cache and do not process it if this is
	// an old version.
	klog.V(5).Infof("updateSnapshot %q", snapshotKey(snapshot))
	newSnapshot, err := ctrl.storeSnapshotUpdate(snapshot)
	if err != nil {
		klog.Errorf("%v", err)
	}
	if !newSnapshot {
		return
	}

	if err := ctrl.syncSnapshot(snapshot); err != nil {
		if errors.IsConflict(err) {
			// Version conflict error happens quite often and the controller
			// recovers from it easily, so it is logged at a low verbosity.
			klog.V(3).Infof("could not sync snapshot %q: %+v", snapshotKey(snapshot), err)
		} else {
			klog.Errorf("could not sync snapshot %q: %+v", snapshotKey(snapshot), err)
		}
	}
}

    1.4.2 contentWorker 函数

    从队列 contentQueue 取值,处理函数为 updateContent

// contentWorker processes items from contentQueue. It must run only once,
// syncContent is not assured to be reentrant.
//
// NOTE(review): this listing is abridged. `name` is never derived from keyObj,
// `quit` is not checked, workFunc has no final return and is never invoked
// here. Consult the full source file for those parts before relying on it.
func (ctrl *csiSnapshotController) contentWorker() {
	workFunc := func() bool {
		// Take the next item off the content work queue.
		keyObj, quit := ctrl.contentQueue.Get()

		// Look the content object up through the content lister.
		content, err := ctrl.contentLister.Get(name)
		// The content still exists in informer cache, the event must have
		// been add/update/sync
		if err == nil {
			// Only contents accepted by isDriverMatch are passed on to updateContent.
			if ctrl.isDriverMatch(content) {
				ctrl.updateContent(content)
			}
			return false
		}
	}

}

    1.4.2.1 updateContent 函数

        如果是老版本的无需处理,新版本调用 syncContent 函数处理,第4章节讲解    

// updateContent runs in worker thread and handles "content added",
// "content updated" and "periodic sync" events.
//
// The content is first recorded through storeContentUpdate; if that reports
// the object is not a new version, processing stops. Otherwise the content is
// handed to syncContent. Errors are logged here and never returned to the
// caller.
func (ctrl *csiSnapshotController) updateContent(content *crdv1.VolumeSnapshotContent) {
	// Store the new content version in the cache and do not process it if this is
	// an old version.
	isNew, err := ctrl.storeContentUpdate(content)
	if err != nil {
		klog.Errorf("%v", err)
	}
	if !isNew {
		return
	}

	// Surface sync failures instead of silently discarding them, mirroring how
	// updateSnapshot reports syncSnapshot errors.
	if err := ctrl.syncContent(content); err != nil {
		klog.Errorf("could not sync content %q: %+v", content.Name, err)
	}
}

2. syncSnapshot 函数

// It's invoked by appropriate cache.Controller callbacks when a snapshot is
// created, updated or periodically synced. We do not differentiate between
// these events.
// For easier readability, it is split into syncUnreadySnapshot and syncReadySnapshot
func (ctrl *csiSnapshotController) syncSnapshot(snapshot *crdv1.VolumeSnapshot) error {
	klog.V(5).Infof("synchonizing VolumeSnapshot[%s]: %s", snapshotKey(snapshot), getSnapshotStatusForLogging(snapshot))

	if isSnapshotDeletionCandidate(snapshot) {
		// Volume snapshot should be deleted. Check if it's used
		// and remove finalizer if it's not.
		// Check if a volume is being created from snapshot.
		isUsed := ctrl.isVolumeBeingCreatedFromSnapshot(snapshot)
		if !isUsed {
			klog.V(5).Infof("syncSnapshot: Remove Finalizer for VolumeSnapshot[%s]", snapshotKey(snapshot))
			return ctrl.removeSnapshotFinalizer(snapshot)
		}
	}

   根据 status readyToUse 值是否为 true,分别调用 syncReadySnapshot 或 syncUnreadySnapshot

	if !snapshot.Status.ReadyToUse {
		return ctrl.syncUnreadySnapshot(snapshot)
	}
	return ctrl.syncReadySnapshot(snapshot)

apiVersion: snapshot.storage.k8s.io/v1alpha1
kind: VolumeSnapshot
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"snapshot.storage.k8s.io/v1alpha1","kind":"VolumeSnapshot","metadata":{"annotations":{},"name":"new-snapshot-demo","namespace":"default"},"spec":{"snapshotClassName":"csi-hostpath-snapclass","source":{"kind":"PersistentVolumeClaim","name":"hpvc"}}}
  creationTimestamp: "2019-07-04T09:26:16Z"
  finalizers:
  - snapshot.storage.kubernetes.io/volumesnapshot-protection
  generation: 5
  name: new-snapshot-demo
  namespace: default
  resourceVersion: "1244547"
  selfLink: /apis/snapshot.storage.k8s.io/v1alpha1/namespaces/default/volumesnapshots/new-snapshot-demo
  uid: c6101f3b-9e3d-11e9-a3dd-080027603363
spec:
  snapshotClassName: csi-hostpath-snapclass
  snapshotContentName: snapcontent-c6101f3b-9e3d-11e9-a3dd-080027603363
  source:
    apiGroup: null
    kind: PersistentVolumeClaim
    name: hpvc
status:
  creationTime: "2019-07-04T09:26:16Z"
  readyToUse: true
  restoreSize: 1Gi

    2.1 syncUnreadySnapshot

      处理 status readyToUse 为 false 的情况

     2.1.1 情况为 spec snapshotContentName != ""

if snapshot.Spec.SnapshotContentName != "" {
	contentObj, found, err := ctrl.contentStore.GetByKey(snapshot.Spec.SnapshotContentName)

	if !found {
		// snapshot is bound to a non-existing content.
		ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotContentMissing", "VolumeSnapshotContent is missing")
		klog.V(4).Infof("synchronizing unready snapshot[%s]: snapshotcontent %q requested and not found, will try again next time", uniqueSnapshotName, snapshot.Spec.SnapshotContentName)
		return fmt.Errorf("snapshot %s is bound to a non-existing content %s", uniqueSnapshotName, snapshot.Spec.SnapshotContentName)
	}
	content, ok := contentObj.(*crdv1.VolumeSnapshotContent)
	if !ok {
		return fmt.Errorf("expected volume snapshot content, got %+v", contentObj)
	}
	contentBound, err := ctrl.checkandBindSnapshotContent(snapshot, content)
	if err != nil {
		// snapshot is bound but content is not bound to snapshot correctly
		ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotBindFailed", fmt.Sprintf("Snapshot failed to bind VolumeSnapshotContent, %v", err))
		return fmt.Errorf("snapshot %s is bound, but VolumeSnapshotContent %s is not bound to the VolumeSnapshot correctly, %v", uniqueSnapshotName, content.Name, err)
	}
	// snapshot is already bound correctly, check the status and update if it is ready.
	klog.V(5).Infof("Check and update snapshot %s status", uniqueSnapshotName)
	if err = ctrl.checkandUpdateBoundSnapshotStatus(snapshot, contentBound); err != nil {
		return err
	}
	return nil
} 

    2.1.2 情况为 spec snapshotContentName == ""

     getMatchSnapshotContent 函数获得和snapshot 名字一样的 content,如果找到则调用 bindandUpdateVolumeSnapshot 进行绑定,这个绑定主要操作是更新 snapshot 的 spec SnapshotContentName 内容

     未找到 content 则进行 createSnapshot 操作.第 3 章节讲解

else { // snapshot.Spec.SnapshotContentName == nil
	if contentObj := ctrl.getMatchSnapshotContent(snapshot); contentObj != nil {
		klog.V(5).Infof("Find VolumeSnapshotContent object %s for snapshot %s", contentObj.Name, uniqueSnapshotName)
		newSnapshot, err := ctrl.bindandUpdateVolumeSnapshot(contentObj, snapshot)
		if err != nil {
			return err
		}
		klog.V(5).Infof("bindandUpdateVolumeSnapshot %v", newSnapshot)
		return nil
	} else if snapshot.Status.Error == nil || isControllerUpdateFailError(snapshot.Status.Error) { // Try to create snapshot if no error status is set
		if err := ctrl.createSnapshot(snapshot); err != nil {
			ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotCreationFailed", fmt.Sprintf("Failed to create snapshot with error %v", err))
			return err
		}
		return nil
	}
	return nil
}

    2.2 syncReadySnapshot 函数

      情况为 status readyToUse 为 true,已经成功 ready的必须包含 spec snapshotcontentname

       如果 spec snapshotcontentname 为空,则报错误事件SnapshotLost,并更新 snapshot

// syncReadySnapshot checks the snapshot which has been bound to snapshot content successfully before.
// If there is any problem with the binding (e.g., snapshot points to a non-exist snapshot content), update the snapshot status and emit event.
func (ctrl *csiSnapshotController) syncReadySnapshot(snapshot *crdv1.VolumeSnapshot) error {
	if snapshot.Spec.SnapshotContentName == "" {
		if err := ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotLost", "Bound snapshot has lost reference to VolumeSnapshotContent"); err != nil {
			return err
		}
		return nil
	}

    2.2.1 如果 cache 未找到 snapshotcontentname,则报错误事件 SnapshotContentMissing,并更新 snapshot

obj, found, err := ctrl.contentStore.GetByKey(snapshot.Spec.SnapshotContentName)
if err != nil {
	return err
}
if !found {
	if err = ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotContentMissing", "VolumeSnapshotContent is missing"); err != nil {
		return err
	}
	return nil
}

    2.2.2 验证 snapshot 与 content 绑定 

     成功绑定情况为:  content.Spec.VolumeSnapshotRef != nil && content.Spec.VolumeSnapshotRef.Name == snapshot.Name && content.Spec.VolumeSnapshotRef.UID == snapshot.UID

else {
	content, ok := obj.(*crdv1.VolumeSnapshotContent)
	if !ok {
		return fmt.Errorf("Cannot convert object from snapshot content store to VolumeSnapshotContent %q!?: %#v", snapshot.Spec.SnapshotContentName, obj)
	}

	klog.V(5).Infof("syncReadySnapshot[%s]: VolumeSnapshotContent %q found", snapshotKey(snapshot), content.Name)
	if !IsSnapshotBound(snapshot, content) {
		// snapshot is bound but content is not bound to snapshot correctly
		if err = ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotMisbound", "VolumeSnapshotContent is not bound to the VolumeSnapshot correctly"); err != nil {
			return err
		}
		return nil
	}
	// Snapshot is correctly bound.
	return nil
}

3. createSnapshot 函数

    异步操作创建 snapshot

// createSnapshot hands the snapshot creation work to scheduleOperation under
// an operation name built from the snapshot key and UID. The function itself
// always returns nil; failures inside the scheduled operation are reported
// through updateSnapshotErrorStatusWithEvent and the log.
func (ctrl *csiSnapshotController) createSnapshot(snapshot *crdv1.VolumeSnapshot) error {
	klog.V(5).Infof("createSnapshot[%s]: started", snapshotKey(snapshot))
	opName := fmt.Sprintf("create-%s[%s]", snapshotKey(snapshot), string(snapshot.UID))

	// The operation body: create the snapshot, then refresh the internal cache.
	create := func() error {
		created, opErr := ctrl.createSnapshotOperation(snapshot)
		if opErr != nil {
			ctrl.updateSnapshotErrorStatusWithEvent(snapshot, v1.EventTypeWarning, "SnapshotCreationFailed", fmt.Sprintf("Failed to create snapshot: %v", opErr))
			klog.Errorf("createSnapshot [%s]: error occurred in createSnapshotOperation: %v", opName, opErr)
			return opErr
		}

		if _, cacheErr := ctrl.storeSnapshotUpdate(created); cacheErr != nil {
			// A "snapshot update" event is expected to follow shortly, so a
			// failed cache update is only logged.
			klog.V(4).Infof("createSnapshot [%s]: cannot update internal cache: %v", snapshotKey(created), cacheErr)
		}
		return nil
	}

	ctrl.scheduleOperation(opName, create)
	return nil
}

    3.1 createSnapshotOperation 函数

     注释描述的很清楚,非常友好

// The function goes through the whole snapshot creation process.
// 1. Trigger the snapshot through csi storage provider.
// 2. Update VolumeSnapshot status with creationtimestamp information
// 3. Create the VolumeSnapshotContent object with the snapshot id information.
// 4. Bind the VolumeSnapshot and VolumeSnapshotContent object
func (ctrl *csiSnapshotController) createSnapshotOperation(snapshot *crdv1.VolumeSnapshot) (*crdv1.VolumeSnapshot, error) {
	klog.Infof("createSnapshot: Creating snapshot %s through the plugin ...", snapshotKey(snapshot))

	if snapshot.Status.Error != nil && !isControllerUpdateFailError(snapshot.Status.Error) {
		klog.V(4).Infof("error is already set in snapshot, do not retry to create: %s", snapshot.Status.Error.Message)
		return snapshot, nil
	}

    3.1.1 CreateSnapshot 函数

     csiHandler 实现了 Handler 接口,路径为 pkg/controller/csi_handler.go

// CreateSnapshot derives the snapshot name from the configured prefix, the
// snapshot UID and the configured UUID length, passes the parameters through
// removePrefixedParameters, and then delegates to the underlying snapshotter
// under a context bounded by handler.timeout. The six return values are
// forwarded unchanged from the snapshotter's CreateSnapshot.
func (handler *csiHandler) CreateSnapshot(snapshot *crdv1.VolumeSnapshot, volume *v1.PersistentVolume, parameters map[string]string, snapshotterCredentials map[string]string) (string, string, time.Time, int64, bool, error) {
	// The timeout clock starts here, before the name and parameters are prepared.
	ctx, cancel := context.WithTimeout(context.Background(), handler.timeout)
	defer cancel()

	name, nameErr := makeSnapshotName(handler.snapshotNamePrefix, string(snapshot.UID), handler.snapshotNameUUIDLength)
	if nameErr != nil {
		return "", "", time.Time{}, 0, false, nameErr
	}

	filtered, filterErr := removePrefixedParameters(parameters)
	if filterErr != nil {
		return "", "", time.Time{}, 0, false, fmt.Errorf("failed to remove CSI Parameters of prefixed keys: %v", filterErr)
	}

	return handler.snapshotter.CreateSnapshot(ctx, name, volume, filtered, snapshotterCredentials)
}

    3.1.1.1 handler.snapshotter.CreateSnapshot

     snapshot 实现了 Snapshotter 接口,路径 pkg/snapshotter/snapshotter.go

// Snapshotter implements CreateSnapshot/DeleteSnapshot operations against a remote CSI driver.
// It also exposes GetSnapshotStatus for querying an existing snapshot.
type Snapshotter interface {
   // CreateSnapshot creates a snapshot for a volume.
   // It returns the driver name, the snapshot ID, the snapshot timestamp,
   // its size, whether it is ready to use, and an error.
   CreateSnapshot(ctx context.Context, snapshotName string, volume *v1.PersistentVolume, parameters map[string]string, snapshotterCredentials map[string]string) (driverName string, snapshotId string, timestamp time.Time, size int64, readyToUse bool, err error)

   // DeleteSnapshot deletes a snapshot from a volume.
   // The snapshot is identified by snapshotID.
   DeleteSnapshot(ctx context.Context, snapshotID string, snapshotterCredentials map[string]string) (err error)

   // GetSnapshotStatus returns if a snapshot is ready to use, creation time, and restore size.
   // The values are returned in that order, followed by an error.
   GetSnapshotStatus(ctx context.Context, snapshotID string) (bool, time.Time, int64, error)
}

     向插件发送 gRPC 请求 CreateSnapshotRequest

// CreateSnapshot asks the CSI driver behind s.conn to snapshot the given
// volume under snapshotName.
//
// The volume must carry a CSI persistent volume source; its VolumeHandle is
// sent as the source volume ID together with the parameters and secrets.
// On success it returns the driver name, the snapshot ID, the creation time
// converted from the response timestamp, the size in bytes and the
// ready-to-use flag. Any failure (missing CSI source, driver-name lookup,
// the RPC itself, an empty response, or an invalid timestamp) yields zero
// values and a non-nil error.
func (s *snapshot) CreateSnapshot(ctx context.Context, snapshotName string, volume *v1.PersistentVolume, parameters map[string]string, snapshotterCredentials map[string]string) (string, string, time.Time, int64, bool, error) {
	klog.V(5).Infof("CSI CreateSnapshot: %s", snapshotName)
	if volume.Spec.CSI == nil {
		return "", "", time.Time{}, 0, false, fmt.Errorf("CSIPersistentVolumeSource not defined in spec")
	}

	client := csi.NewControllerClient(s.conn)

	driverName, err := csirpc.GetDriverName(ctx, s.conn)
	if err != nil {
		return "", "", time.Time{}, 0, false, err
	}

	req := csi.CreateSnapshotRequest{
		SourceVolumeId: volume.Spec.CSI.VolumeHandle,
		Name:           snapshotName,
		Parameters:     parameters,
		Secrets:        snapshotterCredentials,
	}

	rsp, err := client.CreateSnapshot(ctx, &req)
	if err != nil {
		return "", "", time.Time{}, 0, false, err
	}
	// Guard against a driver that reports success without a snapshot payload;
	// dereferencing rsp.Snapshot below would otherwise panic.
	if rsp == nil || rsp.Snapshot == nil {
		return "", "", time.Time{}, 0, false, fmt.Errorf("CSI driver %s returned an empty snapshot in CreateSnapshot response for %s", driverName, snapshotName)
	}

	klog.V(5).Infof("CSI CreateSnapshot: %s driver name [%s] snapshot ID [%s] time stamp [%d] size [%d] readyToUse [%v]", snapshotName, driverName, rsp.Snapshot.SnapshotId, rsp.Snapshot.CreationTime, rsp.Snapshot.SizeBytes, rsp.Snapshot.ReadyToUse)
	creationTime, err := ptypes.Timestamp(rsp.Snapshot.CreationTime)
	if err != nil {
		return "", "", time.Time{}, 0, false, err
	}
	return driverName, rsp.Snapshot.SnapshotId, creationTime, rsp.Snapshot.SizeBytes, rsp.Snapshot.ReadyToUse, nil
}

apiVersion: snapshot.storage.k8s.io/v1alpha1
kind: VolumeSnapshotContent
metadata:
  creationTimestamp: "2019-07-04T09:26:16Z"
  finalizers:
  - snapshot.storage.kubernetes.io/volumesnapshotcontent-protection
  generation: 1
  name: snapcontent-c6101f3b-9e3d-11e9-a3dd-080027603363
  resourceVersion: "1244545"
  selfLink: /apis/snapshot.storage.k8s.io/v1alpha1/volumesnapshotcontents/snapcontent-c6101f3b-9e3d-11e9-a3dd-080027603363
  uid: c618764e-9e3d-11e9-a3dd-080027603363
spec:
  csiVolumeSnapshotSource:
    creationTime: 1562232376496710591
    driver: csi-hostpath
    restoreSize: 1073741824
    snapshotHandle: c614f6af-9e3d-11e9-af40-5a33ac1ef10c
  deletionPolicy: Delete
  persistentVolumeRef:
    apiVersion: v1
    kind: PersistentVolume
    name: pvc-aa5ea23e-9e3d-11e9-a3dd-080027603363
    resourceVersion: "1244491"
    uid: afa75231-9e3d-11e9-a3dd-080027603363
  snapshotClassName: csi-hostpath-snapclass
  volumeSnapshotRef:
    apiVersion: snapshot.storage.k8s.io/v1alpha1
    kind: VolumeSnapshot
    name: new-snapshot-demo
    namespace: default
    resourceVersion: "1244542"
    uid: c6101f3b-9e3d-11e9-a3dd-080027603363

总结:

    watch volumesnapshot 和 VolumeSnapshotContent 资源对象进行同步操作

发布了236 篇原创文章 · 获赞 301 · 访问量 38万+

猜你喜欢

转载自blog.csdn.net/zhonglinzhang/article/details/94459996