etcd的raft实现的存储层详解

源码文件

Storage 接口:供 raft 从存储中读取已保存的 HardState/ConfState、日志条目(及其 term、首尾索引)以及最近的快照。
etcd/raft/storage.go

// Storage is an interface that may be implemented by the application
// to retrieve log entries from storage.
//
// If any Storage method returns an error, the raft instance will
// become inoperable and refuse to participate in elections; the
// application is responsible for cleanup and recovery in this case.
type Storage interface {
    // TODO(tbg): split this into two interfaces, LogStorage and StateStorage.

    // InitialState returns the saved HardState and ConfState information.
    InitialState() (pb.HardState, pb.ConfState, error)
    // Entries returns a slice of log entries in the half-open range [lo, hi),
    // i.e. lo is included and hi is excluded.
    // maxSize limits the total size of the log entries returned, but
    // Entries returns at least one entry if any.
    Entries(lo, hi, maxSize uint64) ([]pb.Entry, error)
    // Term returns the term of entry i, which must be in the range
    // [FirstIndex()-1, LastIndex()]. The term of the entry before
    // FirstIndex is retained for matching purposes even though the
    // rest of that entry may not be available.
    Term(i uint64) (uint64, error)
    // LastIndex returns the index of the last entry in the log.
    LastIndex() (uint64, error)
    // FirstIndex returns the index of the first log entry that is
    // possibly available via Entries. Older entries have been incorporated
    // into the latest Snapshot; if storage only contains the dummy entry,
    // the first log entry is not available.
    FirstIndex() (uint64, error)
    // Snapshot returns the most recent snapshot.
    // If the snapshot is temporarily unavailable, it should return
    // ErrSnapshotTemporarilyUnavailable, so that the raft state machine
    // knows that Storage needs some time to prepare the snapshot and can
    // call Snapshot again later.
    Snapshot() (pb.Snapshot, error)
}

leader 判断日志条目是否已被多数派(quorum)确认,并据此推进 commit index 的逻辑

// etcd/raft/raft.go
// stepLeader (excerpt): dispatches on m.Type. Only the pb.MsgAppResp branch
// that deals with committing is shown; every "..." marks code elided from
// the original function.
func stepLeader(r *raft, m pb.Message) error {
    ...
    switch m.Type {
        case pb.MsgAppResp:
        ...
        // maybeCommit reports whether the commit index changed; when it did,
        // bcastAppend is called, as maybeCommit's own doc comment requires.
        // NOTE(review): bcastAppend is not shown here — presumably it
        // propagates the new commit index to the other members; confirm.
        if r.maybeCommit() {
                    r.bcastAppend()
        }
        ...
    }
    ...
}

// maybeCommit tries to move the commit index forward. It asks the progress
// tracker (r.prs) for the committed index and hands that, together with the
// current term r.Term, to r.raftLog.maybeCommit.
//
// It reports whether the commit index changed; when it did, the caller
// should follow up with r.bcastAppend.
func (r *raft) maybeCommit() bool {
    committed := r.prs.Committed()
    advanced := r.raftLog.maybeCommit(committed, r.Term)
    return advanced
}
// etcd/raft/tracker/tracker.go
// Committed reports the largest log index known to be committed, judged by
// what the voting members of the group (p.Voters) have acknowledged. The
// acknowledgements are read from p.Progress through matchAckIndexer.
func (p *ProgressTracker) Committed() uint64 {
    acked := matchAckIndexer(p.Progress)
    idx := p.Voters.CommittedIndex(acked)
    return uint64(idx)
}
// etcd/raft/quorum/joint.go
// CommittedIndex reports the largest index committed under the joint quorum c.
// An index counts as jointly committed only when both constituent majorities
// (c[0] and c[1]) have committed it, so the result is the smaller of the two
// per-majority committed indexes.
func (c JointConfig) CommittedIndex(l AckedIndexer) Index {
    first, second := c[0].CommittedIndex(l), c[1].CommittedIndex(l)
    if second <= first {
        return second
    }
    return first
}

猜你喜欢

转载自blog.51cto.com/1196740/2554919