一、实验说明
https://pdos.csail.mit.edu/6.824/labs/lab-kvraft.html
二、梳理
长时间运行的服务器永远记住完整的Raft日志是不切实际的。本次实验需要修改Raft和kvserver以便协作以节省空间:kvserver会不时地持久存储其当前状态的“快照”,并且Raft会丢弃快照之前的日志条目。服务器重新启动时(或远远落后于领导者,必须赶上来),服务器首先加载快照,然后从创建快照的那一刻开始处理Log Entry。
// Persister is the stable-storage abstraction used by both Raft and the
// kvserver: it holds the serialized Raft state and the latest service
// snapshot side by side, guarded by mu.
type Persister struct {
mu sync.Mutex
raftstate []byte // serialized Raft persistent state
snapshot []byte // latest kvserver state-machine snapshot
}
新增
maxraftstate:当前的raft state size 接近maxraftstate的时候,进行SnapShot
appliedRaftLogIndex:已经apply的log index(server执行完一条command后更新)
// KVServer fields added for log compaction (Lab 3B).
type KVServer struct {
...
// maxraftstate: take a snapshot when the persisted Raft state size
// reaches this many bytes; -1 disables snapshotting entirely.
maxraftstate int
// appliedRaftLogIndex: absolute index of the last Raft log entry the
// server has applied; used as the snapshot's last-included index.
// NOTE(review): takeSnapshot/installSnapshot also read kv.db and
// kv.lastAppliedRequestId, which are declared elsewhere in the struct.
appliedRaftLogIndex int
}
Server
判断是否需要存储快照(waitRaftApply()中)
// shouldTakeSnapshot reports whether the persisted Raft state has grown
// large enough that the server should compact the log into a snapshot.
// A maxraftstate of -1 means snapshotting is disabled.
func (kv *KVServer) shouldTakeSnapshot() bool {
	if kv.maxraftstate == -1 {
		return false
	}
	return kv.rf.GetRaftStateSize() >= kv.maxraftstate
}
截断当前logEntry,将数据持久化到Persister(raftstate+snapshot)
// takeSnapshot serializes the kvserver state (the key/value db plus the
// per-client request-dedup table) and hands it to Raft, which truncates
// its log up to appliedRaftLogIndex and persists state+snapshot together.
func (kv *KVServer) takeSnapshot() {
w := new(bytes.Buffer)
e := labgob.NewEncoder(w)
kv.mu.Lock()
// Encode order must match the decode order in installSnapshot.
e.Encode(kv.db)
e.Encode(kv.lastAppliedRequestId)
appliedRaftLogIndex := kv.appliedRaftLogIndex
kv.mu.Unlock()
// Release kv.mu before calling into Raft: ReplaceLogWithSnapshot
// acquires rf.mu, and holding both here risks lock-order deadlock.
kv.rf.ReplaceLogWithSnapshot(appliedRaftLogIndex, w.Bytes())
}
加载snapshot(StartKVServer())
// installSnapshot restores the key/value state machine from a snapshot
// produced by takeSnapshot. It is called at startup and whenever Raft
// delivers an "InstallSnapshot" message on applyCh.
//
// Fixes over the original: a zero-length snapshot (common return value
// from a fresh persister) is treated the same as nil, and a decode
// failure is no longer silently swallowed by an empty if-body — a
// corrupt snapshot would otherwise leave kv.db half-decoded.
func (kv *KVServer) installSnapshot(snapshot []byte) {
	kv.mu.Lock()
	defer kv.mu.Unlock()
	// Nothing persisted yet: keep the empty initial state.
	if len(snapshot) < 1 {
		return
	}
	r := bytes.NewBuffer(snapshot)
	d := labgob.NewDecoder(r)
	// Decode order must match the encode order in takeSnapshot.
	if d.Decode(&kv.db) != nil ||
		d.Decode(&kv.lastAppliedRequestId) != nil {
		// Crashing is safer than continuing with an inconsistent
		// state machine that would silently serve wrong data.
		panic("kvserver: failed to decode snapshot")
	}
}
// Non-command messages on applyCh carry control signals from Raft
// rather than client operations.
if msg.CommandValid == false {
switch msg.Command.(string) {
case "InstallSnapshot":
// Raft accepted a leader's snapshot; replace our state-machine
// contents with the snapshot payload.
kv.installSnapshot(msg.CommandData)
}
continue
}
Raft
新增snapshottedIndex(已经快照的index)
// Raft addition for log compaction.
type Raft struct {
...
// snapshottedIndex is the absolute index of the last log entry covered
// by the snapshot; after truncation, rf.logs[0] sits at this index
// (its Term is used as the snapshot's LastIncludedTerm).
snapshottedIndex int
}
保证下标的准确性(因为LOG被截断过,所以原Index需要减去snapshottedIndex, 才是新的LOG数组的下标),需要替换原有rf.logs根据下标取值的地方
// getRelativeLogIndex converts an absolute Raft log index into an
// offset within rf.logs, whose first entry sits at snapshottedIndex.
func (rf *Raft) getRelativeLogIndex(absIndex int) int {
	offset := absIndex - rf.snapshottedIndex
	return offset
}
// getAbsoluteLogIndex maps an offset within rf.logs back to the
// absolute Raft log index it represents.
func (rf *Raft) getAbsoluteLogIndex(relIndex int) int {
	return rf.snapshottedIndex + relIndex
}
持久化,通知follower
// ReplaceLogWithSnapshot discards all log entries up to (and keeping)
// appliedIndex, records the service snapshot alongside the trimmed Raft
// state, and proactively pushes the snapshot to every peer.
//
// Fix over the original: re-slicing rf.logs in place kept the entire
// old backing array reachable, so the "discarded" entries were never
// garbage-collected — defeating the purpose of compaction. The retained
// suffix is now copied into a fresh slice.
func (rf *Raft) ReplaceLogWithSnapshot(appliedIndex int, kvSnapshot []byte) {
	rf.mu.Lock()
	defer rf.mu.Unlock()
	// Stale request: we already snapshotted at or past this index.
	if appliedIndex <= rf.snapshottedIndex {
		return
	}
	// Copy the surviving suffix so the truncated prefix's backing
	// array can actually be freed.
	suffix := rf.logs[rf.getRelativeLogIndex(appliedIndex):]
	rf.logs = append([]LogEntry(nil), suffix...)
	rf.snapshottedIndex = appliedIndex
	// Persist trimmed Raft state and the service snapshot together.
	rf.persister.SaveStateAndSnapshot(rf.encodeRaftState(), kvSnapshot)
	// Offer the snapshot to all peers; syncSnapshotWith is a no-op on
	// non-leaders, and up-to-date followers reject stale snapshots.
	for i := range rf.peers {
		if i == rf.me {
			continue
		}
		go rf.syncSnapshotWith(i)
	}
}
// syncSnapshotWith sends the current snapshot to one peer so it can
// catch up when the entries it needs have been compacted away.
func (rf *Raft) syncSnapshotWith(server int) {
rf.mu.Lock()
// Only the leader may install snapshots on followers.
if rf.state != Leader {
rf.mu.Unlock()
return
}
args := InstallSnapshotArgs{
Term: rf.currentTerm,
LeaderId: rf.me,
LastIncludedIndex: rf.snapshottedIndex,
// logs[0] is the entry at snapshottedIndex, so its term is the
// snapshot's last included term.
LastIncludedTerm: rf.logs[0].Term,
Data: rf.persister.ReadSnapshot(),
}
// Drop the lock across the blocking RPC.
rf.mu.Unlock()
var reply InstallSnapshotReply
if rf.sendInstallSnapshot(server, &args, &reply) {
rf.mu.Lock()
if reply.Term > rf.currentTerm {
// Peer reports a newer term: step down to follower.
rf.currentTerm = reply.Term
rf.convertTo(Follower)
rf.persist()
} else {
// NOTE(review): there is no check that rf.currentTerm still equals
// args.Term here — a reply arriving after re-election could update
// matchIndex/nextIndex with stale information; confirm this is safe
// given convertTo's semantics.
if rf.matchIndex[server] < args.LastIncludedIndex {
rf.matchIndex[server] = args.LastIncludedIndex
}
rf.nextIndex[server] = rf.matchIndex[server] + 1
}
rf.mu.Unlock()
}
}
处理sendInstallSnapshot
// InstallSnapshot is the follower-side RPC handler: it replaces the
// compacted prefix of the local log with the leader's snapshot and
// forwards the snapshot to the service layer via applyCh.
func (rf *Raft) InstallSnapshot(args *InstallSnapshotArgs, reply *InstallSnapshotReply) {
rf.mu.Lock()
defer rf.mu.Unlock()
reply.Term = rf.currentTerm
// Reject snapshots from stale leaders, or snapshots older than the
// one we already hold.
if args.Term < rf.currentTerm || args.LastIncludedIndex < rf.snapshottedIndex {
return
}
if args.Term > rf.currentTerm {
rf.currentTerm = args.Term
rf.convertTo(Follower)
// NOTE(review): a valid RPC from the current leader usually also
// resets the election timer; not visible here — confirm convertTo
// (or the caller) handles it.
}
lastIncludedRelativeIndex := rf.getRelativeLogIndex(args.LastIncludedIndex)
if len(rf.logs) > lastIncludedRelativeIndex &&
rf.logs[lastIncludedRelativeIndex].Term == args.LastIncludedTerm {
// Our log extends past the snapshot and agrees at the boundary:
// keep the suffix (entry 0 becomes the boundary entry).
// NOTE(review): this re-slice keeps the old backing array alive;
// consider copying the suffix to release the truncated prefix.
rf.logs = rf.logs[lastIncludedRelativeIndex:]
} else {
// Conflicting or too-short log: discard it and seed the log with a
// placeholder entry at the snapshot boundary.
rf.logs = []LogEntry{{Term: args.LastIncludedTerm, Command: nil}}
}
rf.snapshottedIndex = args.LastIncludedIndex
// Everything covered by the snapshot is by definition committed.
if rf.commitIndex < rf.snapshottedIndex {
rf.commitIndex = rf.snapshottedIndex
}
if rf.lastApplied < rf.snapshottedIndex {
rf.lastApplied = rf.snapshottedIndex
}
// Persist the trimmed Raft state and the snapshot together.
rf.persister.SaveStateAndSnapshot(rf.encodeRaftState(), args.Data)
// Already applied past the snapshot point: nothing new for the
// service layer to install.
if rf.lastApplied > rf.snapshottedIndex {
return
}
installSnapshotCommand := ApplyMsg{
CommandIndex: rf.snapshottedIndex,
Command: "InstallSnapshot",
CommandValid: false,
CommandData: rf.persister.ReadSnapshot(),
}
// Deliver on a fresh goroutine so we never block on applyCh while
// holding rf.mu.
go func(msg ApplyMsg) {
rf.applyCh <- msg
}(installSnapshotCommand)
}