下面以对etcd键值的操作请求为例,介绍etcd中一个请求从开始处理到返回响应的大体流程。etcd中处理kv请求的handler为keysHandler,由于keysHandler最终作为http请求的处理函数,所以请求处理的入口为其ServeHTTP方法:
// ServeHTTP is the HTTP entry point for key requests. It validates the
// method, parses the request, checks access to the key prefix, hands the
// request to h.server.Do and finally writes either the resulting event or a
// watch stream back to the client.
func (h *keysHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// Stop right away when allowMethod does not accept the HTTP method.
	if ok := allowMethod(w, r.Method, "HEAD", "GET", "PUT", "POST", "DELETE"); !ok {
		return
	}

	w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())

	// The context bounds how long the request may take to be processed.
	reqCtx, cancelReq := context.WithTimeout(context.Background(), h.timeout)
	defer cancelReq()

	clk := clockwork.NewRealClock()
	began := clk.Now()

	// Parse the request; a parse error is reported to the client as-is.
	req, parseErr := parseKeyRequest(r, clk)
	if parseErr != nil {
		writeKeyError(w, parseErr)
		return
	}

	// The path must be valid at this point (the request parsed successfully),
	// so slicing off the keys prefix is safe.
	keyPath := r.URL.Path[len(keysPrefix):]
	if !hasKeyPrefixAccess(h.sec, r, keyPath, req.Recursive) {
		writeKeyNoAuth(w)
		return
	}

	if !req.Wait {
		reportRequestReceived(req)
	}

	// Hand the request to the server's Do method. The call blocks until the
	// request has been processed, which may take a while for write requests.
	resp, doErr := h.server.Do(reqCtx, req)
	if doErr != nil {
		doErr = trimErrorPrefix(doErr, etcdserver.StoreKeysPrefix)
		writeKeyError(w, doErr)
		reportRequestFailed(req, doErr)
		return
	}

	// Reply according to the kind of response: an event first, then a watcher.
	if ev := resp.Event; ev != nil {
		if writeErr := writeKeyEvent(w, ev, h.timer); writeErr != nil {
			// Should never be reached
			plog.Errorf("error writing event (%v)", writeErr)
		}
		reportRequestCompleted(req, resp, began)
		return
	}

	if watcher := resp.Watcher; watcher != nil {
		// Watches get their own timeout, independent of the request context.
		watchCtx, cancelWatch := context.WithTimeout(context.Background(), defaultWatchTimeout)
		defer cancelWatch()
		handleKeyWatch(watchCtx, w, watcher, req.Stream, h.timer)
		return
	}

	writeKeyError(w, errors.New("received response with no Event/Watcher!"))
}
EtcdServer的Do方法的处理流程因请求类型而异:如果是写请求,需要先把请求通过raftNode提交到集群中的大多数节点上,之后再在本地commit,然后返回响应结果;如果是读请求,则直接查找响应结果然后返回,如下:
// Do processes the request r on this server. A request whose method is POST,
// PUT, DELETE or QGET (a GET with r.Quorum set) is marshaled, proposed through
// s.r, and Do then waits for the result registered under the request ID.
// The GET and HEAD branches and the remaining select cases are elided in this
// excerpt.
func (s *EtcdServer) Do(ctx context.Context, r pb.Request) (Response, error) {
// Obtain a request ID that uniquely identifies this request.
r.ID = s.reqIDGen.Next()
// A GET that sets Quorum is relabeled QGET so it takes the propose path below.
if r.Method == "GET" && r.Quorum {
r.Method = "QGET"
}
switch r.Method {
case "POST", "PUT", "DELETE", "QGET":
data, err := r.Marshal()
if err != nil {
return Response{}, err
}
// Register the request ID with s.w and keep the returned channel; the
// select below blocks on that channel until the result arrives.
ch := s.w.Register(r.ID)
// TODO: benchmark the cost of time.Now()
// might be sampling?
start := time.Now()
// Submit the marshaled request to the raft node.
// NOTE(review): whatever Propose returns is not inspected here — confirm
// that a failed proposal is caught by one of the elided select cases.
s.r.Propose(ctx, data)
// Count this proposal as pending until Do returns.
proposePending.Inc()
defer proposePending.Dec()
select {
// Wait for the result of the proposal.
case x := <-ch:
// Record how long the proposal took, in seconds.
proposeDurations.Observe(float64(time.Since(start)) / float64(time.Second))
resp := x.(Response)
return resp, resp.err
// ... (other select cases elided in this excerpt)
}
case "GET":
// ...
case "HEAD":
// ...
}
}
ch通道上的响应主要是在EtcdServer的apply方法中写入的。在EtcdServer的run方法中,如果有请求条目已经被应用到大多数集群成员上,那么EtcdServer的applyAll方法就会被调用,最终会调用到apply。apply中应用请求条目的相关代码如下:
// applyAll forwards ep and apply to s.applyEntries. The steps before and
// after that call are elided in this excerpt.
func (s *EtcdServer) applyAll(ep *etcdProgress, apply *apply) {
// ...
s.applyEntries(ep, apply)
// ...
}
// applyEntries applies entries via s.apply, stores the returned index in
// ep.appliedi, and schedules a delayed stop of the server when apply reports
// shouldstop. The code that produces ents is elided in this excerpt.
func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *apply) {
// ...
var shouldstop bool
if ep.appliedi, shouldstop = s.apply(ents, &ep.confState); shouldstop {
// Stop in a separate goroutine after a delay of 10*100ms, so this call does not block here.
go s.stopWithDelay(10*100*time.Millisecond, fmt.Errorf("the member has been permanently removed from the cluster"))
}
}
// apply walks the entries in es in order and applies each one according to
// its type. It returns the index of the last entry processed (0 when es is
// empty) and shouldstop. shouldstop is never assigned in the visible code;
// presumably the elided conf-change branch sets it — verify against the full
// source.
func (s *EtcdServer) apply(es []raftpb.Entry, confState *raftpb.ConfState) (uint64, bool) {
var applied uint64
var shouldstop bool
for i := range es {
e := es[i]
// set the consistent index of current executing entry
s.consistIndex.setConsistentIndex(e.Index)
switch e.Type {
// Ordinary key-value request.
case raftpb.EntryNormal:
// raft state machine may generate noop entry when leader confirmation.
// skip it in advance to avoid some potential bug in the future
if len(e.Data) == 0 {
// Non-blocking send on forceVersionC: dropped if nobody is ready to receive.
select {
case s.forceVersionC <- struct{}{}:
default:
}
// Leaves the switch only; the index/term bookkeeping below still runs.
break
}
var raftReq pb.InternalRaftRequest
if !pbutil.MaybeUnmarshal(&raftReq, e.Data) { // backward compatible
// The data is not an InternalRaftRequest: decode it as a plain pb.Request.
var r pb.Request
pbutil.MustUnmarshal(&r, e.Data)
s.w.Trigger(r.ID, s.applyRequest(r))
} else {
switch {
case raftReq.V2 != nil:
req := raftReq.V2
// Wait.Trigger(reqid, ...) delivers the final response returned by
// s.applyRequest(*req) to channel ch, which ends the wait in Do.
s.w.Trigger(req.ID, s.applyRequest(*req))
default:
s.w.Trigger(raftReq.ID, s.applyV3Request(&raftReq))
}
}
// Cluster configuration change request, e.g. a membership change.
case raftpb.EntryConfChange:
// ...
}
// Publish the index and term of the entry just processed.
atomic.StoreUint64(&s.r.index, e.Index)
atomic.StoreUint64(&s.r.term, e.Term)
applied = e.Index
}
return applied, shouldstop
}