一篇边看边写的流水账,适合作为看源码的参考,不适合通过本文迅速了解Vearch
启动
main
函数中通过下面这几句启动router
server, err := router.NewServer(ctx)
server.Start();
http.ListenAndServe("0.0.0.0:"+cast.ToString(port),
复制代码
主要启动函数就在NewServer
和Start
中,我们分别来看
NewServer
func NewServer(ctx context.Context) (*Server, error) {
// 1. 初始化client
cli, err := client.NewClient(config.Conf())
addr := config.LocalCastAddr
// 2. 初始化httpserver
httpServerConfig := &netutil.ServerConfig{
Name: "HttpServer",
Addr: util.BuildAddr(addr, config.Conf().Router.Port),
ConnLimit: config.Conf().Router.ConnLimit,
CloseTimeout: time.Duration(config.Conf().Router.CloseTimeout),
}
netutil.SetMode(netutil.RouterModeGorilla) //no need
httpServer := netutil.NewServer(httpServerConfig)
document.ExportDocumentHandler(httpServer, cli)
// 3. 初始化rpcServer
var rpcServer *grpc.Server
if config.Conf().Router.RpcPort > 0 {
lis, err := net.Listen("tcp", util.BuildAddr(addr, config.Conf().Router.RpcPort))
rpcServer = grpc.NewServer()
go func() {
rpcServer.Serve(lis); err
}
}()
document.ExportRpcHandler(rpcServer, cli)
}
routerCtx, routerCancel := context.WithCancel(ctx)
// start router cache
if err := cli.Master().FlushCacheJob(routerCtx); err != nil {
log.Error("Error in Start cache Job,Err:%v", err)
panic(err)
}
// start master job
if config.Conf().Global.MergeRouter {
if err := client.NewWatchServerCache(ctx, cli); err != nil {
log.Error("watcher server cache error,Err:%v", err)
panic(err)
}
}
return &Server{
httpServer: httpServer,
ctx: routerCtx,
cli: cli,
cancelFunc: routerCancel,
rpcServer: rpcServer,
}, nil
}
复制代码
初始化client
函数func NewClient(conf *config.Config)
初始化router的client,client有两个成员,psclient ps
和masterclient master
,ps只有一个指向本client的指针,不知道有啥用
// NewClient creates a Client and initialises its ps client and master client
// from conf. It returns the first initialisation error and a nil client if
// either step fails.
func NewClient(conf *config.Config) (client *Client, err error) {
	client = &Client{}
	// Check each step separately so a ps-client failure is not overwritten by
	// the result of the master-client step.
	if err = client.initPsClient(conf); err != nil {
		return nil, err
	}
	if err = client.initMasterClient(conf); err != nil {
		return nil, err
	}
	return client, nil
}
复制代码
initPsClient里几乎没做什么,重点看initMasterClient
// initMasterClient opens the etcd-backed store at the configured address and
// attaches a masterClient built on it to client. It returns the error from
// store.OpenStore when the store cannot be opened.
func (client *Client) initMasterClient(conf *config.Config) error {
	openStore, err := store.OpenStore("etcd", conf.GetEtcdAddress())
	if err != nil {
		return err
	}
	client.master = &masterClient{client: client, Store: openStore, cfg: conf}
	// masterServer is initialised with the number of configured masters.
	masterServer.init(len(conf.Masters))
	return nil
}
复制代码
masterclient
有如下几个成员:
- client *Client 指向client
- store.Store // 继承,存etcd地址
- cfg *config.Config // 配置
- once sync.Once //
- cliCache *clientCache // 缓存
初始化函数中,openStore
通过配置读到etcd
地址,etcd
是一个类似zk的分布式一致性配置存储.初始化masterServer,不知道是干嘛,就是把masterServer.total
设为配置的值(好像是记录有几个master)
小结:初始化client就是通过配置,把
etcd
地址存在了masterclient中
初始化httpServer
NewServer()
中初始化httpServer段落如下:
// Describe the HTTP server: name, bind address, connection limit and close timeout.
httpServerConfig := &netutil.ServerConfig{
Name: "HttpServer",
Addr: util.BuildAddr(addr, config.Conf().Router.Port),
ConnLimit: config.Conf().Router.ConnLimit,
CloseTimeout: time.Duration(config.Conf().Router.CloseTimeout),
}
// Select the Gorilla router mode; createRouter reads it when the server is built.
netutil.SetMode(netutil.RouterModeGorilla) //no need
// Build the server from the config and register the document handlers on it.
httpServer := netutil.NewServer(httpServerConfig)
document.ExportDocumentHandler(httpServer, cli)
复制代码
httpServerConfig
包括了name、地址(ip+port)、最大连接数、超时。netutil.NewServer()
函数通过这个配置初始化了一个httpServer
httpServer
是一个Server
类型,有下面这些成员:
- cfg *ServerConfig :配置
- server *http.Server :
- router http.Handler
- rateLimit ratelimit2.RateLimit
- closed int64
追踪一下初始化httpServer的netutil.NewServer()
函数,主要过程如下:
// NewServer returns a Server that keeps config, has its router created, and
// wraps an http.Server that uses the Server itself as the handler.
func NewServer(config *ServerConfig) *Server {
	srv := new(Server)
	srv.cfg = config
	srv.createRouter()
	srv.server = &http.Server{Handler: srv}
	return srv
}
复制代码
函数返回一个Server
类型,Server
类型成员如下:
// Server bundles an http.Server with its configuration and request router.
type Server struct {
cfg *ServerConfig // server configuration
server *http.Server // underlying net/http server
router http.Handler // request router, assigned by createRouter
rateLimit ratelimit2.RateLimit
closed int64
}
复制代码
看一下createRouter()
函数做了什么
// createRouter assigns s.router according to the package-level routerMode:
// a mux router for RouterModeGorilla, an httprouter for RouterModeHttpRouter.
// Any other mode leaves s.router untouched.
func (s *Server) createRouter() {
	switch routerMode {
	case RouterModeGorilla:
		s.router = mux.NewRouter()
	case RouterModeHttpRouter:
		s.router = httprouter.New()
	}
}
复制代码
可以看到,createRouter()
函数根据路由模式为Server初始化了router
成员,mux和httprouter是来自于别的库的东西了
然后看一下ExportDocumentHandler
,通过GorillaExport
函数注册了一堆方法,就可以处理这些请求了
func (handler *DocumentHandler) GorillaExport(masterService *masterService) error {
masterApi := &MasterClusterAPI{masterService : masterService}
// cluster handler
handler.httpServer.HandlesMethods([]string{http.MethodGet}, "/", []netutil.HandleContinued{handler.handleTimeout, masterApi.Auth, handler.handleRouterInfo}, nil)
//cluster handler
handler.httpServer.HandlesMethods([]string{http.MethodGet}, "/clean_lock", []netutil.HandleContinued{handler.handleTimeout, masterApi.Auth, masterApi.cleanLock},nil )
//db,servers handler
handler.httpServer.HandlesMethods([]string{http.MethodGet}, "/list/server", []netutil.HandleContinued{handler.handleTimeout, masterApi.Auth, masterApi.serverList}, nil)
handler.httpServer.HandlesMethods([]string{http.MethodGet}, "/list/db", []netutil.HandleContinued{handler.handleTimeout, masterApi.Auth, masterApi.dbList}, nil)
handler.httpServer.HandlesMethods([]string{http.MethodGet}, "/list/space", []netutil.HandleContinued{handler.handleTimeout, masterApi.Auth, masterApi.spaceList}, nil)
handler.httpServer.HandlesMethods([]string{http.MethodGet}, "/list/partition", []netutil.HandleContinued{handler.handleTimeout, masterApi.Auth, masterApi.partitionList}, nil)
handler.httpServer.HandlesMethods(
...省略...
复制代码
HandlesMethods
方法调用Server.doHandles(method, path, handles, end)
方法,再根据routerMode调用http.HandlerFunc
把方法变成一个handler,然后通过下面这句:
s.router.(*mux.Router).Handle(path, h).Methods(method)
复制代码
handler h就被加入了s.router.handlers[method],router接到method对应的请求就会调用这个handler
初始化rpcServer
初始化rpcServer的代码部分如下
// 3. Initialise the gRPC server.
var rpcServer *grpc.Server
lis, err := net.Listen("tcp", util.BuildAddr(addr, config.Conf().Router.RpcPort))
rpcServer = grpc.NewServer()
// Register the handlers before serving: grpc requires services to be
// registered before Serve is invoked.
document.ExportRpcHandler(rpcServer, cli)
go func() {
	if err := rpcServer.Serve(lis); err != nil {
		panic(err)
	}
}()
routerCtx, routerCancel := context.WithCancel(ctx)
复制代码
这里抄一些注释:
NewServer:
NewServer creates a gRPC server which has no service registered and has not started to accept requests yet.
Serve:
Serve accepts incoming connections on the listener lis, creating a new ServerTransport and service goroutine for each. The service goroutines read gRPC requests and then call the registered handlers to reply to them. Serve returns when lis.Accept fails with fatal errors. lis will be closed when this method returns. Serve will return a non-nil error unless Stop or GracefulStop is called.
ExportRpcHandler是用来注册服务的,doc_rpc.go
下面RpcHandler
的所有方法就都注册进去了
Start
Start里主要就启动httpServer,没啥说的
业务逻辑
router服务的入口都在doc_rpc.go
里面的方法,我们以Get
方法为例
// Get handles a GetRequest by passing it to deal and returning the result as
// a *vearchpb.GetResponse.
//
// It returns the error from deal unchanged, and an internal error when deal
// yields a value that is not a *vearchpb.GetResponse.
func (handler *RpcHandler) Get(ctx context.Context, req *vearchpb.GetRequest) (reply *vearchpb.GetResponse, err error) {
	// time.Now() is evaluated here; Cost itself runs when Get returns.
	defer Cost("get", time.Now())

	res, err := handler.deal(ctx, req)
	if err != nil {
		return nil, err
	}
	reply, ok := res.(*vearchpb.GetResponse)
	if !ok {
		// NOTE(review): ErrorEnum_INTERNAL_ERROR is assumed to be the generic
		// internal error code of vearchpb; confirm against the proto definition.
		return nil, vearchpb.NewError(vearchpb.ErrorEnum_INTERNAL_ERROR, nil)
	}
	return reply, nil
}
复制代码
调用的是deal
方法
// deal dispatches req to the matching docService method based on its concrete
// type and returns that method's reply.
// NOTE(review): the deferred recover() discards any panic, so after a panic
// deal returns whatever reply/err held at that moment without reporting it.
func (handler *RpcHandler) deal(ctx context.Context, req Request) (reply interface{}, err error) {
defer func() {
recover()
}()
// Replace ctx with the one returned by setTimeout for the request head.
ctx, cancel := handler.setTimeout(ctx, req.GetHead())
defer func() {
// cancel is checked for nil before it is called.
if cancel != nil {
cancel()
}
}()
switch v := req.(type) {
case *vearchpb.GetRequest:
reply = handler.docService.getDocs(ctx, v)
...
}
return reply, nil
}
复制代码
继续去看handler.docService.getDocs(ctx, v)
方法
// getDocs builds a router request for the primary keys in args, executes it,
// and returns a GetResponse holding the resulting items and the request
// metadata as head params.
func (docService *docService) getDocs(ctx context.Context, args *vearchpb.GetRequest) *vearchpb.GetResponse {
	ctx, cancel := setTimeOut(ctx, args.Head)
	defer cancel()

	resp := &vearchpb.GetResponse{Head: newOkHead()}

	req := client.NewRouterRequest(ctx, docService.client)
	req.SetMsgID()
	req.SetMethod(client.GetDocsHandler)
	req.SetHead(args.Head)
	req.SetSpace()
	req.SetDocsByKey(args.PrimaryKeys)
	req.PartitionDocs()

	resp.Items = req.Execute()
	resp.Head.Params = req.GetMD()
	return resp
}
复制代码
大概步骤就是构建一个request
,然后通过request.Execute()
发送。
构建RouterRequest
RouterRequest
结构如下:
// routerRequest carries everything needed to send one router request.
type routerRequest struct {
ctx context.Context
client *Client // the router's client
md map[string]string // request metadata; SetMsgID and SetMethod write the MessageID and HandlerType entries
head *vearchpb.RequestHead // request head: user name, password, space name, params
docs []*vearchpb.Document // documents carried by the request
space *entity.Space // target space (roughly the equivalent of a table)
sendMap map[entity.PartitionID]*vearchpb.PartitionData // keyed by partition ID; the value is the data sent to that partition
// Err if error else nil
Err error
}
复制代码
构造RouterRequest
的语句如下所示,我们一个个看这一串函数吧
// Create the request, then fill it in step by step through the chained setters.
request := client.NewRouterRequest(ctx, docService.client)
request.SetMsgID().
	SetMethod(client.GetDocsHandler).
	SetHead(args.Head).
	SetSpace().
	SetDocsByKey(args.PrimaryKeys).
	PartitionDocs()
复制代码
SetMsgID()(NewRouterRequest的实现这里没有贴出)
// SetMsgID stores a freshly generated UUID under the MessageID key of the
// request metadata and returns r for chaining.
func (r *routerRequest) SetMsgID() *routerRequest {
	msgID := uuid.FlakeUUID()
	r.md[MessageID] = msgID
	return r
}
复制代码
给request.md[MessageID]填上一个uuid(Universally Unique Identifier)
SetMethod(client.GetDocsHandler)
// SetMethod records method under the HandlerType key of the request metadata
// and returns r for chaining.
func (r *routerRequest) SetMethod(method string) *routerRequest {
	meta := r.md
	meta[HandlerType] = method
	return r
}
复制代码
给request.md[HandlerType]填上一个"GetDocsHandler",表示这个请求是get
SetHead(args.Head)
请求的pb是GetRequest
,其中有一个成员
ResponseHead head = 1;
message ResponseHead {
Error err = 1;
map<string, string> params = 3;
}
复制代码
把这个head给到了routerRequest.head,这个params里是啥以后再看
SetSpace()
// SetSpace resolves the space named in the request head through the client
// and stores it in r.space; a lookup failure is recorded in r.Err.
// It does nothing when an earlier step has already set r.Err, so the first
// error of the chain is preserved. It returns r for chaining.
func (r *routerRequest) SetSpace() *routerRequest {
	if r.Err != nil {
		return r
	}
	r.space, r.Err = r.client.Space(r.ctx, r.head.DbName, r.head.SpaceName)
	return r
}
复制代码
追踪Space()
函数
// Space looks up the space spaceName of database dbName through the master
// client's cache.
func (client *Client) Space(ctx context.Context, dbName, spaceName string) (*entity.Space, error) {
	cliCache := client.Master().Cache()
	return cliCache.SpaceByCache(ctx, dbName, spaceName)
}
复制代码
router.client.master.cliCache
下面是各种缓存
// clientCache groups the caches kept by the master client.
type clientCache struct {
// Embedded map; GetOrCreateRPCClient loads and stores rpcClient values in it by node ID.
sync.Map
mc *masterClient
cancel context.CancelFunc
// lock is held by GetOrCreateRPCClient while it creates a new rpcClient.
lock sync.Mutex
userCache,
spaceCache,
spaceIDCache,
partitionCache,
serverCache *cache.Cache // serverCache stores the ps server entries
}
复制代码
SpaceByCache
函数就是通过space name和db从缓存cliCache.spaceCache
里找Space;如果缓存里没有,就从etcd里查出来写入缓存,再从缓存中取
// SpaceByCache returns the space identified by db and space from the space
// cache. On a miss it reloads the entry into the cache and looks it up again.
//
// It returns an error, instead of panicking on a nil type assertion, when the
// space is still absent after the reload; a reload error is wrapped in it.
func (cliCache *clientCache) SpaceByCache(ctx context.Context, db, space string) (*entity.Space, error) {
	key := cacheSpaceKey(db, space)

	// Fast path: already cached.
	if cached, found := cliCache.spaceCache.Get(key); found {
		return cached.(*entity.Space), nil
	}

	// Miss: reload the entry into the cache. The reload error is still logged
	// as before, and kept so it can be reported if the lookup below fails.
	reloadErr := cliCache.reloadSpaceCache(ctx, false, db, space)
	vearchlog.LogErrNotNil(reloadErr)

	// Read it back from the cache.
	cached, found := cliCache.spaceCache.Get(key)
	if !found {
		if reloadErr != nil {
			return nil, fmt.Errorf("reload space cache for db %q space %q: %w", db, space, reloadErr)
		}
		return nil, fmt.Errorf("space %q of db %q not found in cache after reload", space, db)
	}
	return cached.(*entity.Space), nil
}
复制代码
reloadSpaceCache
函数负责从db中查找space并加入缓存,查找的步骤和关键代码片段如下:
- 从etcd中获取dbID
dbID, err := cliCache.mc.QueryDBName2Id(ctx, db)
复制代码
- 获取space
space, err := cliCache.mc.QuerySpaceByName(ctx, dbID, spaceName)
复制代码
这里也是通过ETCD的GET()接口获取,masterclient有个成员clientv3,可以直接请求etcd
resp, err := store.cli.Get(ctx, prefix, clientv3.WithPrefix())
复制代码
- 存进缓存spaceCache spaceIDcache
cliCache.spaceCache.Set(key, space, cache.NoExpiration)
cliCache.spaceIDCache.Set(cast.ToString(space.Id), space, cache.NoExpiration)
复制代码
SetDocsByKey(args.PrimaryKeys)
GetRequest
pb里有repeated string primary_keys
,
// SetDocsByKey fills r.docs with one document per primary key; an invalid key
// is recorded in r.Err. It does nothing when an earlier step has already set
// r.Err, so that error is not overwritten by this step's result.
// It returns r for chaining.
func (r *routerRequest) SetDocsByKey(keys []string) *routerRequest {
	if r.Err != nil {
		return r
	}
	r.docs, r.Err = setDocs(keys)
	return r
}
复制代码
setDocs
其实就是把routerRequest.docs
设为pb中的primary_keys
数组,docs中每一个Document
的PKey设为primary_keys
数组中的元素
// setDocs turns each primary key into a Document whose PKey is that key.
// It returns a nil slice and an error as soon as it meets an empty key.
func setDocs(keys []string) (docs []*vearchpb.Document, err error) {
	docs = make([]*vearchpb.Document, 0)
	for i := range keys {
		if keys[i] == "" {
			return nil, errors.New("key can not be null")
		}
		docs = append(docs, &vearchpb.Document{PKey: keys[i]})
	}
	return docs, nil
}
复制代码
PartitionDocs()
// PartitionDocs splits r.docs into per-partition batches and stores them in
// r.sendMap, keyed by partition ID. The partition of a document is chosen by
// r.space from the murmur3 hash (seed 0) of the document's primary key.
//
// It does nothing when an earlier step has already set r.Err, because r.space
// may not have been resolved in that case. It returns r for chaining.
func (r *routerRequest) PartitionDocs() *routerRequest {
	if r.Err != nil {
		return r
	}
	dataMap := make(map[entity.PartitionID]*vearchpb.PartitionData)
	for _, doc := range r.docs {
		partitionID := r.space.PartitionId(murmur3.Sum32WithSeed(cbbytes.StringToByte(doc.PKey), 0))
		d, ok := dataMap[partitionID]
		if !ok {
			// First document for this partition: create its batch.
			d = &vearchpb.PartitionData{PartitionID: partitionID, MessageID: r.GetMsgID()}
			dataMap[partitionID] = d
		}
		d.Items = append(d.Items, &vearchpb.Item{Doc: doc})
	}
	r.sendMap = dataMap
	return r
}
复制代码
函数就是构建routerRequest.sendMap
,这个map的key是partitionID,value是要发给这个partition的pb PartitionData
,PartitionData
里有repeated Item items
// Item is one entry of the repeated items field of PartitionData.
message Item{
option (gogoproto.goproto_getters) = true;
Error err = 1;
Document doc = 2; // the document carried by this item
string msg = 3;
}
复制代码
函数首先构建Item,Item.doc设为上一步的doc,然后把这个Item追加到dataMap[partitionID].Items里
构建好了routerRequest
,接下来就准备发送了
发送request
发送的方法入口是func (r *routerRequest) Execute() []*vearchpb.Item
重点是下面这句
// Get (or create) the rpc client for the node, then send the partition data.
// The dots sit at line ends because Go inserts a semicolon after a line that
// ends in an identifier or ")", so a line may not start with ".".
err := r.client.
	PS().
	GetOrCreateRPCClient(ctx, nodeID).
	Execute(ctx, UnaryHandler, d, replyPartition)
复制代码
其中分为构建rpcClient
和正式发送请求
正式发送请求由Execute
执行,前面的是构建rpcClient
构建rpcClient
看一下GetOrCreateRPCClient
// GetOrCreateRPCClient returns the rpcClient cached for nodeID, creating and
// caching one when none exists yet. It returns nilClient when the ps server
// cannot be found or the underlying RPC client cannot be created.
func (ps *psClient) GetOrCreateRPCClient(ctx context.Context, nodeID entity.NodeID) *rpcClient {
	// First lookup without taking the lock.
	if cached, hit := ps.Client().Master().cliCache.Load(nodeID); hit {
		return cached.(*rpcClient).lastUse()
	}

	ps.Client().Master().cliCache.lock.Lock()
	defer ps.Client().Master().cliCache.lock.Unlock()

	// Look again under the lock: another caller may have stored it meanwhile.
	if cached, hit := ps.Client().Master().cliCache.Load(nodeID); hit {
		return cached.(*rpcClient).lastUse()
	}

	psServer, err := ps.Client().Master().cliCache.ServerByCache(ctx, nodeID)
	if err != nil {
		log.Error("Master().ServerByCache() err, can not get ps server from master, err: %s", err.Error())
		return nilClient
	}

	address := psServer.Ip + ":" + cast.ToString(psServer.RpcPort)
	conn, err := server.NewRpcClient(address)
	if err != nil {
		log.Error("server.NewRpcClient() err, can not new rpc Client, err: %s", err.Error())
		return nilClient
	}
	if conn == nil {
		return nilClient
	}

	created := &rpcClient{client: conn, useTime: time.Now().UnixNano()}
	ps.Client().Master().cliCache.Store(nodeID, created)
	return created.lastUse()
}
复制代码
这个函数返回的是一个rpcClient
类型对象,这应该就是用来和ps通信的。首先通过nodeID
从缓存中拿,没有就构建一个新的,存入缓存并返回,流程是下面这几句
// Look up the ps server by nodeID.
psServer, err := ps.Client().Master().cliCache.ServerByCache(ctx, nodeID)
// Build a new RpcClient from the ps server's ip and rpc port.
client, err := server.NewRpcClient(psServer.Ip + ":" + cast.ToString(psServer.RpcPort))
c := &rpcClient{client: client, useTime: time.Now().UnixNano()}
// Store it in the cache.
ps.Client().Master().cliCache.Store(nodeID, c)
return c.lastUse()
复制代码
先看获取psServer
信息
上文说过,router.client.masterclient.cliCache
里面有很多缓存,其中serverCache
存的有psServer,类型是entity.Server
,结构的定义如下:
// Server describes one node: its ID, ip, ports, partitions and build version.
type Server struct {
ID NodeID `json:"name,omitempty"` //unique name for raft
RpcPort uint16 `json:"rpc_port"` // port used together with Ip to build the rpc client address
RaftHeartbeatPort uint16 `json:"raft_heartbeat_port"`
RaftReplicatePort uint16 `json:"raft_replicate_port"`
Ip string `json:"ip,omitempty"`
PartitionIds []PartitionID `json:"p_ids,omitempty"` // presumably the partitions hosted on this node; TODO confirm
Size uint64 `json:"size,omitempty"`
Private bool `json:"private"`
Version *BuildVersion `json:"version"`
}
复制代码
可以看到里面包括了节点的ID,ip,端口等信息,还有PartitionIds[]
数组,猜测是这个节点包含的partitions。
ServerByCache
函数先从缓存中获取psServer,没有就从etcd中取并加入缓存。
我们再看看构建rpcClient
的过程 首先通过server.NewRpcClient
函数构建一个RpcClient
对象, 传入的是psServer的IP+port。RpcClient
结构声明如下
client, err := server.NewRpcClient(psServer.Ip + ":" + cast.ToString(psServer.RpcPort))
// RpcClient is the client type returned by NewRpcClient for a server address.
type RpcClient struct {
serverAddress []string
clientPool *pool.Pool // Execute takes *client.OneClient values from this pool
concurrent chan bool
concurrentNum int
}
复制代码
然后,通过刚才构建的RpcClient
对象来构建一个rpcClient
对象, 注意这两个不一样,一个大写一个小写。rpcClient
结构如下:
c := &rpcClient{client: client, useTime: time.Now().UnixNano()}
// rpcClient wraps a *server.RpcClient together with a use timestamp.
type rpcClient struct {
client *server.RpcClient // underlying client that Execute delegates to
useTime int64 // initialised from time.Now().UnixNano() when the rpcClient is created
_lock sync.RWMutex
}
复制代码
useTime设为初始化时间,至此,rpcClient
初始化完成,里面包含了psServer的地址等信息,下面要做的就是发送了
rpcClient正式发送消息
正式发送消息在函数rpcClient.Execute
中
// Execute forwards the call to the wrapped RpcClient. When r is the nilClient
// placeholder it returns a Create_RpcClient_Failed error instead.
func (r *rpcClient) Execute(ctx context.Context, servicePath string, args interface{}, reply *vearchpb.PartitionData) error {
	if r != nilClient {
		return r.client.Execute(ctx, servicePath, args, reply)
	}
	return vearchpb.NewError(vearchpb.ErrorEnum_Create_RpcClient_Failed, nil)
}
复制代码
调用的又是RpcClient.Execute
(注意大小写)
// Execute sends the request through a client taken from the pool.
// The excerpt elides the surrounding code with "...".
func (r *RpcClient) Execute(ctx context.Context, servicePath string, args interface{}, reply *vearchpb.PartitionData) (err error) {
...
// Take a client from the pool; the pooled values are *client.OneClient.
cli := r.clientPool.Get().(*client.OneClient)
// Perform the call, writing the response into reply.
cli.Call(ctx, servicePath, serviceMethod, args, reply)
...
}
复制代码
这里调用的又是smallnest/rpcx/client/oneclient.go
中的OneClient.Call
方法了,vearch用了RPCX
库