// Annotated source code follows.
// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package p2p
import (
"context"
crand "crypto/rand"
"encoding/binary"
"errors"
"fmt"
mrand "math/rand"
"net"
"sync"
"time"
"github.com/ethereum/go-ethereum/common/mclock"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/ethereum/go-ethereum/p2p/netutil"
)
const (
// This is the amount of time spent waiting in between redialing a certain node. The
// limit is a bit higher than inboundThrottleTime to prevent failing dials in small
// private networks.
dialHistoryExpiration = inboundThrottleTime + 5*time.Second
// Config for the "Looking for peers" message.
// dialStatsLogInterval is the minimum interval between two such messages.
dialStatsLogInterval = 10 * time.Second // printed at most this often
// dialStatsPeerLimit suppresses the message once the number of dialed peers
// reaches this value (see logStats).
dialStatsPeerLimit = 3 // not printed once this many peers have been dialed
// Endpoint resolution is throttled with bounded backoff: the delay starts at
// initialResolveDelay and is capped at maxResolveDelay (see dialTask.resolve).
initialResolveDelay = 60 * time.Second
maxResolveDelay = time.Hour
)
// NodeDialer is used to connect to nodes in the network, typically by using
// an underlying net.Dialer but also using net.Pipe in tests.
type NodeDialer interface {
// Dial opens a connection to the given node, honoring the context.
Dial(context.Context, *enode.Node) (net.Conn, error)
}
// nodeResolver looks up the current record of a node. dialTask.resolve treats
// a nil result as "node not found".
type nodeResolver interface {
Resolve(*enode.Node) *enode.Node
}
// tcpDialer implements NodeDialer using real TCP connections.
type tcpDialer struct {
d *net.Dialer // underlying dialer used to open the TCP connection
}
// Dial implements NodeDialer. It opens a TCP connection to the endpoint
// reported by nodeAddr(dest), using the wrapped net.Dialer.
func (t tcpDialer) Dial(ctx context.Context, dest *enode.Node) (net.Conn, error) {
	endpoint := nodeAddr(dest).String()
	return t.d.DialContext(ctx, "tcp", endpoint)
}
// nodeAddr returns the TCP endpoint of n, built from n.IP() and n.TCP().
func nodeAddr(n *enode.Node) net.Addr {
	addr := new(net.TCPAddr)
	addr.IP, addr.Port = n.IP(), n.TCP()
	return addr
}
// checkDial errors: each value names one reason why checkDial refuses to dial a node.
var (
errSelf = errors.New("is self")
errAlreadyDialing = errors.New("already dialing")
errAlreadyConnected = errors.New("already connected")
errRecentlyDialed = errors.New("recently dialed")
errNotWhitelisted = errors.New("not contained in netrestrict whitelist")
errNoPort = errors.New("node does not provide TCP port")
)
// dialer creates outbound connections and submits them into Server.
// Two types of peer connections can be created:
//
// - static dials are pre-configured connections. The dialer attempts to
// keep these nodes connected at all times.
//
// - dynamic dials are created from node discovery results. The dialer
// continuously reads candidate nodes from its input iterator and attempts
// to create peer connections to nodes arriving through the iterator.
type dialScheduler struct {
dialConfig
setupFunc dialSetupFunc
wg sync.WaitGroup
cancel context.CancelFunc
ctx context.Context
// Channels used to deliver events to the loop goroutine.
nodesIn chan *enode.Node
doneCh chan *dialTask
addStaticCh chan *enode.Node
remStaticCh chan *enode.Node
addPeerCh chan *conn
remPeerCh chan *conn
// Everything below here belongs to loop and
// should only be accessed by code on the loop goroutine.
dialing map[enode.ID]*dialTask // active tasks
peers map[enode.ID]connFlag // all connected peers
dialPeers int // current number of dialed peers
// The static map tracks all static dial tasks. The subset of usable static dial tasks
// (i.e. those passing checkDial) is kept in staticPool. The scheduler prefers
// launching random static tasks from the pool over launching dynamic dials from the
// iterator.
static map[enode.ID]*dialTask
staticPool []*dialTask
// The dial history keeps recently dialed nodes. Members of history are not dialed.
history expHeap
historyTimer mclock.Timer
historyTimerTime mclock.AbsTime
// for logStats
lastStatsLog mclock.AbsTime
doneSinceLastLog int
}
// dialSetupFunc is invoked by dialTask.dial with a freshly dialed connection,
// the task's connection flags and the destination node.
type dialSetupFunc func(net.Conn, connFlag, *enode.Node) error
// dialConfig holds the settings of a dialScheduler. Unset fields are filled in
// by withDefaults where a default exists.
type dialConfig struct {
self enode.ID // our own ID
maxDialPeers int // maximum number of dialed peers
maxActiveDials int // maximum number of active dials
netRestrict *netutil.Netlist // IP whitelist, disabled if nil
resolver nodeResolver
dialer NodeDialer
log log.Logger
clock mclock.Clock
rand *mrand.Rand
}
// withDefaults returns a copy of cfg in which every unset field that has a
// default is filled in:
//
//   - maxActiveDials falls back to defaultMaxPendingPeers,
//   - log falls back to the root logger,
//   - clock falls back to the system clock,
//   - rand falls back to a math/rand generator with a random seed.
//
// The receiver is passed by value, so the caller's config is not modified.
func (cfg dialConfig) withDefaults() dialConfig {
	if cfg.maxActiveDials == 0 {
		cfg.maxActiveDials = defaultMaxPendingPeers
	}
	if cfg.log == nil {
		cfg.log = log.Root()
	}
	if cfg.clock == nil {
		cfg.clock = mclock.System{}
	}
	if cfg.rand == nil {
		// Seed from the system CSPRNG. If that read fails, fall back to the
		// wall clock rather than silently seeding with an all-zero buffer,
		// which would make the dial order identical on every start.
		seed := time.Now().UnixNano()
		var seedb [8]byte
		if _, err := crand.Read(seedb[:]); err == nil {
			seed = int64(binary.BigEndian.Uint64(seedb[:]))
		}
		cfg.rand = mrand.New(mrand.NewSource(seed))
	}
	return cfg
}
// newDialScheduler creates a dial scheduler from config (completed with
// defaults) and starts its two goroutines: readNodes, which pulls candidates
// from it, and loop, which schedules the dials. Call stop to shut both down.
func newDialScheduler(config dialConfig, it enode.Iterator, setupFunc dialSetupFunc) *dialScheduler {
	d := new(dialScheduler)
	d.dialConfig = config.withDefaults()
	d.setupFunc = setupFunc

	// State owned by the loop goroutine.
	d.dialing = make(map[enode.ID]*dialTask)
	d.static = make(map[enode.ID]*dialTask)
	d.peers = make(map[enode.ID]connFlag)

	// Unbuffered event channels feeding the loop goroutine.
	d.doneCh = make(chan *dialTask)
	d.nodesIn = make(chan *enode.Node)
	d.addStaticCh = make(chan *enode.Node)
	d.remStaticCh = make(chan *enode.Node)
	d.addPeerCh = make(chan *conn)
	d.remPeerCh = make(chan *conn)

	d.lastStatsLog = d.clock.Now()
	d.ctx, d.cancel = context.WithCancel(context.Background())

	// One WaitGroup slot per goroutine started below.
	d.wg.Add(2)
	go d.readNodes(it)
	go d.loop(it)
	return d
}
// stop shuts down the dialer, canceling all current dial tasks.
// It cancels the scheduler context and then blocks until the goroutines
// registered on d.wg have finished.
func (d *dialScheduler) stop() {
d.cancel()
d.wg.Wait()
}
// addStatic hands n to the loop goroutine as a new static dial candidate.
// The call blocks until the loop accepts the node or the scheduler is stopped.
func (d *dialScheduler) addStatic(n *enode.Node) {
	select {
	case <-d.ctx.Done():
		// Scheduler is shutting down; drop the request.
	case d.addStaticCh <- n:
	}
}
// removeStatic asks the loop goroutine to forget the static dial candidate n.
// The call blocks until the loop accepts the request or the scheduler is stopped.
func (d *dialScheduler) removeStatic(n *enode.Node) {
	select {
	case <-d.ctx.Done():
		// Scheduler is shutting down; drop the request.
	case d.remStaticCh <- n:
	}
}
// peerAdded notifies the loop goroutine that connection c became a peer, so
// that the peer set can be updated. It returns without delivering the event
// if the scheduler has been stopped.
func (d *dialScheduler) peerAdded(c *conn) {
	select {
	case <-d.ctx.Done():
		// Scheduler is shutting down; nobody is tracking peers anymore.
	case d.addPeerCh <- c:
	}
}
// peerRemoved notifies the loop goroutine that the peer behind connection c
// is gone, so that the peer set can be updated. It returns without delivering
// the event if the scheduler has been stopped.
func (d *dialScheduler) peerRemoved(c *conn) {
	select {
	case <-d.ctx.Done():
		// Scheduler is shutting down; nobody is tracking peers anymore.
	case d.remPeerCh <- c:
	}
}
// loop is the main loop of the dialer. It owns all scheduler state below the
// "belongs to loop" marker in dialScheduler and reacts to events delivered on
// the scheduler's channels until the context is canceled. On exit it closes
// the iterator, waits for all running dial tasks and releases its WaitGroup slot.
func (d *dialScheduler) loop(it enode.Iterator) {
var (
// nodesCh is d.nodesIn while dial slots are free and nil otherwise; a nil
// channel never becomes ready, which disables that select case.
nodesCh chan *enode.Node
// historyExp receives a value from the history timer callback
// (see rearmHistoryTimer).
historyExp = make(chan struct{}, 1)
)
loop:
for {
// Launch new dials if slots are available.
slots := d.freeDialSlots() // number of free dial slots
slots -= d.startStaticDials(slots) // static dials go first and consume slots
if slots > 0 { // slots remain: accept dynamic candidates from the iterator
nodesCh = d.nodesIn
} else {
nodesCh = nil
}
d.rearmHistoryTimer(historyExp)
d.logStats()
select {
case node := <-nodesCh: // new dynamic candidate: dial it if checkDial allows
if err := d.checkDial(node); err != nil {
d.log.Trace("Discarding dial candidate", "id", node.ID(), "ip", node.IP(), "reason", err)
} else {
d.startDial(newDialTask(node, dynDialedConn))
}
case task := <-d.doneCh: // a dial task has finished
id := task.dest.ID()
delete(d.dialing, id)
d.updateStaticPool(id)
d.doneSinceLastLog++
case c := <-d.addPeerCh: // a peer connection was added
if c.is(dynDialedConn) || c.is(staticDialedConn) {
d.dialPeers++
}
id := c.node.ID()
d.peers[id] = c.flags
// Remove from static pool because the node is now connected.
task := d.static[id]
if task != nil && task.staticPoolIndex >= 0 {
d.removeFromStaticPool(task.staticPoolIndex)
}
// TODO: cancel dials to connected peers
case c := <-d.remPeerCh: // a peer connection was removed
if c.is(dynDialedConn) || c.is(staticDialedConn) {
d.dialPeers--
}
delete(d.peers, c.node.ID())
d.updateStaticPool(c.node.ID())
case node := <-d.addStaticCh: // a static node was added
id := node.ID()
_, exists := d.static[id]
d.log.Trace("Adding static node", "id", id, "ip", node.IP(), "added", !exists)
if exists {
continue loop
}
task := newDialTask(node, staticDialedConn)
d.static[id] = task
if d.checkDial(node) == nil {
d.addToStaticPool(task)
}
case node := <-d.remStaticCh: // a static node was removed
id := node.ID()
task := d.static[id]
d.log.Trace("Removing static node", "id", id, "ok", task != nil)
if task != nil {
delete(d.static, id)
if task.staticPoolIndex >= 0 {
d.removeFromStaticPool(task.staticPoolIndex)
}
}
case <-historyExp: // the dial history timer fired
d.expireHistory()
case <-d.ctx.Done(): // shutdown requested: close the iterator and leave the loop
it.Close()
break loop
}
}
d.stopHistoryTimer(historyExp)
// Wait for every dial task that is still running to report on doneCh.
for range d.dialing {
<-d.doneCh
}
d.wg.Done()
}
// readNodes runs in its own goroutine and delivers nodes from
// the input iterator to the nodesIn channel.
//
// It returns when the iterator is exhausted or closed (it.Next reports false)
// or as soon as the scheduler context is canceled. Returning on cancellation
// avoids pulling further nodes from the iterator only to discard them while
// shutdown is in progress. The WaitGroup slot is released on return.
func (d *dialScheduler) readNodes(it enode.Iterator) {
	defer d.wg.Done()

	for it.Next() {
		select {
		case d.nodesIn <- it.Node():
		case <-d.ctx.Done():
			// Nobody will receive from nodesIn anymore.
			return
		}
	}
}
// logStats prints dialer statistics to the log, at most once per
// dialStatsLogInterval. The message is suppressed when enough peers are
// connected because users should only see it while their client is starting up
// or comes back online. The per-interval counter is reset whenever the
// interval has elapsed, whether or not the message was printed.
func (d *dialScheduler) logStats() {
	now := d.clock.Now()
	if nextAllowed := d.lastStatsLog.Add(dialStatsLogInterval); now < nextAllowed {
		return
	}
	stillLooking := d.dialPeers < dialStatsPeerLimit && d.dialPeers < d.maxDialPeers
	if stillLooking {
		d.log.Info("Looking for peers", "peercount", len(d.peers), "tried", d.doneSinceLastLog, "static", len(d.static))
	}
	d.lastStatsLog = now
	d.doneSinceLastLog = 0
}
// rearmHistoryTimer configures d.historyTimer to fire when the
// next item in d.history expires. When it fires, the timer callback sends a
// value on ch. Nothing happens if the history is empty or the timer is already
// set for the next expiry time.
func (d *dialScheduler) rearmHistoryTimer(ch chan struct{}) {
	if len(d.history) == 0 {
		return
	}
	next := d.history.nextExpiry()
	if next == d.historyTimerTime {
		// Already armed for this expiry.
		return
	}
	d.stopHistoryTimer(ch)
	d.historyTimerTime = next
	wait := time.Duration(next - d.clock.Now())
	d.historyTimer = d.clock.AfterFunc(wait, func() { ch <- struct{}{} })
}
// stopHistoryTimer stops the timer and drains the channel it sends on.
// The receive from ch only happens when Stop reports false; it is a no-op
// when no timer is set.
func (d *dialScheduler) stopHistoryTimer(ch chan struct{}) {
	if d.historyTimer == nil {
		return
	}
	if stopped := d.historyTimer.Stop(); !stopped {
		<-ch
	}
}
// expireHistory removes expired items from d.history and clears the history
// timer state. For every expired entry the corresponding static dial task, if
// any, is offered back to the static pool.
func (d *dialScheduler) expireHistory() {
	d.historyTimer.Stop()
	d.historyTimer, d.historyTimerTime = nil, 0

	// History keys are the raw bytes of the node ID (see startDial).
	onExpired := func(hkey string) {
		var id enode.ID
		copy(id[:], hkey)
		d.updateStaticPool(id)
	}
	d.history.expire(d.clock.Now(), onExpired)
}
// freeDialSlots returns the number of free dial slots. The result can be negative
// when peers are connected while their task is still running.
//
// The slot budget is twice the number of missing dialed peers, capped at
// maxActiveDials; running dials are subtracted from that budget.
func (d *dialScheduler) freeDialSlots() int {
	budget := 2 * (d.maxDialPeers - d.dialPeers)
	if d.maxActiveDials < budget {
		budget = d.maxActiveDials
	}
	return budget - len(d.dialing)
}
// checkDial returns an error if node n should not be dialed, or nil if it may
// be dialed. The checks run in a fixed order and the first failing one decides
// the returned error.
func (d *dialScheduler) checkDial(n *enode.Node) error {
	id := n.ID()
	if id == d.self {
		return errSelf
	}
	// This check can trigger if a non-TCP node is found
	// by discovery. If there is no IP, the node is a static
	// node and the actual endpoint will be resolved later in dialTask.
	if n.TCP() == 0 && n.IP() != nil {
		return errNoPort
	}
	if _, active := d.dialing[id]; active {
		return errAlreadyDialing
	}
	if _, connected := d.peers[id]; connected {
		return errAlreadyConnected
	}
	// A nil netRestrict disables the IP whitelist.
	if d.netRestrict != nil {
		if !d.netRestrict.Contains(n.IP()) {
			return errNotWhitelisted
		}
	}
	if d.history.contains(string(id.Bytes())) {
		return errRecentlyDialed
	}
	return nil
}
// startStaticDials starts up to n static dial tasks, picking them at random
// from staticPool, and returns how many were started. Each started task is
// removed from the pool.
func (d *dialScheduler) startStaticDials(n int) (started int) {
	for started < n && len(d.staticPool) > 0 {
		pick := d.rand.Intn(len(d.staticPool))
		d.startDial(d.staticPool[pick])
		d.removeFromStaticPool(pick)
		started++
	}
	return started
}
// updateStaticPool attempts to move the given static dial back into staticPool.
// Nothing happens unless id belongs to a static task that is currently outside
// the pool and passes checkDial.
func (d *dialScheduler) updateStaticPool(id enode.ID) {
	task, isStatic := d.static[id]
	if !isStatic || task.staticPoolIndex >= 0 {
		return
	}
	if err := d.checkDial(task.dest); err == nil {
		d.addToStaticPool(task)
	}
}
// addToStaticPool appends task to staticPool and records its position in
// task.staticPoolIndex. It panics if the task is already in the pool.
func (d *dialScheduler) addToStaticPool(task *dialTask) {
	if task.staticPoolIndex >= 0 {
		panic("attempt to add task to staticPool twice")
	}
	// The new element lands at the current length of the pool.
	task.staticPoolIndex = len(d.staticPool)
	d.staticPool = append(d.staticPool, task)
}
// removeFromStaticPool removes the task at idx from staticPool. It does that by moving the
// current last element of the pool to idx and then shortening the pool by one.
// The removed task's staticPoolIndex is reset to -1.
func (d *dialScheduler) removeFromStaticPool(idx int) {
	last := len(d.staticPool) - 1
	removed, moved := d.staticPool[idx], d.staticPool[last]

	// Fill the hole with the last task and fix up its index.
	d.staticPool[idx] = moved
	moved.staticPoolIndex = idx

	// Clear the vacated slot so the pool does not keep the pointer, then shrink.
	d.staticPool[last] = nil
	d.staticPool = d.staticPool[:last]

	// Must come last: when idx == last, moved and removed are the same task.
	removed.staticPoolIndex = -1
}
// startDial runs the given dial task in a separate goroutine. Before launching
// it, the destination is recorded in the dial history (expiring after
// dialHistoryExpiration) and in the set of active dials. When the task has
// run, it is reported back on doneCh.
func (d *dialScheduler) startDial(task *dialTask) {
	id := task.dest.ID()
	d.log.Trace("Starting p2p dial", "id", id, "ip", task.dest.IP(), "flag", task.flags)

	expiry := d.clock.Now().Add(dialHistoryExpiration)
	d.history.add(string(id.Bytes()), expiry)
	d.dialing[id] = task

	go func() {
		task.run(d)
		d.doneCh <- task
	}()
}
// A dialTask generated for each node that is dialed.
type dialTask struct {
staticPoolIndex int // position in dialScheduler.staticPool, or -1 when not in the pool
flags connFlag // connection flags passed to the setup function
// These fields are private to the task and should not be
// accessed by dialScheduler while the task is running.
dest *enode.Node
lastResolved mclock.AbsTime // time of the last resolve attempt; zero if never resolved
resolveDelay time.Duration // current minimum wait between resolve attempts
}
// newDialTask creates a task for dialing dest with the given connection flags.
// The task starts outside the static pool (staticPoolIndex is -1).
func newDialTask(dest *enode.Node, flags connFlag) *dialTask {
	t := new(dialTask)
	t.dest, t.flags = dest, flags
	t.staticPoolIndex = -1
	return t
}
// dialError wraps an error returned by the NodeDialer. dialTask.run uses the
// type to tell failed connection attempts apart from errors returned by the
// setup function.
type dialError struct {
error
}
// run performs the dial task: it resolves the destination first when needed,
// then dials it. If the connection attempt itself fails for a static node, the
// node is resolved once more and, when that succeeds, dialed a second time.
func (t *dialTask) run(d *dialScheduler) {
	if t.needResolve() {
		if !t.resolve(d) {
			return
		}
	}

	if err := t.dial(d, t.dest); err != nil {
		// For static nodes, resolve one more time if dialing fails.
		_, connectFailed := err.(*dialError)
		isStatic := t.flags&staticDialedConn != 0
		if connectFailed && isStatic && t.resolve(d) {
			t.dial(d, t.dest)
		}
	}
}
// needResolve reports whether the destination must be resolved before dialing,
// which is the case for static dial tasks whose node has no IP address.
func (t *dialTask) needResolve() bool {
	isStatic := t.flags&staticDialedConn != 0
	return isStatic && t.dest.IP() == nil
}
// resolve attempts to find the current endpoint for the destination
// using the scheduler's resolver. It reports whether t.dest was updated.
//
// Resolve operations are throttled with backoff to avoid flooding the
// discovery network with useless queries for nodes that don't exist.
// The delay doubles after every failed lookup, up to maxResolveDelay, and
// resets to initialResolveDelay when the node is found.
func (t *dialTask) resolve(d *dialScheduler) bool {
	if d.resolver == nil {
		return false
	}
	if t.resolveDelay == 0 {
		t.resolveDelay = initialResolveDelay
	}
	// Skip the lookup if the previous attempt was less than resolveDelay ago.
	if t.lastResolved > 0 {
		sinceLast := time.Duration(d.clock.Now() - t.lastResolved)
		if sinceLast < t.resolveDelay {
			return false
		}
	}

	found := d.resolver.Resolve(t.dest)
	t.lastResolved = d.clock.Now()

	if found != nil {
		// The node was found.
		t.resolveDelay = initialResolveDelay
		t.dest = found
		d.log.Debug("Resolved node", "id", t.dest.ID(), "addr", &net.TCPAddr{IP: t.dest.IP(), Port: t.dest.TCP()})
		return true
	}

	// Lookup failed: back off, bounded by maxResolveDelay.
	backoff := t.resolveDelay * 2
	if backoff > maxResolveDelay {
		backoff = maxResolveDelay
	}
	t.resolveDelay = backoff
	d.log.Debug("Resolving node failed", "id", t.dest.ID(), "newdelay", t.resolveDelay)
	return false
}
// dial performs the actual connection attempt. A failure to connect is logged
// and returned wrapped in *dialError. On success the connection is wrapped by
// newMeteredConn and handed to the scheduler's setup function, whose error is
// returned unchanged.
func (t *dialTask) dial(d *dialScheduler, dest *enode.Node) error {
	fd, err := d.dialer.Dial(d.ctx, t.dest)
	if err != nil {
		cause := cleanupDialErr(err)
		d.log.Trace("Dial error", "id", t.dest.ID(), "addr", nodeAddr(t.dest), "conn", t.flags, "err", cause)
		return &dialError{err}
	}
	remote := &net.TCPAddr{IP: dest.IP(), Port: dest.TCP()}
	return d.setupFunc(newMeteredConn(fd, false, remote), t.flags, dest)
}
// String formats the task as its flags, the first eight bytes of the
// destination ID in hex, and the destination's IP and TCP port.
func (t *dialTask) String() string {
	nodeID := t.dest.ID()
	ip, port := t.dest.IP(), t.dest.TCP()
	return fmt.Sprintf("%v %x %v:%d", t.flags, nodeID[:8], ip, port)
}
// cleanupDialErr strips the net.OpError wrapper from dial errors so that log
// output shows only the underlying cause. If err is, or wraps, a *net.OpError
// whose Op is "dial", the OpError's inner Err is returned; every other error
// (including nil) is returned unchanged.
func cleanupDialErr(err error) error {
	// errors.As also finds an OpError hidden behind further wrapping, which a
	// plain type assertion would miss.
	var opErr *net.OpError
	if errors.As(err, &opErr) && opErr.Op == "dial" {
		return opErr.Err
	}
	return err
}