A brief analysis of log-pilot source code

log-pilot is Alibaba's open-source container log collection project, with features such as dynamic scaling and dynamic configuration. Its core principle is to monitor docker events and automatically generate and reload the filebeat/fluentd configuration, so that log collection scales automatically as containers are dynamically scheduled.

The core of the whole project is:

  • Pilot struct

  • Piloter interface

Pilot

Pilot mainly monitors docker container events, obtains information such as each container's log mount directories, labels, and environment variables, and dynamically generates the filebeat/fluentd configuration files.

// Pilot holds the state used to watch docker container events and to generate
// and reload the filebeat/fluentd configuration files.
type Pilot struct {
    piloter Piloter // Collection-tool backend ("filebeat" or "fluentd") driven through the Piloter interface
    mutex sync.Mutex // Lock for concurrently triggered container events; whichever handler grabs it first runs first (usage not shown in this excerpt)
    templ *template.Template // Configuration template of the log collection client, rendered with Go's text/template
    client *k8s.Client // Docker client used to receive docker events and query container information. NOTE(review): k8s looks like an import alias for the docker client package — confirm
    lastReload time.Time // Time of the last configuration reload
    reloadChan chan bool // Reload notification channel
    stopChan chan bool // Stop notification channel
    baseDir string // Docker data storage location on the host
    logPrefix []string // Prefixes of the environment variables that declare where a container's application logs live; log paths are configured per container through such environment variables
    createSymlink bool // Whether to create a symbolic link (not a hard link) to the log files to be collected
}


Piloter

Piloter defines the methods needed to operate the collection tool; it is mainly responsible for starting, stopping, and reloading the collection tool.

// Piloter is the set of operations a log collection tool must provide so that
// Pilot can start, stop and reload it and locate its configuration files.
//
// The interface is named Piloter, not Pilot: the Pilot struct stores it in a
// field of type Piloter, and a second type named Pilot would be a
// redeclaration.
type Piloter interface {
    Name() string // "filebeat" or "fluentd", identifying the collection tool

    Start() error  // Start the collection tool
    Reload() error // Reload the configuration file
    Stop() error   // Stop the collection tool

    GetBaseConf() string                 // Configuration location of the log collection client, such as /etc/filebeat for filebeat
    GetConfHome() string                 // Directory holding the per-container configuration files, such as prospectors.d for filebeat
    GetConfPath(container string) string // Path of the configuration file for the given container

    OnDestroyEvent(container string) error // Called when the given container is destroyed
}

main function

Program entry, command line processing: log collection configuration template designation, log-pilot log level configuration, etc. 

Pilot.Run

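  • Initialize the Pilot data; Pilot contains the corresponding filebeat/fluentd configuration template, the docker client, the concurrency lock, the piloter object, etc.
  • Start monitoring container events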
// Run creates a Pilot from templ and baseDir via New and then returns the
// result of p.watch().
// The "...." line is elided in this excerpt (presumably the check of the
// error returned by New — confirm against the full source).
func Run(templ string, baseDir string) error {
p, err := New(templ, baseDir)
....
return p.watch()
}

Pilot.watch

  • Use the docker API to connect to docker and watch docker events
// watch starts the collection tool, subscribes to docker events, and launches
// a goroutine that passes every received event to p.processEvent, logging any
// error it returns. Lines shown as "...." are elided in this excerpt.
func (p *Pilot) watch() error {
    ....
     
    err := p.piloter.Start()            // start the collection tool
    ....
     
    msgs, errs := p.client.Events(ctx, options)  // subscribe to docker events; returns channels
 
    go func() {
        ....
 
        for {           // loop forever, receiving events
            select {
            case msg := <-msgs:
                if err := p.processEvent(msg); err != nil {     // handle the docker event
                    log.Errorf("fail to process event: %v,  %v", msg, err)
                }
            ........
        }
    }()
    ....
}

Pilot.processEvent

  Handler function for docker events

func (p *Pilot) processEvent(msg events.Message) error {
	....
	switch msg.Action {
	case "start", "restart":
            ....
            return p.newContainer(&containerJSON)
	case "destroy", "die":
            ....
	    err := p.delContainer(containerId)
	return nil
}

Pilot.newContainer

  • Process the environment variables / tags (labels) / mounts

  • Render the configuration file template, generate the new configuration file, and reload so that it takes effect

// newContainer generates the log collection configuration for one container:
// it builds the template data with p.getLogConfigs, creates the volume symlink,
// renders the template with p.render, writes the result to the path returned by
// p.piloter.GetConfPath(id), and finally calls p.tryReload.
// Lines shown as "...." are elided in this excerpt.
func (p *Pilot) newContainer(containerJSON *types.ContainerJSON) error {
    ....
    // containerJSON is the data type returned by the docker API Client.ContainerInspect
 
    container := container(containerJSON)
 
    for _, e := range env {         // process the environment variables; env is obtained from containerJSON
        .....
    }
    // get the data for the configuration file template
    logConfigs, err := p.getLogConfigs(jsonLogPath, mounts, labels)
    if err != nil {
        return err
    }
    
 
    ....
 
    // link the application log files or directories of the docker container
    p.createVolumeSymlink(containerJSON)
    
 
    // render the template with the data to produce the concrete configuration file
    logConfig, err := p.render(id, container, logConfigs)
    if err != nil {
        return err
    }
    //TODO validate config before save
    //log.Debugf("container %s log config: %s", id, logConfig)
    if err = ioutil.WriteFile(p.piloter.GetConfPath(id), []byte(logConfig), os.FileMode(0644)); err != nil {
        return err
    }
    // reload the configuration
    p.tryReload()
    return nil
}

Pilot.delContainer

  • Remove the volume symlink and delete the configuration file
  • Reload the configuration file

func (p *Pilot) delContainer(id string) error {
	p.removeVolumeSymlink(id)

	//fixme refactor in the future
	if p.piloter.Name() == PILOT_FLUENTD {
		clean := func() {
			log.Infof("Try removing log config %s", id)
			if err := os.Remove(p.piloter.GetConfPath(id)); err != nil {
				log.Warnf("removing %s log config failure", id)
				return
			}
			p.tryReload()
		}
		time.AfterFunc(15*time.Minute, clean)
		return nil
	}

	return p.piloter.OnDestroyEvent(id)
}

LogConfig

The data set used to dynamically render the configuration file template

// LogConfig is the data set used to render the configuration file template.
// Fields without a comment are not described in this excerpt.
type LogConfig struct {
    Name         string                 // log name
    HostDir      string                 // directory of the log file on the host
    ContainerDir string                 // application log directory inside the container
    Format       string
    FormatConfig map[string]string     
    File         string                 // specific log file name
    Tags         map[string]string      // tag data
    Target       string                 // index or kafka topic
    EstimateTime bool
    Stdout       bool
 
    CustomFields  map[string]string     // custom fields added to the log
    CustomConfigs map[string]string     // custom configuration file entries
}

getLogConfigs

  • Get the data for rendering the configuration file template

parseLogConfig

  • Parse the container data to obtain the data for rendering the configuration file template

The final generated filebeat configuration file

- type: log
  enabled: true
  paths:
      - /host/var/lib/docker/containers/b61b94c9f38eec70df32d45df408ea09ad05987bf4ff92d5d5f2eae3fd9e503d/b61b94c9f38eec70df32d45df408ea09ad05987bf4ff92d5d5f2eae3fd9e503d-json.log*
  scan_frequency: 10s
  fields_under_root: true
 
  docker-json: true
  
  fields:
 
      index: aaa-test
 
      topic: aaa-test
  
      docker_container: k8s_tomcat_tomcat_default_6cc39a2f-2a2b-45a2-94d8-a51faf68dd14_0
 
      k8s_container_name: tomcat
 
      k8s_node_name: cn-hangzhou.172.16.179.195
 
      k8s_pod: tomcat
 
      k8s_pod_namespace: default
 
  tail_files: false
  close_inactive: 2h
  close_eof: false
  close_removed: true
  clean_removed: true
  close_renamed: false

Guess you like

Origin blog.51cto.com/3379770/2641663