一个例子搞懂浏览器工作原理

当我们在浏览器中输入一个网址，一个网页就会展现在我们面前。这是个很复杂的过程，但我们只能看到冰山一角。我想用一篇文章，一个 demo，来把浏览器的工作过程梳理一遍。

下面我按步骤由前到后进行梳理。

1. HTTP 请求页面

首先定义一个 HTML 页面

<!DOCTYPE html>
<!-- Demo page for this walkthrough: a head with metadata and a body with a few inline elements. -->
<html>

<head>
    <title>http</title>
    <!-- Declares the character encoding of the document as UTF-8. -->
    <meta charset="utf-8">
    <!-- External stylesheet; the href is resolved relative to the page URL. -->
    <link rel="stylesheet" type="text/css" href="http.css">
</head>

<body>
    <!-- &lt; and &gt; are character references, so the tag name is shown as literal text instead of being parsed as a tag. -->
    <p>&lt;mark&gt; 元素用于 <mark>高亮</mark> 文本</p>
    <kbd>cmd</kbd>
    <p>Type the following in the Run dialog: <kbd>cmd</kbd><br />Then click the OK button.</p>
</body>

</html>

然后用 node 写一个简单的后台

const http = require('http')
const url = require('url')
const qs = require('querystring')
const fs = require('fs')

// TCP port the demo server listens on
const port = 8088

/**
 * Minimal demo backend.
 *
 * Every GET request (whatever its path) is answered with the contents of
 * http2.html located next to this script. Any other method receives a
 * 405 response, and a failed file read receives a 500 response, so a client
 * is never left waiting on a request that will not be answered.
 */
const server = http.createServer((req, res) => {

    if (req.method !== 'GET') {
        // Only GET is implemented in this demo; say so explicitly
        res.writeHead(405, {
            'Allow': 'GET',
            'Content-Type': 'text/plain'
        })
        res.end('Method Not Allowed')
        return
    }

    fs.readFile(`${__dirname}/http2.html`, (err, file) => {
        if (err) {
            console.log(err)
            // Tell the client the read failed instead of leaving the socket open
            res.writeHead(500, {
                'Content-Type': 'text/plain'
            })
            res.end('Internal Server Error')
            return
        }

        // writeHead is the documented API; writeHeader is a deprecated alias
        res.writeHead(200, {
            'Content-Type': 'text/html'
        })
        res.write(file)
        res.end()
    })

})

server.listen(port, () => {
    console.log(`server start at ${port}`)
})

网址是 http://localhost:8088/，使用浏览器进行 http 请求，并得到请求行、请求头部、响应行、响应头部。

请求行与请求头部

GET / HTTP/1.1
Host: localhost:8088
Connection: keep-alive
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
Cookie: _ga=GA1.1.1064190624.1487806123

响应行与响应头部

HTTP/1.1 200 OK
Content-Type: text/html
Date: Sat, 25 May 2019 03:47:20 GMT
Connection: keep-alive
Transfer-Encoding: chunked

HTTP/1.1 的请求报文和响应报文都采用文本格式：第一行是请求行或响应行（状态行），其后是若干行头部；头部之后是一个空行，空行之后才是请求体或响应体。

这个 HTTP 请求体为空，响应体是 HTML 代码文本，也就是最上方我们定义的那个 HTML 页面的代码。

2. DOM 树构建

HTTP 协议基于 TCP 协议，规定了使用 TCP 协议来传输的文本格式。浏览器发出请求并接收到响应后，就会根据 HTTP 协议解析响应的文本：先解析响应行和响应头部，从头部（例如 Content-Type）得知响应体的格式，再按照该格式解析响应体。这里传输的文本格式是 HTML，浏览器通过解析响应体中的 HTML 文本构建 DOM 树。

用 JavaScript 模拟浏览器对于 HTML 的解析过程，具体代码如下:

扫描二维码关注公众号，回复： 6438880 查看本文章

  // 每个函数是一个状态，参数是传入的字符，返回值是下一个状态

////////////////////////////////词法分析    /////////////////////////////////////////////
// End-of-input marker: the value `undefined`, which is never a character of the input
const EOF = undefined

/** Marker class for an opening-tag token. */
class StartTagToken {
}

/** Marker class for a closing-tag token. */
class EndTagToken {
}

/** Marker class for an attribute that is being collected. */
class Attribute {
}

/**
 * Lexer (tokenizer) for a simplified subset of HTML, written as a state
 * machine: every state is a function that takes the next character and
 * returns the state that handles the character after it.
 *
 * Tokens are pushed to `syntaxer.receiveInput(token)`:
 *   - a one-character string for each character of text,
 *   - a StartTagToken / EndTagToken carrying `name` plus one property per
 *     attribute (attribute names are lower-cased),
 *   - EOF when EOF is fed in.
 * A self-closing start tag (`<img />`) emits the start tag followed by a
 * matching EndTagToken.
 *
 * Simplifications: `<!...>` constructs (doctype, comments) are skipped up to
 * the next '>', character references are not decoded, and the content of
 * script/style elements is not treated specially.
 *
 * NOTE: attributes are stored directly on the token, so an attribute that is
 * itself called "name" replaces the tag name.
 *
 * @param {{receiveInput: function(*): *}} syntaxer - consumer of the tokens
 */
function HTMLLexicalParser(syntaxer) {
    let state = data
    let token = null
    let attribute = null

    // Characters that separate tag names and attributes: space, tab, LF, FF, CR
    function isWhitespace(c) {
        return c === '\u0020' || c === '\t' || c === '\n' || c === '\f' || c === '\r'
    }

    function isLetter(c) {
        return /^[a-zA-Z]$/.test(c)
    }

    // Begin collecting a new attribute whose name starts with c
    function startAttribute(c) {
        attribute = new Attribute()
        attribute.name = c.toLowerCase()
        attribute.value = ''
        return attributeName
    }

    // Store the finished attribute on the current tag token
    function commitAttribute() {
        token[attribute.name] = attribute.value
        attribute = null
    }

    // Emit the current tag token and go back to reading text
    function finishTag() {
        emitToken(token)
        return data
    }

    // ---- states ----

    // Plain text between tags
    function data(c) {
        if (c === '<') {
            return tagOpen
        }
        emitToken(c)
        return data
    }

    // Inside `<!...`: skip everything up to and including the next '>'
    function markupDeclaration(c) {
        return c === '>' ? data : markupDeclaration
    }

    // Just after '<'
    function tagOpen(c) {
        if (c === '/') {
            return endTagOpen
        }
        if (c === '!') {
            return markupDeclaration
        }
        if (isLetter(c)) {
            token = new StartTagToken()
            token.name = c.toLowerCase()
            return tagName
        }
        // Not a tag after all: the '<' was text, and c is handled as text too
        emitToken('<')
        return data(c)
    }

    // Just after '</'
    function endTagOpen(c) {
        if (isLetter(c)) {
            token = new EndTagToken()
            token.name = c.toLowerCase()
            return tagName
        }
        if (c === '>') {
            // `</>` carries no tag name; ignore it
            return data
        }
        // Anything else is malformed; skip to the closing '>'
        return markupDeclaration
    }

    function tagName(c) {
        if (c === '/') {
            return selfClosingStartTag
        }
        if (isWhitespace(c)) {
            return beforeAttributeName
        }
        if (c === '>') {
            return finishTag()
        }
        // Letters, digits (h1), dashes (custom elements) ... all extend the name
        token.name += c.toLowerCase()
        return tagName
    }

    // Just after '/' inside a tag
    function selfClosingStartTag(c) {
        if (c === '>') {
            emitToken(token)
            if (token instanceof StartTagToken) {
                // A self-closing element is closed immediately
                const endToken = new EndTagToken()
                endToken.name = token.name
                emitToken(endToken)
            }
            return data
        }
        // A stray '/' in the middle of a tag: carry on reading attributes
        return beforeAttributeName(c)
    }

    function beforeAttributeName(c) {
        if (isWhitespace(c)) {
            return beforeAttributeName
        }
        if (c === '/') {
            return selfClosingStartTag
        }
        if (c === '>') {
            return finishTag()
        }
        return startAttribute(c)
    }

    function attributeName(c) {
        if (c === '=') {
            return beforeAttributeValue
        }
        if (isWhitespace(c)) {
            return afterAttributeName
        }
        if (c === '/') {
            // Attribute without a value, e.g. <input disabled/>
            commitAttribute()
            return selfClosingStartTag
        }
        if (c === '>') {
            // Attribute without a value, e.g. <input disabled>
            commitAttribute()
            return finishTag()
        }
        attribute.name += c.toLowerCase()
        return attributeName
    }

    // Whitespace after an attribute name: a value may or may not follow
    function afterAttributeName(c) {
        if (isWhitespace(c)) {
            return afterAttributeName
        }
        if (c === '=') {
            return beforeAttributeValue
        }
        commitAttribute()
        if (c === '/') {
            return selfClosingStartTag
        }
        if (c === '>') {
            return finishTag()
        }
        return startAttribute(c)
    }

    function beforeAttributeValue(c) {
        if (isWhitespace(c)) {
            return beforeAttributeValue
        }
        if (c === '"') {
            return doubleQuotedAttributeValue
        }
        if (c === "'") {
            return singleQuotedAttributeValue
        }
        if (c === '>') {
            // `attr=` followed directly by '>' leaves the value empty
            commitAttribute()
            return finishTag()
        }
        attribute.value = c
        return unquotedAttributeValue
    }

    // Build the state for a value delimited by the given quote character;
    // everything up to the matching quote, spaces included, is the value
    function quotedAttributeValue(quote) {
        return function quotedValue(c) {
            if (c === quote) {
                commitAttribute()
                return afterQuotedAttributeValue
            }
            attribute.value += c
            return quotedValue
        }
    }
    const doubleQuotedAttributeValue = quotedAttributeValue('"')
    const singleQuotedAttributeValue = quotedAttributeValue("'")

    function afterQuotedAttributeValue(c) {
        if (isWhitespace(c)) {
            return beforeAttributeName
        }
        if (c === '/') {
            return selfClosingStartTag
        }
        if (c === '>') {
            return finishTag()
        }
        // Missing whitespace between attributes: start the next one right away
        return beforeAttributeName(c)
    }

    function unquotedAttributeValue(c) {
        if (isWhitespace(c)) {
            commitAttribute()
            return beforeAttributeName
        }
        if (c === '>') {
            commitAttribute()
            return finishTag()
        }
        attribute.value += c
        return unquotedAttributeValue
    }

    // Hand a finished token to the syntax analyser
    function emitToken(emitted) {
        syntaxer.receiveInput(emitted)
    }

    /**
     * Feed the next input character (or EOF) into the state machine.
     * @param {string|undefined} char - one character, or EOF to end the input
     */
    this.receiveInput = function(char) {
        if (char === EOF) {
            // End of input: an unfinished tag is dropped
            token = null
            attribute = null
            state = data
            emitToken(EOF)
            return
        }
        state = state(char)
    }
}


///////////////////////////////语法分析    ////////////////////////////////////////////////


/**
 * Root node of the parsed tree. It is flagged with `isDocument` and starts
 * with an empty list of child nodes.
 */
class HTMLDocument {
    constructor() {
        Object.assign(this, {
            isDocument: true,
            childNodes: [],
        })
    }
}

/**
 * Element node of the parsed tree.
 *
 * Copies every own enumerable property of the tag token (the tag name and
 * the attributes) onto the node and starts with an empty child list.
 */
class Element {
    /**
     * @param {Object} token - start-tag token the element is built from
     */
    constructor(token) {
        // No super() call: this class extends nothing, and calling super()
        // in such a class is a SyntaxError.
        Object.assign(this, token)
        // Assigned last so the child list is always a fresh, empty array
        this.childNodes = []
    }
}
/**
 * Text node of the parsed tree; holds the accumulated characters in `value`.
 */
class Text {
    /**
     * @param {string} [value] - initial text; a missing or empty value gives ''
     */
    constructor(value) {
        // No super() call: this class extends nothing, and calling super()
        // in such a class is a SyntaxError.
        this.value = value || ''
    }
}

// syntaxer
/**
 * Syntax analyser: turns the token stream produced by the lexer into a tree.
 *
 * A stack of open nodes is kept; the node on top of the stack is the current
 * node, and the bottom entry is always the document.
 *   - a character extends the current Text node, or opens a new one,
 *   - any other token first closes an open Text node,
 *   - a StartTagToken appends a new Element to the current node and makes it
 *     the current node,
 *   - an EndTagToken closes the current element (the tag name is not
 *     compared with the element being closed).
 */
function HTMLSyntaticalParser() {
    const stack = [new HTMLDocument()]

    // The current node is the top of the stack
    function getStackTop() {
        return stack[stack.length - 1]
    }

    /**
     * Consume one token from the lexer.
     * @param {*} token - a character, a tag token, or EOF
     * @returns {*} the node that was closed when an end tag is handled,
     *     otherwise undefined
     */
    this.receiveInput = function(token) {

        if (typeof token === 'string') {
            const current = getStackTop()
            if (current instanceof Text) {
                current.value += token
            } else {
                const text = new Text(token)
                current.childNodes.push(text)
                stack.push(text)
            }
            return
        }

        // Any non-character token ends the text run that is being collected
        if (getStackTop() instanceof Text) {
            stack.pop()
        }

        if (token instanceof StartTagToken) {
            // The current node becomes the parent of the new element
            const element = new Element(token)
            getStackTop().childNodes.push(element)
            stack.push(element)
            return
        }

        if (token instanceof EndTagToken) {
            // The document itself is never popped, so an end tag without an
            // open element is ignored instead of emptying the stack
            if (stack.length > 1) {
                return stack.pop()
            }
        }

    }

    /**
     * @returns {HTMLDocument} the root of the tree built so far
     */
    this.getOutput = function() {
        return stack[0]
    }
}
///////////////////////////////////////////////////////////////////////////////////////
// Sample markup fed through the lexer and the syntax analyser
const html = `<html maaa=a >
<head>
    <title>cool</title>
</head>
<body>
    <img src="a" />
</body>
</html>`

// The lexer pushes its tokens straight into the syntax analyser
const syntaxer = new HTMLSyntaticalParser()
const lexer = new HTMLLexicalParser(syntaxer)

// Drive the lexer one character at a time
Array.from(html).forEach((ch) => {
    lexer.receiveInput(ch)
})

// Print the resulting tree as indented JSON
const tree = syntaxer.getOutput()
console.log(JSON.stringify(tree, null, 2))

3. DOM 树添加 CSS 属性

CSS 首先会经过词法以及语法分析被解析为抽象语法树。然后浏览器会将 DOM 构建过程中构建好的元素与 CSS 文件中规则匹配，为 DOM 树添加样式。这里每生成一个 DOM 节点，便立刻匹配相应的 CSS 规则。

4. 渲染、合成和绘制

渲染：把每一个元素对应的盒变为位图。位图是在内存中建立一张二维表格，把一张图片每一个像素对应的颜色保存进去。
合成：渲染过程不会把子元素绘制到父元素渲染出的位图上；合成这个过程，就是把一部分子元素的位图合并到父元素的位图上，得到合成后的位图。
绘制：浏览器将把位图合成到最终位图并提交给计算机，由计算机负责显示。这个过程中会有重新绘制的现象。

Daria Tumanova 2017-07-13 08-32-01 .jpg

转载于:https://www.jianshu.com/p/112be5f02299

一个例子搞懂浏览器工作原理

猜你喜欢