Differences between the Find methods of Go's regexp package

the difference

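Go's regexp package uses regular expressions to search for and replace text. It offers many Find methods, which differ in the input type they accept, whether they return only the leftmost match or all matches, whether they report submatches (capture groups), and whether they return the matched text or its indexes: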

| Function | Input type | Which matches | Submatches | Returns |
|---|---|---|---|---|
| Find | byte slice | leftmost | no | byte slice |
| FindAll | byte slice | all | no | byte slices |
| FindIndex | byte slice | leftmost | no | indexes |
| FindAllIndex | byte slice | all | no | indexes |
| FindString | string | leftmost | no | string |
| FindAllString | string | all | no | strings |
| FindAllStringIndex | string | all | no | indexes |
| FindAllSubmatch | byte slice | all | yes | byte slices |
| FindAllSubmatchIndex | byte slice | all | yes | indexes |
| FindSubmatch | byte slice | leftmost | yes | byte slices |
| FindSubmatchIndex | byte slice | leftmost | yes | indexes |
| FindStringSubmatch | string | leftmost | yes | strings |
| FindStringSubmatchIndex | string | leftmost | yes | indexes |
| FindAllStringSubmatch | string | all | yes | strings |
| FindAllStringSubmatchIndex | string | all | yes | indexes |

Expanding a template from byte-slice matches (Expand)

package main_test

import (
	"fmt"
	"regexp"
	"testing"
)

// TestExpand shows Regexp.Expand on byte slices: every "key: value" line found
// in the input is rewritten through a template as "key=value", and the
// combined result is printed.
func TestExpand(t *testing.T) {
	// Matches one "key: value" pair; (?m) lets $ anchor at the end of each line.
	kvRe := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)

	// The named groups are referenced from the template to turn
	// "key: value" into "key=value".
	tmpl := []byte("$key=$value\n")

	src := []byte(`
	# comment line
	option1: value1
	option2: value2

	# another comment line
	option3: value3
`)

	var out []byte
	matches := kvRe.FindAllSubmatchIndex(src, -1)
	for i := range matches {
		// Apply the template to each matched pair; Expand appends to out
		// and returns the extended slice.
		out = kvRe.Expand(out, tmpl, src, matches[i])
	}
	fmt.Println(string(out))
}

// option1=value1
// option2=value2
// option3=value3

Here, (?P<key>\w+) defines a capture group named key. Expand copies the template into the result and replaces $key with the text that group matched; the value group works the same way. Expand needs the submatch indexes, so the matches must come from FindAllSubmatchIndex.

Expanding a template from string matches (ExpandString)

// TestExpandString is the string counterpart of the Expand example: every
// "key: value" line in the input is rewritten through a string template as
// "key=value", and the combined result is printed.
func TestExpandString(t *testing.T) {
	// Compile the pattern; (?m) lets $ anchor at the end of each line.
	kvRe := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)

	// Conversion template referring to the named groups.
	tmpl := "$key=$value\n"

	src := `
	# comment line
	option1: value1
	option2: value2

	# another comment line
	option3: value3
`

	var out []byte
	matches := kvRe.FindAllStringSubmatchIndex(src, -1)
	for i := range matches {
		// Apply the template to this match; the result is appended to out.
		out = kvRe.ExpandString(out, tmpl, src, matches[i])
	}
	fmt.Println(string(out))
}


Finding the leftmost match and all matches (Find, FindAll)

// TestFind prints, quoted, the leftmost match of `foo.?` in a byte slice.
func TestFind(t *testing.T) {
	input := []byte(`seafood fool`)
	leftmost := regexp.MustCompile(`foo.?`).Find(input)
	fmt.Printf("%q\n", leftmost)
}
// "food"


// TestFindAll prints, quoted, every match of `foo.?` in a byte slice.
func TestFindAll(t *testing.T) {
	input := []byte(`seafood fool`)
	// A limit of -1 asks for all matches rather than capping the count.
	every := regexp.MustCompile(`foo.?`).FindAll(input, -1)
	fmt.Printf("%q\n", every)
}
// ["food" "fool"]

Find returns only the leftmost match. FindAll takes a second argument n that limits the number of matches: a negative value such as -1 returns all of them, while 1 returns at most one match — the same match Find would return, but wrapped in a slice.
Neither method returns submatches.

Finding the leftmost match in a string (FindString)

// TestFindString prints, quoted, the leftmost match of `foo.?` in each sample
// string; the second sample contains no match.
func TestFindString(t *testing.T) {
	matcher := regexp.MustCompile(`foo.?`)
	for _, sample := range []string{"seafood fool", "meat"} {
		fmt.Printf("%q\n", matcher.FindString(sample))
	}
}

// "food"
// ""

Finding match indexes (FindAllIndex)

// TestFindAllIndex prints the index pairs of the matches of `o.` in "London",
// first limited to one match and then with no limit.
func TestFindAllIndex(t *testing.T) {
	text := []byte("London")
	matcher := regexp.MustCompile(`o.`)
	for _, limit := range []int{1, -1} {
		fmt.Println(matcher.FindAllIndex(text, limit))
	}
}
// [[1 3]]
// [[1 3] [4 6]]

FindAllIndex returns one index pair per match: the start index and the (exclusive) end index, so the matched text is content[loc[0]:loc[1]].

Finding all matches in a string (FindAllString)

// TestFindAllString prints the matches of `a.` for several input strings and
// match limits, one result slice per line.
func TestFindAllString(t *testing.T) {
	matcher := regexp.MustCompile(`a.`)
	cases := []struct {
		text  string
		limit int // negative means "no limit"
	}{
		{"paranormal", -1},
		{"paranormal", 2},
		{"graal", -1},
		{"none", -1},
	}
	for _, c := range cases {
		fmt.Println(matcher.FindAllString(c.text, c.limit))
	}
}
// [ar an al]
// [ar an]
// [aa]
// []

Finding all matches with submatches in a string (FindAllStringSubmatch)

// TestFindAllStringSubmatch prints, for each sample string, every match of
// `a(x*)b` together with the text captured by its group.
func TestFindAllStringSubmatch(t *testing.T) {
	matcher := regexp.MustCompile(`a(x*)b`)
	samples := []string{
		"-ab-",
		"-axxb-",
		"-ab-axb-",
		"-axxb-ab-",
	}
	for _, sample := range samples {
		fmt.Printf("%q\n", matcher.FindAllStringSubmatch(sample, -1))
	}
}

// [["ab" ""]]
// [["axxb" "xx"]]
// [["ab" ""] ["axb" "x"]]
// [["axxb" "xx"] ["ab" ""]]

Finding the indexes of all matches and submatches in a string (FindAllStringSubmatchIndex)

// TestFindAllStringSubmatchIndex prints, for each sample string, the index
// pairs of every match of `a(x*)b` followed by the index pair of its group.
func TestFindAllStringSubmatchIndex(t *testing.T) {
	matcher := regexp.MustCompile(`a(x*)b`)
	// Indices:
	//    01234567   012345678
	//    -ab-axb-   -axxb-ab-
	samples := []string{
		"-ab-",
		"-axxb-",
		"-ab-axb-",
		"-axxb-ab-",
		"-foo-", // contains no match
	}
	for _, sample := range samples {
		fmt.Println(matcher.FindAllStringSubmatchIndex(sample, -1))
	}
}
// [[1 3 2 2]]
// [[1 5 2 4]]
// [[1 3 2 2] [4 7 5 6]]
// [[1 5 2 4] [6 8 7 7]]
// []

Finding all matches with submatches in a byte slice (FindAllSubmatch)

// TestFindAllSubmatch prints, quoted, every match of `foo(.?)` in a byte
// slice together with the text captured by its group.
func TestFindAllSubmatch(t *testing.T) {
	input := []byte(`seafood fool`)
	groups := regexp.MustCompile(`foo(.?)`).FindAllSubmatch(input, -1)
	fmt.Printf("%q\n", groups)
}

// [["food" "d"] ["fool" "l"]]

Finding the indexes of all matches and submatches in a byte slice (FindAllSubmatchIndex)

// TestFindAllSubmatchIndex prints, for every "key: value" line in the input,
// the raw index slice of the match followed by the whole match, the key group
// and the value group sliced out of the input with those indexes.
func TestFindAllSubmatchIndex(t *testing.T) {
	// Each line starts with a tab; those bytes count toward the printed offsets.
	src := []byte("\n" +
		"\t# comment line\n" +
		"\toption1: value1\n" +
		"\toption2: value2\n")

	// Captures a "key: value" pair; (?m) lets $ anchor at the end of each line.
	kvRe := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)

	for _, idx := range kvRe.FindAllSubmatchIndex(src, -1) {
		fmt.Println(idx)
		// idx holds three start/end pairs: whole match, key group, value group.
		whole := src[idx[0]:idx[1]]
		key := src[idx[2]:idx[3]]
		value := src[idx[4]:idx[5]]
		fmt.Printf("1: %s\n2: %s\n3: %s\n", whole, key, value)
	}
}

// [18 33 18 25 27 33]
// 1: option1: value1
// 2: option1
// 3: value1
// [35 50 35 42 44 50]
// 1: option2: value2
// 2: option2
// 3: value2

Finding the indexes of the leftmost match in a byte slice (FindIndex)

// TestFindIndex prints the start/end index pair of the leftmost "key: value"
// line in the input, followed by the text that pair selects.
func TestFindIndex(t *testing.T) {
	// Each line starts with a tab; those bytes count toward the printed offsets.
	src := []byte("\n" +
		"\t# comment line\n" +
		"\toption1: value1\n" +
		"\toption2: value2\n")

	// Captures a "key: value" pair; (?m) lets $ anchor at the end of each line.
	kvRe := regexp.MustCompile(`(?m)(?P<key>\w+):\s+(?P<value>\w+)$`)

	span := kvRe.FindIndex(src)
	fmt.Println(span)

	start, end := span[0], span[1]
	fmt.Printf("%s\n", src[start:end])
}

// [18 33]
// option1: value1

Finding the leftmost match with submatches in a string (FindStringSubmatch)

// TestFindStringSubmatch prints, quoted, the leftmost <title> element of a
// multi-line document together with the text captured between its tags.
func TestFindStringSubmatch(t *testing.T) {
	// The title text sits on its own tab-indented line between the tags.
	doc := "<title>\n\t标题\n\t</title>"

	// (?s) lets . match newlines, so the group can span several lines;
	// .*? keeps the capture as short as possible.
	titleRe := regexp.MustCompile(`(?s)<title>(.*?)<\/title>`)

	groups := titleRe.FindStringSubmatch(doc)
	fmt.Printf("%q\n", groups)
}

// ["<title>\n\t标题\n\t</title>" "\n\t标题\n\t"]

reference

https://github.com/google/re2/wiki/Syntax

Guess you like

Origin blog.csdn.net/lilongsy/article/details/131243919