Go regular expression processing

The Go language provides official support for regular expressions through the standard regexp package.

The package has three functions that report whether the input matches a pattern: each returns true on a match and false otherwise. The three functions differ only in their input source (a byte slice, an io.RuneReader, or a string).

func Match(pattern string, b []byte) (matched bool, error error)
func MatchReader(pattern string, r io.RuneReader) (matched bool, error error)
func MatchString(pattern string, s string) (matched bool, error error)

For example, to check whether a string has the form of an IPv4 address, or whether a command-line argument is a number:

// ipPattern matches four dot-separated groups of one to three decimal digits.
// It is compiled once at package scope so IsIP does not recompile it on every call.
var ipPattern = regexp.MustCompile(`^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$`)

// IsIP reports whether ip has the dotted-quad shape of an IPv4 address,
// for example "192.168.1.1".
//
// Only the format is checked: each group must be one to three digits, but the
// groups are not verified to lie in the range 0-255.
func IsIP(ip string) bool {
	return ipPattern.MatchString(ip)
}
// main classifies the first command-line argument as a number or not.
// With no argument it prints a usage line and exits with status 1.
func main() {
	// os.Args[0] is the program name, so a length of 1 means no argument was given.
	if len(os.Args) == 1 {
		fmt.Println("Usage: regexp [string]")
		os.Exit(1)
	}

	// The pattern is a fixed, valid expression, so the compile error is discarded.
	allDigits, _ := regexp.MatchString("^[0-9]+$", os.Args[1])

	verdict := "Not a number"
	if allDigits {
		verdict = "Number"
	}
	fmt.Println(verdict)
}

As in other languages, the Match functions can only test whether the input matches a pattern; they cannot extract the data that matched.

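To extract or replace matched content, compile the pattern into a Regexp and use several of its other functions, as in the following example: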

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"
	"strings"
)
// main demonstrates a tiny crawler: it fetches a web page, strips the markup
// with regular expressions, and prints the remaining text.
func main() {
	resp, err := http.Get("http://www.baidu.com") // request the URL
	if err != nil {
		fmt.Println("http get error.")
		// resp is nil when Get fails, so stop here instead of
		// dereferencing it in the deferred Close below.
		return
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body) // read the whole response body
	if err != nil {
		fmt.Println("http read error")
		return
	}

	src := string(body)

	// Convert every HTML tag to lowercase so the style/script patterns below
	// match regardless of how the page capitalizes its tags.
	// The patterns are fixed literals, so MustCompile is used instead of
	// Compile with a discarded error.
	re := regexp.MustCompile(`\<[\S\s]+?\>`)
	src = re.ReplaceAllStringFunc(src, strings.ToLower) // each match is replaced by the function's result

	// Remove <style> elements together with their contents.
	re = regexp.MustCompile(`\<style[\S\s]+?\</style\>`)
	src = re.ReplaceAllString(src, "")

	// Remove <script> elements together with their contents.
	re = regexp.MustCompile(`\<script[\S\s]+?\</script\>`)
	src = re.ReplaceAllString(src, "")

	// Replace every remaining tag (anything in angle brackets) with a newline.
	re = regexp.MustCompile(`\<[\S\s]+?\>`)
	src = re.ReplaceAllString(src, "\n")

	// Collapse every run of two or more whitespace characters into one newline.
	re = regexp.MustCompile(`\s{2,}`)
	src = re.ReplaceAllString(src, "\n")

	fmt.Println(strings.TrimSpace(src))
}
func (re *Regexp) Find(b []byte) []byte
func (re *Regexp) FindAll(b []byte, n int) [][]byte
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int
func (re *Regexp) FindAllString(s string, n int) []string
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int
func (re *Regexp) FindIndex(b []byte) (loc []int)
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int)
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int
func (re *Regexp) FindString(s string) string
func (re *Regexp) FindStringIndex(s string) (loc []int)
func (re *Regexp) FindStringSubmatch(s string) []string
func (re *Regexp) FindStringSubmatchIndex(s string) []int
func (re *Regexp) FindSubmatch(b []byte) [][]byte
func (re *Regexp) FindSubmatchIndex(b []byte) []int

See the example below:

package main

import (
	"fmt"
	"regexp"
)

// main walks through the Find family of Regexp methods on a sample sentence
// and prints what each one returns.
func main() {
	a := "I am learning Go language"

	// Match runs of 2 to 4 lowercase letters.
	re := regexp.MustCompile("[a-z]{2,4}")

	// Find returns the first match of the expression.
	one := re.Find([]byte(a))
	fmt.Println("Find:", string(one))

	// FindAll returns a slice of all matches. A negative n returns every
	// match; otherwise at most n matches are returned.
	all := re.FindAll([]byte(a), -1)
	fmt.Println("FindAll", all)

	// FindIndex returns the start and end position of the first match.
	index := re.FindIndex([]byte(a))
	fmt.Println("FindIndex", index)

	// FindAllIndex returns the positions of all matches; n is as above.
	allindex := re.FindAllIndex([]byte(a), -1)
	fmt.Println("FindAllIndex", allindex)

	re2 := regexp.MustCompile("am(.*)lang(.*)")

	// FindSubmatch returns a slice: the first element is the whole match, the
	// second is the text captured by the first (), the third by the second ().
	// Here the first element is "am learning Go language",
	// the second is " learning Go " (note the surrounding spaces),
	// and the third is "uage".
	submatch := re2.FindSubmatch([]byte(a))
	fmt.Println("FindSubmatch", submatch)
	for _, v := range submatch {
		fmt.Println(string(v))
	}

	// FindSubmatchIndex is like FindIndex, but also reports the positions of
	// each captured group.
	submatchindex := re2.FindSubmatchIndex([]byte(a))
	fmt.Println(submatchindex)

	// FindAllSubmatch finds the submatches of every match.
	submatchall := re2.FindAllSubmatch([]byte(a), -1)
	fmt.Println(submatchall)

	// FindAllSubmatchIndex finds the submatch positions of every match.
	submatchallindex := re2.FindAllSubmatchIndex([]byte(a), -1)
	fmt.Println(submatchallindex)
}

 

// main rewrites every "call <cmd> <arg>" line of a multi-line input into
// "<cmd>('<arg>')" by expanding a template that refers to the pattern's
// named capture groups, then prints the result.
func main() {
	input := []byte(`
		call hello alice
		hello bob
		call hello eve
	`)
	callRe := regexp.MustCompile(`(?m)(call)\s+(?P<cmd>\w+)\s+(?P<arg>.+)\s*$`)
	template := []byte("$cmd('$arg')\n")

	// Expand appends the filled-in template to out for each match location.
	var out []byte
	for _, loc := range callRe.FindAllSubmatchIndex(input, -1) {
		out = callRe.Expand(out, template, input, loc)
	}
	fmt.Println(string(out))
}

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326400977&siteId=291194637