The Go language: regexp
Go provides official support for regular expressions through the standard `regexp` package.
The package has three functions that test whether a pattern matches the input. Each returns true when there is a match and false otherwise; the three functions differ only in the type of input they accept.
func Match(pattern string, b []byte) (matched bool, error error) func MatchReader(pattern string, r io.RuneReader) (matched bool, error error) func MatchString(pattern string, s string) (matched bool, error error)
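For example, the following function checks whether its input is written in the form of an IP address, and the program after it checks whether a command-line argument is a number: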
// ipFormat matches four dot-separated groups of one to three decimal digits.
// It is compiled once at package scope so IsIP does not recompile the
// pattern on every call.
var ipFormat = regexp.MustCompile(`^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$`)

// IsIP reports whether ip is written in dotted-quad form: four groups of one
// to three decimal digits separated by dots, with nothing before or after.
//
// Only the format is checked. Each group may be any value from 0 to 999, so a
// string such as "999.1.1.1" is accepted.
func IsIP(ip string) (b bool) {
	return ipFormat.MatchString(ip)
}
// main classifies its first command-line argument.
//
// When no argument is given it prints a usage line and exits with status 1.
// Otherwise it prints "Number" if the argument consists solely of the digits
// 0-9, and "Not a number" if it does not.
func main() {
	// Guard clause: only the program name is present.
	if len(os.Args) == 1 {
		fmt.Println("Usage: regexp [string]")
		os.Exit(1)
	}

	// The error result is discarded: the pattern is a fixed literal, and a
	// false result simply falls through to "Not a number".
	allDigits, _ := regexp.MatchString("^[0-9]+$", os.Args[1])

	verdict := "Not a number"
	if allDigits {
		verdict = "Number"
	}
	fmt.Println(verdict)
}
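In other words, the Match functions can only be used to test whether the input matches a pattern; they cannot extract the data that matched.
Extracting or replacing the matched text is done using several other functions of the package, as the following example shows: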
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"
	"strings"
)

// Patterns used to reduce an HTML page to plain text. They are compiled once
// at package scope; MustCompile panics at start-up if a pattern is invalid
// instead of silently leaving a nil *Regexp behind.
var (
	// tagRe matches one tag: "<", the shortest run of any characters, ">".
	tagRe = regexp.MustCompile("\\<[\\S\\s]+?\\>")
	// styleRe matches a <style ...> element up to its closing tag.
	styleRe = regexp.MustCompile("\\<style[\\S\\s]+?\\</style\\>")
	// scriptRe matches a <script ...> element up to its closing tag.
	scriptRe = regexp.MustCompile("\\<script[\\S\\s]+?\\</script\\>")
	// blankRe matches a run of two or more whitespace characters.
	blankRe = regexp.MustCompile("\\s{2,}")
)

// htmlToText strips markup from src and returns the remaining text.
//
// Tags are lowercased first so that the style and script patterns, which are
// written in lowercase, also match upper- or mixed-case tags.
func htmlToText(src string) string {
	// Convert all HTML tags to lowercase; ReplaceAllStringFunc passes each
	// match through strings.ToLower.
	src = tagRe.ReplaceAllStringFunc(src, strings.ToLower)

	// Remove STYLE elements, including their content.
	src = styleRe.ReplaceAllString(src, "")

	// Remove SCRIPT elements, including their content.
	src = scriptRe.ReplaceAllString(src, "")

	// Replace every remaining tag with a newline.
	src = tagRe.ReplaceAllString(src, "\n")

	// Collapse each run of consecutive whitespace into a single newline.
	src = blankRe.ReplaceAllString(src, "\n")

	return strings.TrimSpace(src)
}

// Demonstrate a crawler here: main downloads a page, strips its markup and
// prints the remaining text.
func main() {
	resp, err := http.Get("http://www.baidu.com") // request the URL
	if err != nil {
		fmt.Println("http get error.")
		// resp is nil when Get fails, so stop before touching resp.Body.
		return
	}
	defer resp.Body.Close()

	// Read the whole response body (the page source) into memory.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("http read error")
		return
	}

	fmt.Println(htmlToText(string(body)))
}
func (re *Regexp) Find(b []byte) []byte func (re *Regexp) FindAll(b []byte, n int) [][]byte func (re *Regexp) FindAllIndex(b []byte, n int) [][]int func (re *Regexp) FindAllString(s string, n int) []string func (re *Regexp) FindAllStringIndex(s string, n int) [][]int func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int func (re *Regexp) FindIndex(b []byte) (loc []int) func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int func (re *Regexp) FindString(s string) string func (re *Regexp) FindStringIndex(s string) (loc []int) func (re *Regexp) FindStringSubmatch(s string) []string func (re *Regexp) FindStringSubmatchIndex(s string) []int func (re *Regexp) FindSubmatch(b []byte) [][]byte func (re *Regexp) FindSubmatchIndex(b []byte) []int
The example below shows how several of these functions are used:
package main

import (
	"fmt"
	"regexp"
)

// main demonstrates the Find family of Regexp methods on a sample sentence.
func main() {
	a := "I am learning Go language"
	text := []byte(a) // the []byte variants of the Find methods are used below

	// Match runs of 2 to 4 lowercase letters. MustCompile panics on an
	// invalid pattern instead of returning an error that could be ignored.
	re := regexp.MustCompile("[a-z]{2,4}")

	// Find returns the first (leftmost) match.
	one := re.Find(text)
	fmt.Println("Find:", string(one))

	// FindAll returns a slice of all matches. If n is less than 0 every
	// match is returned; otherwise at most n matches are returned.
	// Println shows each []byte as its numeric byte values.
	all := re.FindAll(text, -1)
	fmt.Println("FindAll", all)

	// FindIndex returns the start and end position of the first match.
	index := re.FindIndex(text)
	fmt.Println("FindIndex", index)

	// FindAllIndex returns the positions of all matches; n is as above.
	allindex := re.FindAllIndex(text, -1)
	fmt.Println("FindAllIndex", allindex)

	re2 := regexp.MustCompile("am(.*)lang(.*)")

	// FindSubmatch returns a slice: the first element is the whole match,
	// the second is the text captured by the first (), the third is the text
	// captured by the second ().
	// The first element of the output below is "am learning Go language".
	// The second element is " learning Go "; note the surrounding spaces.
	// The third element is "uage".
	submatch := re2.FindSubmatch(text)
	fmt.Println("FindSubmatch", submatch)
	for _, v := range submatch {
		fmt.Println(string(v))
	}

	// FindSubmatchIndex is like FindIndex, but also reports the positions of
	// each captured group.
	submatchindex := re2.FindSubmatchIndex(text)
	fmt.Println(submatchindex)

	// FindAllSubmatch finds the submatches of every match.
	submatchall := re2.FindAllSubmatch(text, -1)
	fmt.Println(submatchall)

	// FindAllSubmatchIndex finds the positions of the submatches of every match.
	submatchallindex := re2.FindAllSubmatchIndex(text, -1)
	fmt.Println(submatchallindex)
}
// main demonstrates Regexp.Expand: every "call <cmd> <arg>" line of a sample
// text is rewritten as "<cmd>('<arg>')" and the result is printed.
func main() {
	// One command per line. The (?m) flag in the pattern below makes $ match
	// at the end of each line, so the input must contain line breaks for the
	// pattern to match line by line; the "hello bob" line has no "call" and
	// is skipped.
	src := []byte(`
call hello alice
hello bob
call hello eve
`)
	pat := regexp.MustCompile(`(?m)(call)\s+(?P<cmd>\w+)\s+(?P<arg>.+)\s*$`)

	// $cmd and $arg refer to the named groups of pat.
	template := []byte("$cmd('$arg')\n")

	// A nil slice is enough: Expand appends to res and returns the result.
	var res []byte
	for _, loc := range pat.FindAllSubmatchIndex(src, -1) {
		res = pat.Expand(res, template, src, loc)
	}
	fmt.Println(string(res))
}