Several data-collection (scraping) helper methods in C#

Over the past two days I have been working on data collection (web scraping), so I compiled some helper methods that are useful for it. Because the data I needed to collect was relatively simple, I did not use a framework. The two best-known frameworks are HtmlAgilityPack and Jumony; they are worth studying if you are interested. The "Locomotive" collection tool (LocoySpider) is also very convenient, but it is a paid product. Here is the code I put together:

 

   /// <summary>
    /// Helper methods that use regular expressions to strip tags from HTML
    /// and to extract matching fragments from it.
    /// </summary>
    public class HtmlRegex
    {
        /// <summary>
        /// Pattern that matches any HTML tag, opening or closing.
        /// </summary>
        const string HTMLALLTAG = @"<[^>]+>|</[^>]+>";

        /// <summary>
        /// Removes every HTML tag from the given content.
        /// </summary>
        /// <param name="content">Original HTML code.</param>
        /// <returns>The content with all substrings matching the tag pattern removed.</returns>
        public static string RemoveAllHtml(string content)
        {
            return Regex.Replace(content, HTMLALLTAG, "");
        }

        /// <summary>
        /// Gets the first substring of the content that matches a regular expression.
        /// </summary>
        /// <param name="regStr">Regular expression pattern.</param>
        /// <param name="content">Original HTML code.</param>
        /// <param name="hashtml">
        /// When true the match is returned as-is; when false HTML tags are
        /// stripped from the match before it is returned.
        /// </param>
        /// <returns>The matched text, or an empty string when nothing matches.</returns>
        public static string GetStrByRegex(string regStr, string content, bool hashtml = true)
        {
            Match match = new Regex(regStr).Match(content);
            if (!match.Success)
            {
                return string.Empty;
            }

            return hashtml ? match.Value : RemoveAllHtml(match.Value);
        }

        /// <summary>
        /// Gets the first fragment of the content that begins with
        /// <paramref name="start"/> and ends with <paramref name="end"/>.
        /// </summary>
        /// <param name="start">
        /// Start delimiter. It is inserted into the pattern unescaped, so
        /// regex metacharacters in it are interpreted as regex syntax.
        /// </param>
        /// <param name="end">End delimiter; inserted unescaped like <paramref name="start"/>.</param>
        /// <param name="content">Original HTML code.</param>
        /// <param name="hasHtml">
        /// When true the match is returned as-is; when false HTML tags are
        /// stripped from the match before it is returned.
        /// </param>
        /// <returns>The matched fragment including both delimiters, or an empty string when nothing matches.</returns>
        public static string GetStrByRegex(string start, string end, string content, bool hasHtml = true)
        {
            return GetStrByRegex(BuildRangePattern(start, end), content, hasHtml);
        }

        /// <summary>
        /// Gets every substring of the content that matches a regular expression.
        /// </summary>
        /// <param name="regStr">Regular expression pattern.</param>
        /// <param name="content">Original HTML code.</param>
        /// <returns>
        /// The matched strings in match order, or null when there is no match
        /// or the pattern/content is invalid.
        /// </returns>
        public static List<string> GetStrListByRegex(string regStr, string content)
        {
            try
            {
                MatchCollection matches = new Regex(regStr).Matches(content);
                if (matches.Count == 0)
                {
                    // Existing callers test for null, so "no match" stays null
                    // rather than becoming an empty list.
                    return null;
                }

                List<string> strList = new List<string>(matches.Count);
                foreach (Match match in matches)
                {
                    strList.Add(match.Value);
                }
                return strList;
            }
            catch (ArgumentException)
            {
                // Invalid pattern or null pattern/content: best-effort, report "nothing found".
                return null;
            }
            catch (RegexMatchTimeoutException)
            {
                // Only possible when a process-wide regex timeout is configured.
                return null;
            }
        }

        /// <summary>
        /// Gets every fragment of the content that begins with
        /// <paramref name="start"/> and ends with <paramref name="end"/>.
        /// </summary>
        /// <param name="start">
        /// Start delimiter. It is inserted into the pattern unescaped, so
        /// regex metacharacters in it are interpreted as regex syntax.
        /// </param>
        /// <param name="end">End delimiter; inserted unescaped like <paramref name="start"/>.</param>
        /// <param name="content">Original HTML code.</param>
        /// <returns>
        /// The matched fragments (delimiters included) in match order, or null
        /// when there is no match or the resulting pattern/content is invalid.
        /// </returns>
        public static List<string> GetStrListByRegex(string start, string end, string content)
        {
            return GetStrListByRegex(BuildRangePattern(start, end), content);
        }

        /// <summary>
        /// Builds the case-insensitive, single-line pattern that lazily matches
        /// from the start delimiter through the nearest end delimiter.
        /// </summary>
        private static string BuildRangePattern(string start, string end)
        {
            // (?is): i = ignore case, s = let '.' match newlines so fragments may span lines.
            return @"(?is)(" + start + ").*?(" + end + ")";
        }

    }

 

Guess you like

Origin www.cnblogs.com/similar/p/8350870.html