Boost中string的查找用法

头文件

boost/algorithm/string/find.hpp

作用

string的查找有如下API

find_first,在大字符串中查找子串第一次出现时,返回 子串在大串的位置。

ifind_first,在大字符串中查找子串第一次出现的位置,查找时忽略大小写,返回 子串在大串的位置。

find_last,在大字符串中查找子串最后一次出现时,返回 子串在大串的位置。

ifind_last,在大字符串中查找子串最后一次出现的位置,查找时忽略大小写,返回 子串在大串的位置。

find_nth,在大字符串中查找子串第n次出现的位置(n从0开始计数;n为负数时从末尾开始倒数),返回 子串在大串的位置。

ifind_nth,在大字符串中查找子串第n次出现的位置(n从0开始计数;n为负数时从末尾开始倒数),查找时忽略大小写,返回 子串在大串的位置。

find_head,取字符串开头的n个字符(n为负数时,取除末尾|n|个字符以外的部分),返回 对应的区间。

find_tail,取字符串末尾的n个字符(n为负数时,取除开头|n|个字符以外的部分),返回 对应的区间。

find_token,在字符串中查找第一个满足给定谓词的字符(token);开启 token_compress_on 时,相邻的匹配字符合并为一个匹配,返回 匹配的区间。

find,通用查找算法,使用给定的 Finder 对象(如 first_finder)在字符串中查找,返回 匹配的区间。

举例

#include <boost/algorithm/string/find.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>


#include <boost/test/unit_test.hpp>

#include <string>
#include <vector>
#include <iostream>
#include <iterator>
#include <sstream>
#include <boost/test/test_tools.hpp>

using namespace std;
using namespace boost;

// Exercises the Boost string find algorithms (find_first, find_last, find_nth,
// find_head, find_tail, find_token, generic find and their case-insensitive
// variants) against std::string, const std::string, C strings and vector<int>.
// Every expectation is expressed as begin/end offsets of the returned
// iterator_range relative to the start of the searched sequence.
void find_test()
{
    string str1("123abcxXxabcXxXabc321");
    string str2("abc");
    string str3("");
    const char* pch1="123abcxxxabcXXXabc321";
    vector<int> vec1( str1.begin(), str1.end() );

    // find results ------------------------------------------------------------//
    iterator_range<string::iterator> nc_result;
    iterator_range<string::const_iterator> cv_result;
    
    iterator_range<vector<int>::iterator> nc_vresult;
    iterator_range<vector<int>::const_iterator> cv_vresult;

    iterator_range<const char*> ch_result;

    // basic tests ------------------------------------------------------------//


    // find_first
    BOOST_TEST_CHECKPOINT( "find_first" );

    nc_result=find_first( str1, string("abc") );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 3) &&
        ( (nc_result.end()-str1.begin()) == 6) );

    cv_result=find_first( const_cast<const string&>(str1), str2 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 3) &&
        ( (cv_result.end()-str1.begin()) == 6) );

    cv_result=ifind_first( const_cast<const string&>(str1), "xXX" );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 6) &&
        ( (cv_result.end()-str1.begin()) == 9) );

    ch_result=find_first( pch1, "abc" );
    BOOST_CHECK(( (ch_result.begin() - pch1 ) == 3) && ( (ch_result.end() - pch1 ) == 6 ) );

    // find_last
    BOOST_TEST_CHECKPOINT( "find_last" );
    
    nc_result=find_last( str1, string("abc") );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 15) &&
        ( (nc_result.end()-str1.begin()) == 18) );

    cv_result=find_last( const_cast<const string&>(str1), str2 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 15) &&
        ( (cv_result.end()-str1.begin()) == 18) );

    cv_result=ifind_last( const_cast<const string&>(str1), "XXx" );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 12) &&
        ( (cv_result.end()-str1.begin()) == 15) );

    ch_result=find_last( pch1, "abc" );
    BOOST_CHECK(( (ch_result.begin() - pch1 ) == 15) && ( (ch_result.end() - pch1 ) == 18 ) );

    // find_nth
    BOOST_TEST_CHECKPOINT( "find_nth" );

    nc_result=find_nth( str1, string("abc"), 1 );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 9) &&
        ( (nc_result.end()-str1.begin()) == 12) );

    nc_result=find_nth( str1, string("abc"), -1 );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 15) &&
        ( (nc_result.end()-str1.begin()) == 18) );


    cv_result=find_nth( const_cast<const string&>(str1), str2, 1 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 9) &&
        ( (cv_result.end()-str1.begin()) == 12) );

    cv_result=find_nth( const_cast<const string&>(str1), str2, -1 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 15) &&
        ( (cv_result.end()-str1.begin()) == 18) );
        
    cv_result=ifind_nth( const_cast<const string&>(str1), "xxx", 1 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 12) &&
        ( (cv_result.end()-str1.begin()) == 15) );

    // Negative index counts from the end: -1 is the last case-insensitive
    // match of "xxx", which is the same "XxX" run at offsets 12..15.
    cv_result=ifind_nth( const_cast<const string&>(str1), "xxx", -1 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 12) &&
        ( (cv_result.end()-str1.begin()) == 15) );


    ch_result=find_nth( pch1, "abc", 1 );
    BOOST_CHECK(( (ch_result.begin() - pch1 ) == 9) && ( (ch_result.end() - pch1 ) == 12 ) );

    // find_head
    BOOST_TEST_CHECKPOINT( "find_head" );

    nc_result=find_head( str1, 6 );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 0) &&
        ( (nc_result.end()-str1.begin()) == 6) );

    // Negative size: the head is everything except the last 6 characters.
    nc_result=find_head( str1, -6 );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 0) &&
        ( (str1.end()-nc_result.end()) == 6 ) );

    cv_result=find_head( const_cast<const string&>(str1), 6 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 0) &&
        ( (cv_result.end()-str1.begin()) == 6) );

    ch_result=find_head( pch1, 6 );
    BOOST_CHECK( ( (ch_result.begin() - pch1 ) == 0 ) && ( (ch_result.end() - pch1 ) == 6 ) );

    // find_tail
    BOOST_TEST_CHECKPOINT( "find_tail" );

    nc_result=find_tail( str1, 6 );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 15) &&
        ( (nc_result.end()-str1.begin()) == 21) );

    // Negative size: the tail is everything except the first 6 characters.
    nc_result=find_tail( str1, -6 );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 6) &&
        ( (nc_result.end()-str1.begin()) == 21) );


    cv_result=find_tail( const_cast<const string&>(str1), 6 );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 15) &&
        ( (cv_result.end()-str1.begin()) == 21) );

    ch_result=find_tail( pch1, 6 );
    BOOST_CHECK( ( (ch_result.begin() - pch1 ) == 15 ) && ( (ch_result.end() - pch1 ) == 21 ) );

    // find_token
    BOOST_TEST_CHECKPOINT( "find_token" );

    // With compression on, the adjacent matching characters "abc" form one match.
    nc_result=find_token( str1, is_any_of("abc"), token_compress_on );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 3) &&
        ( (nc_result.end()-str1.begin()) == 6) );

    cv_result=find_token( const_cast<const string&>(str1), is_any_of("abc"), token_compress_on );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 3) &&
        ( (cv_result.end()-str1.begin()) == 6) );

    // Walk all alphanumeric tokens of a space-separated string.
    string s1("abc def ghi jkl");
    find_iterator<string::iterator> fEnd;

    find_iterator<string::iterator> fxIt = make_find_iterator(s1,
            token_finder(is_alnum(), token_compress_on));
    BOOST_CHECK((fxIt != fEnd) && (*fxIt == string("abc")));
    ++fxIt;
    BOOST_CHECK((fxIt != fEnd) && (*fxIt == string("def")));
    ++fxIt;
    BOOST_CHECK((fxIt != fEnd) && (*fxIt == string("ghi")));
    ++fxIt;
    BOOST_CHECK((fxIt != fEnd) && (*fxIt == string("jkl")));
    ++fxIt;
    BOOST_CHECK(fxIt == fEnd);

    // With compression off, only the first matching character is reported.
    nc_result=find_token( str1, is_any_of("abc"), token_compress_off );
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 3) &&
        ( (nc_result.end()-str1.begin()) == 4) );

    cv_result=find_token( const_cast<const string&>(str1), is_any_of("abc"), token_compress_off );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 3) &&
        ( (cv_result.end()-str1.begin()) == 4) );

    ch_result=find_token( pch1, is_any_of("abc"), token_compress_off );
    BOOST_CHECK( ( (ch_result.begin() - pch1 ) == 3 ) && ( (ch_result.end() - pch1 ) == 4 ) );

    // generic find
    BOOST_TEST_CHECKPOINT( "generic find" );

    nc_result=find(str1, first_finder(string("abc")));
    BOOST_CHECK( 
        ( (nc_result.begin()-str1.begin()) == 3) &&
        ( (nc_result.end()-str1.begin()) == 6) );

    cv_result=find(const_cast<const string&>(str1), first_finder(str2) );
    BOOST_CHECK( 
        ( (cv_result.begin()-str1.begin()) == 3) &&
        ( (cv_result.end()-str1.begin()) == 6) );

    // multi-type comparison test 
    BOOST_TEST_CHECKPOINT( "multi-type" );

    // The searched container holds ints while the pattern holds chars.
    // The checks inspect the vector results; checking the string results here
    // would only re-test the generic find section above.
    const vector<int>& cvec1 = vec1;

    nc_vresult=find_first( vec1, string("abc") );
    BOOST_CHECK( 
        ( (nc_vresult.begin()-vec1.begin()) == 3) &&
        ( (nc_vresult.end()-vec1.begin()) == 6) );

    cv_vresult=find_first( cvec1, str2 );
    BOOST_CHECK( 
        ( (cv_vresult.begin()-cvec1.begin()) == 3) &&
        ( (cv_vresult.end()-cvec1.begin()) == 6) );

    // overflow test
    BOOST_TEST_CHECKPOINT( "overflow" );
    
    // Pattern longer than the input: the result must be an empty range.
    nc_result=find_first( str2, string("abcd") );
    BOOST_CHECK( nc_result.begin()==nc_result.end() );
    cv_result=find_first( const_cast<const string&>(str2), string("abcd") );
    BOOST_CHECK( cv_result.begin()==cv_result.end() );

    // Requested head/tail longer than the input: the whole input is returned.
    cv_result=find_head( const_cast<const string&>(str2), 4 );
    BOOST_CHECK( string( cv_result.begin(), cv_result.end() )== string("abc") );
    cv_result=find_tail( const_cast<const string&>(str2), 4 );
    BOOST_CHECK( string( cv_result.begin(), cv_result.end() )== string("abc") );

    // Empty string test
    BOOST_TEST_CHECKPOINT( "empty" );
    
    nc_result=find_first( str3, string("abcd") );
    BOOST_CHECK( nc_result.begin()==nc_result.end() );
    nc_result=find_first( str1, string("") );
    BOOST_CHECK( nc_result.begin()==nc_result.end() );

    cv_result=find_first( const_cast<const string&>(str3), string("abcd") );
    BOOST_CHECK( cv_result.begin()==cv_result.end() );
    cv_result=find_first( const_cast<const string&>(str1), string("") );
    BOOST_CHECK( cv_result.begin()==cv_result.end() ); 

    // iterator_range specific tests
    // Streaming an iterator_range writes the characters it delimits.
    ostringstream osstr;
    osstr << find_first( str1, "abc" );
    BOOST_CHECK( osstr.str()=="abc" );

}

// Program entry point: runs the find algorithm checks once and reports success.
// The command-line arguments are not used.
// NOTE(review): the BOOST_CHECK calls inside find_test() run from this plain
// main() rather than from a Boost.Test test case -- confirm the Boost.Test
// runtime is set up for that in the intended build.
int main( int /*argc*/, char* /*argv*/[] )
{
    find_test();

    return 0;
}

源代码

namespace boost {
    namespace algorithm {

//  Generic find -----------------------------------------------//

        //! Generic find algorithm
        /*!
            Runs the supplied finder over the whole input and hands back
            the match the finder reports.

            \param Input The sequence to be searched. It is converted with
                \c as_literal before the finder is invoked.
            \param Finder Finder object called with the begin and end
                iterators of the input.
            \return
                The \c iterator_range delimiting the match. Its iterator is
                either \c RangeT::iterator or \c RangeT::const_iterator,
                depending on the constness of the input parameter.
        */
        template<typename RangeT, typename FinderT>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type>
        find(RangeT& Input, const FinderT& Finder)
        {
            typedef BOOST_STRING_TYPENAME range_iterator<RangeT>::type input_iterator_type;

            // View of the input with literal handling applied.
            iterator_range<input_iterator_type> searchSpace(::boost::as_literal(Input));

            return Finder(
                ::boost::begin(searchSpace),
                ::boost::end(searchSpace));
        }

//  find_first  -----------------------------------------------//

        //! Find first algorithm
        /*!
            Locates the first occurrence of \c Search inside \c Input.
            Implemented as the generic \c find driven by a \c first_finder.

            \param Input The string to be searched.
            \param Search The substring to look for.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c Range1T::iterator or \c Range1T::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename Range1T, typename Range2T>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
        find_first(Range1T& Input, const Range2T& Search)
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::first_finder(Search));
        }

        //! Find first algorithm ( case insensitive )
        /*!
            Locates the first occurrence of \c Search inside \c Input,
            comparing characters case-insensitively. Implemented as the
            generic \c find driven by a \c first_finder that compares
            with \c is_iequal(Loc).

            \param Input The string to be searched.
            \param Search The substring to look for.
            \param Loc Locale used for the case insensitive comparison;
                a default-constructed \c std::locale when omitted.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c Range1T::iterator or \c Range1T::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename Range1T, typename Range2T>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
        ifind_first(
            Range1T& Input,
            const Range2T& Search,
            const std::locale& Loc=std::locale())
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::first_finder(Search, is_iequal(Loc)));
        }

//  find_last  -----------------------------------------------//

        //! Find last algorithm
        /*!
            Locates the last occurrence of \c Search inside \c Input.
            Implemented as the generic \c find driven by a \c last_finder.

            \param Input The string to be searched.
            \param Search The substring to look for.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c Range1T::iterator or \c Range1T::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename Range1T, typename Range2T>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
        find_last(Range1T& Input, const Range2T& Search)
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::last_finder(Search));
        }

        //! Find last algorithm ( case insensitive )
        /*!
            Locates the last occurrence of \c Search inside \c Input,
            comparing characters case-insensitively. Implemented as the
            generic \c find driven by a \c last_finder that compares
            with \c is_iequal(Loc).

            \param Input The string to be searched.
            \param Search The substring to look for.
            \param Loc Locale used for the case insensitive comparison;
                a default-constructed \c std::locale when omitted.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c Range1T::iterator or \c Range1T::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename Range1T, typename Range2T>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
        ifind_last(
            Range1T& Input,
            const Range2T& Search,
            const std::locale& Loc=std::locale())
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::last_finder(Search, is_iequal(Loc)));
        }

//  find_nth ----------------------------------------------------------------------//

        //! Find n-th algorithm
        /*!
            Locates the n-th (zero-indexed) occurrence of \c Search inside
            \c Input. Implemented as the generic \c find driven by an
            \c nth_finder.

            \param Input The string to be searched.
            \param Search The substring to look for.
            \param Nth Zero-based index of the match to be found.
                For negative \c Nth, the matches are counted from the end
                of the string.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c Range1T::iterator or \c Range1T::const_iterator,
                depending on the constness of the input parameter.
        */
        template<typename Range1T, typename Range2T>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
        find_nth(Range1T& Input, const Range2T& Search, int Nth)
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::nth_finder(Search, Nth));
        }

        //! Find n-th algorithm ( case insensitive ).
        /*!
            Locates the n-th (zero-indexed) occurrence of \c Search inside
            \c Input, comparing characters case-insensitively. Implemented
            as the generic \c find driven by an \c nth_finder that compares
            with \c is_iequal(Loc).

            \param Input The string to be searched.
            \param Search The substring to look for.
            \param Nth Zero-based index of the match to be found.
                For negative \c Nth, the matches are counted from the end
                of the string.
            \param Loc Locale used for the case insensitive comparison;
                a default-constructed \c std::locale when omitted.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c Range1T::iterator or \c Range1T::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename Range1T, typename Range2T>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<Range1T>::type>
        ifind_nth(
            Range1T& Input,
            const Range2T& Search,
            int Nth,
            const std::locale& Loc=std::locale())
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::nth_finder(Search, Nth, is_iequal(Loc)));
        }

//  find_head ----------------------------------------------------------------------//

        //! Find head algorithm
        /*!
            Returns the head of the input, i.e. a prefix of the requested
            size. If the input is shorter than required, the whole input is
            considered to be the head. Implemented as the generic \c find
            driven by a \c head_finder.

            \param Input An input string.
            \param N Length of the head.
                For N>=0, at most N characters are extracted.
                For N<0, at most size(Input)-|N| characters are extracted.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c RangeT::iterator or \c RangeT::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename RangeT>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type>
        find_head(RangeT& Input, int N)
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::head_finder(N));
        }

//  find_tail ----------------------------------------------------------------------//

        //! Find tail algorithm
        /*!
            Returns the tail of the input, i.e. a suffix of the requested
            size. If the input is shorter than required, the whole input is
            considered to be the tail. Implemented as the generic \c find
            driven by a \c tail_finder.

            \param Input An input string.
            \param N Length of the tail.
                For N>=0, at most N characters are extracted.
                For N<0, at most size(Input)-|N| characters are extracted.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c RangeT::iterator or \c RangeT::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename RangeT>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type>
        find_tail(RangeT& Input, int N)
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::tail_finder(N));
        }

//  find_token --------------------------------------------------------------------//

        //! Find token algorithm
        /*!
            Looks for a token in the string, where a token is a character
            that satisfies the given predicate. When the "token compress
            mode" is enabled, adjacent tokens are treated as a single match.
            Implemented as the generic \c find driven by a \c token_finder.

            \param Input An input string.
            \param Pred A unary predicate that identifies a token.
            \param eCompress Enables or disables compressing of adjacent
                tokens; \c token_compress_off when omitted.
            \return
                An \c iterator_range delimiting the match. Its iterator is
                either \c RangeT::iterator or \c RangeT::const_iterator,
                depending on the constness of the input parameter.

            \note This function provides the strong exception-safety guarantee
        */
        template<typename RangeT, typename PredicateT>
        inline iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type>
        find_token(
            RangeT& Input,
            PredicateT Pred,
            token_compress_mode_type eCompress=token_compress_off)
        {
            return ::boost::algorithm::find(
                Input,
                ::boost::algorithm::token_finder(Pred, eCompress));
        }

    } // namespace algorithm

    // pull names to the boost namespace
    using algorithm::find;
    using algorithm::find_first;
    using algorithm::ifind_first;
    using algorithm::find_last;
    using algorithm::ifind_last;
    using algorithm::find_nth;
    using algorithm::ifind_nth;
    using algorithm::find_head;
    using algorithm::find_tail;
    using algorithm::find_token;

} // namespace boost

猜你喜欢

转载自blog.csdn.net/zhangxiong1985/article/details/84454370
今日推荐