Clase auxiliar para analizar XML con boost.property_tree y solución a los problemas de análisis de texto en chino

descripción general

En proyectos anteriores se usó xml_parser para poder analizar XML con texto en chino. La implementación toma como referencia los siguientes
artículos:
«Clase auxiliar de boost.property_tree para analizar XML y solución a los problemas de análisis de texto en chino»: https://blog.krybot.com/a?ID=00950-284221e4-a6f8-420e-a7dc-ba233446b333

xml_parser.hpp utilizado en el proyecto

/**
 * @file xml_parser.hpp
 *
 * Declares the XML parser class
 * This class encapsulates a collection of operations for property_tree .
 */
#ifndef XML_PARSER_INCLUDED
#define XML_PARSER_INCLUDED

#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/property_tree/detail/xml_parser_writer_settings.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>

#include <codecvt>
#include <cstdint>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <locale>
#include <map>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace boost;
using namespace boost::property_tree;
using namespace std;

/// Key under which the property_tree XML parser stores an element's attributes.
const wstring XMLATTR(L"<xmlattr>");
/// Key under which the property_tree XML parser stores comment nodes.
const wstring XMLCOMMENT(L"<xmlcomment>");
/// XMLATTR followed by the path separator, ready to be prefixed to an attribute name.
const wstring XMLATTR_DOT(XMLATTR + L".");
/// XMLCOMMENT followed by the path separator.
const wstring XMLCOMMENT_DOT(XMLCOMMENT + L".");


/** A class encapsulates a collection of operations for property_tree */
class CXMLParser
{
    
    
public:

	/** Default constructor. */
	CXMLParser() 
	{
    
    
	}

	/** Destructor; the class owns no resources, so the compiler-generated one is sufficient. */
	~CXMLParser() = default;

	/**
	 * Reads XML file.
	 *
	 * @param 		  	fileName	 Full path of the input file.
	 * @param [in,out]	pt			The wptree object.
	 */
	void ReadXMLFile(const string& fileName, wptree& pt)
	{
    
    
        ReadXMLFile(to_wstr(fileName), pt);
	}

	/**
	 * Reads a UTF-8 encoded XML file into a wide property tree.
	 *
	 * The input stream is imbued with a UTF-8 codecvt facet so that multi-byte
	 * text (for example Chinese) is decoded into wchar_t, and the document is
	 * parsed with the trim_whitespace flag.
	 *
	 * @param 		  	fileName	Full path of the input file.
	 * @param [in,out]	pt			The wptree object that receives the parsed document.
	 */
	void ReadXMLFile(const wstring& fileName, wptree& pt)
	{
		const std::string narrowPath = to_str(fileName);
		std::wifstream input(narrowPath);

		// Copy of the current global locale with its codecvt facet replaced by a UTF-8 one.
		const std::locale utf8Decoding(std::locale(), new std::codecvt_utf8<wchar_t>);
		input.imbue(utf8Decoding);

		property_tree::read_xml(input, pt, property_tree::xml_parser::trim_whitespace);
		input.close();
	}

	/**
	 * Writes a property tree to an XML file encoded as UTF-8.
	 *
	 * The writer settings request one tab character of indentation and the
	 * "utf-8" encoding name.
	 *
	 * @param 		  	fileName	Full path of the output file.
	 * @param [in,out]	pt			The wptree object to serialise.
	 */
	void WriteXMLFile(const string& fileName, wptree& pt)
	{
		// User-preferred locale plus a UTF-8 codecvt facet, so wide characters
		// (for example Chinese) are encoded as UTF-8 on output.
		const std::locale utf8Encoding(locale(""), new std::codecvt_utf8<wchar_t>);

		// Layout of the generated document: indent character, indent count, encoding.
		const auto layout = property_tree::xml_writer_make_settings<wstring>(L'\t', 1, L"utf-8");

		property_tree::write_xml(fileName, pt, utf8Encoding, layout);
	}

	/**
	 * Writes XML file.
	 *
	 * @param 		  	fileName	Full path of the input file.
	 * @param [in,out]	pt			The wptree object
	 */
	void WriteXMLFile(const wstring& fileName, wptree& pt)
	{
    
    
        WriteXMLFile(to_str(fileName), pt);
	}

	/**
	 * Reads XML content.
	 *
	 * @param 		  	content	The input stringsream content.
	 * @param [in,out]	pt	   	The wptree object
	 */
	void ReadXMLContent(const wstring& content, wptree& pt)
	{
    
    
        std::wstringstream ss;
        ss << content;
        property_tree::read_xml(ss, pt);
	}

	/**
	 * Reads XML content.
	 *
	 * @param 		  	content	The input stringsream content.
	 * @param [in,out]	pt	   	The wptree object
	 */
	void ReadXMLContent(const string& content, wptree& pt)
	{
    
    
        ReadXMLContent(to_wstr(content), pt);
	}

	/**
	 * Serialises a property tree to XML text in a narrow string.
	 *
	 * The tree is first serialised by the wstring overload and the result is
	 * then converted to UTF-8 with to_str().
	 *
	 * @param [out]		content	Receives the XML text; any previous value is replaced.
	 * @param [in,out]	pt	   	The wptree object to serialise.
	 */
	void WriteXMLContent(string& content, wptree& pt)
	{
		wstring wideXml;
		WriteXMLContent(wideXml, pt);

		content = to_str(wideXml);
	}
	
	/**
	 * Serialises a property tree to XML text in a wide string.
	 *
	 * The default writer settings are used (no explicit indentation settings).
	 *
	 * @param [out]		content	Receives the XML text; any previous value is replaced.
	 * @param [in,out]	pt	    The wptree object to serialise.
	 */
	void WriteXMLContent(wstring& content, wptree& pt)
	{
		std::wostringstream sink;
		property_tree::write_xml(sink, pt);
		content = sink.str();
	}

	/**
	 * Converts a UTF-8 encoded string to a wstring.
	 *
	 * As a side effect the global C++ locale is switched to a Chinese locale
	 * ("chs" when WIN32 is defined, "zh_CN.utf8" otherwise) if that locale is
	 * available. The switch is best-effort: the UTF-8 conversion below uses its
	 * own codecvt facet and does not depend on the global locale, so a system
	 * without that locale still gets a correct conversion instead of an
	 * exception.
	 *
	 * @param	str	The UTF-8 string to convert.
	 *
	 * @return	str as a wstring.
	 *
	 * @throws	std::range_error if str is not valid UTF-8.
	 */
	std::wstring to_wstr(const std::string& str)
	{
		try
		{
#ifdef WIN32
			std::locale::global(std::locale("chs"));
#else
			std::locale::global(std::locale("zh_CN.utf8"));
#endif
		}
		catch (const std::runtime_error&)
		{
			// std::locale's constructor throws when the named locale is not
			// installed. Keep the current global locale and carry on.
		}

		std::wstring_convert<std::codecvt_utf8<wchar_t> > conv;
		return conv.from_bytes(str);
	}

	/**
	 * Converts a wstring to a UTF-8 encoded string.
	 *
	 * @param	str	The wstring to convert.
	 *
	 * @return	str encoded as UTF-8.
	 */
	std::string to_str(const std::wstring& str)
	{
		using Utf8Facet = std::codecvt_utf8<wchar_t>;

		std::wstring_convert<Utf8Facet> utf8Encoder;
		std::string encoded = utf8Encoder.to_bytes(str);
		return encoded;
	}

	/**
	 * Converts a wstring to an uint16_t.
	 *
	 * Leading and trailing whitespace is ignored. An empty or whitespace-only
	 * string yields 0.
	 *
	 * @param	str	The wstring to convert.
	 *
	 * @return	str as an uint16_t, or 0 when str contains no text.
	 *
	 * @throws	boost::bad_lexical_cast if the trimmed text is not a valid uint16_t.
	 */
	uint16_t to_uint16(const std::wstring& str)
	{
		const std::wstring trimmed = boost::trim_copy(str);
		if (trimmed.empty())
			return 0;

		// Convert the trimmed text, not the raw input: lexical_cast rejects
		// surrounding whitespace.
		return to_type<uint16_t>(trimmed);
	}


	/**
	 * Converts a wstring to an uint32_t.
	 *
	 * Leading and trailing whitespace is ignored. An empty or whitespace-only
	 * string yields 0.
	 *
	 * @param	str	The wstring to convert.
	 *
	 * @return	str as an uint32_t, or 0 when str contains no text.
	 *
	 * @throws	boost::bad_lexical_cast if the trimmed text is not a valid uint32_t.
	 */
	uint32_t to_uint32(const std::wstring& str)
	{
		const std::wstring trimmed = boost::trim_copy(str);
		if (trimmed.empty())
			return 0;

		// Convert the trimmed text, not the raw input: lexical_cast rejects
		// surrounding whitespace.
		return boost::lexical_cast<uint32_t>(trimmed);
	}

	/**
	 * Converts a wstring to an uint64_t.
	 *
	 * Leading and trailing whitespace is ignored. An empty or whitespace-only
	 * string yields 0.
	 *
	 * @param	str	The wstring to convert.
	 *
	 * @return	str as an uint64_t, or 0 when str contains no text.
	 *
	 * @throws	boost::bad_lexical_cast if the trimmed text is not a valid uint64_t.
	 */
	uint64_t to_uint64(const std::wstring& str)
	{
		const std::wstring trimmed = boost::trim_copy(str);
		if (trimmed.empty())
			return 0;

		// Convert the trimmed text, not the raw input: lexical_cast rejects
		// surrounding whitespace.
		return boost::lexical_cast<uint64_t>(trimmed);
	}

	/**
	 * Converts a str to a double.
	 *
	 * @param	str	The wstring to converted.
	 *
	 * @return	Str as a double.
	 */
	double to_double(const std::wstring& str)
	{
    
    
        return to_type<double>(str);
	}

	/**
	 * Converts a wstring to a value of the requested type via boost::lexical_cast.
	 *
	 * @tparam	T	The target type.
	 * @param	str	The wstring to convert.
	 *
	 * @return	str as a T.
	 */
	template<typename T>
	T to_type(const std::wstring& str)
	{
		T converted = boost::lexical_cast<T>(str);
		return converted;
	}

	/**
	 * Splits a wstring and converts the pieces to doubles.
	 *
	 * Convenience wrapper around to_vector<double>().
	 *
	 * @param	str   	The wstring to convert.
	 * @param	any_of	The set of separator characters.
	 *
	 * @return	The converted values, in input order.
	 */
	vector<double> to_doubleVector(const std::wstring& str, const std::wstring& any_of)
	{
		vector<double> values = to_vector<double>(str, any_of);
		return values;
	}

    /**
     * Splits a wstring and returns the pieces as UTF-8 strings.
     *
     * The pieces are passed to to_str() unchanged: they are not trimmed and
     * empty pieces are not removed.
     *
     * @param	str   	The wstring to split.
     * @param	any_of	The set of separator characters.
     *
     * @return	One UTF-8 string per piece, in input order.
     */
    vector<std::string> to_stringVector(const std::wstring& str, const std::wstring& any_of)
    {
        vector<wstring> pieces;
        boost::split(pieces, str, boost::is_any_of(any_of));

        vector<std::string> encoded;
        encoded.reserve(pieces.size());
        for (const wstring& piece : pieces)
            encoded.push_back(to_str(piece));

        return encoded;
    }

	/**
	 * Splits a wstring and converts every non-empty piece to a T.
	 *
	 * Each piece is trimmed first; pieces that are empty after trimming are
	 * skipped, so the result may hold fewer elements than there are pieces.
	 *
	 * @tparam	T		The element type, converted with to_type<T>().
	 * @param	wstr   	The wstring to convert.
	 * @param	any_of	The set of separator characters.
	 *
	 * @return	The converted values, in input order.
	 */
	template<typename T>
	vector<T> to_vector(const std::wstring& wstr, const std::wstring& any_of)
	{
		vector<wstring> tokens;
		boost::split(tokens, wstr, boost::is_any_of(any_of));

		vector<T> values;
		values.reserve(tokens.size());

		// Trim each token in place: the tokens are local scratch data, so no
		// per-element copies are needed.
		for (wstring& token : tokens)
		{
			boost::trim(token);
			if (token.empty())
				continue;

			values.push_back(to_type<T>(token));
		}
		return values;
	}

	/**
	 * Joins the elements of a vector into a wstring.
	 *
	 * The separator is written between consecutive elements only, so nothing
	 * has to be stripped afterwards and the last element is never altered.
	 * Elements are formatted with operator<< using the classic "C" locale and
	 * the stream's default settings (default floating-point precision).
	 *
	 * @tparam	T		The element type; must be streamable to a narrow ostream.
	 * @param	vec   	The vector to convert.
	 * @param	split	The separator placed between elements.
	 *
	 * @return	The joined text, empty when vec is empty.
	 */
	template<typename T>
	wstring to_wstr(const vector<T>& vec, const string& split)
	{
		std::ostringstream joined;
		// A new stream adopts the global locale, which to_wstr(const std::string&)
		// replaces; pin the classic locale so no digit grouping can appear in
		// the serialised numbers.
		joined.imbue(std::locale::classic());

		bool first = true;
		for (const T& item : vec)
		{
			if (!first)
				joined << split;
			joined << item;
			first = false;
		}

		return to_wstr(joined.str());
	}

    /**
     * Joins a vector of uint32_t values into a wstring, printing every value
     * as a signed 32-bit integer.
     *
     * @param	vec   	The values to convert; each one is cast to int32_t first.
     * @param	split	The separator text.
     *
     * @return	The joined text, as produced by to_wstr<int32_t>().
     */
    wstring int32_to_wstr(const vector<uint32_t>& vec, const string& split)
    {
        vector<int32_t> asSigned;
        asSigned.reserve(vec.size());
        for (const uint32_t value : vec)
            asSigned.push_back(static_cast<int32_t>(value));

        // Same joining steps as the generic vector overload.
        return to_wstr<int32_t>(asSigned, split);
    }
	/**
	 * Joins the elements of a C array into a wstring.
	 *
	 * A null pointer or a non-positive length is treated as an empty array.
	 *
	 * @tparam	T		The element type.
	 * @param	arr   	The array to convert.
	 * @param	len   	The number of elements in arr.
	 * @param	split	The separator text.
	 *
	 * @return	The joined text, as produced by the vector overload.
	 */
	template<typename T>
	wstring to_wstr(const T* arr, int32_t len, const string& split)
	{
		vector<T> items;
		// Only read from arr when the range is valid; arr + len with a null
		// pointer or a negative length would be undefined behaviour.
		if (arr != nullptr && len > 0)
			items.assign(arr, arr + len);

		return to_wstr<T>(items, split);
	}

	/**
	 * Looks up the subtree stored at a path below root.
	 *
	 * @param	root	The root wptree object.
	 * @param	key 	The path of the wanted node.
	 *
	 * @return	Whatever root.get_child_optional(key) returns: an optional that
	 * 			is empty when no node exists at that path.
	 */
	auto Descendants(const wptree& root, const wstring& key)->decltype(root.get_child_optional(key))
	{
		auto subtree = root.get_child_optional(key);
		return subtree;
	}

	/**
	 * Collects the direct children that have a given tag name and attribute value.
	 *
	 * @tparam	T			The type the attribute is read as and compared in.
	 * @param	parant  	The parent wptree object whose children are scanned.
	 * @param	tagName 	Name of the tag a child must have.
	 * @param	attrName	Name of the attribute to inspect.
	 * @param	attrVal 	The value the attribute must equal.
	 *
	 * @return	Copies of the matching child subtrees, in document order.
	 */
	template<typename T>
	vector<wptree> GetChildsByAttr(const wptree& parant, const wstring& tagName, const wstring& attrName, const T& attrVal)
	{
		vector<wptree> matches;

		for (const auto& entry : parant)
		{
			if (entry.first == tagName)
			{
				const auto value = Attribute<T>(entry, attrName);
				if (value && *value == attrVal)
					matches.push_back(entry.second);
			}
		}

		return matches;
	}

	/**
	 * Reads an attribute of a node, converted to R.
	 *
	 * The attribute is looked up at the path "<xmlattr>." + attrName below node.
	 *
	 * NOTE(review): `optional` resolves to boost::optional through the
	 * file-level using-directives; it would become ambiguous if std::optional
	 * were visible as well.
	 *
	 * @tparam	R			The type the attribute value is converted to.
	 * @param	node		The wptree object.
	 * @param	attrName	Name of the attribute.
	 *
	 * @return	The result of get_optional<R>() for that path.
	 */
	template<typename R>
	optional<R> Attribute(const wptree& node, const wstring& attrName)
	{
		const wstring attrPath = XMLATTR_DOT + attrName;
		return node.get_optional<R>(attrPath);
	}

	/**
	 * Reads an attribute of a node as a wstring.
	 *
	 * Convenience overload of Attribute<R>() with R = wstring.
	 *
	 * @param	node		The wptree object.
	 * @param	attrName	Name of the attribute.
	 *
	 * @return	The result of Attribute<wstring>(node, attrName).
	 */
	optional<wstring> Attribute(const wptree& node, const wstring& attrName)
	{
		optional<wstring> text = Attribute<wstring>(node, attrName);
		return text;
	}

	/**
	 * Reads an attribute from a (key, subtree) pair, converted to R.
	 *
	 * Three cases are distinguished by the pair's key:
	 *  - "<xmlcomment>": an empty optional is returned;
	 *  - "<xmlattr>": attrName is looked up directly in the pair's subtree;
	 *  - anything else: "<xmlattr>." + attrName is looked up in the subtree.
	 *
	 * @tparam	R			The type the attribute value is converted to.
	 * @param	pair		The (key, subtree) pair.
	 * @param	attrName	Name of the attribute.
	 *
	 * @return	The result of get_optional<R>(), or an empty optional for a comment pair.
	 */
	template<typename R>
	optional<R> Attribute(const wptree::value_type& pair, const wstring& attrName)
	{
		if (pair.first == XMLCOMMENT)
			return optional<R>();

		if (pair.first == XMLATTR)
			return pair.second.get_optional<R>(attrName);

		return pair.second.get_optional<R>(XMLATTR_DOT + attrName);
	}

	/**
	 * Reads an attribute from a (key, subtree) pair as a wstring.
	 *
	 * Convenience overload of Attribute<R>() with R = wstring.
	 *
	 * @param	pair		The (key, subtree) pair.
	 * @param	attrName	Name of the attribute.
	 *
	 * @return	The result of Attribute<wstring>(pair, attrName).
	 */
	optional<wstring> Attribute(const wptree::value_type& pair, const wstring& attrName)
	{
		optional<wstring> text = Attribute<wstring>(pair, attrName);
		return text;
	}

	/**
	 * Reads the value of a (key, subtree) pair when its key matches fieldName.
	 *
	 * The node's data is converted with get_value_optional<R>(), so R can be
	 * any type the property tree can translate to, not only wstring.
	 *
	 * @tparam	R			The type the node's data is converted to.
	 * @param	pair	 	The (key, subtree) pair.
	 * @param	fieldName	Name of the field.
	 *
	 * @return	The converted data when the key equals fieldName; an empty
	 * 			optional when the key differs or the conversion fails.
	 */
	template<typename R>
	optional<R> FieldValue(const wptree::value_type& pair, const wstring& fieldName)
	{
		if (pair.first != fieldName)
			return optional<R>();

		return pair.second.get_value_optional<R>();
	}

	/**
	 * Reads the value of a (key, subtree) pair as a wstring.
	 *
	 * Convenience overload of FieldValue<R>() with R = wstring.
	 *
	 * @param	pair	 	The (key, subtree) pair.
	 * @param	fieldName	Name of the field.
	 *
	 * @return	The result of FieldValue<wstring>(pair, fieldName).
	 */
	optional<wstring> FieldValue(const wptree::value_type& pair, const wstring& fieldName)
	{
		optional<wstring> text = FieldValue<wstring>(pair, fieldName);
		return text;
	}

	/**
	 * Groups the children of a node by the value of one of their attributes.
	 *
	 * The node is located with Descendants(root, key). For each of its children
	 * the attribute attrName is read as a wstring; children that have the
	 * attribute and are accepted by the predicate are copied into the result
	 * under that attribute value.
	 *
	 * @tparam	F			Callable taking a wstring& and returning something usable as bool.
	 * @param	root		The root wptree object.
	 * @param	key			The path of the node whose children are scanned.
	 * @param	attrName	Name of the attribute used as map key.
	 * @param	predict		Filter applied to each attribute value; accepts everything by default.
	 *
	 * @return	A multimap from attribute value to a copy of the child subtree;
	 * 			empty when no node exists at key.
	 */
	template<class F = std::function<bool(wstring&)>>
	multimap<wstring, wptree> MakeMapByAttr(const wptree& root, const wstring& key, const wstring& attrName,
		F predict = [](wstring&) { return true; })
	{
		multimap<wstring, wptree> grouped;

		const auto children = Descendants(root, key);
		if (children)
		{
			for (const auto& entry : *children)
			{
				// Not const: the predicate receives the value by non-const reference.
				auto attrValue = Attribute(entry, attrName);
				if (!attrValue || !predict(*attrValue))
					continue;

				grouped.emplace(*attrValue, entry.second);
			}
		}

		return grouped;
	}

	/**
	 * Reads a whole file into a narrow string.
	 *
	 * The path is widened with to_wstr(), the file is read by the wstring
	 * overload, and the text is converted back to UTF-8 with to_str().
	 *
	 * @param	path	Full path of the file.
	 *
	 * @return	The file content as a UTF-8 string.
	 */
	string ReadFileToString(const string& path)
	{
		const wstring widePath = to_wstr(path);
		const wstring wideContent = ReadFileToString(widePath);
		return to_str(wideContent);
	}

	/**
	 * Reads a whole UTF-8 encoded file into a wstring.
	 *
	 * @param	path	Full pathname of the file.
	 *
	 * @return	The decoded file content; empty when the file cannot be opened.
	 */
	wstring ReadFileToString(const wstring& path)
	{
		std::wifstream fin;

		// Install the UTF-8 decoder first and open the file exactly once.
		// Opening in the constructor and calling open() again makes the second
		// open fail and leaves failbit set on the stream.
		fin.imbue(std::locale(std::locale(), new std::codecvt_utf8<wchar_t>));
		fin.open(to_str(path), std::ios::in);

		if (!fin.is_open())
			return wstring();

		std::wstringstream ss;
		ss << fin.rdbuf();
		return ss.str();
	}
};

#endif

Código de prueba test_xml_parser.cpp

El código de prueba se proporciona en el artículo original, y la prueba unitaria también se lleva a cabo en el proyecto.

#include "xml_parser.hpp"

#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"
#include <vector>

using namespace testing;

/*
<?xml version="1.0" encoding="UTF-8"?>
<Root>
<Scenes>
<Scene Name="测试1">
<Name1>"测试2"</Name1>
</Scene>
</Scenes>
</Root>
*/
/** Relative path of the XML document that the tests load. */
const std::string filePath{"test1.xml"};

class CXMLParserTest : public Test
{
    
    
public:
	CXMLParser parser;
	wptree pt;
private:
	virtual void SetUp()
	{
    
    
		parser.ReadXMLFile(filePath, pt);
	}

	virtual void TearDown()
	{
    
    
	}
};

/** The loaded document must contain a node at the path Root.Scenes. */
TEST_F(CXMLParserTest, TestDescendants)
{
	const auto scenes = parser.Descendants(pt, L"Root.Scenes");
	const bool found = !!scenes;
	EXPECT_EQ(true, found);
}


/** Checks the Name attribute and the Name1 field of the nodes below Root.Scenes. */
TEST_F(CXMLParserTest, XMLParserAttributeAndFieldValue)
{
	auto elements = parser.Descendants(pt, L"Root.Scenes");
	// Stop here when the node is missing instead of dereferencing an empty optional.
	ASSERT_TRUE(static_cast<bool>(elements));

	for (auto& element : *elements)
	{
		auto attrVal = parser.Attribute(element, L"Name");
		if (attrVal)
		{
			EXPECT_EQ(L"测试1", *attrVal);
			// NOTE(review): an element that has a Name attribute is not searched
			// for Name1 children - confirm this is intended.
			continue;
		}

		for (auto& child : element.second)
		{
			auto fieldVal = parser.FieldValue(child, L"Name1");
			if (fieldVal)
				EXPECT_EQ(L"测试2", *fieldVal);
		}
	}
}


/** Joining a vector of doubles yields the values separated by the given text. */
TEST_F(CXMLParserTest, test_Vector_to_wstring)
{
	const vector<double> values{ 1, 2, 3, 4, 5, 6 };
	const string separator(";");

	const wstring joined = parser.to_wstr<double>(values, separator);

	EXPECT_EQ(L"1;2;3;4;5;6", joined);
}

/** Joining a C array of doubles yields the values separated by the given text. */
TEST_F(CXMLParserTest, test_Array_to_wstring)
{
	double numbers[6] = { 1.1, 2.2, 3.3, 4.4, 5.5, 6.6 };
	const string separator(";");

	const wstring joined = parser.to_wstr<double>(numbers, 6, separator);

	EXPECT_EQ(L"1.1;2.2;3.3;4.4;5.5;6.6", joined);
}

/** Splitting "1;2;3;4;5;6" at ';' yields the six numbers in order. */
TEST_F(CXMLParserTest, test_wstring_to_vector)
{
	const wstring wstr = L"1;2;3;4;5;6";
	const wstring split(L";");
	const vector<double> vec = parser.to_vector<double>(wstr, split);

	// Unsigned literal: vec.size() is unsigned, so comparing it with a plain
	// int is a signed/unsigned comparison.
	ASSERT_EQ(6u, vec.size());

	// Check the values too, not only the element count.
	for (size_t i = 0; i < vec.size(); ++i)
		EXPECT_DOUBLE_EQ(static_cast<double>(i + 1), vec[i]);
}

/** The text "11" converts to the number 11. */
TEST_F(CXMLParserTest, test_wstring_to_uint16)
{
	const wstring text(L"11");
	const uint16_t parsed = parser.to_uint16(text);
	EXPECT_EQ(11, parsed);
}

/** The text "11.22" converts to the double 11.22. */
TEST_F(CXMLParserTest, test_wstring_to_double)
{
	const wstring wstr = L"11.22";
	const double u = parser.to_double(wstr);

	// Floating-point results are compared with a tolerance, not with ==.
	EXPECT_DOUBLE_EQ(11.22, u);
}


/** Parses the test document from memory and checks the same values as the file-based test. */
TEST(CXMLParserTest2, TestReadXMLContent)
{
	CXMLParser parser;
	wptree pt;
	const wstring wstr = parser.to_wstr(filePath);

	// ReadFileToString is a member of CXMLParser; no CUtility class is
	// declared by the headers this file includes.
	const wstring content = parser.ReadFileToString(wstr);
	parser.ReadXMLContent(content, pt);

	auto elements = parser.Descendants(pt, L"Root.Scenes");
	// Fatal check: the loop below dereferences the optional.
	ASSERT_TRUE(static_cast<bool>(elements));

	for (auto& element : *elements)
	{
		auto attrVal = parser.Attribute(element, L"Name");
		if (attrVal)
		{
			EXPECT_EQ(L"测试1", *attrVal);
			// NOTE(review): an element that has a Name attribute is not searched
			// for Name1 children - confirm this is intended.
			continue;
		}

		for (auto& child : element.second)
		{
			auto fieldVal = parser.FieldValue(child, L"Name1");
			if (fieldVal)
				EXPECT_EQ(L"测试2", *fieldVal);
		}
	}
}

Supongo que te gusta

Origin blog.csdn.net/stallion5632/article/details/125825864
Recomendado
Clasificación