A recent character-recognition (OCR) project needed the width of a Chinese character in an image so that the recognized text could be laid out with the correct spacing. The OCR engine currently returns the x and y coordinates of each character, so the width of one Chinese character can be taken as the x coordinate of that character minus the x coordinate of the character before it. However, digits and letters such as "abc" and "123" are narrower than a Chinese character, so it is first necessary to determine whether two adjacent characters are both Chinese characters.
- See the Unicode range of Chinese characters (the CJK Unified Ideographs block): https://unicode-table.com/cn/blocks/cjk-unified-ideographs/
/// <summary>
/// Demo component that walks a sample string and logs, for every pair of
/// neighbouring characters, whether both of them are considered Chinese characters.
/// Two detection strategies are provided; <c>Start</c> runs the Unicode-range one.
/// </summary>
public class NewBehaviourScript : MonoBehaviour
{
    // Threshold used by IsChineseCharacterMethod1: any char value above it counts as Chinese.
    private const int AsciiMax = 127;

    // Inclusive bounds of the code-point range tested by IsChineseCharacterMethod2.
    private const int CjkRangeStart = 0x4e00;
    private const int CjkRangeEnd = 0x9fff;

    // Sample text mixing brackets, digits, punctuation and Chinese characters.
    private string str = "[问题12]:相邻的都是汉字吗?";

    void Start()
    {
        // Alternative strategy, kept for comparison:
        //IsChineseCharacterMethod1(str);
        IsChineseCharacterMethod2(str);
    }

    /// <summary>
    /// Classifies each adjacent pair using the "value greater than 127" heuristic:
    /// a character is treated as Chinese whenever its char value is above 127.
    /// One line is logged per adjacent pair.
    /// </summary>
    /// <param name="str">Text whose adjacent character pairs are examined.</param>
    private void IsChineseCharacterMethod1(string str)
    {
        for (int i = 0; i + 1 < str.Length; i++)
        {
            char left = str[i];
            char right = str[i + 1];
            bool bothChinese = IsAboveAscii(left) && IsAboveAscii(right);
            LogPair(left, right, bothChinese);
        }
    }

    /// <summary>
    /// Classifies each adjacent pair by Unicode range: a character is treated as
    /// Chinese when its value lies within 0x4E00..0x9FFF (inclusive).
    /// One line is logged per adjacent pair.
    /// </summary>
    /// <param name="str">Text whose adjacent character pairs are examined.</param>
    private void IsChineseCharacterMethod2(string str)
    {
        for (int i = 0; i + 1 < str.Length; i++)
        {
            char left = str[i];
            char right = str[i + 1];
            bool bothChinese = IsInCjkRange(left) && IsInCjkRange(right);
            LogPair(left, right, bothChinese);
        }
    }

    // True when the char value is strictly greater than 127.
    private static bool IsAboveAscii(char c)
    {
        return c > AsciiMax;
    }

    // True when the char value is within the inclusive range 0x4E00..0x9FFF.
    private static bool IsInCjkRange(char c)
    {
        return c >= CjkRangeStart && c <= CjkRangeEnd;
    }

    // Logs the verdict for one adjacent pair; both characters are shown in double quotes.
    private static void LogPair(char left, char right, bool bothChinese)
    {
        string verdict = bothChinese ? "\"相邻,都是汉字" : "\"相邻,但不都是汉字";
        Debug.Log("\"" + left + "\"与\"" + right + verdict);
    }

    // Update is called once per frame; this demo does no per-frame work.
    void Update()
    {
    }
}
The results are as follows: