Split Chinese text into pinyin and get the first letter of each character (returns more than one result for polyphonic characters)

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace PinYin
{
    /// <summary>
    /// Converts Chinese text into the first letters ("initials") of its pinyin
    /// reading. Characters with several readings (polyphones) contribute one
    /// letter per reading, so a single input can yield several candidate strings.
    /// </summary>
    class Pinyin
    {
        // Only characters in U+4E00..U+9FA5 are converted; everything else is skipped.
        private const int FirstHanziCode = 19968;
        private const int LastHanziCode = 40869;

        // Last double-byte code (lead * 256 + trail) that the pinyin ranges below cover.
        private const int LastLevelOneCode = 55289;

        // First double-byte code of each pinyin letter range. The ranges are
        // contiguous: a range ends right before the next one starts, and the last
        // one ends at LastLevelOneCode. There are no ranges for I, U and V.
        private static readonly int[] RangeStarts =
        {
            45217, 45253, 45761, 46318, 46826, 47010, 47297, 47614,
            48119, 49062, 49324, 49896, 50371, 50614, 50622, 50906,
            51387, 51446, 52218, 52698, 52980, 53689, 54481
        };

        // Letter for the range starting at the same index in RangeStarts.
        private const string RangeLetters = "ABCDEFGHJKLMNOPQRSTWXYZ";

        // Polyphone table: "<UTF-16 code unit in decimal>:<one letter per reading>".
        // NOTE(review): the tail of this table was garbled in the file this was
        // recovered from (translated tokens, swapped key/value pairs, duplicated
        // fragments). Entries from 27481 onwards were reconstructed by
        // cross-checking those fragments - confirm against a trusted copy.
        private const string PolyphoneTable =
            "19969:DZ,19975:WM,19988:QJ,20048:YL,20056:SC,20060:NM,20094:QG,20127:QJ,20167:QC,20193:YG," +
            "20250:KH,20256:ZC,20282:SC,20285:QJG,20291:TD,20314:YD,20340:NE,20375:TD,20389:YJ,20391:CZ," +
            "20415:PB,20446:YS,20447:SQ,20504:TC,20608:KG,20854:QJ,20857:ZC,20911:PF,20985:AW,21032:PB," +
            "21048:XQ,21049:SC,21089:YS,21119:JC,21242:SB,21273:SC,21305:YP,21306:QO,21330:ZC,21333:SDC," +
            "21345:QK,21378:CA,21397:SC,21414:XS,21442:SC,21477:JG,21480:TD,21484:ZS,21494:YX,21505:YX," +
            "21512:HG,21523:XH,21537:PB,21542:PF,21549:KH,21571:E,21574:DA,21588:TD,21589:O,21618:ZC," +
            "21621:KHA,21632:ZJ,21654:KG,21679:LKG,21683:KH,21710:A,21719:YH,21734:WOE,21769:A,21780:WN," +
            "21804:XH,21834:A,21899:ZD,21903:RN,21908:WO,21939:ZC,21956:SA,21964:YA,21970:TD,22003:A," +
            "22031:JG,22040:XS,22060:ZC,22066:ZC,22079:MH,22129:XJ,22179:XA,22237:NJ,22244:TD,22280:JQ," +
            "22300:YH,22313:XW,22331:YQ,22343:YJ,22351:PH,22395:DC,22412:TD,22484:PB,22500:PB,22534:ZD," +
            "22549:DH,22561:PB,22612:TD,22771:KQ,22831:HB,22841:JG,22855:QJ,22865:XQ,23013:ML,23081:WM," +
            "23487:SX,23558:QJ,23561:YW,23586:YW,23614:YW,23615:SN,23631:PB,23646:ZS,23663:ZT,23673:YG," +
            "23762:TD,23769:ZS,23780:QJ,23884:QK,24055:XH,24113:DC,24162:ZC,24191:GA,24273:QJ,24324:NL," +
            "24377:TD,24378:QJ,24439:PF,24554:ZS,24683:TD,24694:WE,24733:LK,24925:TN,25094:ZG,25100:XQ," +
            "25103:XH,25153:PB,25170:PB,25179:KG,25203:PB,25240:ZS,25282:FB,25303:NA,25324:KG,25341:ZY," +
            "25373:WZ,25375:XJ,25384:A,25457:A,25528:SD,25530:SC,25552:TD,25774:ZC,25874:ZC,26044:YW," +
            "26080:WM,26292:PB,26333:PB,26355:ZY,26366:CZ,26397:ZC,26399:QJ,26415:ZS,26451:SB,26526:ZC," +
            "26552:JG,26561:TD,26588:JG,26597:CZ,26629:ZS,26638:YL,26646:XQ,26653:KG,26657:XJ,26727:HG," +
            "26894:ZC,26937:ZS,26946:ZC,26999:KJ,27099:KJ,27449:YQ,27481:XS,27542:ZS,27663:ZS,27748:TS," +
            "27784:SC,27788:ZD,27795:TD,27812:O,27850:PB,27852:MB,27895:SL,27898:PL,27973:QJ,27981:KH," +
            "27986:HX,27994:XJ,28044:YC,28065:WG,28177:SM,28267:QJ,28291:KH,28337:ZQ,28463:TL,28548:DC," +
            "28601:TD,28689:PB,28805:JG,28820:QG,28846:PB,28952:TD,28975:ZC,29100:A,29325:QJ,29575:SL," +
            "29602:FB,30010:TD,30044:CX,30058:PF,30091:YSP,30111:YN,30229:XJ,30427:SC,30465:SX,30631:YQ," +
            "30655:QJ,30684:QJG,30707:SD,30729:XH,30796:LG,30917:PB,31074:NM,31085:JZ,31109:SC,31181:ZC," +
            "31192:MLB,31293:JQ,31400:YX,31584:YJ,31896:ZN,31909:ZY,31995:XJ,32321:PF,32327:ZY,32418:HG," +
            "32420:XQ,32421:HG,32438:LG,32473:GJ,32488:TD,32521:QJ,32527:PB,32562:ZSQ,32564:JZ,32735:ZD," +
            "32793:PB,33071:PF,33098:XL,33100:YA,33152:PB,33261:CX,33324:BP,33333:TD,33406:YA,33426:WM," +
            "33432:PB,33445:JG,33486:ZN,33493:TS,33507:QJ,33540:QJ,33544:ZC,33564:XQ,33617:YT,33632:QJ," +
            "33636:XH,33637:YX,33694:WG,33705:PF,33728:YW,33882:SR,34067:WM,34074:YW,34121:QJ,34255:ZC," +
            "34259:XL,34425:JH,34430:XH,34485:KH,34503:YS,34532:HG,34552:XS,34558:YE,34593:ZL,34660:YQ," +
            "34892:XH,34928:SC,34999:QJ,35048:PB,35059:SC,35098:ZC,35203:TQ,35265:JX,35299:JX,35782:SZ," +
            "35828:YS,35830:E,35843:TD,35895:YG,35977:MH,36158:JG,36228:QJ,36426:XQ,36466:DC,36710:JC," +
            "36711:ZYG,36767:PB,36866:SK,36951:YW,37034:YX,37063:XH,37218:ZC,37325:ZC,38063:PB,38079:TD," +
            "38085:QY,38107:DC,38116:TD,38123:YD,38224:HG,38241:XTC,38271:ZC,38415:YE,38426:KH,38461:YD," +
            "38463:AE,38466:PB,38477:XJ,38518:YT,38551:WK,38585:ZC,38704:XS,38739:LJ,38761:GJ,38808:SQ," +
            "39048:JG,39049:XJ,39052:HG,39076:CZ,39271:XT,39534:TD,39552:TD,39584:PB,39647:SB,39730:LG," +
            "39748:TPB,40109:ZQ,40479:ND,40516:HG,40536:HG,40583:QJ,40765:YQ,40784:QJ,40840:YK,40863:QJG";

        // Parsed once so each lookup is a dictionary hit instead of a scan of the
        // whole table text.
        private static readonly Dictionary<char, string> PolyphoneInitials = BuildPolyphoneTable();

        // Resolved on first use by GetGb2312Encoding.
        private static Encoding gb2312Encoding;

        /// <summary>
        /// Returns every pinyin-initial string that can be formed from
        /// <paramref name="cnStr"/>, one per combination of polyphone readings.
        /// </summary>
        /// <param name="cnStr">Text to convert. Whitespace is removed first.</param>
        /// <returns>
        /// The candidate initial strings. When nothing in the input can be
        /// converted, the array holds a single empty string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="cnStr"/> is null.</exception>
        public static String[] GetSpellCode(String cnStr)
        {
            if (cnStr == null)
            {
                throw new ArgumentNullException("cnStr");
            }

            string compact = Regex.Replace(cnStr, "\\s", "");
            string combined = ResolvePinyinString(compact);

            return combined.Split(',', ';');
        }

        /// <summary>
        /// Builds the comma-separated list of initial strings for a whole text by
        /// extending every string built so far with each candidate letter of the
        /// next character.
        /// </summary>
        /// <param name="hanZiStr">Text to convert.</param>
        /// <returns>Comma-separated combinations, or an empty string.</returns>
        private static string ResolvePinyinString(string hanZiStr)
        {
            List<string> combinations = new List<string>();

            foreach (char current in hanZiStr)
            {
                string letters = GetCharSpellCode(current);
                if (letters.Length == 0)
                {
                    // No initial for this character (outside the handled range).
                    continue;
                }

                if (combinations.Count == 0)
                {
                    // First convertible character: each of its letters starts a combination.
                    foreach (char letter in letters)
                    {
                        combinations.Add(letter.ToString());
                    }
                    continue;
                }

                // Append every letter of this character to every existing combination.
                List<string> extended = new List<string>();
                foreach (string prefix in combinations)
                {
                    foreach (char letter in letters)
                    {
                        extended.Add(prefix + letter);
                    }
                }
                combinations = extended;
            }

            return string.Join(",", combinations.ToArray());
        }

        /// <summary>
        /// Returns the initial letter(s) for a single character.
        /// </summary>
        /// <param name="hanzi">Character to convert.</param>
        /// <returns>
        /// An empty string when the character is outside U+4E00..U+9FA5, the letters
        /// from the polyphone table when it is listed there, otherwise the result
        /// of <see cref="GetSingleCharSpellCode"/>.
        /// </returns>
        private static String GetCharSpellCode(char hanzi)
        {
            int code = hanzi;
            if (code > LastHanziCode || code < FirstHanziCode)
            {
                return "";
            }

            string letters;
            if (PolyphoneInitials.TryGetValue(hanzi, out letters))
            {
                return letters;
            }

            return GetSingleCharSpellCode(hanzi.ToString());
        }

        /// <summary>
        /// Returns the initial letter of a character that is not in the polyphone
        /// table, by locating its GB2312 double-byte code in the pinyin-ordered ranges.
        /// </summary>
        /// <param name="cnChar">A one-character string.</param>
        /// <returns>
        /// The upper-cased input when it encodes to a single byte; a letter A-Z when
        /// its double-byte code lies inside the ranges; otherwise "?".
        /// </returns>
        private static String GetSingleCharSpellCode(String cnChar)
        {
            // Encoding.Default depends on the machine locale (and is UTF-8 on
            // .NET Core / .NET 5+), so the code page is requested explicitly.
            byte[] encoded = GetGb2312Encoding().GetBytes(cnChar);

            if (encoded.Length == 1)
            {
                return cnChar.ToUpperInvariant();
            }

            // Trail bytes below 0xA1 belong to GBK extension characters, which are
            // not ordered by pinyin; the ranges say nothing about them.
            if (encoded.Length < 2 || encoded[1] < 0xA1)
            {
                return "?";
            }

            int doubleByteCode = encoded[0] * 256 + encoded[1];
            if (doubleByteCode > LastLevelOneCode)
            {
                return "?";
            }

            // The last range whose start is not above the code is the one containing it.
            for (int index = RangeStarts.Length - 1; index >= 0; index--)
            {
                if (doubleByteCode >= RangeStarts[index])
                {
                    return RangeLetters[index].ToString();
                }
            }

            return "?";
        }

        /// <summary>
        /// Parses <see cref="PolyphoneTable"/> into a lookup keyed by character.
        /// </summary>
        private static Dictionary<char, string> BuildPolyphoneTable()
        {
            Dictionary<char, string> table = new Dictionary<char, string>();

            foreach (string entry in PolyphoneTable.Split(','))
            {
                int colon = entry.IndexOf(':');
                int code = int.Parse(
                    entry.Substring(0, colon),
                    System.Globalization.CultureInfo.InvariantCulture);

                // Indexer assignment tolerates a repeated key.
                table[(char)code] = entry.Substring(colon + 1);
            }

            return table;
        }

        /// <summary>
        /// Returns the GB2312 encoding, registering the code-pages provider first on
        /// runtimes where legacy code pages are not available by default.
        /// </summary>
        private static Encoding GetGb2312Encoding()
        {
            if (gb2312Encoding != null)
            {
                return gb2312Encoding;
            }

            Encoding resolved;
            try
            {
                resolved = Encoding.GetEncoding("GB2312");
            }
            catch (ArgumentException)
            {
                // The provider type is looked up by name so this file still
                // compiles where System.Text.Encoding.CodePages is not referenced.
                Type providerType = Type.GetType(
                    "System.Text.CodePagesEncodingProvider, System.Text.Encoding.CodePages",
                    false);
                if (providerType == null)
                {
                    throw;
                }

                object provider = providerType.GetProperty("Instance").GetValue(null, null);
                Encoding.RegisterProvider((EncodingProvider)provider);
                resolved = Encoding.GetEncoding("GB2312");
            }

            // A concurrent first call may resolve twice; both results are equivalent.
            gb2312Encoding = resolved;
            return resolved;
        }
    }
}

 

Guess you like

Origin www.cnblogs.com/jijm123/p/12663590.html