using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace PinYin
{
    /// <summary>
    /// Computes pinyin first-letter ("spell code") indexes for Chinese text.
    /// Characters with several readings (polyphones) contribute one letter per
    /// reading, and every combination of letters is returned.
    /// </summary>
    class Pinyin
    {
        // Only characters whose UTF-16 code unit lies in this inclusive range are
        // translated; everything else (ASCII, punctuation, ...) contributes nothing.
        private const int FirstHanziCodePoint = 19968;
        private const int LastHanziCodePoint = 40869;

        // Polyphone table: "<decimal UTF-16 code>:<one initial per reading>" entries
        // separated by commas.
        // NOTE(review): the values for 33100, 33406 ("YA") and 33694 ("WG") had no
        // intact copy in the previous revision of this file and were reconstructed
        // from the characters' readings; 30729 and 33636 ("XH") were restored by
        // analogy with other entries damaged in the same way. Please verify these
        // against the upstream table.
        private const string MultiPinyin =
            "19969:DZ,19975:WM,19988:QJ,20048:YL,20056:SC,20060:NM,20094:QG,20127:QJ,20167:QC,20193:YG," +
            "20250:KH,20256:ZC,20282:SC,20285:QJG,20291:TD,20314:YD,20340:NE,20375:TD,20389:YJ,20391:CZ," +
            "20415:PB,20446:YS,20447:SQ,20504:TC,20608:KG,20854:QJ,20857:ZC,20911:PF,20985:AW,21032:PB," +
            "21048:XQ,21049:SC,21089:YS,21119:JC,21242:SB,21273:SC,21305:YP,21306:QO,21330:ZC,21333:SDC," +
            "21345:QK,21378:CA,21397:SC,21414:XS,21442:SC,21477:JG,21480:TD,21484:ZS,21494:YX,21505:YX," +
            "21512:HG,21523:XH,21537:PB,21542:PF,21549:KH,21571:E,21574:DA,21588:TD,21589:O,21618:ZC," +
            "21621:KHA,21632:ZJ,21654:KG,21679:LKG,21683:KH,21710:A,21719:YH,21734:WOE,21769:A,21780:WN," +
            "21804:XH,21834:A,21899:ZD,21903:RN,21908:WO,21939:ZC,21956:SA,21964:YA,21970:TD,22003:A," +
            "22031:JG,22040:XS,22060:ZC,22066:ZC,22079:MH,22129:XJ,22179:XA,22237:NJ,22244:TD,22280:JQ," +
            "22300:YH,22313:XW,22331:YQ,22343:YJ,22351:PH,22395:DC,22412:TD,22484:PB,22500:PB,22534:ZD," +
            "22549:DH,22561:PB,22612:TD,22771:KQ,22831:HB,22841:JG,22855:QJ,22865:XQ,23013:ML,23081:WM," +
            "23487:SX,23558:QJ,23561:YW,23586:YW,23614:YW,23615:SN,23631:PB,23646:ZS,23663:ZT,23673:YG," +
            "23762:TD,23769:ZS,23780:QJ,23884:QK,24055:XH,24113:DC,24162:ZC,24191:GA,24273:QJ,24324:NL," +
            "24377:TD,24378:QJ,24439:PF,24554:ZS,24683:TD,24694:WE,24733:LK,24925:TN,25094:ZG,25100:XQ," +
            "25103:XH,25153:PB,25170:PB,25179:KG,25203:PB,25240:ZS,25282:FB,25303:NA,25324:KG,25341:ZY," +
            "25373:WZ,25375:XJ,25384:A,25457:A,25528:SD,25530:SC,25552:TD,25774:ZC,25874:ZC,26044:YW," +
            "26080:WM,26292:PB,26333:PB,26355:ZY,26366:CZ,26397:ZC,26399:QJ,26415:ZS,26451:SB,26526:ZC," +
            "26552:JG,26561:TD,26588:JG,26597:CZ,26629:ZS,26638:YL,26646:XQ,26653:KG,26657:XJ,26727:HG," +
            "26894:ZC,26937:ZS,26946:ZC,26999:KJ,27099:KJ,27449:YQ,27481:XS,27542:ZS,27663:ZS,27748:TS," +
            "27784:SC,27788:ZD,27795:TD,27812:O,27850:PB,27852:MB,27895:SL,27898:PL,27973:QJ,27981:KH," +
            "27986:HX,27994:XJ,28044:YC,28065:WG,28177:SM,28267:QJ,28291:KH,28337:ZQ,28463:TL,28548:DC," +
            "28601:TD,28689:PB,28805:JG,28820:QG,28846:PB,28952:TD,28975:ZC,29100:A,29325:QJ,29575:SL," +
            "29602:FB,30010:TD,30044:CX,30058:PF,30091:YSP,30111:YN,30229:XJ,30427:SC,30465:SX,30631:YQ," +
            "30655:QJ,30684:QJG,30707:SD,30729:XH,30796:LG,30917:PB,31074:NM,31085:JZ,31109:SC,31181:ZC," +
            "31192:MLB,31293:JQ,31400:YX,31584:YJ,31896:ZN,31909:ZY,31995:XJ,32321:PF,32327:ZY,32418:HG," +
            "32420:XQ,32421:HG,32438:LG,32473:GJ,32488:TD,32521:QJ,32527:PB,32562:ZSQ,32564:JZ,32735:ZD," +
            "32793:PB,33071:PF,33098:XL,33100:YA,33152:PB,33261:CX,33324:BP,33333:TD,33406:YA,33426:WM," +
            "33432:PB,33445:JG,33486:ZN,33493:TS,33507:QJ,33540:QJ,33544:ZC,33564:XQ,33617:YT,33632:QJ," +
            "33636:XH,33637:YX,33694:WG,33705:PF,33728:YW,33882:SR,34067:WM,34074:YW,34121:QJ,34255:ZC," +
            "34259:XL,34425:JH,34430:XH,34485:KH,34503:YS,34532:HG,34552:XS,34558:YE,34593:ZL,34660:YQ," +
            "34892:XH,34928:SC,34999:QJ,35048:PB,35059:SC,35098:ZC,35203:TQ,35265:JX,35299:JX,35782:SZ," +
            "35828:YS,35830:E,35843:TD,35895:YG,35977:MH,36158:JG,36228:QJ,36426:XQ,36466:DC,36710:JC," +
            "36711:ZYG,36767:PB,36866:SK,36951:YW,37034:YX,37063:XH,37218:ZC,37325:ZC,38063:PB,38079:TD," +
            "38085:QY,38107:DC,38116:TD,38123:YD,38224:HG,38241:XTC,38271:ZC,38415:YE,38426:KH,38461:YD," +
            "38463:AE,38466:PB,38477:XJ,38518:YT,38551:WK,38585:ZC,38704:XS,38739:LJ,38761:GJ,38808:SQ," +
            "39048:JG,39049:XJ,39052:HG,39076:CZ,39271:XT,39534:TD,39552:TD,39584:PB,39647:SB,39730:LG," +
            "39748:TPB,40109:ZQ,40479:ND,40516:HG,40536:HG,40583:QJ,40765:YQ,40784:QJ,40840:YK,40863:QJG";

        // Inclusive upper bound of the two-byte GB2312 code (lead * 256 + trail) for
        // each initial; entry i pairs with InitialLetters[i]. The ranges are
        // contiguous, starting at FirstIndexedGbCode. There is no I, U or V range.
        private const int FirstIndexedGbCode = 45217;
        private static readonly int[] InitialUpperBounds =
        {
            45252, 45760, 46317, 46825, 47009, 47296, 47613, 48118,
            49061, 49323, 49895, 50370, 50613, 50621, 50905, 51386,
            51445, 52217, 52697, 52979, 53688, 54480, 55289
        };
        private const string InitialLetters = "ABCDEFGHJKLMNOPQRSTWXYZ";

        private static readonly Regex WhitespacePattern = new Regex("\\s");

        private static readonly Dictionary<int, string> PolyphoneInitials =
            ParsePolyphoneTable(MultiPinyin);

        private static readonly Encoding HanziEncoding = ResolveHanziEncoding();

        /// <summary>
        /// Returns every pinyin first-letter combination for <paramref name="cnStr"/>.
        /// Whitespace is removed first; characters outside the supported Hanzi range
        /// are skipped.
        /// </summary>
        /// <param name="cnStr">Chinese character string.</param>
        /// <returns>
        /// One string per combination of readings, e.g. two characters with two
        /// readings each give four entries. When no character can be resolved the
        /// result is an array holding a single empty string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="cnStr"/> is null.</exception>
        public static String[] GetSpellCode(String cnStr)
        {
            if (cnStr == null)
            {
                throw new ArgumentNullException("cnStr");
            }

            string compact = WhitespacePattern.Replace(cnStr, "");
            string joined = ResolvePinyinString(compact);
            return joined.Split(",;".ToCharArray());
        }

        /// <summary>
        /// Builds the comma-separated list of first-letter combinations for a string
        /// that may contain polyphones.
        /// </summary>
        /// <param name="HanZiStr">Whitespace-free Chinese character string.</param>
        /// <returns>Combinations joined with ",", or "" when nothing was resolved.</returns>
        private static string ResolvePinyinString(string HanZiStr)
        {
            List<string> combinations = new List<string>();

            foreach (char hanzi in HanZiStr)
            {
                // One letter per possible reading; empty when the char is unsupported.
                string initials = GetCharSpellCode(hanzi);
                if (initials.Length == 0)
                {
                    continue;
                }

                if (combinations.Count == 0)
                {
                    // First resolvable character: each reading starts a combination.
                    foreach (char initial in initials)
                    {
                        combinations.Add(initial.ToString());
                    }
                    continue;
                }

                // Cross product: append every reading to every existing prefix,
                // prefix-major so the output order is stable.
                List<string> extended = new List<string>();
                foreach (string prefix in combinations)
                {
                    foreach (char initial in initials)
                    {
                        extended.Add(prefix + initial);
                    }
                }
                combinations = extended;
            }

            return string.Join(",", combinations.ToArray());
        }

        /// <summary>
        /// Returns the first letter(s) for a single character: several letters for a
        /// polyphone listed in the table, otherwise the GB2312-derived initial.
        /// </summary>
        /// <param name="Hanzi">Single character.</param>
        /// <returns>Upper-case initial(s), or "" when the character is outside the supported range.</returns>
        private static String GetCharSpellCode(char Hanzi)
        {
            int codePoint = Hanzi;
            if (codePoint > LastHanziCodePoint || codePoint < FirstHanziCodePoint)
            {
                return "";
            }

            // A keyed lookup cannot run past the end of the table, unlike the former
            // IndexOf/Substring scan which threw for the final entry (40863).
            string initials;
            if (PolyphoneInitials.TryGetValue(codePoint, out initials))
            {
                return initials;
            }

            return GetSingleCharSpellCode(Hanzi.ToString());
        }

        /// <summary>
        /// Returns the pinyin initial of a non-polyphone character from its two-byte
        /// GB2312 code. Text that encodes to a single byte is returned upper-cased.
        /// </summary>
        /// <param name="CnChar">Single character.</param>
        /// <returns>Single upper-case letter, or "?" when the code is not in any range.</returns>
        private static String GetSingleCharSpellCode(String CnChar)
        {
            byte[] encoded = HanziEncoding.GetBytes(CnChar);

            if (encoded.Length == 1)
            {
                return CnChar.ToUpperInvariant();
            }

            if (encoded.Length < 2)
            {
                // Empty input: nothing to classify.
                return "?";
            }

            int gbCode = encoded[0] * 256 + encoded[1];
            if (gbCode < FirstIndexedGbCode)
            {
                return "?";
            }

            for (int index = 0; index < InitialUpperBounds.Length; index++)
            {
                if (gbCode <= InitialUpperBounds[index])
                {
                    return InitialLetters[index].ToString();
                }
            }

            return "?";
        }

        /// <summary>
        /// Parses the "code:letters,code:letters" table into a dictionary. The first
        /// occurrence of a code wins; malformed entries are ignored.
        /// </summary>
        private static Dictionary<int, string> ParsePolyphoneTable(string table)
        {
            Dictionary<int, string> map = new Dictionary<int, string>();

            foreach (string entry in table.Split(','))
            {
                int colon = entry.IndexOf(':');
                if (colon <= 0 || colon == entry.Length - 1)
                {
                    continue;
                }

                int code;
                bool parsed = int.TryParse(
                    entry.Substring(0, colon),
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out code);

                if (parsed && !map.ContainsKey(code))
                {
                    map.Add(code, entry.Substring(colon + 1));
                }
            }

            return map;
        }

        /// <summary>
        /// Picks the encoding used for the GB2312 range lookup. GB2312 is requested
        /// explicitly so results do not depend on the machine's default code page;
        /// when the runtime does not provide it, falls back to
        /// <see cref="Encoding.Default"/>, which is what this class used before.
        /// </summary>
        private static Encoding ResolveHanziEncoding()
        {
            try
            {
                return Encoding.GetEncoding("GB2312");
            }
            catch (ArgumentException)
            {
                return Encoding.Default;
            }
            catch (NotSupportedException)
            {
                return Encoding.Default;
            }
        }
    }
}