forked from 2881099/NPinyin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPinyin.cs
160 lines (138 loc) · 4.96 KB
/
Pinyin.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
using System;
using System.Collections.Generic;
using System.Text;
namespace NPinyin
{
/// <summary>
/// Converts between Chinese characters and their pinyin spelling using the
/// lookup tables <c>PyCode.codes</c> and <c>PyHash.hashes</c>.
/// </summary>
public static class Pinyin
{
    // Layout of a PyCode.codes entry as relied on by the methods below:
    // the pinyin occupies the first PinyinFieldWidth characters (Trim() is applied
    // when reading it, so it is presumably space-padded), and the characters that
    // share this pinyin start at HanziStartIndex.
    // NOTE(review): index 6 is presumably a ':' separator - confirm against PyCode.
    private const int PinyinFieldWidth = 6;
    private const int HanziStartIndex = 7;

    /// <summary>
    /// Gets the upper-cased first letter of the pinyin of every character in the text.
    /// </summary>
    /// <param name="text">The text to convert; leading and trailing white space is ignored.</param>
    /// <returns>
    /// One character per input character: the first letter of its pinyin, or the
    /// character itself when no pinyin is found, upper-cased with invariant-culture rules.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static string GetInitials(string text)
    {
        ThrowIfNull(text, "text");

        string trimmed = text.Trim();
        StringBuilder initials = new StringBuilder(trimmed.Length);
        foreach (char ch in trimmed)
        {
            string py = GetPinyin(ch);
            if (py.Length > 0)
            {
                initials.Append(py[0]);
            }
        }

        // Invariant casing: the result must not depend on the current thread culture
        // (a Turkish culture would otherwise map 'i' to a dotted capital I).
        return initials.ToString().ToUpperInvariant();
    }

    /// <summary>
    /// Gets the pinyin initials of the text, passing the input and the result
    /// through <see cref="ConvertEncoding"/> with the given encoding.
    /// </summary>
    /// <param name="text">The text to convert.</param>
    /// <param name="encoding">The encoding the text is round-tripped through.</param>
    /// <returns>The initials produced by <see cref="GetInitials(string)"/>, converted back through <paramref name="encoding"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="encoding"/> is null.</exception>
    public static string GetInitials(string text, Encoding encoding)
    {
        ThrowIfNull(text, "text");
        ThrowIfNull(encoding, "encoding");

        string utf8Text = ConvertEncoding(text, encoding, Encoding.UTF8);
        return ConvertEncoding(GetInitials(utf8Text), Encoding.UTF8, encoding);
    }

    /// <summary>
    /// Gets the pinyin of every character in the text, separated by single spaces.
    /// </summary>
    /// <param name="text">The text to convert.</param>
    /// <returns>
    /// The pinyin of each character (or the character itself when no pinyin is found),
    /// each followed by a space, with leading and trailing white space removed.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public static string GetPinyin(string text)
    {
        ThrowIfNull(text, "text");

        StringBuilder pinyin = new StringBuilder();
        foreach (char ch in text)
        {
            // A separator follows every character, including ones without a pinyin;
            // the trailing one is removed by the final Trim().
            pinyin.Append(GetPinyin(ch));
            pinyin.Append(' ');
        }

        return pinyin.ToString().Trim();
    }

    /// <summary>
    /// Gets the pinyin of the text, passing the input and the result
    /// through <see cref="ConvertEncoding"/> with the given encoding.
    /// </summary>
    /// <param name="text">The text to convert; it is trimmed before conversion.</param>
    /// <param name="encoding">The encoding the text is round-tripped through.</param>
    /// <returns>The pinyin produced by <see cref="GetPinyin(string)"/>, converted back through <paramref name="encoding"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> or <paramref name="encoding"/> is null.</exception>
    public static string GetPinyin(string text, Encoding encoding)
    {
        ThrowIfNull(text, "text");
        ThrowIfNull(encoding, "encoding");

        string utf8Text = ConvertEncoding(text.Trim(), encoding, Encoding.UTF8);
        return ConvertEncoding(GetPinyin(utf8Text), Encoding.UTF8, encoding);
    }

    /// <summary>
    /// Gets the characters listed in <c>PyCode.codes</c> under the given pinyin.
    /// </summary>
    /// <param name="pinyin">The pinyin to look up; it is trimmed and lower-cased before matching.</param>
    /// <returns>
    /// The characters of the first table entry whose key matches, or an empty
    /// string when no entry matches.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="pinyin"/> is null.</exception>
    public static string GetChineseText(string pinyin)
    {
        ThrowIfNull(pinyin, "pinyin");

        // Table keys are compared ordinally, so the key is normalised with invariant
        // casing instead of the current culture.
        string key = pinyin.Trim().ToLowerInvariant();

        // A key shorter than the pinyin field is followed by padding; a key that
        // fills the field is followed directly by ':'.
        string paddedKey = key + " ";
        string fullWidthKey = key + ":";

        foreach (string code in PyCode.codes)
        {
            if (code.StartsWith(paddedKey, StringComparison.Ordinal)
                || code.StartsWith(fullWidthKey, StringComparison.Ordinal))
            {
                return code.Substring(HanziStartIndex);
            }
        }

        return "";
    }

    /// <summary>
    /// Gets the characters listed under the given pinyin, passing the input and
    /// the result through <see cref="ConvertEncoding"/> with the given encoding.
    /// </summary>
    /// <param name="pinyin">The pinyin to look up.</param>
    /// <param name="encoding">The encoding the text is round-tripped through.</param>
    /// <returns>The result of <see cref="GetChineseText(string)"/>, converted back through <paramref name="encoding"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="pinyin"/> or <paramref name="encoding"/> is null.</exception>
    public static string GetChineseText(string pinyin, Encoding encoding)
    {
        ThrowIfNull(pinyin, "pinyin");
        ThrowIfNull(encoding, "encoding");

        string utf8Pinyin = ConvertEncoding(pinyin, encoding, Encoding.UTF8);
        return ConvertEncoding(GetChineseText(utf8Pinyin), Encoding.UTF8, encoding);
    }

    /// <summary>
    /// Gets the pinyin of a single character.
    /// </summary>
    /// <param name="ch">The character to look up.</param>
    /// <returns>
    /// The trimmed pinyin field of the first table entry that lists the character,
    /// or the character itself as a string when no entry lists it.
    /// </returns>
    public static string GetPinyin(char ch)
    {
        short hash = GetHashIndex(ch);

        // Each hash bucket holds the indices of the PyCode.codes entries to search.
        // NOTE(review): assumes PyHash.hashes has at least PyCode.codes.Length
        // buckets, since the hash is taken modulo that length - confirm.
        var bucket = PyHash.hashes[hash];
        for (int i = 0; i < bucket.Length; ++i)
        {
            short index = bucket[i];
            string code = PyCode.codes[index];

            // Search only the character list so the pinyin field itself is never matched.
            if (code.IndexOf(ch, HanziStartIndex) != -1)
            {
                return code.Substring(0, PinyinFieldWidth).Trim();
            }
        }

        return ch.ToString();
    }

    /// <summary>
    /// Gets the pinyin of a single character, passing the input and the result
    /// through <see cref="ConvertEncoding"/> with the given encoding.
    /// </summary>
    /// <param name="ch">The character to look up.</param>
    /// <param name="encoding">The encoding the character is round-tripped through.</param>
    /// <returns>The result of <see cref="GetPinyin(char)"/>, converted back through <paramref name="encoding"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
    public static string GetPinyin(char ch, Encoding encoding)
    {
        ThrowIfNull(encoding, "encoding");

        // Only the first character of the converted text is looked up.
        ch = ConvertEncoding(ch.ToString(), encoding, Encoding.UTF8)[0];
        return ConvertEncoding(GetPinyin(ch), Encoding.UTF8, encoding);
    }

    /// <summary>
    /// Encodes the text with the source encoding, transcodes the bytes to the
    /// destination encoding and decodes them back into a string.
    /// </summary>
    /// <param name="text">The text to convert.</param>
    /// <param name="srcEncoding">The encoding used to turn the text into bytes.</param>
    /// <param name="dstEncoding">The encoding the bytes are converted to and decoded with.</param>
    /// <returns>The string decoded from the converted bytes.</returns>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public static string ConvertEncoding(string text, Encoding srcEncoding, Encoding dstEncoding)
    {
        ThrowIfNull(text, "text");
        ThrowIfNull(srcEncoding, "srcEncoding");
        ThrowIfNull(dstEncoding, "dstEncoding");

        byte[] srcBytes = srcEncoding.GetBytes(text);
        byte[] dstBytes = Encoding.Convert(srcEncoding, dstEncoding, srcBytes);
        return dstEncoding.GetString(dstBytes);
    }

    /// <summary>
    /// Computes the hash bucket index of a character.
    /// </summary>
    /// <param name="ch">The character to hash.</param>
    /// <returns>The character code modulo the number of entries in <c>PyCode.codes</c>.</returns>
    private static short GetHashIndex(char ch)
    {
        return (short)((uint)ch % PyCode.codes.Length);
    }

    /// <summary>
    /// Throws <see cref="ArgumentNullException"/> when the argument is null.
    /// </summary>
    /// <param name="value">The argument value to check.</param>
    /// <param name="paramName">The name of the parameter reported in the exception.</param>
    private static void ThrowIfNull(object value, string paramName)
    {
        if (value == null)
        {
            throw new ArgumentNullException(paramName);
        }
    }
}
}