全站通知:
模块:Pinyin
刷
历
编
跳到导航
跳到搜索
local p = {}
local bit32 = bit32 or require('bit32')
-- 导入数据模块
local hanziDict = require('Module:Pinyin/data/hanzi')
-- local phrasesDict = require('Module:Pinyin/data/phrases')
-- local dictWeb = require('Module:Pinyin/data/dict-zi-web')
-- Utility functions
local find, sub = string.find, string.sub

-- Splits `str` around every match of the Lua pattern `sep`.
--
-- @param str   string to split
-- @param sep   separator, interpreted as a Lua pattern (not plain text);
--              defaults to '%s+' (runs of whitespace)
-- @param nmax  maximum number of splits to perform; nil or a negative value
--              means "no limit", 0 returns the whole string as one piece
-- @return      array of pieces; an empty table when `str` is empty
-- Raises an error if `sep` matches an empty string, because such a match
-- never consumes input and the scan could not make progress.
local function split(str, sep, nmax)
    if sep == nil then
        sep = '%s+'
    end

    local pieces = {}
    if #str <= 0 then
        return pieces
    end

    nmax = nmax or -1
    local pieceIndex = 1   -- slot the next piece is written to
    local scanFrom = 1     -- first byte of the piece currently being collected
    local matchStart, matchEnd = find(str, sep, scanFrom)

    while matchStart and nmax ~= 0 do
        -- An empty match (end < start) would leave scanFrom unchanged and
        -- loop forever, so reject it explicitly.
        if matchEnd < matchStart then
            error("split: separator pattern matched an empty string", 2)
        end
        pieces[pieceIndex] = sub(str, scanFrom, matchStart - 1)
        pieceIndex = pieceIndex + 1
        scanFrom = matchEnd + 1
        nmax = nmax - 1
        matchStart, matchEnd = find(str, sep, scanFrom)
    end

    -- Whatever follows the last separator (possibly "") is the final piece.
    pieces[pieceIndex] = sub(str, scanFrom)
    return pieces
end
-- Tone-mark conversion table.
-- Maps a tone-marked pinyin letter to its base letter followed by the tone
-- number. "v" stands for "ü"; a bare "ü" maps to "v0".
local phoneticTable = {
    ["ā"] = "a1", ["á"] = "a2", ["ǎ"] = "a3", ["à"] = "a4",
    ["ē"] = "e1", ["é"] = "e2", ["ě"] = "e3", ["è"] = "e4",
    ["ō"] = "o1", ["ó"] = "o2", ["ǒ"] = "o3", ["ò"] = "o4",
    ["ī"] = "i1", ["í"] = "i2", ["ǐ"] = "i3", ["ì"] = "i4",
    ["ū"] = "u1", ["ú"] = "u2", ["ǔ"] = "u3", ["ù"] = "u4",
    ["ü"] = "v0",
    ["ǖ"] = "v1", -- first-tone ü, needed so every tone of ü has an entry
    ["ǘ"] = "v2", ["ǚ"] = "v3", ["ǜ"] = "v4",
    ["ń"] = "n2", ["ň"] = "n3",
    ["ǹ"] = "n4", -- fourth-tone n
    ["ḿ"] = "m2", -- second-tone m (U+1E3F)
    -- NOTE(review): the key below is an empty string in this copy of the
    -- file; presumably it once held another glyph for second-tone "m".
    -- Kept as-is — confirm against the upstream table.
    [""] = "m2",
};
-- Accented-character map.
-- Maps an accented Latin letter to an ASCII replacement: grave/acute/
-- diaeresis/circumflex letters map to the bare letter, while macron/caron
-- letters and the "ü" family map to "<letter><tone number>" ("v" for "ü").
--
-- Every key appears exactly once. "ü" is mapped to "v0" only: a second
-- `["ü"] = "u"` entry would make the result depend on the (unspecified)
-- order in which a table constructor assigns repeated keys.
local accentMap = {
    ["à"] = "a", ["á"] = "a", ["ä"] = "a", ["â"] = "a",
    ["è"] = "e", ["é"] = "e", ["ë"] = "e", ["ê"] = "e",
    ["ì"] = "i", ["í"] = "i", ["ï"] = "i", ["î"] = "i",
    ["ò"] = "o", ["ó"] = "o", ["ö"] = "o", ["ô"] = "o",
    ["ù"] = "u", ["ú"] = "u", ["û"] = "u",
    ["ñ"] = "n",
    ["ç"] = "c",
    ["ā"] = "a1", ["ǎ"] = "a3",
    ["ē"] = "e1", ["ě"] = "e3",
    ["ō"] = "o1", ["ǒ"] = "o3",
    ["ī"] = "i1", ["ǐ"] = "i3",
    ["ū"] = "u1", ["ǔ"] = "u3",
    ["ü"] = "v0", ["ǘ"] = "v2", ["ǚ"] = "v3", ["ǜ"] = "v4",
    ["ń"] = "n2", ["ň"] = "n3",
    -- NOTE(review): the key below is an empty string in this copy of the
    -- file; presumably it once held a glyph for second-tone "m".
    -- Kept as-is — confirm against the upstream table.
    [""] = "m2",
}
-- Decodes a UTF-8 byte string into an array of Unicode code points.
--
-- The length of each sequence is taken from its first byte (< 0x80: 1 byte,
-- < 0xE0: 2, < 0xF0: 3, < 0xF8: 4); any larger first byte raises
-- "invalid UTF-8 character sequence". Following bytes contribute their low
-- six bits and are not otherwise validated.
--
-- @param utf8str  string to decode (asserted to be a string)
-- @return         array of code-point numbers, empty for an empty string
-- Note: defined as a global function, and relies on the `bit32` library.
function Utf8to32(utf8str)
    local bitlib
    if type(bit32) == "table" then
        bitlib = bit32
    end
    assert(type(utf8str) == "string")

    local codepoints = {}
    local remaining = 0   -- bytes still expected for the current sequence
    local current         -- code point being assembled (nil before the first byte)

    for index = 1, #utf8str do
        local byte = utf8str:byte(index)
        if remaining ~= 0 then
            -- Continuation byte: shift in its six payload bits.
            current = bitlib.bor(bitlib.lshift(current, 6), bitlib.band(byte, 0x3F))
        else
            -- A new sequence starts: emit the code point finished so far.
            if current ~= nil then
                codepoints[#codepoints + 1] = current
            end
            if byte < 0x80 then
                remaining = 1
            elseif byte < 0xE0 then
                remaining = 2
            elseif byte < 0xF0 then
                remaining = 3
            elseif byte < 0xF8 then
                remaining = 4
            else
                error("invalid UTF-8 character sequence")
            end
            -- Keep only the low (8 - length) bits of the first byte.
            current = bitlib.band(byte, 2 ^ (8 - remaining) - 1)
        end
        remaining = remaining - 1
    end

    -- Emit the last code point, if any input was seen.
    if current ~= nil then
        codepoints[#codepoints + 1] = current
    end
    return codepoints
end
-- Formats a number as a lowercase hexadecimal string without prefix or
-- padding. Zero — and any value that is not greater than zero — yields '0'.
--
-- @param num  number to format
-- @return     hexadecimal digits as a string
-- Note: defined as a global function.
function num2hex(num)
    local digits = {}
    local remaining = num
    while remaining > 0 do
        local nibble = math.fmod(remaining, 16)
        -- Digits are produced least-significant first, so prepend each one.
        table.insert(digits, 1, string.sub('0123456789abcdef', nibble + 1, nibble + 1))
        remaining = math.floor(remaining / 16)
    end
    if #digits == 0 then
        return '0'
    end
    return table.concat(digits)
end
-- Core pinyin conversion.
--
-- Walks `ustring` one UTF-8 character at a time and builds a list of tokens:
--   * a two-byte character that has an `accentMap` entry is replaced by that
--     entry and joins the current run of Latin text;
--   * any other multi-byte character ends the current Latin run and is looked
--     up in `hanziDict` under the lowercase hex form of its code point; the
--     first comma-separated reading becomes a token of its own. Characters
--     without a reading are dropped, or emitted as "" when `keepNull` is set;
--   * a whitespace character ends the current Latin run and is not emitted;
--   * every other single-byte character is lower-cased and appended to the
--     current Latin run.
--
-- @param ustring   UTF-8 text to convert
-- @param flat      truthy: replace each tone-marked letter of a reading with
--                  the first character of its `phoneticTable` entry
-- @param keepNull  truthy: keep a "" placeholder for characters with no reading
-- @return          array of token strings
local function pinyinConvert(ustring, flat, keepNull)
    local UTF8_CHAR = "([%z\1-\127\194-\244][\128-\191]*)"
    local tokens = {}
    local latinRun = {}

    -- Emits the pending Latin run (if any) as one token.
    local function flushLatinRun()
        if #latinRun > 0 then
            tokens[#tokens + 1] = table.concat(latinRun)
            latinRun = {}
        end
    end

    -- Maps one character of a reading to its toneless form.
    local function stripTone(phonetic)
        if #phonetic > 1 then
            local numbered = phoneticTable[phonetic]
            -- Letters missing from the table are passed through unchanged
            -- instead of indexing a nil lookup result.
            if numbered then
                return numbered:sub(1, 1)
            end
        end
        return phonetic
    end

    string.gsub(ustring, UTF8_CHAR, function(char)
        local replacement = nil
        if #char == 2 then
            replacement = accentMap[char]
        end

        if replacement then
            -- The replacement is ASCII text even when it is two characters
            -- long (e.g. "a1"), so it must not go through the Hanzi lookup.
            latinRun[#latinRun + 1] = replacement:lower()
        elseif #char > 1 then
            flushLatinRun()
            local readings = hanziDict[num2hex(Utf8to32(char)[1])]
            -- split() returns an empty table for "", leaving `pinyin` nil.
            local pinyin = readings and split(readings, ",")[1]
            if pinyin then
                if flat then
                    pinyin = pinyin:gsub(UTF8_CHAR, stripTone)
                end
                tokens[#tokens + 1] = pinyin
            elseif keepNull then
                tokens[#tokens + 1] = ""
            end
        elseif char:find("%s") then
            -- Whitespace only terminates a token; it is never stored, even
            -- when no Latin run is pending.
            flushLatinRun()
        else
            latinRun[#latinRun + 1] = char:lower()
        end
    end)

    flushLatinRun()
    return tokens
end
-- Entry point for templates ({{#invoke:Pinyin|pinyin|...}}).
--
-- Arguments read from `frame.args`:
--   1 or text            text to convert (defaults to "")
--   flat or 2            flag; enabled by "true" (any letter case) or "1"
--   keepNull or 3        flag; enabled by "true" (any letter case) or "1"
--   separator or sep     string placed between tokens (defaults to " ")
--
-- @param frame  object whose `args` field holds the arguments above
-- @return       the tokens produced by pinyinConvert, joined by the separator
function p.pinyin(frame)
    local args = frame.args

    -- Interprets an argument as an on/off flag. Surrounding whitespace is
    -- ignored because positional template arguments are passed untrimmed;
    -- non-string values are accepted so Lua callers may pass real booleans.
    local function isEnabled(value)
        if value == nil then
            return false
        end
        if type(value) == "boolean" then
            return value
        end
        local normalized = tostring(value):match("^%s*(.-)%s*$"):lower()
        return normalized == "true" or normalized == "1"
    end

    local text = args[1] or args.text or ""
    local flat = isEnabled(args.flat or args[2])
    local keepNull = isEnabled(args.keepNull or args[3])
    local separator = args.separator or args.sep or " "

    local result = pinyinConvert(text, flat, keepNull)
    return table.concat(result, separator)
end
-- Entry point for other Lua modules: forwards its arguments to
-- pinyinConvert and hands back the resulting array of tokens unjoined.
--
-- @param text      UTF-8 text to convert
-- @param flat      passed through to pinyinConvert
-- @param keepNull  passed through to pinyinConvert
-- @return          array of token strings
function p.convert(text, flat, keepNull)
    local tokens = pinyinConvert(text, flat, keepNull)
    return tokens
end
-- Converts `text` with pinyinConvert and joins the tokens into one string.
--
-- @param text       UTF-8 text to convert
-- @param separator  string placed between tokens; a nil or false value
--                   selects a single space
-- @param flat       passed through to pinyinConvert
-- @param keepNull   passed through to pinyinConvert
-- @return           the joined string
function p.convertWithSeparator(text, separator, flat, keepNull)
    if not separator then
        separator = " "
    end
    return table.concat(pinyinConvert(text, flat, keepNull), separator)
end
return p