对于 this question,我创建了以下Lua代码,将Unicode代码点转换为UTF-8字符串.有没有更好的方法(在Lua 5.1中)?在这种情况下,“更好”意味着“显着提高效率,或者更优选更少的代码行”. 注意:
注意:我并不是真的要求这个算法的code review;我要求更好的算法(或内置库).
do
local bytebits = {
{0x7F,{0,128}},
{0x7FF,{192,32},{128,64}},
{0xFFFF,{224,16},{128,64},{128,64}},
{0x1FFFFF,{240,8},{128,64},{128,64},{128,64}}
}
function utf8(decimal)
local charbytes = {}
for b,lim in ipairs(bytebits) do
if decimal<=lim[1] then
for i=b,1,-1 do
local prefix,max = lim[i+1][1],lim[i+1][2]
local mod = decimal % max
charbytes[i] = string.char( prefix + mod )
decimal = ( decimal - mod ) / max
end
break
end
end
return table.concat(charbytes)
end
end
c=utf8(0x24) print(c.." is "..#c.." bytes.") --> $is 1 bytes.
c=utf8(0xA2) print(c.." is "..#c.." bytes.") --> ¢ is 2 bytes.
c=utf8(0x20AC) print(c.." is "..#c.." bytes.") --> € is 3 bytes.
c=utf8(0xFFFF) print(c.." is "..#c.." bytes.") --> is 3 bytes.
c=utf8(0x10000) print(c.." is "..#c.." bytes.") --> 