Khác biệt giữa bản sửa đổi của “Mô đun:Ruby”
Không có tóm lược sửa đổi Thẻ: Đã bị lùi lại |
Không có tóm lược sửa đổi Thẻ: Đã bị lùi lại |
||
| Dòng 1: | Dòng 1: | ||
---Lexicographic tools for Vietnamese language text.
local m_str_utils = require("Module:string utilities")

-- String helpers cached as file-level locals; the functions below call them
-- by these short names.
local find = m_str_utils.find
local format = string.format
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local lower = m_str_utils.lower
local match = m_str_utils.match
-- `sub` is called further down (`upper(sub(text, 1, 1)) .. sub(text, 2, -1)`)
-- but had no declaration here, so it resolved to an undefined global.
local sub = mw.ustring.sub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local upper = m_str_utils.upper

-- Language object for Vietnamese, passed to the link/column helpers.
local lang = require("Module:languages").getByCode("vi")

-- Table of public functions and data returned at the end of the module.
local export = {}
---Converts the given text to traditional tone marks. | ---Converts the given text to traditional tone marks. | ||
function | function export.toTraditionalTones(text) | ||
if type(text) == "table" then | if type(text) == "table" then | ||
text = text.args[1] | text = text.args[1] | ||
end | end | ||
return ( | return (gsub(text, "%a+", function (word) | ||
if | if match(word, "^qu[yýỳỷỹỵ]$") then return word end | ||
return ( | return (gsub(word, "%a%a$", { | ||
["oá"] = "óa", ["oà"] = "òa", ["oả"] = "ỏa", ["oã"] = "õa", ["oạ"] = "ọa", | ["oá"] = "óa", ["oà"] = "òa", ["oả"] = "ỏa", ["oã"] = "õa", ["oạ"] = "ọa", | ||
["oé"] = "óe", ["oè"] = "òe", ["oẻ"] = "ỏe", ["oẽ"] = "õe", ["oẹ"] = "ọe", | ["oé"] = "óe", ["oè"] = "òe", ["oẻ"] = "ỏe", ["oẽ"] = "õe", ["oẹ"] = "ọe", | ||
| Dòng 19: | Dòng 34: | ||
---Converts the given text to reformed tone marks. | ---Converts the given text to reformed tone marks. | ||
function | function export.toReformedTones(text) | ||
if type(text) == "table" then | if type(text) == "table" then | ||
text = text.args[1] | text = text.args[1] | ||
end | end | ||
return ( | return (gsub(text, "%a+", function (word) | ||
return ( | return (gsub(word, "%a%a$", { | ||
["óa"] = "oá", ["òa"] = "oà", ["ỏa"] = "oả", ["õa"] = "oã", ["ọa"] = "oạ", | ["óa"] = "oá", ["òa"] = "oà", ["ỏa"] = "oả", ["õa"] = "oã", ["ọa"] = "oạ", | ||
["óe"] = "oé", ["òe"] = "oè", ["ỏe"] = "oẻ", ["õe"] = "oẽ", ["ọe"] = "oẹ", | ["óe"] = "oé", ["òe"] = "oè", ["ỏe"] = "oẻ", ["õe"] = "oẽ", ["ọe"] = "oẹ", | ||
| Dòng 33: | Dòng 48: | ||
---Generate alternative orthographies. | ---Generate alternative orthographies. | ||
function | function export.allSpellings(main_spelling, makeLinks) | ||
local frame = nil | local frame = nil | ||
if type(main_spelling) == "table" then | if type(main_spelling) == "table" then | ||
| Dòng 41: | Dòng 56: | ||
local xformers = { | local xformers = { | ||
export.toTraditionalTones, export.toReformedTones, | |||
} | } | ||
| Dòng 63: | Dòng 78: | ||
---Combining tone marks of Vietnamese, built from their Unicode codepoints.
-- removeDiacritics() wraps this value in "[" .. "]" to strip the marks.
-- In order: grave (à), acute (á), tilde (ã), hook above (ả), dot below (ạ).
export.combiningToneMarks = u(0x300, 0x301, 0x303, 0x309, 0x323)
---Combining accent (vowel-quality) marks of Vietnamese, built from their
-- Unicode codepoints.
-- removeDiacritics() wraps this value in "[" .. "]" to strip the marks.
-- In order: circumflex (â), breve (ă), horn (ơ).
export.combiningAccentMarks = u(0x302, 0x306, 0x31b)
---Strips Vietnamese diacritical marks from the given text. | ---Strips Vietnamese diacritical marks from the given text. | ||
-- @param tones | -- @param tones Set to “0” to leave tone marks intact. | ||
-- @param accents | -- @param accents Set to “0” to leave accent marks intact. | ||
-- @param đ | -- @param đ Set to “0” to leave “Đ” and “đ” intact. | ||
function | function export.removeDiacritics(text, toneMarks, accentMarks, stroke) | ||
if type(text) == "table" then | if type(text) == "table" then | ||
text, toneMarks, accentMarks, stroke = text.args[1], | text, toneMarks, accentMarks, stroke = text.args[1], | ||
| Dòng 89: | Dòng 104: | ||
not text.args["đ"] or tonumber(text.args["đ"]) == 1 | not text.args["đ"] or tonumber(text.args["đ"]) == 1 | ||
end | end | ||
text = | text = toNFD(text) | ||
if toneMarks then | if toneMarks then | ||
text = | text = gsub(text, "[" .. export.combiningToneMarks .. "]", "") | ||
end | end | ||
if accentMarks then | if accentMarks then | ||
text = | text = gsub(text, "[" .. export.combiningAccentMarks .. "]", "") | ||
end | end | ||
if stroke then | if stroke then | ||
text = | text = gsub(text, "[Đđ]", {["Đ"] = "D", ["đ"] = "d"}) | ||
end | end | ||
return | return toNFC(text) | ||
end | end | ||
---Vietnamese letters for use in comp().
-- The order of the characters matters: compWord() looks up each letter's
-- position in this string with find(export.letters, c, 1, true) and compares
-- the positions, so this string defines the collation order.
-- It is also spliced into "[" .. "]" character classes elsewhere in the
-- module (for example when splitting readings in ruby()).
export.letters = "aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ"
---Compare two syllables according to Vietnamese dictionary sorting order. | ---Compare two syllables according to Vietnamese dictionary sorting order. | ||
function | function export.compWord(word1, word2) | ||
if | if find(word1, word2, 1, true) == 0 then return false end | ||
if | if find(word2, word1, 1, true) == 0 then return true end | ||
do | do | ||
local func1, static1, var1 = | local func1, static1, var1 = gmatch(word1, "[" .. export.letters .. "]") | ||
local func2, static2, var2 = | local func2, static2, var2 = gmatch(word2, "[" .. export.letters .. "]") | ||
while true do | while true do | ||
local c1 = func1(static1, var1) | local c1 = func1(static1, var1) | ||
| Dòng 118: | Dòng 133: | ||
if c1 == nil or c2 == nil then break end | if c1 == nil or c2 == nil then break end | ||
local idx1 = | local idx1 = find(export.letters, c1, 1, true) | ||
local idx2 = | local idx2 = find(export.letters, c2, 1, true) | ||
if idx1 and idx2 then | if idx1 and idx2 then | ||
if idx1 < idx2 then return true end | if idx1 < idx2 then return true end | ||
| Dòng 130: | Dòng 145: | ||
end | end | ||
---Compare two strings according to Vietnamese dictionary sorting order.
-- The strings are walked word by word (runs matching "%a+"). The first pair
-- of differing words decides the result through export.compWord(), trying
-- progressively stricter forms: lower-cased with tone marks removed, then
-- lower-cased, then the words as written.
-- If one string runs out of words first, it sorts before the other.
-- If both run out together (the strings differ only outside of letters),
-- the plain `<` comparison of the raw strings decides, so that
-- comp(a, b) and comp(b, a) are never both true.
-- @param text1 First string.
-- @param text2 Second string.
-- @return true if text1 sorts before text2, false otherwise.
function export.comp(text1, text2)
	if text1 == text2 then return false end

	local iter1, state1, ctl1 = gmatch(text1, "%a+")
	local iter2, state2, ctl2 = gmatch(text2, "%a+")
	while true do
		local word1 = iter1(state1, ctl1)
		local word2 = iter2(state2, ctl2)

		-- Both exhausted at once: every word matched, so fall through to the
		-- raw string comparison instead of answering "true" in both directions.
		if word1 == nil and word2 == nil then
			break
		end
		if word1 == nil then return true end
		if word2 == nil then return false end

		if word1 ~= word2 then
			local lower1 = lower(word1)
			local lower2 = lower(word2)
			local noTones1 = export.removeDiacritics(lower1, true, false, false)
			local noTones2 = export.removeDiacritics(lower2, true, false, false)

			-- Compare base letters.
			if noTones1 ~= noTones2 then
				return export.compWord(noTones1, noTones2)
			end

			-- Compare letters case-insensitively.
			if lower1 ~= lower2 then
				return export.compWord(lower1, lower2)
			end

			-- Compare letters including tones.
			assert(word1 ~= word2)
			return export.compWord(word1, word2)
		end
	end

	return text1 < text2
end
-- pruby variable for phien thiet hyperlinks (used by export.readings() and export.ruby()).
-- Module-level rendering mode: export.readings() sets it to "link" or
-- "nocolor" right before calling export.ruby() and resets it to {} afterwards.
-- export.ruby() picks its <ruby> markup by comparing it against those two
-- strings; any other value (including this initial empty table) selects the
-- plain markup.
local pruby = {}
---Abbreviations and text for Han tu references (used by export.createRefTag()) | |||
---[[Wiktionary:Beer parlour/2018/December#References for Vietnamese readings listed under Template:vi-readings]] | ---[[Wiktionary:Beer parlour/2018/December#References for Vietnamese readings listed under Template:vi-readings]] | ||
export.refAbbreviations = { | |||
tvctdhv = "Trần (1999)"; | tvctdhv = "Trần (1999)"; | ||
hvttd = "Nguyễn (1974)"; | hvttd = "Nguyễn (1974)"; | ||
| Dòng 148: | Dòng 205: | ||
---Builds a <ref> extension tag whose content is a [[Template:vi-ref]] call.
-- An abbreviation found in export.refAbbreviations is replaced by its full
-- name; any other value is used as given.
-- @param ref Reference abbreviation or name; also used as the tag's `name`.
-- @return The result of frame:extensionTag() for the "ref" tag.
function export.createRefTag(ref)
	local frame = mw.getCurrentFrame()
	local label = export.refAbbreviations[ref]
	if not label then
		label = ref
	end
	local wikitext = format("{{vi-ref|%s.}}", label)
	return frame:extensionTag("ref", wikitext, {name = ref})
end
---[[Template:vi-readings]] | ---[[Template:vi-readings]] | ||
function | function export.readings(hanviet, nom, rs, phienthiet, reading) | ||
local pagename = mw.title.getCurrentTitle().text | local pagename = mw.title.getCurrentTitle().text | ||
if type(hanviet) == "table" then | if type(hanviet) == "table" then | ||
| Dòng 174: | Dòng 225: | ||
{ | { | ||
link = "Hán Việt", | link = "Hán Việt", | ||
cat = "Vietnamese | cat = "Vietnamese Chữ Hán", | ||
list = hanviet and mw.text.split(hanviet, "%s*,%s*"), | list = hanviet and mw.text.split(hanviet, "%s*,%s*"), | ||
phienthiet = phienthiet and mw.text.split(phienthiet, "%s*,%s*") | phienthiet = phienthiet and mw.text.split(phienthiet, "%s*,%s*") | ||
| Dòng 192: | Dòng 243: | ||
if style.list and #style.list > 0 and #style.list[1] > 0 then | if style.list and #style.list > 0 and #style.list[1] > 0 then | ||
local readings = style.list | local readings = style.list | ||
-- table.sort(readings, | -- table.sort(readings, export.comp) | ||
for j, reading in ipairs(readings) do | for j, reading in ipairs(readings) do | ||
local ref | local ref | ||
local a, b = | local a, b = match(reading, "(.-)%s*%-%s*(.+)") | ||
if a then | if a then | ||
reading, ref = a, b | reading, ref = a, b | ||
end | end | ||
local spellings = | local spellings = export.allSpellings(reading, true) | ||
readings[j] = table.concat(spellings, "/") | readings[j] = table.concat(spellings, "/") | ||
| Dòng 207: | Dòng 258: | ||
if style.phienthiet and style.phienthiet[j] then | if style.phienthiet and style.phienthiet[j] then | ||
pruby = "link" | pruby = "link" | ||
local ruby = | local ruby = export.ruby(match(mw.text.trim(style.phienthiet[j]), | ||
"(%a+) +(.+)")) | "(%a+) +(.+)")) | ||
pruby = {} | pruby = {} | ||
if ruby then | if ruby then | ||
pruby = "nocolor" | pruby = "nocolor" | ||
local suffix = | local suffix = export.ruby("切", "thiết") | ||
pruby = {} | pruby = {} | ||
readings[j] = | readings[j] = format("%s (%s[[fanqie#English|%s]])", | ||
readings[j], ruby, suffix) | readings[j], ruby, suffix) | ||
end | end | ||
| Dòng 222: | Dòng 273: | ||
if ref then | if ref then | ||
for ref in mw.text.gsplit(ref, "%s*;%s*") do | for ref in mw.text.gsplit(ref, "%s*;%s*") do | ||
readings[j] = readings[j] .. | readings[j] = readings[j] .. export.createRefTag(ref) | ||
end | end | ||
end | end | ||
| Dòng 229: | Dòng 280: | ||
local sortkey = rs or mw.title.getCurrentTitle().text | local sortkey = rs or mw.title.getCurrentTitle().text | ||
readings = table.concat(readings, ", ") | readings = table.concat(readings, ", ") | ||
table.insert(lines, | table.insert(lines, format("<span class='Hani' lang='vi' style='font-size: 135%%;'>%s</span>: '''[[%s]]''' readings: %s[[Category:%s|%s]] [[Category:Vietnamese lemmas]] [[Category:Vietnamese Han characters]]</br>", | ||
pagename, style.link, readings, style.cat, sortkey)) | pagename, style.link, readings, style.cat, sortkey)) | ||
end | end | ||
| Dòng 239: | Dòng 290: | ||
---[[Template:vi-ruby]] | ---[[Template:vi-ruby]] | ||
function | function export.ruby(characters, readings, mark, alts) | ||
if type(characters) == "table" then | if type(characters) == "table" then | ||
local args = characters:getParent().args | local args = characters:getParent().args | ||
| Dòng 254: | Dòng 305: | ||
end | end | ||
readings = mw.text.split(readings, "[^" .. | readings = mw.text.split(readings, "[^" .. export.letters .. "]+") | ||
local result = {} | local result = {} | ||
local character_idx = 1 | local character_idx = 1 | ||
local alt_idx = 1 | local alt_idx = 1 | ||
for character in | for character in gmatch(characters, ".") do | ||
local is_alt = false | local is_alt = false | ||
if character == "*" and alts[alt_idx] then | if character == "*" and alts[alt_idx] then | ||
| Dòng 266: | Dòng 317: | ||
alt_idx = alt_idx + 1 | alt_idx = alt_idx + 1 | ||
end | end | ||
if is_alt or ( | if is_alt or (match(character, "^%a$") and not character:match("^%w$")) then | ||
local reading = readings[character_idx] | local reading = readings[character_idx] | ||
if mark and character == mark then | if mark and character == mark then | ||
character = | character = format("<mark>%s</mark>", character) | ||
reading = | reading = format("<mark>%s</mark>", reading) | ||
end | end | ||
if pruby == 'link' then | if pruby == 'link' then | ||
character = | character = format( | ||
"<ruby | "<ruby><rb><span class='Hani'; span style='font-size: 100%%'>[[%s#Vietnamese|%s]]</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em; font-size: 135%%;'>[[%s#Vietnamese|%s]]</span></rt><rp>)</rp></ruby>", | ||
character, character, reading, reading) | character, character, reading, reading) | ||
end | end | ||
if pruby == 'nocolor' then | if pruby == 'nocolor' then | ||
character = | character = format( | ||
"<ruby | "<ruby><rb><span class='Hani' style='color:var(--wikt-palette-black, #202122);'>%s</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em; font-size: 125%%;'>%s</span></rt><rp>)</rp></ruby>", | ||
character, reading) | character, reading) | ||
end | end | ||
if pruby ~= 'link' and pruby ~= 'nocolor' then | if pruby ~= 'link' and pruby ~= 'nocolor' then | ||
character = | character = format( | ||
"<ruby | "<ruby><rb><span class='Hani'>%s</span></rb><rp>(</rp><rt><span style='padding: 0 0.25em;'>%s</span></rt><rp>)</rp></ruby>", | ||
character, reading) | character, reading) | ||
end | end | ||
| Dòng 291: | Dòng 342: | ||
table.insert(result, character) | table.insert(result, character) | ||
end | end | ||
return | return format("<span lang='vi' style='font-size: 135%%;'>%s</span>", table.concat(result)) | ||
end | end | ||
---Builds a floating table listing the Han-script characters of the current
-- page title, one linked cell per character.
-- Every character of the title that is not in the "Hani" script's character
-- set is dropped first; the header cell spans the remaining characters.
-- @return The HTML/wikitext of the table as a string.
function export.hantutab()
	local hani_set = require("Module:scripts").getByCode("Hani"):getCharacters()
	local page_name = mw.title.getCurrentTitle().text

	-- Parentheses keep only gsub's first result (the string, not the count).
	local hantu = (gsub(page_name, '[^' .. hani_set .. ']', ''))
	local cells = (gsub(hantu, '(.)', '<td style="padding:0.5em;">[[%1#Vietnamese|%1]]</td>'))

	local head_open = '<table class="floatright wikitable" style="text-align:center; font-size:small;"><tr><th colspan="'
	local head_close = '" style="font-weight:normal;">[[Hán Nôm|chữ Hán Nôm]] in this term</th></tr><tr lang="vi" class="Hani" style="font-size:2em; background:var(--wikt-palette-white,#ffffff); color:inherit; line-height:1em;">'

	return head_open .. len(hantu) .. head_close .. cells .. '</tr></table>'
end
---Returns the categories indicated by the given wikitext. | ---Returns the categories indicated by the given wikitext. | ||
function | function export.classifierCategories(frame) | ||
local src = frame.args[1] | local src = frame.args[1] | ||
local classifiers = {} | local classifiers = {} | ||
for classifier in | for classifier in gmatch(gsub(src, "<[^>]->", ""), "[" .. export.letters .. "]+") do | ||
if classifier ~= "l" and classifier ~= "vi" and classifier ~= "vi-l" and | if classifier ~= "l" and classifier ~= "vi" and classifier ~= "vi-l" and | ||
classifier ~= "Vietnamese" then | classifier ~= "Vietnamese" then | ||
local cat = | local cat = format("[[Category:Vietnamese nouns classified by %s]]", | ||
classifier) | classifier) | ||
table.insert(classifiers, cat) | table.insert(classifiers, cat) | ||
| Dòng 319: | Dòng 371: | ||
end | end | ||
function | function export.new(frame) | ||
local title = mw.title.getCurrentTitle().subpageText | local title = mw.title.getCurrentTitle().subpageText | ||
local args = frame:getParent().args | local args = frame:getParent().args | ||
| Dòng 338: | Dòng 390: | ||
local picc = args["picc"] or false | local picc = args["picc"] or false | ||
nom = nom and | nom = nom and gsub(nom, "(.)", "[[%1]], ") or false | ||
nom = nom and | nom = nom and gsub(nom, ", $", "") or false | ||
if args["h"] then | if args["h"] then | ||
etym = "{{vi-etym-sino|" .. args["h"] .. "}}." | etym = "{{vi-etym-sino|" .. args["h"] .. "}}." | ||
end | end | ||
if not etym and | if not etym and match(title, " ") then | ||
etym = "{{com|vi" | etym = "{{com|vi" | ||
for word in mw.text.gsplit(title, " ") do | for word in mw.text.gsplit(title, " ") do | ||
| Dòng 351: | Dòng 403: | ||
end | end | ||
if etym == "-" then etym = false end | if etym == "-" then etym = false end | ||
if etym then etym = | if etym then etym = gsub(etym, "^%<", "From") end | ||
local result = "" | local result = "" | ||
| Dòng 364: | Dòng 416: | ||
["cl"] = "Classifier", ["cls"] = "Classifier", ["num"] = "Numeral", ["abb"] = "Abbreviation", ["deter"] = "Determiner" | ["cl"] = "Classifier", ["cls"] = "Classifier", ["num"] = "Numeral", ["abb"] = "Abbreviation", ["deter"] = "Determiner" | ||
}; | }; | ||
return pos_title[text] or | return pos_title[text] or upper(sub(text, 1, 1)) .. sub(text, 2, -1) | ||
end | end | ||
| Dòng 403: | Dòng 455: | ||
(args["wp"] == "y" and "" or "|" .. args["wp"]) .. "}}" end | (args["wp"] == "y" and "" or "|" .. args["wp"]) .. "}}" end | ||
if pic then result = result .. "\n[[File:" .. pic .. "|thumb|" .. | if pic then result = result .. "\n[[File:" .. pic .. "|thumb|" .. | ||
(picc or | (picc or gsub(title, '^%l', upper) .. ".") .. "]]" end | ||
result = result .. other("alt", "Alternative forms", args) | result = result .. other("alt", "Alternative forms", args) | ||
| Dòng 441: | Dòng 493: | ||
end | end | ||
function | function export.new_der(frame) | ||
local title = mw.title.getCurrentTitle().subpageText | local title = mw.title.getCurrentTitle().subpageText | ||
local data_module = require("Module:vi/vocab-list") | local data_module = require("Module:vi/vocab-list") | ||
| Dòng 465: | Dòng 517: | ||
local vi_sort_module = require("Module:vi-sortkey") | local vi_sort_module = require("Module:vi-sortkey") | ||
local makeSortKey = require("Module: | local makeSortKey = require("Module:memoize")(vi_sort_module.makeSortKey) | ||
table.sort(res, function(term1, term2) return makeSortKey(term1) < makeSortKey(term2) end) | table.sort(res, function(term1, term2) return makeSortKey(term1) < makeSortKey(term2) end) | ||
| Dòng 471: | Dòng 523: | ||
end | end | ||
function | function export.derived(frame) | ||
local tu_lay_note = "<span style=\"padding-left:4px; padding-right:4px\"> </span><span style=\"background:#ffffe0\">(''[[từ láy]]'')</span>" | local tu_lay_note = "<span style=\"padding-left:4px; padding-right:4px\"> </span><span style=\"background:#ffffe0\">(''[[từ láy]]'')</span>" | ||
local m_columns = require("Module:columns") | local m_columns = require("Module:columns") | ||
| Dòng 486: | Dòng 538: | ||
for i, word in ipairs(args) do | for i, word in ipairs(args) do | ||
word, is_tu_lay = | word, is_tu_lay = gsub(word, "%:tl", "") | ||
tu_lay = is_tu_lay > 0 and tu_lay_note or "" | tu_lay = is_tu_lay > 0 and tu_lay_note or "" | ||
local word_parts = mw.text.split( | local word_parts = mw.text.split(gsub(word, "\n", "" ), ":") | ||
table.insert(result, m_links.full_link({ | table.insert(result, m_links.full_link({ | ||
lang = lang, | lang = lang, | ||
| Dòng 496: | Dòng 548: | ||
tu_lay) | tu_lay) | ||
length = math.max( | length = math.max(len(word), length) | ||
end | end | ||
return | return | ||
m_columns. | m_columns.create_list { | ||
(length > 15 and 2 or 3), | column_count = (length > 15 and 2 or 3), | ||
result, | content = result, | ||
header = "''(" .. title_text .. ")'':", | |||
alphabetize = true, | |||
((unfold or #result < 7) and false or true), | collapse = ((unfold or #result < 7) and false or true), | ||
" | class = "columns-bg", | ||
lang = lang | |||
} | |||
end | end | ||
return | return export | ||