Module:Ancient Greek/typing
From Historical Hastings
Documentation for this module may be created at Module:Ancient Greek/typing/doc
-- Typing aid for Ancient Greek: converts an ASCII shorthand (Latin letters
-- plus punctuation standing for diacritics) into Greek script.
--
-- Exports:
--   p.reorder_diacritics(text)  sort runs of combining diacritics
--   p.to_Greek(text)            convert shorthand to Greek
--   p.to_Greek_t(frame)         template entry point for to_Greek
--   p.show_shortcuts(frame)     wikitable listing the single-character shortcuts

local p = {}

local sparse_concat = require("Module:TableTools").sparseConcat

local ustring = mw.ustring
local U = ustring.char
local get_codepoint = ustring.codepoint
local ufind = ustring.find
local ugsub = ustring.gsub
local decompose = ustring.toNFD
local str_gsub = string.gsub
local str_match = string.match

-- Byte-level patterns that step through UTF-8 text one character at a time.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" -- roughly equivalent to "." in Ustring patterns
local one_UTF8_char_or_none = "[%z\1-\127\194-\244]?[\128-\191]*" -- roughly equivalent to ".?" in Ustring patterns

local subscript = U(0x345) -- iota subscript (ypogegrammeni)
local macron = U(0x304) -- macron
local spacing_macron = U(0xAF)
local modifier_macron = U(0x2C9) -- modifier letter macron
local breve = U(0x306) -- breve
local spacing_breve = "˘" -- spacing breve
local diaeresis = U(0x308) -- diaeresis
local rough = U(0x314) -- rough breathing (reversed comma)
local smooth = U(0x313) -- smooth breathing (comma)
local acute = U(0x301) -- acute
local grave = U(0x300) -- grave
local circumflex = U(0x342) -- Greek circumflex (perispomeni)
local question_mark = U(0x37E) -- Greek question mark
local spacing_rough = "῾" -- spacing rough breathing
local spacing_smooth = "᾿" -- spacing smooth breathing

-- Ustring character class matching any one of the combining diacritics above.
local combining_diacritic = table.concat{
	"[",
	macron, breve,
	rough, smooth, diaeresis,
	acute, grave, circumflex,
	subscript,
	"]",
}

-- The numbers are used to sort series of diacritics.
local diacritic_position = {
	[macron] = 1,
	[breve] = 2,
	[rough] = 3,
	[smooth] = 3,
	[diaeresis] = 3,
	[acute] = 4,
	[grave] = 4,
	[circumflex] = 4,
	[subscript] = 5,
}

-- Perform a function on each Unicode character in a string.
local function for_each(str, func)
	for char in string.gmatch(str, UTF8_char) do
		func(char)
	end
end

--[=[
	The functions below arrange diacritics in the following order:
		1. macron
		2. breve
		3. breathings or diaeresis
		4. acute, circumflex, or grave
		5. iota subscript

	Used by [[Module:typing-aids]].

	No validation is performed: a sequence containing more than one diacritic
	of the same category is accepted as-is. Because table.sort is not a stable
	sort, the relative order of marks sharing a category is unspecified.
]=]

-- Sort comparator: true when diacritic1 belongs strictly before diacritic2.
-- Strict "<" is required by table.sort (equal ranks must compare false).
local function get_relative_position(diacritic1, diacritic2)
	return diacritic_position[diacritic1] < diacritic_position[diacritic2]
end

-- Split a UTF-8 string into an array holding one character per element.
local function chars_to_table(chars)
	local t = {}
	local i = 0
	for_each(chars, function (char)
		i = i + 1
		t[i] = char
	end)
	return t
end

-- Sort one run of combining diacritics (passed as a string) into canonical
-- order and return it as a string.
local function reorder_diacritic_sequence(diacritics)
	diacritics = chars_to_table(diacritics)
	table.sort(diacritics, get_relative_position)
	return table.concat(diacritics)
end

-- Decompose text to NFD, then put every run of two or more combining
-- diacritics into canonical order.
-- @param text string
-- @return string  NFD text with reordered diacritics
function p.reorder_diacritics(text)
	-- The outer parentheses drop gsub's second result (the match count).
	return (ugsub(decompose(text),
		combining_diacritic .. combining_diacritic .. "+",
		reorder_diacritic_sequence))
end

-- Multi-character shortcuts. These are replaced before the single-character
-- ones so that their component characters are not converted individually.
local multiple = {
	["_i"] = subscript,
}

-- Single-character shortcuts, applied as a gsub replacement table.
local single = {
	["a"] = "α", ["A"] = "Α",
	["b"] = "β", ["B"] = "Β",
	["c"] = "ξ", ["C"] = "Ξ",
	["d"] = "δ", ["D"] = "Δ",
	["e"] = "ε", ["E"] = "Ε",
	["f"] = "φ", ["F"] = "Φ",
	["g"] = "γ", ["G"] = "Γ",
	["h"] = "η", ["H"] = "Η",
	["i"] = "ι", ["I"] = "Ι",
	["k"] = "κ", ["K"] = "Κ",
	["l"] = "λ", ["L"] = "Λ",
	["m"] = "μ", ["M"] = "Μ",
	["n"] = "ν", ["N"] = "Ν",
	["o"] = "ο", ["O"] = "Ο",
	["p"] = "π", ["P"] = "Π",
	["q"] = "θ", ["Q"] = "Θ",
	["r"] = "ρ", ["R"] = "Ρ",
	["s"] = "σ", ["S"] = "Σ",
	["t"] = "τ", ["T"] = "Τ",
	["u"] = "υ", ["U"] = "Υ",
	["v"] = "ϝ", ["V"] = "Ϝ",
	["w"] = "ω", ["W"] = "Ω",
	["x"] = "χ", ["X"] = "Χ",
	["y"] = "ψ", ["Y"] = "Ψ",
	["z"] = "ζ", ["Z"] = "Ζ",

	-- vowel length
	["_"] = macron, [spacing_macron] = macron, [modifier_macron] = macron,
	["^"] = breve, [spacing_breve] = breve,

	-- diaeresis and breathings
	["+"] = diaeresis, ["("] = rough, [")"] = smooth,

	-- accents
	["/"] = acute, ["\\"] = grave,
	["="] = circumflex, ["{{=}}"] = circumflex, ["~"] = circumflex,

	-- punctuation
	["'"] = "’",
	["?"] = question_mark,
	[";"] = "·",
	["*"] = "", -- place after s to prevent it from turning into final sigma

	-- pipe
	["!"] = "|",
}

-- Replace every Latin "s" with a sigma: final "ς" when the "s" ends the text
-- or is followed by whitespace or punctuation other than "*" and "-",
-- medial "σ" otherwise.
--
-- The character after each "s" is only inspected, never consumed, so the
-- second "s" of a doubled "ss" is tested as well. (Consuming it would leave
-- that "s" to the generic letter table, which always yields medial sigma.)
local function convert_s_to_sigma(text)
	return (str_gsub(text, "s()", function (next_pos)
		-- The pattern can match nothing, so this is "" at the end of the text.
		local following = str_match(text, "^" .. one_UTF8_char_or_none, next_pos)
		local is_final = following == ""
			or (following ~= "*" and following ~= "-" and ufind(following, "[%s%p]"))
		return is_final and "ς" or "σ"
	end))
end

-- Replace a combining breathing that has no character before it (it starts
-- the text, or directly follows a breathing consumed by the previous match)
-- with its spacing equivalent. A breathing preceded by a character is left
-- unchanged.
local function combining_to_spacing(text)
	for _, accents in ipairs{ { rough, spacing_rough }, { smooth, spacing_smooth } } do
		local combining, spacing = accents[1], accents[2]
		text = str_gsub(text,
			"(" .. one_UTF8_char_or_none .. ")" .. combining,
			function (preceding)
				if preceding == "" then
					return spacing
				else
					return preceding .. combining
				end
			end)
	end
	return text
end

-- Convert ASCII shorthand to Greek script.
-- @param text string  shorthand input
-- @return string  Greek text in NFD with diacritics in canonical order
-- Raises an error if text is not a string.
function p.to_Greek(text)
	if type(text) ~= "string" then
		error("first argument to to_Greek should be string, not " .. type(text))
	end

	-- Sigmas first: the final/medial decision looks at the raw shorthand.
	text = convert_s_to_sigma(text)

	for k, v in pairs(multiple) do
		text = str_gsub(text, k, v)
	end

	-- Characters without an entry in the table are left unchanged by gsub.
	text = str_gsub(text, UTF8_char, single)
	text = combining_to_spacing(text)

	return p.reorder_diacritics(text)
end

-- Template entry point: converts the first positional parameter of the
-- calling template. When that parameter is missing or blank, a sample word
-- is used in the Template namespace and an error is raised elsewhere.
function p.to_Greek_t(frame)
	local text = frame:getParent().args[1]
	if text then
		text = mw.text.trim(text)
		if text == "" then
			text = nil
		end
	end

	if not text then
		if mw.title.getCurrentTitle().nsText == "Template" then
			text = "le/cis"
		else
			error("Parameter 1 is required.")
		end
	end

	return p.to_Greek(text)
end

-- Render one replacement character for the shortcut table: a hexadecimal
-- character reference, preceded by a dotted circle when the character is a
-- combining diacritic. The empty replacement is returned unchanged.
local function process(char)
	if char == "" then
		return char
	end
	local entity = ("&#x%X;"):format(get_codepoint(char))
	if diacritic_position[char] then
		return "◌" .. entity
	else
		return entity
	end
end

-- Build a wikitable listing every single-character shortcut with its Greek
-- output, two shortcut/output pairs per row.
-- @return string  wikitext
function p.show_shortcuts(frame)
	local output = { '{| class="wikitable"' }

	local function comp(item1, item2)
		-- non-letters after letters
		if item1:find("^%a$") ~= item2:find("^%a$") then
			return item1:find("^%a$")
		end

		local lower1, lower2 = item1:lower(), item2:lower()
		-- capitals before lowercase
		if lower1 == lower2 then
			return item1 < item2
		-- otherwise case-insensitive sorting
		else
			return lower1 < lower2
		end
	end

	local i = 1
	for k, v in require("Module:TableTools").sortedPairs(single, comp) do
		i = i + 1
		output[i] = '| <code>' .. k .. '</code> || <span lang="grc">' .. process(v) .. '</span>'
		if i % 3 == 0 then -- 3 because each row consists of row syntax |- and two pairs of cells
			i = i + 1
			output[i] = '|-'
		end
	end

	table.insert(output, '|}')

	return table.concat(output, '\n')
end

return p