Module:Peter Bowman/transliterator

From Wikipedia

Documentation for this module may be created at Module:Peter Bowman/transliterator/doc

-- Export table: functions attached to it are returned from this module
-- by the final `return p`.
local p = {}

-- Punctuation, symbols and the space character that are passed through
-- unchanged: every listed character maps to itself. The table is filled
-- from a single string so that each character only has to be written once.
local commonTransliteration = {}
for character in string.gmatch( " -=,./;'[]\\`~!@#$%^&*()_+{}|:\"<>?", "." ) do
    commonTransliteration[ character ] = character
end

-- Devanagari-to-Latin mapping registered under the language code "hi".
-- Each key is a Devanagari string and each value is the Latin text emitted
-- for it.
-- NOTE(review): some keys below are written as a base letter followed by a
-- combining sign, i.e. they span more than one code point. Whether such keys
-- can ever match depends on how p.transliterate segments its input; confirm
-- against that function.
local transliterationHi = {
    -- Independent vowels
    ["अ"] = "a",
    ["आ"] = "ā",
    ["इ"] = "i",
    ["ई"] = "ī",
    ["उ"] = "u",
    ["ऊ"] = "ū",
    ["ऋ"] = "ṛ",
    ["ॠ"] = "ṝ",
    ["ऌ"] = "ḷ",
    ["ॡ"] = "ḹ",
    ["ए"] = "e",
    ["ऐ"] = "ai",
    ["ओ"] = "o",
    ["औ"] = "au",
    -- Vowel "a" followed by a sign (multi-code-point keys)
    ["अं"] = "ã", -- a + anusvara
    ["अः"] = "aḥ", -- a + visarga
    ["अँ"] = "ã", -- a + candrabindu; same output as a + anusvara
    -- Consonants (emitted without an inherent vowel)
    ["क"] = "k",
    ["च"] = "c",
    ["ट"] = "ṭ",
    ["त"] = "t",
    ["प"] = "p",
    ["ख"] = "kh",
    ["छ"] = "ch",
    ["ठ"] = "ṭh",
    ["थ"] = "th",
    ["फ"] = "ph",
    ["ग"] = "g",
    ["ज"] = "j",
    ["ड"] = "ḍ",
    ["द"] = "d",
    ["ब"] = "b",
    ["घ"] = "gh",
    ["झ"] = "jh",
    ["ढ"] = "ḍh",
    ["ध"] = "dh",
    ["भ"] = "bh",
    ["ङ"] = "ṅ",
    ["ञ"] = "ñ",
    ["ण"] = "ṇ",
    ["न"] = "n",
    ["म"] = "m",
    ["य"] = "y",
    ["र"] = "r",
    ["ल"] = "l",
    ["व"] = "v",
    ["श"] = "ś",
    ["ष"] = "ṣ",
    ["स"] = "s",
    ["ह"] = "h",
    -- Consonant followed by virama (multi-code-point keys)
    ["क्"] = "k",
    ["त्"] = "t",
    ["ज्"] = "j",
    ["श्"] = "ś",
    -- Consonants with nukta
    -- NOTE(review): these may be stored as base letter + nukta sign rather
    -- than as a single precomposed code point; verify the encoding.
    ["क़"] = "q",
    ["ख़"] = "ḵẖ",
    ["ग़"] = "ġ",
    ["ज़"] = "z",
    ["फ़"] = "f",
    ["ड़"] = "ṛ",
    ["ढ़"] = "ṛh"
}

-- Registry of per-language transliteration tables, keyed by language code.
local transliterations = { hi = transliterationHi }

--- Replace the HTML entities &#39;, &quot; and &amp; with the characters
-- they stand for.
-- Declared `local` so the helper does not leak into the global environment.
-- @param text string to clean up
-- @return the string with the three entities replaced
local function sanitizeText( text )
    local result = text
    result = string.gsub( result, "&#39;", "'" )
    result = string.gsub( result, "&quot;", "\"" )
    -- "&amp;" is handled last so that input such as "&amp;quot;" is
    -- unescaped exactly once (to "&quot;") and not a second time.
    result = string.gsub( result, "&amp;", "&" )
    -- string.gsub also returns a match count; only the string is returned.
    return result
end

--- Transliterate a text into Latin script.
-- Reads the language code from frame.args[1] and the text from
-- frame.args[2]. The text is matched greedily from left to right: at every
-- position the longest sequence of code points that is a key of
-- commonTransliteration or of the language table is consumed. This makes
-- keys spanning several code points (base letter plus combining sign)
-- reachable; a lookup that only ever tries one code point can never match
-- them and rejects the combining sign as an invalid character.
-- A table value that is a function is called with the output list and a
-- per-call scratch table instead of being appended.
-- @param frame frame object; only frame.args[1] and frame.args[2] are read
-- @return the transliterated string, or an error message (in Polish) when
--         the language code is unknown or a character has no mapping
function p.transliterate( frame )
    local language = frame.args[1]
    local transliteration = transliterations[ language ]

    if transliteration == nil then
        -- tostring keeps the message usable when no language code was passed.
        return "Błędny kod języka: " .. tostring( language )
    end

    -- A missing text argument is treated as empty text.
    local text = sanitizeText( frame.args[2] or "" )

    -- Lua 5.1 exposes unpack as a global, later versions as table.unpack.
    local unpack = unpack or table.unpack

    local codepoints = {}
    for codepoint in mw.ustring.gcodepoint( text ) do
        codepoints[ #codepoints + 1 ] = codepoint
    end

    -- Longest key of the language table, measured in code points; this is
    -- the upper bound for the look-ahead below. Keys of commonTransliteration
    -- are single characters, so they never need more than one code point.
    local maxKeyLength = 1
    for key in pairs( transliteration ) do
        if type( key ) == "string" then
            local keyLength = mw.ustring.len( key )
            if keyLength and keyLength > maxKeyLength then
                maxKeyLength = keyLength
            end
        end
    end

    local result = {}
    local cache = {}
    local total = #codepoints
    local index = 1

    while index <= total do
        local untransliterated, transliterated
        local length = math.min( maxKeyLength, total - index + 1 )

        -- Try the longest candidate first and shrink until something matches.
        -- If nothing matches, `untransliterated` ends up holding the single
        -- character at `index`, which is what the error message reports.
        while length >= 1 do
            untransliterated = mw.ustring.char(
                unpack( codepoints, index, index + length - 1 )
            )
            transliterated = commonTransliteration[ untransliterated ]

            if transliterated == nil then
                transliterated = transliteration[ untransliterated ]
            end

            if transliterated ~= nil then
                break
            end

            length = length - 1
        end

        if type( transliterated ) == "function" then
            transliterated( result, cache )
        elseif transliterated == nil then
            return "Nieprawidłowy znak " .. untransliterated .. " dla języka o kodzie " .. language .. "."
        else
            result[ #result + 1 ] = transliterated
        end

        index = index + length
    end

    return table.concat( result )
end

-- Hand the export table (with p.transliterate) back to whoever loads this module.
return p