-- Module:LanguageifyLinks
--
-- From Wikipedia
--
-- Documentation for this module may be created at Module:LanguageifyLinks/doc

-- Module table: every function defined on `z` below is exported when the
-- module returns `z` at the end of the file.
local z = {}

-- Decodes the UTF-8 bytes of one character into its Unicode code point.
--
-- uc: string holding the UTF-8 representation of a single character.
-- Returns the code point as a number. An empty string yields 0, and so does
-- any string longer than four bytes. The bytes are not validated, so a
-- malformed sequence produces a meaningless number (garbage in, garbage out).
function z.unicodeCharacterToCodePoint(uc)
    local size = uc:len()

    -- Zero or one byte: the byte value itself is the result (0 when empty).
    if size <= 1 then
        return uc:byte(1) or 0
    end

    -- Marker value subtracted from the lead byte of a multi-byte sequence.
    local leadMarker
    if size == 2 then
        leadMarker = 0xC0
    elseif size == 3 then
        leadMarker = 0xE0
    elseif size == 4 then
        leadMarker = 0xF0
    else
        return 0
    end

    -- Start from the lead byte's payload, then append six bits from each
    -- following byte after removing its 0x80 marker.
    local codePoint = uc:byte(1) - leadMarker
    for index = 2, size do
        codePoint = codePoint * 0x40 + (uc:byte(index) - 0x80)
    end
    return codePoint
end

-- Breaks a UTF-8 encoded string into its individual characters.
--
-- s: string containing the UTF-8 bytes of zero or more characters.
-- Returns an array (1-based) whose elements are the successive matches of
-- "one byte followed by any run of bytes in the range 128-191", i.e. a lead
-- byte together with its continuation bytes. The input is not validated
-- (garbage in, garbage out).
function z.splitIntoUnicodeCharacters(s)
    local characters = {}
    for character in s:gmatch('.[\128-\191]*') do
        characters[#characters + 1] = character
    end
    return characters
end

-- Renders the code point of every character in the first argument.
--
-- frame.args[1]          text to convert; a missing argument is treated as
--                        an empty string
-- frame.args.prefix      text placed before each code point (default '')
-- frame.args.suffix      text placed after each code point (default '')
-- frame.args.separator   text placed between consecutive entries (default '')
--
-- Returns one string in which each character of the input is replaced by
-- prefix .. <code point as a number> .. suffix, joined by the separator.
function z.computeCodePoints(frame)
    local args = frame.args

    -- Default a missing first argument to empty text (the same convention
    -- languageifyLinks uses) instead of failing on a nil value.
    local chars = z.splitIntoUnicodeCharacters(args[1] or '')
    local prefix = args.prefix or ''
    local suffix = args.suffix or ''
    local separator = args.separator or ''

    -- Build each entry separately and join once at the end, so the cost stays
    -- linear instead of re-copying a growing string on every iteration.
    local pieces = {}
    for i, v in ipairs(chars) do
        pieces[i] = prefix .. z.unicodeCharacterToCodePoint(v) .. suffix
    end

    return table.concat(pieces, separator)
end

-- Returns the characters of frame.args[1] joined by single spaces.
--
-- The text is split with z.splitIntoUnicodeCharacters, so each UTF-8
-- character (lead byte plus continuation bytes) stays intact. A missing
-- argument is treated as an empty string, which yields an empty result.
function z.separateChars(frame)
    local text = frame.args[1] or ''
    return table.concat(z.splitIntoUnicodeCharacters(text), ' ')
end

-- Returns frame.args[1] with a single space inserted between every pair of
-- adjacent bytes. A missing argument is treated as an empty string.
function z.separateBytes(frame)
    local text = frame.args[1] or ''
    -- Put a space in front of every byte, then remove the one space that ends
    -- up at the very start. The outer parentheses drop gsub's second return
    -- value (the substitution count) so only the string is returned.
    return (text:gsub(".", " %1"):gsub("^ ", ""))
end

-- Encodes frame.args[1] into an anchor-style identifier.
--
-- Leading and trailing whitespace is removed, each remaining run of
-- whitespace becomes a single underscore, and every byte that is not
-- alphanumeric or one of . : _ - is replaced by a dot followed by the byte
-- value as two uppercase hexadecimal digits. A missing argument is treated
-- as an empty string.
function z.anchorEnchode(frame)
    local text = frame.args[1] or ""

    -- Trim both ends, then collapse interior whitespace runs to "_".
    text = text:gsub("^%s+", ""):gsub("%s+$", ""):gsub("%s+", "_")
    -- Escape every remaining byte outside the allowed set as ".XX".
    text = text:gsub("[^%w.:_-]", function(character)
        return string.format(".%02X", character:byte())
    end)

    return text
end

-- Points the wikilinks in a piece of text at a language section.
--
-- Reads its arguments from the parent frame (frame:getParent().args):
--   args[1]  text containing wikilinks (default "")
--   args[2]  section name appended to each link target (default "English")
--
-- [[target]] becomes [[target#<name>|target]] and [[target|label]] becomes
-- [[target#<name>|label]]. Links whose target already contains "#", or that
-- contain brackets or braces inside, do not match the patterns and are left
-- unchanged. Returns the rewritten text.
function z.languageifyLinks(frame)
    local args = frame:getParent().args

    local text = args[1] or ""
    local langname = args[2] or "English"

    -- The name is spliced into gsub replacement strings, where "%" is an
    -- escape character; double every "%" so the name is inserted literally
    -- instead of being read as a capture reference.
    local section = langname:gsub("%%", "%%%%")

    -- handle unpiped wikilinks:
    text = text:gsub("%[%[([^%[%]|{}#]+)%]%]", "[[%1#" .. section .. "|%1]]")
    -- handle piped wikilinks:
    text = text:gsub("%[%[([^%[%]|{}#]+)|([^%[%]|{}]+)%]%]", "[[%1#" .. section .. "|%2]]")

    return text
end
 
-- Hand the module table, with all functions defined on it, back to the caller.
return z