-- Module:Language/data

-- Shorthand for building strings from Unicode code points.
local U = mw.ustring.char

-- Combining diacritics. They are concatenated into a single character class
-- in the "sla-pro" replacements below.
local grave        = U(0x300)
local acute        = U(0x301)
local double_acute = U(0x30B)
local tilde        = U(0x303)
local macron       = U(0x304)
local dgrave       = U(0x30F)
local invbreve     = U(0x311)

-- Layout of the returned table:
--   data.languages[code]  per-language record (fields below, all optional)
--   data.redirects[code]  alias code -> key in data.languages
--
-- Fields of a language record:
--   name          The "canonical name" used on Wiktionary.
--   article       The Wikipedia article.
--   scripts       The ISO 15924 code(s); currently commented out everywhere.
--   type          "reconstructed" for proto-languages.
--   direction     "rtl" for right-to-left text.
--   replacements  Map from pattern to replacement string.
--                 NOTE(review): keys look like Lua/ustring patterns (character
--                 classes, captures, %1); presumably the consuming module
--                 applies them with mw.ustring.gsub -- confirm against caller.
--   Wikipedia_code, Wikipedia_name
--                 Wikipedia-side code/name where it differs from the above.
local data = {
	["languages"] = {
		["ang"] = {
			["name"] = "Old English",
			-- NOTE(review): every other entry stores "article" as a plain
			-- string; this one is a table. Left unchanged -- confirm what
			-- the consumer expects.
			["article"] = {"Old English"},
			-- ["scripts"] = {"Latn"},

			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				["[ĀÁ]"] = "A",
				["[āá]"] = "a",
				["[ǢǼ]"] = "Æ",
				["[ǣǽ]"] = "æ",
				["Ċ"] = "C",
				["ċ"] = "c",
				["[ĒÉ]"] = "E",
				["[ēé]"] = "e",
				["Ġ"] = "G",
				["ġ"] = "g",
				["[ĪÍ]"] = "I",
				["[īí]"] = "i",
				["[ŌÓ]"] = "O",
				["[ōó]"] = "o",
				["[ŪÚ]"] = "U",
				["[ūú]"] = "u",
				["[ȲÝ]"] = "Y",
				["[ȳý]"] = "y",
			},
		},
		["ar"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			-- ["scripts"] = { "Arab" },

			-- ālif with wasla is replaced by ālif;
			-- taṭwīl, fatḥatan, ḍammatan, kasratan,
			-- fatḥa, ḍamma, kasra,
			-- shadda, sukūn, and superscript (dagger) ālif are removed.
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = {
				[U(0x0671)] = U(0x0627),
				["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
					.. U(0x064E) .. U(0x064F) .. U(0x0650)
					.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
			},
		},
		["be"] = {
			["article"] = "Belarusian language",
			-- ["scripts"] = { "Cyrl" },

			-- Combining acute accent is removed.
			["replacements"] = {
				[U(0x0301)] = "",
			},
		},
		["bn"] = {
			["name"] = "Bengali",
			["article"] = "Bengali language",
			-- ["scripts"] = { "Beng" },
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
			["article"] = "Old Church Slavonic",
			-- ["scripts"] = { "Cyrs" },
		},
		["de"] = {
			["name"] = "German",
			["article"] = "German language",
			-- ["scripts"] = { "Latn" },

			-- Disabled digraph-to-umlaut replacements.
			-- NOTE(review): kept commented out, as in the source -- confirm
			-- before enabling.
			-- ["replacements"] = {
			-- 	["ae"]   = "ä",
			-- 	["oe"]    = "ö",
			-- 	["ue"]    = "ü",
			-- 	["A[Ee]"] = "Ä",
			-- 	["O[Ee]"] = "Ö",
			-- 	["U[Ee]"] = "Ü",
			-- },
		},
		["en"] = {
			["name"] = "English",
			["article"] = "English language",
			-- ["scripts"] = { "Latn" },
		},
		["es"] = {
			["name"] = "Spanish",
			["article"] = "Spanish language",
			-- ["scripts"] = { "Latn" },
		},
		["fr"] = {
			["name"] = "French",
			["article"] = "French language",
			-- ["scripts"] = { "Latn" },
		},
		["frm"] = {
			["name"] = "Middle French",
			["article"] = "Middle French",
			-- ["scripts"] = { "Latn" },
		},
		["gem-pro"] = {
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "gem-x-proto",
		},
		["grc"] = {
			["name"] = "Ancient Greek",
			["article"] = "Ancient Greek",
			-- ["scripts"] = { "Grek" },
			["replacements"] = {
				-- Vowels with macrons or breves are replaced with plain letters.
				["[ᾱᾰ]"] = "α",
				["[ᾹᾸ]"] = "Α",
				["[ῑῐ]"] = "ι",
				["[ῙῘ]"] = "Ι",
				["[ῡῠ]"] = "υ",
				["[ῩῨ]"] = "Υ",
				["ϐ"] = "β",
				["ϵ"] = "ε",
				["ϑ"] = "θ",
				["ϰ"] = "κ",
				["ϱ"] = "ρ",
				["ϲ"] = "σ",
				["ϕ"] = "φ",
			},
		},
		["got"] = {
			["name"] = "Gothic",
			["article"] = "Gothic language",
			-- ["scripts"] = { "Goth" },
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"] = "𐌱",
				["[Gg]"] = "𐌲",
				["[Dd]"] = "𐌳",
				["[EeĒē]"] = "𐌴",
				["[Qq]"] = "𐌵",
				["[Zz]"] = "𐌶",
				["[Hh]"] = "𐌷",
				["[Þþ]"] = "𐌸",
				["[IiÍí]"] = "𐌹",
				["[Kk]"] = "𐌺",
				["[Ll]"] = "𐌻",
				["[Mm]"] = "𐌼",
				["[Nn]"] = "𐌽",
				["[Jj]"] = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"] = "𐍀",
				["[Rr]"] = "𐍂",
				["[Ss]"] = "𐍃",
				["[Tt]"] = "𐍄",
				["[WwYy]"] = "𐍅",
				["[Ff]"] = "𐍆",
				["[Xx]"] = "𐍇",
				["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"] = "𐍉",
			},
		},
		["grk-pro"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Hindi",
			-- ["scripts"] = { "Deva" },
		},
		["ine-pro"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "ine-x-proto",
		},
		["ja"] = {
			["name"] = "Japanese",
			["article"] = "Japanese language",
			-- ["scripts"] = { "Jpan" },
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Latin",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
				-- NOTE(review): the uppercase E class has no "Ë", unlike the
				-- I and U classes -- confirm whether that is intentional.
				["[ĀĂ]"] = "A",
				["[āă]"] = "a",
				["[ĒĔ]"] = "E",
				["[ēĕë]"] = "e",
				["[ĪĬÏ]"] = "I",
				["[īĭï]"] = "i",
				["[ŌŎ]"] = "O",
				["[ōŏ]"] = "o",
				["[ŪŬÜ]"] = "U",
				["[ūŭü]"] = "u",
				["Ȳ"] = "Y",
				["ȳ"] = "y"
			},
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
			-- ["scripts"] = { "" },
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Old East Slavic",
			-- ["scripts"] = { "Cyrs" },
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["pt"] = {
			["name"] = "Portuguese",
			["article"] = "Portuguese language",
			-- ["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Punjabi language",
			-- ["scripts"] = { "Guru", "Arab", },
		},
		["ru"] = {
			["name"] = "Russian",
			["article"] = "Russian language",
			-- ["scripts"] = { "Cyrl" },

			-- Combining acute accent is removed.
			["replacements"] = {
				[U(0x0301)] = "",
			},
		},
		["se"] = {
			["replacements"] = {
				-- A listed consonant, an apostrophe, then the same consonant
				-- again collapses to the doubled consonant.
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sh"] = {
			["article"] = "Serbo-Croatian language",
			-- ["scripts"] = { "Latn", "Cyrl" },
			["replacements"] = {
				["[ȀÀȂÁĀÃ]"] = "A",
				["[ȁàȃáāã]"] = "a",
				["[ȄÈȆÉĒẼ]"] = "E",
				["[ȅèȇéēẽ]"] = "e",
				["[ȈÌȊÍĪĨ]"] = "I",
				["[ȉìȋíīĩ]"] = "i",
				["[ȌÒȎÓŌÕ]"] = "O",
				["[ȍòȏóōõ]"] = "o",
				["[ȐȒŔ]"] = "R",
				["[ȑȓŕ]"] = "r",
				["[ȔÙȖÚŪŨ]"] = "U",
				["[ȕùȗúūũ]"] = "u",
				["Ѐ"] = "Е",
				["ѐ"] = "е",
				["[ӢЍ]"] = "И",
				["[ӣѝ]"] = "и",
				["[Ӯ]"] = "У",
				["[ӯ]"] = "у"
			},
		},
		["sla-pro"] = {
			["name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				-- Any of the combining diacritics declared at the top of the
				-- file is removed.
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
			},
		},
		["uk"] = {
			["article"] = "Ukrainian language",
			-- ["scripts"] = { "Cyrl" },

			-- Combining acute accent is removed.
			["replacements"] = {
				[U(0x0301)] = "",
			}
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Urdu",
			-- ["scripts"] = { "Arab" },
		},
		["zh"] = {
			["name"] = "Chinese",
			["article"] = "Chinese language",
			-- ["scripts"] = { "Hani" },
		},
		["xcl"] = {
			["name"] = "Old Armenian",
			["article"] = "Classical Armenian",
			-- ["scripts"] = { "Armn" },
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xvn"] = {
			["name"] = "Vandalic",
			["article"] = "Vandalic language",
			-- ["scripts"] = { "Latn" },
		},
		-- Templates for adding new entries (without and with replacements).
--[[
		[""] = {
			["name"] = "",
			["article"] = "",
			-- ["scripts"] = { "" },
			},
		[""] = {
			["name"] = "",
			["article"] = "",
			-- ["scripts"] = { "" },
			["replacements"] = {
				},
			},

]]
	},
	["redirects"] = {
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["gem-x-proto"] = "gem-pro",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["ine-x-proto"] = "ine-pro",
	},
}

return data