Modul:family tree/etymology languages
Creates a version of Module:etymology languages/data in which there is just one code per language. Codes that have the same language data are reduced to the language-codiest one. For instance, Austrian German has three codes (Austrian German, AG., de-AT) by which it can be accessed in etymology templates like {{cog}}. de-AT is chosen as the only language code for Austrian German because it looks the most like a language code.
The following criteria are used successively to weed out candidates for language-codiest code:
- The code must consist of letters and hyphens.
- The code must not contain an uppercase letter followed by a lowercase letter.
- The code must be as short as possible.
This list shows those etymology languages that have multiple codes, together with the code chosen by this module and the other codes:
- Acadian French (
fr-aca
):Acadian French
,fra-aca
- American English (
en-US
):AE.
,American English
- Ashtiani (
atn
):xme-ast
- Austrian German (
de-AT
):AG.
,Austrian German
- British English (
en-GB
):BE.
,British English
- Canadian French (
fr-CA
):CF.
,Canadian French
- Chakavian Serbo-Croatian (
ckm
):sh-cha
- Early Scots (
sco-osc
):Early Scots
,O.Sc.
,Old Scots
- Eastern Balochi (
bgp
):bal-eas
- Ecclesiastical Latin (
la-ecc
):EL.
,Ecclesiastical Latin
- Gazi (
xme-gaz
):gzi
- Guernsey Norman (
nrf-grn
):roa-grn
- Insular Scots (
sco-ins
):Ins.Sc.
,Insular Scots
- Jersey Norman (
nrf-jer
):roa-jer
- Kajkavian Serbo-Croatian (
kjv
):sh-kaj
- Kamviri (
bsh-kam
):xvi
- Khunsari (
kfm
):xme-xun
- Kölsch (
ksh
):Kölsch
- Late Latin (
LL
):LL.
,Late Latin
,la-lat
- Lombardic (
lng
):Lombardic
,goh-lng
- Lunfardo (
es-lun
):Lunfardo
- Medieval Latin (
la-med
):ML
,ML.
,Medieval Latin
- Middle Iranian (
ira-mid
):MIr.
- Middle Scots (
sco-smi
):Mid.Sc.
,Middle Scots
- Natanzi (
ntz
):xme-nat
- Nayini (
xme-nay
):nyq
- New Latin (
la-new
):NL.
,New Latin
- Northern Scots (
sco-nor
):Nor.Sc.
,Northern Scots
- Old Iranian (
ira-old
):OIr.
- Old Italian (
it-oit
):roa-oit
- Old Northern French (
fro-nor
):ONF.
,Old Northern French
- Pre-Greek (
qfa-sub-grc
):pregrc
- Provençal (
oc-pro
):prv
- Renaissance Latin (
la-ren
):RL.
,Renaissance Latin
- Soi (
soj
):xme-soi
- Southern Balochi (
bal-sou
):bcc
- Southern Scots (
sco-sou
):Borders Scots
,Sou.Sc.
,Southern Scots
- Swiss Italian (
it-CH
):Swiss Italian
- Switzerland French (
fr-CH
):Swiss French
,Switzerland French
- Ulster Scots (
sco-uls
):Uls.Sc.
,Ulster Scots
- Vafsi (
xme-vaf
):vaf
- Viennese German (
de-AT-vie
):VG.
,Viennese German
- Vulgar Latin (
la-vul
):VL.
,Vulgar Latin
- Zoroastrian Dari (
gbz
):xme-dar
- a substrate language originally spoken by the Pygmies (
qfa-pyg
):pygmy
- bizánci görög (
gkm
):Medieval Greek
- koiné görög (
grc-koi
):Koine
- taxonomic name (
mul-tax
):Tax.
local language_codes = require "Module:languages/code to canonical name"
--- Choose which of two codes for the same etymology language looks more like
-- a language code.
--
-- The criteria are applied in order; the first one that separates the two
-- codes decides:
--   1. the code consists only of letters and hyphens;
--   2. the code has no uppercase letter directly followed by a lowercase one;
--   3. the code is shorter;
--   4. (equal length only) code1 is kept when both codes start with a
--      lowercase word and code1's leading word is a key of `language_codes`
--      (the table loaded from "Module:languages/code to canonical name");
--      otherwise code2 is chosen.
--
-- @param code1 string: the code preferred so far.
-- @param code2 string: the challenger code.
-- @return string: either code1 or code2.
local function determine_preferred_etymology_language_code(code1, code2)
	-- Criterion 1: letters and hyphens only.
	if not code2:find "^[%a-]+$" then
		return code1
	end
	if not code1:find "^[%a-]+$" then
		return code2
	end

	-- Criterion 2: no uppercase letter followed by a lowercase letter
	-- (rules out spelled-out names such as "Lombardic").
	if code2:find "%u%l" then
		return code1
	end
	if code1:find "%u%l" then
		return code2
	end

	-- Criterion 3: the shorter code wins, whichever argument it is.
	-- Previously a strictly shorter code1 fell through to the tie-break
	-- below and could lose to a longer code2, making the outcome depend on
	-- the order in which the codes were visited.
	if #code2 < #code1 then
		return code2
	elseif #code1 < #code2 then
		return code1
	end

	-- Criterion 4 (equal length): prefer nrf-grn and nrf-jer over roa-grn
	-- and roa-jer (Guernsey and Jersey).
	local first_word1, first_word2 =
		code1:match "^[a-z]+", code2:match "^[a-z]+"
	if first_word1 and first_word2 and language_codes[first_word1] then
		return code1
	end
	return code2
end
--- Reduce a table to a single value.
-- Calls func(key, value, accumulator) once for every pair produced by
-- pairs(t) and feeds each result into the next call.
-- @param t table to traverse (traversal order is whatever pairs yields).
-- @param accum initial accumulator value.
-- @param func function(key, value, accumulator) returning the new accumulator.
-- @return the accumulator after the last call (accum itself if t is empty).
local function fold(t, accum, func)
	local result = accum
	for key, value in pairs(t) do
		result = func(key, value, result)
	end
	return result
end
--- Build a new table that maps each value of t back to its key.
-- If several keys hold the same value, the key visited last by pairs is the
-- one kept. The input table is not modified.
-- @param t table to invert.
-- @return a new table with keys and values swapped.
local function invert(t)
	local swapped = {}
	for key, value in pairs(t) do
		swapped[value] = key
	end
	return swapped
end
-- Folding step applied to every (code, data) pair of the etymology language
-- data. Data tables are used as keys by identity, so codes that point to the
-- same data table are treated as aliases of one language.
--   * First code seen for a data table: remember it and start data.codes.
--   * Later code for the same data table: keep whichever code
--     determine_preferred_etymology_language_code prefers, and append the new
--     code to data.codes.
-- Note that this writes a `codes` field into the data tables themselves.
local function record_code(code, data, data_to_code)
	local known_code = data_to_code[data]
	if known_code then
		data_to_code[data] =
			determine_preferred_etymology_language_code(known_code, code)
		table.insert(data.codes, code)
	else
		data_to_code[data] = code
		data.codes = { code }
	end
	return data_to_code
end

-- The fold yields data table -> preferred code; inverting it gives the module
-- result: preferred code -> data table, with one code per data table.
return invert(fold(
	require "Module:etymology languages/data",
	{},
	record_code))