Modul:family tree/etymology languages

A Wikiszótárból, a nyitott szótárból

Creates a version of Module:etymology languages/data in which there is just one code per language. Codes that have the same language data are reduced to the most language-codiest one. For instance, Austrian German has three codes (Austrian German, AG., de-AT) by which it can be accessed in etymology templates like {{cog}}. de-AT is chosen as the only language code for Austrian German because it looks the most like a language code.

The following criteria are used successively to weed out candidates for language-codiest code:

  1. The code must consist of letters and hyphens.
  2. The code must not contain an uppercase letter followed by a lowercase letter.
  3. The code must be as short as possible.

This list shows those etymology languages that have multiple codes, together with the code chosen by this module and the other codes:

  • Acadian French (fr-aca): Acadian French, fra-aca
  • American English (en-US): AE., American English
  • Ashtiani (atn): xme-ast
  • Austrian German (de-AT): AG., Austrian German
  • British English (en-GB): BE., British English
  • Canadian French (fr-CA): CF., Canadian French
  • Chakavian Serbo-Croatian (ckm): sh-cha
  • Early Scots (sco-osc): Early Scots, O.Sc., Old Scots
  • Eastern Balochi (bgp): bal-eas
  • Ecclesiastical Latin (la-ecc): EL., Ecclesiastical Latin
  • Gazi (xme-gaz): gzi
  • Guernsey Norman (nrf-grn): roa-grn
  • Insular Scots (sco-ins): Ins.Sc., Insular Scots
  • Jersey Norman (nrf-jer): roa-jer
  • Kajkavian Serbo-Croatian (kjv): sh-kaj
  • Kamviri (bsh-kam): xvi
  • Khunsari (kfm): xme-xun
  • Kölsch (ksh): Kölsch
  • Late Latin (LL): LL., Late Latin, la-lat
  • Lombardic (lng): Lombardic, goh-lng
  • Lunfardo (es-lun): Lunfardo
  • Medieval Latin (la-med): ML, ML., Medieval Latin
  • Middle Iranian (ira-mid): MIr.
  • Middle Scots (sco-smi): Mid.Sc., Middle Scots
  • Natanzi (ntz): xme-nat
  • Nayini (xme-nay): nyq
  • New Latin (la-new): NL., New Latin
  • Northern Scots (sco-nor): Nor.Sc., Northern Scots
  • Old Iranian (ira-old): OIr.
  • Old Italian (it-oit): roa-oit
  • Old Northern French (fro-nor): ONF., Old Northern French
  • Pre-Greek (qfa-sub-grc): pregrc
  • Provençal (oc-pro): prv
  • Renaissance Latin (la-ren): RL., Renaissance Latin
  • Soi (soj): xme-soi
  • Southern Balochi (bal-sou): bcc
  • Southern Scots (sco-sou): Borders Scots, Sou.Sc., Southern Scots
  • Swiss Italian (it-CH): Swiss Italian
  • Switzerland French (fr-CH): Swiss French, Switzerland French
  • Ulster Scots (sco-uls): Uls.Sc., Ulster Scots
  • Vafsi (xme-vaf): vaf
  • Viennese German (de-AT-vie): VG., Viennese German
  • Vulgar Latin (la-vul): VL., Vulgar Latin
  • Zoroastrian Dari (gbz): xme-dar
  • a substrate language originally spoken by the Pygmies (qfa-pyg): pygmy
  • bizánci görög (gkm): Medieval Greek
  • koiné görög (grc-koi): Koine
  • taxonomic name (mul-tax): Tax.

local language_codes = require "Module:languages/code to canonical name"

local function determine_preferred_etymology_language_code(code1, code2)
	if code2:find "^[%a-]+$" then
		if code1:find "^[%a-]+$" then
			if not code2:find "%u%l" then
				if not code1:find "%u%l" then
					if #code2 < #code1 then
						return code2
					else
						-- Prefer nrf-grn and nrf-jer over roa-grn and roa-jer
						-- (Guernsey and Jersey).
						local first_word1, first_word2 =
							code1:match "^[a-z]+", code2:match "^[a-z]+"
						if first_word1 and first_word2
						and language_codes[first_word1] then
							return code1
						else
							return code2
						end
					end
				else
					return code2
				end
			else
				return code1
			end
		else
			return code2
		end
	else
		return code1
	end
end

local function fold(t, accum, func)
	for k, v in pairs(t) do
		accum = func(k, v, accum)
	end
	return accum
end

local function invert(t)
	local inverted = {}
	for k, v in pairs(t) do
		inverted[v] = k
	end
	return inverted
end

return invert(fold(
	require "Module:etymology languages/data",
	{},
	function (code, data, data_to_code)
		if data_to_code[data] then
			local preferred_code = determine_preferred_etymology_language_code(data_to_code[data], code)
			data_to_code[data] = preferred_code
			table.insert(data.codes, code)
		else
			data_to_code[data] = code
			data.codes = { code }
		end
		return data_to_code
	end))