Modul:sa-utilities

A Wikiszótárból, a nyitott szótárból
Magánhangzók
Deva IAST SLP1 Deva IAST SLP1
अ, प a a आ, पा ā A
इ, पि i i ई, पी ī I
उ, पु u u ऊ, पू ū U
ए, पे e e ऐ, पै ai E
ओ, पो o o औ, पौ au O
ऋ, पृ ṛ f ॠ, पॄ ṝ F
ऌ, पॢ ḷ x ॡ, पॣ ḹ X
Mássalhangzók
Deva IAST SLP1 Deva IAST SLP1 Deva IAST SLP1 Deva IAST SLP1 Deva IAST SLP1
ka ka ca ca ṭa wa ta ta pa pa
kha Ka cha Ca ṭha Wa tha Ta pha Pa
ga ga ja ja ḍa qa da da ba ba
gha Ga jha Ja ḍha Qa dha Da bha Ba
ṅa Na ña Ya ṇa Ra na na ma ma
ya ya ra ra ḷa La la la va va
ha ha śa Sa ṣa za sa sa
Egyéb jelek
Deva IAST SLP1 Deva IAST SLP1
अं, पं M अँ, पँ ~
अः, पः H '
x Z f V
. . . .
Számok
Deva
IAST & SLP1 0 1 2 3 4 5 6 7 8 9

Lásd még[szerkesztés]


-- Module table; it is returned to callers by the `return export` at the end of the file.
local export = {}

-- Building blocks for the patterns used throughout this module.
-- The letters appear to follow the SLP1 columns of the transliteration
-- tables shown at the top of this page.
-- consonant_list / vowel_list are the bare letter sets; consonant / vowel
-- wrap them in [...] so they can be used directly as one-character classes.
export.consonant_list = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlLvSzsh"
export.consonant = "[" .. export.consonant_list .. "]"
-- Character class matching a single accent mark: "/" or "\".
export.accent = "[/\\]"
export.vowel_list = "aAiIuUfFxXeEoO"
export.vowel = "[" .. export.vowel_list .. "]"
-- One vowel optionally followed by one accent mark.
export.vowel_with_accent = export.vowel .. export.accent .. "?"

local U = mw.ustring.char
local match = mw.ustring.match
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local lower = mw.ustring.lower
local upper = mw.ustring.upper

--- Match `pattern` against the end of `text`.
-- @param text    string to inspect
-- @param pattern pattern fragment; "$" is appended to it
-- @return whatever `match` returns for the anchored pattern (nil when there is no match)
local function ends_with(text, pattern)
	local anchored = pattern .. "$"
	return match(text, anchored)
end

--- Match `pattern` against the beginning of `text`.
-- @param text    string to inspect
-- @param pattern pattern fragment; "^" is prepended to it
-- @return whatever `match` returns for the anchored pattern (nil when there is no match)
local function starts_with(text, pattern)
	local anchored = "^" .. pattern
	return match(text, anchored)
end

-- Maps a vowel, optionally followed by an accent mark ("/" or "\"), to the
-- vowel one grade up: a/A -> A, i/I -> e, u/U -> o, e/E -> E, o/O -> O and
-- f/F -> ar. The accent mark stays directly after the vowel letter, so
-- "f/" becomes "a/r". There are no entries for x/X.
-- internal_sandhi uses it when a stem-final a/A meets a vowel-initial ending.
export.up_one_grade = {
	['a'] = 'A', ['A'] = 'A', ['a/'] = 'A/', ['A/'] = 'A/', ['a\\'] = 'A\\', ['A\\'] = 'A\\',
	['i'] = 'e', ['I'] = 'e', ['i/'] = 'e/', ['I/'] = 'e/', ['i\\'] = 'e\\', ['I\\'] = 'e\\',
	['u'] = 'o', ['U'] = 'o', ['u/'] = 'o/', ['U/'] = 'o/', ['u\\'] = 'o\\', ['U\\'] = 'o\\',
	['e'] = 'E', ['E'] = 'E', ['e/'] = 'E/', ['E/'] = 'E/', ['e\\'] = 'E\\', ['E\\'] = 'E\\',
	['o'] = 'O', ['O'] = 'O', ['o/'] = 'O/', ['O/'] = 'O/', ['o\\'] = 'O\\', ['O\\'] = 'O\\',
	['f'] = 'ar', ['F'] = 'ar', ['f/'] = 'a/r', ['F/'] = 'a/r', ['f\\'] = 'a\\r', ['F\\'] = 'a\\r',
}

-- Maps a/A, i/I, u/U and f/F (each optionally followed by an accent mark)
-- to the lowercase letter of the pair, keeping the accent mark.
-- Lowercase inputs map to themselves. Other vowels have no entry.
export.shorten = {
	['a'] = 'a', ['A'] = 'a', ['a/'] = 'a/', ['A/'] = 'a/', ['a\\'] = 'a\\', ['A\\'] = 'a\\',
	['i'] = 'i', ['I'] = 'i', ['i/'] = 'i/', ['I/'] = 'i/', ['i\\'] = 'i\\', ['I\\'] = 'i\\',
	['u'] = 'u', ['U'] = 'u', ['u/'] = 'u/', ['U/'] = 'u/', ['u\\'] = 'u\\', ['U\\'] = 'u\\',
	['f'] = 'f', ['F'] = 'f', ['f/'] = 'f/', ['F/'] = 'f/', ['f\\'] = 'f\\', ['F\\'] = 'f\\',
}

-- Counterpart of export.shorten: maps a/A, i/I, u/U and f/F (each optionally
-- followed by an accent mark) to the uppercase letter of the pair, keeping
-- the accent mark. Uppercase inputs map to themselves.
-- internal_sandhi looks vowels up here in several consonant-final branches.
export.lengthen = {
	['a'] = 'A', ['A'] = 'A', ['a/'] = 'A/', ['A/'] = 'A/', ['a\\'] = 'A\\', ['A\\'] = 'A\\',
	['i'] = 'I', ['I'] = 'I', ['i/'] = 'I/', ['I/'] = 'I/', ['i\\'] = 'I\\', ['I\\'] = 'I\\',
	['u'] = 'U', ['U'] = 'U', ['u/'] = 'U/', ['U/'] = 'U/', ['u\\'] = 'U\\', ['U\\'] = 'U\\',
	['f'] = 'F', ['F'] = 'F', ['f/'] = 'F/', ['F/'] = 'F/', ['f\\'] = 'F\\', ['F\\'] = 'F\\',
}

-- Splits e, E, o, O (optionally followed by an accent mark) into a vowel plus
-- a glide: e -> ay, E -> Ay, o -> av, O -> Av. The accent mark stays on the
-- vowel part ("e/" -> "a/y").
-- internal_sandhi uses it when a stem ending in one of these vowels meets a
-- vowel-initial ending.
export.split_diphthong = {
	['e'] = 'ay', ['e/'] = 'a/y', ['e\\'] = 'a\\y',
	['E'] = 'Ay', ['E/'] = 'A/y', ['E\\'] = 'A\\y',
	['o'] = 'av', ['o/'] = 'a/v', ['o\\'] = 'a\\v',
	['O'] = 'Av', ['O/'] = 'A/v', ['O\\'] = 'A\\v',
}

-- Maps the vowels i/I, u/U, f/F, x/X to the consonants y, v, r, l.
-- Keys are bare vowels (no accent mark). internal_sandhi appends the
-- consonant to the stem when one of these vowels precedes a vowel-initial
-- ending.
export.semivowel_to_cons = {
	['i'] = 'y', ['I'] = 'y',
	['u'] = 'v', ['U'] = 'v',
	['f'] = 'r', ['F'] = 'r',
	['x'] = 'l', ['X'] = 'l',
}

-- Maps i/I and u/U (optionally followed by an accent mark) to lowercase
-- vowel + glide: iy / uv, with the accent mark kept after the vowel.
-- Uppercase inputs come out lowercase as well.
-- internal_sandhi uses it when input_table.mono is set and a stem-final
-- i/I/u/U meets a vowel-initial ending.
local insert_glide = {
	['i'] = 'iy', ['I'] = 'iy', ['i/'] = 'i/y', ['I/'] = 'i/y', ['i\\'] = 'i\\y', ['I\\'] = 'i\\y',
	['u'] = 'uv', ['U'] = 'uv', ['u/'] = 'u/v', ['U/'] = 'u/v', ['u\\'] = 'u\\v', ['U\\'] = 'u\\v',
}

-- Replacement for a final stop: every member of a row collapses to the
-- row's first letter (k, w, t or p), and Y becomes N.
-- Passed to gsub as a replacement table by absolute_final and by the
-- input_table.final branch of internal_sandhi.
local to_final = {
	['k'] = 'k', ['K'] = 'k', ['g'] = 'k', ['G'] = 'k',
	['w'] = 'w', ['W'] = 'w', ['q'] = 'w', ['Q'] = 'w',
	['t'] = 't', ['T'] = 't', ['d'] = 't', ['D'] = 't',
	['p'] = 'p', ['P'] = 'p', ['b'] = 'p', ['B'] = 'p',
	['Y'] = 'N',
}

-- Maps each letter selected by the "^[tTdDn]*" pattern in export.retroflexion
-- to its counterpart: t -> w, T -> W, d -> q, D -> Q, n -> R.
-- The 'D' entry was missing (the table had ['Q'] = 'Q' in its place), so an
-- ending-initial D was left unchanged after a stem-final z.
-- ['Q'] = 'Q' is kept as a harmless identity entry for backward compatibility.
local dental_to_retroflex = {
	['t'] = 'w', ['T'] = 'W', ['d'] = 'q', ['D'] = 'Q', ['n'] = 'R',
	['Q'] = 'Q',
}

-- Maps each uppercase stop letter to its lowercase counterpart
-- (K -> k, G -> g, ...) and h to g.
-- internal_sandhi passes it to gsub to rewrite a stem-final G/J/Q/D/B/h
-- before an ending that starts with t or T.
local deaspirate = {
	['K'] = 'k', ['G'] = 'g',
	['C'] = 'c', ['J'] = 'j',
	['W'] = 'w', ['Q'] = 'q',
	['T'] = 't', ['D'] = 'd',
	['P'] = 'p', ['B'] = 'b',
	['h'] = 'g',
}

--- Test whether `text` consists of exactly one vowel (optionally followed by
-- an accent mark) surrounded by any number of consonants.
-- @param text string to test
-- @return the matched text when the whole string fits that shape, otherwise nil
function export.is_monosyllabic(text)
	local consonant_run = export.consonant .. "*"
	local nucleus = export.vowel .. export.accent .. "?"
	local whole_word = "^" .. consonant_run .. nucleus .. consonant_run .. "$"
	return match(text, whole_word)
end

--- Rewrite the end of `text` into the form allowed at the very end of a word.
-- @param text       string to adjust
-- @param ambig_hint replacement handed to gsub for a final c, C, j, J, h, S
--                   or z; it is passed through unchecked
-- @return the adjusted string
local function absolute_final(text, ambig_hint)
	local cons = export.consonant

	-- A final cluster of two or more consonants is cut down to its first member.
	if ends_with(text, cons .. cons) then
		text = (gsub(text, "(" .. cons .. ")" .. cons .. "+$", "%1"))
	end

	-- These endings are already permitted; L, v and y are not handled because
	-- they should not appear finally.
	if ends_with(text, "[kwtpNRnmlaAiIuUeEoOfFxXH][/\\]?") then
		return text
	end

	local replacement
	if ends_with(text, "M") then -- just in case
		replacement = "m"
	elseif ends_with(text, "[sr]") then
		replacement = "H"
	elseif ends_with(text, "[KgGWqQTdDPbBY]") then
		replacement = to_final
	elseif ends_with(text, "[cCjJhSz]") then
		replacement = ambig_hint
	else
		return text
	end

	-- Parentheses drop gsub's second result (the substitution count).
	return (gsub(text, ".$", replacement))
end

--- Apply the s -> z and n -> R (plus dental -> retroflex after z) changes
-- across the boundary between `stem` and `ending`.
-- @param stem   text before the boundary
-- @param ending text after the boundary
-- @return stem, ending (both possibly modified)
function export.retroflexion(stem, ending)
	-- Stem tail after which s becomes z.
	local s_trigger = "[iIeEfFxuUoOrk][/\\]?[HM]?"
	-- Run of letters that may stand between the trigger and n.
	local passable = "[^cCjJYwWqQRtTdDnSsl]*"
	-- Letter that starts the n -> R change, plus the run following it.
	local n_trigger = "[zrfF]" .. passable
	-- Letters that must directly follow n for it to change.
	local n_follower = "[aAiIeEfFxuUoOynmv]"

	-- Converts a leading run of t/T/d/D/n letter by letter.
	local function convert_dentals(dentals)
		return (gsub(dentals, ".", dental_to_retroflex))
	end

	-- s -> z at the start of the ending.
	if ends_with(stem, s_trigger) then
		ending = gsub(ending, "^s([^rfF])", "z%1")
	end
	-- s -> z at the end of the stem.
	if ends_with(stem, s_trigger .. "s") and starts_with(ending, "[^rfF]") then
		stem = gsub(stem, "s$", "z")
	end
	-- Leading dentals of the ending follow a stem-final z.
	if ends_with(stem, "z") then
		ending = gsub(ending, "^[tTdDn]*", convert_dentals)
	end
	-- n -> R inside the ending, triggered from the stem.
	if ends_with(stem, n_trigger) then
		ending = gsub(ending, "^(" .. passable .. ")n(" .. n_follower .. ")", "%1R%2")
	end
	-- n -> R at the end of the stem.
	if ends_with(stem, n_trigger .. "n") and starts_with(ending, n_follower) then
		stem = gsub(stem, "n$", "R")
	end
	-- for safety: trigger and n both inside the ending
	ending = gsub(ending, "(" .. n_trigger .. ")n(" .. n_follower .. ")", "%1R%2")

	return stem, ending
end

--- Concatenate `stem` and `ending`, resolving which accent marks survive.
-- @param stem            first part
-- @param ending          second part
-- @param has_accent      when falsy the parts are joined unchanged
-- @param accent_override when truthy, accent marks are removed from the stem
-- @param mono            when truthy and the ending carries an accent mark,
--                        accent marks are removed from the stem
-- @param recessive       when truthy, all accent marks are removed and "/"
--                        is placed after the first vowel of the joined string
-- @return the joined string
local function combine_accent(stem, ending, has_accent, accent_override, mono, recessive)
	if not has_accent then
		return stem .. ending
	end

	local mark = export.accent

	if recessive then
		local joined = gsub(stem .. ending, mark, "") -- remove any accent
		joined = gsub(joined, "^([^" .. export.vowel_list .. "]-)(" .. export.vowel .. ")", "%1%2/")
		return joined
	end

	if accent_override or (mono and match(ending, mark)) then
		stem = gsub(stem, mark, "")
	elseif match(stem, mark) and match(ending, mark) then
		-- both parts carry a mark: the stem's wins
		ending = gsub(ending, mark, "")
	end

	return stem .. ending
end

-- Lowercase g/q/b that replace a stem-final k/w/p.
local voiced_stop = {['k'] = 'g', ['w'] = 'q', ['p'] = 'b'}

-- What an ending-initial h becomes after a stem-final k/w/p.
local h_after_stop = {['k'] = 'G', ['w'] = 'Q', ['p'] = 'B'}

-- What a stem-final t becomes, keyed by the FIRST letter of the ending.
local t_before = {
	['c'] = 'c', ['C'] = 'c', ['j'] = 'j', ['J'] = 'j',
	['w'] = 'w', ['W'] = 'w', ['q'] = 'q', ['Q'] = 'q',
}

-- What a stem-final n becomes, keyed by the FIRST letter of the ending.
local n_before = {
	['k'] = 'N', ['K'] = 'N', ['g'] = 'N', ['G'] = 'N',
	['c'] = 'Y', ['C'] = 'Y', ['j'] = 'Y', ['J'] = 'Y',
	['w'] = 'R', ['W'] = 'R', ['q'] = 'R', ['Q'] = 'R',
	['p'] = 'm', ['P'] = 'm', ['b'] = 'm', ['B'] = 'm',
	['l'] = 'M', -- or 'l~'
}

-- What a stem-final s/H becomes, keyed by the FIRST letter of the ending.
local sibilant_before = {
	['c'] = 'S', ['C'] = 'S',
	['w'] = 'z', ['W'] = 'z',
	['t'] = 's', ['T'] = 's',
}

--- Join a stem and an ending, adjusting the letters at the boundary.
--
-- Fields read from `input_table`:
--   stem, ending     the two strings to join
--   ambig_hint       replacement handed to gsub for a final c/C/j/J/h/S/z
--                    (used by absolute_final and by the `final` branch)
--   mono             enables the glide-insertion and i/u-lengthening
--                    branches and is forwarded to combine_accent
--   j_to_z           enables j -> z before an ending starting with t/T
--   final            treat the stem end as word-final before a
--                    consonant-initial ending
--   has_accent, accent_override, recessive
--                    forwarded to combine_accent
--
-- Fixes relative to the previous revision:
--   * h + t/T/d/D: only the FIRST letter of the ending becomes Q (the
--     pattern was unanchored and rewrote every t/T/d/D in the ending).
--   * k/w/p + h: the ending's h becomes G/Q/B (previously the whole stem
--     was spliced into the ending).
--   * t + c/C/j/J/w/W/q/Q: only the stem-final t is replaced (previously
--     the whole ending was spliced into the stem); J now yields j.
--   * n + stop/l and s/H + c/C/w/W/t/T: the replacement is looked up by
--     the ending's first letter (previously by the stem's last letter,
--     which is never a key, so nothing happened).
--   * vowel + s/H/r before r: the accent mark is now optional.
--
-- @param input_table table with the fields listed above
-- @return the joined string, passed through absolute_final
function export.internal_sandhi(input_table)
	local stem, ending = input_table.stem, input_table.ending
	local last, acc, first, combined
	-- explicitly ignored are CV, C + semivowel, or C + nasal
	if ending == "" then
		return absolute_final(stem, input_table.ambig_hint)
	elseif starts_with(ending, export.vowel) then -- ending starts with vowel
		if ends_with(stem, export.vowel_with_accent) then -- stem ends with vowel
			-- strip last vowel and accent off stem
			stem, last, acc = match(stem, "^(.*)(" .. export.vowel .. ")(" .. export.accent .. "?)$")
			-- strip first vowel off ending
			first, ending = match(ending, "^(.)(.*)$")
			if match(last, '[iIuU]') and input_table.mono then
				stem = stem .. insert_glide[last .. acc]
				ending = first .. ending
			elseif lower(last) == lower(first) then -- homorganic
				ending = upper(first) .. acc .. ending
			elseif lower(last) == "a" then -- gunation and vrddhization
				ending = export.up_one_grade[first .. acc] .. ending
			elseif export.semivowel_to_cons[last] then
				stem = stem .. export.semivowel_to_cons[last]
				-- a "/" on the dropped vowel reappears as "\" on the next one
				ending = first .. (acc == "/" and "\\" or "") .. ending
			elseif export.split_diphthong[last] then -- guna and vrddhi splitting
				stem = stem .. export.split_diphthong[last .. acc]
				ending = first .. ending
			end
		end
		-- all consonants unchanged
	elseif ends_with(stem, "[iu][/\\]?[rv]") and input_table.mono then
		stem = gsub(stem, "([iu][/\\]?)([rv])$", function(vow, glide) return export.lengthen[vow] .. glide end)
	elseif ends_with(stem, "[cj]") and starts_with(ending, "n") then
		ending = gsub(ending, "^.", "Y")
	elseif ends_with(stem, "S") and starts_with(ending, "s") then
		stem = gsub(stem, ".$", "k")
	elseif ends_with(stem, "s") and starts_with(ending, "s") and input_table.mono then
		stem = gsub(stem, ".$", "t")
	elseif ends_with(stem, "s") and starts_with(ending, "B") and input_table.mono then
		stem = gsub(stem, ".$", "d")
	elseif ends_with(stem, "j") and starts_with(ending, "[tT]") and input_table.j_to_z then
		stem = gsub(stem, ".$", "z")
	elseif ends_with(stem, "h") and starts_with(ending, "[tTdD]") then
		-- drop the h, lengthening a preceding a/i/u
		stem = gsub(stem, "([aiu]?)([/\\]?)h$", function(vow, accent) return (export.lengthen[vow] or "") .. accent end)
		-- anchored: only the ending's first letter changes
		ending = gsub(ending, "^[tTdD]", "Q")
	elseif ends_with(stem, "[GJQDBh]") and starts_with(ending, "[tT]") then
		-- NOTE(review): h + t/T is always caught by the branch above, so the
		-- h in this class (and deaspirate['h']) is never reached from here.
		stem = gsub(stem, ".$", deaspirate)
		ending = gsub(ending, "^.", "D")
	elseif ends_with(stem, export.consonant) and starts_with(ending, export.consonant) then
		if input_table.final then
			if ends_with(stem, export.consonant .. export.consonant) then -- at least 2 consonants
				-- take the first of the cluster
				stem = gsub(stem, "(" .. export.consonant .. "+)$",
					function(cluster) return sub(cluster, 1, 1) end)
			end
			if ends_with(stem, "[KgGWqQTdDPbB]") then
				stem = gsub(stem, ".$", to_final)
			elseif ends_with(stem, "[cCjJhSz]") then
				stem = gsub(stem, ".$", input_table.ambig_hint)
			end
		end
		-- first letter of the ending; several branches key their lookup on it
		local ending_initial = sub(ending, 1, 1)
		if ends_with(stem, "[kwp]") then
			if starts_with(ending, "[gGjJqQdDbB]") then
				stem = gsub(stem, ".$", voiced_stop)
			elseif starts_with(ending, "h") then
				-- remember the stop before it is rewritten: it selects
				-- what the ending's h turns into
				local stop = sub(stem, -1)
				stem = gsub(stem, ".$", voiced_stop)
				ending = gsub(ending, "^.", h_after_stop[stop])
			end
		elseif ends_with(stem, "t") then
			if starts_with(ending, "[cCjJwWqQ]") then
				stem = gsub(stem, ".$", t_before[ending_initial])
			elseif starts_with(ending, "S") then
				stem = gsub(stem, ".$", "c")
				ending = gsub(ending, "^.", "C")
			elseif starts_with(ending, "[gGdDbB]") then
				stem = gsub(stem, ".$", "d")
			elseif starts_with(ending, "[nm]") then
				stem = gsub(stem, ".$", "n")
			elseif starts_with(ending, "h") then
				stem = gsub(stem, ".$", "d")
				ending = gsub(ending, "^.", "D")
			end
		elseif ends_with(stem, "m") then
			if starts_with(ending, "[hSzs]") then
				stem = gsub(stem, ".$", "M")
			elseif starts_with(ending, "[^yrln]") then
				stem = gsub(stem, ".$", "n")
			end
		elseif ends_with(stem, "n") then
			if starts_with(ending, "[hSzs]") then
				stem = gsub(stem, ".$", "M")
			elseif starts_with(ending, "[kKgGcCjJwWqQpPbBl]") then
				stem = gsub(stem, ".$", n_before[ending_initial])
			end
		elseif ends_with(stem, "[aA][/\\]?[sHr]") and starts_with(ending, "[rgGdDbByvjJqQlLhnm]") then
			stem = gsub(stem, "([aA])([/\\]?)[sHr]$",
				function(vow, accent) return (vow == "a" and "o" or "A") .. accent end)
		elseif ends_with(stem, "[sHr]") then
			if starts_with(ending, "[kKpPzsS]") then
				stem = gsub(stem, ".$", "H")
			elseif starts_with(ending, "[cCwWtT]") then
				-- NOTE(review): stem-final r is deliberately left unchanged.
				-- The earlier code never changed anything in this branch, and
				-- turning r + t into s would alter existing output; confirm
				-- whether r should take part.
				if ends_with(stem, "[sH]") then
					stem = gsub(stem, ".$", sibilant_before[ending_initial])
				end
			elseif starts_with(ending, "r") then
				-- drop the final s/H/r; lengthen the vowel when export.lengthen has it
				stem = gsub(stem, "(" .. export.vowel .. "[/\\]?)[sHr]$", function(vow) return export.lengthen[vow] or vow end)
			elseif starts_with(ending, "[gGjJqQdDbByvlLhnm]") then
				stem = gsub(stem, ".$", "r")
			end
		end
	end
	stem, ending = export.retroflexion(stem, ending)
	combined = combine_accent(stem, ending, input_table.has_accent, input_table.accent_override, input_table.mono, input_table.recessive)
	return absolute_final(combined, input_table.ambig_hint)
end

return export