Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 2f96e534 authored by Alexander Epaneshnikov's avatar Alexander Epaneshnikov Committed by GitHub
Browse files

Add Shavian alphabet support (English) (#1924)

This PR adds support for **Shavian**, a phonemic alphabet for English:
https://en.wikipedia.org/wiki/Shavian_alphabet (Unicode
[U+10450–U+1047F](https://www.unicode.org/charts/PDF/U10450.pdf); ISO
15924 - Shaw)
It pronounces most English words correctly, although it sometimes
misplaces the word stress. All checks are passing.
There are many Shavian communities online; I wanted to join in without
excluding people using screen readers.
If needed, this website has more resources: https://shavian.info/
![Screenshot of GNOME terminal, showing the difference between phonemes
in "This is an example sentence in the Shavian alphabet", and the same
sentence but for
Latin](https://github.com/espeak-ng/espeak-ng/assets/113068485/75d43247-1b70-4c16-8e55-6a9ffab7784d)

Continuation of #1818 due to an issue in a rebase
parents a7295c1e f69dd80b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ updated languages:
*  uz (Uzbek) -- Andiv06

new languages:
*  en-Shaw (English, Shavian script) -- Luna Rose
*  ti (Tigrinya) -- Biniam Gebremichael
*  mto (Totontepec Mixe) -- Bill Dengler, Elizabeth Resendiz
*  fo (Faroese) -- Andras Eliassen, iSolveIT ApS and Setur.fo/Ravnur
+79 −0
Original line number Diff line number Diff line
@@ -3372,6 +3372,7 @@ shampoo $2
shaoni	SaI'oUni
shareable	Se@@b@L
sharpie		$alt2
shavian		$alt6
shazam		$alt3
shea S'eI $only
shebang		SI#baN
@@ -5708,3 +5709,81 @@ sterile $alt2
tensile		$alt2
virile		$alt2
?3 volatile	v0la#t@L



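// Shavian (phonemic alphabet for English)
// Community Shavian letter names.
// Names written with a leading `_` belong to the letters that also have a
// word entry under "Shavian short forms and single letter words" below.
// NOTE(review): 𐑺 (U+1047A) has a pronunciation rule in the Shavian
// `.group 0xF0` rules but no letter name here -- confirm this is intentional.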
𐑐	p'In
𐑚	b'El
_𐑑	t'an
𐑛	d'Vn
𐑒	k'i:
𐑜	g'Il
_𐑓	f'eI
_𐑝	v'aI
𐑔	T'O@n
_𐑞	D'aU
𐑕	s'i:
𐑟	z'u:
𐑖	S'aI
𐑠	Z'oU
𐑗	tS'A:
𐑡	dZ'OI
𐑘	j'En
_𐑢	w'eI
𐑙	s'0N
𐑣	h'u:
𐑤	l'am
𐑮	r'oU
𐑥	m'i:
_𐑯	n'aU
𐑦	'In
𐑰	'i:v
𐑧	'EdZ
𐑱	'eIm
𐑨	'aS
_𐑲	'aIz
_𐑩	@g'oU
𐑳	'Vp
𐑪	'0n
_𐑴	'oUT
𐑫	'Umf
_𐑵	'u:z
𐑬	'aUns
𐑶	'OIl
𐑭	'A:mz
𐑷	'O:t
_𐑸	'a:ri;@
_𐑹	'O:r@
𐑻	'3:rlI
𐑼	@r'eI
_𐑽	'i@3ri
𐑾	'i:;@n
_𐑿	j'u:l

// Shavian short forms and single letter words
𐑩	$nounf
𐑸	%A@		$pastf $only
𐑲	aI		$u+ $verbf $only
𐑯	and		$u $pause $only
𐑞	D@2		$only $nounf
𐑓	fO@		$u
𐑽	i@3		$noun
𐑿	ju:		$u $verbf
𐑴	'oU
𐑵	'u:		// "ooh"
𐑹	O@		$u $pause $only
𐑝	02v		$u $only
𐑑	t@5		$verbf $alt7    // @ change to U before vowel
𐑢	wID		$u   // used sometimes

// Shavian dictionary
𐑥𐑲	maI 	$u $nounf $strend2
𐑦𐑯	%In 	$strend $only $nounf
𐑦𐑟	%Iz 	$pastf $only
𐑣𐑨𐑛	%had	$pastf $strend2 $only
𐑖𐑰	Si: 	$u $only $verbsf
𐑕𐑳𐑥	,sVm	$nounf $unstressend $only
𐑚𐑳𐑑	,bVt	$pause $only
𐑢𐑦𐑗	,wItS	$pause $strend2 $only $verbf
+53 −1
Original line number Diff line number Diff line
@@ -6986,9 +6986,61 @@ contin) u ju:
        'd (_S2    d
        'll (_S3   @L

// Shavian characters, phonemic alphabet for English
.group 0xF0
// 0xF0 is the first byte of the UTF-8 encoding of every Shavian character (U+10450-U+1047F)

    𐑐     p
    𐑚     b
    𐑑     t
    𐑛     d
    𐑒     k
    𐑜     g
    𐑓     f
    𐑝     v
    𐑔     T
    𐑞     D
    𐑕     s
    𐑟     z
    𐑖     S
    𐑠     Z
    𐑗     tS
    𐑡     dZ
    𐑘     j
    𐑢     w
    𐑙     N
    𐑣     h
    𐑤     l
    𐑮     r
    𐑥     m
    𐑯     n
    𐑦     I
    𐑦 (_  %i
    𐑰     i:
    𐑧     E
    𐑱     eI
    𐑨     a
    𐑲     aI
    𐑩     @
    𐑳     V
    𐑪     0
    𐑴     oU
    𐑫     U
    𐑵     u:
    𐑬     aU
    𐑶     OI
    𐑭     A:
    𐑷     O:
    𐑸     'A@
    𐑹     'O@
    𐑺     'e@
    𐑻     '3:
    𐑼     3
    𐑽     'i@3
    𐑾     i:@
    𐑿     ju:

.group 0xce   // Greek letters
// 0xce is the first byte of the utf-8 code for these characters

        α          'alf@_
        β          b'i:t@_
+5 −0
Original line number Diff line number Diff line
name English (Shavian alphabet)
language en-shaw
maintainer Luna Rose <luna@anarchy.center>
status testing
phonemes en
+2 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@
#define OFFSET_GEORGIAN 0x10a0
#define OFFSET_KOREAN   0x1100
#define OFFSET_ETHIOPIC 0x1200
#define OFFSET_SHAVIAN  0x10450 // start of the Shavian range (0x10450-0x1047F) used by the "_shaw" entry in alphabets[]

// character ranges must be listed in ascending unicode order
static const ALPHABET alphabets[] = {
@@ -89,6 +90,7 @@ static const ALPHABET alphabets[] = {
	{ "_ja",    0x3040,          0x3040, 0x30ff, 0, AL_NOT_CODE },
	{ "_zh",    0x3100,          0x3100, 0x9fff, 0, AL_NOT_CODE },
	{ "_ko",    0xa700,          0xa700, 0xd7ff, L('k', 'o'), AL_NOT_CODE | AL_WORDS },
	{ "_shaw",  OFFSET_SHAVIAN,  0x10450, 0x1047F, L('e', 'n'), 0 },
	{ NULL, 0, 0, 0, 0, 0 }
};

Loading