diff options
Diffstat (limited to 'src/nvim/arabic.c')
-rw-r--r-- | src/nvim/arabic.c | 1532 |
1 files changed, 1532 insertions, 0 deletions
diff --git a/src/nvim/arabic.c b/src/nvim/arabic.c new file mode 100644 index 0000000000..54f88f8757 --- /dev/null +++ b/src/nvim/arabic.c @@ -0,0 +1,1532 @@ +/// @file arabic.c +/// +/// Functions for Arabic language. +/// +/// Arabic characters are categorized into following types: +/// +/// Isolated - iso-8859-6 form char denoted with a_* +/// Initial - unicode form-B start char denoted with a_i_* +/// Medial - unicode form-B middle char denoted with a_m_* +/// Final - unicode form-B final char denoted with a_f_* +/// Stand-Alone - unicode form-B isolated char denoted with a_s_* (NOT USED) +/// + +#include "vim.h" +#include "arabic.h" + +// Arabic ISO-10646-1 character set definition + +// Arabic ISO-8859-6 (subset of 10646; 0600 - 06FF) +#define a_COMMA 0x060C +#define a_SEMICOLON 0x061B +#define a_QUESTION 0x061F +#define a_HAMZA 0x0621 +#define a_ALEF_MADDA 0x0622 +#define a_ALEF_HAMZA_ABOVE 0x0623 +#define a_WAW_HAMZA 0x0624 +#define a_ALEF_HAMZA_BELOW 0x0625 +#define a_YEH_HAMZA 0x0626 +#define a_ALEF 0x0627 +#define a_BEH 0x0628 +#define a_TEH_MARBUTA 0x0629 +#define a_TEH 0x062a +#define a_THEH 0x062b +#define a_JEEM 0x062c +#define a_HAH 0x062d +#define a_KHAH 0x062e +#define a_DAL 0x062f +#define a_THAL 0x0630 +#define a_REH 0x0631 +#define a_ZAIN 0x0632 +#define a_SEEN 0x0633 +#define a_SHEEN 0x0634 +#define a_SAD 0x0635 +#define a_DAD 0x0636 +#define a_TAH 0x0637 +#define a_ZAH 0x0638 +#define a_AIN 0x0639 +#define a_GHAIN 0x063a +#define a_TATWEEL 0x0640 +#define a_FEH 0x0641 +#define a_QAF 0x0642 +#define a_KAF 0x0643 +#define a_LAM 0x0644 +#define a_MEEM 0x0645 +#define a_NOON 0x0646 +#define a_HEH 0x0647 +#define a_WAW 0x0648 +#define a_ALEF_MAKSURA 0x0649 +#define a_YEH 0x064a + +#define a_FATHATAN 0x064b +#define a_DAMMATAN 0x064c +#define a_KASRATAN 0x064d +#define a_FATHA 0x064e +#define a_DAMMA 0x064f +#define a_KASRA 0x0650 +#define a_SHADDA 0x0651 +#define a_SUKUN 0x0652 + +#define a_MADDA_ABOVE 0x0653 +#define a_HAMZA_ABOVE 0x0654 +#define a_HAMZA_BELOW 0x0655 + +#define a_ZERO 0x0660 +#define a_ONE 0x0661 +#define a_TWO 0x0662 +#define a_THREE 0x0663 +#define a_FOUR 0x0664 +#define a_FIVE 0x0665 +#define a_SIX 0x0666 +#define a_SEVEN 0x0667 +#define a_EIGHT 0x0668 +#define a_NINE 0x0669 +#define a_PERCENT 0x066a +#define a_DECIMAL 0x066b +#define a_THOUSANDS 0x066c +#define a_STAR 0x066d +#define a_MINI_ALEF 0x0670 +// Rest of 8859-6 does not relate to Arabic + +// Arabic Presentation Form-B (subset of 10646; FE70 - FEFF) +// +// s -> isolated +// i -> initial +// m -> medial +// f -> final +// +#define a_s_FATHATAN 0xfe70 +#define a_m_TATWEEL_FATHATAN 0xfe71 +#define a_s_DAMMATAN 0xfe72 + +#define a_s_KASRATAN 0xfe74 + +#define a_s_FATHA 0xfe76 +#define a_m_FATHA 0xfe77 +#define a_s_DAMMA 0xfe78 +#define a_m_DAMMA 0xfe79 +#define a_s_KASRA 0xfe7a +#define a_m_KASRA 0xfe7b +#define a_s_SHADDA 0xfe7c +#define a_m_SHADDA 0xfe7d +#define a_s_SUKUN 0xfe7e +#define a_m_SUKUN 0xfe7f + +#define a_s_HAMZA 0xfe80 +#define a_s_ALEF_MADDA 0xfe81 +#define a_f_ALEF_MADDA 0xfe82 +#define a_s_ALEF_HAMZA_ABOVE 0xfe83 +#define a_f_ALEF_HAMZA_ABOVE 0xfe84 +#define a_s_WAW_HAMZA 0xfe85 +#define a_f_WAW_HAMZA 0xfe86 +#define a_s_ALEF_HAMZA_BELOW 0xfe87 +#define a_f_ALEF_HAMZA_BELOW 0xfe88 +#define a_s_YEH_HAMZA 0xfe89 +#define a_f_YEH_HAMZA 0xfe8a +#define a_i_YEH_HAMZA 0xfe8b +#define a_m_YEH_HAMZA 0xfe8c +#define a_s_ALEF 0xfe8d +#define a_f_ALEF 0xfe8e +#define a_s_BEH 0xfe8f +#define a_f_BEH 0xfe90 +#define a_i_BEH 0xfe91 +#define a_m_BEH 0xfe92 +#define a_s_TEH_MARBUTA 0xfe93 +#define a_f_TEH_MARBUTA 0xfe94 +#define a_s_TEH 0xfe95 +#define a_f_TEH 0xfe96 +#define a_i_TEH 0xfe97 +#define a_m_TEH 0xfe98 +#define a_s_THEH 0xfe99 +#define a_f_THEH 0xfe9a +#define a_i_THEH 0xfe9b +#define a_m_THEH 0xfe9c +#define a_s_JEEM 0xfe9d +#define a_f_JEEM 0xfe9e +#define a_i_JEEM 0xfe9f +#define a_m_JEEM 0xfea0 +#define a_s_HAH 0xfea1 +#define a_f_HAH 0xfea2 +#define a_i_HAH 0xfea3 +#define a_m_HAH 0xfea4 +#define a_s_KHAH 0xfea5 +#define a_f_KHAH 0xfea6 +#define a_i_KHAH 0xfea7 +#define a_m_KHAH 0xfea8 +#define a_s_DAL 0xfea9 +#define a_f_DAL 0xfeaa +#define a_s_THAL 0xfeab +#define a_f_THAL 0xfeac +#define a_s_REH 0xfead +#define a_f_REH 0xfeae +#define a_s_ZAIN 0xfeaf +#define a_f_ZAIN 0xfeb0 +#define a_s_SEEN 0xfeb1 +#define a_f_SEEN 0xfeb2 +#define a_i_SEEN 0xfeb3 +#define a_m_SEEN 0xfeb4 +#define a_s_SHEEN 0xfeb5 +#define a_f_SHEEN 0xfeb6 +#define a_i_SHEEN 0xfeb7 +#define a_m_SHEEN 0xfeb8 +#define a_s_SAD 0xfeb9 +#define a_f_SAD 0xfeba +#define a_i_SAD 0xfebb +#define a_m_SAD 0xfebc +#define a_s_DAD 0xfebd +#define a_f_DAD 0xfebe +#define a_i_DAD 0xfebf +#define a_m_DAD 0xfec0 +#define a_s_TAH 0xfec1 +#define a_f_TAH 0xfec2 +#define a_i_TAH 0xfec3 +#define a_m_TAH 0xfec4 +#define a_s_ZAH 0xfec5 +#define a_f_ZAH 0xfec6 +#define a_i_ZAH 0xfec7 +#define a_m_ZAH 0xfec8 +#define a_s_AIN 0xfec9 +#define a_f_AIN 0xfeca +#define a_i_AIN 0xfecb +#define a_m_AIN 0xfecc +#define a_s_GHAIN 0xfecd +#define a_f_GHAIN 0xfece +#define a_i_GHAIN 0xfecf +#define a_m_GHAIN 0xfed0 +#define a_s_FEH 0xfed1 +#define a_f_FEH 0xfed2 +#define a_i_FEH 0xfed3 +#define a_m_FEH 0xfed4 +#define a_s_QAF 0xfed5 +#define a_f_QAF 0xfed6 +#define a_i_QAF 0xfed7 +#define a_m_QAF 0xfed8 +#define a_s_KAF 0xfed9 +#define a_f_KAF 0xfeda +#define a_i_KAF 0xfedb +#define a_m_KAF 0xfedc +#define a_s_LAM 0xfedd +#define a_f_LAM 0xfede +#define a_i_LAM 0xfedf +#define a_m_LAM 0xfee0 +#define a_s_MEEM 0xfee1 +#define a_f_MEEM 0xfee2 +#define a_i_MEEM 0xfee3 +#define a_m_MEEM 0xfee4 +#define a_s_NOON 0xfee5 +#define a_f_NOON 0xfee6 +#define a_i_NOON 0xfee7 +#define a_m_NOON 0xfee8 +#define a_s_HEH 0xfee9 +#define a_f_HEH 0xfeea +#define a_i_HEH 0xfeeb +#define a_m_HEH 0xfeec +#define a_s_WAW 0xfeed +#define a_f_WAW 0xfeee +#define a_s_ALEF_MAKSURA 0xfeef +#define a_f_ALEF_MAKSURA 0xfef0 +#define a_s_YEH 0xfef1 +#define a_f_YEH 0xfef2 +#define a_i_YEH 0xfef3 +#define a_m_YEH 0xfef4 +#define a_s_LAM_ALEF_MADDA_ABOVE 0xfef5 +#define a_f_LAM_ALEF_MADDA_ABOVE 0xfef6 +#define a_s_LAM_ALEF_HAMZA_ABOVE 0xfef7 +#define a_f_LAM_ALEF_HAMZA_ABOVE 0xfef8 +#define a_s_LAM_ALEF_HAMZA_BELOW 0xfef9 +#define a_f_LAM_ALEF_HAMZA_BELOW 0xfefa +#define a_s_LAM_ALEF 0xfefb +#define a_f_LAM_ALEF 0xfefc + +#define a_BYTE_ORDER_MARK 0xfeff + +static int A_is_a(int cur_c); +static int A_is_s(int cur_c); +static int A_is_f(int cur_c); +static int chg_c_a2s(int cur_c); +static int chg_c_a2i(int cur_c); +static int chg_c_a2m(int cur_c); +static int chg_c_a2f(int cur_c); +static int chg_c_i2m(int cur_c); +static int chg_c_f2m(int cur_c); +static int chg_c_laa2i(int hid_c); +static int chg_c_laa2f(int hid_c); +static int half_shape(int c); +static int A_firstc_laa(int c1, int c); +static int A_is_harakat(int c); +static int A_is_iso(int c); +static int A_is_formb(int c); +static int A_is_ok(int c); +static int A_is_valid(int c); +static int A_is_special(int c); + +// Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered). +static int A_is_a(int cur_c) +{ + switch (cur_c) { + case a_HAMZA: + case a_ALEF_MADDA: + case a_ALEF_HAMZA_ABOVE: + case a_WAW_HAMZA: + case a_ALEF_HAMZA_BELOW: + case a_YEH_HAMZA: + case a_ALEF: + case a_BEH: + case a_TEH_MARBUTA: + case a_TEH: + case a_THEH: + case a_JEEM: + case a_HAH: + case a_KHAH: + case a_DAL: + case a_THAL: + case a_REH: + case a_ZAIN: + case a_SEEN: + case a_SHEEN: + case a_SAD: + case a_DAD: + case a_TAH: + case a_ZAH: + case a_AIN: + case a_GHAIN: + case a_TATWEEL: + case a_FEH: + case a_QAF: + case a_KAF: + case a_LAM: + case a_MEEM: + case a_NOON: + case a_HEH: + case a_WAW: + case a_ALEF_MAKSURA: + case a_YEH: + return TRUE; + } + + return FALSE; +} + +// Returns True if c is an Isolated Form-B ARABIC letter +static int A_is_s(int cur_c) +{ + switch (cur_c) { + case a_s_HAMZA: + case a_s_ALEF_MADDA: + case a_s_ALEF_HAMZA_ABOVE: + case a_s_WAW_HAMZA: + case a_s_ALEF_HAMZA_BELOW: + case a_s_YEH_HAMZA: + case a_s_ALEF: + case a_s_BEH: + case a_s_TEH_MARBUTA: + case a_s_TEH: + case a_s_THEH: + case a_s_JEEM: + case a_s_HAH: + case a_s_KHAH: + case a_s_DAL: + case a_s_THAL: + case a_s_REH: + case a_s_ZAIN: + case a_s_SEEN: + case a_s_SHEEN: + case a_s_SAD: + case a_s_DAD: + case a_s_TAH: + case a_s_ZAH: + case a_s_AIN: + case a_s_GHAIN: + case a_s_FEH: + case a_s_QAF: + case a_s_KAF: + case a_s_LAM: + case a_s_MEEM: + case a_s_NOON: + case a_s_HEH: + case a_s_WAW: + case a_s_ALEF_MAKSURA: + case a_s_YEH: + return TRUE; + } + + return FALSE; +} + +// Returns True if c is a Final shape of an ARABIC letter +static int A_is_f(int cur_c) +{ + switch (cur_c) { + case a_f_ALEF_MADDA: + case a_f_ALEF_HAMZA_ABOVE: + case a_f_WAW_HAMZA: + case a_f_ALEF_HAMZA_BELOW: + case a_f_YEH_HAMZA: + case a_f_ALEF: + case a_f_BEH: + case a_f_TEH_MARBUTA: + case a_f_TEH: + case a_f_THEH: + case a_f_JEEM: + case a_f_HAH: + case a_f_KHAH: + case a_f_DAL: + case a_f_THAL: + case a_f_REH: + case a_f_ZAIN: + case a_f_SEEN: + case a_f_SHEEN: + case a_f_SAD: + case a_f_DAD: + case a_f_TAH: + case a_f_ZAH: + case a_f_AIN: + case a_f_GHAIN: + case a_f_FEH: + case a_f_QAF: + case a_f_KAF: + case a_f_LAM: + case a_f_MEEM: + case a_f_NOON: + case a_f_HEH: + case a_f_WAW: + case a_f_ALEF_MAKSURA: + case a_f_YEH: + case a_f_LAM_ALEF_MADDA_ABOVE: + case a_f_LAM_ALEF_HAMZA_ABOVE: + case a_f_LAM_ALEF_HAMZA_BELOW: + case a_f_LAM_ALEF: + return TRUE; + } + return FALSE; +} + +// Change shape - from ISO-8859-6/Isolated to Form-B Isolated +static int chg_c_a2s(int cur_c) +{ + int tempc; + + switch (cur_c) { + case a_HAMZA: + tempc = a_s_HAMZA; + break; + + case a_ALEF_MADDA: + tempc = a_s_ALEF_MADDA; + break; + + case a_ALEF_HAMZA_ABOVE: + tempc = a_s_ALEF_HAMZA_ABOVE; + break; + + case a_WAW_HAMZA: + tempc = a_s_WAW_HAMZA; + break; + + case a_ALEF_HAMZA_BELOW: + tempc = a_s_ALEF_HAMZA_BELOW; + break; + + case a_YEH_HAMZA: + tempc = a_s_YEH_HAMZA; + break; + + case a_ALEF: + tempc = a_s_ALEF; + break; + + case a_TEH_MARBUTA: + tempc = a_s_TEH_MARBUTA; + break; + + case a_DAL: + tempc = a_s_DAL; + break; + + case a_THAL: + tempc = a_s_THAL; + break; + + case a_REH: + tempc = a_s_REH; + break; + + case a_ZAIN: + tempc = a_s_ZAIN; + break; + + case a_TATWEEL: // exceptions + tempc = cur_c; + break; + + case a_WAW: + tempc = a_s_WAW; + break; + + case a_ALEF_MAKSURA: + tempc = a_s_ALEF_MAKSURA; + break; + + case a_BEH: + tempc = a_s_BEH; + break; + + case a_TEH: + tempc = a_s_TEH; + break; + + case a_THEH: + tempc = a_s_THEH; + break; + + case a_JEEM: + tempc = a_s_JEEM; + break; + + case a_HAH: + tempc = a_s_HAH; + break; + + case a_KHAH: + tempc = a_s_KHAH; + break; + + case a_SEEN: + tempc = a_s_SEEN; + break; + + case a_SHEEN: + tempc = a_s_SHEEN; + break; + + case a_SAD: + tempc = a_s_SAD; + break; + + case a_DAD: + tempc = a_s_DAD; + break; + + case a_TAH: + tempc = a_s_TAH; + break; + + case a_ZAH: + tempc = a_s_ZAH; + break; + + case a_AIN: + tempc = a_s_AIN; + break; + + case a_GHAIN: + tempc = a_s_GHAIN; + break; + + case a_FEH: + tempc = a_s_FEH; + break; + + case a_QAF: + tempc = a_s_QAF; + break; + + case a_KAF: + tempc = a_s_KAF; + break; + + case a_LAM: + tempc = a_s_LAM; + break; + + case a_MEEM: + tempc = a_s_MEEM; + break; + + case a_NOON: + tempc = a_s_NOON; + break; + + case a_HEH: + tempc = a_s_HEH; + break; + + case a_YEH: + tempc = a_s_YEH; + break; + + default: + tempc = 0; + } + + return tempc; +} + +// Change shape - from ISO-8859-6/Isolated to Initial +static int chg_c_a2i(int cur_c) +{ + int tempc; + + switch (cur_c) { + case a_YEH_HAMZA: + tempc = a_i_YEH_HAMZA; + break; + + case a_HAMZA: // exceptions + tempc = a_s_HAMZA; + break; + + case a_ALEF_MADDA: // exceptions + tempc = a_s_ALEF_MADDA; + break; + + case a_ALEF_HAMZA_ABOVE: // exceptions + tempc = a_s_ALEF_HAMZA_ABOVE; + break; + + case a_WAW_HAMZA: // exceptions + tempc = a_s_WAW_HAMZA; + break; + + case a_ALEF_HAMZA_BELOW: // exceptions + tempc = a_s_ALEF_HAMZA_BELOW; + break; + + case a_ALEF: // exceptions + tempc = a_s_ALEF; + break; + + case a_TEH_MARBUTA: // exceptions + tempc = a_s_TEH_MARBUTA; + break; + + case a_DAL: // exceptions + tempc = a_s_DAL; + break; + + case a_THAL: // exceptions + tempc = a_s_THAL; + break; + + case a_REH: // exceptions + tempc = a_s_REH; + break; + + case a_ZAIN: // exceptions + tempc = a_s_ZAIN; + break; + + case a_TATWEEL: // exceptions + tempc = cur_c; + break; + + case a_WAW: // exceptions + tempc = a_s_WAW; + break; + + case a_ALEF_MAKSURA: // exceptions + tempc = a_s_ALEF_MAKSURA; + break; + + case a_BEH: + tempc = a_i_BEH; + break; + + case a_TEH: + tempc = a_i_TEH; + break; + + case a_THEH: + tempc = a_i_THEH; + break; + + case a_JEEM: + tempc = a_i_JEEM; + break; + + case a_HAH: + tempc = a_i_HAH; + break; + + case a_KHAH: + tempc = a_i_KHAH; + break; + + case a_SEEN: + tempc = a_i_SEEN; + break; + + case a_SHEEN: + tempc = a_i_SHEEN; + break; + + case a_SAD: + tempc = a_i_SAD; + break; + + case a_DAD: + tempc = a_i_DAD; + break; + + case a_TAH: + tempc = a_i_TAH; + break; + + case a_ZAH: + tempc = a_i_ZAH; + break; + + case a_AIN: + tempc = a_i_AIN; + break; + + case a_GHAIN: + tempc = a_i_GHAIN; + break; + + case a_FEH: + tempc = a_i_FEH; + break; + + case a_QAF: + tempc = a_i_QAF; + break; + + case a_KAF: + tempc = a_i_KAF; + break; + + case a_LAM: + tempc = a_i_LAM; + break; + + case a_MEEM: + tempc = a_i_MEEM; + break; + + case a_NOON: + tempc = a_i_NOON; + break; + + case a_HEH: + tempc = a_i_HEH; + break; + + case a_YEH: + tempc = a_i_YEH; + break; + + default: + tempc = 0; + } + + return tempc; +} + +// Change shape - from ISO-8859-6/Isolated to Medial +static int chg_c_a2m(int cur_c) +{ + int tempc; + + switch (cur_c) { + case a_HAMZA: // exception + tempc = a_s_HAMZA; + break; + + case a_ALEF_MADDA: // exception + tempc = a_f_ALEF_MADDA; + break; + + case a_ALEF_HAMZA_ABOVE: // exception + tempc = a_f_ALEF_HAMZA_ABOVE; + break; + + case a_WAW_HAMZA: // exception + tempc = a_f_WAW_HAMZA; + break; + + case a_ALEF_HAMZA_BELOW: // exception + tempc = a_f_ALEF_HAMZA_BELOW; + break; + + case a_YEH_HAMZA: + tempc = a_m_YEH_HAMZA; + break; + + case a_ALEF: // exception + tempc = a_f_ALEF; + break; + + case a_BEH: + tempc = a_m_BEH; + break; + + case a_TEH_MARBUTA: // exception + tempc = a_f_TEH_MARBUTA; + break; + + case a_TEH: + tempc = a_m_TEH; + break; + + case a_THEH: + tempc = a_m_THEH; + break; + + case a_JEEM: + tempc = a_m_JEEM; + break; + + case a_HAH: + tempc = a_m_HAH; + break; + + case a_KHAH: + tempc = a_m_KHAH; + break; + + case a_DAL: // exception + tempc = a_f_DAL; + break; + + case a_THAL: // exception + tempc = a_f_THAL; + break; + + case a_REH: // exception + tempc = a_f_REH; + break; + + case a_ZAIN: // exception + tempc = a_f_ZAIN; + break; + + case a_SEEN: + tempc = a_m_SEEN; + break; + + case a_SHEEN: + tempc = a_m_SHEEN; + break; + + case a_SAD: + tempc = a_m_SAD; + break; + + case a_DAD: + tempc = a_m_DAD; + break; + + case a_TAH: + tempc = a_m_TAH; + break; + + case a_ZAH: + tempc = a_m_ZAH; + break; + + case a_AIN: + tempc = a_m_AIN; + break; + + case a_GHAIN: + tempc = a_m_GHAIN; + break; + + case a_TATWEEL: // exception + tempc = cur_c; + break; + + case a_FEH: + tempc = a_m_FEH; + break; + + case a_QAF: + tempc = a_m_QAF; + break; + + case a_KAF: + tempc = a_m_KAF; + break; + + case a_LAM: + tempc = a_m_LAM; + break; + + case a_MEEM: + tempc = a_m_MEEM; + break; + + case a_NOON: + tempc = a_m_NOON; + break; + + case a_HEH: + tempc = a_m_HEH; + break; + + case a_WAW: // exception + tempc = a_f_WAW; + break; + + case a_ALEF_MAKSURA: // exception + tempc = a_f_ALEF_MAKSURA; + break; + + case a_YEH: + tempc = a_m_YEH; + break; + + default: + tempc = 0; + } + + return tempc; +} + +// Change shape - from ISO-8859-6/Isolated to final +static int chg_c_a2f(int cur_c) +{ + int tempc; + + // NOTE: these encodings need to be accounted for + // + // a_f_ALEF_MADDA; + // a_f_ALEF_HAMZA_ABOVE; + // a_f_ALEF_HAMZA_BELOW; + // a_f_LAM_ALEF_MADDA_ABOVE; + // a_f_LAM_ALEF_HAMZA_ABOVE; + // a_f_LAM_ALEF_HAMZA_BELOW; + + switch (cur_c) { + case a_HAMZA: // exception + tempc = a_s_HAMZA; + break; + + case a_ALEF_MADDA: + tempc = a_f_ALEF_MADDA; + break; + + case a_ALEF_HAMZA_ABOVE: + tempc = a_f_ALEF_HAMZA_ABOVE; + break; + + case a_WAW_HAMZA: + tempc = a_f_WAW_HAMZA; + break; + + case a_ALEF_HAMZA_BELOW: + tempc = a_f_ALEF_HAMZA_BELOW; + break; + + case a_YEH_HAMZA: + tempc = a_f_YEH_HAMZA; + break; + + case a_ALEF: + tempc = a_f_ALEF; + break; + + case a_BEH: + tempc = a_f_BEH; + break; + + case a_TEH_MARBUTA: + tempc = a_f_TEH_MARBUTA; + break; + + case a_TEH: + tempc = a_f_TEH; + break; + + case a_THEH: + tempc = a_f_THEH; + break; + + case a_JEEM: + tempc = a_f_JEEM; + break; + + case a_HAH: + tempc = a_f_HAH; + break; + + case a_KHAH: + tempc = a_f_KHAH; + break; + + case a_DAL: + tempc = a_f_DAL; + break; + + case a_THAL: + tempc = a_f_THAL; + break; + + case a_REH: + tempc = a_f_REH; + break; + + case a_ZAIN: + tempc = a_f_ZAIN; + break; + + case a_SEEN: + tempc = a_f_SEEN; + break; + + case a_SHEEN: + tempc = a_f_SHEEN; + break; + + case a_SAD: + tempc = a_f_SAD; + break; + + case a_DAD: + tempc = a_f_DAD; + break; + + case a_TAH: + tempc = a_f_TAH; + break; + + case a_ZAH: + tempc = a_f_ZAH; + break; + + case a_AIN: + tempc = a_f_AIN; + break; + + case a_GHAIN: + tempc = a_f_GHAIN; + break; + + case a_TATWEEL: // exception + tempc = cur_c; + break; + + case a_FEH: + tempc = a_f_FEH; + break; + + case a_QAF: + tempc = a_f_QAF; + break; + + case a_KAF: + tempc = a_f_KAF; + break; + + case a_LAM: + tempc = a_f_LAM; + break; + + case a_MEEM: + tempc = a_f_MEEM; + break; + + case a_NOON: + tempc = a_f_NOON; + break; + + case a_HEH: + tempc = a_f_HEH; + break; + + case a_WAW: + tempc = a_f_WAW; + break; + + case a_ALEF_MAKSURA: + tempc = a_f_ALEF_MAKSURA; + break; + + case a_YEH: + tempc = a_f_YEH; + break; + + default: + tempc = 0; + } + + return tempc; +} + +// Change shape - from Initial to Medial +static int chg_c_i2m(int cur_c) +{ + int tempc; + + switch (cur_c) { + case a_i_YEH_HAMZA: + tempc = a_m_YEH_HAMZA; + break; + + case a_i_BEH: + tempc = a_m_BEH; + break; + + case a_i_TEH: + tempc = a_m_TEH; + break; + + case a_i_THEH: + tempc = a_m_THEH; + break; + + case a_i_JEEM: + tempc = a_m_JEEM; + break; + + case a_i_HAH: + tempc = a_m_HAH; + break; + + case a_i_KHAH: + tempc = a_m_KHAH; + break; + + case a_i_SEEN: + tempc = a_m_SEEN; + break; + + case a_i_SHEEN: + tempc = a_m_SHEEN; + break; + + case a_i_SAD: + tempc = a_m_SAD; + break; + + case a_i_DAD: + tempc = a_m_DAD; + break; + + case a_i_TAH: + tempc = a_m_TAH; + break; + + case a_i_ZAH: + tempc = a_m_ZAH; + break; + + case a_i_AIN: + tempc = a_m_AIN; + break; + + case a_i_GHAIN: + tempc = a_m_GHAIN; + break; + + case a_i_FEH: + tempc = a_m_FEH; + break; + + case a_i_QAF: + tempc = a_m_QAF; + break; + + case a_i_KAF: + tempc = a_m_KAF; + break; + + case a_i_LAM: + tempc = a_m_LAM; + break; + + case a_i_MEEM: + tempc = a_m_MEEM; + break; + + case a_i_NOON: + tempc = a_m_NOON; + break; + + case a_i_HEH: + tempc = a_m_HEH; + break; + + case a_i_YEH: + tempc = a_m_YEH; + break; + + default: + tempc = 0; + } + + return tempc; +} + +// Change shape - from Final to Medial +static int chg_c_f2m(int cur_c) +{ + int tempc; + + switch (cur_c) { + // NOTE: these encodings are multi-positional, no ? + // case a_f_ALEF_MADDA: + // case a_f_ALEF_HAMZA_ABOVE: + // case a_f_ALEF_HAMZA_BELOW: + case a_f_YEH_HAMZA: + tempc = a_m_YEH_HAMZA; + break; + + case a_f_WAW_HAMZA: // exceptions + case a_f_ALEF: + case a_f_TEH_MARBUTA: + case a_f_DAL: + case a_f_THAL: + case a_f_REH: + case a_f_ZAIN: + case a_f_WAW: + case a_f_ALEF_MAKSURA: + tempc = cur_c; + break; + + case a_f_BEH: + tempc = a_m_BEH; + break; + + case a_f_TEH: + tempc = a_m_TEH; + break; + + case a_f_THEH: + tempc = a_m_THEH; + break; + + case a_f_JEEM: + tempc = a_m_JEEM; + break; + + case a_f_HAH: + tempc = a_m_HAH; + break; + + case a_f_KHAH: + tempc = a_m_KHAH; + break; + + case a_f_SEEN: + tempc = a_m_SEEN; + break; + + case a_f_SHEEN: + tempc = a_m_SHEEN; + break; + + case a_f_SAD: + tempc = a_m_SAD; + break; + + case a_f_DAD: + tempc = a_m_DAD; + break; + + case a_f_TAH: + tempc = a_m_TAH; + break; + + case a_f_ZAH: + tempc = a_m_ZAH; + break; + + case a_f_AIN: + tempc = a_m_AIN; + break; + + case a_f_GHAIN: + tempc = a_m_GHAIN; + break; + + case a_f_FEH: + tempc = a_m_FEH; + break; + + case a_f_QAF: + tempc = a_m_QAF; + break; + + case a_f_KAF: + tempc = a_m_KAF; + break; + + case a_f_LAM: + tempc = a_m_LAM; + break; + + case a_f_MEEM: + tempc = a_m_MEEM; + break; + + case a_f_NOON: + tempc = a_m_NOON; + break; + + case a_f_HEH: + tempc = a_m_HEH; + break; + + case a_f_YEH: + tempc = a_m_YEH; + break; + + /* NOTE: these encodings are multi-positional, no ? + case a_f_LAM_ALEF_MADDA_ABOVE: + case a_f_LAM_ALEF_HAMZA_ABOVE: + case a_f_LAM_ALEF_HAMZA_BELOW: + case a_f_LAM_ALEF: + */ + default: + tempc = 0; + } + + return tempc; +} + +/* + * Change shape - from Combination (2 char) to an Isolated + */ +static int chg_c_laa2i(int hid_c) +{ + int tempc; + + switch (hid_c) { + case a_ALEF_MADDA: + tempc = a_s_LAM_ALEF_MADDA_ABOVE; + break; + + case a_ALEF_HAMZA_ABOVE: + tempc = a_s_LAM_ALEF_HAMZA_ABOVE; + break; + + case a_ALEF_HAMZA_BELOW: + tempc = a_s_LAM_ALEF_HAMZA_BELOW; + break; + + case a_ALEF: + tempc = a_s_LAM_ALEF; + break; + + default: + tempc = 0; + } + + return tempc; +} + +/* + * Change shape - from Combination-Isolated to Final + */ +static int chg_c_laa2f(int hid_c) +{ + int tempc; + + switch (hid_c) { + case a_ALEF_MADDA: + tempc = a_f_LAM_ALEF_MADDA_ABOVE; + break; + + case a_ALEF_HAMZA_ABOVE: + tempc = a_f_LAM_ALEF_HAMZA_ABOVE; + break; + + case a_ALEF_HAMZA_BELOW: + tempc = a_f_LAM_ALEF_HAMZA_BELOW; + break; + + case a_ALEF: + tempc = a_f_LAM_ALEF; + break; + + default: + tempc = 0; + } + + return tempc; +} + +/* + * Do "half-shaping" on character "c". Return zero if no shaping. + */ +static int half_shape(int c) +{ + if (A_is_a(c)) { + return chg_c_a2i(c); + } + + if (A_is_valid(c) && A_is_f(c)) { + return chg_c_f2m(c); + } + return 0; +} + +/* + * Do Arabic shaping on character "c". Returns the shaped character. + * out: "ccp" points to the first byte of the character to be shaped. + * in/out: "c1p" points to the first composing char for "c". + * in: "prev_c" is the previous character (not shaped) + * in: "prev_c1" is the first composing char for the previous char + * (not shaped) + * in: "next_c" is the next character (not shaped). + */ +int arabic_shape(int c, int *ccp, int *c1p, int prev_c, int prev_c1, + int next_c) +{ + int curr_c; + int shape_c; + int curr_laa; + int prev_laa; + + /* Deal only with Arabic character, pass back all others */ + if (!A_is_ok(c)) { + return c; + } + + /* half-shape current and previous character */ + shape_c = half_shape(prev_c); + + /* Save away current character */ + curr_c = c; + + curr_laa = A_firstc_laa(c, *c1p); + prev_laa = A_firstc_laa(prev_c, prev_c1); + + if (curr_laa) { + if (A_is_valid(prev_c) && !A_is_f(shape_c) && !A_is_s(shape_c) && + !prev_laa) { + curr_c = chg_c_laa2f(curr_laa); + } else { + curr_c = chg_c_laa2i(curr_laa); + } + + /* Remove the composing character */ + *c1p = 0; + } else if (!A_is_valid(prev_c) && A_is_valid(next_c)) { + curr_c = chg_c_a2i(c); + } else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa) { + curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c); + } else if (A_is_valid(next_c)) { + curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c); + } else if (A_is_valid(prev_c)) { + curr_c = chg_c_a2f(c); + } else { + curr_c = chg_c_a2s(c); + } + + /* Sanity check -- curr_c should, in the future, never be 0. + * We should, in the future, insert a fatal error here. */ + if (curr_c == NUL) { + curr_c = c; + } + + if ((curr_c != c) && (ccp != NULL)) { + char_u buf[MB_MAXBYTES + 1]; + + /* Update the first byte of the character. */ + (*mb_char2bytes)(curr_c, buf); + *ccp = buf[0]; + } + + /* Return the shaped character */ + return curr_c; +} + +/// Check whether we are dealing with Arabic combining characters. +/// Note: these are NOT really composing characters! +/// +/// @param one First character. +/// @param two Character just after "one". +int arabic_combine(int one, int two) +{ + if (one == a_LAM) { + return arabic_maycombine(two); + } + return FALSE; +} + +/// Check whether we are dealing with a character that could be regarded as an +/// Arabic combining character, need to check the character before this. +int arabic_maycombine(int two) +{ + if (p_arshape && !p_tbidi) { + return two == a_ALEF_MADDA + || two == a_ALEF_HAMZA_ABOVE + || two == a_ALEF_HAMZA_BELOW + || two == a_ALEF; + } + return FALSE; +} + +/* + * A_firstc_laa returns first character of LAA combination if it exists + * in: "c" base character + * in: "c1" first composing character + */ +static int A_firstc_laa(int c, int c1) +{ + if ((c1 != NUL) && (c == a_LAM) && !A_is_harakat(c1)) { + return c1; + } + return 0; +} + +/* + * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character + * (harakat/tanween) + */ +static int A_is_harakat(int c) +{ + return c >= a_FATHATAN && c <= a_SUKUN; +} + +/* + * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character + * (alphabet/number/punctuation) + */ +static int A_is_iso(int c) +{ + return (c >= a_HAMZA && c <= a_GHAIN) || + (c >= a_TATWEEL && c <= a_HAMZA_BELOW) || + c == a_MINI_ALEF; +} + +/* + * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character + * (alphabet/number/punctuation) + */ +static int A_is_formb(int c) +{ + return (c >= a_s_FATHATAN && c <= a_s_DAMMATAN) || + c == a_s_KASRATAN || + (c >= a_s_FATHA && c <= a_f_LAM_ALEF) || + c == a_BYTE_ORDER_MARK; +} + +/* + * A_is_ok returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B) + */ +static int A_is_ok(int c) +{ + return A_is_iso(c) || A_is_formb(c); +} + +/* + * A_is_valid returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B) + * with some exceptions/exclusions + */ +static int A_is_valid(int c) +{ + return A_is_ok(c) && !A_is_special(c); +} + +/* + * A_is_special returns TRUE if 'c' is not a special Arabic character. + * Specials don't adhere to most of the rules. + */ +static int A_is_special(int c) +{ + return c == a_HAMZA || c == a_s_HAMZA; +} |