Skip to content

Commit

Permalink
Merge pull request #25 from amerharb/fix-medial-yeh
Browse files Browse the repository at this point in the history
Fix medial yeh
  • Loading branch information
AbdelrahmanBayoumi authored Jan 18, 2024
2 parents 9434361 + b5dfa9c commit addf8c4
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 25 deletions.
16 changes: 8 additions & 8 deletions src/constants/arabic-letters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ export const ARABIC_DOTLESS_DICT: { [key: string]: string } = {
آ: 'ا',
ٱ: 'ا',
ء: '',
ب: 'ٮ',
پ: 'ٮ',
ت: 'ٮ',
ث: 'ٮ',
ب: '\u066E', // Dotless Beh in all four contextual forms.
پ: '\u066E',
ت: '\u066E',
ث: '\u066E',
ج: 'ح',
چ: 'ح',
خ: 'ح',
Expand All @@ -33,14 +33,14 @@ export const ARABIC_DOTLESS_DICT: { [key: string]: string } = {
گ: 'ک',
ل: 'ل',
م: 'م',
ن: 'ں',
ن: '\u06BA', // Dotless Noon in all four contextual forms, however most fonts shows dots in initial and medial forms
ه: 'ه',
و: 'و',
ؤ: 'و',
ة: 'ه',
ى: 'ى',
ي: 'ى',
ئ: 'ى',
ى: '\u0649', // Dotless Yeh in all four contextual forms, however some fonts shows final form in all cases
ي: '\u0649',
ئ: '\u0649',
};

export const STANDARD_LETTERS: string[] = [
Expand Down
3 changes: 3 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import * as constants from './constants';
import * as script from './scripts';
import * as utils from './utils';
// Type-only import: it is erased from the emitted JavaScript, which keeps this
// module safe under `isolatedModules` and consistent with the `type` import
// style already used in ./scripts.
import type { OldArabicOptions } from './options';

/**
 * Aggregated public API of the package.
 *
 * - `constants` and `utils` are exposed as namespaces.
 * - Every export of `./scripts` (e.g. `toOldArabic`, `removeTashkeel`) is
 *   spread onto the object itself, so callers write
 *   `ArabicServices.toOldArabic(...)`.
 */
export const ArabicServices = {
  constants,
  utils,
  ...script,
};

// Re-export the options type so consumers can annotate the second argument of
// `toOldArabic` without reaching into internal module paths.
export type { OldArabicOptions };
16 changes: 16 additions & 0 deletions src/options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
 * Options accepted by `toOldArabic`.
 *
 * Every field is optional; a field that is omitted or explicitly `undefined`
 * falls back to the default applied by {@link fillDefaultOptions}.
 */
export type OldArabicOptions = {
  /**
   * When `true`, a ن that is followed by another convertible Arabic letter or
   * by a tatweel is rendered as dotless beh (U+066E) instead of dotless noon
   * (U+06BA). Defaults to `true`.
   */
  replaceMidNoonWithBah?: boolean;
  /**
   * When `true`, a ي that is followed by another convertible Arabic letter or
   * by a tatweel is rendered as dotless beh (U+066E) instead of dotless yeh
   * (U+0649). Defaults to `false`.
   */
  replaceMidYahWithBah?: boolean;
};

/** Values used for every option the caller leaves unset. */
const defaultOldArabicOptions: Required<OldArabicOptions> = {
  replaceMidNoonWithBah: true,
  replaceMidYahWithBah: false,
};

/**
 * Resolve a partial options object into a complete one.
 *
 * Unlike a plain `{ ...defaults, ...options }` merge, a property that is
 * present but `undefined` does NOT overwrite its default, so
 * `fillDefaultOptions({ replaceMidNoonWithBah: undefined })` still yields
 * `replaceMidNoonWithBah: true`.
 *
 * @param options caller-supplied options; may be empty or omitted
 * @returns a new object with every option set to a concrete boolean
 */
export function fillDefaultOptions(options: OldArabicOptions = {}): Required<OldArabicOptions> {
  return {
    // `?.` keeps tolerating a `null` argument from untyped JavaScript callers,
    // which the previous spread-based merge also accepted.
    replaceMidNoonWithBah: options?.replaceMidNoonWithBah ?? defaultOldArabicOptions.replaceMidNoonWithBah,
    replaceMidYahWithBah: options?.replaceMidYahWithBah ?? defaultOldArabicOptions.replaceMidYahWithBah,
  };
}
37 changes: 23 additions & 14 deletions src/scripts/scripts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
YAA,
} from '../constants/arabic-letters';
import { setCharAt, similarityScore } from '../utils';
import { fillDefaultOptions, type OldArabicOptions } from '../options';

/**
* Remove all tashkeel from text
Expand All @@ -30,32 +31,40 @@ export function removeTashkeel(text: string): string {
/**
* Remove all dots & tashkeel from text
* @param sentence string to convert to old arabic
* @param option
* @returns string in old arabic
* @example
* Input: "الخَيْلُ وَاللّيْلُ وَالبَيْداءُ تَعرِفُني"
* Output: "الحىل واللىل والٮىدا ٮعرڡٮى"
*/
export function toOldArabic(sentence: string): string {
export function toOldArabic(sentence: string, option: OldArabicOptions = {}): string {
const { replaceMidNoonWithBah, replaceMidYahWithBah } = fillDefaultOptions(option);
sentence = removeTashkeel(sentence.trim());
let newSentence = '';
for (let letter = 0; letter < sentence.length; letter++) {
// if letter is not Arabic letter => append to newSentence
if (!ARABIC_DOTLESS_DICT.hasOwnProperty(sentence[letter])) {
newSentence += sentence[letter];
} else {
// letter is Arabic letter => replace it with its corresponding dotless letter
newSentence += ARABIC_DOTLESS_DICT[sentence[letter]];
// Handle 'ن' Issue
if (sentence[letter] == 'ن') {
const nextLetter = letter + 1;
// if 'ن' is not last character replace it with 'ب' corresponding dotless letter => 'ٮ'
if (nextLetter < sentence.length) {
let temp = newSentence.substring(0, newSentence.length - 1);
if (ARABIC_DOTLESS_DICT.hasOwnProperty(sentence[nextLetter]) || sentence[nextLetter] == 'ـ') {
temp += ARABIC_DOTLESS_DICT['ب'];
newSentence = temp;
}
}
if (
// Handle 'ن' Issue
replaceMidNoonWithBah &&
sentence[letter] == 'ن' &&
letter + 1 < sentence.length &&
(ARABIC_DOTLESS_DICT.hasOwnProperty(sentence[letter + 1]) || sentence[letter + 1] == 'ـ')
) {
newSentence += ARABIC_DOTLESS_DICT['ب'];
} else if (
// Handle 'ي' Issue
replaceMidYahWithBah &&
sentence[letter] == 'ي' &&
letter + 1 < sentence.length &&
(ARABIC_DOTLESS_DICT.hasOwnProperty(sentence[letter + 1]) || sentence[letter + 1] == 'ـ')
) {
newSentence += ARABIC_DOTLESS_DICT['ب'];
} else {
// if letter is Arabic letter => append corresponding dotless letter to newSentence
newSentence += ARABIC_DOTLESS_DICT[sentence[letter]];
}
}
}
Expand Down
72 changes: 69 additions & 3 deletions tests/scripts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ describe('#toOldArabic()', () => {
const expected = `ٮٮں`;
expect(actual).toBe(expected);
});
it('should convert ن to ں in all cases', () => {
  // With the medial replacement switched off, every ن (initial, medial and
  // final) must map to dotless noon U+06BA.
  const text = `ننن`;
  const actual = ArabicServices.toOldArabic(text, { replaceMidNoonWithBah: false });
  const expected = `ںںں`;
  // Same expectation spelled as escapes, so the test does not depend on how
  // an editor or font renders the literal above.
  const expectedUnicode = '\u06BA\u06BA\u06BA';
  expect(actual).toBe(expected);
  expect(actual).toBe(expectedUnicode);
});
});

describe('Test Tatweel with ن letter', () => {
Expand All @@ -37,6 +45,52 @@ describe('#toOldArabic()', () => {
const expected = `ٮــٮــں`;
expect(actual).toBe(expected);
});
it('should convert ن to ں in all cases', () => {
  // With the medial replacement switched off, a ن followed by tatweel must
  // still map to dotless noon U+06BA, and the tatweel (U+0640) is preserved.
  const text = `نــنــن`;
  const actual = ArabicServices.toOldArabic(text, { replaceMidNoonWithBah: false });
  const expected = `ںــںــں`;
  // Same expectation spelled as escapes, independent of glyph rendering.
  const expectedUnicode = '\u06BA\u0640\u0640\u06BA\u0640\u0640\u06BA';
  expect(actual).toBe(expected);
  expect(actual).toBe(expectedUnicode);
});
});

describe('Test ي letter', () => {
  it('should convert ي to ى last letter, else convert it to ٮ', () => {
    // Opt-in behaviour: a non-final ي becomes dotless beh U+066E, the final
    // one stays dotless yeh U+0649.
    const text = `ييي`;
    const actual = ArabicServices.toOldArabic(text, { replaceMidYahWithBah: true });
    const expected = `ٮٮى`;
    const expectedUnicode = '\u066E\u066E\u0649';
    expect(actual).toBe(expected);
    expect(actual).toBe(expectedUnicode);
  });
  it('should convert ي to ى in all cases (default)', () => {
    const text = `ييي`;
    // No options are passed, so this exercises the real defaults rather than
    // an unrelated noon setting.
    const actual = ArabicServices.toOldArabic(text);
    const expected = `ىىى`;
    const expectedUnicode = '\u0649\u0649\u0649';
    expect(actual).toBe(expected);
    expect(actual).toBe(expectedUnicode);
  });
});

describe('Test Tatweel with ي letter', () => {
  it('should convert ي to ى last letter, else convert it to ٮ', () => {
    // Opt-in behaviour: a ي followed by tatweel counts as non-final and
    // becomes dotless beh U+066E; the tatweel (U+0640) is preserved.
    const text = `يــيــي`;
    const actual = ArabicServices.toOldArabic(text, { replaceMidYahWithBah: true });
    const expected = `ٮــٮــى`;
    const expectedUnicode = '\u066E\u0640\u0640\u066E\u0640\u0640\u0649';
    expect(actual).toBe(expected);
    expect(actual).toBe(expectedUnicode);
  });
  it('should convert ي to ى in all cases (default)', () => {
    // Default options: every ي maps to dotless yeh U+0649, tatweel preserved.
    const text = `يــيــي`;
    const actual = ArabicServices.toOldArabic(text);
    const expected = `ىــىــى`;
    const expectedUnicode = '\u0649\u0640\u0640\u0649\u0640\u0640\u0649';
    expect(actual).toBe(expected);
    expect(actual).toBe(expectedUnicode);
  });
});

describe('Test with Poetry Text', () => {
Expand All @@ -46,13 +100,25 @@ describe('#toOldArabic()', () => {
const expected = `الحىـل واللىـل والٮىـدا ٮعرڡٮى والسىڡ والرمح والٯرطاس والٯلـم`;
expect(actual).toBe(expected);
});
it('should remove all dots & tashkeel from Poetry text, with option replace Yeh with dotless Bah', () => {
  // Verse with tashkeel and tatweel; medial ي is expected to come out as ٮ
  // because replaceMidYahWithBah is enabled.
  const verse = `الخَيْـلُ وَاللّيْـلُ وَالبَيْـداءُ تَعرِفُني وَالسّيفُ وَالرّمحُ والقرْطاسُ وَالقَلَـمُ`;
  const dotlessVerse = `الحٮـل واللٮـل والٮٮـدا ٮعرڡٮى والسٮڡ والرمح والٯرطاس والٯلـم`;
  const options = { replaceMidYahWithBah: true };
  expect(ArabicServices.toOldArabic(verse, options)).toBe(dotlessVerse);
});
});

describe('Test with Quran Text', () => {
it('should remove tashkeel from Quran text', () => {
it('should remove all dots & tashkeel from Quran text', () => {
const text = `وَقَالُواْ ٱلۡحَمۡدُ لِلَّهِ ٱلَّذِيٓ أَذۡهَبَ عَنَّا ٱلۡحَزَنَۖ إِنَّ رَبَّنَا لَغَفُورٞ شَكُورٌ`;
const actual = ArabicServices.removeTashkeel(text);
const expected = `وقالوا الحمد لله الذي أذهب عنا الحزن إن ربنا لغفور شكور`;
const actual = ArabicServices.toOldArabic(text);
const expected = `وٯالوا الحمد لله الدى ادهٮ عٮا الحرں اں رٮٮا لعڡور سکور`;
expect(actual).toBe(expected);
});
it('should remove all dots & tashkeel from Quran text, without replacing medial ن with ٮ', () => {
  // Same verse as the default-options test, but with the noon replacement
  // disabled: every ن must come out as ں regardless of position.
  const text = `وَقَالُواْ ٱلۡحَمۡدُ لِلَّهِ ٱلَّذِيٓ أَذۡهَبَ عَنَّا ٱلۡحَزَنَۖ إِنَّ رَبَّنَا لَغَفُورٞ شَكُورٌ`;
  const actual = ArabicServices.toOldArabic(text, { replaceMidNoonWithBah: false });
  const expected = `وٯالوا الحمد لله الدى ادهٮ عںا الحرں اں رٮںا لعڡور سکور`;
  expect(actual).toBe(expected);
});
});
Expand Down

0 comments on commit addf8c4

Please sign in to comment.