Skip to content

Commit

Permalink
improve-boundry
Browse files Browse the repository at this point in the history
  • Loading branch information
uriva committed Jan 31, 2024
1 parent e3af275 commit 39b7a2d
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 28 deletions.
22 changes: 18 additions & 4 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
{
"deno.enable": true,
"deno.lint": true,
"deno.unstable": true
}
"deno.lint": true,
"deno.enable": true,
"deno.unstable": true,
"search.exclude": {
"deno.lock": true,
"dist/**": true
},
"editor.defaultFormatter": "denoland.vscode-deno",
"[typescript]": {
"editor.defaultFormatter": "denoland.vscode-deno"
},
"[markdown]": {
"editor.defaultFormatter": "denoland.vscode-deno"
},
"[github-actions-workflow]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
}
}
4 changes: 1 addition & 3 deletions build_npm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ await build({
type: "git",
url: "git+https://github.com/uriva/silly-nlp.git",
},
bugs: {
url: "https://github.com/uriva/silly-nlp/issues",
},
bugs: { url: "https://github.com/uriva/silly-nlp/issues" },
},
postBuild() {
Deno.copyFileSync("./LICENSE", outDir + "/LICENSE");
Expand Down
10 changes: 4 additions & 6 deletions src/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { sideLog } from "https://deno.land/x/gamla@43.0.0/src/debug.ts";
import { regexpTimes } from "./index.ts";
import { matchesRegexp } from "./index.ts";
import {
approximateSemanticEquality,
capitalizedPrefix,
capitalizedSuffix,
cleanSpeakers,
matchesRegexp,
ngramsOfAtLeastNWords,
prefixesWithSuffix,
quotedTexts,
regexpTimes,
simplify,
someKewyordMatches,
suffixesWithPrefix,
Expand All @@ -23,7 +22,7 @@ const testFn =
<F extends Func>(name: string, f: F) =>
(cases: [Parameters<F>, ReturnType<F>][]) =>
cases.forEach(([args, result]) =>
Deno.test(name, () => assertEquals(f(...args), result)),
Deno.test(name, () => assertEquals(f(...args), result))
);

const testUnaryFn =
Expand Down Expand Up @@ -66,8 +65,7 @@ testFn(
Deno.test("suffixesWithPrefix", () =>
assertEquals(suffixesWithPrefix(/from\s+/gi, "from the matrix"), [
"the matrix",
]),
);
]));

Deno.test("capitalizedSuffix", () => {
assertEquals(
Expand Down
27 changes: 12 additions & 15 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ export const suffixesWithPrefix = (regex: RegExp, input: string) =>
regExpLocations(regex, input).map(({ end }) => input.slice(end));

export const majority =
(equivalence: (str: string) => string) =>
(elements: string[]): string => {
(equivalence: (str: string) => string) => (elements: string[]): string => {
const counts: Record<string, number> = {};
const original: Record<string, string> = {};
for (const element of elements) {
Expand Down Expand Up @@ -194,17 +193,15 @@ export const simplify: (x: string) => string = pipe(
// Set built from `englishWords`; the code below queries it with `.has` to
// decide whether a string is a known word.
const allEnglishWordsAsSet = new Set(englishWords);

/**
 * Tries to repair a single token that is two known words glued together.
 *
 * A token already present in `allEnglishWordsAsSet` is returned untouched.
 * Otherwise the candidate indices produced by `range(1, x.length - 1)` are
 * searched for the first position at which both the left and the right slice
 * are known words; if one is found the token is returned with a single space
 * inserted there, otherwise the token is returned unchanged.
 *
 * NOTE(review): the exact bounds of `range` (inclusive/exclusive end) are not
 * visible here — confirm that the last split position is covered as intended.
 */
const fixMissingSpaceInOneWord = (x: string) => {
  if (allEnglishWordsAsSet.has(x)) return x;
  // True when cutting `x` at `index` yields two known words.
  const splitsIntoTwoWords = (index: number) =>
    allEnglishWordsAsSet.has(x.slice(0, index)) &&
    allEnglishWordsAsSet.has(x.slice(index));
  return letIn(
    range(1, x.length - 1).find(splitsIntoTwoWords),
    // `location` is undefined when no split position qualified.
    (location) =>
      location ? `${x.slice(0, location)} ${x.slice(location)}` : x,
  );
};

/**
 * Applies `fixMissingSpaceInOneWord` to every token of `x`.
 *
 * The input is split on each single whitespace character, every resulting
 * token is passed through the per-word fixer, and the results are joined back
 * together with one space between them.
 */
const missingSpace = (x: string) => {
  const tokens = x.split(/\s/);
  const repaired = tokens.map(fixMissingSpaceInOneWord);
  return repaired.join(" ");
};
Expand Down Expand Up @@ -276,7 +273,7 @@ export const selectionGroup = (x: RegExp) =>

/**
 * Reports whether a regular-expression source is exactly one bracketed unit:
 * a single `(...)` group or a single `[...]` character class whose closing
 * bracket is the last character of the string.
 *
 * Escaped characters are skipped, and round brackets inside a character class
 * are treated as literals. Any input the scan cannot confirm yields `false`,
 * which only results in a redundant (harmless) non-capturing group.
 */
const isSingleBracketedUnit = (s: string): boolean => {
  if (!s.startsWith("(") && !s.startsWith("[")) return false;
  let depth = 0; // number of currently open round groups
  let inClass = false; // inside a `[...]` character class
  for (let i = 0; i < s.length; i++) {
    const c = s[i];
    if (c === "\\") {
      i++; // skip the escaped character
      continue;
    }
    if (inClass) {
      if (c === "]") {
        inClass = false;
        // A class closing at depth 0 is the leading unit itself.
        if (depth === 0) return i === s.length - 1;
      }
      continue;
    }
    if (c === "[") inClass = true;
    else if (c === "(") depth++;
    else if (c === ")") {
      depth--;
      // The leading group just closed; it must end the string.
      if (depth === 0) return i === s.length - 1;
    }
  }
  return false; // unbalanced input: wrap to be safe
};

/**
 * Wraps a regular-expression source in a non-capturing group unless it is
 * already a single group or character class.
 *
 * A plain `startsWith`/`endsWith` check would accept sources such as
 * `(a)|(b)` or `[a][b]`, where the first bracket closes before the end; a
 * quantifier or concatenation applied afterwards would then bind only to the
 * last atom. Those sources are now wrapped in `(?:...)`.
 *
 * @param s regular-expression source text
 * @returns `s` itself when it is one bracketed unit, otherwise `(?:s)`
 */
const bracketIfNeeded = (s: string) =>
  isSingleBracketedUnit(s) ? s : `(?:${s})`;

Expand Down Expand Up @@ -312,7 +309,7 @@ const personName = [

// Matches a single literal hyphen character.
const hyphen = new RegExp("[-]");

// Token boundary: one separator character, the start of input, or the end of
// input, combined with `regExpOr`. Used instead of `\b`, which is defined in
// terms of `\w` (ASCII word characters only) and therefore does not work next
// to non-ASCII letters.
// The hyphen is escaped so the class no longer contains the ambiguous `.-\s`
// range; that form is only tolerated in legacy (non-`u`) mode, where it
// already means the literals `.` and `-` plus whitespace, so the set of
// matched characters is unchanged.
// NOTE(review): the Hebrew letters in the class look like one-letter prefixes
// treated as separators — confirm.
const boundry = [/[_@.\-\s:/בלה[\]?&%$#=*,!()]/, /^/, /$/].reduce(regExpOr);

const speaker = [optional(hyphen), personName, /\s?:/, boundry].reduce(
concatRegexp,
Expand Down

0 comments on commit 39b7a2d

Please sign in to comment.