Skip to content

Commit aeda3bc

Browse files
authored
Merge pull request #273 from wysha-object/fuzz-word-test
feat: fuzz similarity
2 parents fa988be + 5d4fbb4 commit aeda3bc

1 file changed

Lines changed: 18 additions & 10 deletions

File tree

packages/core/src/utils/word-test.ts

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,14 @@ import { get } from "idb-keyval";
22
import { Candidate, Question, Word } from "../types";
33
import { shuffle } from "../utils";
44

5+
const CHARS_CONTAIN_SCORE = 1 << 2;
6+
const CHARS_NOT_CONTAIN_SCORE = -1;
7+
const TRAN_EQUALS_BASE_SCORE = 1 << 16;
8+
const TRANS_CHARS_COMMON_FACTOR = 1;
9+
const WORD_COMMON_FACTOR = 1 << 16;
10+
const WORD_CONTAIN_SCORE = 1 << 20;
11+
const FUZZ_SCORE_BASE = 1 << 18;
12+
513
function getTrans(word: Word): { cn: string, freq: number }[] {
614
let rsMap = new Map<string, { cn: string, freq: number }>();
715
word.trans.forEach(item => {
@@ -24,16 +32,16 @@ function calCommon(str1: string, str2: string): number {
2432
let set2 = new Set(str2.split(''));
2533
for (let char of set1) {
2634
if (set2.has(char)) {
27-
rs += 1 << 2;
35+
rs += CHARS_CONTAIN_SCORE;
2836
} else {
29-
rs--;
37+
rs += CHARS_NOT_CONTAIN_SCORE;
3038
}
3139
}
3240
for (let char of set2) {
3341
if (set1.has(char)) {
34-
rs += 1 << 2;
42+
rs += CHARS_CONTAIN_SCORE;
3543
} else {
36-
rs--;
44+
rs += CHARS_NOT_CONTAIN_SCORE;
3745
}
3846
}
3947
return rs;
@@ -53,26 +61,26 @@ function calSimilarity(word1: Word, word2: Word): number {
5361
if (item2) {
5462
const freq1 = item.freq;
5563
const freq2 = item2.freq;
56-
similarity += (freq1 * freq2) << 16;
64+
similarity += (freq1 * freq2) * TRAN_EQUALS_BASE_SCORE;
5765
}
5866
})
5967

60-
similarity += calCommon(word1Trans.map(item => item.cn).join(''), word2Trans.map((item) => item.cn).join(''));
61-
similarity += calCommon(word1.word, word2.word) << 16;
68+
similarity += calCommon(word1Trans.map(item => item.cn).join(''), word2Trans.map((item) => item.cn).join('')) * TRANS_CHARS_COMMON_FACTOR;
69+
similarity += calCommon(word1.word, word2.word) * WORD_COMMON_FACTOR;
6270
if (
6371
word1.word.includes(word2.word) ||
6472
word2.word.includes(word1.word)
6573
) {
66-
similarity += 1 << 20;
74+
similarity += WORD_CONTAIN_SCORE;
6775
}
6876
if (
6977
word1.relWords.rels.findIndex(rel => rel.words.findIndex(word => word.c === word2.word) !== -1) !== -1 ||
7078
word2.relWords.rels.findIndex(rel => rel.words.findIndex(word => word.c === word1.word) !== -1) !== -1
7179
) {
72-
similarity += 1 << 20;
80+
similarity += WORD_CONTAIN_SCORE;
7381
// console.log('relWords', word1.word, word2.word)
7482
}
75-
return similarity;
83+
return similarity + Math.pow(Math.random(), 2) * FUZZ_SCORE_BASE;
7684
}
7785

7886
export function buildQuestion(word: Word, list: Word[], maxCount: number = 4): Question {

0 commit comments

Comments
 (0)