word intents
Former-commit-id: d14f06c38a12be985ee1e570dc9c59ffe67e4246
This commit is contained in:
@@ -8,6 +8,62 @@ async function Init() {
|
||||
});
|
||||
}
|
||||
|
||||
// probability of trigram/bigram
|
||||
/**
 * Lookup table mapping a two-letter sequence (bigram) to a commonality score.
 *
 * Keys are lowercase two-character strings; values are numbers. Several values
 * exceed 1, so they are not raw probabilities in [0, 1].
 * NOTE(review): they look like percentage frequencies for English text —
 * confirm the source and units before combining them with other probabilities.
 */
const BigramCommonality = {
  th: 1.52,
  he: 1.28,
  in: 0.94,
  er: 0.94,
  an: 0.82,
  re: 0.68,
  nd: 0.63,
  at: 0.59,
  on: 0.57,
  nt: 0.56,
  ha: 0.56,
  es: 0.56,
  st: 0.55,
  en: 0.55,
  ed: 0.53,
  to: 0.52,
  it: 0.50,
  ou: 0.50,
  ea: 0.47,
  hi: 0.46,
  is: 0.46,
  or: 0.43,
  ti: 0.34,
  as: 0.33,
  te: 0.27,
  et: 0.19,
  ng: 0.18,
  of: 0.16,
  al: 0.09,
  de: 0.09,
  se: 0.08,
  le: 0.08,
  sa: 0.06,
  si: 0.05,
  ar: 0.04,
  ve: 0.04,
  ra: 0.04,
  ld: 0.02,
  ur: 0.02,
};
|
||||
|
||||
/**
 * Lookup table mapping a three-letter sequence (trigram) to a commonality score.
 *
 * Keys are lowercase three-character strings; values are numbers. At least one
 * value exceeds 1, so they are not raw probabilities in [0, 1].
 * NOTE(review): they look like percentage frequencies for English text —
 * confirm the source and units. Entries are not sorted by value.
 */
const TrigramCommonality = {
  the: 1.81,
  and: 0.73,
  tha: 0.33,
  ent: 0.42,
  ing: 0.72,
  ion: 0.42,
  tio: 0.31,
  for: 0.34,
  oft: 0.22,
  sth: 0.21,
};
|
||||
|
||||
function MostProbableAlternateQueries(query) {
|
||||
const words = query.split(' ');
|
||||
|
||||
@@ -24,17 +80,36 @@ function MostProbableAlternateQueries(query) {
|
||||
}
|
||||
|
||||
/**
 * Starting point for guessing what a possibly misspelled word was meant to be.
 *
 * At present this only logs the word and returns its bigram tokens; no
 * probability scoring is applied yet.
 *
 * The stale `return [];` that used to sit at the top of the body has been
 * removed: it made every statement below it unreachable.
 *
 * @param {string} word - The word to analyse.
 * @returns {string[]} Whatever `BiGrams(word)` returns.
 */
function MostProbableMissSpelling(word) {
  // First work out if it's intended to be a word

  console.log(word);
  return BiGrams(word);
}
|
||||
|
||||
/**
 * Placeholder for the conditional probability of a trigram token.
 *
 * Not implemented: the body is empty, so every call returns `undefined`.
 * A dangling duplicate `function TriGrams(word) {` header that preceded this
 * function has been removed; it was never closed and would have swallowed the
 * rest of the file.
 *
 * @param {string} token - Currently unused.
 * @returns {undefined}
 */
function ConditionalTrigramProbability(token) {
  // TODO: implement. Presumably meant to consult TrigramCommonality — confirm.
}
|
||||
|
||||
/**
 * Placeholder for the conditional probability of a bigram token.
 *
 * Not implemented: the body is empty, so every call returns `undefined`.
 *
 * @param {string} token - Currently unused.
 * @returns {undefined}
 */
function ConditionalBigramProbability(token) {
  // TODO: implement. Presumably meant to consult BigramCommonality — confirm.
}
|
||||
|
||||
/**
 * Splits a word into its overlapping three-character tokens.
 *
 * Thin wrapper that delegates to `NGrams` with a window size of 3.
 *
 * @param {string} word - The word to tokenise.
 * @returns {string[]} The list of tokens produced by `NGrams(word, 3)`.
 */
function TriGrams(word) {
  return NGrams(word, 3);
}
|
||||
|
||||
/**
 * Splits a word into its overlapping two-character tokens.
 *
 * Thin wrapper that delegates to `NGrams` with a window size of 2.
 *
 * @param {string} word - The word to tokenise.
 * @returns {string[]} The list of tokens produced by `NGrams(word, 2)`.
 */
function BiGrams(word) {
  return NGrams(word, 2);
}
|
||||
|
||||
/**
 * Returns every contiguous substring of `word` that is exactly `n` UTF-16
 * code units long, in left-to-right order (a sliding window of width `n`).
 *
 * Example: `NGrams('hello', 2)` gives `['he', 'el', 'll', 'lo']`.
 *
 * @param {string} word - The text to tokenise.
 * @param {number} n - Window width; must be at least 1.
 * @returns {string[]} The tokens, or an empty array when `word` is shorter
 *   than `n` or when `n` is not a number >= 1.
 */
function NGrams(word, n) {
  // Without this guard, n = 0 yields a run of empty strings and a negative n
  // makes substring() swap its arguments, producing meaningless tokens.
  // Written as !(n >= 1) so that NaN and undefined are rejected too.
  if (!(n >= 1)) {
    return [];
  }

  const tokens = [];
  for (let i = 0; i < word.length - n + 1; i++) {
    tokens.push(word.substring(i, i + n));
  }
  return tokens;
}
|
||||
|
||||
module.exports = {
|
||||
Init,
|
||||
|
||||
Reference in New Issue
Block a user