gpt4 book ai didi

javascript - 如何读取此 Radix Tree 结构以确定下一个字符串的概率?

转载 作者:塔克拉玛干 更新时间:2023-11-02 21:43:30 25 4
gpt4 key购买 nike

在 JavaScript 中,我试图接受给定的用户输入并猜测 3 个最有可能完成用户当前(未完成)键入的单词的单词。猜测是基于用户过去的输入。我正在研究这个 here, in this JSFiddle .

我构建的用于记录用户过去输入的结构是经过修改的 Radix Tree (AKA Patricia Trie) :

输入:“hey”,存储得到的结构:

{
"h": {
"value": "h",
"count": 1,
"followables": {
"e": {
"value": "e",
"count": 1,
"followables": {
"y": {
"value": "y",
"count": 1,
"followables": {}
}
}
}
}
}
}

这个数据结构构建得很完美,我认为这是实现所描述目标的最佳结构。我的问题是读取 Radix Tree 数据以定义给定输入的 3 个最可能单词的函数。例如,在上面的数据中,如果用户输入“h”,猜测函数应该返回这样一个对象:

guess : {
1 : "hey",
2 : "",
3 : ""
}

这是我的代码/进度:

学习 - 获取完整的输入字符串并将组合组织到基数树中(brain):

/**
 * Records a message in the character trie ("brain").
 *
 * Each character of the message becomes (or updates) one node of the form
 * { value, count, followables }, where `count` is the number of learned
 * messages that passed through that node and `followables` holds the nodes
 * for the characters that came next.
 *
 * @param {string} message - Text to learn.
 * @param {Object} brain - Trie level to insert into; mutated in place.
 * @returns {Object} The same `brain` object that was passed in.
 */
function learn(message, brain) {
    // Nothing to record: return the trie untouched. Returning a fresh {} here
    // would wipe everything learned so far for callers that write
    // `brain = learn(input, brain)`.
    if (!message) {
        return brain;
    }

    var level = brain;
    for (var i = 0; i < message.length; i++) {
        var ch = message[i];
        if (!level[ch]) {
            // First time this character is seen at this position.
            level[ch] = {
                value: ch,
                count: 1,
                followables: {}
            };
        } else {
            level[ch].count += 1;
        }
        // Descend: the next character is stored below the current one.
        level = level[ch].followables;
    }

    // Refresh the display once, after the whole message has been stored.
    renderData();
    return brain;
}

一切都完成了。不幸的是,下一个代码,旨在读取数据并猜测用户正在输入的单词,并不好。我在处理对我来说非常复杂的功能时遇到了麻烦。我已将它分成小功能,据我所知这是最佳做法,但恐怕我搞得一团糟,可能会简单得多:

猜测 - 读取“学习”得到的字符串数据,并猜测用户可能正在输入的单词:

/**
 * Suggests up to three completions for the text typed so far and renders them.
 *
 * The typed text is matched character by character from the root of the trie
 * built by learn(). If the whole prefix is known, the most travelled
 * continuations below it are appended to the prefix to form the guesses.
 *
 * @param {string} progress - Text typed so far.
 * @param {Object} brain - Trie root: character -> { value, count, followables }.
 * @returns {Object} Guesses keyed 0..2; unused slots hold "".
 */
function guess(progress, brain) {
    console.log("Guessing based on: " + progress);
    var typed = progress || "";
    var guesses = {
        0: "",
        1: "",
        2: ""
    };

    // Walk down the trie; `node` ends up as the node of the last typed
    // character, or falsy if the prefix was never learned (or is empty).
    var level = brain;
    var node = null;
    for (var i = 0; i < typed.length; i++) {
        node = level ? level[typed[i]] : undefined;
        if (!node) {
            break;
        }
        level = node.followables;
    }

    if (node) {
        var continuations = nextStrings(node.followables);
        for (var slot = 0; slot < 3; slot++) {
            if (continuations[slot]) {
                guesses[slot] = typed + continuations[slot];
            }
        }
    }

    // Render exactly once, whether or not anything was found.
    renderGuesses(guesses);
    return guesses;
}

/**
 * Writes the three guesses into the #guess-1 .. #guess-3 elements.
 *
 * @param {Object} guesses - Strings keyed 0..2.
 */
function renderGuesses(guesses) {
    console.log(guesses);
    $('#guess-1').text(guesses[0]);
    $('#guess-2').text(guesses[1]);
    $('#guess-3').text(guesses[2]);
}

/**
 * Returns the three most likely continuations below a trie level.
 *
 * @param {Object} followables - Child nodes keyed by character.
 * @returns {Object} Continuation strings keyed 0..2; unused slots hold "".
 */
function nextStrings(followables) {
    console.log('Searching for next string...');
    var results = {
        0: "",
        1: "",
        2: ""
    };
    // `followables` is a plain object, so it has no `.length`; count its keys.
    if (followables && Object.keys(followables).length > 0) {
        var routes = chooseRoutes(followables);
        for (var slot = 0; slot < 3; slot++) {
            results[slot] = routes[slot].value;
        }
    }
    console.log(results);
    return results;
}

/**
 * Ranks the immediate children of a trie level by how often they were
 * travelled and expands each of the top three into a full continuation.
 *
 * Only one continuation is produced per immediate child, so two words that
 * share the next character compete for a single slot.
 *
 * @param {Object} followables - Child nodes keyed by character.
 * @returns {Object} Entries keyed 0..2, each { value, count }; unused slots
 *     are { value: "", count: 0 }.
 */
function chooseRoutes(followables) {
    var candidates = [];
    Object.keys(followables).forEach(function (key) {
        var child = followables[key];
        // A space means the word already ended here; it is not a completion.
        if (child.value === " ") {
            return;
        }
        candidates.push({
            value: followStr(child.followables, child.value),
            count: child.count
        });
    });

    // Highest count first.
    candidates.sort(function (a, b) {
        return b.count - a.count;
    });

    var results = {};
    for (var slot = 0; slot < 3; slot++) {
        results[slot] = candidates[slot] || {
            value: "",
            count: 0
        };
    }
    return results;
}

/**
 * Extends a string along the most travelled path of the trie until the word
 * ends (the best next character is a space) or the path runs out.
 *
 * @param {Object} followables - Child nodes to continue from.
 * @param {string} str - Text accumulated so far.
 * @returns {string} `str` followed by the most likely remaining characters.
 */
function followStr(followables, str) {
    var text = str;
    var level = followables;
    while (level) {
        // Pick the child with the highest count; the first one seen wins ties.
        var best = null;
        var keys = Object.keys(level);
        for (var i = 0; i < keys.length; i++) {
            var candidate = level[keys[i]];
            if (!best || candidate.count > best.count) {
                best = candidate;
            }
        }
        // Stop at a leaf, or when the likeliest next character ends the word.
        if (!best || best.value === " ") {
            break;
        }
        text += best.value;
        level = best.followables;
    }
    return text;
}

旁注 - 虽然在字典上进行模糊字符串搜索是更常见的做法,但这种学习方法可以根据用户的写作/消息风格定制猜测,并支持用户的非标准词汇(“heyy”、“sup”、“:P”、“lol”)。这类猜测的结果可以与标准字典的结果结合使用(并优先于后者)。

最佳答案

您用于字典的结构不正确,它应该包含对象数组。例如,在您输入这些词后:

hi
hi
hi
hi
hi
hey
hello
hella

结构应该是:

// Example history after entering: hi (x5), hey, hello, hella.
// Each level is an array of nodes; `letter` is the character, `count` the
// number of entered words that passed through the node, and `followables`
// the array of nodes for the letters that came next.
history: [{
letter: "h",
count: 8,
followables: [{
letter: "e",
count: 3,
followables: [{
letter: "y",
count: 1,
followables: []
}, {
letter: "l",
count: 2,
followables: [{
letter: "l",
count: 2,
followables: [{
letter: "o",
count: 1,
followables: []
}, {
letter: "a",
count: 1,
followables: []
}]
}]
}]
}, {
letter: "i",
count: 5,
followables: []
}]
}]

我对创建和存储历史记录的方式(我会使用 localStorage)不感兴趣。重点是深入挖掘树内部以获得建议的递归函数。这一个获得给定单词的最终 followables:

findChildren: function (node, depth) {
/* Traverse history for current word, there is only one path */
for (k in node) {
if (node[k].letter == app.progress[depth]) {
if (depth + 1 == app.progress.length) {
/* Found it, hope it has followables */
return node[k].followables;
} else {
/* Must go deeper... */
return app.findChildren(node[k].followables, depth + 1);
};
};
};
/* No results */
return false;
}

第二个 (getWord) 创建建议:

countWordsFromNode: function (node) {
for (i in node) {
if (node[i].followables.length) {
app.countWordsFromNode(node[i].followables);
} else {
app.totalInNode++;
};
};
},
getWord: function (node, limit) {
/* First sort by count */
var sorted = node.sort(function (n1, n2) {
return n2.count - n1.count;
});
for (k in sorted) {
app.guesses[app.totalFound].word += sorted[k].letter;
if (sorted[k].followables.length) {
app.totalInNode = 0;
app.countWordsFromNode(sorted[k].followables);
for (m = 1; m < app.totalInNode; m++) {
if ((app.totalFound + m) < limit) {
app.guesses[app.totalFound + m].word += sorted[k].letter;
};
};
app.getWord(sorted[k].followables, limit);
} else {
/* End of word */
app.totalFound++;
};
if (app.totalFound >= limit) {
/* Found all suggestions */
break;
};
};
}

详情请见 this Fiddle ,我不得不删除你的一些代码。很容易将其集成到任何地方,例如您可以设置其他建议字段,当前是:

// Suggestion slots: each entry pairs a target element (#guess-1 .. #guess-3)
// with the word built for it, which starts out empty.
// NOTE(review): presumably this is the `app.guesses` array that getWord
// appends letters to; confirm against the full app object.
guesses: [
{
element: $('#guess-1'),
word: ''
},
{
element: $('#guess-2'),
word: ''
},
{
element: $('#guess-3'),
word: ''
}
]

编辑:修复了向右添加字母的错误。

关于javascript - 如何读取此 Radix Tree 结构以确定下一个字符串的概率?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28534473/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com