gpt4 book ai didi

node.js - Node.js 数据集中的最近邻居

转载 作者:太空宇宙 更新时间:2023-11-03 23:45:12 24 4
gpt4 key购买 nike

我有一个将数据保存到 MongoDB 的 Node.js 应用程序。给定一份文档,我想在数据库中找到最相似的文档。

我的想法是实现某种最近邻算法,将所有记录作为训练集,并返回最相似的文档(同时给出表示这两个文档相似程度的某个百分比)。

例如我的数据库中有这些记录...

{ name: "Bill",   age: 10,  pc: "Mac",      ip: "68.23.13.8" }
{ name: "Alice", age: 22, pc: "Windows", ip: "193.186.11.3" }
{ name: "Bob", age: 12, pc: "Windows", ip: "56.89.22.1" }

...我想找到与此最接近的文档

{ name: "Tom", age: 10, pc: "Mac", ip: "68.23.13.10" }
// algorithm returns "Bill", .76

是否有任何 Node 模块/实现可以接受任何类型的对象/参数并返回其最近的邻居?

最佳答案

这是一些示例代码。它假设您可以在每次请求时都运行一次搜索。如果要修改它,请确保所有相似性函数都返回 0 到 1 之间的数字。

/**
 * Breaks a string into its overlapping two-character substrings (bigrams).
 *
 * @param {string} string - Text to split.
 * @returns {string[]} Every adjacent character pair, ordered by the default
 *   Array.prototype.sort comparison. A string shorter than two characters
 *   produces an empty array.
 */
function tokenize(string) {
  var pairs = [];
  var start = 0;

  // Stop one character early: the final character cannot begin a pair.
  while (start + 1 < string.length) {
    pairs.push(string.substring(start, start + 2));
    start += 1;
  }

  pairs.sort();
  return pairs;
}

/**
 * Collects the elements two sorted arrays have in common in one merge-style
 * pass. Both arrays must already be sorted ascending under the `<` / `>`
 * operators; unsorted input gives incomplete results.
 *
 * A value that is repeated is matched pairwise, so it appears in the result
 * as many times as it occurs in both inputs.
 *
 * @param {Array} a - First sorted array.
 * @param {Array} b - Second sorted array.
 * @returns {Array} Shared elements, in ascending order.
 */
function intersect(a, b) {
  var common = [];
  var i = 0;
  var j = 0;

  while (i < a.length && j < b.length) {
    var fromA = a[i];
    var fromB = b[j];

    if (fromA < fromB) {
      // The smaller head cannot match anything later in the other array.
      i += 1;
      continue;
    }
    if (fromA > fromB) {
      j += 1;
      continue;
    }

    // Neither smaller nor larger: treat as a match and consume both.
    common.push(fromA);
    i += 1;
    j += 1;
  }

  return common;
}

/**
 * Adds up the elements of an array from first to last.
 *
 * @param {number[]} items - Values to add.
 * @returns {number} The total, or 0 for an empty array.
 */
function sum(items) {
  var total = 0;
  var index = 0;

  // Plain index walk (rather than reduce/forEach) so every slot is visited,
  // including holes in a sparse array.
  while (index < items.length) {
    total += items[index];
    index += 1;
  }

  return total;
}

/**
 * Scores how alike two strings are using the Dice coefficient over their
 * character bigrams: twice the number of shared bigrams divided by the total
 * number of bigrams in both strings.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} A score from 0 (no shared bigrams) to 1 (same bigrams).
 */
function wordSimilarity(a, b) {
  var left = tokenize(a);
  var right = tokenize(b);
  var totalTokens = left.length + right.length;

  // Strings shorter than two characters produce no bigrams at all. Without
  // this guard the formula below evaluates 0 / 0 = NaN, which would poison
  // any average built from this score. Fall back to exact comparison.
  if (totalTokens === 0) {
    return a === b ? 1 : 0;
  }

  var shared = intersect(left, right);
  return (2 * shared.length) / totalTokens;
}

/**
 * Scores how close two dotted-quad IPv4 address strings are. Each of the four
 * octets contributes the absolute gap between the two addresses, all octets
 * weighted equally, and the combined gap is scaled by the largest possible
 * total (4 * 255).
 *
 * @param {string} a - First address, e.g. "68.23.13.8".
 * @param {string} b - Second address.
 * @returns {number} 1 for identical addresses, down to 0 when every octet
 *   differs by 255.
 */
function ipSimilarity(a, b) {
  var OCTET_COUNT = 4;
  var OCTET_MAX = 255;

  var partsA = a.split('.');
  var partsB = b.split('.');
  var gaps = [];

  for (var pos = 0; pos < OCTET_COUNT; pos += 1) {
    // Subtracting from OCTET_MAX coerces the string octets to numbers.
    var complementA = OCTET_MAX - partsA[pos];
    var complementB = OCTET_MAX - partsB[pos];
    gaps.push(Math.abs(complementB - complementA));
  }

  return 1 - sum(gaps) / (OCTET_MAX * OCTET_COUNT);
}

/**
 * Scores how close two ages are, as a number between 0 and 1.
 *
 * The absolute age gap is divided by `maxAge`, and the result is subtracted
 * from 1. Gaps larger than `maxAge` are clamped to a score of 0 so the result
 * never goes negative.
 *
 * @param {number} a - First age.
 * @param {number} b - Second age.
 * @param {number} [maxAge=100] - Age gap treated as "completely different".
 *   Must be positive.
 * @returns {number} 1 for equal ages, falling linearly to 0 at a gap of
 *   `maxAge` or more.
 */
function ageSimilarity(a, b, maxAge) {
  var span = maxAge === undefined ? 100 : maxAge;
  var distance = Math.abs(a - b) / span;

  // Clamp: without this, a gap above `span` would yield a negative score.
  return Math.max(0, 1 - distance);
}

/**
 * Scores how alike two records are by averaging one similarity score per
 * field: `name` and `pc` via wordSimilarity, `age` via ageSimilarity, and
 * `ip` via ipSimilarity. Every field carries equal weight.
 *
 * @param {Object} a - First record.
 * @param {Object} b - Second record.
 * @returns {number} The mean of the four per-field scores.
 */
function recordSimilarity(a, b) {
  var comparisons = [
    { key: 'name', compare: wordSimilarity },
    { key: 'age', compare: ageSimilarity },
    { key: 'pc', compare: wordSimilarity },
    { key: 'ip', compare: ipSimilarity }
  ];

  var total = comparisons.reduce(function (running, entry) {
    var key = entry.key;
    // Pull the function into a local so it is invoked as a plain call.
    var compare = entry.compare;
    return running + compare(a[key], b[key]);
  }, 0);

  return total / comparisons.length;
}

/**
 * Scans every item and returns the one whose recordSimilarity score against
 * `query` is highest. On a tie the earliest item wins.
 *
 * @param {Object[]} items - Candidate records.
 * @param {Object} query - Record to compare each candidate against.
 * @returns {?Object} The best-scoring item, or null when `items` is empty or
 *   no item scores above 0.
 */
function findMostSimilar(items, query) {
  var best = null;
  var bestScore = 0;

  for (var idx = 0; idx < items.length; idx += 1) {
    var candidate = items[idx];
    var score = recordSimilarity(candidate, query);

    // Written as a negated ">" so a NaN score is skipped, never selected.
    if (!(score > bestScore)) {
      continue;
    }

    bestScore = score;
    best = candidate;
  }

  return best;
}

// Sample records to search through.
var items = [
  {
    name: "Bill",
    age: 10,
    pc: "Mac",
    ip: "68.23.13.8"
  },
  {
    name: "Alice",
    age: 22,
    pc: "Windows",
    ip: "193.186.11.3"
  },
  {
    name: "Bob",
    age: 12,
    pc: "Windows",
    ip: "56.89.22.1"
  }
];

// Record whose nearest neighbour we want to find.
var query = {
  name: "Tom",
  age: 10,
  pc: "Mac",
  ip: "68.23.13.10"
};

// Look up the closest sample record and print it.
var result = findMostSimilar(items, query);

console.log(result);

关于node.js - Node.js 数据集中的最近邻居,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/14327466/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com