gpt4 book ai didi

javascript - 如何计算 JavaScript 中 url 列表中的单词出现次数?

转载 作者:行者123 更新时间:2023-11-28 14:08:41 25 4
gpt4 key购买 nike

我在 WordPress 的 JSON 对象中有 url 列表。我想计算 url 第二部分的出现次数。

下面的代码当前获取前缀 https://www.example.co 之后的其余 url。接下来我要做的是计算 url 第二部分的出现次数,即 cat1, cat3, cat2, xmlrpc.php

// Sample crawl records; only the "URL" field is read by the code below.
var urlList = [
  { "URL": "https://www.example.co/cat1/aa/bb/cc", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat2/aa", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat1/aa/bb/cc/dd/ee", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat3/aa/bb/cc/", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat2/aa/bb", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat1/aa/bb", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/xmlrpc.php", "Last crawled": "Jun 19, 2019" }
];

// Drop the site prefix so that only the path part of every URL is left.
const paths = urlList.map(({ URL }) => URL.replace('https://www.example.co', ''));

// For the data above every path begins with "/", so splitting on "/" yields
// an empty string at index 0 and the first path segment at index 1.
paths.forEach((path) => {
  const [, urlSecondPart] = path.split("/");
  console.log(urlSecondPart);
});

您知道如何使用当前的 forEach 循环实现这一目标吗?

非常感谢任何帮助。谢谢

最佳答案

使用正则表达式匹配 .co/ 之后的一段非 / 字符:

// Sample crawl records; only the "URL" field is read by the code below.
const urlList = [
  { "URL": "https://www.example.co/cat1/aa/bb/cc", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat2/aa", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat1/aa/bb/cc/dd/ee", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat3/aa/bb/cc/", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat2/aa/bb", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat1/aa/bb", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/xmlrpc.php", "Last crawled": "Jun 19, 2019" }
];

/**
 * Extracts the run of non-"/" characters that follows ".co/" in a URL.
 *
 * @param {string} url - URL to inspect.
 * @returns {string|null} The captured segment, or null when nothing follows
 *   ".co/" (String.prototype.match returns null in that case).
 */
const getSecond = (url) => {
  const match = url.match(/\.co\/([^\/]+)/);
  return match ? match[1] : null;
};

// URLs without a segment after ".co/" (for example the bare home page) are
// skipped rather than crashing on a null match.
const paths = urlList
  .map(({ URL }) => getSecond(URL))
  .filter((part) => part !== null);
console.log(paths);

// The accumulator has no prototype, so segments named like inherited members
// ("constructor", "toString", "__proto__") start from undefined and are
// counted correctly instead of picking up Object.prototype values.
const counts = paths.reduce((a, str) => {
  a[str] = (a[str] || 0) + 1;
  return a;
}, Object.create(null));
console.log(counts);

在较新的引擎上,您可以使用 lookbehind 而不是提取捕获组:

// Lookbehind variant: (?<=\.co\/) requires ".co/" immediately before the match
// without making it part of the match, so the whole match (index 0) is the
// wanted segment. URLs with nothing after ".co/" give a null match and are
// skipped instead of throwing.
const paths = urlList
  .map(({ URL }) => URL.match(/(?<=\.co\/)[^\/]+/))
  .filter((match) => match !== null)
  .map(([segment]) => segment);

如果您还想记录每个单词对应的所有完整 URL,可以在 reduce 时不仅累计计数,还同时收集这些完整 URL 的数组:

// Sample crawl records; only the "URL" field is read by the code below.
const urlList = [
  { "URL": "https://www.example.co/cat1/aa/bb/cc", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat2/aa", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat1/aa/bb/cc/dd/ee", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat3/aa/bb/cc/", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat2/aa/bb", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/cat1/aa/bb", "Last crawled": "Jun 23, 2019" },
  { "URL": "https://www.example.co/xmlrpc.php", "Last crawled": "Jun 19, 2019" }
];

/**
 * Extracts the run of non-"/" characters that follows ".co/" in a URL.
 *
 * @param {string} url - URL to inspect.
 * @returns {string|null} The captured segment, or null when nothing follows
 *   ".co/" (String.prototype.match returns null in that case).
 */
const getSecond = (url) => {
  const match = url.match(/\.co\/([^\/]+)/);
  return match ? match[1] : null;
};

// Groups the records by their segment, keeping both a count and the list of
// full URLs. The accumulator has no prototype so that a segment such as
// "constructor" is treated as a fresh key instead of resolving to an
// inherited Object.prototype member.
const counts = urlList.reduce((a, { URL }) => {
  const second = getSecond(URL);
  if (second === null) {
    // Nothing after ".co/" (e.g. the bare home page): nothing to count.
    return a;
  }
  if (!a[second]) {
    a[second] = { count: 0, fullUrls: [] };
  }
  a[second].count++;
  a[second].fullUrls.push(URL);
  return a;
}, Object.create(null));
console.log(counts);

关于javascript - 如何计算 JavaScript 中 url 列表中的单词出现次数?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56844726/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com