gpt4 book ai didi

javascript - Webpack 构建不适用于爬虫

转载 作者:行者123 更新时间:2023-11-29 20:37:22 25 4
gpt4 key购买 nike

我的构建在浏览器中运行良好(甚至包括 Chrome 40 以上的旧版本)。但到目前为止,它在我尝试过的所有爬虫中都无法运行。奇怪的是,这个错误只出现在爬虫(例如 Googlebot)中,这使得定位问题变得异常困难。

我试过了

  • 禁用生产构建以查看未缩小的错误,但爬虫随后拒绝加载 js 文件,因为它太大了
  • 在尽可能多的浏览器(IE 除外)中运行网站,它在所有浏览器中都能正常工作。
  • 禁用针对爬虫的预渲染(即使用 headless Chrome 渲染应用的 HTML)

这是 googlebot 看到的错误:

[图片:Googlebot 渲染页面时的错误截图(原图未保留)]

您可以在 https://wavedistrict.com 自行测试

Webpack 配置:

const { resolve } = require("path")
const ForkTsCheckerWebpackPlugin = require("fork-ts-checker-webpack-plugin")
const CleanWebpackPlugin = require("clean-webpack-plugin")
const CopyWebpackPlugin = require("copy-webpack-plugin")
const HtmlWebpackPlugin = require("html-webpack-plugin")
const HtmlWebpackInlineSourcePlugin = require("html-webpack-inline-source-plugin")
const MiniCssExtractPlugin = require("mini-css-extract-plugin")
const OptimizeCssAssetsWebpackPlugin = require("optimize-css-assets-webpack-plugin")
const WebpackPwaManifest = require("webpack-pwa-manifest")
const webpackMerge = require("webpack-merge")
const Visualizer = require("webpack-visualizer-plugin")

/** True only when the NODE_ENV environment variable is exactly "production". */
const { NODE_ENV: nodeEnv } = process.env
const isProduction = nodeEnv === "production"

/**
 * Absolute paths used by the configuration, all derived from the project root.
 * Change `projectRoot` when moving the configuration files.
 */
const projectRoot = resolve(__dirname)

/** Resolves the given path segments against the project root. */
const fromRoot = (...segments) => resolve(projectRoot, ...segments)

const sourceFolder = fromRoot("src")
const tsFolder = fromRoot("src", "ts")
const buildFolder = fromRoot("build")
const publicFolder = fromRoot("public")
const htmlTemplateFile = fromRoot("public", "index.html")
const tsconfigPath = fromRoot("tsconfig.json")
const tslintPath = fromRoot("tslint.json")

/** Compiler option overrides handed to ts-loader. */
const tsCompilerOptions = {
  module: "esnext",
  target: "es5",
  allowSyntheticDefaultImports: true,
}

/**
 * ts-loader entry: transpile-only, reading the project's tsconfig.json and
 * allowing TypeScript sources inside node_modules.
 * NOTE(review): no rule in the visible config references this loader
 * (the rules list uses babelRule) — confirm whether it is still needed.
 */
const tsLoader = {
  loader: "ts-loader",
  options: {
    compilerOptions: tsCompilerOptions,
    transpileOnly: true,
    configFile: tsconfigPath,
    allowTsInNodeModules: true,
  },
}

/** babel-loader entry; no inline options are set here. */
const babelLoader = { loader: "babel-loader" }

/** Files whose name ends in `.worker.ts` are handled by worker-loader. */
const workerRule = {
  test: /\.worker\.ts$/,
  use: { loader: "worker-loader" },
}

/** Every `.js`, `.ts` and `.tsx` module is passed through babel-loader. */
const babelRule = {
  test: /\.(js|ts|tsx)$/,
  use: [babelLoader],
}

/**
 * First loader of the SCSS chain: MiniCssExtractPlugin's loader in production
 * builds, otherwise style-loader with the `singleton` option.
 */
const styleOutputLoader = isProduction
  ? MiniCssExtractPlugin.loader
  : { loader: "style-loader", options: { singleton: true } }

/**
 * Rule for `.scss` files: sass-loader, then css-loader, then the style output
 * loader chosen above. sass-loader is given an `@import` of the core styles
 * via its `data` option and resolves includes from the source folder.
 */
const sassRule = {
  test: /\.scss$/,
  use: [
    styleOutputLoader,
    { loader: "css-loader" },
    {
      loader: "sass-loader",
      options: {
        data: "@import './ts/modules/core/styles/_.scss';",
        includePaths: [sourceFolder],
      },
    },
  ],
}

/**
 * Plugins shared by the development and production builds: copy the public
 * folder (with the HTML template path in the ignore list) and clean the build
 * folder.
 * ForkTsCheckerWebpackPlugin (tslint: tslintPath, tsconfig: tsconfigPath) is
 * currently disabled.
 */
const basePlugins = [
  new CopyWebpackPlugin([{ from: publicFolder, ignore: [htmlTemplateFile] }]),
  new CleanWebpackPlugin(buildFolder, { root: projectRoot, verbose: false }),
]

/**
 * Settings common to both build modes; merged into devConfig and prodConfig.
 * @type {import('webpack').Configuration}
 */
const baseConfig = {
  context: projectRoot,

  // Polyfills are listed before the application entry module.
  entry: ["babel-polyfill", "url-search-params-polyfill", resolve(tsFolder, "init")],

  output: { filename: "js/[name].js", path: buildFolder, publicPath: "/" },

  module: { rules: [workerRule, babelRule, sassRule] },

  resolve: {
    modules: ["node_modules"],
    extensions: [".js", ".ts", ".tsx", ".scss"],
    // Lets sources import from "modules/..." and "common/...".
    alias: {
      modules: resolve(tsFolder, "modules"),
      common: resolve(tsFolder, "common"),
    },
    mainFields: ["jsnext:main", "module", "main"],
  },

  plugins: basePlugins,

  // Trim the console build report.
  stats: { children: false, entrypoints: false, modules: false },
}

// Add the bundle visualizer only when the process was started with --stats.
const statsRequested = process.argv.includes("--stats")
if (statsRequested && baseConfig.plugins) {
  baseConfig.plugins.push(new Visualizer())
}

/** Development-only settings layered on top of baseConfig. */
const devOverrides = {
  mode: "development",
  plugins: [
    new HtmlWebpackPlugin({ template: htmlTemplateFile, chunksSortMode: "dependency" }),
  ],
  devtool: "inline-source-map",
  // historyApiFallback is on and hot reloading is off for the dev server.
  devServer: { hot: false, historyApiFallback: true },
}

/** Development configuration: baseConfig merged with the development overrides. */
const devConfig = webpackMerge(baseConfig, devOverrides)

/**
 * Builds one PWA manifest icon entry for a file inside the public folder.
 * Each call returns a fresh object with its own `sizes` array.
 */
const pwaIcon = (fileName) => ({
  src: resolve(publicFolder, fileName),
  sizes: [48, 72, 96, 128, 144, 192, 256, 512],
  destination: "icons",
})

/** Minification options passed to HtmlWebpackPlugin in production. */
const htmlMinifyOptions = {
  removeComments: true,
  collapseWhitespace: true,
  removeRedundantAttributes: true,
  useShortDoctype: true,
  removeEmptyAttributes: true,
  removeStyleLinkTypeAttributes: true,
  keepClosingSlash: true,
  minifyJS: true,
  minifyCSS: true,
  minifyURLs: true,
}

/** Production configuration: baseConfig merged with the production-only settings. */
const prodConfig = webpackMerge(baseConfig, {
  mode: "production",

  optimization: { minimize: true, nodeEnv: "production" },

  plugins: [
    new WebpackPwaManifest({
      name: "WaveDistrict",
      short_name: "WaveDistrict",
      description: "",
      background_color: "#091F35",
      theme_color: "#00ba8c",
      orientation: "any",
      icons: [pwaIcon("img/logo.svg"), pwaIcon("img/logo.png")],
    }),

    new MiniCssExtractPlugin({ filename: "css/[name].css" }),

    new OptimizeCssAssetsWebpackPlugin(),

    new HtmlWebpackPlugin({
      template: htmlTemplateFile,
      minify: htmlMinifyOptions,
      inject: true,
    }),

    // NOTE(review): the HtmlWebpackPlugin options above set no `inlineSource`
    // pattern — confirm this plugin actually inlines anything as configured.
    new HtmlWebpackInlineSourcePlugin(),
  ],

  performance: { maxAssetSize: 500000 },

  devtool: "source-map",
})

// Export the configuration that matches the current NODE_ENV.
if (isProduction) {
  module.exports = prodConfig
} else {
  module.exports = devConfig
}

Babel 配置(需要将 ES6 node_modules 转换为 ES5):

/** Options for @babel/preset-env: target Chrome 41, chosen for Googlebot. */
const babelEnv = { targets: { chrome: "41" } }

/**
 * Dependencies that are run through Babel even though they live in
 * node_modules. Keep track of all conflicting dependencies here.
 */
const nonES5Deps = [
  "qs",
  "querystring",
  "query-string",
  "decko",
]

module.exports = function(api) {
api.cache(true)

return {
exclude: [],
include: (path) => {
if (nonES5Deps.some((p) => path.match(p))) {
return true
}
if (path.match(/node_modules/)) return false

return true
},
presets: [
["@babel/preset-env", babelEnv],
"@babel/preset-react",
"@babel/preset-typescript",
],
plugins: [
"@babel/plugin-syntax-dynamic-import",
"@babel/plugin-transform-arrow-functions",
[
"@babel/plugin-proposal-decorators",
{
legacy: true,
},
],
[
"@babel/plugin-proposal-class-properties",
{
loose: true,
},
],
],
}
}

那么这里发生了什么?仅出现在爬虫中的问题如何调试?

最佳答案

我在朋友的帮助下找到了问题所在。Googlebot(以及其他爬虫)似乎不支持 AudioContext 对象,因此调用它时会抛出 “undefined is not a function” 错误。

修改代码,先检查 AudioContext 是否存在,并在它不存在时安全地禁用相关功能,问题就解决了。

关于javascript - Webpack 构建不适用于爬虫,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56435628/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com