
javascript - Node.js process exits in the middle with no error (using streams)


I am writing a Lambda function that, given a list of text files on S3, concatenates them together and then compresses the resulting file. For some reason, the function crashes partway through the process with no error.

The payload sent to the Lambda function looks like this:

{
  "sourceFiles": [
    "s3://bucket/largefile1.txt",
    "s3://bucket/largefile2.txt"
  ],
  "destinationFile": "s3://bucket/concat.zip",
  "compress": true,
  "omitHeader": false,
  "preserveSourceFiles": true
}

Scenarios in which the function works perfectly:

  1. Both files are small and compress === false
  2. Both files are small and compress === true
  3. Both files are large and compress === false

If I try to have it compress two large files, it exits partway through. The concatenation itself works fine, but when it tries to add the stream to the archive with zip-stream, it fails.

The two large files total 483,833 bytes combined. When the Lambda function fails, it has read 290,229 or 306,589 bytes (seemingly at random) and then exits.
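For what it's worth, here is a minimal bit of instrumentation (plain Node.js stream events, referring to the stream and archive variables in the handler below) that can surface a failure that would otherwise end the process silently; it is illustrative only and not part of the original code:

// Illustrative only: log stream-level failures instead of letting them pass silently.
stream.on('error', err => console.error('source stream error', err));
stream.on('close', () => console.log('source stream closed'));
archive.on('error', err => console.error('archive error', err));
archive.on('close', () => console.log('archive closed'));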

Here is the main entry point of the function:

const packer = require('zip-stream');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3({ apiVersion: '2006-03-01' });
const { concatCsvFiles } = require('./csv');
const { s3UrlToParts } = require('./utils');

function addToZip(archive, stream, options) {
  return new Promise((resolve, reject) => {
    archive.entry(stream, options, (err, entry) => {
      console.log('entry done', entry);
      if (err) reject(err);
      resolve(entry);
    });
  });
}

export const handler = async event => {
  /**
   * concatCsvFiles returns a readable stream to pass to either the archiver or
   * s3.upload.
   */
  let bytesRead = 0;

  try {
    const stream = await concatCsvFiles(event.sourceFiles, {
      omitHeader: event.omitHeader,
    });
    stream.on('data', chunk => {
      bytesRead += chunk.length;
      console.log('read', bytesRead, 'bytes so far');
    });
    stream.on('end', () => {
      console.log('this is never called :(');
    });
    const dest = s3UrlToParts(event.destinationFile);
    let archive;

    if (event.compress) {
      archive = new packer();

      await addToZip(archive, stream, { name: 'concat.csv' });
      archive.finalize();
    }

    console.log('uploading');
    await s3
      .upload({
        Body: event.compress ? archive : stream,
        Bucket: dest.bucket,
        Key: dest.key,
      })
      .promise();

    console.log('done uploading');

    if (!event.preserveSourceFiles) {
      const s3Objects = event.sourceFiles.map(s3Url => {
        const { bucket, key } = s3UrlToParts(s3Url);

        return {
          bucket,
          key,
        };
      });

      await s3
        .deleteObjects({
          Bucket: s3Objects[0].bucket,
          Delete: {
            Objects: s3Objects.map(s3Obj => ({ Key: s3Obj.key })),
          },
        })
        .promise();
    }

    console.log('## Never gets here');

    // return {
    //   newFile: event.destinationFile,
    // };
  } catch (e) {
    if (e.code) {
      throw new Error(e.code);
    }

    throw e;
  }
};

Here is the concatenation code:

import MultiStream from 'multistream';
import { Readable } from 'stream';
import S3 from 'aws-sdk/clients/s3';
import { s3UrlToParts } from './utils';

const s3 = new S3({ apiVersion: '2006-03-01' });

/**
* Takes an array of S3 URLs and returns a readable stream of the concatenated results
* @param {string[]} s3Urls Array of S3 URLs
* @param {object} options Options
* @param {boolean} options.omitHeader Omit the header from the final output
*/
export async function concatCsvFiles(s3Urls, options = {}) {
  // Get the header so we can use the length to set an offset in grabbing files
  const firstFile = s3Urls[0];
  const file = s3UrlToParts(firstFile);
  const data = await s3
    .getObject({
      Bucket: file.bucket,
      Key: file.key,
      Range: 'bytes 0-512', // first 512 bytes is pretty safe for header size
    })
    .promise();
  const streams = [];
  const [header] = data.Body.toString().split('\n');

  for (const s3Url of s3Urls) {
    const { bucket, key } = s3UrlToParts(s3Url);

    const stream = s3
      .getObject({
        Bucket: bucket,
        Key: key,
        Range: `bytes=${header.length + 1}-`, // +1 for newline char
      })
      .createReadStream();
    streams.push(stream);
  }

  if (!options.omitHeader) {
    const headerStream = new Readable();
    headerStream.push(header + '\n');
    headerStream.push(null);
    streams.unshift(headerStream);
  }

  const combinedStream = new MultiStream(streams);
  return combinedStream;
}
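
The s3UrlToParts helper is imported in both files but its implementation is not shown in the question; a hypothetical sketch of what it is assumed to return (a { bucket, key } pair) is:

// Hypothetical implementation of the s3UrlToParts helper referenced above;
// the question does not include the real one.
function s3UrlToParts(s3Url) {
  // Strip the "s3://" prefix, then split on the first "/" into bucket and key.
  const [bucket, ...keyParts] = s3Url.replace('s3://', '').split('/');
  return { bucket, key: keyParts.join('/') };
}

// s3UrlToParts('s3://bucket/concat.zip') => { bucket: 'bucket', key: 'concat.zip' }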

Best Answer

Figured it out. The problem was actually with the zip-stream library. Apparently it doesn't work well with S3 + streaming. I tried yazl and it works perfectly.
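For reference, a minimal sketch of how that step might look with yazl, using its documented addReadStream/outputStream API; the function name createZipStream and the entry name mirror the handler above and are illustrative only:

const yazl = require('yazl');

function createZipStream(sourceStream, entryName) {
  const zipfile = new yazl.ZipFile();
  // addReadStream takes any readable stream plus the path of the entry inside the archive
  zipfile.addReadStream(sourceStream, entryName);
  // end() finalizes the archive so outputStream can eventually finish
  zipfile.end();
  // outputStream is a readable stream that can be passed as Body to s3.upload
  return zipfile.outputStream;
}

// e.g. in the handler: Body: event.compress ? createZipStream(stream, 'concat.csv') : stream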

For javascript - Node.js process exits in the middle with no error (using streams), we found a similar question on Stack Overflow: https://stackoverflow.com/questions/53385050/
