gpt4 book ai didi

javascript - 逐行处理文件,并在输出中保持行顺序

转载 作者:行者123 更新时间:2023-11-30 14:37:27 25 4
gpt4 key购买 nike

我正在编写一个 NodeJS 路由:用户上传一个文件(缓冲区),服务器逐行处理该文件(每一行都需要调用 REST API 来处理),然后把结果写入另一个缓冲区,并作为下载文件发送给用户。

这是路由代码:

// POST /tokenizeFile
// Validates the uploaded file, tokenizes it and sends the result back as a
// plain-text attachment. Any failure redirects to '/?err=<reason>'.
app.post('/tokenizeFile', isLoggedIn, upload.single('file'), function(req, res){
    var file = req.file;

    //File Validations
    // Return right after redirecting: without it the handler keeps going,
    // calls tokenizeFile on a rejected file and later tries to send a second
    // response for the same request.
    if (!validator.validateFile(file)) return res.redirect('/?err=invalidFile');

    //Process file
    tokenizer.tokenizeFile(file, req).then((data)=>{
        res.setHeader('Content-Type', 'text/plain');
        res.setHeader('Content-Disposition', 'attachment; filename=tokenized.txt');
        res.write(data, 'binary');
        res.end();
    }).catch((err)=>{
        // tokenizeFile rejects with a plain string describing the failure.
        res.redirect('/?err='+err);
    });

});

这是 tokenizer.tokenizeFile 代码:

// Tokenizes an uploaded file line by line.
// Reads file.buffer through a readline interface, calls Tokenize(line) for
// every line and resolves with the concatenated results.
// Rejects with a string when a line fails validate.validateLine or when a
// Tokenize call fails.
// NOTE(review): output order is NOT guaranteed to match input order, see the
// comments inside the 'line' handler.
tokenizeFile: function(file, req){

actionLogger.info(`Request to tokenize ${file.originalname} received. Made by: ${req.user.displayName}`);

return new Promise(function(resolve, reject){
// Holds only the most recently started Tokenize promise (reassigned per line).
var fileProcessPromise = Promise.resolve();

var lineReader = require('readline').createInterface({
input: require('streamifier').createReadStream(file.buffer)
});

// Accumulates the tokenized lines in the order their promises settle.
var output = "";

lineReader.on('line', function (line) {
// Start a Tokenize call for each line without waiting for the previous one.

// Rejecting here does not stop the reader; later lines are still processed.
if (!validate.validateLine(line)) return reject(`Invalid line [${line}].`);
// Overwrites the previous promise, so earlier calls are no longer tracked.
fileProcessPromise = Tokenize(line)
.then((data)=>{
// Appended when this particular call completes, not in line order.
// NOTE(review): as written this appends a literal backslash-r backslash-n
// sequence; presumably a CRLF line ending was intended - confirm.
output += data + "\\r\\n";
})
.catch((err)=>{
reject(`API didn\'t respond.`);
});
});

lineReader.on('close', () => {
// Waits only for the last started Tokenize call before resolving; calls for
// earlier lines may still be pending at that point.
fileProcessPromise.then(()=>resolve(output));
});

});

}

Tokenize 函数返回一个 promise ,因为它是对 RESTful API 的 HTTP 请求。

问题是我需要输出文件保持与输入相同的行顺序,而使用上面的代码,输出顺序取决于各个 Tokenize 调用完成(promise 被 resolve)的先后。

关于如何实现这一点有什么想法吗?

最佳答案

1) tokenizeFile 代码:

// Tokenizes an uploaded file while preserving the original line order.
//
// file - uploaded file object; file.buffer holds the content and
//        file.originalname is used for logging.
// req  - the request; req.user.displayName is used for logging.
//
// Returns a Promise that resolves with the tokenized lines joined by "\n".
// It rejects with a string when a line fails validate.validateLine or when a
// Tokenize call fails.
tokenizeFile: (file, req) => {

    actionLogger.info(`Request to tokenize ${file.originalname} received. Made by: ${req.user.displayName}`);

    return new Promise((resolve, reject) => {

        const lines = [];
        const responses = [];
        // Set once an invalid line is seen, so nothing else is collected or
        // sent to the API after the promise has already been rejected.
        let hasInvalidLine = false;

        const lineReader = require('readline').createInterface({
            input: require('streamifier').createReadStream(file.buffer)
        });

        // 1. read all lines to array
        lineReader.on('line', line => {
            if (hasInvalidLine) {
                return;
            }
            if (!validate.validateLine(line)) {
                hasInvalidLine = true;
                // Stop reading; the remaining lines are irrelevant now.
                lineReader.close();
                return reject(`Invalid line [${line}].`);
            }
            lines.push(line);
        });

        lineReader.on('close', async () => {
            // Already rejected: do not call the API for the collected lines.
            if (hasInvalidLine) {
                return;
            }
            // 2. process every line sequentially, so that responses are
            //    collected in exactly the same order as the input lines
            try {
                for (const line of lines) {
                    const response = await Tokenize(line);
                    responses.push(response);
                }
                resolve(responses.join("\n"));
            } catch (error) {
                console.log(error);
                reject("API didn't respond");
            }
        });

    });
}

2) 以及请求部分:

// POST /tokenizeFile
// Validates the uploaded file, tokenizes it and sends the result back as a
// plain-text attachment. Any failure redirects to '/?err=<reason>'.
app.post(
    '/tokenizeFile',
    isLoggedIn,
    upload.single('file'),
    async (req, res) => {
        try {
            const file = req.file;

            if (!validator.validateFile(file)) {
                throw new Error('invalidFile');
            }

            const data = await tokenizer.tokenizeFile(file, req);

            res.setHeader('Content-Type', 'text/plain');
            res.setHeader('Content-Disposition', 'attachment; filename=tokenized.txt');
            res.write(data, 'binary');
            res.end();
        } catch (error) {
            // tokenizeFile rejects with plain strings, while the validation
            // above throws an Error. Use the message so the redirect reads
            // '/?err=invalidFile' rather than '/?err=Error: invalidFile'.
            const reason = error instanceof Error ? error.message : error;
            res.redirect('/?err=' + reason);
        }
    }
);

关于javascript - 逐行处理文件,并在输出中保持行顺序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50201100/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com