梗概
- 需要自定义一个转换流(Transform)类,将文件流按指定分隔符切分,逐段输出字符串
实例
以下代码由 AI 生成,并结合业务需求做了调整:
/**
 * Reads the raw dictionary dump at `rawTLDPath`, splits it into entries on the
 * `</>` delimiter and collects the "Spoken" rank of every entry that has one.
 *
 * An entry contributes a result only when it contains both a headword (a line
 * directly followed by a line starting with `<`) and a
 * `<h3>Spoken:</h3><span class="rank">N<` fragment. Every other entry —
 * including the whitespace that usually trails the last delimiter — is skipped
 * instead of crashing the stream handler.
 *
 * @param rawTLDPath Path of the UTF-8 text file to scan.
 * @returns Promise resolving to the `{ word, rank }` pairs in file order. It
 *   rejects when reading or splitting the file fails (e.g. the file is missing).
 */
function getSpokenRank(rawTLDPath: string): Promise<WORD_RANK[]> {
  return new Promise<WORD_RANK[]>((resolve, reject) => {
    // Loaded locally so the function stays self-contained.
    const fs = require('fs');
    const stream = require('stream');

    // Compiled once instead of once per entry.
    const wordReg = /(?<word>.+)(?:(\r?\n|\r)<)/;
    const rankReg = /<h3>Spoken:<\/h3><span class="rank">(?<rank>\d+)</;

    /** Transform stream that re-chunks incoming text into delimiter-separated parts. */
    class SplitStream extends stream.Transform {
      /** Text received so far that has not been terminated by a delimiter yet. */
      private remainder = '';

      /** @param splitter Delimiter that separates two parts in the input text. */
      constructor(private readonly splitter: string) {
        // Object mode on the readable side delivers every pushed part as exactly
        // one 'data' event; decodeStrings:false keeps string chunks as strings.
        super({ readableObjectMode: true, decodeStrings: false });
      }

      /** Emits every complete part contained in the buffered text plus `chunk`. */
      _transform(chunk: { toString: () => string }, _encoding: string, callback: () => void) {
        const parts = (this.remainder + chunk.toString()).split(this.splitter);
        // The last piece may be cut off mid-entry or mid-delimiter: keep it back
        // until the next chunk (or the end of the input) completes it.
        this.remainder = parts.pop() ?? '';
        for (const part of parts) {
          if (part.length > 0) this.push(part);
        }
        callback();
      }

      /** Emits whatever is left once the input has ended. */
      _flush(callback: () => void) {
        if (this.remainder.length > 0) this.push(this.remainder);
        callback();
      }
    }

    const fileStream = fs.createReadStream(rawTLDPath, 'utf8');
    const splitStream = new SplitStream('</>');
    const rst: WORD_RANK[] = [];

    // Any failure tears both streams down and rejects, so the caller never
    // waits on a promise that cannot settle.
    const fail = (err: unknown) => {
      fileStream.destroy();
      splitStream.destroy();
      reject(err);
    };
    fileStream.on('error', fail);
    splitStream.on('error', fail);

    splitStream.on('data', (part: string) => {
      const word = wordReg.exec(part)?.groups?.['word'];
      const rank = rankReg.exec(part)?.groups?.['rank'];
      // Entries without a headword or without a spoken rank are ignored.
      if (word !== undefined && rank !== undefined) {
        rst.push({ word, rank: parseInt(rank, 10) });
      }
    });

    splitStream.on('end', () => {
      console.log('done');
      resolve(rst);
    });

    fileStream.pipe(splitStream);
  });
}