Terris:
How can I efficiently write a mysql result set to a file in CSV format without loading all of the rows into memory? NodeJS code, please.
Terris:
This example works with the npm package csv-stringify, either mysql or mysql2, and any Node version that supports the async keyword.
const csvstringify = require('csv-stringify');
const fs = require('fs');
const outputStream = fs.createWriteStream('output.csv', {encoding: 'utf8'});
// Capture events on the outputStream before writing begins
const finishedWriting = new Promise((resolve, reject)=>
outputStream.on('finished', resolve).on('error', reject));
const BOM = '\ufeff'; // Microsoft Excel needs this
outputStream.write(BOM);
const connection = __Create a mysql2 Connection object here__
const generator = connection.connection.query('SELECT...');
let recordsProcessed = 0;
try {
await new Promise((resolve, reject) => {
// When using a connection pool, the 'error' connection event is called only
// when enableKeepAlive is true. See:
// https://github.com/sidorares/node-mysql2/issues/677#issuecomment-588530194
// Without this handler, this code will hang if the database connection is
// lost while reading the result set.
connection.on('error', reject);
generator
.on('result', row => ++recordsProcessed) // Counting rows as an example
.stream({highWaterMark: 10})
.on('error', reject)
.on('end', resolve)
.pipe(csvstringify({header: true}))
.pipe(outputStream)
.on('error', error => {
// Handle stream write error
// See also https://github.com/sidorares/node-mysql2/issues/664
// Data is being sent from server to client; without calling destroy, the
// connection will go back to the pool and will be unusable. The
// callback provided to destroy() is never called.
connection.destroy(); // This appears to cause file descriptor leaks
reject(error);
});
});
}
finally {
connection.on('error', error => console.log(error)); // Remove the handler.
// Is there a better way?
}
await finishedWriting;