Node.js can handle tens of thousands of concurrent connections on a single server — but only if you avoid the common performance pitfalls. This guide covers clustering to use all CPU cores, streams to avoid memory overflow, profiling to find bottlenecks, and caching strategies that can reduce response times by 90%.
Understanding the Event Loop
Node.js runs your JavaScript on a single thread, so blocking the event loop blocks every request. The most critical performance rule: never run CPU-intensive operations synchronously on the main thread.
// NEVER do this — blocks the event loop
app.get('/compute', (req, res) => {
  // Deliberate anti-pattern: the sum is computed synchronously on the main
  // thread, so no other request is served until the loop has finished.
  let total = 0;
  let counter = 0;
  while (counter < 1e9) { // 1 billion iterations!
    total += counter;
    counter += 1;
  }
  res.json({ result: total });
});
// DO THIS instead — offload to worker thread
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');
app.get('/compute', (req, res) => {
const worker = new Worker('./computeWorker.js', {
workerData: { input: req.query.n }
});
worker.on('message', result => res.json({ result }));
worker.on('error', err => res.status(500).json({ error: err.message }));
});Clustering for Multi-Core Performance
A Node.js process executes JavaScript on a single CPU core by default. The cluster module forks worker processes that share the server port, so all available CPU cores can be used.
// Node.js Cluster Module — Use All CPU Cores
const cluster = require('cluster');
const os = require('os');
const express = require('express');
const NUM_WORKERS = os.cpus().length;
if (cluster.isPrimary) {
console.log(`Primary ${process.pid} is running`);
console.log(`Starting ${NUM_WORKERS} workers...`);
// Fork workers
for (let i = 0; i < NUM_WORKERS; i++) {
cluster.fork();
}
cluster.on('exit', (worker, code, signal) => {
console.log(`Worker ${worker.process.pid} died (${signal || code}). Restarting...`);
cluster.fork(); // Auto-restart crashed workers
});
cluster.on('online', (worker) => {
console.log(`Worker ${worker.process.pid} is online`);
});
} else {
// Worker process — runs the actual server
const app = express();
app.get('/api/users', async (req, res) => {
const users = await db.getUsers();
res.json(users);
});
app.listen(3000, () => {
console.log(`Worker ${process.pid} listening on port 3000`);
});
}
// Alternative: PM2 cluster mode (recommended for production)
// pm2 start server.js -i max # auto-detect CPU count
// pm2 start server.js -i 4 # explicit count
Streams for Memory Efficiency
Streams allow processing data piece-by-piece without loading everything into memory. Essential for file processing, HTTP responses, and database cursors.
// Node.js Streams — Memory-Efficient Processing
const fs = require('fs');
const { Transform, pipeline } = require('stream');
const { promisify } = require('util');
const pipelineAsync = promisify(pipeline);
// 1. Stream a large file as HTTP response (no memory buffering)
/**
 * GET /download/large-file — streams ./large-file.csv to the client as an
 * attachment named data.csv.
 *
 * Responds 404 when the file does not exist; other stat failures are passed
 * to the Express error handler.
 */
app.get('/download/large-file', async (req, res, next) => {
  const filePath = './large-file.csv';

  let stat;
  try {
    // Async stat: statSync would block the event loop on every download.
    stat = await fs.promises.stat(filePath);
  } catch (err) {
    if (err.code === 'ENOENT') {
      res.status(404).json({ error: 'File not found' });
      return;
    }
    next(err);
    return;
  }

  res.setHeader('Content-Type', 'text/csv');
  res.setHeader('Content-Length', stat.size);
  res.setHeader('Content-Disposition', 'attachment; filename=data.csv');

  // Stream file directly to response — never fully in memory. pipeline()
  // (unlike .pipe()) destroys both streams when either side fails, so a read
  // error or an aborted client does not leak the file descriptor.
  pipeline(fs.createReadStream(filePath), res, (err) => {
    if (err) {
      console.error(`Download of ${filePath} failed:`, err);
    }
  });
});
// 2. Transform stream for CSV processing
/**
 * Transform stream that turns raw CSV text into one plain object per data row.
 *
 * The first non-blank line is taken as the header row; every later non-blank
 * line is emitted as `{ [header]: value }` with headers and values trimmed.
 * A row with fewer cells than headers gets `undefined` for the missing ones.
 *
 * Limitations: fields are split on every ",", so quoted fields containing
 * commas or newlines are not supported.
 * NOTE: chunks are decoded independently with toString(), so a multi-byte
 * character split across two chunks would be corrupted; feed the parser
 * already-decoded strings (e.g. a read stream with an encoding set) when the
 * input is not plain ASCII.
 */
class CsvParser extends Transform {
  constructor() {
    super({ objectMode: true });
    this.buffer = ''; // Trailing partial line carried over between chunks
    this.headers = null; // Trimmed column names, set from the first line
  }

  /** Handles one complete line: records the headers or pushes a row object. */
  #handleLine(line) {
    // Skip blank lines (including a lone "\r" from CRLF input) instead of
    // emitting a record made of empty/undefined fields.
    if (line.trim() === '') return;

    if (!this.headers) {
      this.headers = line.split(',').map((header) => header.trim());
      return;
    }

    const values = line.split(',');
    const record = {};
    this.headers.forEach((header, i) => {
      record[header] = values[i]?.trim();
    });
    this.push(record);
  }

  _transform(chunk, encoding, callback) {
    this.buffer += chunk.toString();
    const lines = this.buffer.split('\n');
    this.buffer = lines.pop(); // Keep incomplete line in buffer
    for (const line of lines) {
      this.#handleLine(line);
    }
    callback();
  }

  /**
   * Called once the input has ended: emits the last line when the input did
   * not finish with a newline (it would otherwise be silently dropped).
   */
  _flush(callback) {
    const lastLine = this.buffer;
    this.buffer = '';
    this.#handleLine(lastLine);
    callback();
  }
}
// 3. Pipeline for reliable error handling
async function processLargeCsvFile(inputPath, outputPath) {
await pipelineAsync(
fs.createReadStream(inputPath),
new CsvParser(),
new Transform({
objectMode: true,
transform(record, enc, cb) {
// Transform each record
record.processed = true;
cb(null, JSON.stringify(record) + '\n');
}
}),
fs.createWriteStream(outputPath)
);
console.log('Processing complete');
}Caching Strategies
Caching is the highest-impact performance optimization. Even a simple in-memory cache can reduce database load by 80-90% for read-heavy applications.
// Caching Strategies for Node.js
// 1. In-Memory LRU Cache
const { LRUCache } = require('lru-cache');
// Module-level, in-process cache; getUser() below reads and writes it.
const cache = new LRUCache({
max: 500, // Maximum 500 items
ttl: 5 * 60 * 1000, // 5 minutes TTL
allowStale: true, // NOTE(review): per the lru-cache docs this lets get() return an expired entry; nothing in this file refreshes it in the background — confirm that is the intent
updateAgeOnGet: true, // NOTE(review): presumably resets an entry's age on every get(), so frequently-read keys may never reach the TTL — confirm against the lru-cache docs
});
/**
 * Cache-aside lookup of a user by id.
 *
 * Returns the entry stored under `user:<id>` when the cache holds a truthy
 * value for it; otherwise loads the user with db.findUser(id), stores the
 * result in the cache and returns it.
 */
async function getUser(id) {
  const key = `user:${id}`;
  const hit = cache.get(key);
  if (!hit) {
    const loaded = await db.findUser(id);
    cache.set(key, loaded);
    return loaded;
  }
  return hit;
}
// 2. Redis Cache with Stale-While-Revalidate
const Redis = require('ioredis');
const redis = new Redis();
// Keys whose background refresh is currently running, so that many concurrent
// hits on a nearly-expired key trigger a single refetch instead of one each.
const refreshingKeys = new Set();

/**
 * Re-fetches `key` and rewrites it in Redis without making the caller wait.
 * Failures are logged and swallowed on purpose: the caller has already been
 * served the cached value, and an unhandled rejection here would otherwise
 * take the whole process down.
 */
function refreshInBackground(key, fetchFn, ttl) {
  if (refreshingKeys.has(key)) return;
  refreshingKeys.add(key);

  // Intentionally not awaited; every error path is handled inside.
  void (async () => {
    try {
      const fresh = await fetchFn();
      await redis.setex(key, ttl, JSON.stringify(fresh));
    } catch (err) {
      console.error(`Background cache refresh failed for key "${key}":`, err);
    } finally {
      refreshingKeys.delete(key);
    }
  })();
}

/**
 * Read-through Redis cache with stale-while-revalidate.
 *
 * @param {string} key - Redis key the JSON-serialised value is stored under.
 * @param {() => Promise<any>} fetchFn - Loads the fresh value on a miss or refresh.
 * @param {number} [ttl=300] - Expiry, in seconds, applied whenever the value is written.
 * @returns {Promise<any>} The cached value when present, otherwise the freshly fetched one.
 *
 * On a hit the cached value is returned immediately; if fewer than 60 seconds
 * of TTL remain, a background refresh is started as well. On a miss (or when
 * the GET itself reported an error) the value is fetched, stored and returned.
 * Rejects if `fetchFn` or the Redis write fails on the miss path, or if the
 * cached payload is not valid JSON.
 */
async function getCachedData(key, fetchFn, ttl = 300) {
  // exec() resolves to one [error, result] pair per queued command.
  const [[, cachedJson], [, ttlRemaining]] = await redis
    .pipeline()
    .get(key)
    .ttl(key)
    .exec();

  if (cachedJson) {
    const data = JSON.parse(cachedJson);
    // Background refresh when < 60 seconds remaining
    if (ttlRemaining < 60) {
      refreshInBackground(key, fetchFn, ttl);
    }
    return data;
  }

  const data = await fetchFn();
  await redis.setex(key, ttl, JSON.stringify(data));
  return data;
}
// 3. HTTP Response Caching with ETags
app.get('/api/products', async (req, res) => {
const products = await getProducts();
const etag = require('crypto')
.createHash('md5')
.update(JSON.stringify(products))
.digest('hex');
if (req.headers['if-none-match'] === etag) {
return res.status(304).end();
}
res.setHeader('ETag', etag);
res.setHeader('Cache-Control', 'public, max-age=60, stale-while-revalidate=300');
res.json(products);
});Frequently Asked Questions
How many worker threads or cluster workers should I create?
For cluster (multi-process): create one worker per CPU core (os.cpus().length workers). For worker_threads (CPU-intensive tasks): create a pool of threads equal to the number of CPU cores minus 1 (to leave one core for the event loop). Too many threads or processes cause context-switching overhead.
When should I use streams vs loading data into memory?
Use streams when: processing files larger than 10MB, piping data between sources (file to HTTP response), processing data that arrives incrementally, or when you need backpressure control. Load into memory only when: you need random access, data fits comfortably (under 100MB), or you need to do complex transformations on the entire dataset.
What is the --inspect flag and how do I use it?
The --inspect flag starts Node.js with the V8 inspector protocol enabled. Open chrome://inspect in Chrome to attach DevTools to the Node.js process. You get the full Chrome DevTools including the Performance profiler, Memory profiler, and CPU profiler. Use --inspect-brk to break on the first line (useful for startup profiling).
Why is my Node.js app using so much memory?
Common causes: (1) Memory leaks — event listeners not removed, closures holding references, (2) Caching without eviction policies, (3) Buffer misuse — creating buffers but not releasing them, (4) Large in-memory datasets instead of streaming. Use the --max-old-space-size flag to increase V8 heap size, but also profile memory with Chrome DevTools to find the actual cause.