Technology Apr 17, 2026 · 4 min read

Building an Automated Invoice Processing Pipeline with Node.js

Accounts payable teams spend an average of 3.7 minutes manually processing each invoice. At 200 invoices per month, that's 12+ hours of data entry. Here's how to build an automated pipeline that brings this to under 10 seconds per document. Pipeline Architecture Email/SFTP/API → Recei...

DE
DEV Community
by DevToolsmith
Building an Automated Invoice Processing Pipeline with Node.js

Accounts payable teams spend an average of 3.7 minutes manually processing each invoice. At 200 invoices per month, that's 12+ hours of data entry. Here's how to build an automated pipeline that brings this to under 10 seconds per document.

Pipeline Architecture

Email/SFTP/API → Receive → Extract → Validate → Enrich → Store → Notify

Each stage is independent and can fail gracefully without losing the document.

Stage 1: Document Ingestion

Accept invoices from multiple sources:

const express = require('express');
const multer  = require('multer');
const path    = require('path');

const upload = multer({
  dest: '/tmp/invoices',
  limits: { fileSize: 20 * 1024 * 1024 }, // 20MB
  fileFilter: (req, file, cb) => {
    const allowed = ['.pdf', '.docx', '.xlsx', '.png', '.jpg'];
    const ext     = path.extname(file.originalname).toLowerCase();
    cb(null, allowed.includes(ext));
  },
});

app.post('/api/invoices/upload', upload.array('files', 20), async (req, res) => {
  const jobs = req.files.map(file => ({
    id:       generateJobId(),
    path:     file.path,
    filename: file.originalname,
    status:   'queued',
  }));

  await queue.addBatch(jobs);
  res.json({ jobs: jobs.map(j => ({ id: j.id, status: j.status })) });
});

Stage 2: Extraction

async function extractInvoiceData(job) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(job.path), job.filename);
  formData.append('fields', JSON.stringify([
    'invoice_number', 'invoice_date', 'due_date',
    'vendor_name', 'vendor_address', 'vendor_tax_id',
    'line_items', 'subtotal', 'tax_amount', 'total_amount',
    'currency', 'payment_terms',
  ]));

  const response = await fetch('https://parseflow.dev/api/extract', {
    method:  'POST',
    headers: { 'Authorization': `Bearer ${process.env.PARSEFLOW_KEY}` },
    body:    formData,
  });

  if (!response.ok) {
    const error = await response.json();
    throw new Error(`Extraction failed: ${error.message}`);
  }

  return response.json();
}

Stage 3: Validation

Never trust extracted data without validation:

function validateInvoice(data) {
  const errors = [];

  // Required fields
  if (!data.invoice_number) errors.push('Missing invoice number');
  if (!data.vendor_name)    errors.push('Missing vendor name');
  if (!data.total_amount)   errors.push('Missing total amount');

  // Math validation
  if (data.line_items?.length > 0) {
    const lineTotal = data.line_items.reduce((sum, item) => sum + item.total, 0);
    const tolerance = 0.02; // 2 cents tolerance for rounding

    if (Math.abs(lineTotal - data.subtotal) > tolerance) {
      errors.push(`Line items sum (${lineTotal}) != subtotal (${data.subtotal})`);
    }
  }

  if (data.subtotal && data.tax_amount && data.total_amount) {
    const expected = data.subtotal + data.tax_amount;
    if (Math.abs(expected - data.total_amount) > 0.02) {
      errors.push(`Subtotal + tax (${expected}) != total (${data.total_amount})`);
    }
  }

  // Duplicate detection
  // (check against your DB for same invoice_number + vendor)

  return { valid: errors.length === 0, errors };
}

Stage 4: Enrichment

Match the vendor to your supplier database:

async function enrichInvoice(data) {
  // Fuzzy match vendor name to known suppliers
  const vendor = await db.suppliers.findBestMatch(data.vendor_name);

  if (vendor) {
    data.supplier_id      = vendor.id;
    data.gl_account       = vendor.default_gl_account;
    data.cost_center      = vendor.default_cost_center;
    data.approver_email   = vendor.approver_email;
    data.payment_method   = vendor.preferred_payment_method;
  } else {
    data.requires_review  = true;
    data.review_reason    = 'Unknown vendor — manual matching required';
  }

  return data;
}

Stage 5: Notifications

async function notifyApprover(invoice) {
  // Only for invoices above threshold or from unknown vendors
  if (invoice.total_amount > 5000 || invoice.requires_review) {
    await emailService.send({
      to:      invoice.approver_email,
      subject: `Invoice approval required: ${invoice.invoice_number}${invoice.vendor_name}`,
      template: 'invoice-approval',
      data:    invoice,
    });
  }
}

Error Handling and Dead Letter Queue

async function processJob(job) {
  try {
    job.status = 'processing';
    const extracted  = await extractInvoiceData(job);
    const validation = validateInvoice(extracted);

    if (!validation.valid) {
      job.status       = 'validation_failed';
      job.errors       = validation.errors;
      await moveToReview(job);
      return;
    }

    const enriched = await enrichInvoice(extracted);
    await db.invoices.create({ ...enriched, job_id: job.id });
    await notifyApprover(enriched);

    job.status = 'completed';

  } catch (err) {
    job.attempts++;
    if (job.attempts >= 3) {
      job.status = 'dead_letter';
      await alertOps(job, err);
    } else {
      job.status       = 'retry';
      job.retry_after  = addMinutes(new Date(), job.attempts * 15);
    }
  }

  await db.jobs.update(job);
}

Results

A pipeline like this, using ParseFlow for the extraction stage, processes a typical invoice in 4-8 seconds with 94%+ field accuracy across variable formats. The validation stage catches the remaining edge cases and routes them to a human reviewer queue rather than silently accepting bad data.

The full pipeline handles PDF, Word, and Excel with the same code path — no special-casing per format.

DE
Source

This article was originally published by DEV Community and written by DevToolsmith.

Read original article on DEV Community
Back to Discover

Reading List