Skip to main content

Extract text from PDF page

You can extract the text of a PDF page by calling the getText method of its PDFiumPage object:

import { PDFiumLibrary } from "@hyzyla/pdfium";
import { promises as fs } from 'fs';

/**
 * Example entry point: reads 'document.pdf', prints the text of every page
 * to the console, and writes the text of the page at index 0 to 'page1.txt'.
 *
 * The document and the library are destroyed in `finally` blocks so they are
 * released even when text extraction or file writing throws.
 *
 * @returns {Promise<void>} Resolves once the text file has been written and
 *   the PDFium resources have been destroyed.
 */
async function main() {
  const buff = await fs.readFile('document.pdf');

  // Initialize the library
  const library = await PDFiumLibrary.init();
  try {
    // Load the document from the buffer
    const document = await library.loadDocument(buff);
    try {
      // Extract text from all pages
      for (const page of document.pages()) {
        // page.number is shifted by one for the printed label
        console.log(`Page ${page.number + 1}:`);
        console.log(page.getText());
        console.log('-------------------');
      }

      // Save text from a specific page to a file
      const firstPage = document.getPage(0);
      await fs.writeFile('page1.txt', firstPage.getText());
    } finally {
      // Do not forget to destroy the document when you are done.
      document.destroy();
    }
  } finally {
    // Destroy the library last, after the document it loaded is gone.
    library.destroy();
  }
}

// Run the example. Handle a rejection explicitly instead of leaving the
// promise floating: log the error and signal failure through the exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});