Command line concatenate PDFs on OS X

A great post at which does what it says on the tin.

He explains how to link to a python script included in OS X since some years ago so as to make concatenating PDFs as easy as typing

pdfconcat -o output.pdf *.pdf

The heavy lifting is all done with calls to OS X Quartz.CoreGraphics module so this isn’t going to work on other platforms, but for the curious it demonstrates how easily you can do such stuff on OS X.

#! /usr/bin/python
# join
#   Joing pages from a a collection of PDF files into a single PDF file.
#   join [--output <file>] [--shuffle] [--verbose]"
#   Parameter:
#   --shuffle
#	Take a page from each PDF input file in turn before taking another from each file.
#	If this option is not specified then all of the pages from a PDF file are appended
#	to the output PDF file before the next input PDF file is processed.
#   --verbose
#   Write information about the doings of this tool to stderr.
import sys
import os
import getopt
import tempfile
import shutil
from CoreFoundation import *
from Quartz.CoreGraphics import *

verbose = False

def createPDFDocumentWithPath(path):
	global verbose
	if verbose:
		print "Creating PDF document from file %s" % (path)
	return CGPDFDocumentCreateWithURL(CFURLCreateFromFileSystemRepresentation(kCFAllocatorDefault, path, len(path), False))

def writePageFromDoc(writeContext, doc, pageNum):

	global verbose
	page = CGPDFDocumentGetPage(doc, pageNum)
	if page:
		mediaBox = CGPDFPageGetBoxRect(page, kCGPDFMediaBox)
		if CGRectIsEmpty(mediaBox):
			mediaBox = None
		CGContextBeginPage(writeContext, mediaBox)
		CGContextDrawPDFPage(writeContext, page)
		if verbose:
			print "Copied page %d from %s" % (pageNum, doc)

def shufflePages(writeContext, docs, maxPages):
	for pageNum in xrange(1, maxPages + 1):
		for doc in docs:
			writePageFromDoc(writeContext, doc, pageNum)
def append(writeContext, docs, maxPages):

	for doc in docs:
		for pageNum in xrange(1, maxPages + 1) :
			writePageFromDoc(writeContext, doc, pageNum)

def main(argv):

	global verbose

	# The PDF context we will draw into to create a new PDF
	writeContext = None

	# If True then generate more verbose information
	source = None
	shuffle = False
	# Parse the command line options
		options, args = getopt.getopt(argv, "o:sv", ["output=", "shuffle", "verbose"])

	except getopt.GetoptError:

	for option, arg in options:

		if option in ("-o", "--output") :
			if verbose:
				print "Setting %s as the destination." % (arg)
			writeContext = CGPDFContextCreateWithURL(CFURLCreateFromFileSystemRepresentation(kCFAllocatorDefault, arg, len(arg), False), None, None)

		elif option in ("-s", "--shuffle") :
			if verbose :
				print "Shuffle pages to the output file."
			shuffle = True

		elif option in ("-v", "--verbose") :
			print "Verbose mode enabled."
			verbose = True

		else :
			print "Unknown argument: %s" % (option)
	if writeContext:
		# create PDFDocuments for all of the files.
		docs = map(createPDFDocumentWithPath, args)
		# find the maximum number of pages.
		maxPages = 0
		for doc in docs:
			if CGPDFDocumentGetNumberOfPages(doc) > maxPages:
				maxPages = CGPDFDocumentGetNumberOfPages(doc)
		if shuffle:
			shufflePages(writeContext, docs, maxPages)
			append(writeContext, docs, maxPages)
		del writeContext
def usage():
	print "Usage: join [--output <file>] [--shuffle] [--verbose]"

if __name__ == "__main__":

A Very Small Editable PDF for Testing

A Small PDF

Ever wanted a small PDF file, or to tweak your own PDF for testing? Below is a very small PDF which you can paste into a text editor and save as MySmallPdf.PDF.
Two things are easy to edit:

  • The page size. Look for the line /MediaBox [0 0 612 144]. Leave the first (0,0) pair and edit the (612,144) to be (width of page,height of page) in 1/72ths of an inch.
  • One line of text. Look for the line (This is a small text editable pdf) Tj and replace the text with your own.

This is particularly helpful if you are testing or itextsharp or some other PDF generator or API, and you want a couple of small identifiable test files to play with.

The Editable PDF

1 0 obj
<< /Type /Catalog
/Outlines 2 0 R
/Pages 3 0 R
2 0 obj
<< /Type /Outlines
/Count 0
3 0 obj
<< /Type /Pages
/Kids [4 0 R]
/Count 1
4 0 obj
<< /Type /Page
/Parent 3 0 R
/MediaBox [0 0 612 144]
/Contents 5 0 R
/Resources << /ProcSet 6 0 R
/Font << /F1 7 0 R >>
5 0 obj
<< /Length 73 >>
/F1 24 Tf
100 100 Td
(This is a small text editable pdf) Tj
6 0 obj
[/PDF /Text]
7 0 obj
<< /Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /MacRomanEncoding
0 8
0000000000 65535 f
0000000009 00000 n
0000000074 00000 n
0000000120 00000 n
0000000179 00000 n
0000000364 00000 n
0000000466 00000 n
0000000496 00000 n
<< /Size 8
/Root 1 0 R

The PDF ISO32000 standard

The PDF reference is available for free from Adobe at The one you want is the copy of the ISO reference at