Merging a folder of scanned receipts, photos, or screenshots into a single PDF is a common task with a surprisingly tricky edge: images come in different sizes, orientations, and aspect ratios. The simple Pillow approach uses each image as a PDF page at its native resolution — fine for uniform screenshots but awkward for mixed-size images. This guide covers both the quick approach and the professional approach with consistent A4 pages.
TL;DR
- Quickest: Pillow `save_all=True` — each image becomes one page at native resolution
- Consistent page size: reportlab or fpdf2 — scale images to fit A4/Letter with margins
- Mixed formats (HEIC + JPG + PNG): load with Pillow, convert all to RGB, then merge
- API: POST individual images to convert to PDF, then merge with pypdf
Pillow: quick merge (native resolution)
pip install Pillow
from PIL import Image, ImageOps
from pathlib import Path
def merge_images_to_pdf(image_paths: list[str], out_pdf: str) -> int:
    """
    Merge a list of image files into a single PDF.

    Each image becomes one page at its native resolution, in the order given.
    EXIF orientation is applied first, and images that can carry transparency
    are flattened onto a white background before being converted to RGB.

    Args:
        image_paths: Paths of the images to merge, in page order.
        out_pdf: Path of the PDF file to write.

    Returns:
        The number of pages written.

    Raises:
        ValueError: If no images were supplied.
    """
    pages = []
    for p in image_paths:
        # Open through a context manager so the file handle is released as
        # soon as the pixels are in memory, not whenever the object happens
        # to be garbage-collected.
        with Image.open(p) as src:
            img = ImageOps.exif_transpose(src)  # fix phone photo rotation
            if img.mode in ("RGBA", "LA", "P", "PA"):
                # Convert once and reuse the result for both the pixels and
                # the alpha mask.
                rgba = img if img.mode == "RGBA" else img.convert("RGBA")
                bg = Image.new("RGB", rgba.size, (255, 255, 255))
                bg.paste(rgba, mask=rgba.split()[3])
                img = bg
            elif img.mode != "RGB":
                img = img.convert("RGB")
            else:
                # Make sure the pixel data is read before the file closes.
                img.load()
        pages.append(img)

    if not pages:
        raise ValueError("No images to merge")

    pages[0].save(out_pdf, save_all=True, append_images=pages[1:])
    return len(pages)
# Merge every image in ./receipts into one PDF, pages ordered by filename.
folder = Path("./receipts")
image_files = [
    entry
    for entry in folder.iterdir()
    if entry.suffix.lower() in (".jpg", ".jpeg", ".png", ".bmp", ".tiff")
]
image_files.sort()
count = merge_images_to_pdf(list(map(str, image_files)), "receipts.pdf")
print(f"Merged {count} images into receipts.pdf")
A4-consistent pages with reportlab
For professional documents where all pages should be A4 (or Letter) with consistent margins:
pip install reportlab Pillow
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4, LETTER
from PIL import Image, ImageOps
from pathlib import Path
import io
A4_W, A4_H = A4 # 595.27 x 841.89 points (1pt = 1/72 inch)
MARGIN = 40 # points
def merge_to_a4_pdf(image_paths: list[str], out_pdf: str, pagesize=A4) -> int:
    """
    Place each image on its own fixed-size page and write a single PDF.

    Every image is scaled to fit inside the page minus ``MARGIN`` points on
    each side, keeping its aspect ratio, and is centred on the page. Images
    that can carry transparency are flattened onto a white background.

    Args:
        image_paths: Paths of the images, in page order.
        out_pdf: Path of the PDF file to write.
        pagesize: ``(width, height)`` of every page in points; defaults to A4.

    Returns:
        The number of image pages drawn.
    """
    # drawImage() documents its image argument as a filename or an
    # ImageReader, so the in-memory JPEG is wrapped rather than passed raw.
    from reportlab.lib.utils import ImageReader

    w, h = pagesize
    content_w = w - 2 * MARGIN
    content_h = h - 2 * MARGIN
    c = canvas.Canvas(out_pdf, pagesize=pagesize)
    count = 0
    for path in image_paths:
        # Context manager releases the source file handle once pixels are read.
        with Image.open(path) as src:
            img = ImageOps.exif_transpose(src)
            if img.mode in ("RGBA", "LA", "P", "PA"):
                # Flatten any transparency onto white instead of dropping it.
                rgba = img if img.mode == "RGBA" else img.convert("RGBA")
                bg = Image.new("RGB", rgba.size, (255, 255, 255))
                bg.paste(rgba, mask=rgba.split()[3])
                img = bg
            elif img.mode != "RGB":
                img = img.convert("RGB")
            else:
                # Make sure the pixel data is read before the file closes.
                img.load()

        # Scale to fit within content area, maintain aspect ratio
        iw, ih = img.size
        ratio = min(content_w / iw, content_h / ih)
        draw_w = iw * ratio
        draw_h = ih * ratio

        # Center on page
        x = MARGIN + (content_w - draw_w) / 2
        y = MARGIN + (content_h - draw_h) / 2

        # Encode as JPEG in memory so no temporary file is needed
        buf = io.BytesIO()
        img.save(buf, "JPEG", quality=88)
        buf.seek(0)
        c.drawImage(ImageReader(buf), x, y, draw_w, draw_h)
        c.showPage()
        count += 1

    c.save()
    return count
# Collect the .jpg scans in filename order and lay them out as one PDF.
images = sorted(Path("./scans").glob("*.jpg"))
merge_to_a4_pdf(list(map(str, images)), "invoice-package.pdf")
ChangeThisFile API + pypdf merge
# Convert individual images to PDF
# One POST per image: the file and the target format are sent as form
# fields, and the response body is saved to the name given by --output.
curl -X POST https://changethisfile.com/v1/convert \
-H "Authorization: Bearer ctf_sk_your_key_here" \
-F "file=@image1.jpg" -F "target=pdf" --output p1.pdf
# Same request for the second image; only the input and output names change.
curl -X POST https://changethisfile.com/v1/convert \
-H "Authorization: Bearer ctf_sk_your_key_here" \
-F "file=@image2.jpg" -F "target=pdf" --output p2.pdf
# Then merge locally with pypdf
pip install pypdf
import requests
from pypdf import PdfWriter
from pathlib import Path
import tempfile
API_KEY = "ctf_sk_your_key_here"
def images_to_pdf_via_api(image_paths: list[str], out_pdf: str) -> int:
    """
    Convert each image to a PDF through the conversion API, then join the
    results into ``out_pdf``.

    One POST is made per image, in list order. Each response body is written
    to a scratch file and appended to a single PdfWriter. An HTTP error
    response raises via ``raise_for_status()``.

    Returns the number of input paths.
    """
    endpoint = "https://changethisfile.com/v1/convert"
    auth_headers = {"Authorization": f"Bearer {API_KEY}"}
    merged = PdfWriter()

    with tempfile.TemporaryDirectory() as workdir:
        scratch = Path(workdir)
        for index, source_path in enumerate(image_paths):
            with open(source_path, "rb") as upload:
                response = requests.post(
                    endpoint,
                    headers=auth_headers,
                    files={"file": upload},
                    data={"target": "pdf"},
                    timeout=30,
                )
            response.raise_for_status()

            page_pdf = scratch / f"page_{index:03d}.pdf"
            page_pdf.write_bytes(response.content)
            merged.append(str(page_pdf))

        # Write the result while the scratch files still exist.
        with open(out_pdf, "wb") as sink:
            merged.write(sink)

    return len(image_paths)
Edge cases and gotchas
- Page ordering. Sort by filename or pass an explicit ordered list. Alphabetical sort only gives the right order when numbers are zero-padded (001.jpg, 002.jpg); without padding, 10.jpg sorts before 2.jpg. File modification time is unreliable for ordering after copying.
- HEIC images. Pillow doesn't read HEIC natively — use pyheif first (see bulk-convert-heic-to-jpg guide) or convert via API before merging.
- Very large images (4K+). A 4000×3000 image in a PDF inflates it significantly. Resize to max 2048px on the longest side before merging unless you specifically need archival quality.
- Multi-page TIFFs. Some TIFF files contain multiple pages (common for fax output). `ImageSequence.Iterator` handles multi-page TIFFs — each TIFF page becomes a PDF page.
- Image DPI metadata and PDF page size. Pillow uses image DPI metadata to set PDF page size. A 300 DPI 3000×2100px image becomes a 10×7 inch PDF page. For consistent A4 output, use the reportlab approach which explicitly sets the page size.
Processing scan batches
# Merge scans from multiple subfolders, one PDF per folder
from pathlib import Path
base = Path("./scan-archive")
for folder in sorted(base.iterdir()):
    if not folder.is_dir():
        continue
    # Accept the same suffixes as the single-folder example so that .jpeg
    # and .bmp scans are not silently left out of a folder's PDF.
    images = sorted(
        p
        for p in folder.iterdir()
        if p.suffix.lower() in (".jpg", ".jpeg", ".png", ".bmp", ".tiff")
    )
    if not images:
        print(f"Skipping {folder.name}: no images")
        continue
    # Each PDF is written next to its folder and named after it.
    out = folder.parent / f"{folder.name}.pdf"
    count = merge_images_to_pdf([str(p) for p in images], str(out))
    size_mb = out.stat().st_size / 1e6
    print(f"{folder.name}: {count} images → {out.name} ({size_mb:.1f}MB)")
For receipts and casual docs, the Pillow approach is a few lines and done. For anything going to a client or legal file, use the reportlab A4 approach to get a consistent, professional page layout. The API free tier handles HEIC and mixed-format inputs without local conversion dependencies.