Source code for pygacity.util.pdfutils
# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
PDF utility functions for pygacity
"""
import PyPDF2
[docs]
def combine_pdfs(args):
    """
    Concatenate several PDF files into one output PDF.

    Parameters
    ----------
    args : namespace
        Object exposing two attributes: ``i``, an iterable of input PDFs that
        are appended in iteration order, and ``o``, the destination the
        combined PDF is written to.
    """
    pdf_list = args.i
    output_filename = args.o
    merger = PyPDF2.PdfMerger()
    try:
        for pdf in pdf_list:
            merger.append(pdf)
        merger.write(output_filename)
    finally:
        # Close the merger even when appending or writing fails, so whatever
        # it holds open is released instead of leaking on the error path.
        merger.close()
    # Only reached on success: a failed merge must not report a saved file.
    print(f"Combined PDF saved as: {output_filename}")
[docs]
def bundle_pdfs(pdf_paths: list, output_path, two_sided: bool = False):
    """
    Concatenate the given PDF files, in order, into one bundle PDF.

    Parameters
    ----------
    pdf_paths : list of str or Path
        PDF files to include; pages are copied in the order the files appear.
    output_path : str or Path
        Where the bundle is written (opened in binary write mode).
    two_sided : bool, optional
        When True, any input with an odd page count is followed by one blank
        page sized like that input's last page, so every input occupies an
        even number of pages in the bundle (default: False).
    """
    bundle = PyPDF2.PdfWriter()
    for source in pdf_paths:
        pages = PyPDF2.PdfReader(str(source)).pages
        for sheet in pages:
            bundle.add_page(sheet)

        needs_padding = two_sided and len(pages) % 2 == 1
        if not needs_padding:
            continue

        # The blank filler page takes its dimensions from the final page of this input.
        trailing_box = pages[-1].mediabox
        bundle.add_blank_page(
            width=trailing_box.width,
            height=trailing_box.height,
        )

    with open(output_path, 'wb') as sink:
        bundle.write(sink)
[docs]
def bundle_subcommand(args):
    """
    CLI subcommand: bundle student exam PDFs found in a build directory.

    Student PDFs are identified by excluding files whose names contain
    ``_soln``, ``answerset``, or ``bundle``. They are sorted by
    modification time (build order), using the file name as a tie-breaker,
    and then split into chunks of at most ``args.bundle_size`` files. Each
    chunk is merged into ``<job_name>-bundle-<n>.pdf`` inside the build
    directory, and a one-line summary is printed per bundle.

    Parameters
    ----------
    args : namespace
        Object exposing ``build_dir`` (directory to scan), ``bundle_size``
        (maximum number of PDFs per bundle) and ``two_sided`` (forwarded to
        ``bundle_pdfs``).

    Raises
    ------
    NotADirectoryError
        If ``args.build_dir`` is not an existing directory.
    ValueError
        If student PDFs were found but ``args.bundle_size`` is less than 1.
    """
    from pathlib import Path

    build_dir = Path(args.build_dir)
    if not build_dir.is_dir():
        raise NotADirectoryError(f'"{build_dir}" is not a directory')

    excluded_tags = ('_soln', 'answerset', 'bundle')
    # The name is a secondary sort key so files sharing a modification time
    # come out in a reproducible order instead of whatever glob() yields.
    student_pdfs = sorted(
        (
            p for p in build_dir.glob('*.pdf')
            if not any(tag in p.stem for tag in excluded_tags)
        ),
        key=lambda p: (p.stat().st_mtime, p.name),
    )
    if not student_pdfs:
        print(f'No student exam PDFs found in {build_dir}')
        return

    bundle_size = args.bundle_size
    if bundle_size < 1:
        # A zero step makes range() fail with an unrelated message, and a
        # negative step would silently produce no bundles at all.
        raise ValueError(f'bundle_size must be a positive integer, got {bundle_size!r}')
    two_sided = args.two_sided

    # Job name is the first PDF's stem up to its last '-'; rsplit returns the
    # whole stem unchanged when there is no '-'.
    job_name = student_pdfs[0].stem.rsplit('-', 1)[0]

    chunks = [student_pdfs[i:i + bundle_size] for i in range(0, len(student_pdfs), bundle_size)]
    for n, chunk in enumerate(chunks, 1):
        out = build_dir / f'{job_name}-bundle-{n}.pdf'
        bundle_pdfs(chunk, out, two_sided=two_sided)
        print(f'Bundle {n}/{len(chunks)}: {out.name} ({len(chunk)} exams, two_sided={two_sided})')
[docs]
def duplicate_last_page(input_pdf, output_pdf):
    """
    Copy a PDF and append a second copy of its final page.

    Parameters
    ----------
    input_pdf
        Path to the PDF file to read.
    output_pdf
        Path the resulting PDF is written to.

    Notes
    -----
    An input with no pages is copied as-is, with nothing appended. A
    confirmation line is printed after the output file has been written.
    """
    source = PyPDF2.PdfReader(input_pdf)
    result = PyPDF2.PdfWriter()
    pages = source.pages

    # Carry over the document unchanged first.
    for sheet in pages:
        result.add_page(sheet)

    # Then repeat the final page, provided there is one.
    if len(pages) != 0:
        result.add_page(pages[-1])

    with open(output_pdf, 'wb') as sink:
        result.write(sink)
    print(f"Duplicated last page and saved as: {output_pdf}")