1.安装 PyPDF2 包
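pip install PyPDF2
然后在脚本开头 import PyPDF2。注意:下文使用的 PdfFileReader、PdfFileWriter、numPages、getPage、addPage 等旧接口已在 PyPDF2 3.0 中移除,若运行时报错,请安装 3.0 之前的版本(pip install "PyPDF2<3.0")。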
2.在 PyPDF2 库中,可以使用以下代码打开 PDF 文件:
# Open the PDF in binary mode. The with-statement closes the file handle
# automatically; a bare open() call would leave it open.
with open('filename.pdf', 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfFileReader(pdf_file)
    # Query the page count while the underlying file is still open.
    # NOTE: the reader is built on top of pdf_file, so do any further page
    # access inside this block, before the file is closed.
    total_pages = pdf_reader.numPages
3.下面的代码将 PDF 的每一页拆分为一个单独的 PDF 文件:
from PyPDF2 import PdfFileReader,PdfFileWriter
pdf_path = r"F:\工作\1.pdf"
save_path = r"F:\工作\a\a"

# Split the source PDF: each page is written to its own file, named
# save_path + <zero-based page index> + ".pdf".
pdf_reader = PdfFileReader(pdf_path)
page_count = pdf_reader.getNumPages()
for page_no in range(page_count):
    # A fresh writer per page, so every output file holds exactly one page.
    single_page = PdfFileWriter()
    single_page.addPage(pdf_reader.getPage(page_no))

    out_name = save_path + '{}.pdf'.format(page_no)
    with open(out_name, 'wb') as out_file:
        single_page.write(out_file)
        print('{} Save Sucessfully !\n'.format(page_no))
4.将 2 个 PDF 文件合并为 1 个
from PyPDF2 import PdfFileReader,PdfFileWriter
merge_pdf = r"F:\工作\z.pdf"
p1_pdf = r"F:\工作\a\a0.pdf"
p2_pdf = r"F:\工作\a\a1.pdf"

p1_reader = PdfFileReader(p1_pdf)
p2_reader = PdfFileReader(p2_pdf)
merge = PdfFileWriter()

# Append every page of p1 first, then every page of p2, keeping page order.
for source in (p1_reader, p2_reader):
    for page_no in range(source.getNumPages()):
        merge.addPage(source.getPage(page_no))

# Write the combined document to merge_pdf.
with open(merge_pdf, 'wb') as out_file:
    merge.write(out_file)
5.将多个单页 PDF 文件合并成一个 PDF 文件
from PyPDF2 import PdfFileReader,PdfFileWriter
merge_pdf = r"F:\工作\z.pdf"
p_pdf = r"F:\工作\a\a"
merge = PdfFileWriter()

# Append the pages of the files p_pdf + "220.pdf" ... p_pdf + "226.pdf"
# in order (the range end, 227, is exclusive).
for file_no in range(220, 227):
    p_reader = PdfFileReader(p_pdf + str(file_no) + '.pdf')
    # The page loop needs its own index: reusing the outer loop's name
    # would shadow the file counter inside the outer loop body.
    for page_no in range(p_reader.getNumPages()):
        merge.addPage(p_reader.getPage(page_no))

# Write out once, after all input files have been appended.
with open(merge_pdf, 'wb') as f:
    merge.write(f)
不知道为什么,直接从原PDF文档中截取某几页的代码总是不正常。只好采用以上分步执行的办法。
参考:https://pythonjishu.com/ugmvrnorpclhikd/
https://zhuanlan.zhihu.com/p/357378479?utm_id=0