刘仁 Java后端开发

Python中把PDF转成单个或多个图片

2021-07-19
LIUREN
python  pdf  png
   

Python中把PDF转成单个或多个图片

现在有很多在线的PDF转图片工具,但是有些数据不方便联网处理,更不想上传到别人的服务器上进行转换,那么怎么办呢?用Java自己编写一个比较耗费时间;现在不是有Python吗,人家有现成的库,用起来又快又方便,那我们直接拿来用就好了,多舒服。

协议:CC BY-SA 4.0 https://creativecommons.org/licenses/by-sa/4.0/

版权声明:本文为原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。

一、安装Python环境

这一步不想说了,这个比较简单。记得配置一下环境变量。不然运行脚本比较费劲!


二、Python的PDF转PNG代码

注意:第一步要安装库:pip install pymupdf

#安装库 pip install pymupdf
import os
import fitz
# Module-level list of PDF file names: get_file() appends to it and
# conver_img() iterates over it.
pdf_dir=[]

def get_file():
    """Collect the PDF files found in the current working directory.

    Each entry returned by ``os.listdir()`` whose extension is exactly
    ``.pdf`` is appended to the module-level ``pdf_dir`` list.
    Nothing is returned.
    """
    for entry in os.listdir():
        _, extension = os.path.splitext(entry)
        if extension != '.pdf':
            # Not a .pdf file: skip it.
            continue
        pdf_dir.append(entry)
def conver_img():
    """Render every page of each PDF listed in ``pdf_dir`` to a PNG file.

    For a file ``<name>.pdf`` the pages are written relative to the current
    directory as ``./tu<name>-NN.png``, where ``NN`` is the zero-based page
    index padded to two digits. Pages are rendered without an alpha channel
    at a zoom factor of 2 on both axes. Progress is printed to stdout.

    The snake_case PyMuPDF API (``page_count``, ``prerotate``,
    ``get_pixmap``, ``Pixmap.save``) is used; the camelCase aliases are
    deprecated and are not available in current PyMuPDF releases.
    """
    rotate = 0  # rotation angle in degrees
    # A zoom factor of 2 per axis yields an image with four times the pixels.
    zoom_x = 2.0
    zoom_y = 2.0
    # The transform is identical for every page, so build it once.
    trans = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)

    for pdf in pdf_dir:
        pdf_name = os.path.splitext(pdf)[0]
        # The context manager closes the document even if rendering fails.
        with fitz.open(pdf) as doc:
            page_count = doc.page_count
            print("====================================")
            print("开始转换%s.PDF文档" % pdf_name)
            print("====================================")
            print("共", page_count, "页")
            for pg in range(page_count):
                print("\r转换为图片", pg + 1, "/", page_count, end=";")
                print("")
                pm = doc[pg].get_pixmap(matrix=trans, alpha=False)
                # Pixmap.save picks the PNG format from the file extension.
                pm.save(r'./tu' + str(pdf_name) + '-' + '{:02}.png'.format(pg))
# Script entry point: first collect the PDF file names from the current
# directory, then convert each collected PDF.
if __name__=='__main__':
	get_file()
	conver_img()

脚本和PDF要在同一个文件夹下。该脚本会对文件夹下的所有PDF进行转换。

=====================================================================

如果是JPG或者PNG转换成PDF,脚本如下:

记得根据运行时的报错提示安装对应的模块(例如:pip install reportlab pillow)

#coding=utf-8
#!/usr/bin/env python
"""
convert image to pdf file
"""
#Author: mrbeann 
import os
import sys
import glob
import platform
from reportlab.lib.pagesizes import letter, A4, landscape
from reportlab.platypus import SimpleDocTemplate, Image
from reportlab.lib.units import inch
from reportlab.pdfgen import canvas
from reportlab import rl_settings
from PIL import Image
import importlib,sys
#importlib.reload(sys)
#sys.setdefaultencoding("utf-8")
def _image_sizes(filelist):
    """Return the ``(width, height)`` pixel size of every image in ``filelist``."""
    sizes = []
    for name in filelist:
        # The context manager releases the file handle Image.open keeps open.
        with Image.open(name) as im:
            sizes.append(im.size)
    return sizes


def _page_size(filelist, sizeMode, width, height):
    """Return the PDF page size ``(w, h)`` selected by ``sizeMode``."""
    if sizeMode is None or sizeMode == 0:
        # Largest width and largest height over all pictures (0 if none).
        sizes = _image_sizes(filelist)
        return (max((w for w, _ in sizes), default=0),
                max((h for _, h in sizes), default=0))
    if sizeMode == 1:
        # Smallest width and smallest height over all pictures.
        sizes = _image_sizes(filelist)
        return (min((w for w, _ in sizes), default=999999),
                min((h for _, h in sizes), default=999999))
    if width is None or height is None:
        raise ValueError("no width or height provided")
    return (width, height)


def topdf(path,recursion=None,pictureType=None,sizeMode=None,width=None,height=None,fit=None,save=None):
    """
    Combine the pictures of a folder into one PDF, one picture per page.

    The PDF is named after the folder (``<folder>.pdf``). Pictures are added
    per extension in the order given by ``pictureType``, sorted by file name
    within each extension so the page order does not depend on the
    filesystem's listing order.

    Parameters
    ----------
    path : string
        Folder containing the pictures. On Windows backslashes are
        converted to forward slashes.
    recursion : boolean
        None or False: only process ``path`` itself.
        True: also process every sub-folder (each gets its own PDF);
        sub-folders are handled before ``path``.
    pictureType : list
        File extensions to include, for example ``['jpg', 'png']``.
        None means ``jpg`` only.
    sizeMode : int
        How the PDF page size is determined.
        None or 0: the largest width and largest height of all pictures.
        1: the smallest width and smallest height of all pictures.
        Anything else: the given ``width`` and ``height``.
    width : int
        Page width, used when ``sizeMode`` is neither None, 0 nor 1.
    height : int
        Page height, used when ``sizeMode`` is neither None, 0 nor 1.
    fit : boolean
        None or False: stretch every picture to the page size.
        True: draw every picture at its own size.
    save : string
        Prefix prepended to the PDF file name (normally a directory path
        ending with a separator). None saves the PDF inside ``path``.

    Raises
    ------
    ValueError
        If an explicit page size is requested but ``width`` or ``height``
        is missing.
    """
    if platform.system() == 'Windows':
        path = path.replace('\\', '/')
    if path[-1] != '/':
        path = path + '/'

    if recursion:
        for i in os.listdir(path):
            if os.path.isdir(os.path.abspath(os.path.join(path, i))):
                topdf(path + i, recursion, pictureType, sizeMode, width, height, fit, save)

    if pictureType is None:
        filelist = sorted(glob.glob(os.path.join(path, '*.jpg')))
    else:
        filelist = []
        for extension in pictureType:
            filelist.extend(sorted(glob.glob(os.path.join(path, '*.' + extension))))

    maxw, maxh = _page_size(filelist, sizeMode, width, height)

    # path always ends with '/', so the folder name is the second-to-last part.
    folder_name = path.split('/')[-2]
    if save is None:
        filename_pdf = path + folder_name
    else:
        filename_pdf = save + folder_name
    filename_pdf = filename_pdf + '.pdf'

    c = canvas.Canvas(filename_pdf, pagesize=(maxw, maxh))
    for picture in filelist:
        if fit:
            c.drawImage(picture, 0, 0)
        else:
            c.drawImage(picture, 0, 0, maxw, maxh)
        c.showPage()
    c.save()
def main():
    """Convert the pictures in the example folder into one PDF.

    The paths below are examples; change them to match your own setup.
    """
    # Raw / escaped backslashes keep "\P" and "\p" from being parsed as
    # (invalid) escape sequences; the resulting path values are unchanged.
    topdf(r'D:\Python_dll\png2pdf', pictureType=['png', 'jpg'],
          save='D:\\Python_dll\\png2pdf\\')


if __name__ == '__main__':
    main()

微信公众号:


Comments

Content