import os

import pdfplumber
from openpyxl import Workbook
from openpyxl.styles import PatternFill, Side, Border
# Module-level accumulator: visitDir() appends every PDF path it finds here.
l = []


def visitDir(path):
    """Collect the paths of all PDF files under *path* into the global ``l``.

    The directory tree is walked recursively with ``os.walk``. Every file
    whose name ends in ``.pdf`` (compared case-insensitively, so ``.PDF``
    matches too) is appended to ``l`` as ``os.path.join(root, name)``.
    Sub-directories and files are visited in sorted name order so repeated
    runs over the same tree produce the same ordering.

    Args:
        path: Directory to search.

    Returns:
        None. When *path* is not an existing directory an error line is
        printed and ``l`` is left untouched.
    """
    if not os.path.isdir(path):
        print('Error:"',path,'" is not a directory or does not exist.')
        return
    # os.walk yields one (root, dirs, files) triple per directory in the tree.
    for root, dirs, files in os.walk(path):
        # Sorting dirs in place fixes the order in which os.walk descends.
        dirs.sort()
        for name in sorted(files):
            if name.lower().endswith(".pdf"):
                l.append(os.path.join(root, name))
def writeExcel(l, output_path="data/合同信息导出.xlsx"):
    """Extract text from PDF files and export it to a styled Excel sheet.

    Every page of every PDF contributes one worksheet row: the page text is
    split on whitespace and each token is written to its own cell, starting
    at row 2. Row 1 holds the three header cells. Each row is written with
    its own number of tokens, so rows of differing length neither raise
    ``IndexError`` nor get truncated to the first row's width.

    Args:
        l: Iterable of PDF file paths to read.
        output_path: Destination of the saved workbook. Defaults to the
            path this function has always written to.

    Returns:
        None.
    """
    wb = Workbook()
    ws1 = wb.active

    rows = []
    for pdf_path in l:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # Guard against a page that yields no text (None or ''):
                # it becomes an empty row instead of crashing on .split().
                text = page.extract_text() or ""
                rows.append(text.split())

    thin = Side(border_style='thin', color='000000')
    border = Border(top=thin, bottom=thin, left=thin, right=thin)

    # Header row: same fill and border on each of the three title cells.
    header_fill = PatternFill(fill_type='solid', fgColor="8B008B")
    for col_idx, title in enumerate(("合同序号", "合同名称", "合同金额"), start=1):
        cell = ws1.cell(row=1, column=col_idx, value=title)
        cell.fill = header_fill
        cell.border = border

    # Data rows start at row 2, directly below the header.
    body_fill = PatternFill(fill_type='solid', fgColor="FFC0CB")
    for row_idx, tokens in enumerate(rows, start=2):
        for col_idx, token in enumerate(tokens, start=1):
            cell = ws1.cell(row=row_idx, column=col_idx, value=token)
            cell.fill = body_fill
            cell.border = border

    wb.save(output_path)
    wb.close()
if __name__ == '__main__':
    # Script entry point: gather every PDF under ./data into ``l``, then
    # export their text to the Excel workbook.
    # The leftover IDE-template call to print_hi() was dropped: no such
    # function is defined or imported before this point, so it raised
    # NameError before any work was done.
    visitDir('data')
    writeExcel(l)