#!/usr/bin/python
# -*-coding:utf-8 -*-
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams
from pdfminer.pdfdevice import PDFDevice
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfparser import PDFParser, PDFDocument
def main(pdf_path="naacl06-shinyama.pdf"):
    """Print the text of every text-bearing layout object in a PDF.

    Each page is run through a ``PDFPageAggregator`` and the text of the
    resulting layout objects is printed in the order the layout yields them.

    Args:
        pdf_path: Path of the PDF file to read. Defaults to the sample
            file the original script hard-coded.
    """
    # The context manager guarantees the file is closed even when parsing
    # or page processing raises.
    with open(pdf_path, 'rb') as fp:
        parser = PDFParser(fp)
        doc = PDFDocument()
        # The legacy pdfminer API expects the parser and the document to be
        # linked in both directions before the document is initialized.
        parser.set_document(doc)
        doc.set_parser(parser)
        # An empty string is passed as the password.
        doc.initialize("")

        resource = PDFResourceManager()
        laparams = LAParams()
        # The keyword is ``laparams``; the misspelled ``laparms`` raises
        # TypeError (unexpected keyword argument).
        device = PDFPageAggregator(resource, laparams=laparams)
        interpreter = PDFPageInterpreter(resource, device)

        for page in doc.get_pages():
            interpreter.process_page(page)
            layout = device.get_result()
            for out in layout:
                # Not every layout object exposes get_text(); skip the ones
                # that do not instead of crashing with AttributeError.
                if hasattr(out, "get_text"):
                    print(out.get_text())


if __name__ == "__main__":
    main()
你现在能正常运行了吗?
device = PDFPageAggregator(resource,laparms=laparms);
你这行代码中的关键字参数名 laparms 写错了,应改为 laparams(即 laparams=...),否则会因为参数名不匹配而报错。
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pdfminer.pdfparser import PDFParser,PDFDocument
from pdfminer.pdfinterp import PDFResourceManager,PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator