Java: используя apache POI, как преобразовать ms word file в pdf?

Используя apache POI как преобразовать файл ms word в pdf?

Я использую следующий код, но его не работает, давая ошибки. Я предполагаю, что я импортирую неправильные классы?

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.poi.hslf.record.Document;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;


public class TestCon {

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub

        POIFSFileSystem fs = null;  
         Document document = new Document(); 

         try {  
             System.out.println("Starting the test");  
             fs = new POIFSFileSystem(new FileInputStream("/document/test2.doc"));  

             HWPFDocument doc = new HWPFDocument(fs);  
             WordExtractor we = new WordExtractor(doc);  

             OutputStream file = new FileOutputStream(new File("/document/test.pdf")); 

             PdfWriter writer = PdfWriter.getInstance(document, file);  

             Range range = doc.getRange();
             document.open();  
             writer.setPageEmpty(true);  
             document.newPage();  
             writer.setPageEmpty(true);  

             String[] paragraphs = we.getParagraphText();  
             for (int i = 0; i < paragraphs.length; i++) {  

                 org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i);
                // CharacterRun run = pr.getCharacterRun(i);
                // run.setBold(true);
                // run.setCapitalized(true);
                // run.setItalic(true);
                 paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");  
             System.out.println("Length:" + paragraphs[i].length());  
             System.out.println("Paragraph" + i + ": " + paragraphs[i].toString());  

             // add the paragraph to the document  
             document.add(new Paragraph(paragraphs[i]));  
             }  

             System.out.println("Document testing completed");  
         } catch (Exception e) {  
             System.out.println("Exception during test");  
             e.printStackTrace();  
         } finally {  
                         // close the document  
            document.close();  
                     }  
         }  
    }

Ответ 1

Получил Решение

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;

import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.PdfWriter;


import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;

import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;


public class TestCon {

    /**
     * @param args
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub

        POIFSFileSystem fs = null;  
        Document document = new Document();

         try {  
             System.out.println("Starting the test");  
             fs = new POIFSFileSystem(new FileInputStream("D:/Resume.doc"));  

             HWPFDocument doc = new HWPFDocument(fs);  
             WordExtractor we = new WordExtractor(doc);  

             OutputStream file = new FileOutputStream(new File("D:/test.pdf")); 

             PdfWriter writer = PdfWriter.getInstance(document, file);  

             Range range = doc.getRange();
             document.open();  
             writer.setPageEmpty(true);  
             document.newPage();  
             writer.setPageEmpty(true);  

             String[] paragraphs = we.getParagraphText();  
             for (int i = 0; i < paragraphs.length; i++) {  

                 org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i);
                // CharacterRun run = pr.getCharacterRun(i);
                // run.setBold(true);
                // run.setCapitalized(true);
                // run.setItalic(true);
                 paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", "");  
             System.out.println("Length:" + paragraphs[i].length());  
             System.out.println("Paragraph" + i + ": " + paragraphs[i].toString());  

             // add the paragraph to the document  
             document.add(new Paragraph(paragraphs[i]));  
             }  

             System.out.println("Document testing completed");  
         } catch (Exception e) {  
             System.out.println("Exception during test");  
             e.printStackTrace();  
         } finally {  
                         // close the document  
            document.close();  
                     }  
         }  
    }

Ответ 2

Это сработало для меня: -

Источник: - http://www.programcreek.com/java-api-examples/index.php?api=org.apache.poi.xwpf.converter.pdf.PdfConverter

package pdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

public class PDF {
    public static void main(String[] args) throws Exception {
          String inputFile="D:/TEST.docx";
          String outputFile="D:/TEST.pdf";
          if (args != null && args.length == 2) {
            inputFile=args[0];
            outputFile=args[1];
          }
          System.out.println("inputFile:" + inputFile + ",outputFile:"+ outputFile);
          FileInputStream in=new FileInputStream(inputFile);
          XWPFDocument document=new XWPFDocument(in);
          File outFile=new File(outputFile);
          OutputStream out=new FileOutputStream(outFile);
          PdfOptions options=null;
          PdfConverter.getInstance().convert(document,out,options);
        }
}

Ответ 3

Следующий код работал у меня:

Public class DocToPdfConverter{

public static void main(String[] args) {

        String k=null;
        OutputStream fileForPdf =null;
        try {

            String fileName="/document/test2.doc";
            //Below Code is for .doc file 
            if(fileName.endsWith(".doc"))
            {
            HWPFDocument doc = new HWPFDocument(new FileInputStream(
                    fileName));
            WordExtractor we=new WordExtractor(doc);
            k = we.getText();

             fileForPdf = new FileOutputStream(new File(
                        "/document/DocToPdf.pdf")); 
            we.close();
            }

            //Below Code for 

            else if(fileName.endsWith(".docx"))
            {
                XWPFDocument docx = new XWPFDocument(new FileInputStream(
                        fileName));
                // using XWPFWordExtractor Class
                XWPFWordExtractor we = new XWPFWordExtractor(docx);
                 k = we.getText();

                 fileForPdf = new FileOutputStream(new File(
                            "/document/DocxToPdf.pdf"));    
                 we.close();
            }



            Document document = new Document();
            PdfWriter.getInstance(document, fileForPdf);

            document.open();

            document.add(new Paragraph(k));

            document.close();
            fileForPdf.close();



        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Ответ 4

Здесь есть несколько шагов:

Чтение документа Word с использованием POI в формат-агностик
Преобразовать формат-агностическую форму в PDF
Написать PDF

Я не знаю, будет ли POI делать для вас шаг 2. Я бы рекомендовал что-то другое, например iText.

Ответ 5

В качестве побочного примечания также можно читать контент "на лету" непосредственно из потока содержимого Word/Excel, вместо того, чтобы читать его из файловой системы и сериализовать его на диск, например, при извлечении контента из репозиториев CMIS:

например.

 //HWPFDocument docx = new HWPFDocument(fs);  
 HWPFDocument docx = new HWPFDocument(doc.getContentStream().getStream());

(doc имеет тип org.apache.chemistry.opencmis.client.api.Document, и в этом случае я адаптировал ваш код для извлечения текстового файла из репозитория Alfresco с помощью opencmis и преобразовал его в PDF)

НТН

Ответ 6

В дополнение к ответу Кушагры, здесь обновлены зависимости maven:

    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
        <version>2.0.1</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.xdocreport.converter</artifactId>
        <version>2.0.1</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.pdf</artifactId>
        <version>2.0.1</version>
    </dependency>
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
        <version>2.0.1</version>
    </dependency>

Ответ 7

Привет kushagra sahini это работает только для файлов DOCX, но я хочу работать для DOC также, когда я пытаюсь использовать это, он говорит об ошибке, что использование HWPF вместо XWPF для DOC даже использовал hwpf, но Pdf converter.getInstance(). Convert is не поддерживает это принимает только xwpf может любой помочь, как обработать этот файл документа

Ответ 8

Это спасет мой день, я загружаю файл docx из URL и конвертирую его в pdf:

pom.xml

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
    <version>LATEST</version>
</dependency>

main_class

public String wordToPDFPOI(String url) throws Exception {
    InputStream doc = new URL(url).openStream();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    XWPFDocument document = new XWPFDocument(doc);
    PdfOptions options = PdfOptions.create();
    PdfConverter.getInstance().convert(document, baos, options);
    String base64_encoded = Base64.encodeBytes(baos.toByteArray());

    return base64_encoded;
}