aspose Java word 使用书签 poi获取word书签 您所在的位置:网站首页 java书签 aspose Java word 使用书签 poi获取word书签

aspose Java word 使用书签 poi获取word书签

2024-07-09 06:45| 来源: 网络整理| 查看: 265

通过操作书签可以实现 word 模板变量替换的功能场景。本文以下的代码内容可以直接复制使用,并能正常编译运行。

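添加 maven 依赖

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.2.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>5.2.2</version>
</dependency>
<dependency>
    <groupId>com.deepoove</groupId>
    <artifactId>poi-tl</artifactId>
    <version>1.12.0</version>
</dependency>

操作书签的代码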

1、ShanhyXWPFDocumentMerge.java

package org.example; import com.deepoove.poi.xwpf.NiceXWPFDocument; import com.deepoove.poi.xwpf.XmlXWPFDocumentMerge; import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.impl.store.DomImpl; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.w3c.dom.Node; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Iterator; import java.util.List; /** * 文档合并 * * @author shanhy */ public class ShanhyXWPFDocumentMerge extends XmlXWPFDocumentMerge { /** * 将一个文档合并到另外一个文档指定段落的相对位置 * * @param source 当前文档 * @param mergeIterator 被合入的文档 * @param targetParagraphNode 目标段落Node * @param deleteTargetParagraph 是否删除目标段落自身 * @return * @throws Exception */ public ShanhyXWPFDocument mergeToParagraphBefore(ShanhyXWPFDocument source, Iterator mergeIterator, Node targetParagraphNode, boolean deleteTargetParagraph) throws Exception { CTBody body = source.getDocument().getBody(); List addParts = createMergeableStrings(source, mergeIterator); String[] startEnd = truncatedStartEndXmlFragment(body); // CTP mergedContainer = paragraph.getCTP(); XmlOptions options = new XmlOptions(); options.setUseSameLocale(((DomImpl.Dom) targetParagraphNode).locale()); CTP mergedBody = CTP.Factory.parse(startEnd[0] + String.join("", addParts) + startEnd[1], options); // instead insert xml-fragment? 
// new XWPFParagraph(CTP.Factory.parse(mergedBody.getDomNode(), options), source); Node mergedContainerParentNode = targetParagraphNode.getParentNode(); // 将引入文档的整体插入到目标段落之前 mergedContainerParentNode.insertBefore(mergedBody.getDomNode(), targetParagraphNode); if(deleteTargetParagraph) { // 删除掉目标段落自身 mergedContainerParentNode.removeChild(targetParagraphNode); } // mergedContainer.getDomNode().appendChild(CTP.Factory.parse(mergedBody.getDomNode(), options).getDomNode()); // mergedContainer.set(mergedBody); // String xmlText = truncatedOverlapWP(body); // body.set(CTBody.Factory.parse(xmlText)); // return source.generate(true); return source; } /** * 反射调用父类方法 * * @param methodName * @param params * @throws NoSuchMethodException * @throws InvocationTargetException * @throws IllegalAccessException */ private Object invokeSuperMethod(String methodName, Class[] paramClasses, Object[] params) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { Method method = this.getClass().getSuperclass().getDeclaredMethod(methodName, paramClasses); method.setAccessible(true); return method.invoke(this, params); } @SuppressWarnings("unchecked") private List createMergeableStrings(ShanhyXWPFDocument source, Iterator mergeIterator) { try { Object obj = invokeSuperMethod("createMergeableStrings", new Class[]{NiceXWPFDocument.class, Iterator.class}, new Object[]{source, mergeIterator}); if (obj instanceof List) { return (List) obj; } } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { throw new RuntimeException(e); } return new ArrayList(); } private String[] truncatedStartEndXmlFragment(CTBody body) { try { Object obj = invokeSuperMethod("truncatedStartEndXmlFragment", new Class[]{CTBody.class}, new Object[]{body}); return (String[]) obj; } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { throw new RuntimeException(e); } } private String truncatedOverlapWP(CTBody body) { try { Object obj 
= invokeSuperMethod("truncatedOverlapWP", new Class[]{CTBody.class}, new Object[]{body}); return (String) obj; } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { throw new RuntimeException(e); } } }

2、ShanhyXWPFDocument.java

package org.example;

import com.deepoove.poi.xwpf.NiceXWPFDocument;
import org.w3c.dom.Node;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;

/**
 * Docx document that can merge other documents in front of a given paragraph.
 *
 * @author shanhy
 */
public class ShanhyXWPFDocument extends NiceXWPFDocument {

    /**
     * Loads a docx document from a stream.
     *
     * @param in stream containing the docx content
     * @throws IOException if the document cannot be read
     */
    public ShanhyXWPFDocument(InputStream in) throws IOException {
        super(in);
    }

    /**
     * Merges the given documents into <em>this</em> document, immediately before
     * {@code targetParagraphNode}.
     *
     * @param source                not used: the merge always targets {@code this}.
     *                              Kept so existing callers keep compiling.
     * @param mergeIterator         the documents to merge in
     * @param targetParagraphNode   DOM node of the paragraph that marks the insert position
     * @param deleteTargetParagraph whether to remove the target paragraph after the insert
     * @throws Exception if the merge fails
     */
    public void mergeToParagraphBefore(ShanhyXWPFDocument source,
                                       Iterator<? extends NiceXWPFDocument> mergeIterator,
                                       Node targetParagraphNode,
                                       boolean deleteTargetParagraph) throws Exception {
        new ShanhyXWPFDocumentMerge()
                .mergeToParagraphBefore(this, mergeIterator, targetParagraphNode, deleteTargetParagraph);
    }
}

3、ShanhyDocxBookmarkService.java

package org.example; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFRun; import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.impl.store.DomImpl; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; /** * 书签替换处理类 * * @author shanhy */ public class ShanhyDocxBookmarkService { /** * 因为docx为xml格式的结构,一下为docx中定义的部分常量引用 **/ public static final String RUN_NODE_NAME = "w:r"; public static final String TEXT_NODE_NAME = "w:t"; public static final String BOOKMARK_START_TAG = "w:bookmarkStart"; public static final String BOOKMARK_END_TAG = "w:bookmarkEnd"; public static final String BOOKMARK_ID_ATTR_ID = "w:id"; public static final String NODENAME_BODY = "w:body"; public static final String NODENAME_PARAGRAPH = "w:p"; public static final String BOOKMARK_ID_ATTR_NAME = "w:name"; public static final String STYLE_NODE_NAME = "w:rPr"; public static final String PARAGRAPH_PROPERTIES_NAME = "w:pPr"; /** * 读取 docx 文件中的所有书签(注意不支持书签嵌套,书签嵌套书签的情况只识别最外层书签) * * @param docx */ public List getBookmarksFromDocx(XWPFDocument docx) { Node bodyNode = docx.getDocument().getBody().getDomNode(); // 递归读取 bookmarkStart 节点,返回bookmarkNode集合() List bookmarkNodeList = new ArrayList(); getBookmarksFromNode(bodyNode, bookmarkNodeList); return bookmarkNodeList; } /** * 递归解析所有Node节点,将bookmark记录到集合中 * * @param node * @param bookmarkNodeList */ public void getBookmarksFromNode(Node node, List bookmarkNodeList) { if (node.getNodeName().equals(BOOKMARK_START_TAG)) { bookmarkNodeList.add(node); } else if (node.getNodeName().equals(BOOKMARK_END_TAG) || node.getNodeName().equals(PARAGRAPH_PROPERTIES_NAME)) { // Nothing } else { NodeList 
childNodes = node.getChildNodes(); for (int i = 0, j = childNodes.getLength(); i < j; i++) { Node childNode = childNodes.item(i); getBookmarksFromNode(childNode, bookmarkNodeList); } } } /** * docx 文件中书签的替换 * * @param docx * @param outputStream * @param dataMap * @throws IOException */ public void replaceDocxBookmarks(ShanhyXWPFDocument docx, OutputStream outputStream, Map dataMap) throws Exception { // 获取所有书签 List startBookmarkList = getBookmarksFromDocx(docx); // 替换书签内容 for (Node startBookmarkNode : startBookmarkList) { String bookmarkName = startBookmarkNode.getAttributes().getNamedItem("w:name").getNodeValue(); if (dataMap.containsKey(bookmarkName)) { Object data = dataMap.get(bookmarkName); if (data instanceof String) {// 内容是文本 replaceDocxBookmarkFromString(getFirstParentParagraphByNode(startBookmarkNode, docx), startBookmarkNode, (String) data); } else if (data instanceof ShanhyXWPFDocument) { // 内容是外部 docx 文档 replaceDocxBookmarkFromDocx(docx, getFirstParentNodeByNode(startBookmarkNode, docx), startBookmarkNode, (ShanhyXWPFDocument) data); } else { throw new RuntimeException("替换书签的内容源数据格式暂不支持"); } } } docx.write(outputStream); docx.close(); } /** * 将一个docx文档替换到docx的书签中 * 1.要求书签尽量设置在文档的换行起点 * 2.被合入的docx会作为换行起点开始合入,如果bookmark没有设置在一个换行的起点,程序会自动从该bookmark节点开始寻找, * 定位到父节点为body的对应p节点的下一个节点,然后将需要合入的文档所有内容追加到该p节点的下一个 * * @param bookmarkParentParagraph * @param startBookmarkNode * @param content */ public void replaceDocxBookmarkFromString(XWPFParagraph bookmarkParentParagraph, Node startBookmarkNode, String content) { Node nextNode = startBookmarkNode.getNextSibling(); boolean contentReplaced = false; while (nextNode != null) { // 节点是结束符 if (nextNode.getNodeName().equals(BOOKMARK_END_TAG)) { break; } // 1.寻找startBookmark的下一个 w:r 节点,然后将该节点中的 w:t 节点的真正文字内容替换掉(这样可以保留原来的bookmark的文字样式) if (!contentReplaced && nextNode.getNodeName().equals("w:r")) { NodeList runChildNodes = nextNode.getChildNodes(); for (int i = 0, j = runChildNodes.getLength(); i < j; i++) { if 
(runChildNodes.item(i).getNodeName().equals("w:t")) {// w:t 是真正的文本内容 runChildNodes.item(i).getFirstChild().setNodeValue(content); } } contentReplaced = true; } else { // 2.然后继续向下删除 endBookmark 之前的所有节点 startBookmarkNode.getParentNode().removeChild(nextNode); } nextNode = nextNode.getNextSibling(); } //1-end if (!contentReplaced) { XWPFRun run = bookmarkParentParagraph.createRun(); run.setText(content); Node newChildNode = run.getCTR().getDomNode(); Node startBookmarkNextNode = startBookmarkNode.getNextSibling(); if (startBookmarkNextNode == null) { startBookmarkNode.getParentNode().appendChild(newChildNode); } else { startBookmarkNode.getParentNode().insertBefore(newChildNode, startBookmarkNextNode); } } } /** * 将一个docx文档替换到docx的书签中 * 1.要求书签尽量设置在文档的换行起点 * 2.被合入的docx会作为换行起点开始合入,如果bookmark没有设置在一个换行的起点,程序会自动从该bookmark节点开始寻找, * 定位到父节点为body的对应p节点的下一个节点,然后将需要合入的文档所有内容追加到该p节点的下一个 * * @param startBookmarkNode * @param contentDocx */ public void replaceDocxBookmarkFromDocx(ShanhyXWPFDocument docx, Node bookmarkParentNode, Node startBookmarkNode, ShanhyXWPFDocument contentDocx) throws Exception { //1-start.删除bookmark原始内容 Node nextNode = startBookmarkNode.getNextSibling(); while (nextNode != null) { // 循环查找结束符 String nodeName = nextNode.getNodeName(); if (nodeName.equals(BOOKMARK_END_TAG)) { break; } // 删除中间的非结束节点,即删除原书签内容 Node delNode = nextNode; nextNode = nextNode.getNextSibling(); startBookmarkNode.getParentNode().removeChild(delNode); } //1-end docx.mergeToParagraphBefore(docx, Collections.singletonList(contentDocx).iterator(), bookmarkParentNode, true); } /** * 向上递归获得指定node节点的上一个p节点 * * @param node * @return */ public XWPFParagraph getFirstParentParagraphByNode(Node node, XWPFDocument docx) throws XmlException { XmlOptions options = new XmlOptions(); options.setUseSameLocale(((DomImpl.Dom) node).locale()); return new XWPFParagraph(CTP.Factory.parse(getFirstParentNodeByNode(node, docx), options), docx); } /** * 向上递归获得指定node节点的上一个p节点 * * @param node * @return */ public 
Node getFirstParentNodeByNode(Node node, XWPFDocument docx) throws XmlException { Node parentNode = node.getParentNode(); if (NODENAME_PARAGRAPH.equals(parentNode.getNodeName())) { return parentNode; } return getFirstParentNodeByNode(parentNode, docx); } }

4、BookmarkDemo.java

package org.example; import java.nio.file.Files; import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; public class BookmarkDemo { public static void main(String[] args) throws Exception { long start = System.currentTimeMillis(); ShanhyDocxBookmarkService bookmarkService = new ShanhyDocxBookmarkService(); // 读取 docx 文件中的所有书签(基于解析xmlDom文档读取) // List bookmarkNodeList = bookmarkService.getBookmarksFromDocx(new XWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\模板_Password_Removed.docx")))); // bookmarkNodeList.forEach(bookmarkNode -> { // NamedNodeMap attrs = bookmarkNode.getAttributes(); // System.out.println("书签id=" + attrs.getNamedItem(ShanhyDocxBookmarkService.BOOKMARK_ID_ATTR_ID).getNodeValue() + ", 书签name=" + attrs.getNamedItem(ShanhyDocxBookmarkService.BOOKMARK_ID_ATTR_NAME).getNodeValue() + ""); // }); // 替换 docx 文件中的 bookmark 内容 Map dataMap = new HashMap(); // dataMap.put("strong", "单红宇"); // dataMap.put("footnotes", "李小雨"); // dataMap.put("替换的书签", new ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\content_table2.docx")))); // dataMap.put("Hello", new ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\content_table2.docx")))); // dataMap.put("替换的书签", new ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\content_table2.docx")))); ShanhyXWPFDocument mergeContentDocx = new ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\content_table2.docx"))); mergeContentDocx.getTables().forEach(tbl -> { tbl.setWidthType(TableWidthType.PCT); tbl.setWidth("100%"); }); dataMap.put("AAA", mergeContentDocx); dataMap.put("String书签", "单红宇"); dataMap.put("没有内容的书签", "书签内容Hello"); // dataMap.put("测试docx书签", "内容内容内容"); // bookmarkService.replaceDocxBookmarks(new ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\template.docx"))), Files.newOutputStream(Paths.get("D:\\Desktop\\docx\\out-2.docx")), dataMap); bookmarkService.replaceDocxBookmarks(new 
ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\template.docx"))), Files.newOutputStream(Paths.get("D:\\Desktop\\docx\\out-2.docx")), dataMap); System.out.println("耗时=" + (System.currentTimeMillis() - start) + "ms"); } }

测试的合并效果如下:

aspose Java word 使用书签 poi获取word书签_xml

在编辑 word 书签时,可以开启显示书签模式,这样可以直观地在 word 页面中看到书签标记,如下图所示:

aspose Java word 使用书签 poi获取word书签_java_02

aspose Java word 使用书签 poi获取word书签_POI_03

如果你有获取 docx 的 xml 内容的需求,则代码为:

new XWPFDocument(inputStream).getDocument().xmlText(); new XWPFDocument(inputStream).getDocument().getBody().xmlText();

其他: 1、本文因为业务需求需要使用书签方式;如果你使用 {{username}}、{{快递地址}} 这样的变量方式,则直接使用 poi-tl 库操作会更简单。 2、poi-tl 是一个方便操作 word 的开源项目: https://github.com/Sayi/poi-tl

(END)



【本文地址】

公司简介

联系我们

今日新闻

    推荐新闻

    专题文章
      CopyRight 2018-2019 实验室设备网 版权所有