/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.maven;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.w3c.tidy.DOMElementImpl;
import org.w3c.tidy.Tidy;

/**
 * Goal which extracts the content of a wiki page and converts it to DocBook
 * format.
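 * <p>
 * A minimal configuration sketch. The plugin coordinates and resource names
 * below are placeholders for illustration only; the parameter names match the
 * fields of this mojo:
 * </p>
 *
 * <pre>
 * &lt;plugin&gt;
 *   &lt;groupId&gt;org.apache.camel&lt;/groupId&gt;
 *   &lt;artifactId&gt;some-html-to-docbook-plugin&lt;/artifactId&gt;
 *   &lt;configuration&gt;
 *     &lt;title&gt;Camel Manual&lt;/title&gt;
 *     &lt;mainFilename&gt;manual&lt;/mainFilename&gt;
 *     &lt;resources&gt;
 *       &lt;resource&gt;index.html&lt;/resource&gt;
 *     &lt;/resources&gt;
 *   &lt;/configuration&gt;
 * &lt;/plugin&gt;
 * </pre>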
 *
 * @goal htmlToDocbook
 * @phase process-sources
 */
public class GenerateDocBookMojo extends AbstractMojo {

    /**
     * Base URL.
     *
     * @parameter expression="${baseURL}"
     *            default-value="http://activemq.apache.org/camel/"
     * @required
     */
    private String baseURL;

    /**
     * List of wiki resources to process.
     *
     * @parameter
     */
    private String[] resources;

    /**
     * List of authors' full names.
     *
     * @parameter
     */
    private String[] authors;

    /**
     * Location of the XSL file.
     *
     * @parameter expression="${configDirectory}"
     */
    private String xslFile;

    /**
     * Location of the output directory.
     *
     * @parameter expression="${project.build.directory}/docbkx/docbkx-source"
     */
    private String outputPath;

    /**
     * Location of the output directory for the wiki source.
     *
     * @parameter expression="${project.build.directory}/docbkx/wiki-source"
     */
    private String wikiOutputPath;

    /**
     * Title of the generated book.
     *
     * @parameter expression="${title}"
     * @required
     */
    private String title;

    /**
     * Subtitle of the generated book.
     *
     * @parameter expression="${subtitle}"
     */
    private String subtitle;

    /**
     * Base file name of the main DocBook file.
     *
     * @parameter expression="${mainFilename}" default-value="manual"
     * @required
     */
    private String mainFilename;

    /**
     * Version shown in the release info.
     *
     * @parameter expression="${version}" default-value="${project.version}"
     */
    private String version;

    /**
     * Legal notice for the book; defaults to the Apache License text.
     *
     * @parameter expression="${legalNotice}"
     */
    private String legalNotice;

    /**
     * Location of image files.
     *
     * @parameter expression="${project.build.directory}/site/book/images"
     */
    private String imageLocation;

    private String chapterId;

    private static final transient Log log = LogFactory.getLog(GenerateDocBookMojo.class);

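    /**
     * Creates the output directories, writes the main DocBook file and then
     * processes each configured wiki resource into its own chapter file.
     *
     * @throws MojoExecutionException if the mojo cannot be executed
     */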
    public void execute() throws MojoExecutionException {
        File outputDir = new File(outputPath);
        File wikiOutputDir = new File(wikiOutputPath);
        File imageDir = new File(imageLocation);
        // create each output directory independently, as any one of them may be missing
        if (!outputDir.exists()) {
            outputDir.mkdirs();
        }
        if (!imageDir.exists()) {
            imageDir.mkdirs();
        }
        if (!wikiOutputDir.exists()) {
            wikiOutputDir.mkdirs();
        }

        createMainXML();

        for (String resource : resources) {
            setChapterId(removeExtension(resource));
            process(resource);
        }
    }

    /**
     * Extracts the wiki content and transforms it into DocBook format.
     *
     * @param resource the name of the wiki page to process, relative to the base URL
     */
    public void process(String resource) {
        Tidy tidy = new Tidy();
        ByteArrayOutputStream out = null;
        BufferedOutputStream output = null;
        BufferedOutputStream wikiOutput = null;
        StreamSource streamSource = null;

        tidy.setXmlOut(true);
        try {
            out = new ByteArrayOutputStream();
            URL u = new URL(baseURL + resource);
            Document doc = tidy.parseDOM(new BufferedInputStream(u.openStream()), out);
            out.close();
            // extract the div element with class="wiki-content maincontent"
            NodeList nodeList = doc.getElementsByTagName("div");
            for (int i = 0; i < nodeList.getLength(); ++i) {
                Node node = nodeList.item(i);
                NamedNodeMap nm = node.getAttributes();
                Node attr = nm.getNamedItem("class");

                if (attr != null
                        && attr.getNodeValue().equalsIgnoreCase("wiki-content maincontent")) {
                    downloadImages(node);
                    // these attributes are used by the XSL stylesheet when
                    // generating the chapter
                    Element element = (Element) node;
                    element.setAttribute("chapterId", chapterId);
                    element.setAttribute("baseURL", baseURL);
                    element.setAttribute("imageLocation", "../images/");

                    DOMSource source = new DOMSource(processH2Section(doc, node));

                    output = new BufferedOutputStream(new FileOutputStream(
                            outputPath + File.separator + removeExtension(resource) + ".xml"));
                    StreamResult result = new StreamResult(output);
                    TransformerFactory tFactory = TransformerFactory.newInstance();
                    if (xslFile != null && !xslFile.trim().equals("")) {
                        streamSource = new StreamSource(xslFile);
                    } else {
                        InputStream xslStream = getClass().getResourceAsStream("/docbook.xsl");
                        streamSource = new StreamSource(xslStream);
                    }

                    Transformer transformer = tFactory.newTransformer(streamSource);
                    transformer.transform(source, result);

                    // generate the wiki source for debugging
                    wikiOutput = new BufferedOutputStream(new FileOutputStream(
                            wikiOutputPath + File.separator + removeExtension(resource) + ".html"));
                    result = new StreamResult(wikiOutput);
                    transformer = tFactory.newTransformer();
                    transformer.transform(source, result);

                    break;
                }
            }
        } catch (Exception e) {
            log.debug("Exception processing wiki content", e);
        } finally {
            try {
                if (output != null) {
                    output.close();
                }
                if (wikiOutput != null) {
                    wikiOutput.close();
                }
            } catch (IOException e) {
                log.debug("Exception closing output streams", e);
            }
        }
    }

    /**
     * Creates the main DocBook file, which declares each chapter as an
     * external entity and pulls it into the book body.
     */
    public void createMainXML() {
        try {
            PrintWriter out = new PrintWriter(new FileWriter(outputPath
                    + File.separator + mainFilename + ".xml"));

            out.println("<!DOCTYPE book PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\" ");
            out.println("[");

            // declare one external entity per chapter file
            for (int i = 0; i < resources.length; ++i) {
                out.println("<!ENTITY " + removeExtension(resources[i])
                        + " SYSTEM \"" + removeExtension(resources[i]) + ".xml\">");
            }

            out.println("]>");
            out.println("<book>");
            out.println("<bookinfo>");
            out.println("<title>" + title + "</title>");
            if (subtitle != null) {
                out.println("<subtitle>" + subtitle + "</subtitle>");
            }
            out.println("<releaseinfo>" + version + "</releaseinfo>");
            out.println("<authorgroup>");
            if (authors != null) {
                for (int i = 0; i < authors.length; ++i) {
                    // split "Firstname Lastname" into its two parts
                    StringTokenizer name = new StringTokenizer(authors[i]);
                    String fname = name.nextToken();
                    String lname = "";
                    if (name.hasMoreTokens()) {
                        lname = name.nextToken();
                    }
                    out.println("<author>");
                    out.println("<firstname>" + fname + "</firstname>");
                    out.println("<surname>" + lname + "</surname>");
                    out.println("</author>");
                }
            }
            out.println("</authorgroup>");

            out.println("<legalnotice>");
            if (legalNotice != null && legalNotice.length() > 0) {
                out.println("<para>");
                out.println(legalNotice);
                out.println("</para>");
            } else {
                // default to the Apache License notice
                out.println("<para>Licensed to the Apache Software Foundation (ASF) under one or more");
                out.println("contributor license agreements. See the NOTICE file distributed with");
                out.println("this work for additional information regarding copyright ownership. The");
                out.println("ASF licenses this file to You under the Apache License, Version 2.0 (the");
                out.println("\"License\"); you may not use this file except in compliance with the");
                out.println("License. You may obtain a copy of the License at</para>");
                out.println("<para>http://www.apache.org/licenses/LICENSE-2.0</para>");
                out.println("<para>Unless required by applicable law or agreed to in writing,");
                out.println("software distributed under the License is distributed on an \"AS IS\"");
                out.println("BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or");
                out.println("implied. See the License for the specific language governing permissions");
                out.println("and limitations under the License.</para>");
            }
            out.println("</legalnotice>");
            out.println("</bookinfo>");
            out.println("<toc></toc>");

            // reference each chapter entity in the book body
            for (int i = 0; i < resources.length; ++i) {
                out.println("&" + removeExtension(resources[i]) + ";");
            }

            out.println("</book>");
            out.flush();
            out.close();
        } catch (IOException e) {
            log.debug("Exception in creating the " + mainFilename + ".xml file", e);
        }
    }

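    /**
     * Downloads every image referenced below the given node into the image
     * directory, resolving relative image URLs against the base URL.
     *
     * @param node the DOM node to scan for {@code <img>} elements
     */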
    public void downloadImages(Node node) {
        for (String imageUrl : getImageUrls(node)) {
            String imageFile = "imageFile";

            // resolve relative image paths against the base URL
            if (imageUrl.indexOf("http://") < 0) {
                imageUrl = baseURL + imageUrl;
            }

            BufferedInputStream in = null;
            BufferedOutputStream out = null;
            try {
                URL url = new URL(imageUrl);
                // the image file name is the last path segment of the URL
                StringTokenizer st = new StringTokenizer(url.getFile(), "/");
                while (st.hasMoreTokens()) {
                    imageFile = st.nextToken();
                }

                URLConnection connection = url.openConnection();
                in = new BufferedInputStream(connection.getInputStream());
                out = new BufferedOutputStream(new FileOutputStream(
                        imageLocation + File.separator + imageFile));
                int i;
                while ((i = in.read()) != -1) {
                    out.write(i);
                }
                out.flush();
            } catch (Exception e) {
                log.debug("Exception in downloading image " + imageFile, e);
            } finally {
                try {
                    if (in != null) {
                        in.close();
                    }
                    if (out != null) {
                        out.close();
                    }
                } catch (IOException e) {
                    log.debug("Exception closing image streams", e);
                }
            }
        }
    }

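    /**
     * Collects the value of the src attribute of every {@code <img>} element
     * below the given node.
     *
     * @param node the DOM node to scan
     * @return the list of image URLs, possibly empty
     */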
    public List<String> getImageUrls(Node node) {
        List<String> list = new ArrayList<String>();
        DOMElementImpl element = (DOMElementImpl) node;
        NodeList imageList = element.getElementsByTagName("img");

        if (imageList != null) {
            for (int i = 0; i < imageList.getLength(); ++i) {
                Node imageNode = imageList.item(i);
                NamedNodeMap nm = imageNode.getAttributes();
                Node attr = nm.getNamedItem("src");
                if (attr != null) {
                    list.add(attr.getNodeValue());
                }
            }
        }
        return list;
    }

    public String getChapterId() {
        return chapterId;
    }

    public void setChapterId(String chapterId) {
        this.chapterId = chapterId;
    }

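    /**
     * Strips the file extension from a resource name, e.g. "index.html"
     * becomes "index"; everything from the first '.' onwards is removed.
     *
     * @param resource the resource file name
     * @return the resource name without its extension
     */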
    public String removeExtension(String resource) {
        int index = resource.indexOf('.');
        if (index < 0) {
            // no extension to strip
            return resource;
        }
        return resource.substring(0, index);
    }

    /**
     * Creates an {@code <h2_section>} node under each {@code <h2>} node and
     * places all nodes that follow the {@code <h2>} inside it, until the next
     * {@code <h2>} node is found. This divides the chapter content into
     * sections delimited by {@code <h2>} headings.
     */
    public Node processH2Section(Document doc, Node node) {
        NodeList nodeList = node.getChildNodes();
        Node h2Node = null;
        Node pNode = null;
        boolean firstInstanceOfH2 = false;

        for (int x = 0; x < nodeList.getLength(); ++x) {
            Node node2 = nodeList.item(x);

            if (node2 != null) {
                String nodeName = node2.getNodeName();

                if (nodeName.equalsIgnoreCase("h2")) {
                    h2Node = node2.appendChild(doc.createElement("h2_section"));
                } else {
                    // if the first node is neither a <p> nor an <h2> node, create a <p>
                    // node and place all succeeding nodes inside it until a <p> or <h2>
                    // node is found
                    if (x == 0 && !nodeName.equalsIgnoreCase("p")
                            && !nodeName.equalsIgnoreCase("h2")) {
                        pNode = node.insertBefore(doc.createElement("p"), node2);
                        // the live NodeList now contains the new <p>, so skip over it
                        x++;
                        firstInstanceOfH2 = true;
                    }
                    if (firstInstanceOfH2) {
                        if (node2 == node.getLastChild()) {
                            pNode.appendChild(node2.cloneNode(true));
                        } else {
                            Node nextNode = node2.getNextSibling();
                            pNode.appendChild(node2.cloneNode(true));
                            if (nextNode.getNodeName().equalsIgnoreCase("h2")
                                    || nextNode.getNodeName().equalsIgnoreCase("p")) {
                                firstInstanceOfH2 = false;
                            }
                        }
                    }

                    if (h2Node != null) {
                        h2Node.appendChild(node2.cloneNode(true));
                    }
                }
            }
        }

        // remove all child nodes that are not <h2> or <p>; their content has already
        // been copied into an <h2_section> or <p> node above
        NodeList nodeList3 = node.getChildNodes();
        for (int x = 0; x < nodeList3.getLength(); ++x) {
            Node node2 = nodeList3.item(x);
            if (node2 != null && !node2.getNodeName().equalsIgnoreCase("p")
                    && !node2.getNodeName().equalsIgnoreCase("h2")) {
                node.removeChild(node2);
                // the live NodeList shrinks after removeChild, so re-check this index
                x--;
            }
        }
        return node;
    }
}