Mercurial > public > html2wiki
annotate src/org/nwoca/ssdt/tools/html2wiki/ChapterTransformer.java @ 0:f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
author | smith@nwoca.org |
---|---|
date | Fri, 12 May 2006 16:45:42 -0400 |
parents | |
children | 22ed6d93442c |
rev | line source |
---|---|
0
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
1 package org.nwoca.ssdt.tools.html2wiki; |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
2 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
3 import java.util.regex.*; |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
4 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
5 class ChapterTransformer implements Transformer { |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
6 private Pattern chapterPattern = Pattern.compile("^<hr size=5>.*?<h1>(.*?)</h1>",Pattern.MULTILINE + Pattern.DOTALL); |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
7 private String category; |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
8 public ChapterTransformer() { |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
9 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
10 } |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
11 public ChapterTransformer(String category) { |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
12 this.category = category; |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
13 } |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
14 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
15 public void apply(StringBuffer buffer) { |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
16 Matcher matcher = chapterPattern.matcher(buffer); |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
17 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
18 boolean first = true; |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
19 while (matcher.find( 0 )) { |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
20 buffer.replace(matcher.start(),matcher.end(), |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
21 "<chapter>" + matcher.group(1) +"</chapter>\n__TOC__" + |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
22 (category == null ? "" : "\n[[Category:" + category + "]]")); |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
23 first = false; |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
24 } |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
25 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
26 } |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
27 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
28 public String toString() { |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
29 return "Replace Chapter markers"; |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
30 } |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
31 |
f8b1ea49d065
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
smith@nwoca.org
parents:
diff
changeset
|
32 } |