Mercurial > public > html2wiki
comparison src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java @ 7:a634b4d554d4
Minor fixups &gt;, random smilies :), etc. Fixed blockquote. Handle escaping brackets outside pre tag.
author | smith@nwoca.org |
---|---|
date | Thu, 27 Jan 2011 18:07:28 -0500 |
parents | 99f293bd507f |
children | e8ea26ab2cd7 |
comparison
equal
deleted
inserted
replaced
6:99f293bd507f | 7:a634b4d554d4 |
---|---|
10 import java.util.Collection; | 10 import java.util.Collection; |
11 import java.util.ArrayList; | 11 import java.util.ArrayList; |
12 import java.util.List; | 12 import java.util.List; |
13 import org.apache.commons.io.FileUtils; | 13 import org.apache.commons.io.FileUtils; |
14 import java.util.regex.*; | 14 import java.util.regex.*; |
15 import org.apache.commons.io.FilenameUtils; | |
16 | 15 |
17 /** | 16 /** |
18 * Converter to convert HTML documents into MediaWiki text. | 17 * Converter to convert HTML documents into MediaWiki text. |
19 * | 18 * |
20 * Heavily customized to handle HTML produced by DEC DOCUMENT | 19 * Heavily customized to handle HTML produced by DEC DOCUMENT |
32 | 31 |
33 /** Creates a new instance of Html2Wiki. */ | 32 /** Creates a new instance of Html2Wiki. */ |
34 public Html2Wiki(String html) { | 33 public Html2Wiki(String html) { |
35 buffer = new StringBuffer(html); | 34 buffer = new StringBuffer(html); |
36 transformers = new ArrayList<Transformer>(); | 35 transformers = new ArrayList<Transformer>(); |
37 // transformers.add(new PreTagTransformer()); | |
38 // transformers.add(new DeleteTransformer("^\\s",true)); | |
39 transformers.add(new DeleteTransformer("<html>|</html>|<body>|</body>")); | 36 transformers.add(new DeleteTransformer("<html>|</html>|<body>|</body>")); |
40 transformers.add(new DeleteTransformer("<!--.*-->(\\n|\\r)*",true)); | 37 transformers.add(new DeleteTransformer("<!--.*-->(\\n|\\r)*",true)); |
41 transformers.add(new DeleteTransformer("<a .*?>|</a>")); | 38 transformers.add(new DeleteTransformer("<a .*?>|</a>")); |
42 transformers.add(new DeleteTransformer("(?m)^\\*")); | 39 transformers.add(new DeleteTransformer("(?m)^\\*")); |
43 // transformers.add(new DeleteTransformer("<blockquote>|</blockquote>")); | 40 // transformers.add(new DeleteTransformer("<blockquote>|</blockquote>")); |
47 transformers.add(new BadTableDataTransformer()); | 44 transformers.add(new BadTableDataTransformer()); |
48 transformers.add(new BadTableRowTransformer()); | 45 transformers.add(new BadTableRowTransformer()); |
49 transformers.add(new ReflowTransformer()); | 46 transformers.add(new ReflowTransformer()); |
50 transformers.add(new DeleteTransformer("<p>")); | 47 transformers.add(new DeleteTransformer("<p>")); |
51 // transformers.add(new ReplaceTransformer("</td>","\n</td>")); | 48 // transformers.add(new ReplaceTransformer("</td>","\n</td>")); |
52 transformers.add(new ReplaceTransformer("\\{","\\{")); | 49 transformers.add(new ReplaceTransformer("\\{","\\{")); // Escape braces |
53 transformers.add(new ReplaceTransformer("\\}","\\}")); | 50 transformers.add(new ReplaceTransformer("\\}","\\}")); |
54 // transformers.add(new ReplaceTransformer("\\[","\\[")); | 51 |
55 // transformers.add(new ReplaceTransformer("\\]","\\]")); | 52 transformers.add(new ReplaceTransformer("\\[","\\[")); // Escape brackets |
53 transformers.add(new ReplaceTransformer("\\]","\\]")); | |
54 transformers.add(new PreTagTransformer()); // Unescape brackets inside <pre> | |
55 // | |
56 transformers.add(new ReplaceTransformer("<br>","\\\\")); | 56 transformers.add(new ReplaceTransformer("<br>","\\\\")); |
57 transformers.add(new ReplaceTransformer("<table.*?>|</table>","{table}")); | 57 transformers.add(new ReplaceTransformer("<table.*?>|</table>","{table}")); |
58 transformers.add(new ReplaceTransformer("<tr>|</tr>","{tr}")); | 58 transformers.add(new ReplaceTransformer("<tr>|</tr>","{tr}")); |
59 transformers.add(new ReplaceTransformer("<td.*?>|</td>","{td}")); | 59 transformers.add(new ReplaceTransformer("<td.*?>|</td>","{td}")); |
60 transformers.add(new ReplaceTransformer("<th.*?>|</th>","{th}")); | 60 transformers.add(new ReplaceTransformer("<th.*?>|</th>","{th}")); |
80 transformers.add(new ReplaceTransformer("\\{center}\\n\\{table}\\n\\{tr\\}\\n\\s{2}\\{td\\}\\{center\\}\\*Note\\*\\{center\\}","{note}")); | 80 transformers.add(new ReplaceTransformer("\\{center}\\n\\{table}\\n\\{tr\\}\\n\\s{2}\\{td\\}\\{center\\}\\*Note\\*\\{center\\}","{note}")); |
81 transformers.add(new ReplaceTransformer("\\{td\\}\\n\\s{2}\\{tr\\}\\n\\{table\\}\\n\\{center\\}","{note}")); | 81 transformers.add(new ReplaceTransformer("\\{td\\}\\n\\s{2}\\{tr\\}\\n\\{table\\}\\n\\{center\\}","{note}")); |
82 | 82 |
83 // transformers.add(new TagTransformer("\\{center}\\n\\{table}\\n\\{tr\\}\\n\\s{2}\\{td\\}\\{center\\}\\*Note\\*\\{center\\}(.*?)\\s\\{td\\}\\n\\s{2}\\{tr\\}\\{table\\}", "{note}", "{note}")); | 83 // transformers.add(new TagTransformer("\\{center}\\n\\{table}\\n\\{tr\\}\\n\\s{2}\\{td\\}\\{center\\}\\*Note\\*\\{center\\}(.*?)\\s\\{td\\}\\n\\s{2}\\{tr\\}\\{table\\}", "{note}", "{note}")); |
84 // transformers.add(new TagTransformer("(\\S)\\s\\n", "", " ")); | 84 // transformers.add(new TagTransformer("(\\S)\\s\\n", "", " ")); |
85 transformers.add(new TagTransformer("<blockquote>(.*)</blockquote>", "{quote}", "{quote}")); | 85 transformers.add(new TagTransformer("<blockquote>(.*?)</blockquote>", true, "{quote}", "{quote}")); |
86 transformers.add(new DeleteTransformer("(?s)<hr.*?>")); | 86 transformers.add(new DeleteTransformer("(?s)<hr.*?>")); |
87 transformers.add(new ReflowTransformer("(\\{note\\})([^\\{]*)(\\{note\\})")); | 87 transformers.add(new ReflowTransformer("(\\{note\\})([^\\{]*)(\\{note\\})")); |
88 transformers.add(new TagTransformer("<sup>(.*?)</sup>", true, "^\\[","\\]^ ")); | |
89 transformers.add(new ReplaceTransformer("&lt;","<")); | |
90 transformers.add(new ReplaceTransformer("&gt;",">")); | |
91 transformers.add(new ReplaceTransformer("&quot;","\"")); | |
92 transformers.add(new ReplaceTransformer(":\\)",": )")); // No smilies... | |
88 | 93 |
89 } | 94 } |
90 | 95 |
91 /** | 96 /** |
92 * @param args the command line arguments | 97 * @param args the command line arguments |