diff src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java @ 7:a634b4d554d4

Minor fixups: unescape &gt;, break up accidental smilies such as :), etc. Fixed blockquote. Handle escaping brackets outside pre tag.
author smith@nwoca.org
date Thu, 27 Jan 2011 18:07:28 -0500
parents 99f293bd507f
children e8ea26ab2cd7
line wrap: on
line diff
--- a/src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java	Thu Jan 27 16:37:27 2011 -0500
+++ b/src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java	Thu Jan 27 18:07:28 2011 -0500
@@ -12,7 +12,6 @@
 import java.util.List;
 import org.apache.commons.io.FileUtils;
 import java.util.regex.*;
-import org.apache.commons.io.FilenameUtils;
 
 /**
  * Converter to convert HTML documents into MediaWiki text.
@@ -34,8 +33,6 @@
     public Html2Wiki(String html) {
         buffer = new StringBuffer(html);
         transformers =  new ArrayList<Transformer>();
-//        transformers.add(new PreTagTransformer());
-//        transformers.add(new DeleteTransformer("^\\s",true));
         transformers.add(new DeleteTransformer("<html>|</html>|<body>|</body>"));
         transformers.add(new DeleteTransformer("<!--.*-->(\\n|\\r)*",true));
         transformers.add(new DeleteTransformer("<a .*?>|</a>"));
@@ -49,10 +46,13 @@
         transformers.add(new ReflowTransformer());
         transformers.add(new DeleteTransformer("<p>"));
 //        transformers.add(new ReplaceTransformer("</td>","\n</td>"));
-          transformers.add(new ReplaceTransformer("\\{","\\{"));
+          transformers.add(new ReplaceTransformer("\\{","\\{"));  // Escape braces
           transformers.add(new ReplaceTransformer("\\}","\\}"));
-//          transformers.add(new ReplaceTransformer("\\[","\\["));
-//          transformers.add(new ReplaceTransformer("\\]","\\]"));
+
+        transformers.add(new ReplaceTransformer("\\[","\\["));  // Escape brackets
+        transformers.add(new ReplaceTransformer("\\]","\\]"));
+        transformers.add(new PreTagTransformer());              // Unescape brackets inside <pre>
+//
         transformers.add(new ReplaceTransformer("<br>","\\\\"));
         transformers.add(new ReplaceTransformer("<table.*?>|</table>","{table}"));
         transformers.add(new ReplaceTransformer("<tr>|</tr>","{tr}"));
@@ -82,9 +82,14 @@
 
 //        transformers.add(new TagTransformer("\\{center}\\n\\{table}\\n\\{tr\\}\\n\\s{2}\\{td\\}\\{center\\}\\*Note\\*\\{center\\}(.*?)\\s\\{td\\}\\n\\s{2}\\{tr\\}\\{table\\}",    "{note}",  "{note}"));
 //        transformers.add(new TagTransformer("(\\S)\\s\\n",    "", " "));
-        transformers.add(new TagTransformer("<blockquote>(.*)</blockquote>",    "{quote}", "{quote}"));
+        transformers.add(new TagTransformer("<blockquote>(.*?)</blockquote>", true,   "{quote}", "{quote}"));
         transformers.add(new DeleteTransformer("(?s)<hr.*?>"));
         transformers.add(new ReflowTransformer("(\\{note\\})([^\\{]*)(\\{note\\})"));
+        transformers.add(new TagTransformer("<sup>(.*?)</sup>", true, "^\\[","\\]^ "));
+        transformers.add(new ReplaceTransformer("&lt;","<"));
+        transformers.add(new ReplaceTransformer("&gt;",">"));
+        transformers.add(new ReplaceTransformer("&quot;","\""));
+        transformers.add(new ReplaceTransformer(":\\)",": )"));  // No smilies...
 
     }