changeset 7:a634b4d554d4

Minor fixups: decode &lt;, &gt; and &quot; entities, break up accidental smilies such as :), etc. Fixed blockquote matching. Escape brackets everywhere except inside <pre> tags.
author smith@nwoca.org
date Thu, 27 Jan 2011 18:07:28 -0500
parents 99f293bd507f
children e8ea26ab2cd7
files src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java src/org/nwoca/ssdt/tools/html2wiki/PreTagTransformer.java
diffstat 2 files changed, 15 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java	Thu Jan 27 16:37:27 2011 -0500
+++ b/src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java	Thu Jan 27 18:07:28 2011 -0500
@@ -12,7 +12,6 @@
 import java.util.List;
 import org.apache.commons.io.FileUtils;
 import java.util.regex.*;
-import org.apache.commons.io.FilenameUtils;
 
 /**
  * Converter to convert HTML documents into MediaWiki text.
@@ -34,8 +33,6 @@
     public Html2Wiki(String html) {
         buffer = new StringBuffer(html);
         transformers =  new ArrayList<Transformer>();
-//        transformers.add(new PreTagTransformer());
-//        transformers.add(new DeleteTransformer("^\\s",true));
         transformers.add(new DeleteTransformer("<html>|</html>|<body>|</body>"));
         transformers.add(new DeleteTransformer("<!--.*-->(\\n|\\r)*",true));
         transformers.add(new DeleteTransformer("<a .*?>|</a>"));
@@ -49,10 +46,13 @@
         transformers.add(new ReflowTransformer());
         transformers.add(new DeleteTransformer("<p>"));
 //        transformers.add(new ReplaceTransformer("</td>","\n</td>"));
-          transformers.add(new ReplaceTransformer("\\{","\\{"));
+          transformers.add(new ReplaceTransformer("\\{","\\{"));  // Escape braces
           transformers.add(new ReplaceTransformer("\\}","\\}"));
-//          transformers.add(new ReplaceTransformer("\\[","\\["));
-//          transformers.add(new ReplaceTransformer("\\]","\\]"));
+
+        transformers.add(new ReplaceTransformer("\\[","\\["));  // Escape brackets
+        transformers.add(new ReplaceTransformer("\\]","\\]"));
+        transformers.add(new PreTagTransformer());              // Unescape brackets inside <pre>
+//
         transformers.add(new ReplaceTransformer("<br>","\\\\"));
         transformers.add(new ReplaceTransformer("<table.*?>|</table>","{table}"));
         transformers.add(new ReplaceTransformer("<tr>|</tr>","{tr}"));
@@ -82,9 +82,14 @@
 
 //        transformers.add(new TagTransformer("\\{center}\\n\\{table}\\n\\{tr\\}\\n\\s{2}\\{td\\}\\{center\\}\\*Note\\*\\{center\\}(.*?)\\s\\{td\\}\\n\\s{2}\\{tr\\}\\{table\\}",    "{note}",  "{note}"));
 //        transformers.add(new TagTransformer("(\\S)\\s\\n",    "", " "));
-        transformers.add(new TagTransformer("<blockquote>(.*)</blockquote>",    "{quote}", "{quote}"));
+        transformers.add(new TagTransformer("<blockquote>(.*?)</blockquote>", true,   "{quote}", "{quote}"));
         transformers.add(new DeleteTransformer("(?s)<hr.*?>"));
         transformers.add(new ReflowTransformer("(\\{note\\})([^\\{]*)(\\{note\\})"));
+        transformers.add(new TagTransformer("<sup>(.*?)</sup>", true, "^\\[","\\]^ "));
+        transformers.add(new ReplaceTransformer("&lt;","<"));
+        transformers.add(new ReplaceTransformer("&gt;",">"));
+        transformers.add(new ReplaceTransformer("&quot;","\""));
+        transformers.add(new ReplaceTransformer(":\\)",": )"));  // No smilies...
 
     }
 
--- a/src/org/nwoca/ssdt/tools/html2wiki/PreTagTransformer.java	Thu Jan 27 16:37:27 2011 -0500
+++ b/src/org/nwoca/ssdt/tools/html2wiki/PreTagTransformer.java	Thu Jan 27 18:07:28 2011 -0500
@@ -14,7 +14,8 @@
         boolean first = true;
         while (matcher.find( first ? 0 : matcher.end())) {
             String temp = buffer.substring(matcher.start(),matcher.end());
-            temp = temp.replaceAll("(?m)^\\s","\r&nbsp;");
+            temp = temp.replaceAll("\\\\\\[","[");
+            temp = temp.replaceAll("\\\\\\]","]");
             buffer.replace(matcher.start(),matcher.end(),temp);
             first = false;
         }
@@ -22,7 +23,7 @@
     }
         
     public String toString() {
-        return "Preserving <pre> tag spacing";
+        return "Unescaping Pre tag contents";
     }
 
 }
\ No newline at end of file