# HG changeset patch
# User smith@nwoca.org
# Date 1147466742 14400
# Node ID f8b1ea49d0658fcacefa2a1c2c748d2c13b7c98b
Initial version of crude HTML to WikiText converter. Customized for converting HTML files from DEC Document into Wiki markup.
diff -r 000000000000 -r f8b1ea49d065 .cvsignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.cvsignore Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,2 @@
+build
+dist
diff -r 000000000000 -r f8b1ea49d065 build.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/build.xml Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,83 @@
+
+
+
+
+
+ Builds, tests, and runs the project html2wiki.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f8b1ea49d065 manifest.mf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/manifest.mf Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+X-COMMENT: Main-Class will be added automatically by build
+
diff -r 000000000000 -r f8b1ea49d065 nbproject/.cvsignore
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nbproject/.cvsignore Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,1 @@
+private
diff -r 000000000000 -r f8b1ea49d065 nbproject/build-impl.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nbproject/build-impl.xml Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,541 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must set src.dir
+ Must set test.src.dir
+ Must set build.dir
+ Must set dist.dir
+ Must set build.classes.dir
+ Must set dist.javadoc.dir
+ Must set build.test.classes.dir
+ Must set build.test.results.dir
+ Must set build.classes.excludes
+ Must set dist.jar
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select some files in the IDE or set javac.includes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ To run this application from the command line without Ant, try:
+
+
+
+
+
+
+ java -cp "${run.classpath.with.dist.jar}" ${main.class}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ To run this application from the command line without Ant, try:
+
+ java -jar "${dist.jar.resolved}"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set run.class
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set debug.class
+
+
+
+
+ Must set fix.includes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select some files in the IDE or set javac.includes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Some tests failed; see details above.
+
+
+
+
+
+
+
+
+ Must select some files in the IDE or set test.includes
+
+
+
+ Some tests failed; see details above.
+
+
+
+
+ Must select one file in the IDE or set test.class
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set applet.url
+
+
+
+
+
+
+
+
+ Must select one file in the IDE or set applet.url
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f8b1ea49d065 nbproject/genfiles.properties
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nbproject/genfiles.properties Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,8 @@
+build.xml.data.CRC32=9d118728
+build.xml.script.CRC32=867757ba
+build.xml.stylesheet.CRC32=d5b6853a
+# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
+# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
+nbproject/build-impl.xml.data.CRC32=9d118728
+nbproject/build-impl.xml.script.CRC32=22ed648d
+nbproject/build-impl.xml.stylesheet.CRC32=99b91518
diff -r 000000000000 -r f8b1ea49d065 nbproject/project.properties
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nbproject/project.properties Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,60 @@
+application.args=
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/html2wiki.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+jar.compress=false
+javac.classpath=\
+ ${libs.commons-io-1.2.classpath}
+# Space-separated list of extra javac options
+javac.compilerargs=-Xlint:unchecked
+javac.deprecation=false
+javac.source=1.5
+javac.target=1.5
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}:\
+ ${libs.junit.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+# Property libs.commons-io-1.2.classpath is set here just to make sharing of project simpler.
+# The library definition has always preference over this property.
+libs.commons-io-1.2.classpath=../shared/commons-io-1.2/commons-io-1.2.jar
+main.class=org.nwoca.ssdt.tools.html2wiki.Html2Wiki
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+platform.active=default_platform
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project
+# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value
+# or test-sys-prop.name=value to set system properties for unit tests):
+run.jvmargs=
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+src.dir=src
+test.src.dir=test
diff -r 000000000000 -r f8b1ea49d065 nbproject/project.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/nbproject/project.xml Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,16 @@
+
+
+ org.netbeans.modules.java.j2seproject
+
+
+ html2wiki
+ 1.6.5
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/BadTableDataTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/BadTableDataTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,55 @@
+/*
+ * BadTableDataTransformer.java
+ *
+ * Created on May 10, 2006, 10:42 AM
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+/**
+ *
+ * @author SMITH
+ */
+public class BadTableDataTransformer implements Transformer {
+
+ private Pattern startPattern = Pattern.compile("
");;
+ private Pattern terminationPattern = Pattern.compile(" | | | |");
+ private Pattern endTRPattern = Pattern.compile("");
+
+ private String terminator = "";
+
+ public BadTableDataTransformer() {
+ }
+
+    /**
+     * For each match of startPattern, inserts {@code terminator} in front of the next
+     * terminationPattern match unless endTRPattern already matches between the two.
+     */
+    public void apply(StringBuffer buffer) {
+        Matcher opener = startPattern.matcher(buffer);
+        Matcher boundary = terminationPattern.matcher(buffer);
+        Matcher closer = endTRPattern.matcher(buffer);
+
+        // find(int) resets the matcher, so each search sees the buffer as edited so far.
+        for (int cursor = 0; opener.find(cursor); ) {
+            if (!boundary.find(opener.end())) {
+                cursor = opener.end();   // no boundary ahead: just step over this opener
+                continue;
+            }
+            closer.region(opener.end(), boundary.start());
+            if (!closer.find()) {
+                buffer.insert(boundary.start(), terminator);
+            }
+            cursor = boundary.start();
+        }
+    }
+
+ public String toString() {
+ return "Closing: " + startPattern.pattern() + " with " + terminator;
+ }
+
+}
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/BadTableRowTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/BadTableRowTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,59 @@
+/*
+ * BadTableRowTransformer.java
+ *
+ * Created on May 10, 2006, 10:42 AM
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+/**
+ *
+ * @author SMITH
+ */
+public class BadTableRowTransformer implements Transformer {
+
+ private Pattern startPattern = Pattern.compile("
");;
+ private Pattern terminationPattern = Pattern.compile("
|");;
+ private Pattern endTRPattern = Pattern.compile("
");
+
+ private String terminator = "";
+
+ /** Creates a new instance of BadTableRowTransformer */
+ public BadTableRowTransformer() {
+ }
+
+    /**
+     * For each match of startPattern, looks ahead to the next terminationPattern match and
+     * inserts {@code terminator} in front of it unless endTRPattern already matches somewhere
+     * between the beginning of the start match and that point.
+     */
+    public void apply(StringBuffer buffer) {
+        Matcher rowStart = startPattern.matcher(buffer);
+        Matcher nextBoundary = terminationPattern.matcher(buffer);
+        Matcher rowEnd = endTRPattern.matcher(buffer);
+
+        int resumeAt = 0;
+        while (rowStart.find(resumeAt)) {
+            boolean bounded = nextBoundary.find(rowStart.end());
+            if (bounded) {
+                // Only the span from the start match up to the boundary is checked.
+                rowEnd.region(rowStart.start(), nextBoundary.start());
+                boolean alreadyClosed = rowEnd.find();
+                if (!alreadyClosed) {
+                    buffer.insert(nextBoundary.start(), terminator);
+                }
+            }
+            // Continue at the boundary when there is one, otherwise just past the start match.
+            resumeAt = bounded ? nextBoundary.start() : rowStart.end();
+        }
+    }
+
+ public String toString() {
+ return "Closing: " + startPattern.pattern() + " with " + terminator;
+ }
+
+}
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/ChapterTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/ChapterTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,32 @@
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+
+class ChapterTransformer implements Transformer {
+ private Pattern chapterPattern = Pattern.compile("^
.*?(.*?)
",Pattern.MULTILINE + Pattern.DOTALL);
+ private String category;
+ public ChapterTransformer() {
+
+ }
+ public ChapterTransformer(String category) {
+ this.category = category;
+ }
+
+    /** Replaces each chapterPattern match with its group 1, a __TOC__ line and, when set, a category link. */
+    public void apply(StringBuffer buffer) {
+        Matcher matcher = chapterPattern.matcher(buffer);
+        int from = 0;
+        // Resume just past each replacement instead of rescanning from 0, so converted text is never re-matched.
+        while (matcher.find(from)) {
+            String wiki = "" + matcher.group(1) + "\n__TOC__"
+                    + (category == null ? "" : "\n[[Category:" + category + "]]");
+            buffer.replace(matcher.start(), matcher.end(), wiki);
+            from = matcher.start() + wiki.length();
+        }
+    }
+
+ public String toString() {
+ return "Replace Chapter markers";
+ }
+
+}
\ No newline at end of file
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/CloseTagTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/CloseTagTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,47 @@
+/*
+ * CloseTagTransformer.java
+ *
+ * Created on May 10, 2006, 10:42 AM
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+/**
+ * Transformer that supplies a missing closing tag: after each match of the start expression
+ * it inserts {@code terminator} in front of the next match of the termination expression.
+ * @author SMITH
+ */
+public class CloseTagTransformer implements Transformer {
+    private final Pattern startPattern;
+    private final Pattern terminationPattern;
+    private final String terminator;
+
+    /** Compiles both regular expressions; {@code terminator} is the literal text to insert. */
+    public CloseTagTransformer(String startExp, String termExp, String terminator) {
+        this.startPattern = Pattern.compile(startExp);
+        this.terminationPattern = Pattern.compile(termExp);
+        this.terminator = terminator;
+    }
+
+    /** Inserts the terminator before the first termination match that follows each start match. */
+    public void apply(StringBuffer buffer) {
+        Matcher startMatcher = startPattern.matcher(buffer);
+        Matcher terminateMatcher = terminationPattern.matcher(buffer);
+        // find(int) resets the matcher each time, so the search region grows with the insertions.
+        for (int from = 0; from <= buffer.length() && startMatcher.find(from); ) {
+            int afterStart = startMatcher.end();
+            from = afterStart > startMatcher.start() ? afterStart : afterStart + 1; // step over empty matches
+            if (terminateMatcher.find(afterStart)) {
+                buffer.insert(terminateMatcher.start(), terminator);
+            }
+        }
+    }
+
+    public String toString() {
+        return "Closing: " + startPattern.pattern() + " with " + terminator;
+    }
+}
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/DeleteTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/DeleteTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,30 @@
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+
+/** Transformer that deletes every match of a regular expression from the buffer. */
+class DeleteTransformer implements Transformer {
+    private final Pattern delete;
+
+    /** Compiles {@code regex}, adding {@link Pattern#MULTILINE} when {@code multiLine} is true. */
+    public DeleteTransformer(String regex, boolean multiLine) {
+        delete = Pattern.compile(regex, multiLine ? Pattern.MULTILINE : 0);
+    }
+    /** Compiles {@code regex} with no flags. */
+    public DeleteTransformer(String regex) {
+        this(regex, false);
+    }
+    /** Removes all matches in place; after a deletion the search resumes where that match began. */
+    public void apply(StringBuffer buffer) {
+        Matcher matcher = delete.matcher(buffer);
+        int from = 0;
+        while (from <= buffer.length() && matcher.find(from)) {
+            buffer.delete(matcher.start(), matcher.end());
+            // An empty match deletes nothing; step past it or find() would return it forever.
+            from = matcher.start() + (matcher.end() == matcher.start() ? 1 : 0);
+        }
+    }
+    public String toString() {
+        return "Delete: " + delete.pattern();
+    }
+}
\ No newline at end of file
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/Html2Wiki.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,207 @@
+package org.nwoca.ssdt.tools.html2wiki;
+/*
+ * Html2Wiki.java
+ *
+ * Created on May 9, 2006, 3:22 PM
+ *
+ */
+
+import java.io.*;
+import java.util.Collection;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+import org.apache.commons.io.FileSystemUtils;
+import org.apache.commons.io.FileUtils;
+import java.util.regex.*;
+import org.apache.commons.io.FilenameUtils;
+
+/**
+ * Converts HTML documents into MediaWiki text.
+ *
+ * Heavily customized to handle HTML produced by DEC DOCUMENT
+ * SOFTWARE doctype. Breaks file into Chapters in the manner done
+ * by Document. Needs modification to work with other HTML files.
+ *
+ * @author SMITH
+ */
+public class Html2Wiki {
+
+ private StringBuffer buffer;
+ private Collection transformers;
+ private boolean converted = false;
+ private static String category;
+
+ /** Creates a new instance of Html2Wiki. */
+ public Html2Wiki(String html) {
+ buffer = new StringBuffer(html);
+ transformers = new ArrayList();
+ transformers.add(new PreTagTransformer());
+ transformers.add(new DeleteTransformer("^\\s",true));
+ transformers.add(new DeleteTransformer("|||"));
+ transformers.add(new DeleteTransformer("(\\n|\\r)*",true));
+ transformers.add(new DeleteTransformer("|"));
+ transformers.add(new DeleteTransformer("(?m)^\\*"));
+ transformers.add(new DeleteTransformer("|
"));
+ transformers.add(new DeleteTransformer(""));
+ transformers.add(new DeleteTransformer("(?m)
$"));
+ transformers.add(new DeleteTransformer("|"));
+ transformers.add(new CloseTagTransformer("
","(\n|\r)*(|||"));
+ transformers.add(new BadTableDataTransformer());
+ transformers.add(new BadTableRowTransformer());
+ transformers.add(new ReplaceTransformer("","\n"));
+ transformers.add(new ChapterTransformer(category));
+ transformers.add(new TagTransformer("(.*?)", "''"));
+ transformers.add(new TagTransformer("(.*?)", "'''"));
+ transformers.add(new TagTransformer("(?s)(.*?)", "", ""));
+ transformers.add(new TagTransformer("(.*)
", "== ", " =="));
+ transformers.add(new TagTransformer("(.*)
", "=== ", " ==="));
+ transformers.add(new TagTransformer("(accessing the program|sample run|sample screens?|sample reports?)[h|H]3>","=== ", " ==="));
+ transformers.add(new TagTransformer("(.*)
", "", ""));
+ transformers.add(new TagTransformer("(.*)
", "==== ", " ===="));
+ transformers.add(new TagTransformer("(.*)
", "===== ", " ====="));
+ transformers.add(new TagTransformer("(.*)
", "====== ", " ======"));
+ transformers.add(new TagTransformer("(.*)
", "======= ", " ======="));
+ transformers.add(new DeleteTransformer("(?s)"));
+
+ }
+
+    /**
+     * Converts every *.html file found directly in the given directory.
+     *
+     * @param args the input directory, optionally followed by a category name
+     * @throws IOException if the directory cannot be listed or a file cannot be read or written
+     */
+    public static void main(String[] args) throws IOException {
+        if (args.length == 0) {
+            System.out.println("Usage:");
+            System.out.println(" Html2Wiki {inputDirectory} [Category]");
+            System.out.println(" default is current directory");
+            System.out.println(" Processes all *.html files. ");
+            System.out.println(" Each 'chapter' written to *.wiki");
+            return;
+        }
+        if (args.length > 1) {
+            category = args[1];
+        }
+        File inputs = new File(args[0]);
+        File[] inputFiles = inputs.listFiles(new HtmlFileFilter());
+        // listFiles returns null when the path is not a directory or cannot be read.
+        if (inputFiles == null) {
+            throw new FileNotFoundException("Not a readable directory: " + inputs.getAbsolutePath());
+        }
+        for (int i = 0; i < inputFiles.length; i++) {
+            process(inputFiles[i]);
+        }
+    }
+
+    /**
+     * Converts one HTML file: writes each chapter to a ".wiki" file named after the chapter,
+     * next to the input, plus a ".wikiIndex" file listing the chapters as numbered wiki links.
+     *
+     * @param input the HTML file to convert
+     * @throws IOException if reading the input or writing any output fails
+     */
+    protected static void process(File input) throws IOException {
+        System.out.println(input.getAbsoluteFile());
+
+        String html = FileUtils.readFileToString(input,null);
+        WikiChapter[] chapters = new Html2Wiki(html).getWikiChapters();
+        System.out.format("Writing %d wiki files...\n",chapters.length);
+
+        StringBuffer wikiIndex = new StringBuffer("Contents:\n\n");
+        for (WikiChapter chapter : chapters) {
+            String title = chapter.getChapterName();
+            wikiIndex.append("# [[").append(title).append("]]\n");
+            File chapterFile = new File(input.getParent(), generateFilename(title)+".wiki");
+            FileUtils.writeStringToFile(chapterFile, chapter.getContents().toString(), null);
+        }
+
+        System.out.println("Writing wikiIndex...");
+        // The index sits beside the input and shares its name up to the extension.
+        String indexPath = FilenameUtils.removeExtension(input.getPath())+".wikiIndex";
+        FileUtils.writeStringToFile(new File(indexPath),wikiIndex.toString(),null);
+    }
+
+    /** Returns {@code input} with every backslash, slash, colon and parenthesis replaced by '-'. */
+    public static String generateFilename(String input) {
+        return Pattern.compile("\\\\|/|:|\\(|\\)").matcher(input).replaceAll("-");
+    }
+ public String getWikiText() { // returns the whole buffer as a String after conversion
+ convert(); // applies the transformers on the first call; later calls do nothing
+ return buffer.toString();
+ }
+
+ public WikiChapter[] getWikiChapters() {
+
+ convert();
+
+ List chapters = new ArrayList();
+
+ Pattern chapterPat = Pattern.compile("");
+ Matcher begin = chapterPat.matcher(buffer);
+ Matcher end = chapterPat.matcher(buffer);
+
+ while(begin.find()) {
+
+
+ end.find(begin.end());
+
+ Pattern chapterNamePat = Pattern.compile("(.*?)");
+
+ Matcher chapterNameMatcher = chapterNamePat.matcher(buffer);
+
+ String chapterName = chapterNameMatcher.find(begin.start()) ? chapterNameMatcher.group(1) : null;
+
+ CharSequence contents = buffer.subSequence(chapterName == null ? begin.start() : chapterNameMatcher.end()
+ ,end.hitEnd() ? buffer.length() : end.start());
+
+ chapters.add(new WikiChapter(chapterName,contents));
+
+ }
+ return (WikiChapter[])chapters.toArray(new WikiChapter[]{});
+ }
+
+    /** Runs every registered transformer over the buffer the first time it is called. */
+    private void convert() {
+        if (converted) {
+            return;
+        }
+        for (Transformer step : transformers) {
+            System.out.println(".Applying: " + step);
+            step.apply(buffer);
+        }
+
+        converted = true;
+    }
+
+    /** Accepts paths whose lower-cased file name matches the ".html" suffix pattern. */
+    private static class HtmlFileFilter implements FileFilter {
+        public boolean accept(File pathname) {
+            return Pattern.matches("^.*\\.html$", pathname.getName().toLowerCase());
+        }
+    }
+ private static class WikiChapter {
+ private String chapterName;
+ private CharSequence contents;
+
+ public WikiChapter(String chapterName, CharSequence contents) {
+ this.chapterName = chapterName.replaceAll("\\\\|/|:|\\(|\\)","-").replaceAll("\\s+"," ").replaceAll("&","and");
+
+ this.contents = contents;
+ }
+
+ public String getChapterName() {
+ return chapterName;
+ }
+
+ public CharSequence getContents() {
+ return contents;
+ }
+
+ public String toString() {
+ return "Chapter: " + chapterName + "\nContents: " + contents;
+ }
+ }
+
+}
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/PreTagTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/PreTagTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,28 @@
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+
+class PreTagTransformer implements Transformer {
+ private Pattern pattern = Pattern.compile(".*?
",Pattern.MULTILINE + Pattern.DOTALL);
+ public PreTagTransformer() {
+
+ }
+
+    /** In every pattern match, replaces the first whitespace character of each line with "\r ". */
+    public void apply(StringBuffer buffer) {
+        Matcher matcher = pattern.matcher(buffer);
+        int from = 0;
+        while (matcher.find(from)) {
+            String block = matcher.group().replaceAll("(?m)^\\s","\r ");
+            buffer.replace(matcher.start(), matcher.end(), block);
+            // The rewritten block may be longer than the match, so step by its new length
+            // rather than by the match end, which is an offset into the old text.
+            from = matcher.start() + block.length();
+        }
+    }
+
+ public String toString() {
+ return "Preserving tag spacing";
+ }
+
+}
\ No newline at end of file
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/ReplaceTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/ReplaceTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,31 @@
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+
+/** Transformer that replaces every match of a regular expression with a fixed literal string. */
+class ReplaceTransformer implements Transformer {
+    private final Pattern replace;
+    private final String replacement;
+
+    /** Compiles {@code regex}; {@code replacement} is inserted verbatim (no group references). */
+    public ReplaceTransformer(String regex, String replacement) {
+        replace = Pattern.compile(regex);
+        this.replacement = replacement;
+    }
+
+    /** Replaces all matches in place, resuming each search just past the inserted replacement. */
+    public void apply(StringBuffer buffer) {
+        Matcher matcher = replace.matcher(buffer);
+        int from = 0;
+        while (from <= buffer.length() && matcher.find(from)) {
+            buffer.replace(matcher.start(), matcher.end(), replacement);
+            // The match end is an offset into the old text; after the edit it can skip
+            // shifted text or lie past the end of the buffer, so recompute the cursor.
+            from = matcher.start() + replacement.length() + (matcher.end() == matcher.start() ? 1 : 0);
+        }
+    }
+
+    public String toString() {
+        return "Replace: " + replace.pattern() + " with " + replacement;
+    }
+}
\ No newline at end of file
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/TagTransformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/TagTransformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,35 @@
+package org.nwoca.ssdt.tools.html2wiki;
+
+import java.util.regex.*;
+
+/** Transformer that rewrites each regex match as a begin marker, the group 1 text and an end marker. */
+class TagTransformer implements Transformer {
+    private Pattern tagPattern;
+    private String replacementBegin;
+    private String replacementEnd;
+
+    /** Uses the same marker on both sides of the captured text. */
+    public TagTransformer(String regexp, String replacement) {
+        this(regexp, replacement, replacement);
+    }
+
+    /** Compiles {@code regexp}; apply reads capturing group 1 of every match. */
+    public TagTransformer(String regexp, String replacementBegin, String replacementEnd) {
+        this.tagPattern = Pattern.compile(regexp);
+        this.replacementBegin = replacementBegin;
+        this.replacementEnd = replacementEnd;
+    }
+
+    /** Rewrites matches in place; each new search starts where the previous match began. */
+    public void apply(StringBuffer buffer) {
+        Matcher tag = tagPattern.matcher(buffer);
+        for (int from = 0; tag.find(from); from = tag.start()) {
+            String rewritten = replacementBegin + tag.group(1) + replacementEnd;
+            buffer.replace(tag.start(), tag.end(), rewritten);
+        }
+    }
+
+    public String toString() {
+        return "Replace: " + tagPattern.pattern() + " with " +replacementBegin + "..." + replacementEnd;
+    }
+}
\ No newline at end of file
diff -r 000000000000 -r f8b1ea49d065 src/org/nwoca/ssdt/tools/html2wiki/Transformer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/org/nwoca/ssdt/tools/html2wiki/Transformer.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,19 @@
+/*
+ * Transformer.java
+ *
+ * Created on May 9, 2006, 6:47 PM
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package org.nwoca.ssdt.tools.html2wiki;
+
+/**
+ * A single editing step that is applied to the text held in a StringBuffer.
+ * @author SMITH
+ */
+public interface Transformer {
+ public void apply(StringBuffer buffer); // implementations edit the given buffer in place
+
+}
diff -r 000000000000 -r f8b1ea49d065 test/org/nwoca/ssdt/tools/html2wiki/Html2WikiTest.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/org/nwoca/ssdt/tools/html2wiki/Html2WikiTest.java Fri May 12 16:45:42 2006 -0400
@@ -0,0 +1,54 @@
+/*
+ * Html2WikiTest.java
+ * JUnit based test
+ *
+ * Created on May 9, 2006, 4:16 PM
+ */
+
+package org.nwoca.ssdt.tools.html2wiki;
+
+import junit.framework.*;
+import java.io.*;
+
+
+/**
+ * JUnit 3 test case that drives Html2Wiki.main against a directory on the local disk.
+ * @author SMITH
+ */
+public class Html2WikiTest extends TestCase {
+
+ public Html2WikiTest(String testName) {
+ super(testName);
+ }
+
+ protected void setUp() throws Exception {
+ }
+
+ protected void tearDown() throws Exception {
+ }
+
+ public static Test suite() {
+ TestSuite suite = new TestSuite(Html2WikiTest.class);
+
+ return suite;
+ }
+
+ /**
+ * Runs Html2Wiki.main with a hard-coded input directory and category; makes no assertions.
+ */
+ public void testMain() throws Exception {
+ System.out.println("main");
+
+ // NOTE(review): the directory below is machine-specific; main fails when it cannot be listed.
+ Html2Wiki.main(new String[] {"\\C:\\temp\\", "EMIS System Managers Manual"});
+
+ }
+
+// public void testHtml2Wiki() throws Exception {
+//
+// File testFile = new File("\\C:\\temp\\test.html");
+// Html2Wiki.process(testFile);
+//
+// }
+
+}