/**
 * Splits a string into tokens according to the group each character
 * belongs to, as reported by {@link #groupOf(char)}.
 *
 * A token is a maximal run of consecutive characters that share the same
 * group. Characters whose group is {@code -1} are separators: they are
 * removed from the output and end whatever token precedes them. Empty
 * tokens are never produced, so leading, trailing and repeated separators
 * are harmless.
 *
 * @author jrahm
 */
public abstract class Tokenizer {

    /** Group id of characters that are dropped and only act as separators. */
    private static final int DISCARD_GROUP = -1;

    /**
     * Tokenizes {@code str} into runs of same-group characters.
     *
     * @param str the text to split; must not be {@code null}
     * @return the tokens in order of appearance; an empty array when
     *         {@code str} is empty or consists only of discarded characters
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String[] tokenize(String str) {
        List<String> tokens = new ArrayList<String>();
        StringBuilder buffer = new StringBuilder();

        // Starting in the discard group means the first real character simply
        // opens a token, and a leading separator needs no special handling.
        int curGroup = DISCARD_GROUP;

        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            int group = groupOf(ch);

            if (group != curGroup) {
                // Always track the new group, even when leaving or entering
                // the discard group; otherwise splitting stops after the
                // first separator.
                flush(buffer, tokens);
                curGroup = group;
            }

            if (group != DISCARD_GROUP) {
                buffer.append(ch);
            }
        }
        flush(buffer, tokens);

        return tokens.toArray(new String[tokens.size()]);
    }

    /** Moves the pending token, if any, from {@code buffer} into {@code tokens}. */
    private static void flush(StringBuilder buffer, List<String> tokens) {
        if (buffer.length() > 0) {
            tokens.add(buffer.toString());
            buffer.setLength(0);
        }
    }

    /**
     * Classifies a character.
     *
     * @param ch the character to classify
     * @return the group id of {@code ch}; {@code -1} marks a character that
     *         is discarded and separates tokens
     */
    public abstract int groupOf(char ch);
}