Added string tokenizer in java (#184)
* Added string tokenizer in java * fix typos * Update Readme change line index from 4 to 5pull/198/head
parent
2f86af8736
commit
ef836c94d0
|
@ -24,6 +24,7 @@
|
|||
2. [All subsequences](java/sequence.java)
|
||||
3. [KMP String Searching](java/kmp.cpp)
|
||||
4. [Rabin Karp String Searching](java/rabin-karp.cpp)
|
||||
5. [String Tokenizer](java/tokenizer.java)
|
||||
|
||||
### Python
|
||||
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Minimal string tokenizer: splits a text into tokens using a set of
 * single-character delimiters.
 *
 * <p>Consecutive delimiters, as well as delimiters at the very start or end of
 * the text, never produce empty tokens.
 */
public class tokenizer {

    /**
     * Checks whether a character is one of the given delimiters.
     *
     * @param c          the character to test
     * @param delimiters the delimiter characters to compare against
     * @return {@code true} if {@code c} equals any element of {@code delimiters}
     */
    public static boolean charIsDelimiter(char c, char[] delimiters) {
        for (char d : delimiters) {
            if (c == d) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits {@code str} into the non-empty runs of characters found between
     * delimiters.
     *
     * @param str        the text to split; must not be {@code null}
     * @param delimiters the delimiter characters; when none are given (or the
     *                   array is {@code null}) a single space {@code ' '} is used
     * @return the tokens in order of appearance; empty when {@code str} is empty
     *         or made only of delimiters
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<String> tokenize(String str, char... delimiters) {
        // By default the delimiter is white space ' '.
        if (delimiters == null || delimiters.length == 0) {
            delimiters = new char[] {' '};
        }

        List<String> tokens = new ArrayList<String>();
        // StringBuilder avoids re-copying the partial token on every character.
        StringBuilder token = new StringBuilder();

        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);

            if (!charIsDelimiter(current, delimiters)) {
                // Not a delimiter: the character belongs to the current token.
                token.append(current);
            } else if (token.length() > 0) {
                // Delimiter reached: flush the token, skipping empty ones.
                tokens.add(token.toString());
                token.setLength(0);
            }
        }

        // Flush the last token, applying the same "no empty token" rule as
        // inside the loop (the text may be empty or end with a delimiter).
        if (token.length() > 0) {
            tokens.add(token.toString());
        }

        return tokens;
    }

    /**
     * Prints the tokens on standard output as {@code [ 'a', 'b', ]} followed by
     * a line break. Does nothing when {@code tokens} is {@code null}.
     *
     * @param tokens the tokens to print, may be {@code null}
     */
    public static void printTokens(List<String> tokens) {
        if (tokens == null) {
            return;
        }

        System.out.print("[ ");
        for (String token : tokens) {
            System.out.print("'" + token + "', ");
        }
        System.out.println("]");
    }

    /**
     * Demo: tokenizes a sample sentence with several delimiter sets.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String myString = "Hello I like pasta & pizza--hut";

        System.out.println("myString = '" + myString + "'");

        System.out.print("\ntokenize(myString) = ");
        printTokens(tokenize(myString));

        System.out.print("\ntokenize(myString, ' ', 'z') = ");
        printTokens(tokenize(myString, ' ', 'z'));

        System.out.print("\ntokenize(myString, 'p','l', 'u') = ");
        printTokens(tokenize(myString, 'p', 'l', 'u'));

        System.out.print("\ntokenize(myString, ' ', '&', '-') = ");
        printTokens(tokenize(myString, ' ', '&', '-'));
    }
}
|
||||
|
||||
/*
|
||||
to call the function:
|
||||
tokenize(str, delimiters)
|
||||
str is the text to split into tokens
|
||||
delimiters is a list of chars to split on; when none are given it defaults to a single white space: ' '
|
||||
example:
|
||||
tokenize("hello world") = tokenize("hello world", ' ') = [ 'hello', 'world' ]
|
||||
tokenize("hello world", ' ', 'l') = [ 'he', 'o', 'wor', 'd' ]
|
||||
|
||||
|
||||
to run this file:
|
||||
javac tokenizer.java
|
||||
java tokenizer
|
||||
|
||||
result:
|
||||
myString = 'Hello I like pasta & pizza--hut'
|
||||
|
||||
tokenize(myString) = [ 'Hello', 'I', 'like', 'pasta', '&', 'pizza--hut', ]
|
||||
|
||||
tokenize(myString, ' ', 'z') = [ 'Hello', 'I', 'like', 'pasta', '&', 'pi', 'a--hut', ]
|
||||
|
||||
tokenize(myString, 'p','l', 'u') = [ 'He', 'o I ', 'ike ', 'asta & ', 'izza--h', 't', ]
|
||||
|
||||
tokenize(myString, ' ', '&', '-') = [ 'Hello', 'I', 'like', 'pasta', 'pizza', 'hut', ]
|
||||
|
||||
*/
|
Loading…
Reference in New Issue