Added string tokenizer in java (#184)

* Added string tokenizer in java

* fix typos

* Update Readme change line index from 4 to 5
pull/198/head
Toihir Halim 2021-04-15 02:37:29 +02:00 committed by GitHub
parent 2f86af8736
commit ef836c94d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 97 additions and 0 deletions

View File

@ -24,6 +24,7 @@
2. [All subsequences](java/sequence.java)
3. [KMP String Searching](java/kmp.cpp)
4. [Rabin Karp String Searching](java/rabin-karp.cpp)
5. [String Tokenizer](java/tokenizer.java)
### Python

View File

@ -0,0 +1,96 @@
import java.util.ArrayList;
import java.util.List;
/**
 * Minimal string tokenizer that splits a text on a set of single-character
 * delimiters and never produces empty tokens.
 */
public class tokenizer {

    /**
     * Tells whether a character is one of the given delimiters.
     *
     * @param c          the character to test
     * @param delimiters the delimiter characters; {@code null} is treated as "no delimiters"
     * @return {@code true} if {@code c} equals any element of {@code delimiters}
     */
    public static boolean charIsDelimiter(char c, char[] delimiters) {
        if (delimiters == null) {
            return false;
        }
        for (char d : delimiters) {
            if (c == d) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits {@code str} into the maximal runs of non-delimiter characters.
     *
     * <p>Consecutive, leading and trailing delimiters never produce empty
     * tokens, so an empty or delimiter-only input yields an empty list.
     *
     * @param str        the text to split; {@code null} yields an empty list
     * @param delimiters the characters to split on; when omitted (or {@code null})
     *                   a single space {@code ' '} is used
     * @return a new mutable list of the non-empty tokens, in order of appearance
     */
    public static List<String> tokenize(String str, char... delimiters) {
        List<String> tokens = new ArrayList<String>();
        if (str == null) {
            return tokens;
        }
        // By default the delimiter is a single white space.
        if (delimiters == null || delimiters.length == 0) {
            delimiters = new char[] {' '};
        }

        // StringBuilder avoids re-copying the token on every appended character.
        StringBuilder token = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (!charIsDelimiter(current, delimiters)) {
                token.append(current);
            } else if (token.length() > 0) {
                // A delimiter closes the current token; empty tokens are skipped.
                tokens.add(token.toString());
                token.setLength(0);
            }
        }
        // Flush the trailing token, applying the same "no empty token" rule
        // used inside the loop (input may be empty or end with a delimiter).
        if (token.length() > 0) {
            tokens.add(token.toString());
        }
        return tokens;
    }

    /**
     * Prints the tokens to standard output as {@code [ 'a', 'b', ]}.
     * Does nothing when {@code tokens} is {@code null}.
     *
     * @param tokens the tokens to print
     */
    public static void printTokens(List<String> tokens) {
        if (tokens == null) {
            return;
        }
        System.out.print("[ ");
        for (String token : tokens) {
            System.out.print("'" + token + "', ");
        }
        System.out.println("]");
    }

    /**
     * Demonstrates {@link #tokenize(String, char...)} with several delimiter sets.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String myString = "Hello I like pasta & pizza--hut";
        System.out.println("myString = '" + myString + "'");
        System.out.print("\ntokenize(myString) = ");
        printTokens(tokenize(myString));
        System.out.print("\ntokenize(myString, ' ', 'z') = ");
        printTokens(tokenize(myString, ' ', 'z'));
        System.out.print("\ntokenize(myString, 'p','l', 'u') = ");
        printTokens(tokenize(myString, 'p','l', 'u'));
        System.out.print("\ntokenize(myString, ' ', '&', '-') = ");
        printTokens(tokenize(myString, ' ', '&', '-'));
    }
}
/*
to call the function:
tokenize(str, delimiters)
str is the text to split
delimiters is an optional list of delimiter characters; when omitted, a single white space ' ' is used
example:
tokenize("hello world") = tokenize("hello world", ' ') = [ 'hello', 'world' ]
tokenize("hello world", ' ', 'l') = [ 'he', 'o', 'wor', 'd' ]
to run this file:
javac tokenizer.java
java tokenizer
result:
myString = 'Hello I like pasta & pizza--hut'
tokenize(myString) = [ 'Hello', 'I', 'like', 'pasta', '&', 'pizza--hut', ]
tokenize(myString, ' ', 'z') = [ 'Hello', 'I', 'like', 'pasta', '&', 'pi', 'a--hut', ]
tokenize(myString, 'p','l', 'u') = [ 'He', 'o I ', 'ike ', 'asta & ', 'izza--h', 't', ]
tokenize(myString, ' ', '&', '-') = [ 'Hello', 'I', 'like', 'pasta', 'pizza', 'hut', ]
*/