Added ByteArray unit tests, converted tests to Kotlin

This commit is contained in:
Robinson 2023-07-04 00:07:54 +02:00
parent 9ad6095a4e
commit 617e142fc1
No known key found for this signature in database
GPG Key ID: 8E7DB78588BD6F5C
2 changed files with 341 additions and 211 deletions

View File

@ -0,0 +1,168 @@
/*
* Copyright 2023 dorkbox, llc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dorkbox.fsm
import dorkbox.fsm.FiniteStateMachine.build
import org.junit.Assert
import org.junit.Test
import java.io.*
import java.nio.file.Files
import java.nio.file.Paths
import java.util.*
/**
 * Unit tests for the tries returned by `FiniteStateMachine.build`.
 *
 * The build/parse and save/load tests use a byte-array keyed trie
 * ([DoubleArrayByteArrayTrie]). The remaining tests build their trie from
 * String-keyed maps.
 *
 * NOTE(review): testMatches, testFirstMatch, testCancellation and testBuildEmptyTrie
 * pass String-keyed maps to `build`, so they presumably exercise the String trie rather
 * than the byte-array trie this class is named after -- confirm whether byte-array
 * variants of these tests were intended.
 *
 * @author hankcs
 */
class TestDoubleArrayByteTrie {

    /**
     * Builds a small byte-array keyed trie in which every keyword maps to itself.
     *
     * @return a trie containing the keywords "hers", "his", "she" and "he"
     */
    private fun buildASimpleDoubleArrayByteArrayTrie(): DoubleArrayByteArrayTrie<String> {
        // Collect the test data set: each keyword (as bytes) is associated with its own text.
        val map = mutableMapOf<ByteArray, String>()
        for (keyword in arrayOf("hers", "his", "she", "he")) {
            map[keyword.toByteArray()] = keyword
        }

        // Build a DoubleArrayByteArrayTrie
        return build(map)
    }

    /**
     * Parses the bytes of "uhers" with [acdat] and checks that every reported hit's value
     * equals the slice of the input it was reported for.
     *
     * @param acdat the trie to validate; expected to map each keyword to its own text
     */
    private fun validateASimpleDoubleArrayByteArrayTrie(acdat: DoubleArrayByteArrayTrie<String>) {
        val bytes = "uhers".toByteArray()

        // Callback form: each hit must cover exactly the bytes that spell its value.
        acdat.parseBytes(bytes, object : IHit<String> {
            override fun hit(begin: Int, end: Int, value: String) {
                System.out.printf("[%d:%d]=%s\n", begin, end, value)
                val matched = String(bytes.copyOfRange(begin, end))
                Assert.assertEquals(matched, value)
            }
        })

        // Or simply collect all hits at once.
        val wordList = acdat.parseBytes(bytes)
        println(wordList)
    }

    /** Builds the simple byte-array trie and validates it directly. */
    @Test
    @Throws(Exception::class)
    fun testBuildAndParseSimply() {
        validateASimpleDoubleArrayByteArrayTrie(buildASimpleDoubleArrayByteArrayTrie())
    }

    /**
     * Hit callback that counts how often it is invoked.
     *
     * @param countAll the value returned from every [hit] call; testCancellation expects
     * `false` to end the scan after the first hit and `true` to let it continue
     */
    private class CountHits(private val countAll: Boolean) : IHitCancellable<String> {
        /** Number of hits reported so far. */
        var count = 0
            private set

        override fun hit(begin: Int, end: Int, value: String): Boolean {
            count++
            return countAll
        }
    }

    /** `matches` must be true only for texts containing at least one complete keyword. */
    @Test
    fun testMatches() {
        val map: MutableMap<String, Int> = hashMapOf("space" to 1, "keyword" to 2, "ch" to 3)
        val trie = build(map)

        // Exact keywords, and keywords embedded in longer text.
        Assert.assertTrue(trie.matches("space"))
        Assert.assertTrue(trie.matches("keyword"))
        Assert.assertTrue(trie.matches("ch"))
        Assert.assertTrue(trie.matches(" ch"))
        Assert.assertTrue(trie.matches("chkeyword"))
        Assert.assertTrue(trie.matches("oooospace2"))

        // Keyword prefixes, the empty string and unrelated text must not match.
        Assert.assertFalse(trie.matches("c"))
        Assert.assertFalse(trie.matches(""))
        Assert.assertFalse(trie.matches("spac"))
        Assert.assertFalse(trie.matches("nothing"))
    }

    /** `findFirst` must report the first hit in the text, or null when there is none. */
    @Test
    fun testFirstMatch() {
        val map: MutableMap<String, Int> = hashMapOf("space" to 1, "keyword" to 2, "ch" to 3)
        val trie = build(map)

        // The whole text is a keyword.
        val exact = checkNotNull(trie.findFirst("space")) { "expected a hit for \"space\"" }
        Assert.assertEquals(0, exact.begin.toLong())
        Assert.assertEquals(5, exact.end.toLong())
        Assert.assertEquals(1, exact.value.toLong())

        // "space" occurs at [24:29], before the later "ch", so it is the hit that is returned.
        val embedded = checkNotNull(trie.findFirst("a lot of garbage in the space ch")) {
            "expected a hit inside the longer text"
        }
        Assert.assertEquals(24, embedded.begin.toLong())
        Assert.assertEquals(29, embedded.end.toLong())
        Assert.assertEquals(1, embedded.value.toLong())

        // No complete keyword present -> no hit.
        Assert.assertNull(trie.findFirst(""))
        Assert.assertNull(trie.findFirst("value"))
        Assert.assertNull(trie.findFirst("keywork"))
        Assert.assertNull(trie.findFirst(" no pace"))
    }

    /**
     * A cancellable callback returning `false` must see only the first hit, while one
     * returning `true` must see every hit.
     */
    @Test
    @Throws(Exception::class)
    fun testCancellation() {
        // Collect the test data set
        val map = TreeMap<String, String>()
        for (keyword in arrayOf("foo", "bar")) {
            map[keyword] = keyword
        }
        val acdat = build(map)

        // The haystack contains each keyword exactly once.
        val haystack = "sfwtfoowercwbarqwrcq"
        val cancellingMatcher = CountHits(false)
        val countingMatcher = CountHits(true)

        println("Testing cancellation")
        acdat.parseText(haystack, cancellingMatcher)
        acdat.parseText(haystack, countingMatcher)

        // JUnit's argument order is (expected, actual); keeping it makes failure messages read correctly.
        Assert.assertEquals(1, cancellingMatcher.count.toLong())
        Assert.assertEquals(2, countingMatcher.count.toLong())
    }

    /**
     * Serializes the simple byte-array trie to a temporary file, reads it back and
     * validates the restored instance.
     */
    @Suppress("UNCHECKED_CAST")
    @Test
    @Throws(Exception::class)
    fun testSaveAndLoad() {
        val original = buildASimpleDoubleArrayByteArrayTrie()

        // A uniquely named temp file avoids clashing with other tests that also write "acdat.tmp".
        val tmpPath = Files.createTempFile("acdat", ".tmp")
        try {
            println("Saving acdat to: $tmpPath")
            // `use` closes the stream even when (de)serialization throws.
            ObjectOutputStream(Files.newOutputStream(tmpPath)).use { it.writeObject(original) }

            println("Loading acdat from: $tmpPath")
            val restored = ObjectInputStream(Files.newInputStream(tmpPath)).use { stream ->
                stream.readObject() as DoubleArrayByteArrayTrie<String>
            }

            validateASimpleDoubleArrayByteArrayTrie(restored)
        } finally {
            // Do not leave the serialized trie behind in the temp directory.
            Files.deleteIfExists(tmpPath)
        }
    }

    /** A trie built from an empty map must report size 0 and produce no hits. */
    @Test
    fun testBuildEmptyTrie() {
        val acdat = build(TreeMap<String, String>())
        Assert.assertEquals(0, acdat.size.toLong())

        val hits = acdat.parseText("uhers")
        Assert.assertEquals(0, hits.size.toLong())
    }
}

View File

@ -13,284 +13,246 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package dorkbox.fsm
package dorkbox.fsm; import dorkbox.fsm.FiniteStateMachine.build
import org.junit.Assert
import static org.junit.Assert.assertEquals; import org.junit.Test
import static org.junit.Assert.assertFalse; import java.io.*
import static org.junit.Assert.assertNull; import java.nio.file.Files
import static org.junit.Assert.assertTrue; import java.nio.file.Paths
import java.util.*
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.junit.Test;
/** /**
* @author hankcs * @author hankcs
*/ */
public
class TestDoubleArrayStringTrie { class TestDoubleArrayStringTrie {
private private fun buildASimpleDoubleArrayStringTrie(): DoubleArrayStringTrie<String> {
DoubleArrayStringTrie<String> buildASimpleDoubleArrayStringTrie() {
// Collect test data set // Collect test data set
TreeMap<String, String> map = new TreeMap<String, String>(); val map = TreeMap<String, String>()
String[] keyArray = new String[] {"hers", "his", "she", "he"}; val keyArray = arrayOf("hers", "his", "she", "he")
for (String key : keyArray) { for (key in keyArray) {
map.put(key, key); map[key] = key
} }
// Build an DoubleArrayStringTrie // Build an DoubleArrayStringTrie
return FiniteStateMachine.INSTANCE.build(map); return build(map)
} }
private private fun validateASimpleDoubleArrayStringTrie(acdat: DoubleArrayStringTrie<String>) {
void validateASimpleDoubleArrayStringTrie(DoubleArrayStringTrie<String> acdat) {
// Test it // Test it
final String text = "uhers"; val text = "uhers"
acdat.parseText(text, new IHit<String>() { acdat.parseText(text, object : IHit<String> {
@Override override fun hit(begin: Int, end: Int, value: String) {
public System.out.printf("[%d:%d]=%s\n", begin, end, value)
void hit(int begin, int end, String value) { Assert.assertEquals(text.substring(begin, end), value)
System.out.printf("[%d:%d]=%s\n", begin, end, value);
assertEquals(text.substring(begin, end), value);
} }
}); })
// Or simply use // Or simply use
List<Hit<String>> wordList = acdat.parseText(text); val wordList = acdat.parseText(text)
System.out.println(wordList); println(wordList)
} }
@Test @Test
public @Throws(Exception::class)
void testBuildAndParseSimply() throws Exception { fun testBuildAndParseSimply() {
DoubleArrayStringTrie<String> acdat = buildASimpleDoubleArrayStringTrie(); val acdat = buildASimpleDoubleArrayStringTrie()
validateASimpleDoubleArrayStringTrie(acdat); validateASimpleDoubleArrayStringTrie(acdat)
} }
@Test @Test
public @Throws(Exception::class)
void testBuildVeryLongWord() throws Exception { fun testBuildVeryLongWord() {
TreeMap<String, String> map = new TreeMap<String, String>(); val map = TreeMap<String, String?>()
val longWordLength = 20000
int longWordLength = 20000; val word = loadText("dorkbox/fsm/text.txt")
map[word.substring(10, longWordLength)] = word.substring(10, longWordLength)
String word = loadText("dorkbox/fsm/text.txt"); map[word.substring(30, 40)] = null
map.put(word.substring(10, longWordLength), word.substring(10, longWordLength));
map.put(word.substring(30, 40), null);
// word = loadText("en/text.txt"); // word = loadText("en/text.txt");
// map.put(word.substring(10, longWordLength), word.substring(10, longWordLength)); // map.put(word.substring(10, longWordLength), word.substring(10, longWordLength));
// map.put(word.substring(30, 40), null); // map.put(word.substring(30, 40), null);
// Build an DoubleArrayStringTrie // Build an DoubleArrayStringTrie
DoubleArrayStringTrie<String> acdat = FiniteStateMachine.INSTANCE.build(map); val acdat: DoubleArrayStringTrie<String?> = build(map)
List<Hit<String>> result = acdat.parseText(word); val result = acdat.parseText(word)
Assert.assertEquals(2, result.size.toLong())
assertEquals(2, result.size()); Assert.assertEquals(
assertEquals(30, 30, result[0].begin.toLong()
result.get(0) )
.getBegin()); Assert.assertEquals(
assertEquals(40, 40, result[0].end.toLong()
result.get(0) )
.getEnd()); Assert.assertEquals(
assertEquals(10, 10, result[1].begin.toLong()
result.get(1) )
.getBegin()); Assert.assertEquals(
assertEquals(longWordLength, longWordLength.toLong(), result[1].end.toLong()
result.get(1) )
.getEnd());
} }
@Test @Test
public @Throws(Exception::class)
void testBuildAndParseWithBigFile() throws Exception { fun testBuildAndParseWithBigFile() {
// Load test data from disk // Load test data from disk
Set<String> dictionary = loadDictionary("dorkbox/fsm/dictionary.txt"); val dictionary = loadDictionary("dorkbox/fsm/dictionary.txt")
final String text = loadText("dorkbox/fsm/text.txt");
val text = loadText("dorkbox/fsm/text.txt")
// You can use any type of Map to hold data // You can use any type of Map to hold data
Map<String, String> map = new TreeMap<String, String>(); val map: MutableMap<String, String> = TreeMap()
// Map<String, String> map = new HashMap<String, String>(); // Map<String, String> map = new HashMap<String, String>();
// Map<String, String> map = new LinkedHashMap<String, String>(); // Map<String, String> map = new LinkedHashMap<String, String>();
for (String key : dictionary) { for (key in dictionary) {
map.put(key, key); map[key] = key
} }
// Build an DoubleArrayStringTrie // Build an DoubleArrayStringTrie
DoubleArrayStringTrie<String> acdat = FiniteStateMachine.INSTANCE.build(map); val acdat = build(map)
// Test it // Test it
acdat.parseText(text, new IHit<String>() { acdat.parseText(text, object : IHit<String> {
@Override override fun hit(begin: Int, end: Int, value: String) {
public Assert.assertEquals(text.substring(begin, end), value)
void hit(int begin, int end, String value) {
assertEquals(text.substring(begin, end), value);
} }
}); })
} }
private static private class CountHits internal constructor(private val countAll: Boolean) : IHitCancellable<String> {
class CountHits implements IHitCancellable<String> { var count = 0
private int count; private set
private boolean countAll;
CountHits(boolean countAll) { override fun hit(begin: Int, end: Int, value: String): Boolean {
this.count = 0; count += 1
this.countAll = countAll; return countAll
}
public
int getCount() {
return count;
}
@Override
public
boolean hit(int begin, int end, String value) {
count += 1;
return countAll;
} }
} }
@Test @Test
public fun testMatches() {
void testMatches() { val map: MutableMap<String, Int> = HashMap()
Map<String, Integer> map = new HashMap<String, Integer>(); map["space"] = 1
map.put("space", 1); map["keyword"] = 2
map.put("keyword", 2); map["ch"] = 3
map.put("ch", 3); val trie = build(map)
DoubleArrayStringTrie<Integer> trie = FiniteStateMachine.INSTANCE.build(map); Assert.assertTrue(trie.matches("space"))
Assert.assertTrue(trie.matches("keyword"))
assertTrue(trie.matches("space")); Assert.assertTrue(trie.matches("ch"))
assertTrue(trie.matches("keyword")); Assert.assertTrue(trie.matches(" ch"))
assertTrue(trie.matches("ch")); Assert.assertTrue(trie.matches("chkeyword"))
assertTrue(trie.matches(" ch")); Assert.assertTrue(trie.matches("oooospace2"))
assertTrue(trie.matches("chkeyword")); Assert.assertFalse(trie.matches("c"))
assertTrue(trie.matches("oooospace2")); Assert.assertFalse(trie.matches(""))
assertFalse(trie.matches("c")); Assert.assertFalse(trie.matches("spac"))
assertFalse(trie.matches("")); Assert.assertFalse(trie.matches("nothing"))
assertFalse(trie.matches("spac"));
assertFalse(trie.matches("nothing"));
} }
@Test @Test
public fun testFirstMatch() {
void testFirstMatch() { val map: MutableMap<String, Int> = HashMap()
Map<String, Integer> map = new HashMap<String, Integer>(); map["space"] = 1
map.put("space", 1); map["keyword"] = 2
map.put("keyword", 2); map["ch"] = 3
map.put("ch", 3); val trie = build(map)
DoubleArrayStringTrie<Integer> trie = FiniteStateMachine.INSTANCE.build(map); var hit = trie.findFirst("space")
Assert.assertEquals(0, hit!!.begin.toLong())
Hit<Integer> hit = trie.findFirst("space"); Assert.assertEquals(5, hit.end.toLong())
assertEquals(0, hit.getBegin()); Assert.assertEquals(
assertEquals(5, hit.getEnd()); 1, hit.value.toLong()
assertEquals(1, )
hit.getValue() hit = trie.findFirst("a lot of garbage in the space ch")
.intValue()); Assert.assertEquals(24, hit!!.begin.toLong())
Assert.assertEquals(29, hit.end.toLong())
hit = trie.findFirst("a lot of garbage in the space ch"); Assert.assertEquals(
assertEquals(24, hit.getBegin()); 1, hit.value.toLong()
assertEquals(29, hit.getEnd()); )
assertEquals(1, Assert.assertNull(trie.findFirst(""))
hit.getValue() Assert.assertNull(trie.findFirst("value"))
.intValue()); Assert.assertNull(trie.findFirst("keywork"))
Assert.assertNull(trie.findFirst(" no pace"))
assertNull(trie.findFirst(""));
assertNull(trie.findFirst("value"));
assertNull(trie.findFirst("keywork"));
assertNull(trie.findFirst(" no pace"));
} }
@Test @Test
public @Throws(Exception::class)
void testCancellation() throws Exception { fun testCancellation() {
// Collect test data set // Collect test data set
TreeMap<String, String> map = new TreeMap<String, String>(); val map = TreeMap<String, String>()
String[] keyArray = new String[] {"foo", "bar"}; val keyArray = arrayOf("foo", "bar")
for (String key : keyArray) { for (key in keyArray) {
map.put(key, key); map[key] = key
} }
// Build an DoubleArrayStringTrie // Build an DoubleArrayStringTrie
DoubleArrayStringTrie<String> acdat = FiniteStateMachine.INSTANCE.build(map); val acdat = build(map)
// count matches // count matches
String haystack = "sfwtfoowercwbarqwrcq"; val haystack = "sfwtfoowercwbarqwrcq"
CountHits cancellingMatcher = new CountHits(false); val cancellingMatcher = CountHits(false)
CountHits countingMatcher = new CountHits(true); val countingMatcher = CountHits(true)
System.out.println("Testing cancellation");
acdat.parseText(haystack, cancellingMatcher); println("Testing cancellation")
acdat.parseText(haystack, countingMatcher); acdat.parseText(haystack, cancellingMatcher)
assertEquals(cancellingMatcher.count, 1); acdat.parseText(haystack, countingMatcher)
assertEquals(countingMatcher.count, 2); Assert.assertEquals(cancellingMatcher.count.toLong(), 1)
Assert.assertEquals(countingMatcher.count.toLong(), 2)
} }
private @Throws(IOException::class)
String loadText(String path) throws IOException { private fun loadText(path: String): String {
StringBuilder sbText = new StringBuilder(); val sbText = StringBuilder()
BufferedReader br = new BufferedReader(new InputStreamReader(Thread.currentThread() val br = BufferedReader(
.getContextClassLoader() InputStreamReader(
.getResourceAsStream(path), "UTF-8")); Thread.currentThread().contextClassLoader.getResourceAsStream(path), "UTF-8"
String line; )
while ((line = br.readLine()) != null) { )
sbText.append(line) var line: String?
.append("\n"); while (br.readLine().also { line = it } != null) {
sbText.append(line).append("\n")
} }
br.close(); br.close()
return sbText.toString()
return sbText.toString();
} }
private @Throws(IOException::class)
Set<String> loadDictionary(String path) throws IOException { private fun loadDictionary(path: String): Set<String> {
Set<String> dictionary = new TreeSet<String>();
BufferedReader br = new BufferedReader(new InputStreamReader(Thread.currentThread() val dictionary: MutableSet<String> = TreeSet()
.getContextClassLoader() val br = BufferedReader(
.getResourceAsStream(path), "UTF-8")); InputStreamReader(
String line; Thread.currentThread().contextClassLoader.getResourceAsStream(path), "UTF-8"
while ((line = br.readLine()) != null) { )
dictionary.add(line); )
var line: String?
while (br.readLine().also { line = it } != null) {
dictionary.add(line!!)
} }
br.close(); br.close()
return dictionary
return dictionary;
} }
@SuppressWarnings("unchecked") @Suppress("UNCHECKED_CAST")
@Test @Test
public @Throws(Exception::class)
void testSaveAndLoad() throws Exception { fun testSaveAndLoad() {
DoubleArrayStringTrie<String> acdat = buildASimpleDoubleArrayStringTrie(); var acdat = buildASimpleDoubleArrayStringTrie()
final String tmpPath = System.getProperty("java.io.tmpdir") val tmpPath = System.getProperty("java.io.tmpdir").replace("\\\\", "/") + "/acdat.tmp"
.replace("\\\\", "/") + "/acdat.tmp";
System.out.println("Saving acdat to: " + tmpPath);
ObjectOutputStream out = new ObjectOutputStream(Files.newOutputStream(Paths.get(tmpPath))); println("Saving acdat to: $tmpPath")
out.writeObject(acdat); val out = ObjectOutputStream(Files.newOutputStream(Paths.get(tmpPath)))
out.close(); out.writeObject(acdat)
System.out.println("Loading acdat from: " + tmpPath); out.close()
ObjectInputStream in = new ObjectInputStream(Files.newInputStream(Paths.get(tmpPath))); println("Loading acdat from: $tmpPath")
val `in` = ObjectInputStream(Files.newInputStream(Paths.get(tmpPath)))
acdat = (DoubleArrayStringTrie<String>) in.readObject(); acdat = `in`.readObject() as DoubleArrayStringTrie<String>
validateASimpleDoubleArrayStringTrie(acdat); validateASimpleDoubleArrayStringTrie(acdat)
} }
@Test @Test
public fun testBuildEmptyTrie() {
void testBuildEmptyTrie() { val map = TreeMap<String, String>()
TreeMap<String, String> map = new TreeMap<String, String>(); val acdat = build(map)
DoubleArrayStringTrie<String> acdat = FiniteStateMachine.INSTANCE.build(map); Assert.assertEquals(0, acdat.size.toLong())
assertEquals(0, acdat.getSize()); val hits = acdat.parseText("uhers")
List<Hit<String>> hits = acdat.parseText("uhers"); Assert.assertEquals(0, hits.size.toLong())
assertEquals(0, hits.size());
} }
} }