Moved FSM from utils to own project
commit
0a79ce2633
|
@ -0,0 +1,121 @@
|
|||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff:
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/dictionaries
|
||||
.idea/**/codeStyles/
|
||||
.idea/**/codeStyleSettings.xml
|
||||
|
||||
# Sensitive or high-churn files:
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.xml
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/shelf/
|
||||
|
||||
|
||||
# Gradle:
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# CMake
|
||||
cmake-build-debug/
|
||||
|
||||
# Mongo Explorer plugin:
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
## File-based project format:
|
||||
*.iws
|
||||
|
||||
## Plugin-specific files:
|
||||
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
######################
|
||||
# End JetBrains IDEs #
|
||||
######################
|
||||
|
||||
|
||||
# From https://github.com/github/gitignore/blob/master/Gradle.gitignore
|
||||
.gradle
|
||||
/build/
|
||||
|
||||
# Ignore Gradle GUI config
|
||||
gradle-app.setting
|
||||
|
||||
# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
|
||||
!gradle-wrapper.jar
|
||||
!gradle-wrapper.properties
|
||||
|
||||
# Cache of project
|
||||
.gradletasknamecache
|
||||
|
||||
|
||||
|
||||
|
||||
# From https://github.com/github/gitignore/blob/master/Java.gitignore
|
||||
*.class
|
||||
|
||||
# Mobile Tools for Java (J2ME)
|
||||
.mtj.tmp/
|
||||
|
||||
|
||||
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
|
||||
hs_err_pid*
|
||||
|
||||
*.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
|
||||
|
||||
##########################################################
|
||||
# Specific to this module
|
||||
|
||||
# iml files are generated by intellij/gradle now
|
||||
**/*.iml
|
|
@ -0,0 +1,35 @@
|
|||
- FiniteStateMachine - Finite State Machine using the AhoCorasick implementation
|
||||
[The Apache Software License, Version 2.0]
|
||||
https://git.dorkbox.com/dorkbox/FSM
|
||||
Copyright 2023
|
||||
Dorkbox LLC
|
||||
|
||||
Extra license information
|
||||
- AhoCorasickDoubleArrayTrie - An extremely fast implementation of Aho Corasick algorithm based on Double Array Trie structure.
|
||||
[The Apache Software License, Version 2.0]
|
||||
https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
Copyright 2018
|
||||
hankcs <me@hankcs.com>
|
||||
|
||||
- Kotlin -
|
||||
[The Apache Software License, Version 2.0]
|
||||
https://github.com/JetBrains/kotlin
|
||||
Copyright 2020
|
||||
JetBrains s.r.o. and Kotlin Programming Language contributors
|
||||
Kotlin Compiler, Test Data+Libraries, and Tools repository contain third-party code, to which different licenses may apply
|
||||
See: https://github.com/JetBrains/kotlin/blob/master/license/README.md
|
||||
|
||||
- Updates - Software Update Management
|
||||
[The Apache Software License, Version 2.0]
|
||||
https://git.dorkbox.com/dorkbox/Updates
|
||||
Copyright 2021
|
||||
Dorkbox LLC
|
||||
|
||||
Extra license information
|
||||
- Kotlin -
|
||||
[The Apache Software License, Version 2.0]
|
||||
https://github.com/JetBrains/kotlin
|
||||
Copyright 2020
|
||||
JetBrains s.r.o. and Kotlin Programming Language contributors
|
||||
Kotlin Compiler, Test Data+Libraries, and Tools repository contain third-party code, to which different licenses may apply
|
||||
See: https://github.com/JetBrains/kotlin/blob/master/license/README.md
|
|
@ -0,0 +1,218 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,35 @@
|
|||
Finite State Machine using the AhoCorasick implementation
|
||||
|
||||
###### [![Dorkbox](https://badge.dorkbox.com/dorkbox.svg "Dorkbox")](https://git.dorkbox.com/dorkbox/FSM) [![Github](https://badge.dorkbox.com/github.svg "Github")](https://github.com/dorkbox/FSM) [![Gitlab](https://badge.dorkbox.com/gitlab.svg "Gitlab")](https://gitlab.com/dorkbox/FSM)
|
||||
|
||||
|
||||
* AhoCorasick finite state machine for Strings and ByteArrays
|
||||
|
||||
|
||||
|
||||
Maven Info
|
||||
---------
|
||||
```
|
||||
<dependencies>
|
||||
...
|
||||
<dependency>
|
||||
<groupId>com.dorkbox</groupId>
|
||||
<artifactId>FSM</artifactId>
|
||||
<version>1.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
```
|
||||
|
||||
Gradle Info
|
||||
---------
|
||||
```
|
||||
dependencies {
|
||||
...
|
||||
implementation("com.dorkbox:FSM:1.0")
|
||||
}
|
||||
```
|
||||
|
||||
License
|
||||
---------
|
||||
This project is © 2023 dorkbox llc, and is distributed under the terms of the Apache v2.0 License. See file "LICENSE" for further
|
||||
references.
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.time.Instant
|
||||
|
||||
///////////////////////////////
|
||||
////// PUBLISH TO SONATYPE / MAVEN CENTRAL
|
||||
////// TESTING : (to local maven repo) <'publish and release' - 'publishToMavenLocal'>
|
||||
////// RELEASE : (to sonatype/maven central), <'publish and release' - 'publishToSonatypeAndRelease'>
|
||||
///////////////////////////////
|
||||
|
||||
// Always print full stack traces for build failures.
gradle.startParameter.showStacktrace = ShowStacktrace.ALWAYS

plugins {
    // dorkbox build tooling
    id("com.dorkbox.GradleUtils") version "3.9"
    id("com.dorkbox.Licensing") version "2.19.1"
    id("com.dorkbox.VersionUpdate") version "2.5"
    id("com.dorkbox.GradlePublish") version "1.17"

    // Kotlin/JVM compilation
    kotlin("jvm") version "1.8.0"
}
|
||||
|
||||
/**
 * Build metadata shared by the rest of this script: the licensing block, the jar manifest and the
 * publishToSonatype configuration all read their values from here.
 */
object Extras {
    // set for the project
    // NOTE: describes THIS project (the previous text was carried over from an unrelated collections project)
    const val description = "Finite State Machine using the AhoCorasick implementation"
    const val group = "com.dorkbox"
    const val version = "1.0"

    // set as project.ext
    const val name = "FSM"
    const val id = "FSM" // this is the maven ID!
    const val vendor = "Dorkbox LLC"
    const val vendorUrl = "https://dorkbox.com"
    const val url = "https://git.dorkbox.com/dorkbox/FSM"

    // ISO-8601 timestamp captured when this object is first initialised; written to the jar manifest
    val buildDate = Instant.now().toString()
}
|
||||
|
||||
///////////////////////////////
|
||||
///// assign 'Extras'
|
||||
///////////////////////////////
|
||||
// Shared dorkbox build setup. NOTE(review): the exact effect of each call is defined by the
// com.dorkbox.GradleUtils plugin - confirm against its documentation before changing the order.
GradleUtils.load("$projectDir/../../gradle.properties", Extras)
GradleUtils.defaults()

// compile for Java 8, with JPMS support configured for Java 9
GradleUtils.compileConfiguration(JavaVersion.VERSION_1_8)
GradleUtils.jpms(JavaVersion.VERSION_1_9)
|
||||
|
||||
|
||||
// License metadata for this project plus the third-party code it is derived from.
licensing {
    license(License.APACHE_2) {
        description(Extras.description)
        author(Extras.vendor)
        url(Extras.url)

        extra("AhoCorasickDoubleArrayTrie", License.APACHE_2) {
            // Describe the third-party library itself; reusing Extras.description here would
            // attribute this project's description to hankcs' work.
            description("An extremely fast implementation of Aho Corasick algorithm based on Double Array Trie structure.")
            copyright(2018)
            author("hankcs <me@hankcs.com>")
            url("https://github.com/hankcs/AhoCorasickDoubleArrayTrie")
        }
    }
}
|
||||
|
||||
// Stamp the jar manifest with the project metadata from Extras.
tasks.jar.get().manifest {
    // https://docs.oracle.com/javase/tutorial/deployment/jar/packageman.html
    with(attributes) {
        put("Name", Extras.name)

        put("Specification-Title", Extras.name)
        put("Specification-Version", Extras.version)
        put("Specification-Vendor", Extras.vendor)

        put("Implementation-Title", "${Extras.group}.${Extras.id}")
        put("Implementation-Version", Extras.buildDate)
        put("Implementation-Vendor", Extras.vendor)
    }
}
|
||||
|
||||
dependencies {
    // exposed to consumers of this library
    api("com.dorkbox:Updates:1.1")

    // unit tests only
    testImplementation("junit:junit:4.13.2")
}
|
||||
|
||||
// Publication metadata; every value is taken from Extras so it stays in sync with the jar manifest and license.
publishToSonatype {
    // maven coordinates
    groupId = Extras.group
    artifactId = Extras.id
    version = Extras.version

    // human-readable project info
    name = Extras.name
    description = Extras.description
    url = Extras.url

    vendor = Extras.vendor
    vendorUrl = Extras.vendorUrl

    issueManagement {
        url = "${Extras.url}/issues"
        nickname = "Gitea Issues"
    }

    developer {
        id = "dorkbox"
        name = Extras.vendor
        email = "email@dorkbox.com"
    }
}
|
|
@ -0,0 +1,15 @@
|
|||
# https://docs.gradle.org/current/userguide/build_environment.html#sec:gradle_configuration_properties
|
||||
org.gradle.jvmargs=-Dfile.encoding=UTF-8
|
||||
|
||||
#org.gradle.warning.mode=(all,fail,none,summary)
|
||||
org.gradle.warning.mode=all
|
||||
|
||||
#org.gradle.daemon=false
|
||||
# default is 3 hours, this is 1 minute
|
||||
org.gradle.daemon.idletimeout=60000
|
||||
|
||||
#org.gradle.console=(auto,plain,rich,verbose)
|
||||
org.gradle.console=auto
|
||||
|
||||
#org.gradle.logging.level=(quiet,warn,lifecycle,info,debug)
|
||||
org.gradle.logging.level=lifecycle
|
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright 2018 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
|
@ -0,0 +1,348 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.util.*
|
||||
|
||||
/**
|
||||
* A builder to build the AhoCorasickDoubleArrayTrie
|
||||
*/
|
||||
internal abstract class BaseByteBuilder<K, V> {
|
||||
/**
|
||||
* the root state of trie
|
||||
*/
|
||||
internal var rootState: StateByte? = StateByte()
|
||||
|
||||
/**
|
||||
* whether the position has been used
|
||||
*/
|
||||
private var used: BooleanArray? = null
|
||||
|
||||
/**
|
||||
* the allocSize of the dynamic array
|
||||
*/
|
||||
private var allocSize: Int = 0
|
||||
|
||||
/**
|
||||
* a parameter that controls the memory growth speed of the dynamic array
|
||||
*/
|
||||
private var progress: Int = 0
|
||||
|
||||
/**
|
||||
* the next position to check unused memory
|
||||
*/
|
||||
private var nextCheckPos: Int = 0
|
||||
|
||||
/**
|
||||
* the size of the key-pair sets
|
||||
*/
|
||||
private var keySize: Int = 0
|
||||
|
||||
|
||||
lateinit var output: Array<IntArray?>
|
||||
lateinit var fail: IntArray
|
||||
lateinit var base: IntArray
|
||||
lateinit var check: IntArray
|
||||
var size: Int = 0
|
||||
|
||||
/**
|
||||
* Build from a map
|
||||
*
|
||||
* @param map a map containing key-value pairs
|
||||
*/
|
||||
fun build(map: Map<K, V>) {
    // Only the keys are consumed here; each one is handed to addKeyword together with its iteration index.
    val keywords = map.keys

    // Step 1: register every keyword (addKeyword is supplied by the concrete builder).
    addAllKeyword(keywords)

    // Step 2: pack the keyword trie into the base/check arrays. The 'used' bitmap is released
    // as soon as packing is finished.
    buildDoubleArrayTrie(keywords.size)
    used = null

    // Step 3: fill the fail and output tables, then release the root of the pointer-based trie.
    constructFailureStates()
    rootState = null

    loseWeight()
}
|
||||
|
||||
/**
|
||||
* fetch siblings of a parent node
|
||||
*
|
||||
* @param parent parent node
|
||||
* @param siblings parent node's child nodes, i.e. the siblings
|
||||
*
|
||||
* @return the amount of the siblings
|
||||
*/
|
||||
private fun fetch(parent: StateByte,
                  siblings: MutableList<Pair<Int, StateByte>>): Int {
    if (parent.isAcceptable) {
        // An accepting parent gets a synthetic child under code 0 that carries the parent's
        // largest value id; its depth is negative (-(depth + 1)).
        val terminal = StateByte(-(parent.depth + 1))
        terminal.addEmit(parent.largestValueId!!)
        siblings.add(0 to terminal)
    }

    // Real children are shifted up by one so that code 0 stays reserved for the synthetic child.
    parent.getSuccess().entries.mapTo(siblings) { entry -> Pair(entry.key + 1, entry.value) }

    return siblings.size
}
|
||||
|
||||
/**
|
||||
* add a keyword
|
||||
*
|
||||
* @param keyword a keyword
|
||||
* @param index the index of the keyword
|
||||
*/
|
||||
internal abstract fun addKeyword(keyword: K, index: Int) // implemented by subclasses; addAllKeyword passes a running index that starts at 0
|
||||
|
||||
/**
|
||||
* add a collection of keywords
|
||||
*
|
||||
* @param keywordSet the collection holding keywords
|
||||
*/
|
||||
private fun addAllKeyword(keywordSet: Collection<K>) {
    // Each keyword is registered with its position in the collection's iteration order (0-based).
    keywordSet.forEachIndexed { position, keyword -> addKeyword(keyword, position) }
}
|
||||
|
||||
/**
|
||||
* construct failure table
|
||||
*/
|
||||
private fun constructFailureStates() {
    // The fail table always has at least two slots because slot 1 is written unconditionally below.
    fail = IntArray(maxOf(size + 1, 2))
    fail[1] = base[0]
    output = arrayOfNulls(size + 1)

    val pending = ArrayDeque<StateByte>()

    // Pass 1: every direct child of the root fails back to the root.
    for (child in this.rootState!!.states) {
        child.setFailure(this.rootState!!, fail)
        pending.add(child)
        constructOutput(child)
    }

    // Pass 2: breadth-first walk over the deeper states.
    while (pending.isNotEmpty()) {
        val current = pending.remove()

        for (transition in current.transitions) {
            val target = current.nextState(transition)
            pending.add(target)

            // Follow the failure chain until a state is found that has this transition.
            var fallback = current.failure()
            while (fallback!!.nextState(transition) == null) {
                fallback = fallback.failure()
            }

            // The target fails to that state's successor and also emits everything the successor emits.
            val targetFailure = fallback.nextState(transition)
            target!!.setFailure(targetFailure!!, fail)
            target.addEmit(targetFailure.emit())
            constructOutput(target)
        }
    }
}
|
||||
|
||||
/**
|
||||
* construct output table
|
||||
*/
|
||||
private fun constructOutput(targetState: StateByte) {
    val emitted = targetState.emit()
    if (emitted.isEmpty()) {
        // nothing to record; the output slot for this state stays null
        return
    }

    // Copy the emitted ids, in iteration order, into a primitive array stored at the state's index.
    val count = emitted.size
    val cursor = emitted.iterator()
    this.output[targetState.index] = IntArray(count) { cursor.next() }
}
|
||||
|
||||
private fun buildDoubleArrayTrie(keySize: Int) {
    // Reset the bookkeeping that insert() reads while growing the arrays.
    this.keySize = keySize
    progress = 0
    resize(65536 * 32) // 32 double bytes

    nextCheckPos = 0
    base[0] = 1

    // Collect the root's children (pre-sized to the root's transition count) and place them.
    val root = this.rootState
    val rootChildren = ArrayList<Pair<Int, StateByte>>(root!!.getSuccess().entries.size)
    fetch(root, rootChildren)

    if (rootChildren.isEmpty()) {
        // no keywords: there is nothing to insert
        return
    }
    insert(rootChildren)
}
|
||||
|
||||
/**
 * Re-allocates the `base`, `check` and `used` arrays with [newSize] slots.
 *
 * The first `allocSize` entries of each array are carried over; on the very first call
 * (`allocSize == 0`) there is nothing to copy.
 *
 * @param newSize number of slots in the new arrays
 *
 * @return [newSize]
 */
private fun resize(newSize: Int): Int {
    val grownBase = IntArray(newSize)
    val grownCheck = IntArray(newSize)
    val grownUsed = BooleanArray(newSize)

    val carriedOver = allocSize
    if (carriedOver > 0) {
        base.copyInto(grownBase, 0, 0, carriedOver)
        check.copyInto(grownCheck, 0, 0, carriedOver)
        used!!.copyInto(grownUsed, 0, 0, carriedOver)
    }

    used = grownUsed
    check = grownCheck
    base = grownBase
    allocSize = newSize

    return newSize
}
|
||||
|
||||
/**
 * Places one group of sibling nodes into the double array and recurses into their children.
 *
 * A slot offset `begin` is searched such that `check[begin + code]` is free for every sibling code
 * and `begin` itself has not been handed out before. The siblings are then claimed in `check`,
 * and each sibling's `base` entry is set either to the offset of its own children or, when it has
 * none, to `-(largestValueId) - 1`.
 *
 * NOTE(review): the first element is used as the lower bound and the last element as the upper bound
 * of the codes, so the list is presumably sorted ascending by code - confirm against `fetch`.
 *
 * @param siblings the (code, state) pairs being inserted; must not be empty
 *
 * @return the offset (`begin`) at which the siblings were placed
 */
private fun insert(siblings: List<Pair<Int, StateByte>>): Int {
    val lowestCode = siblings[0].first
    val highestCode = siblings[siblings.size - 1].first

    var begin: Int
    var pos = maxOf(lowestCode + 1, nextCheckPos) - 1
    var occupiedSeen = 0
    var firstFreeSeen = false

    if (pos >= allocSize) {
        resize(pos + 1)
    }

    // Find an offset where every sibling's check slot is still 0.
    search@ while (true) {
        pos += 1

        if (pos >= allocSize) {
            resize(pos + 1)
        }

        if (check[pos] != 0) {
            occupiedSeen += 1
            continue
        }
        if (!firstFreeSeen) {
            // remember the first free slot seen during this search
            nextCheckPos = pos
            firstFreeSeen = true
        }

        begin = pos - lowestCode // candidate offset: puts the first sibling exactly on pos
        if (begin + highestCode >= allocSize) {
            // progress may be zero, hence the + 1 to avoid dividing by zero
            val ratio = 1.0 * keySize / (progress + 1)
            val growth = if (ratio < 1.05) 1.05 else ratio
            resize((allocSize * growth).toInt())
        }

        if (used!![begin]) {
            continue
        }

        // the first sibling fits (check[pos] == 0); verify the remaining ones
        for (i in 1 until siblings.size) {
            if (check[begin + siblings[i].first] != 0) {
                continue@search
            }
        }

        break
    }

    // Simple heuristic: when at least 95% of the slots between nextCheckPos and pos are occupied,
    // later searches start directly at pos.
    if (1.0 * occupiedSeen / (pos - nextCheckPos + 1) >= 0.95) {
        nextCheckPos = pos
    }
    used!![begin] = true // in range, because the arrays were resized above where needed

    val slotBase = begin
    val sizeLimit = slotBase + highestCode + 1
    if (sizeLimit >= size) {
        size = sizeLimit
    }

    // claim every sibling slot before descending, so the recursion cannot reuse them
    for (sibling in siblings) {
        check[slotBase + sibling.first] = slotBase
    }

    for ((code, state) in siblings) {
        val slot = slotBase + code
        val children = ArrayList<Pair<Int, StateByte>>(state.getSuccess().entries.size + 1)

        if (fetch(state, children) == 0) {
            // no children at all: store the value id, encoded as a negative number
            base[slot] = 0 - state.largestValueId!! - 1
            progress++
        }
        else {
            // depth first; the result is stored in a local first because the recursive call
            // may replace the base array through resize
            val childBase = insert(children)
            base[slot] = childBase
        }
        state.index = slot
    }
    return slotBase
}
|
||||
|
||||
/**
 * Replaces `base` and `check` with copies of length `size + 65535`, dropping whatever the
 * build allocated beyond that.
 *
 * NOTE(review): the 65535 extra slots are presumably padding so that lookups offset by a key code
 * stay inside the arrays - confirm.
 */
private fun loseWeight() {
    val trimmedLength = size + 65535
    base = base.copyOf(trimmedLength)
    check = check.copyOf(trimmedLength)
}
|
||||
}
|
|
@ -0,0 +1,558 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
@file:Suppress("unused")
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.io.IOException
|
||||
import java.io.ObjectInputStream
|
||||
import java.io.ObjectOutputStream
|
||||
import java.io.Serializable
|
||||
import java.util.*
|
||||
|
||||
/**
|
||||
* An implementation of Aho Corasick algorithm based on Double Array Trie
|
||||
*
|
||||
* Will create a DoubleArray Trie from a Map or InputStream (if previously saved)
|
||||
*
|
||||
* @author hankcs, dorkbox
|
||||
*/
|
||||
@Suppress("DuplicatedCode")
|
||||
abstract class BaseByteTrie<K, V>(map: Map<K, V>?, inputStream: ObjectInputStream?) : Serializable {
|
||||
|
||||
/**
|
||||
* check array of the Double Array Trie structure
|
||||
*/
|
||||
private val check: IntArray
|
||||
|
||||
/**
|
||||
* base array of the Double Array Trie structure
|
||||
*/
|
||||
private val base: IntArray
|
||||
|
||||
/**
|
||||
* fail table of the Aho Corasick automata
|
||||
*/
|
||||
private val fail: IntArray
|
||||
|
||||
/**
|
||||
* output table of the Aho Corasick automata
|
||||
*/
|
||||
private val output: Array<IntArray?>
|
||||
|
||||
/**
|
||||
* outer value array
|
||||
*/
|
||||
internal val v: Array<V>
|
||||
|
||||
/**
|
||||
* the length of every key
|
||||
*/
|
||||
internal val l: IntArray
|
||||
|
||||
/**
|
||||
* the size of base and check array
|
||||
*/
|
||||
private val checkSize: Int
|
||||
|
||||
// Initialises every table either by building from the map or by reading a previously saved trie.
// The map takes precedence when both arguments are given.
init {
    if (map != null) {
        // values are captured in the iteration order of the map
        @Suppress("UNCHECKED_CAST")
        v = kotlin.jvm.internal.collectionToArray(map.values) as Array<V>
        // one (initially zero) length entry per key
        l = IntArray(map.size)

        @Suppress("LeakingThis")
        val trieBuilder = builder()
        trieBuilder.build(map)

        fail = trieBuilder.fail
        base = trieBuilder.base
        check = trieBuilder.check

        checkSize = trieBuilder.size
        output = trieBuilder.output
    }
    else if (inputStream != null) {
        // the objects are read back in exactly the order save() writes them
        @Suppress("UNCHECKED_CAST")
        v = inputStream.readObject() as Array<V>
        l = inputStream.readObject() as IntArray

        fail = inputStream.readObject() as IntArray
        base = inputStream.readObject() as IntArray
        check = inputStream.readObject() as IntArray
        checkSize = inputStream.readObject() as Int

        @Suppress("UNCHECKED_CAST")
        output = inputStream.readObject() as Array<IntArray?>
    }
    else {
        throw NullPointerException("Map or InputStream must be specified!")
    }
}
|
||||
|
||||
/**
 * Supplies the builder used to construct this trie from a map.
 *
 * Called from the `init` block of this class, i.e. while the instance is still being constructed.
 * After `build(map)` has run, the builder's `fail`, `base`, `check`, `size` and `output` are copied into this trie.
 *
 * @return the builder for the concrete key type
 */
internal abstract fun builder(): BaseByteBuilder<K, V>
|
||||
|
||||
/**
 * Writes this trie to [out] so it can later be restored through the stream-based constructor.
 *
 * The objects are written in the order `v`, `l`, `fail`, `base`, `check`, `checkSize`, `output`.
 *
 * @param out the stream to write to; it is not closed by this method
 */
@Throws(IOException::class)
fun save(out: ObjectOutputStream) {
    val parts = arrayOf<Any>(v, l, fail, base, check, checkSize, output)
    for (part in parts) {
        out.writeObject(part)
    }
}
|
||||
|
||||
/**
 * The values backing this trie, for callers that need access to the original FSM data.
 *
 * This is the internal array itself (not a copy), in the order the values were captured from the map
 * or read from the stream.
 *
 * NOTE(review): the earlier documentation described this as natural order (populating with
 * [reddit.com, cnn.com] yields [cnn.com, reddit.com]); that presumably relies on a sorted map being
 * supplied - confirm.
 */
val keywords: Array<V>
    get() = v
|
||||
|
||||
/**
 * The number of keywords, i.e. the length of the value array.
 */
val size: Int
    get() = v.size
|
||||
|
||||
/**
 * Checks whether at least one keyword occurs anywhere inside [byteArray].
 *
 * Scanning stops at the first hit; no list of hits is built.
 *
 * @param byteArray the bytes to scan
 *
 * @return true if any keyword occurs in the input. For example the bytes of "fun.reddit.com" match the keyword "reddit.com"
 */
fun hasPartialMatch(byteArray: ByteArray): Boolean {
    var state = 0
    for (element in byteArray) {
        state = getState(state, element)

        // same condition under which parseBytes would have collected a hit
        val hits = output[state]
        if (hits != null && hits.isNotEmpty()) {
            return true
        }
    }
    return false
}
|
||||
|
||||
/**
 * Finds every keyword occurrence inside [byteArray].
 *
 * @param byteArray the bytes to scan
 *
 * @return the hits, each carrying the begin/end offsets of the occurrence and the matched value.
 *         A hit spanning 0 to the length of the input means the whole input matched.
 */
fun partialMatch(byteArray: ByteArray): List<Hit<V>> = parseBytes(byteArray)
|
||||
|
||||
/**
 * Scans [byteArray] and collects every keyword occurrence.
 *
 * @param byteArray the bytes to scan
 *
 * @return the hits in the order they were found; empty when nothing matched
 */
fun parseBytes(byteArray: ByteArray): List<Hit<V>> {
    val hits = LinkedList<Hit<V>>() // the number of hits is not known up front
    var state = 0

    for (offset in byteArray.indices) {
        state = getState(state, byteArray[offset])
        // positions are 1-based: the first byte is reported as position 1
        storeEmits(offset + 1, state, hits)
    }

    return hits
}
|
||||
|
||||
/**
 * Scans [byteArray] and reports every keyword occurrence to [processor] until it asks to stop.
 *
 * Offsets use the same convention as the other `parseBytes` overloads and `findFirst`:
 * the end offset is the 1-based position of the last matched byte, and the begin offset is
 * the end offset minus the keyword length.
 *
 * @param byteArray The bytes to scan
 * @param processor A processor which handles the output; scanning stops as soon as its `hit` returns false
 */
fun parseBytes(byteArray: ByteArray,
               processor: IHitCancellable<V>
) {
    var currentState = 0
    for (index in byteArray.indices) {
        // the first byte is position 1, exactly as in the non-cancellable overloads
        val position = index + 1
        currentState = getState(currentState, byteArray[index])

        val hitArray = output[currentState] ?: continue
        for (hit in hitArray) {
            val proceed = processor.hit(position - l[hit], position, v[hit])
            if (!proceed) {
                return
            }
        }
    }
}
|
||||
|
||||
/**
 * Scans [byteArray] and reports every keyword occurrence to [processor].
 *
 * @param byteArray The bytes to scan
 * @param processor A processor which receives, per hit, the begin offset, the end offset and the matched value
 */
fun parseBytes(byteArray: ByteArray,
               processor: IHit<V>
) {
    var state = 0
    byteArray.forEachIndexed { index, c ->
        val position = index + 1 // 1-based position of the current byte
        state = getState(state, c)
        output[state]?.forEach { hit ->
            processor.hit(position - l[hit], position, v[hit])
        }
    }
}
|
||||
|
||||
/**
 * Scans [byteArray] and reports every keyword occurrence, including the keyword's index, to [processor].
 *
 * @param byteArray The bytes to scan
 * @param processor A processor which receives, per hit, the begin offset, the end offset, the matched value
 *                  and the index of that value
 */
fun parseBytes(byteArray: ByteArray,
               processor: IHitFull<V>
) {
    var state = 0
    var position = 0
    for (c in byteArray) {
        position++ // the first byte is position 1
        state = getState(state, c)

        val hitArray = output[state] ?: continue
        for (hit in hitArray) {
            processor.hit(position - l[hit], position, v[hit], hit)
        }
    }
}
|
||||
|
||||
/**
 * Checks whether the input contains at least one keyword.
 *
 * @param byteArray source bytes to check
 *
 * @return `true` as soon as a state with an output entry is reached, `false` if the input is exhausted first
 */
fun matches(byteArray: ByteArray): Boolean {
    var state = 0
    return byteArray.any { element ->
        state = getState(state, element)
        output[state] != null
    }
}
|
||||
|
||||
/**
 * Finds the first keyword occurrence in the input.
 *
 * When several keywords end at the same position, the first entry of that state's output array is reported.
 *
 * @param byteArray source bytes to check
 *
 * @return the first hit, or `null` if there are no matches
 */
fun findFirst(byteArray: ByteArray): Hit<V>? {
    var state = 0
    for (index in byteArray.indices) {
        state = getState(state, byteArray[index])

        val hits = output[state] ?: continue
        val firstHit = hits[0]
        val end = index + 1 // 1-based position of the current byte
        return Hit(end - l[firstHit], end, v[firstHit])
    }
    return null
}
|
||||
|
||||
/**
 * Returns the value stored at [index] in the value array.
 *
 * For efficiency the index is NOT validated here; an index outside the value array fails
 * with the array's own out-of-bounds exception.
 *
 * @param index The index
 *
 * @return The value
 */
operator fun get(index: Int): V = v[index]
|
||||
|
||||
/**
 * Advances the automaton by one byte, following failure links when needed.
 *
 * The direct transition is tried first; while it reports -1 the state is replaced by its
 * entry in the fail table and the transition is tried again.
 *
 * @param currentState the state before consuming [character]
 * @param character the byte to consume
 *
 * @return the state after consuming [character]
 */
private fun getState(currentState: Int,
                     character: Byte): Int {
    var from = currentState
    var next = transitionWithRoot(from, character)

    while (next == -1) {
        // no transition from here: fall back along the failure link and retry
        from = fail[from]
        next = transitionWithRoot(from, character)
    }
    return next
}
|
||||
|
||||
/**
 * Appends one [Hit] per output entry of [currentState] to [collectedEmits].
 *
 * @param position 1-based position of the byte that led to [currentState]; used as the end offset of each hit
 * @param currentState the state whose outputs are collected
 * @param collectedEmits the list receiving the hits
 */
private fun storeEmits(position: Int,
                       currentState: Int,
                       collectedEmits: MutableList<Hit<V>>) {
    val hitArray = output[currentState] ?: return
    hitArray.mapTo(collectedEmits) { hit ->
        Hit(position - l[hit], position, v[hit])
    }
}
|
||||
|
||||
/**
 * Single transition lookup for a character.
 *
 * The slot `current + c.code + 1` is inspected: when its check entry equals [current],
 * the base entry of that slot is returned, otherwise -1.
 *
 * @param current the value the slot's check entry is compared against
 * @param c the character to follow
 *
 * @return the base entry of the target slot, or -1 when there is no such transition
 */
private fun transition(current: Int,
                       c: Char): Int {
    val slot = current + c.code + 1
    return if (check[slot] == current) base[slot] else -1
}
|
||||
|
||||
/**
 * Transition of a state; a failed transition from the root (position 0) stays at the root.
 *
 * The target slot is `base[nodePos] + c + 1`. Kotlin bytes are signed, so a byte with the high bit
 * set is negative and can push the slot below zero; a slot outside the check array is treated as
 * "no transition" instead of failing with an out-of-bounds exception.
 *
 * NOTE(review): byte -1 resolves to slot `base[nodePos]` itself, which the char builder's `fetch`
 * uses (code 0) for the terminator node - confirm how the byte builder encodes key bytes.
 *
 * @param nodePos the position of the current state
 * @param c the byte to follow
 *
 * @return the position of the next state, 0 when the root has no such transition, -1 when any other state has none
 */
private fun transitionWithRoot(nodePos: Int,
                               c: Byte): Int {
    val b = base[nodePos]
    val p = b + c + 1

    val hasTransition = p >= 0 && p < check.size && check[p] == b
    return when {
        hasTransition -> p
        nodePos == 0  -> 0
        else          -> -1
    }
}
|
||||
|
||||
/**
 * Looks up a key exactly, scanning the whole array from the root.
 *
 * @param byteArray the key
 *
 * @return the index of the key (usable as a perfect hash), or -1 when the key is not present
 */
fun exactMatchSearch(byteArray: ByteArray): Int = exactMatchSearch(byteArray, 0, 0, 0)
|
||||
|
||||
/**
 * Looks up a key exactly, optionally restricted to a range of [byteArray] and starting from a given node.
 *
 * Bytes at indices `pos until len` are consumed, so [len] acts as an exclusive end index.
 * A slot that falls outside the check array (possible because bytes are signed) is treated as
 * "key not present" instead of failing with an out-of-bounds exception.
 *
 * @param byteArray the key
 * @param pos index of the first byte to consume
 * @param len exclusive end index; values <= 0 mean "up to the end of [byteArray]"
 * @param nodePos position of the node to start from; values <= 0 mean the root
 *
 * @return the index of the key, or -1 when the key is not present
 */
fun exactMatchSearch(byteArray: ByteArray,
                     pos: Int,
                     len: Int,
                     nodePos: Int): Int {
    val end = if (len <= 0) byteArray.size else len
    val startNode = if (nodePos <= 0) 0 else nodePos

    var b = base[startNode]
    for (i in pos until end) {
        val p = b + byteArray[i] + 1
        if (p < 0 || p >= check.size || check[p] != b) {
            return -1
        }
        b = base[p]
    }

    // the key is present only if slot b is claimed by b itself and carries a negative (encoded) value id
    if (b < 0 || b >= check.size) {
        return -1
    }
    val n = base[b]
    return if (check[b] == b && n < 0) -n - 1 else -1
}
|
||||
|
||||
// /**
|
||||
// * Just for debug when I wrote it
|
||||
// */
|
||||
// public void debug()
|
||||
// {
|
||||
// System.out.println("base:");
|
||||
// for (int i = 0; i < base.length; i++)
|
||||
// {
|
||||
// if (base[i] < 0)
|
||||
// {
|
||||
// System.out.println(i + " : " + -base[i]);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.println("output:");
|
||||
// for (int i = 0; i < output.length; i++)
|
||||
// {
|
||||
// if (output[i] != null)
|
||||
// {
|
||||
// System.out.println(i + " : " + Arrays.toString(output[i]));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.println("fail:");
|
||||
// for (int i = 0; i < fail.length; i++)
|
||||
// {
|
||||
// if (fail[i] != 0)
|
||||
// {
|
||||
// System.out.println(i + " : " + fail[i]);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.println(this);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public String toString()
|
||||
// {
|
||||
// String infoIndex = "i = ";
|
||||
// String infoChar = "char = ";
|
||||
// String infoBase = "base = ";
|
||||
// String infoCheck = "check= ";
|
||||
// for (int i = 0; i < Math.min(base.length, 200); ++i)
|
||||
// {
|
||||
// if (base[i] != 0 || check[i] != 0)
|
||||
// {
|
||||
// infoChar += " " + (i == check[i] ? " ×" : (char) (i - check[i] - 1));
|
||||
// infoIndex += " " + String.format("%5d", i);
|
||||
// infoBase += " " + String.format("%5d", base[i]);
|
||||
// infoCheck += " " + String.format("%5d", check[i]);
|
||||
// }
|
||||
// }
|
||||
// return "DoubleArrayTrie:" +
|
||||
// "\n" + infoChar +
|
||||
// "\n" + infoIndex +
|
||||
// "\n" + infoBase +
|
||||
// "\n" + infoCheck + "\n" +
|
||||
//// "check=" + Arrays.toString(check) +
|
||||
//// ", base=" + Arrays.toString(base) +
|
||||
//// ", used=" + Arrays.toString(used) +
|
||||
// "size=" + size
|
||||
//// ", length=" + Arrays.toString(length) +
|
||||
//// ", value=" + Arrays.toString(value) +
|
||||
// ;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * A debug class that sequentially outputs variable names and variable values
|
||||
// */
|
||||
// private static class DebugArray
|
||||
// {
|
||||
// Map<String, String> nameValueMap = new LinkedHashMap<String, String>();
|
||||
//
|
||||
// public void add(String name, int value)
|
||||
// {
|
||||
// String valueInMap = nameValueMap.get(name);
|
||||
// if (valueInMap == null)
|
||||
// {
|
||||
// valueInMap = "";
|
||||
// }
|
||||
//
|
||||
// valueInMap += " " + String.format("%5d", value);
|
||||
//
|
||||
// nameValueMap.put(name, valueInMap);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public String toString()
|
||||
// {
|
||||
// String text = "";
|
||||
// for (Map.Entry<String, String> entry : nameValueMap.entrySet())
|
||||
// {
|
||||
// String name = entry.getKey();
|
||||
// String value = entry.getValue();
|
||||
// text += String.format("%-5s", name) + "= " + value + '\n';
|
||||
// }
|
||||
//
|
||||
// return text;
|
||||
// }
|
||||
//
|
||||
// public void println()
|
||||
// {
|
||||
// System.out.print(this);
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,348 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.util.*
|
||||
|
||||
/**
|
||||
* A builder to build the AhoCorasickDoubleArrayTrie
|
||||
*/
|
||||
internal abstract class BaseCharBuilder<K, V> {
|
||||
/**
|
||||
* the root state of trie
|
||||
*/
|
||||
internal var rootState: StateChar? = StateChar()
|
||||
|
||||
/**
|
||||
* whether the position has been used
|
||||
*/
|
||||
private var used: BooleanArray? = null
|
||||
|
||||
/**
|
||||
* the allocSize of the dynamic array
|
||||
*/
|
||||
private var allocSize: Int = 0
|
||||
|
||||
/**
|
||||
* a parameter controls the memory growth speed of the dynamic array
|
||||
*/
|
||||
private var progress: Int = 0
|
||||
|
||||
/**
|
||||
* the next position to check unused memory
|
||||
*/
|
||||
private var nextCheckPos: Int = 0
|
||||
|
||||
/**
|
||||
* the size of the key-pair sets
|
||||
*/
|
||||
private var keySize: Int = 0
|
||||
|
||||
|
||||
lateinit var output: Array<IntArray?>
|
||||
lateinit var fail: IntArray
|
||||
lateinit var base: IntArray
|
||||
lateinit var check: IntArray
|
||||
var size: Int = 0
|
||||
|
||||
/**
 * Builds all tables from the keys of [map].
 *
 * After this call `base`, `check`, `fail`, `output` and `size` hold the finished structure,
 * while the intermediate trie (`rootState`) and the `used` array have been released.
 *
 * @param map a map containing key-value pairs; only its keys are read here
 */
fun build(map: Map<K, V>) {
    val keys = map.keys

    // step 1: plain trie of all keywords
    addAllKeyword(keys)

    // step 2: lay the trie out as a double array; `used` is only needed while inserting
    buildDoubleArrayTrie(keys.size)
    used = null

    // step 3: failure table and merged output table; the trie is no longer needed afterwards
    constructFailureStates()
    rootState = null

    loseWeight()
}
|
||||
|
||||
/**
 * Collects the children of [parent] as (code, state) pairs.
 *
 * When [parent] is acceptable, an artificial child with code 0 is added first; it carries the
 * parent's largest value id as its emit. Every real child follows with code `key.code + 1`.
 *
 * @param parent   parent node
 * @param siblings list receiving the parent's child nodes, i.e. the siblings
 *
 * @return the size of [siblings] after the children were added
 */
private fun fetch(parent: StateChar,
                  siblings: MutableList<Pair<Int, StateChar>>): Int {
    if (parent.isAcceptable) {
        // artificial child holding the parent's output
        val terminator = StateChar(-(parent.depth + 1))
        terminator.addEmit(parent.largestValueId!!)
        siblings.add(Pair(0, terminator))
    }

    for (entry in parent.getSuccess().entries) {
        siblings.add(Pair(entry.key.code + 1, entry.value))
    }

    return siblings.size
}
|
||||
|
||||
/**
|
||||
* add a keyword
|
||||
*
|
||||
* @param keyword a keyword
|
||||
* @param index the index of the keyword
|
||||
*/
|
||||
// Implemented by the concrete builder. addAllKeyword calls this once per key, passing the key's
// zero-based position in the iteration order of the key collection as the index.
internal abstract fun addKeyword(keyword: K, index: Int)
|
||||
|
||||
/**
 * Adds every keyword of [keywordSet], numbering them from 0 in iteration order.
 *
 * @param keywordSet the collection holding keywords
 */
private fun addAllKeyword(keywordSet: Collection<K>) {
    keywordSet.forEachIndexed { index, keyword ->
        addKeyword(keyword, index)
    }
}
|
||||
|
||||
/**
 * Builds the failure table and the output table by walking the trie breadth first.
 *
 * Both tables get `size + 1` entries (the failure table at least 2, so that index 1 is always writable).
 */
private fun constructFailureStates() {
    val tableSize = size + 1
    fail = IntArray(tableSize.coerceAtLeast(2))
    fail[1] = base[0]
    output = arrayOfNulls(tableSize)

    val root = this.rootState!!
    val pending = ArrayDeque<StateChar>()

    // Step one: every depth-1 state fails back to the root.
    for (depthOneState in root.states) {
        depthOneState.setFailure(root, fail)
        pending.add(depthOneState)
        constructOutput(depthOneState)
    }

    // Step two: breadth-first over the deeper states.
    while (pending.isNotEmpty()) {
        val currentState = pending.remove()

        for (transition in currentState.transitions) {
            val targetState = currentState.nextState(transition)
            pending.add(targetState)

            // follow the failure chain of the current state until some state can take this transition
            var fallback = currentState.failure()
            while (fallback!!.nextState(transition) == null) {
                fallback = fallback.failure()
            }

            val newFailureState = fallback.nextState(transition)
            targetState!!.setFailure(newFailureState!!, fail)
            // the target also emits whatever its failure state emits
            targetState.addEmit(newFailureState.emit())
            constructOutput(targetState)
        }
    }
}
|
||||
|
||||
/**
 * Copies the emit ids of [targetState] into the output table.
 *
 * The ids are stored as an `IntArray` in the slot addressed by the state's `index`.
 * A state that emits nothing leaves its slot untouched.
 *
 * @param targetState the state whose emits are recorded
 */
private fun constructOutput(targetState: StateChar) {
    val emitted = targetState.emit()
    if (emitted.isEmpty()) {
        return
    }

    // pull exactly emitted.size ids from the iterator, in iteration order
    val source = emitted.iterator()
    val ids = IntArray(emitted.size) { source.next() }

    this.output[targetState.index] = ids
}
|
||||
|
||||
/**
 * Lays the keyword trie held in `rootState` out into the `base` / `check` arrays.
 *
 * Resets the growth bookkeeping, allocates the initial arrays, seeds `base[0]` with 1 and then
 * inserts the children of the root (which recursively inserts every deeper level).
 *
 * @param keySize number of keys in the trie; used by `insert` when deciding how much to grow the arrays
 */
private fun buildDoubleArrayTrie(keySize: Int) {
    this.keySize = keySize
    progress = 0

    // initial allocation: 65536 * 32 slots
    resize(65536 * 32)
    nextCheckPos = 0
    base[0] = 1

    val root = this.rootState!!
    val rootChildren = ArrayList<Pair<Int, StateChar>>(root.getSuccess().entries.size)
    fetch(root, rootChildren)

    // an empty trie has nothing to lay out
    if (rootChildren.isEmpty()) {
        return
    }
    insert(rootChildren)
}
|
||||
|
||||
/**
 * Re-allocates the `base`, `check` and `used` arrays with [newSize] slots.
 *
 * The first `allocSize` entries of each array are carried over; on the very first call
 * (`allocSize == 0`) there is nothing to copy.
 *
 * @param newSize number of slots in the new arrays
 *
 * @return [newSize]
 */
private fun resize(newSize: Int): Int {
    val grownBase = IntArray(newSize)
    val grownCheck = IntArray(newSize)
    val grownUsed = BooleanArray(newSize)

    val carriedOver = allocSize
    if (carriedOver > 0) {
        base.copyInto(grownBase, 0, 0, carriedOver)
        check.copyInto(grownCheck, 0, 0, carriedOver)
        used!!.copyInto(grownUsed, 0, 0, carriedOver)
    }

    used = grownUsed
    check = grownCheck
    base = grownBase
    allocSize = newSize

    return newSize
}
|
||||
|
||||
/**
 * Places one group of sibling nodes into the double array and recurses into their children.
 *
 * A slot offset `begin` is searched such that `check[begin + code]` is free for every sibling code
 * and `begin` itself has not been handed out before. The siblings are then claimed in `check`,
 * and each sibling's `base` entry is set either to the offset of its own children or, when it has
 * none, to `-(largestValueId) - 1`.
 *
 * NOTE(review): the first element is used as the lower bound and the last element as the upper bound
 * of the codes, so the list is presumably sorted ascending by code - confirm against the order in
 * which `fetch` receives the children.
 *
 * @param siblings the (code, state) pairs being inserted; must not be empty
 *
 * @return the offset (`begin`) at which the siblings were placed
 */
private fun insert(siblings: List<Pair<Int, StateChar>>): Int {
    val lowestCode = siblings[0].first
    val highestCode = siblings[siblings.size - 1].first

    var begin: Int
    var pos = maxOf(lowestCode + 1, nextCheckPos) - 1
    var occupiedSeen = 0
    var firstFreeSeen = false

    if (pos >= allocSize) {
        resize(pos + 1)
    }

    // Find an offset where every sibling's check slot is still 0.
    search@ while (true) {
        pos += 1

        if (pos >= allocSize) {
            resize(pos + 1)
        }

        if (check[pos] != 0) {
            occupiedSeen += 1
            continue
        }
        if (!firstFreeSeen) {
            // remember the first free slot seen during this search
            nextCheckPos = pos
            firstFreeSeen = true
        }

        begin = pos - lowestCode // candidate offset: puts the first sibling exactly on pos
        if (begin + highestCode >= allocSize) {
            // progress may be zero, hence the + 1 to avoid dividing by zero
            val ratio = 1.0 * keySize / (progress + 1)
            val growth = if (ratio < 1.05) 1.05 else ratio
            resize((allocSize * growth).toInt())
        }

        if (used!![begin]) {
            continue
        }

        // the first sibling fits (check[pos] == 0); verify the remaining ones
        for (i in 1 until siblings.size) {
            if (check[begin + siblings[i].first] != 0) {
                continue@search
            }
        }

        break
    }

    // Simple heuristic: when at least 95% of the slots between nextCheckPos and pos are occupied,
    // later searches start directly at pos.
    if (1.0 * occupiedSeen / (pos - nextCheckPos + 1) >= 0.95) {
        nextCheckPos = pos
    }
    used!![begin] = true // in range, because the arrays were resized above where needed

    val slotBase = begin
    val sizeLimit = slotBase + highestCode + 1
    if (sizeLimit >= size) {
        size = sizeLimit
    }

    // claim every sibling slot before descending, so the recursion cannot reuse them
    for (sibling in siblings) {
        check[slotBase + sibling.first] = slotBase
    }

    for ((code, state) in siblings) {
        val slot = slotBase + code
        val children = ArrayList<Pair<Int, StateChar>>(state.getSuccess().entries.size + 1)

        if (fetch(state, children) == 0) {
            // no children at all: store the value id, encoded as a negative number
            base[slot] = 0 - state.largestValueId!! - 1
            progress++
        }
        else {
            // depth first; the result is stored in a local first because the recursive call
            // may replace the base array through resize
            val childBase = insert(children)
            base[slot] = childBase
        }
        state.index = slot
    }
    return slotBase
}
|
||||
|
||||
/**
 * Replaces `base` and `check` with copies of length `size + 65535`, dropping whatever the
 * build allocated beyond that.
 *
 * NOTE(review): the 65535 extra slots are presumably padding so that lookups offset by a character
 * code stay inside the arrays - confirm.
 */
private fun loseWeight() {
    val trimmedLength = size + 65535
    base = base.copyOf(trimmedLength)
    check = check.copyOf(trimmedLength)
}
|
||||
}
|
|
@ -0,0 +1,595 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
@file:Suppress("unused")
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.io.IOException
|
||||
import java.io.ObjectInputStream
|
||||
import java.io.ObjectOutputStream
|
||||
import java.io.Serializable
|
||||
import java.util.*
|
||||
|
||||
/**
|
||||
* An implementation of Aho Corasick algorithm based on Double Array Trie
|
||||
*
|
||||
* Will create a DoubleArray Trie from a Map or InputStream (if previously saved)
|
||||
*
|
||||
* @author hankcs, dorkbox
|
||||
*/
|
||||
abstract class BaseCharTrie<K, V>(map: Map<K, V>?, inputStream: ObjectInputStream?) : Serializable {
|
||||
|
||||
/** `check` array of the double-array trie (ownership marker for each slot). */
private val check: IntArray

/** `base` array of the double-array trie (offset table used to compute transitions). */
private val base: IntArray

/** Failure table of the Aho-Corasick automaton, indexed by state. */
private val fail: IntArray

/** Output table of the automaton: per state, the indices of the values emitted there, or `null` for no output. */
private val output: Array<IntArray?>

/** The values, in the iteration order of the source map (or as read back from the stream). */
internal val v: Array<V>

/** Length of every key, indexed like [v]; filled in by the builder's `addKeyword`. */
internal val l: IntArray

/** The builder's `size` at the end of the build (or the value read back from the stream). */
private val checkSize: Int

init {
    if (map != null) {
        // Build from scratch. `l` has to exist before the builder runs, because the builder writes key lengths into it.
        // NOTE(review): `kotlin.jvm.internal.collectionToArray` is not public API - consider replacing it.
        @Suppress("UNCHECKED_CAST")
        v = kotlin.jvm.internal.collectionToArray(map.values) as Array<V>
        l = IntArray(map.size)

        val trieBuilder = builder().also { it.build(map) }

        fail = trieBuilder.fail
        base = trieBuilder.base
        check = trieBuilder.check

        checkSize = trieBuilder.size
        output = trieBuilder.output
    } else if (inputStream != null) {
        // Restore a previously saved trie. The read order must mirror the write order used by `save`.
        @Suppress("UNCHECKED_CAST")
        v = inputStream.readObject() as Array<V>
        l = inputStream.readObject() as IntArray

        fail = inputStream.readObject() as IntArray
        base = inputStream.readObject() as IntArray
        check = inputStream.readObject() as IntArray
        checkSize = inputStream.readObject() as Int

        @Suppress("UNCHECKED_CAST")
        output = inputStream.readObject() as Array<IntArray?>
    } else {
        throw NullPointerException("Map or InputStream must be specified!")
    }
}
|
||||
|
||||
/**
 * Creates the builder that turns the source map into the trie arrays.
 *
 * This is called from this class's `init` block when a map was supplied, i.e. before any subclass property
 * initializers have run, so implementations must not depend on subclass state.
 *
 * @return a fresh builder; after `build(map)` its `fail`, `base`, `check`, `size` and `output` are copied into this trie
 */
internal abstract fun builder(): BaseCharBuilder<K, V>
|
||||
|
||||
/**
 * Serializes the trie to the given stream.
 *
 * The parts are written in a fixed order (values, key lengths, fail, base, check, check size, output), which is
 * the order the stream-based constructor reads them back in.
 *
 * @param out the stream to write to (it is not flushed or closed here)
 */
@Throws(IOException::class)
fun save(out: ObjectOutputStream) {
    val parts = listOf<Any>(v, l, fail, base, check, checkSize, output)
    for (part in parts) {
        out.writeObject(part)
    }
}
|
||||
|
||||
|
||||
/**
 * Number of entries in this trie (the length of the value array).
 */
val size: Int
    get() = v.size
|
||||
|
||||
/**
 * Gives access to the backing value array, in case the original FSM data is needed.
 *
 * The array is returned as-is (no copy). Its order is the iteration order of the map the trie was built from,
 * or the order stored in the stream it was loaded from.
 *
 * @return the backing array of values
 */
val keywords: Array<V>
    get() = v
|
||||
|
||||
/**
 * Parses the text and tells whether any keyword occurs somewhere inside it.
 *
 * All hits are collected first; use `matches` if only a yes/no answer is needed and the scan should stop early.
 *
 * @return true if there is at least one hit, e.g. "fun.reddit.com" produces a hit for the keyword "reddit.com"
 */
fun hasPartialMatch(text: String): Boolean = parseText(text).isNotEmpty()
|
||||
|
||||
/**
 * Parses the text and returns every keyword occurrence found inside it.
 *
 * @return the hits; each one carries the begin (inclusive) and end (exclusive) offsets of the matched part of the
 *         text, so a hit covering the whole text runs from 0 to the length of the text
 */
fun partialMatch(text: String): List<Hit<V>> = parseText(text)
|
||||
|
||||
/**
 * Parses the text and collects all hits.
 *
 * @param text the text to scan
 *
 * @return the hits in the order they were found (empty if there are none)
 */
fun parseText(text: CharSequence): List<Hit<V>> {
    // The number of hits is not known up front, hence a linked list.
    val hits = LinkedList<Hit<V>>()
    var state = 0

    text.forEachIndexed { index, character ->
        state = getState(state, character)
        // index + 1 is the exclusive end offset of anything that finishes on this character
        storeEmits(index + 1, state, hits)
    }

    return hits
}
|
||||
|
||||
/**
 * Parses the text and hands every hit to the processor.
 *
 * @param text The text
 * @param processor Receives `(begin, end, value)` for each hit; `end` is exclusive
 */
fun parseText(text: CharSequence, processor: IHit<V>) {
    var state = 0
    for (index in text.indices) {
        state = getState(state, text[index])

        // exclusive end offset of a keyword finishing on this character
        val end = index + 1
        output[state]?.forEach { hit ->
            processor.hit(end - l[hit], end, v[hit])
        }
    }
}
|
||||
|
||||
/**
 * Parses the text and hands every hit to a processor that can cancel the scan.
 *
 * Hits use the same coordinates as the other `parseText` overloads: `end` is the exclusive end offset of the
 * keyword in the text (index of its last character + 1) and `begin` is `end` minus the keyword length.
 *
 * @param text The text
 * @param processor Receives `(begin, end, value)` for each hit; returning `false` stops the scan immediately
 */
fun parseText(text: CharSequence, processor: IHitCancellable<V>) {
    var currentState = 0
    for (index in text.indices) {
        // Derived from the index so it cannot drift. Previously the counter started at 1 and was incremented
        // before use, which reported every hit one position too far to the right.
        val position = index + 1

        currentState = getState(currentState, text[index])
        val hitArray = output[currentState] ?: continue

        for (hit in hitArray) {
            val proceed = processor.hit(position - l[hit], position, v[hit])
            if (!proceed) {
                return
            }
        }
    }
}
|
||||
|
||||
/**
 * Parses a char array and hands every hit to the processor.
 *
 * @param text The text
 * @param processor Receives `(begin, end, value)` for each hit; `end` is exclusive
 */
fun parseText(text: CharArray, processor: IHit<V>) {
    var state = 0
    text.forEachIndexed { index, character ->
        state = getState(state, character)

        // exclusive end offset of a keyword finishing on this character
        val end = index + 1
        output[state]?.forEach { hit ->
            processor.hit(end - l[hit], end, v[hit])
        }
    }
}
|
||||
|
||||
/**
 * Parses a char array and hands every hit, including the index of its value, to the processor.
 *
 * @param text The text
 * @param processor Receives `(begin, end, value, index)` for each hit; `end` is exclusive and `index` is the
 *                  position of `value` in the value array
 */
fun parseText(text: CharArray, processor: IHitFull<V>) {
    var state = 0
    text.forEachIndexed { index, character ->
        state = getState(state, character)

        // exclusive end offset of a keyword finishing on this character
        val end = index + 1
        output[state]?.forEach { hit ->
            processor.hit(end - l[hit], end, v[hit], hit)
        }
    }
}
|
||||
|
||||
/**
 * Checks whether the text contains at least one keyword as a substring.
 *
 * The scan stops at the first state that has an output entry.
 *
 * @param text source text to check
 *
 * @return `true` if the text contains at least one keyword
 */
fun matches(text: String): Boolean {
    var state = 0
    return text.any { character ->
        state = getState(state, character)
        output[state] != null
    }
}
|
||||
|
||||
/**
 * Finds the first hit in the text.
 *
 * If several keywords end on the same character, the first entry of that state's output array is reported.
 *
 * @param text source text to check
 *
 * @return the first hit, or `null` if there are no matches
 */
fun findFirst(text: String): Hit<V>? {
    var state = 0
    for (index in text.indices) {
        state = getState(state, text[index])

        val hits = output[state] ?: continue
        val first = hits[0]
        val end = index + 1
        return Hit(end - l[first], end, v[first])
    }
    return null
}
|
||||
|
||||
/**
 * Picks a value from the value array by its index.
 *
 * For efficiency the index is NOT validated here; an out-of-range index fails with the array's own exception.
 *
 * @param index The index
 *
 * @return The value
 */
operator fun get(index: Int): V = v[index]
|
||||
|
||||
|
||||
/**
 * Computes the next state for a character, following the failure function when needed.
 *
 * A direct transition is tried first; while that reports -1 the search restarts from the current state's
 * failure state.
 *
 * @param currentState the state to start from
 * @param character the input character
 *
 * @return the state reached after consuming the character
 */
private tailrec fun getState(currentState: Int, character: Char): Int {
    val next = transitionWithRoot(currentState, character)
    return if (next != -1) next else getState(fail[currentState], character)
}
|
||||
|
||||
/**
 * Appends one [Hit] per output entry of the given state to the list.
 *
 * @param position exclusive end offset of the hits in the parsed text
 * @param currentState the state whose output is stored
 * @param collectedEmits the list the hits are appended to
 */
private fun storeEmits(position: Int, currentState: Int, collectedEmits: MutableList<Hit<V>>) {
    output[currentState]?.mapTo(collectedEmits) { hit ->
        Hit(position - l[hit], position, v[hit])
    }
}
|
||||
|
||||
/**
 * Transition of a state.
 *
 * Looks at slot `current + c.code + 1`: if the check array marks that slot as belonging to `current`, the slot's
 * base value is returned, otherwise -1. No caller of this function is visible in this class.
 */
private fun transition(current: Int, c: Char): Int {
    val slot = current + c.code + 1
    return if (check[slot] == current) base[slot] else -1
}
|
||||
|
||||
/**
 * Transition of a state that never fails at the root.
 *
 * @param nodePos the state to leave
 * @param c the input character
 *
 * @return the target slot if the transition exists; otherwise 0 when `nodePos` is the root (0), else -1
 */
private fun transitionWithRoot(nodePos: Int, c: Char): Int {
    val b = base[nodePos]
    val p = b + c.code + 1
    return when {
        b == check[p] -> p
        nodePos == 0 -> 0
        else -> -1
    }
}
|
||||
|
||||
/**
 * Exact lookup of a key given as a char array.
 *
 * @param keyChars the key (as a Character array)
 *
 * @return the index of the key (usable as a perfect hash), or a negative value if the key is not present
 */
fun exactMatchSearch(keyChars: CharArray): Int =
    exactMatchSearch(keyChars, pos = 0, len = 0, nodePos = 0)
|
||||
|
||||
/**
 * Exact lookup of a key given as a string.
 *
 * @param key the key
 *
 * @return the index of the key (usable as a perfect hash), or a negative value if the key is not present
 */
fun exactMatchSearch(key: String): Int = exactMatchSearch(key.toCharArray(), 0, 0, 0)
|
||||
|
||||
/**
 * Exact lookup of a key, walking the double array from a given node.
 *
 * @param keyChars the char array of the key
 * @param pos the start index in the char array
 * @param len used as the exclusive end index of the walk (`pos until len`); values <= 0 mean `keyChars.size`
 * @param nodePos the node to start searching from; values <= 0 mean the root (0)
 *
 * @return the value index of the key, or -1 if the key is not present
 */
internal fun exactMatchSearch(keyChars: CharArray, pos: Int, len: Int, nodePos: Int): Int {
    val end = if (len <= 0) keyChars.size else len
    val startNode = if (nodePos <= 0) 0 else nodePos

    // Follow one transition per character; give up as soon as a slot is not owned by the current base.
    var b = base[startNode]
    for (i in pos until end) {
        val p = b + keyChars[i].code + 1
        if (b != check[p]) {
            return -1
        }
        b = base[p]
    }

    // A key ends here only if the slot is owned by `b` and holds a negative value, which encodes -(index + 1).
    val n = base[b]
    return if (b == check[b] && n < 0) -n - 1 else -1
}
|
||||
|
||||
// /**
|
||||
// * Just for debug when I wrote it
|
||||
// */
|
||||
// public void debug()
|
||||
// {
|
||||
// System.out.println("base:");
|
||||
// for (int i = 0; i < base.length; i++)
|
||||
// {
|
||||
// if (base[i] < 0)
|
||||
// {
|
||||
// System.out.println(i + " : " + -base[i]);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.println("output:");
|
||||
// for (int i = 0; i < output.length; i++)
|
||||
// {
|
||||
// if (output[i] != null)
|
||||
// {
|
||||
// System.out.println(i + " : " + Arrays.toString(output[i]));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.println("fail:");
|
||||
// for (int i = 0; i < fail.length; i++)
|
||||
// {
|
||||
// if (fail[i] != 0)
|
||||
// {
|
||||
// System.out.println(i + " : " + fail[i]);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.println(this);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public String toString()
|
||||
// {
|
||||
// String infoIndex = "i = ";
|
||||
// String infoChar = "char = ";
|
||||
// String infoBase = "base = ";
|
||||
// String infoCheck = "check= ";
|
||||
// for (int i = 0; i < Math.min(base.length, 200); ++i)
|
||||
// {
|
||||
// if (base[i] != 0 || check[i] != 0)
|
||||
// {
|
||||
// infoChar += " " + (i == check[i] ? " ×" : (char) (i - check[i] - 1));
|
||||
// infoIndex += " " + String.format("%5d", i);
|
||||
// infoBase += " " + String.format("%5d", base[i]);
|
||||
// infoCheck += " " + String.format("%5d", check[i]);
|
||||
// }
|
||||
// }
|
||||
// return "DoubleArrayTrie:" +
|
||||
// "\n" + infoChar +
|
||||
// "\n" + infoIndex +
|
||||
// "\n" + infoBase +
|
||||
// "\n" + infoCheck + "\n" +
|
||||
//// "check=" + Arrays.toString(check) +
|
||||
//// ", base=" + Arrays.toString(base) +
|
||||
//// ", used=" + Arrays.toString(used) +
|
||||
// "size=" + size
|
||||
//// ", length=" + Arrays.toString(length) +
|
||||
//// ", value=" + Arrays.toString(value) +
|
||||
// ;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * A debug class that sequentially outputs variable names and variable values
|
||||
// */
|
||||
// private static class DebugArray
|
||||
// {
|
||||
// Map<String, String> nameValueMap = new LinkedHashMap<String, String>();
|
||||
//
|
||||
// public void add(String name, int value)
|
||||
// {
|
||||
// String valueInMap = nameValueMap.get(name);
|
||||
// if (valueInMap == null)
|
||||
// {
|
||||
// valueInMap = "";
|
||||
// }
|
||||
//
|
||||
// valueInMap += " " + String.format("%5d", value);
|
||||
//
|
||||
// nameValueMap.put(name, valueInMap);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public String toString()
|
||||
// {
|
||||
// String text = "";
|
||||
// for (Map.Entry<String, String> entry : nameValueMap.entrySet())
|
||||
// {
|
||||
// String name = entry.getKey();
|
||||
// String value = entry.getValue();
|
||||
// text += String.format("%-5s", name) + "= " + value + '\n';
|
||||
// }
|
||||
//
|
||||
// return text;
|
||||
// }
|
||||
//
|
||||
// public void println()
|
||||
// {
|
||||
// System.out.print(this);
|
||||
// }
|
||||
// }
|
||||
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.io.ObjectInputStream
|
||||
|
||||
/**
 * Trie whose keys are byte arrays.
 *
 * Built either from a map of keys to values or from a stream; both arguments are passed straight to the base class.
 */
class DoubleArrayByteArrayTrie<V>(
    map: Map<ByteArray, V>? = null,
    inputStream: ObjectInputStream? = null
) : BaseByteTrie<ByteArray, V>(map, inputStream) {

    override fun builder(): BaseByteBuilder<ByteArray, V> {
        return object : BaseByteBuilder<ByteArray, V>() {
            /**
             * Adds a keyword: walks (creating as needed) one state per byte, marks the final state as emitting
             * `index`, and records the keyword length in the trie's length table.
             *
             * @param keyword a keyword
             * @param index the index of the keyword
             */
            override fun addKeyword(keyword: ByteArray, index: Int) {
                var state = this.rootState
                for (byte in keyword) {
                    state = state!!.addState(byte)
                }

                state!!.addEmit(index)
                this@DoubleArrayByteArrayTrie.l[index] = keyword.size
            }
        }
    }

    /**
     * Gets the value for a ByteArray key, like `Map.get`.
     *
     * @param key The key
     *
     * @return the value, or `null` when the exact-match search reports a negative index
     */
    operator fun get(key: ByteArray): V? {
        val index = exactMatchSearch(key)
        return if (index < 0) null else v[index]
    }

    /**
     * Replaces the value stored for an existing key.
     *
     * @param key the key
     * @param value the value
     *
     * @return `true` if the key exists and was updated, `false` if there is no such key
     */
    operator fun set(key: ByteArray, value: V): Boolean {
        val index = exactMatchSearch(key)
        if (index < 0) {
            return false
        }

        v[index] = value
        return true
    }
}
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.io.ObjectInputStream
|
||||
|
||||
/**
 * Trie whose keys are strings.
 *
 * Built either from a map of keys to values or from a stream; both arguments are passed straight to the base class.
 */
class DoubleArrayStringTrie<V>(
    map: Map<String, V>? = null,
    inputStream: ObjectInputStream? = null
) : BaseCharTrie<String, V>(map, inputStream) {

    override fun builder(): BaseCharBuilder<String, V> {
        return object : BaseCharBuilder<String, V>() {
            /**
             * Adds a keyword: walks (creating as needed) one state per character, marks the final state as
             * emitting `index`, and records the keyword length in the trie's length table.
             *
             * @param keyword a keyword
             * @param index the index of the keyword
             */
            override fun addKeyword(keyword: String, index: Int) {
                var state = this.rootState
                for (character in keyword) {
                    state = state!!.addState(character)
                }

                state!!.addEmit(index)
                this@DoubleArrayStringTrie.l[index] = keyword.length
            }
        }
    }

    /**
     * Gets the value for a String key, like `Map.get`.
     *
     * @param key The key
     *
     * @return the value, or `null` when the exact-match search reports a negative index
     */
    operator fun get(key: String): V? {
        val index = exactMatchSearch(key)
        return if (index < 0) null else v[index]
    }

    /**
     * Replaces the value stored for an existing key.
     *
     * @param key the key
     * @param value the value
     *
     * @return `true` if the key exists and was updated, `false` if there is no such key
     */
    operator fun set(key: String, value: V): Boolean {
        val index = exactMatchSearch(key)
        if (index < 0) {
            return false
        }

        v[index] = value
        return true
    }
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.util.*
|
||||
|
||||
/**
 * Creates a Finite State Machine for very fast string matching.
 *
 * This is a wrapper for DoubleArrayTrie, since that class is awkward to use
 */
object FiniteStateMachine {
    /**
     * Gets the version number.
     */
    const val version = "1.0"

    /**
     * Lexicographic order for byte arrays (bytes compared as signed values, shorter array first on a shared prefix).
     *
     * `ByteArray` does not implement `Comparable`, so a `TreeMap` keyed by it needs an explicit comparator;
     * without one the first insertion throws `ClassCastException`. Using a content-based order also means that
     * two arrays with equal content count as the same keyword.
     */
    private val byteArrayOrder = Comparator<ByteArray> { left, right ->
        val shared = minOf(left.size, right.size)
        var result = 0
        var i = 0
        while (result == 0 && i < shared) {
            result = left[i].compareTo(right[i])
            i++
        }
        if (result != 0) result else left.size.compareTo(right.size)
    }

    init {
        // Add this project to the updates system, which verifies this class + UUID + version information
        dorkbox.updates.Updates.add(FiniteStateMachine::class.java, "f3e65e67f9b243c0afcf59d1a3066fee", version)
    }

    /**
     * Builds a string trie from a map of keyword to value.
     */
    fun <V> build(map: Map<String, V>): DoubleArrayStringTrie<V> {
        return DoubleArrayStringTrie(map)
    }

    /**
     * Builds a byte-array trie from a map of keyword to value.
     */
    fun <V> build(map: Map<ByteArray, V>): DoubleArrayByteArrayTrie<V> {
        return DoubleArrayByteArrayTrie(map)
    }

    /**
     * Builds a string trie from a list of keywords; every keyword is mapped to `true`.
     * Duplicates collapse into one entry and keywords are stored in sorted order.
     */
    fun build(strings: List<String>): DoubleArrayStringTrie<Boolean> {
        val map = TreeMap<String, Boolean>()
        for (key in strings) {
            map[key] = true
        }

        return build(map)
    }

    /**
     * Builds a byte-array trie from a list of keywords; every keyword is mapped to `true`.
     * Keywords are de-duplicated and ordered by content.
     */
    fun build(strings: List<ByteArray>): DoubleArrayByteArrayTrie<Boolean> {
        val map = TreeMap<ByteArray, Boolean>(byteArrayOrder)
        for (key in strings) {
            map[key] = true
        }

        return build(map)
    }

    /**
     * Builds a string trie from the given keywords; every keyword is mapped to `true`.
     * Duplicates collapse into one entry and keywords are stored in sorted order.
     */
    fun build(vararg strings: String): DoubleArrayStringTrie<Boolean> {
        val map = TreeMap<String, Boolean>()
        for (key in strings) {
            map[key] = true
        }

        return build(map)
    }

    /**
     * Builds a byte-array trie from the given keywords; every keyword is mapped to `true`.
     * Keywords are de-duplicated and ordered by content.
     */
    fun build(vararg strings: ByteArray): DoubleArrayByteArrayTrie<Boolean> {
        val map = TreeMap<ByteArray, Boolean>(byteArrayOrder)
        for (key in strings) {
            map[key] = true
        }

        return build(map)
    }
}
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
/**
 * A single match found while parsing text.
 *
 * @param V the value type
 * @property begin the beginning index, inclusive.
 * @property end the ending index, exclusive.
 * @property value the value assigned to the keyword
 */
class Hit<V> internal constructor(val begin: Int, val end: Int, val value: V) {

    /** Renders the hit as `[begin:end]=value`. */
    override fun toString(): String = "[%d:%d]=%s".format(begin, end, value.toString())
}
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
/**
 * Callback that receives each keyword hit while text is being parsed.
 */
interface IHit<V> {
    /**
     * Called once per hit. The matched keyword can be recovered with e.g. `text.substring(begin, end)`.
     *
     * @param begin the beginning index, inclusive.
     * @param end the ending index, exclusive.
     * @param value the value assigned to the keyword
     */
    fun hit(begin: Int, end: Int, value: V)
}
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
/**
 * Callback that receives each keyword hit and can cancel the search process.
 */
interface IHitCancellable<V> {
    /**
     * Called once per hit. The matched keyword can be recovered with e.g. `text.substring(begin, end)`.
     *
     * @param begin the beginning index, inclusive.
     * @param end the ending index, exclusive.
     * @param value the value assigned to the keyword
     *
     * @return `true` to continue the search, `false` to stop it.
     */
    fun hit(begin: Int, end: Int, value: V): Boolean
}
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
/**
 * Callback that receives each keyword hit together with the index of its value.
 */
interface IHitFull<V> {
    /**
     * Called once per hit. The matched keyword can be recovered with e.g. `text.substring(begin, end)`.
     *
     * @param begin the beginning index, inclusive.
     * @param end the ending index, exclusive.
     * @param value the value assigned to the keyword
     * @param index the index of the value assigned to the keyword; the integer can be used as a perfect hash value
     */
    fun hit(begin: Int, end: Int, value: V, index: Int)
}
|
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.util.*
|
||||
|
||||
/**
 * A state (node) whose outgoing transitions are keyed by [Byte].
 *
 * A state has the following functions:
 *
 *  * success; successfully transferred to another state
 *  * failure; if you cannot jump along the string, jump to a shallow node
 *  * emits; hit a pattern string
 *
 * The root node is slightly different. The root node has no failure function. Its "failure" refers to moving to the
 * next state according to the string path. Other nodes have a failure state.
 *
 * @author Robert Bor
 */
class StateByte(
    /**
     * The length of the pattern string is also the depth of this state
     */
    val depth: Int = 0
) {

    /**
     * The fail function, if there is no match, jumps to this state.
     */
    private var failure: StateByte? = null

    /**
     * Record mode string as long as this state is reachable.
     * Stays `null` until the first value is added through [addEmit].
     */
    private var emits: MutableSet<Int>? = null

    /**
     * The goto table, also known as the transfer function. Move to the next state based on the next byte of the input
     */
    private val success = TreeMap<Byte, StateByte>()

    /**
     * Corresponding subscript in double array
     */
    var index: Int = 0

    /**
     * The largest emitted value, or `null` when nothing has been emitted.
     *
     * The emits are stored in descending order (see [addEmit]), so the first element is the largest one.
     */
    val largestValueId: Int?
        get() = emits?.firstOrNull()

    /**
     * Whether it is the termination status: a non-root state that has had at least one emit added.
     */
    val isAcceptable: Boolean
        get() = depth > 0 && emits != null

    /**
     * The states directly reachable from this state (the values of the goto table).
     */
    val states: Collection<StateByte>
        get() = success.values

    /**
     * The bytes for which this state has a transition (the keys of the goto table).
     */
    val transitions: Collection<Byte>
        get() = success.keys

    /**
     * Add a matching pattern string (this state corresponds to this pattern string)
     */
    fun addEmit(keyword: Int) {
        // created lazily, in descending order, so that `largestValueId` can simply take the first element
        val target = emits ?: TreeSet<Int>(Collections.reverseOrder<Int>()).also { emits = it }
        target.add(keyword)
    }

    /**
     * Add some matching pattern strings. An empty collection leaves this state untouched.
     */
    fun addEmit(emits: Collection<Int>) {
        emits.forEach { addEmit(it) }
    }

    /**
     * Get the pattern strings represented by this node, or an empty collection when there are none.
     */
    fun emit(): Collection<Int> = this.emits ?: emptyList()

    /**
     * Get the failure status, `null` when none has been set.
     */
    fun failure(): StateByte? = this.failure

    /**
     * Set the failure status, and record the index of [failState] in [fail] at the position of this state's [index].
     */
    fun setFailure(failState: StateByte, fail: IntArray) {
        this.failure = failState
        fail[index] = failState.index
    }

    /**
     * Move to the next state
     *
     * @param character wants to transfer by this byte
     * @param ignoreRootState Whether to ignore the root node, it should be true if the root node calls itself,
     *                        otherwise it is false
     *
     * @return transfer result
     */
    private fun nextState(character: Byte, ignoreRootState: Boolean): StateByte? {
        val target = success[character]
        return when {
            target != null -> target
            // only the root (depth 0) loops back onto itself when there is no transition
            !ignoreRootState && depth == 0 -> this
            else -> null
        }
    }

    /**
     * According to the byte transfer. When the transfer fails, the root node returns itself; any other node
     * returns `null`.
     */
    fun nextState(character: Byte): StateByte? = nextState(character, false)

    /**
     * According to byte transfer, any node transfer failure will return null
     */
    fun nextStateIgnoreRootState(character: Byte): StateByte? = nextState(character, true)

    /**
     * Returns the state reached through [character], creating it (one level deeper than this state) when it does
     * not exist yet.
     */
    fun addState(character: Byte): StateByte = success.getOrPut(character) { StateByte(depth + 1) }

    override fun toString(): String {
        val failureId = failure?.index ?: -1
        return "State{depth=$depth, ID=$index, emits=$emits, success=${success.keys}, " +
               "failureID=$failureId, failure=$failure}"
    }

    /**
     * Get goto table
     */
    fun getSuccess(): Map<Byte, StateByte> = success
}
|
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* AhoCorasickDoubleArrayTrie Project
|
||||
* https://github.com/hankcs/AhoCorasickDoubleArrayTrie
|
||||
*
|
||||
* Copyright 2008-2018 hankcs <me@hankcs.com>
|
||||
* You may modify and redistribute as long as this attribution remains.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm
|
||||
|
||||
import java.util.*
|
||||
|
||||
/**
 * A state (node) whose outgoing transitions are keyed by [Char].
 *
 * A state has the following functions:
 *
 *  * success; successfully transferred to another state
 *  * failure; if you cannot jump along the string, jump to a shallow node
 *  * emits; hit a pattern string
 *
 * The root node is slightly different. The root node has no failure function. Its "failure" refers to moving to the
 * next state according to the string path. Other nodes have a failure state.
 *
 * @author Robert Bor
 */
class StateChar(
    /**
     * The length of the pattern string is also the depth of this state
     */
    val depth: Int = 0
) {

    /**
     * The fail function, if there is no match, jumps to this state.
     */
    private var failure: StateChar? = null

    /**
     * Record mode string as long as this state is reachable.
     * Stays `null` until the first value is added through [addEmit].
     */
    private var emits: MutableSet<Int>? = null

    /**
     * The goto table, also known as the transfer function. Move to the next state based on the next character of
     * the string
     */
    private val success = TreeMap<Char, StateChar>()

    /**
     * Corresponding subscript in double array
     */
    var index: Int = 0

    /**
     * The largest emitted value, or `null` when nothing has been emitted.
     *
     * The emits are stored in descending order (see [addEmit]), so the first element is the largest one.
     */
    val largestValueId: Int?
        get() = emits?.firstOrNull()

    /**
     * Whether it is the termination status: a non-root state that has had at least one emit added.
     */
    val isAcceptable: Boolean
        get() = depth > 0 && emits != null

    /**
     * The states directly reachable from this state (the values of the goto table).
     */
    val states: Collection<StateChar>
        get() = success.values

    /**
     * The characters for which this state has a transition (the keys of the goto table).
     */
    val transitions: Collection<Char>
        get() = success.keys

    /**
     * Add a matching pattern string (this state corresponds to this pattern string)
     */
    fun addEmit(keyword: Int) {
        // created lazily, in descending order, so that `largestValueId` can simply take the first element
        val target = emits ?: TreeSet<Int>(Collections.reverseOrder<Int>()).also { emits = it }
        target.add(keyword)
    }

    /**
     * Add some matching pattern strings. An empty collection leaves this state untouched.
     */
    fun addEmit(emits: Collection<Int>) {
        emits.forEach { addEmit(it) }
    }

    /**
     * Get the pattern strings represented by this node, or an empty collection when there are none.
     */
    fun emit(): Collection<Int> = this.emits ?: emptyList()

    /**
     * Get the failure status, `null` when none has been set.
     */
    fun failure(): StateChar? = this.failure

    /**
     * Set the failure status, and record the index of [failState] in [fail] at the position of this state's [index].
     */
    fun setFailure(failState: StateChar, fail: IntArray) {
        this.failure = failState
        fail[index] = failState.index
    }

    /**
     * Move to the next state
     *
     * @param character wants to transfer by this character
     * @param ignoreRootState Whether to ignore the root node, it should be true if the root node calls itself,
     *                        otherwise it is false
     *
     * @return transfer result
     */
    private fun nextState(character: Char, ignoreRootState: Boolean): StateChar? {
        val target = success[character]
        return when {
            target != null -> target
            // only the root (depth 0) loops back onto itself when there is no transition
            !ignoreRootState && depth == 0 -> this
            else -> null
        }
    }

    /**
     * According to the character transfer. When the transfer fails, the root node returns itself; any other node
     * returns `null`.
     */
    fun nextState(character: Char): StateChar? = nextState(character, false)

    /**
     * According to character transfer, any node transfer failure will return null
     */
    fun nextStateIgnoreRootState(character: Char): StateChar? = nextState(character, true)

    /**
     * Returns the state reached through [character], creating it (one level deeper than this state) when it does
     * not exist yet.
     */
    fun addState(character: Char): StateChar = success.getOrPut(character) { StateChar(depth + 1) }

    override fun toString(): String {
        val failureId = failure?.index ?: -1
        return "State{depth=$depth, ID=$index, emits=$emits, success=${success.keys}, " +
               "failureID=$failureId, failure=$failure}"
    }

    /**
     * Get goto table
     */
    fun getSuccess(): Map<Char, StateChar> = success
}
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm;
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.fsm;
|
||||
|
||||
/**
 * Required for IntelliJ to not complain regarding {@code module-info} for a multi-release jar.
 * This file is completely ignored by the gradle build process.
 */
public
class EmptyClass {}
|
|
@ -0,0 +1,7 @@
|
|||
module dorkbox.collections {
|
||||
exports dorkbox.fsm;
|
||||
|
||||
requires transitive dorkbox.updates;
|
||||
|
||||
requires transitive kotlin.stdlib;
|
||||
}
|
|
@ -0,0 +1,245 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dorkbox.fsm
|
||||
|
||||
import org.junit.Test
|
||||
import java.util.*
|
||||
|
||||
/**
 * Smoke tests that build a [FiniteStateMachine] from maps, lists and varargs of `String` / `ByteArray` keywords and
 * print the results of searching a few texts.
 *
 * These tests only print what was found; they make no assertions, so they pass as long as nothing throws.
 */
class TestTrie {
    /** The keywords every machine in these tests is built from. */
    private val keywords = arrayOf("khanacademy.com", "cnn.com", "google.com", "fun.reddit.com", "reddit.com")

    /** The texts that are searched, in order, by every test. */
    private val searchTexts = listOf("reddit.google.com", "reddit.com", "fun.reddit.com")

    /**
     * Lexicographic ordering for byte arrays (shorter array first when one is a prefix of the other).
     *
     * `ByteArray` is not `Comparable`, so a `TreeMap` keyed by it MUST be given a comparator; without one the very
     * first `put` throws a `ClassCastException`.
     */
    private val byteArrayOrder = Comparator<ByteArray> { left, right ->
        val shared = minOf(left.size, right.size)
        for (i in 0 until shared) {
            val diff = left[i].compareTo(right[i])
            if (diff != 0) {
                return@Comparator diff
            }
        }
        left.size - right.size
    }

    /** The keywords as byte arrays (a fresh array on every call). */
    private fun keywordBytes(): Array<ByteArray> = keywords.map { it.toByteArray() }.toTypedArray()

    /**
     * Runs the same search sequence for every text in [searchTexts], with a blank line between two searches.
     *
     * @param printPartialMatches prints the partial matches found in the given text
     * @param matches returns the result of `matches` for the given text
     */
    private fun searchAll(printPartialMatches: (String) -> Unit, matches: (String) -> Any?) {
        searchTexts.forEachIndexed { position, text ->
            if (position > 0) {
                println()
            }

            println("Searching : $text")
            printPartialMatches(text)
            println("Found: " + matches(text))
        }
    }

    @Test
    fun trieFromStringMap() {
        val map = TreeMap<String, String>()
        for (key in Arrays.asList(*keywords)) {
            map[key] = key
        }

        val fsm = FiniteStateMachine.build(map)
        searchAll({ text -> println(fsm.partialMatch(text)) },
                  { text -> fsm.matches(text) })
    }

    @Test
    fun trieFromByteArrayMap() {
        // the comparator is required: ByteArray has no natural ordering
        val map = TreeMap<ByteArray, String>(byteArrayOrder)
        for (key in Arrays.asList(*keywordBytes())) {
            map[key] = String(key)
        }

        val fsm = FiniteStateMachine.build(map)
        searchAll({ text -> fsm.partialMatch(text.toByteArray()).forEach { println(it.toString()) } },
                  { text -> fsm.matches(text.toByteArray()) })
    }

    @Test
    fun trieFromStringList() {
        val keys = Arrays.asList(*keywords)

        val fsm = FiniteStateMachine.build(keys)
        searchAll({ text -> println(fsm.partialMatch(text)) },
                  { text -> fsm.matches(text) })
    }

    @Test
    fun trieFromByteArrayList() {
        val keys = Arrays.asList(*keywordBytes())

        val fsm = FiniteStateMachine.build(keys)
        searchAll({ text -> fsm.partialMatch(text.toByteArray()).forEach { println(it.toString()) } },
                  { text -> fsm.matches(text.toByteArray()) })
    }

    @Test
    fun trieFromStringVarArg() {
        val fsm = FiniteStateMachine.build(*keywords)
        searchAll({ text -> println(fsm.partialMatch(text)) },
                  { text -> fsm.matches(text) })
    }

    @Test
    fun trieFromByteArrayVarArg() {
        val fsm = FiniteStateMachine.build(*keywordBytes())
        searchAll({ text -> fsm.partialMatch(text.toByteArray()).forEach { println(it.toString()) } },
                  { text -> fsm.matches(text.toByteArray()) })
    }

    @Test
    fun fmsOutput() {
        val fsm = FiniteStateMachine.build(*keywords)

        println("Keywords Orig: " + Arrays.toString(keywords))
        println("Keywords FSM : " + Arrays.toString(fsm.keywords))
    }
}
|
Loading…
Reference in New Issue