diff --git a/LICENSE b/LICENSE index 14a4527..f05d01c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ - - ByteUtilities - Byte manipulation and SHA/xxHash utilities + - ByteUtils - Byte manipulation and SHA/xxHash utilities [The Apache Software License, Version 2.0] https://git.dorkbox.com/dorkbox/ByteUtilities Copyright 2023 diff --git a/build.gradle.kts b/build.gradle.kts index ca5fc05..3202807 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -25,7 +25,7 @@ gradle.startParameter.showStacktrace = ShowStacktrace.ALWAYS // always show th plugins { id("com.dorkbox.GradleUtils") version "3.17" - id("com.dorkbox.Licensing") version "2.22" + id("com.dorkbox.Licensing") version "2.25" id("com.dorkbox.VersionUpdate") version "2.8" id("com.dorkbox.GradlePublish") version "1.18" @@ -102,15 +102,15 @@ dependencies { api("com.dorkbox:Updates:1.1") // listed as compileOnly, since we will be using netty bytebuf utils if we ALREADY are using netty byte buffs. **We don't want a hard dependency.** - compileOnly("io.netty:netty-buffer:4.1.93.Final") + compileOnly("io.netty:netty-buffer:4.1.96.Final") compileOnly("com.esotericsoftware:kryo:5.5.0") // https://github.com/lz4/lz4-java - api("org.lz4:lz4-java:1.8.0") + compileOnly("org.lz4:lz4-java:1.8.0") // for xxHash, optional - testImplementation("io.netty:netty-buffer:4.1.93.Final") + testImplementation("io.netty:netty-buffer:4.1.96.Final") testImplementation("com.esotericsoftware:kryo:5.5.0") -// testImplementation("org.lz4:lz4-java:1.8.0") + testImplementation("org.lz4:lz4-java:1.8.0") testImplementation("junit:junit:4.13.2") } diff --git a/src/dorkbox/bytes/HashExtensions.kt b/src/dorkbox/bytes/HashExtensions.kt index 4ff7203..66096af 100644 --- a/src/dorkbox/bytes/HashExtensions.kt +++ b/src/dorkbox/bytes/HashExtensions.kt @@ -15,9 +15,6 @@ */ package dorkbox.bytes -import net.jpountz.xxhash.StreamingXXHash32 -import net.jpountz.xxhash.StreamingXXHash64 -import net.jpountz.xxhash.XXHashFactory import java.io.File import java.io.InputStream import java.security.MessageDigest @@ -126,15 +123,7 @@ object Hash { } } } - internal val xxHashFactory: ThreadLocal by lazy { - ThreadLocal.withInitial { - try { - return@withInitial XXHashFactory.fastestInstance() - } catch (e: NoSuchAlgorithmException) { - throw RuntimeException("Unable to initialize xxHash algorithm. xxHash doesn't exist?!?") - } - } - } + @Deprecated("Do not use this, it is insecure and prone to attack!") val md5 get() = digest1.get() @@ -176,59 +165,7 @@ private fun updateDigest(digest: MessageDigest, data: InputStream, bufferSize: I } } -/** - * Reads an InputStream and updates the digest for the data - */ -private fun updateDigest32(hash32: StreamingXXHash32, data: InputStream, bufferSize: Int, start: Long, length: Long) { - val skipped = data.skip(start) - if (skipped != start) { - throw IllegalArgumentException("Unable to skip $start bytes. Only able to skip $skipped bytes instead") - } - var readLength = length - val adjustedBufferSize = if (bufferSize > readLength) { - readLength.toInt() - } else { - bufferSize - } - - val buffer = ByteArray(adjustedBufferSize) - var read = 1 - while (read > 0 && readLength > 0) { - read = if (adjustedBufferSize > readLength) { - data.read(buffer, 0, readLength.toInt()) - } else { - data.read(buffer, 0, adjustedBufferSize) - } - hash32.update(buffer, 0, read) - readLength -= read - } -} -private fun updateDigest64(hash64: StreamingXXHash64, data: InputStream, bufferSize: Int, start: Long, length: Long) { - val skipped = data.skip(start) - if (skipped != start) { - throw IllegalArgumentException("Unable to skip $start bytes. Only able to skip $skipped bytes instead") - } - - var readLength = length - val adjustedBufferSize = if (bufferSize > readLength) { - readLength.toInt() - } else { - bufferSize - } - - val buffer = ByteArray(adjustedBufferSize) - var read = 1 - while (read > 0 && readLength > 0) { - read = if (adjustedBufferSize > readLength) { - data.read(buffer, 0, readLength.toInt()) - } else { - data.read(buffer, 0, adjustedBufferSize) - } - hash64.update(buffer, 0, read) - readLength -= read - } -} private fun hash(byteArray: ByteArray, start: Int, length: Int, digest: MessageDigest): ByteArray { digest.reset() @@ -287,23 +224,15 @@ fun ByteArray.sha3_512(start: Int = 0, length: Int = this.size): ByteArray = has /** * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ -fun ByteArray.xxHash32(seed: Int = -0x31bf6a3c, start: Int = 0, length: Int = this.size): Int { - val xxHash = Hash.xxHashFactory.get() - val hash32 = xxHash.newStreamingHash32(seed)!! - - hash32.update(this, start, length) - return hash32.value +fun ByteArray.xxHash32(start: Int = 0, length: Int = this.size, seed: Int = -0x31bf6a3c): Int { + return LZ4Util.xxHash32(this, start, length, seed) } /** * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ -fun ByteArray.xxHash64(seed: Long = -0x31bf6a3c, start: Int = 0, length: Int = this.size): Long { - val xxHash = Hash.xxHashFactory.get() - val hash64 = xxHash.newStreamingHash64(seed)!! - - hash64.update(this, start, length) - return hash64.value +fun ByteArray.xxHash64(start: Int = 0, length: Int = this.size, seed: Long = -0x31bf6a3c): Long { + return LZ4Util.xxHash64(this, start, length, seed) } @@ -346,13 +275,8 @@ fun String.sha3_512(start: Int = 0, length: Int = this.length): ByteArray = hash * * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ -fun String.xxHash32(seed: Int = -0x31bf6a3c, start: Int = 0, length: Int = this.length): Int { - val xxHash = Hash.xxHashFactory.get() - val hash32 = xxHash.newStreamingHash32(seed)!! - - val charToBytes = this.toCharArray().toBytes16(start, length) - hash32.update(charToBytes, 0, charToBytes.size) - return hash32.value +fun String.xxHash32(start: Int = 0, length: Int = this.length, seed: Int = -0x31bf6a3c): Int { + return LZ4Util.xxHash32(this, start, length, seed) } /** @@ -361,12 +285,7 @@ fun String.xxHash32(seed: Int = -0x31bf6a3c, start: Int = 0, length: Int = this. * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ fun String.xxHash64(seed: Long = -0x31bf6a3c, start: Int = 0, length: Int = this.length): Long { - val xxHash = Hash.xxHashFactory.get() - val hash64 = xxHash.newStreamingHash64(seed)!! - - val charToBytes = this.toCharArray().toBytes16(start, length) - hash64.update(charToBytes, 0, charToBytes.size) - return hash64.value + return LZ4Util.xxHash64(this, start, length, seed) } /** @@ -425,19 +344,12 @@ fun String.sha3_512WithSalt(saltBytes: ByteArray, start: Int = 0, length: Int = * * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ -fun String.xxHash32WithSalt(saltBytes: ByteArray, seed: Int = -0x31bf6a3c, start: Int = 0, length: Int = this.length + saltBytes.size): Int { +fun String.xxHash32WithSalt(saltBytes: ByteArray, start: Int = 0, length: Int = this.length + saltBytes.size, seed: Int = -0x31bf6a3c): Int { + require(start >= 0) { "Start ($start) must be >= 0" } require(length >= 0) { "Length ($length) must be >= 0" } require(start < length) { "Start ($start) position must be smaller than the size of the String" } - - val xxHash = Hash.xxHashFactory.get() - val hash32 = xxHash.newStreamingHash32(seed)!! - - val charToBytes = this.toCharArray().toBytes16(start, length) - - hash32.update(charToBytes, 0, charToBytes.size) - hash32.update(saltBytes, 0, saltBytes.size) - return hash32.value + return LZ4Util.xxHash32WithSalt(this, saltBytes, start, length, seed) } /** @@ -445,19 +357,12 @@ fun String.xxHash32WithSalt(saltBytes: ByteArray, seed: Int = -0x31bf6a3c, start * * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ -fun String.xxHash64WithSalt(saltBytes: ByteArray, seed: Long = -0x31bf6a3c, start: Int = 0, length: Int = this.length + saltBytes.size): Long { +fun String.xxHash64WithSalt(saltBytes: ByteArray, start: Int = 0, length: Int = this.length + saltBytes.size, seed: Long = -0x31bf6a3c): Long { require(start >= 0) { "Start ($start) must be >= 0" } require(length >= 0) { "Length ($length) must be >= 0" } require(start < length) { "Start ($start) position must be smaller than the size of the String" } - val xxHash = Hash.xxHashFactory.get() - val hash64 = xxHash.newStreamingHash64(seed)!! - - val charToBytes = this.toCharArray().toBytes16(start, length) - - hash64.update(charToBytes, 0, charToBytes.size) - hash64.update(saltBytes, 0, saltBytes.size) - return hash64.value + return LZ4Util.xxHash64WithSalt(this, saltBytes, start, length, seed) } @@ -514,13 +419,8 @@ fun ByteArray.sha3_512WithSalt(saltBytes: ByteArray, start: Int = 0, length: Int * * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ -fun ByteArray.xxHash32WithSalt(saltBytes: ByteArray, seed: Int = -0x31bf6a3c, start: Int = 0, length: Int = this.size + saltBytes.size): Int { - val xxHash = Hash.xxHashFactory.get() - val hash32 = xxHash.newStreamingHash32(seed)!! - - hash32.update(this, start, length) - hash32.update(saltBytes, 0, saltBytes.size) - return hash32.value +fun ByteArray.xxHash32WithSalt(saltBytes: ByteArray, start: Int = 0, length: Int = this.size + saltBytes.size, seed: Int = -0x31bf6a3c): Int { + return LZ4Util.xxHash32WithSalt(this, saltBytes, start, length, seed) } /** @@ -528,13 +428,8 @@ fun ByteArray.xxHash32WithSalt(saltBytes: ByteArray, seed: Int = -0x31bf6a3c, st * * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ -fun ByteArray.xxHash64WithSalt(saltBytes: ByteArray, seed: Long = -0x31bf6a3c, start: Int = 0, length: Int = this.size + saltBytes.size): Long { - val xxHash = Hash.xxHashFactory.get() - val hash64 = xxHash.newStreamingHash64(seed)!! - - hash64.update(this, start, length) - hash64.update(saltBytes, 0, saltBytes.size) - return hash64.value +fun ByteArray.xxHash64WithSalt(saltBytes: ByteArray, start: Int = 0, length: Int = this.size + saltBytes.size, seed: Long = -0x31bf6a3c): Long { + return LZ4Util.xxHash64WithSalt(this, saltBytes, start, length, seed) } @@ -580,20 +475,15 @@ fun File.sha3_512(start: Int = 0, length: Long = this.length(), bufferSize: Int * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ fun File.xxHash32(start: Long = 0L, length: Long = this.length(), bufferSize: Int = 4096, seed: Int = -0x31bf6a3c): Int { - val xxHash = Hash.xxHashFactory.get() - val hash32 = xxHash.newStreamingHash32(seed)!! - require(this.isFile) { "Unable open as file: ${this.absolutePath}" } require(this.canRead()) { "Unable to read file: ${this.absolutePath}" } require(start >= 0) { "Start ($start) must be >= 0" } require(length >= 0) { "Length ($length) must be >= 0" } - require(start < length()) { "Start ($start) position must be smaller than the size of the file" } + require(start < this.length()) { "Start ($start) position must be smaller than the size of the file" } - this.inputStream().use { - updateDigest32(hash32, it, bufferSize, start, length) - return hash32.value - } + + return LZ4Util.xxHash32(this, start, length, bufferSize, seed) } /** @@ -602,25 +492,17 @@ fun File.xxHash32(start: Long = 0L, length: Long = this.length(), bufferSize: In * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ fun File.xxHash64(start: Long = 0L, length: Long = this.length(), bufferSize: Int = 4096, seed: Long = -0x31bf6a3c): Long { - val xxHash = Hash.xxHashFactory.get() - val hash64 = xxHash.newStreamingHash64(seed)!! - require(this.isFile) { "Unable open as file: ${this.absolutePath}" } require(this.canRead()) { "Unable to read file: ${this.absolutePath}" } require(start >= 0) { "Start ($start) must be >= 0" } require(length >= 0) { "Length ($length) must be >= 0" } - require(start < length()) { "Start ($start) position must be smaller than the size of the file" } + require(start < this.length()) { "Start ($start) position must be smaller than the size of the file" } - this.inputStream().use { - updateDigest64(hash64, it, bufferSize, start, length) - return hash64.value - } + return LZ4Util.xxHash64(this, start, length, bufferSize, seed) } - - /** * gets the SHA1 hash of the input stream */ @@ -656,19 +538,7 @@ fun InputStream.sha3_512(bufferSize: Int = 4096): ByteArray = hash(this, bufferS * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ fun InputStream.xxHash32(bufferSize: Int = 4096, seed: Int = -0x31bf6a3c): Int { - val xxHash = Hash.xxHashFactory.get() - val hash32 = xxHash.newStreamingHash32(seed)!! - - val buffer = ByteArray(bufferSize) - var read: Int - - this.use { - while (it.read(buffer).also { read = it } > 0) { - hash32.update(buffer, 0, read) - } - } - - return hash32.value + return LZ4Util.xxHash32(this, bufferSize, seed) } /** @@ -677,17 +547,5 @@ fun InputStream.xxHash32(bufferSize: Int = 4096, seed: Int = -0x31bf6a3c): Int { * @param seed used to initialize the hash value (for the xxhash seed), use whatever value you want, but always the same */ fun InputStream.xxHash64(bufferSize: Int = 4096, seed: Long = -0x31bf6a3c): Long { - val xxHash = Hash.xxHashFactory.get() - val hash64 = xxHash.newStreamingHash64(seed)!! - - val buffer = ByteArray(bufferSize) - var read: Int - - this.use { - while (it.read(buffer).also { read = it } > 0) { - hash64.update(buffer, 0, read) - } - } - - return hash64.value + return LZ4Util.xxHash64(this, bufferSize, seed) } diff --git a/src/dorkbox/bytes/LZ4Util.kt b/src/dorkbox/bytes/LZ4Util.kt new file mode 100644 index 0000000..93f8a93 --- /dev/null +++ b/src/dorkbox/bytes/LZ4Util.kt @@ -0,0 +1,216 @@ +/* + * Copyright 2023 dorkbox, llc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dorkbox.bytes + +import net.jpountz.xxhash.StreamingXXHash32 +import net.jpountz.xxhash.StreamingXXHash64 +import net.jpountz.xxhash.XXHashFactory +import java.io.File +import java.io.InputStream +import java.security.NoSuchAlgorithmException + +object LZ4Util { + private val xxHashFactory: ThreadLocal by lazy { + ThreadLocal.withInitial { + try { + return@withInitial XXHashFactory.fastestInstance() + } catch (e: NoSuchAlgorithmException) { + throw RuntimeException("Unable to initialize xxHash algorithm. xxHash doesn't exist?!?") + } + } + } + + /** + * Reads an InputStream and updates the digest for the data + */ + private fun updateDigest32(hash32: StreamingXXHash32, data: InputStream, bufferSize: Int, start: Long, length: Long) { + val skipped = data.skip(start) + if (skipped != start) { + throw IllegalArgumentException("Unable to skip $start bytes. Only able to skip $skipped bytes instead") + } + + var readLength = length + val adjustedBufferSize = if (bufferSize > readLength) { + readLength.toInt() + } else { + bufferSize + } + + val buffer = ByteArray(adjustedBufferSize) + var read = 1 + while (read > 0 && readLength > 0) { + read = if (adjustedBufferSize > readLength) { + data.read(buffer, 0, readLength.toInt()) + } else { + data.read(buffer, 0, adjustedBufferSize) + } + hash32.update(buffer, 0, read) + readLength -= read + } + } + private fun updateDigest64(hash64: StreamingXXHash64, data: InputStream, bufferSize: Int, start: Long, length: Long) { + val skipped = data.skip(start) + if (skipped != start) { + throw IllegalArgumentException("Unable to skip $start bytes. Only able to skip $skipped bytes instead") + } + + var readLength = length + val adjustedBufferSize = if (bufferSize > readLength) { + readLength.toInt() + } else { + bufferSize + } + + val buffer = ByteArray(adjustedBufferSize) + var read = 1 + while (read > 0 && readLength > 0) { + read = if (adjustedBufferSize > readLength) { + data.read(buffer, 0, readLength.toInt()) + } else { + data.read(buffer, 0, adjustedBufferSize) + } + hash64.update(buffer, 0, read) + readLength -= read + } + } + + fun xxHash32(file: File, start: Long, length: Long, bufferSize: Int, seed: Int): Int { + val xxHash = xxHashFactory.get() + val hash32 = xxHash.newStreamingHash32(seed)!! + + file.inputStream().use { + updateDigest32(hash32, it, bufferSize, start, length) + return hash32.value + } + } + + fun xxHash64(file: File, start: Long, length: Long, bufferSize: Int, seed: Long): Long { + val xxHash = xxHashFactory.get() + val hash64 = xxHash.newStreamingHash64(seed)!! + + file.inputStream().use { + updateDigest64(hash64, it, bufferSize, start, length) + return hash64.value + } + } + + fun xxHash32(byteArray: ByteArray, start: Int, length: Int, seed: Int): Int { + val xxHash = xxHashFactory.get() + val hash32 = xxHash.newStreamingHash32(seed)!! + + hash32.update(byteArray, start, length) + return hash32.value + } + + fun xxHash64(byteArray: ByteArray, start: Int, length: Int, seed: Long): Long { + val xxHash = xxHashFactory.get() + val hash64 = xxHash.newStreamingHash64(seed)!! + + hash64.update(byteArray, start, length) + return hash64.value + } + + fun xxHash32(string: String, start: Int, length: Int, seed: Int): Int { + val xxHash = xxHashFactory.get() + val hash32 = xxHash.newStreamingHash32(seed)!! + + val charToBytes = string.toCharArray().toBytes16(start, length) + hash32.update(charToBytes, 0, charToBytes.size) + return hash32.value + } + + fun xxHash64(string: String, start: Int, length: Int, seed: Long): Long { + val xxHash = xxHashFactory.get() + val hash64 = xxHash.newStreamingHash64(seed)!! + + val charToBytes = string.toCharArray().toBytes16(start, length) + hash64.update(charToBytes, 0, charToBytes.size) + return hash64.value + } + + fun xxHash32WithSalt(string: String, saltBytes: ByteArray, start: Int, length: Int, seed: Int): Int { + val xxHash = xxHashFactory.get() + val hash32 = xxHash.newStreamingHash32(seed)!! + + val charToBytes = string.toCharArray().toBytes16(start, length) + + hash32.update(charToBytes, 0, charToBytes.size) + hash32.update(saltBytes, 0, saltBytes.size) + return hash32.value + } + + fun xxHash64WithSalt(string: String, saltBytes: ByteArray, start: Int, length: Int, seed: Long): Long { + val xxHash = xxHashFactory.get() + val hash64 = xxHash.newStreamingHash64(seed)!! + + val charToBytes = string.toCharArray().toBytes16(start, length) + + hash64.update(charToBytes, 0, charToBytes.size) + hash64.update(saltBytes, 0, saltBytes.size) + return hash64.value + } + + fun xxHash32WithSalt(string: ByteArray, saltBytes: ByteArray, start: Int, length: Int, seed: Int): Int { + val xxHash = xxHashFactory.get() + val hash32 = xxHash.newStreamingHash32(seed)!! + + hash32.update(string, start, length) + hash32.update(saltBytes, 0, saltBytes.size) + return hash32.value + } + + fun xxHash64WithSalt(string: ByteArray, saltBytes: ByteArray, start: Int, length: Int, seed: Long): Long { + val xxHash = xxHashFactory.get() + val hash64 = xxHash.newStreamingHash64(seed)!! + + hash64.update(string, start, length) + hash64.update(saltBytes, 0, saltBytes.size) + return hash64.value + } + + fun xxHash32(inputStream: InputStream, bufferSize: Int, seed: Int): Int { + val xxHash = xxHashFactory.get() + val hash32 = xxHash.newStreamingHash32(seed)!! + + val buffer = ByteArray(bufferSize) + var read: Int + + inputStream.use { + while (it.read(buffer).also { read = it } > 0) { + hash32.update(buffer, 0, read) + } + } + + return hash32.value + } + + fun xxHash64(inputStream: InputStream, bufferSize: Int, seed: Long): Long { + val xxHash = xxHashFactory.get() + val hash64 = xxHash.newStreamingHash64(seed)!! + + val buffer = ByteArray(bufferSize) + var read: Int + + inputStream.use { + while (it.read(buffer).also { read = it } > 0) { + hash64.update(buffer, 0, read) + } + } + + return hash64.value + } +} diff --git a/src9/module-info.java b/src9/module-info.java index bcfe12d..b04e8c8 100644 --- a/src9/module-info.java +++ b/src9/module-info.java @@ -1,4 +1,4 @@ -module dorkbox.bytes { +module dorkbox.byteUtils { exports dorkbox.bytes; requires transitive dorkbox.updates; @@ -6,6 +6,7 @@ module dorkbox.bytes { requires static com.esotericsoftware.kryo; requires static io.netty.common; requires static io.netty.buffer; + requires static org.lz4.java; requires transitive kotlin.stdlib; }