Cleaned up dns util
parent
60993ddb81
commit
bef18b3fdd
|
@ -155,13 +155,12 @@ object Dns {
|
|||
this.listTypes = mutableListOf()
|
||||
var domainType: DomainType? = null
|
||||
|
||||
var exceptions: MutableSet<String>? = null
|
||||
var rules: MutableSet<String>? = null
|
||||
var wildcards: MutableSet<String>? = null
|
||||
var exceptions: MutableSet<String> = mutableSetOf()
|
||||
var rules: MutableSet<String> = mutableSetOf()
|
||||
var wildcards: MutableSet<String> = mutableSetOf()
|
||||
|
||||
// now load this file into memory, so it's faster to process.
|
||||
val tldResource = Dns.javaClass.getResourceAsStream("/public_suffix_list.dat")
|
||||
|
||||
tldResource?.bufferedReader()?.useLines { lines ->
|
||||
lines.forEach { line ->
|
||||
if (line.isEmpty()) {
|
||||
|
@ -177,22 +176,12 @@ object Dns {
|
|||
}
|
||||
} else {
|
||||
if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) {
|
||||
if (rules == null) {
|
||||
rules = mutableSetOf()
|
||||
}
|
||||
if (exceptions == null) {
|
||||
exceptions = mutableSetOf()
|
||||
}
|
||||
if (wildcards == null) {
|
||||
wildcards = mutableSetOf()
|
||||
}
|
||||
|
||||
listTypes.add(PublicSuffixList(domainType!!, rules!!, exceptions!!, wildcards!!))
|
||||
listTypes.add(PublicSuffixList(domainType!!, rules, exceptions, wildcards))
|
||||
|
||||
domainType = null
|
||||
rules = null
|
||||
exceptions = null
|
||||
wildcards = null
|
||||
rules = mutableSetOf()
|
||||
exceptions = mutableSetOf()
|
||||
wildcards = mutableSetOf()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -217,33 +206,17 @@ object Dns {
|
|||
// *.kawasaki.jp
|
||||
//!city.kawasaki.jp
|
||||
line = line.substring(1)
|
||||
|
||||
if (exceptions == null) {
|
||||
exceptions = mutableSetOf()
|
||||
}
|
||||
|
||||
exceptions!!.add(line)
|
||||
exceptions.add(line)
|
||||
} else if (line.startsWith("*")) {
|
||||
// *.kawasaki.jp
|
||||
// motors.kawasaki.jp IS A TLD
|
||||
// kawasaki.jp IS NOT a TLD
|
||||
// city.kawasaki.jp IS NOT a TLD (!city.kawasaki.jp is a rule)
|
||||
line = line.substring(2)
|
||||
|
||||
if (wildcards == null) {
|
||||
wildcards = mutableSetOf()
|
||||
}
|
||||
|
||||
|
||||
wildcards!!.add(line)
|
||||
wildcards.add(line)
|
||||
} else {
|
||||
// this is a normal rule
|
||||
|
||||
if (rules == null) {
|
||||
rules = mutableSetOf()
|
||||
}
|
||||
|
||||
rules!!.add(line)
|
||||
rules.add(line)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005-2012, Paul Tuckey
|
||||
* All rights reserved.
|
||||
* ====================================================================
|
||||
* Licensed under the BSD License. Text as follows.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
* - Neither the name tuckey.org nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* https://www.talisman.org/%7Eerlkonig/misc/lunatech%5Ewhat-every-webdev-must-know-about-url-encoding/
|
||||
*/
|
||||
package dorkbox.netUtil.web
|
||||
|
||||
|
||||
import java.io.UnsupportedEncodingException
|
||||
import java.net.URISyntaxException
|
||||
import java.nio.charset.Charset
|
||||
|
||||
/**
 * Decodes percent-encoded ("URL-encoded") text.
 *
 * The input is first converted to US-ASCII bytes, every `%XX` escape is replaced by the byte it denotes, and the
 * resulting byte sequence is finally interpreted using the charset supplied by the caller.
 *
 * NOTE(review): because of the US-ASCII conversion, characters outside US-ASCII that appear literally (unescaped)
 * in the input are not preserved - confirm callers only pass already-escaped input.
 */
object URLDecoder {
    private val PERCENT: Byte = '%'.code.toByte()
    private val PLUS: Byte = '+'.code.toByte()
    private val SPACE: Byte = ' '.code.toByte()

    /**
     * Decodes a path with an optional query string.
     *
     * Everything before the first `?` is decoded with [decodePath], everything after it with [decodeQuery]. The
     * separating `?` itself is kept in the result.
     *
     * @param url the encoded path, optionally followed by `?` and a query string
     * @param charset the charset used to interpret the decoded bytes
     * @return the decoded text
     * @throws URISyntaxException if an escape sequence is truncated or contains a non-hex digit
     */
    @Throws(URISyntaxException::class)
    fun decodeURL(url: String, charset: Charset): String {
        val queryStart = url.indexOf('?')
        if (queryStart == -1) {
            return decodePath(url, charset)
        }

        // the path is decoded first, so a malformed path is reported before a malformed query
        val decodedPath = decodePath(url.substring(0, queryStart), charset)
        return decodedPath + '?' + decodeQuery(url.substring(queryStart + 1), charset)
    }

    /**
     * Decodes a path. A `+` is kept as-is.
     *
     * @throws URISyntaxException if an escape sequence is truncated or contains a non-hex digit
     */
    @Throws(URISyntaxException::class)
    fun decodePath(path: String, charset: Charset): String {
        return decodeURLEncoded(path, false, charset)
    }

    /**
     * Decodes a query string. A `+` is decoded as a space.
     *
     * @throws URISyntaxException if an escape sequence is truncated or contains a non-hex digit
     */
    @Throws(URISyntaxException::class)
    fun decodeQuery(query: String, charset: Charset): String {
        return decodeURLEncoded(query, true, charset)
    }

    /**
     * Decodes every `%XX` escape in [part].
     *
     * @param part the encoded text
     * @param query when `true`, a `+` is additionally decoded as a space
     * @param charset the charset used to interpret the decoded bytes
     * @return the decoded text
     * @throws URISyntaxException if a `%` is not followed by two characters, or if either of them is not a hex digit.
     *  In both cases the exception carries the complete [part] as its input.
     */
    @Throws(URISyntaxException::class)
    fun decodeURLEncoded(part: String, query: Boolean, charset: Charset): String {
        val ascii = part.toByteArray(Charsets.US_ASCII)

        // decoding never grows the data: every escape collapses three bytes into one
        val decoded = ByteArray(ascii.size)
        var written = 0
        var index = 0

        while (index < ascii.size) {
            val current = ascii[index]
            val value: Byte = when {
                current == PERCENT -> {
                    if (index + 2 >= ascii.size) {
                        throw URISyntaxException(part, "Invalid URL-encoded string at char $index")
                    }

                    val high = hexDigit(part, ascii, index + 1)
                    val low = hexDigit(part, ascii, index + 2)
                    index += 2 // skip the two hex digits that were just consumed
                    (high * 16 + low).toByte()
                }
                query && current == PLUS -> SPACE
                else -> current
            }

            decoded[written++] = value
            index++
        }

        // now decode
        return String(decoded, 0, written, charset)
    }

    /**
     * Returns the numeric value (0-15) of the hex digit stored at [index] of [ascii].
     *
     * @throws URISyntaxException naming the complete [part] and the position of the offending character, if the byte
     *  is not one of `0-9`, `a-f` or `A-F`
     */
    @Throws(URISyntaxException::class)
    private fun hexDigit(part: String, ascii: ByteArray, index: Int): Int {
        return when (val digit = ascii[index].toInt().toChar()) {
            in '0'..'9' -> digit - '0'
            in 'a'..'f' -> digit - 'a' + 10
            in 'A'..'F' -> digit - 'A' + 10
            else -> throw URISyntaxException(part, "Invalid URL-encoded string at char $index")
        }
    }
}
|
|
@ -0,0 +1,264 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005-2012, Paul Tuckey
|
||||
* All rights reserved.
|
||||
* ====================================================================
|
||||
* Licensed under the BSD License. Text as follows.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following
|
||||
* disclaimer in the documentation and/or other materials provided
|
||||
* with the distribution.
|
||||
* - Neither the name tuckey.org nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* https://www.talisman.org/%7Eerlkonig/misc/lunatech%5Ewhat-every-webdev-must-know-about-url-encoding/
|
||||
*/
|
||||
|
||||
package dorkbox.netUtil.web
|
||||
|
||||
import java.io.UnsupportedEncodingException
|
||||
import java.nio.charset.Charset
|
||||
import java.util.*
|
||||
|
||||
/**
 * URL-encoding utility for each URL part according to the RFC specs
 * see the rfc at http://www.ietf.org/rfc/rfc2396.txt
 *
 * The public [BitSet] tables are indexed by character code: a set bit means that the character belongs to the
 * named character class.
 *
 * @author stephane
 */
object URLEncoder {
    /**
     * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
     */
    val MARK: BitSet = bitSetOf("-_.!~*'()")

    /**
     * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" |
     * "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
     */
    val LOW_ALPHA: BitSet = bitSetOfRange('a', 'z')

    /**
     * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" |
     * "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
     */
    val UP_ALPHA: BitSet = bitSetOfRange('A', 'Z')

    /**
     * alpha = lowalpha | upalpha
     */
    val ALPHA: BitSet = unionOf(LOW_ALPHA, UP_ALPHA)

    /**
     * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
     */
    val DIGIT: BitSet = bitSetOfRange('0', '9')

    /**
     * alphanum = alpha | digit
     */
    val ALPHANUM: BitSet = unionOf(ALPHA, DIGIT)

    /**
     * unreserved = alphanum | mark
     */
    val UNRESERVED: BitSet = unionOf(ALPHANUM, MARK)

    /**
     * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
     *
     * Note: we don't allow escaped here since we will escape it ourselves, so we don't want to allow them in the
     * unescaped sequences
     */
    val PCHAR: BitSet = unionOf(UNRESERVED, bitSetOf(":@&=+\$,"))

    /**
     * Encodes a string to be a valid path parameter URL, which means it can contain PCHAR* only (do not put the
     * leading ";" or it will be escaped).
     *
     * This delegates to [encodePathSegment]; both apply exactly the same escaping.
     *
     * @param pathParam the raw (unescaped) path parameter
     * @param charset the charset used to turn escaped characters into bytes
     * @return the escaped path parameter
     */
    @Throws(UnsupportedEncodingException::class)
    fun encodePathParam(pathParam: String, charset: Charset): String {
        return encodePathSegment(pathParam, charset)
    }

    /**
     * Encodes a string to be a valid path segment URL, which means it can contain PCHAR* only (do not put path
     * parameters or they will be escaped).
     *
     * Characters in [PCHAR] are copied unchanged. Every other character is converted to bytes using [charset], and
     * each byte is written as `%` followed by two lowercase hex digits.
     *
     * The text is processed one Unicode code point at a time, so a character outside the Basic Multilingual Plane
     * (stored as a surrogate pair) is escaped as the bytes of the whole character rather than as two unmappable
     * halves.
     *
     * @param pathSegment the raw (unescaped) path segment
     * @param charset the charset used to turn escaped characters into bytes
     * @return the escaped path segment
     */
    @Throws(UnsupportedEncodingException::class)
    fun encodePathSegment(pathSegment: String, charset: Charset): String {
        // start at *3 for the worst case when everything is %encoded on one byte
        val encoded = StringBuilder(pathSegment.length * 3)

        var index = 0
        while (index < pathSegment.length) {
            val codePoint = pathSegment.codePointAt(index)
            val charCount = Character.charCount(codePoint)

            if (PCHAR[codePoint]) {
                // PCHAR only holds single-char (ASCII) entries, so this conversion cannot truncate
                encoded.append(codePoint.toChar())
            } else {
                val bytes = pathSegment.substring(index, index + charCount).toByteArray(charset)
                for (b in bytes) {
                    // make it unsigned (safe, since we only goto max 255, but makes conversion to hex easier)
                    val u8 = b.toInt() and 0xFF
                    encoded.append('%')
                    if (u8 < 16) encoded.append('0')
                    encoded.append(Integer.toHexString(u8))
                }
            }

            index += charCount
        }

        return encoded.toString()
    }

    /** Creates a set containing the code of every character in [chars]. */
    private fun bitSetOf(chars: String): BitSet {
        val bits = BitSet()
        chars.forEach { bits.set(it.code) }
        return bits
    }

    /** Creates a set containing the codes of all characters from [first] to [last], both inclusive. */
    private fun bitSetOfRange(first: Char, last: Char): BitSet {
        val bits = BitSet()
        bits.set(first.code, last.code + 1) // the upper bound of BitSet.set(from, to) is exclusive
        return bits
    }

    /** Creates a new set holding every bit that is set in any of [sets]; the arguments are not modified. */
    private fun unionOf(vararg sets: BitSet): BitSet {
        val union = BitSet()
        sets.forEach { union.or(it) }
        return union
    }
}
|
|
@ -0,0 +1,689 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.netUtil.web
|
||||
|
||||
|
||||
import dorkbox.netUtil.Dns
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.withContext
|
||||
import java.io.InputStream
|
||||
import java.net.HttpURLConnection
|
||||
import java.net.URL
|
||||
import java.net.URLDecoder
|
||||
import java.net.UnknownHostException
|
||||
import java.security.cert.X509Certificate
|
||||
import java.util.regex.*
|
||||
import javax.net.ssl.HostnameVerifier
|
||||
import javax.net.ssl.HttpsURLConnection
|
||||
import javax.net.ssl.SSLContext
|
||||
import javax.net.ssl.TrustManager
|
||||
import javax.net.ssl.X509TrustManager
|
||||
|
||||
@Suppress("unused")
|
||||
object WebUtil {
|
||||
private val SECOND_LEVEL_DOMAIN_PATTERN = Pattern.compile("^(https?:\\/\\/)?([\\dA-Za-z\\.-]+)\\.([a-z\\.]{2,6})([\\w \\.-]*)*$")
|
||||
|
||||
/**
|
||||
* Regular expression to match all IANA top-level domains.
|
||||
* List accurate as of 2010/02/05. List taken from:
|
||||
* http://data.iana.org/TLD/tlds-alpha-by-domain.txt
|
||||
* This pattern is auto-generated by frameworks/base/common/tools/make-iana-tld-pattern.py
|
||||
*/
|
||||
@Volatile
|
||||
private var TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL = ("((aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|academy|accenture|accountant|accountants|aco|actor|adac|ads|adult|aeg|aero|aetna|afamilycompany|afl|africa|agakhan|agency|aig|airbus|airforce|airtel|akdn|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|amazon|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|aol|apartments|app|apple|aquarelle|arab|aramco|archi|army|arpa|art|arte|asda|asia|associates|athleta|attorney|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aws|axa|azure|a[cdefgilmoqrstuwxz])"
|
||||
+ "|(baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bbc|bbt|bbva|bcg|bcn|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bharti|bible|bid|bike|bing|bingo|bio|biz|black|blackfriday|blockbuster|blog|bloomberg|blue|bms|bmw|bnpparibas|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|bradesco|bridgestone|broadway|broker|brother|brussels|budapest|bugatti|build|builders|business|buy|buzz|bzh|b[abdefghijmnorstvwyz])"
|
||||
+ "|(cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|ceb|center|ceo|cern|cfa|cfd|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|church|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cpa|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cuisinella|cymru|cyou|c[acdfghiklmnoruvwxyz])"
|
||||
+ "|(dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dnp|docs|doctor|dog|domains|dot|download|drive|dtv|dubai|duck|dunlop|dupont|durban|dvag|dvr|d[ejkmoz])"
|
||||
+ "|(earth|eat|eco|edeka|edu|education|email|emerck|energy|engineer|engineering|enterprises|epson|equipment|ericsson|erni|esq|estate|etisalat|eurovision|eus|events|exchange|expert|exposed|express|extraspace|e[cegrstu])"
|
||||
+ "|(fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|flickr|flights|flir|florist|flowers|fly|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|f[ijkmor])"
|
||||
+ "|(gal|gallery|gallo|gallup|game|games|gap|garden|gay|gbiz|gdn|gea|gent|genting|george|ggee|gift|gifts|gives|giving|glade|glass|gle|global|globo|gmail|gmbh|gmo|gmx|godaddy|gold|goldpoint|golf|goo|goodyear|goog|google|gop|got|gov|grainger|graphics|gratis|green|gripe|grocery|group|guardian|gucci|guge|guide|guitars|guru|g[abdefghilmnpqrstuwy])"
|
||||
+ "|(hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hkt|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hsbc|hughes|hyatt|hyundai|h[kmnrtu])"
|
||||
+ "|(ibm|icbc|ice|icu|ieee|ifm|ikano|imamat|imdb|immo|immobilien|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|ipiranga|irish|ismaili|ist|istanbul|itau|itv|iveco|i[delmnoqrst])"
|
||||
+ "|(jaguar|java|jcb|jcp|jeep|jetzt|jewelry|jio|jll|jmp|jnj|jobs|joburg|jot|joy|jpmorgan|jprs|juegos|juniper|j[emop])"
|
||||
+ "|(kaufen|kddi|kerryhotels|kerrylogistics|kerryproperties|kfh|kia|kim|kinder|kindle|kitchen|kiwi|koeln|komatsu|kosher|kpmg|kpn|krd|kred|kuokgroup|kyoto|k[eghimnprwyz])"
|
||||
+ "|(lacaixa|lamborghini|lamer|lancaster|lancia|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|llc|llp|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|ltd|ltda|lundbeck|lupin|luxe|luxury|l[abcikrstuvy])"
|
||||
+ "|(macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mckinsey|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|miami|microsoft|mil|mini|mint|mit|mitsubishi|mlb|mls|mma|mobi|mobile|moda|moe|moi|mom|monash|money|monster|mormon|mortgage|moscow|moto|motorcycles|mov|movie|msd|mtn|mtr|museum|mutual|m[acdeghklmnopqrstuvwxyz])"
|
||||
+ "|(nab|nagoya|name|nationwide|natura|navy|nba|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nfl|ngo|nhk|nico|nike|nikon|ninja|nissan|nissay|nokia|northwesternmutual|norton|now|nowruz|nowtv|nra|nrw|ntt|nyc|n[acefgilopruz])"
|
||||
+ "|(obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsuka|ott|ovh|om)"
|
||||
+ "|(page|panasonic|paris|pars|partners|parts|party|passagens|pay|pccw|pet|pfizer|pharmacy|phd|philips|phone|photo|photography|photos|physio|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|place|play|playstation|plumbing|plus|pnc|pohl|poker|politie|porn|post|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|pub|pwc|p[aefghklmnrstwy])"
|
||||
+ "|(qpon|quebec|quest|qvc|qa)"
|
||||
+ "|(racing|radio|raid|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|ril|rio|rip|rmit|rocher|rocks|rodeo|rogers|room|rsvp|rugby|ruhr|run|rwe|ryukyu|r[eosuw])"
|
||||
+ "|(saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sbi|sbs|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scot|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|silk|sina|singles|site|ski|skin|sky|skype|sling|smart|smile|sncf|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|sport|spot|spreadbetting|srl|stada|staples|star|statebank|statefarm|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|sucks|supplies|supply|support|surf|surgery|suzuki|swatch|swiftcover|swiss|sydney|systems|s[abcdeghijklmnorstuvxyz])"
|
||||
+ "|(tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tci|tdk|team|tech|technology|tel|temasek|tennis|teva|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tjmaxx|tjx|tkmaxx|tmall|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tube|tui|tunes|tushu|tvs|t[cdfghjklmnortvwz])"
|
||||
+ "|(ubank|ubs|unicom|university|uno|uol|ups|u[agksyz])"
|
||||
+ "|(vacations|vana|vanguard|vegas|ventures|verisign|versicherung|vet|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|viva|vivo|vlaanderen|vodka|volkswagen|volvo|vote|voting|voto|voyage|vuelos|v[aceginu])"
|
||||
+ "|(wales|walmart|walter|wang|wanggou|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|wtc|wtf|w[fs])"
|
||||
+ "|(xbox|xerox|xfinity|xihuan|xin|xn\\-\\-11b4c3d|xn\\-\\-1ck2e1b|xn\\-\\-1qqw23a|xn\\-\\-2scrj9c|xn\\-\\-30rr7y|xn\\-\\-3bst00m|xn\\-\\-3ds443g|xn\\-\\-3e0b707e|xn\\-\\-3hcrj9c|xn\\-\\-3oq18vl8pn36a|xn\\-\\-3pxu8k|xn\\-\\-42c2d9a|xn\\-\\-45br5cyl|xn\\-\\-45brj9c|xn\\-\\-45q11c|xn\\-\\-4gbrim|xn\\-\\-54b7fta0cc|xn\\-\\-55qw42g|xn\\-\\-55qx5d|xn\\-\\-5su34j936bgsg|xn\\-\\-5tzm5g|xn\\-\\-6frz82g|xn\\-\\-6qq986b3xl|xn\\-\\-80adxhks|xn\\-\\-80ao21a|xn\\-\\-80aqecdr1a|xn\\-\\-80asehdb|xn\\-\\-80aswg|xn\\-\\-8y0a063a|xn\\-\\-90a3ac|xn\\-\\-90ae|xn\\-\\-90ais|xn\\-\\-9dbq2a|xn\\-\\-9et52u|xn\\-\\-9krt00a|xn\\-\\-b4w605ferd|xn\\-\\-bck1b9a5dre4c|xn\\-\\-c1avg|xn\\-\\-c2br7g|xn\\-\\-cck2b3b|xn\\-\\-cckwcxetd|xn\\-\\-cg4bki|xn\\-\\-clchc0ea0b2g2a9gcd|xn\\-\\-czr694b|xn\\-\\-czrs0t|xn\\-\\-czru2d|xn\\-\\-d1acj3b|xn\\-\\-d1alf|xn\\-\\-e1a4c|xn\\-\\-eckvdtc9d|xn\\-\\-efvy88h|xn\\-\\-fct429k|xn\\-\\-fhbei|xn\\-\\-fiq228c5hs|xn\\-\\-fiq64b|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s|xn\\-\\-fjq720a|xn\\-\\-flw351e|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-fzys8d69uvgm|xn\\-\\-g2xx48c|xn\\-\\-gckr3f0f|xn\\-\\-gecrj9c|xn\\-\\-gk3at1e|xn\\-\\-h2breg3eve|xn\\-\\-h2brj9c|xn\\-\\-h2brj9c8c|xn\\-\\-hxt814e|xn\\-\\-i1b6b1a6a2e|xn\\-\\-imr513n|xn\\-\\-io0a7i|xn\\-\\-j1aef|xn\\-\\-j1amh|xn\\-\\-j6w193g|xn\\-\\-jlq480n2rg|xn\\-\\-jlq61u9w7b|xn\\-\\-jvr189m|xn\\-\\-kcrx77d1x4a|xn\\-\\-kprw13d|xn\\-\\-kpry57d|xn\\-\\-kput3i|xn\\-\\-l1acc|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgb9awbf|xn\\-\\-mgba3a3ejt|xn\\-\\-mgba3a4f16a|xn\\-\\-mgba7c0bbn0a|xn\\-\\-mgbaakc7dvf|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbab2bd|xn\\-\\-mgbah1a3hjkrd|xn\\-\\-mgbai9azgqp6j|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a|xn\\-\\-mgbbh1a71e|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgbca7dzdo|xn\\-\\-mgbcpq6gpa1a|xn\\-\\-mgberp4a5d4ar|xn\\-\\-mgbgu82a|xn\\-\\-mgbi4ecexp|xn\\-\\-mgbpl2fh|xn\\-\\-mgbt3dhd|xn\\-\\-mgbtx2b|xn\\-\\-mgbx4cd0ab|xn\\-\\-mix891f|xn\\-\\-mk1bu44c|xn\\-\\-mxtq1m|xn\\-\\-ngbc5azd|xn\\-\\-ngbe9e0a|xn\\-\\-ngbrx|xn\\-\\-node|xn\\-\\-nqv7f|xn\\-\\-nqv7fs00
ema|xn\\-\\-nyqy26a|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-otu796d|xn\\-\\-p1acf|xn\\-\\-p1ai|xn\\-\\-pgbs0dh|xn\\-\\-pssy2u|xn\\-\\-q7ce6a|xn\\-\\-q9jyb4c|xn\\-\\-qcka1pmc|xn\\-\\-qxa6a|xn\\-\\-qxam|xn\\-\\-rhqv96g|xn\\-\\-rovu88b|xn\\-\\-rvc1e0am3e|xn\\-\\-s9brj9c|xn\\-\\-ses554g|xn\\-\\-t60b56a|xn\\-\\-tckwe|xn\\-\\-tiq49xqyj|xn\\-\\-unup4y|xn\\-\\-vermgensberater\\-ctb|xn\\-\\-vermgensberatung\\-pwb|xn\\-\\-vhquv|xn\\-\\-vuq861b|xn\\-\\-w4r85el8fhu5dnra|xn\\-\\-w4rs40l|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a|xn\\-\\-xhq521b|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-y9a3aq|xn\\-\\-yfro4i67o|xn\\-\\-ygbi2ammx|xn\\-\\-zfr164b|xxx|xyz)"
|
||||
+ "|(yachts|yahoo|yamaxun|yandex|yodobashi|yoga|yokohama|you|youtube|yun|y[et])"
|
||||
+ "|(zappos|zara|zero|zip|zone|zuerich|z[amw])))")
|
||||
|
||||
|
||||
/**
|
||||
* Good characters for Internationalized Resource Identifiers (IRI).
|
||||
* This comprises most common used Unicode characters allowed in IRI
|
||||
* as detailed in RFC 3987.
|
||||
* Specifically, those two byte Unicode characters are not included.
|
||||
*/
|
||||
const val GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Marks the WEB_URL pattern as dirty, and will recompile it on its next usage
|
||||
*/
|
||||
@Volatile
|
||||
private var MARK_URL_PATTERN_DIRTY = false
|
||||
|
||||
/**
|
||||
* Regular expression pattern to match most part of RFC 3987
|
||||
* Internationalized URLs, aka IRIs. Commonly used Unicode characters are
|
||||
* added.
|
||||
*/
|
||||
@Volatile
|
||||
private var WEB_URL = compileWebUrl()
|
||||
|
||||
/**
 * Updates the web URL mega-regex, and marks usages as dirty (so they are updated)
 *
 * The given text replaces the top-level-domain expression that [compileWebUrl] concatenates into the web URL
 * pattern. This method does not recompile anything itself, it only stores the new expression and raises the dirty
 * flag.
 *
 * NOTE(review): the code that reacts to the dirty flag is not part of this section - presumably it recompiles the
 * pattern on its next use; confirm against the readers of the flag.
 *
 * @param topLeveDomainUrls the regular expression fragment matching the accepted top-level domains. It is stored
 *  as-is, without validation.
 */
fun updateWebUrlRegex(topLeveDomainUrls: String) {
    // the new expression is stored before the flag is raised; both fields are declared @Volatile
    TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL = topLeveDomainUrls
    MARK_URL_PATTERN_DIRTY = true // update the next time we use it.
}
|
||||
|
||||
|
||||
/**
 * Compiles the web URL pattern from its fixed fragments plus the current values of
 * GOOD_IRI_CHAR and TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL.
 *
 * The fragments are appended in a fixed order; each literal below is one piece of a single regular expression
 * and is not meaningful on its own.
 */
private fun compileWebUrl(): Pattern {
    val expression = buildString {
        // optional scheme, optionally followed by "user[:password]@"
        append("((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)")
        append("\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_")
        append("\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?")

        // named host: one or more dot-terminated labels followed by a known top level domain
        append("((?:(?:[")
        append(GOOD_IRI_CHAR)
        append("][")
        append(GOOD_IRI_CHAR)
        append("\\-]{0,64}\\.)+")
        append(TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL)

        // ... or a dotted-quad IP address
        append("|(?:(?:25[0-5]|2[0-4]")
        append("[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]")
        append("|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]")
        append("[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}")
        append("|[1-9][0-9]|[0-9])))")

        // optional port number
        append("(?:\\:\\d{1,5})?)")

        // optional path and query parameters
        append("(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~")
        append("\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?")

        // must end on a word boundary or at the end of the input
        append("(?:\\b|$)")
    }

    return Pattern.compile(expression)
}
|
||||
|
||||
/**
 * Cuts everything after the host off a URL. The transport (if any) and the domain are kept.
 * ie:
 *   http://foo.com/index.php             --> http://foo.com
 *   https://www.aa.foo.com/index.php     --> https://www.aa.foo.com
 *   https://www.aa.foo.com/index&foo%bar --> https://www.aa.foo.com
 *   https://www.aa.foo.com%foobar        --> https://www.aa.foo.com
 *
 * The result ends at the first `/`, `:`, `%`, `&` or `?` found after the `://` separator (or from the
 * beginning when there is no separator). `/` and `:` are only looked for starting three characters
 * past that point.
 *
 * @param fullDomainName the URL or domain to shorten
 * @return the leading part of [fullDomainName] up to (not including) the first delimiter found
 */
fun cleanupAndRemovePath(fullDomainName: String): String {
    val schemeIndex = fullDomainName.indexOf("://")
    val hostStart = if (schemeIndex == -1) 0 else schemeIndex + 3 // 3 is the length of ://

    val delimiterPositions = intArrayOf(
        fullDomainName.indexOf('/', hostStart + 3),
        fullDomainName.indexOf(':', hostStart + 3),
        fullDomainName.indexOf('%', hostStart),
        fullDomainName.indexOf('&', hostStart),
        fullDomainName.indexOf('?', hostStart)
    )

    // the earliest delimiter that exists wins; without any delimiter the whole input is kept
    val end = delimiterPositions.fold(fullDomainName.length) { best, found ->
        if (found in 0 until best) found else best
    }

    return fullDomainName.substring(0, end)
}
|
||||
|
||||
|
||||
|
||||
/**
 * Reduces a URL to its host: the transport, a leading `www.`, a `*.` wildcard prefix and the path are removed.
 * ie:
 *   http://foo.com/index.php             --> foo.com
 *   https://www.aa.foo.com/index.php     --> aa.foo.com
 *   https://www.aa.foo.com/index&foo%bar --> aa.foo.com
 *   https://www.aa.foo.com%foobar        --> aa.foo.com
 *
 * `www.` is only skipped when it is found at index 8 or lower of the input. The result ends at the first
 * `/`, `:`, `%`, `&` or `?`; `/` and `:` are only looked for starting three characters past the host start.
 *
 * @param fullDomainName the URL or domain to clean
 * @return the host portion of [fullDomainName]
 */
fun cleanupAndRemoveWwwAndPath(fullDomainName: String): String {
    val schemeIndex = fullDomainName.indexOf("://")
    var hostStart = if (schemeIndex == -1) 0 else schemeIndex + 3 // 3 is the length of ://

    // get rid of the www. part if it exists.
    val wwwIndex = fullDomainName.indexOf("www.", hostStart)
    if (wwwIndex in 0..8) {
        hostStart = wwwIndex + 4 // 4 is the length of www.
    }

    val wildcardIndex = fullDomainName.indexOf("*.", hostStart)
    if (wildcardIndex >= 0) {
        hostStart = wildcardIndex + 2 // 2 is the length of *.
    }

    val delimiterPositions = intArrayOf(
        fullDomainName.indexOf('/', hostStart + 3),
        fullDomainName.indexOf(':', hostStart + 3),
        fullDomainName.indexOf('%', hostStart),
        fullDomainName.indexOf('&', hostStart),
        fullDomainName.indexOf('?', hostStart)
    )

    // the earliest delimiter that exists wins; without any delimiter the host runs to the end of the input
    val end = delimiterPositions.fold(fullDomainName.length) { best, found ->
        if (found in 0 until best) found else best
    }

    return fullDomainName.substring(hostStart, end)
}
|
||||
|
||||
/**
 * Removes the transport, a leading `www.` and a `*.` wildcard prefix, but keeps the path.
 * A single trailing `/` is dropped from the result.
 *
 * With `removeQueryString = true` (the default) the result additionally stops at the first `%`, `&` or `?`:
 *   http://foo.com/index.php                    --> foo.com/index.php
 *   https://www.aa.foo.com/index.php            --> aa.foo.com/index.php
 *   https://www.aa.foo.com/index&foo%bar        --> aa.foo.com/index
 *   https://www.aa.foo.com%foobar               --> aa.foo.com
 *   https://www.youtube.com/watch?v=abc&t=1s    --> youtube.com/watch
 *
 * With `removeQueryString = false` everything after the host is preserved:
 *   https://www.aa.foo.com/index&foo%bar        --> aa.foo.com/index&foo%bar
 *   https://www.aa.foo.com%foobar               --> aa.foo.com%foobar
 *
 * `www.` is only skipped when it is found at index 8 or lower of the input.
 *
 * @param fullDomainName the URL to clean
 * @param removeQueryString when true, cut the result at the first `%`, `&` or `?`
 * @return the cleaned host + path; empty when nothing is left after the prefixes (for example "http://")
 */
fun cleanupAndPreservePath(fullDomainName: String, removeQueryString: Boolean = true): String {
    val schemeIndex = fullDomainName.indexOf("://")
    var start = if (schemeIndex == -1) 0 else schemeIndex + 3 // 3 is the length of ://

    // get rid of the www. part if it exists.
    val www = fullDomainName.indexOf("www.", start)
    if (www in 0..8) {
        start = www + 4 // 4 is the length of www.
    }

    val star = fullDomainName.indexOf("*.", start)
    if (star > -1) {
        start = star + 2 // 2 is the length of *.
    }

    var end = fullDomainName.length
    if (removeQueryString) {
        // the first of % & ? (whichever comes earliest) ends the result
        val cut = fullDomainName.indexOfAny(charArrayOf('%', '&', '?'), start)
        if (cut > -1) {
            end = cut
        }
    }

    // If the last char is a /, remove it. Only characters at or after 'start' are considered: looking further
    // back would inspect the "://" separator and push 'end' below 'start', which makes substring() throw.
    if (end > start && fullDomainName[end - 1] == '/') {
        end--
    }

    return fullDomainName.substring(start, end)
}
|
||||
|
||||
|
||||
/**
 * Strips `www.` and a `*.` wildcard prefix, then cuts the value at the path (`/`) or at the first `%`.
 *
 * ie:
 *   foo.com/index.php            --> foo.com
 *   www.aa.foo.com/index.php     --> aa.foo.com
 *   www.aa.foo.com/index&foo%bar --> aa.foo.com
 *   www.aa.foo.com%foobar        --> aa.foo.com
 *
 * An input without `www.`, without `*.` and without a `/` (searched from the fourth character on) is
 * returned exactly as it was passed in.
 *
 * NOTE: ONLY use this if you can GUARANTEE that there is no http?s://
 *
 * @param fullDomainName the scheme-less domain (optionally with a path), or null
 * @return the cleaned domain, or null when [fullDomainName] is null
 */
fun removeWww(fullDomainName: String?): String? {
    fullDomainName ?: return null

    // get rid of the www. part if it exists.
    val wwwIndex = fullDomainName.indexOf("www.")
    var start = if (wwwIndex > -1) wwwIndex + 4 else 0 // 4 is the length of www.

    val wildcardIndex = fullDomainName.indexOf("*.", start)
    if (wildcardIndex > -1) {
        start = wildcardIndex + 2 // 2 is the length of *.
    }

    val slashIndex = fullDomainName.indexOf('/', start + 3)
    if (slashIndex == -1 && start == 0) {
        // it was already clean.
        return fullDomainName
    }

    var end = if (slashIndex == -1) fullDomainName.length else slashIndex

    val percentIndex = fullDomainName.indexOf('%', start)
    if (percentIndex in 0 until end) {
        end = percentIndex
    }

    return fullDomainName.substring(start, end)
}
|
||||
|
||||
/**
 * Checks whether the whole of [url] matches the web URL pattern.
 *
 * When the pattern has been marked dirty (see [updateWebUrlRegex]) it is recompiled here before matching.
 *
 * @param url the text to test; null and empty values are never valid
 * @return true when [url] matches the pattern in full
 */
fun isValidUrl(url: String?): Boolean {
    if (url.isNullOrEmpty()) {
        return false // Don't even need to check, not a valid domain
    }

    if (MARK_URL_PATTERN_DIRTY) {
        // Clear the flag BEFORE compiling. If another thread updates the regex while we compile, it raises the
        // flag again and the next call recompiles. Clearing it afterwards would wipe out that request and leave
        // a pattern built from the old top level domains in place.
        MARK_URL_PATTERN_DIRTY = false
        WEB_URL = compileWebUrl()
    }

    return WEB_URL.matcher(url).matches()
}
|
||||
|
||||
/**
 * Reports whether the host of [fullDomainName] contains more than one dot once the transport and a leading
 * `www.` have been removed.
 * ie:
 *   https://www.foo.com              --> false
 *   https://www.aa.foo.com/index.php --> true
 *
 * This is a plain dot count: no public suffix handling is done here, so a host such as `foo.co.uk` counts
 * as a sub-domain.
 *
 * @param fullDomainName the URL or domain to inspect
 * @return true when the remaining host has two or more dots
 */
fun isSubDomain(fullDomainName: String): Boolean {
    val schemeIndex = fullDomainName.indexOf("://")
    var start = if (schemeIndex == -1) 0 else schemeIndex + 3 // 3 is the length of ://

    // Only skip "www." when the host really begins with it. Testing for "www." anywhere in the input (for
    // example in the path) would skip 4 unrelated host characters, or move 'start' past the end of the input.
    if (fullDomainName.startsWith("www.", start)) {
        start += 4 // 4 is the length of www.
    }

    val slash = fullDomainName.indexOf('/', start + 3)
    val end = if (slash == -1) fullDomainName.length else slash

    val host = fullDomainName.substring(start, end)
    return host.count { it == '.' } > 1
}
|
||||
|
||||
/**
 * Removes the transport, a leading `www.` and the path, then moves one level up the domain by dropping the
 * first label. When what is left after dropping the label is a top level domain (according to `Dns.isTLD`),
 * the cleaned host itself is returned instead.
 * ie:
 *   http://www.a.b.foo.com -> b.foo.com
 *   https://www.foo.com    -> foo.com
 *   foo.com                -> foo.com
 *
 * @param fullDomainName the URL or domain to inspect
 * @return the next level domain, or null when the cleaned host contains no dot
 */
fun cleanupAndGetNextLevelDomain(fullDomainName: String): String? {
    val schemeIndex = fullDomainName.indexOf("://")
    var start = if (schemeIndex == -1) 0 else schemeIndex + 3 // 3 is the length of ://

    // Only skip "www." when the host really begins with it. Testing for "www." anywhere in the input (for
    // example in the path) would skip 4 unrelated host characters, or move 'start' past the end of the input.
    if (fullDomainName.startsWith("www.", start)) {
        start += 4 // 4 is the length of www.
    }

    val slash = fullDomainName.indexOf('/', start + 3)
    val end = if (slash == -1) fullDomainName.length else slash

    val host = fullDomainName.substring(start, end)

    val nextDot = host.indexOf('.')
    if (nextDot == -1) {
        return null
    }

    val parent = host.substring(nextDot + 1)

    // never go all the way up to a bare top level domain
    return if (Dns.isTLD(parent)) host else parent
}
|
||||
|
||||
/**
 * Drops the first label of a domain name.
 * ie:
 *   admin.regression.net-ref.com --> regression.net-ref.com
 *
 * @param fullDomainName the domain name to shorten; no cleanup of transport or path is done here
 * @return everything after the first dot, or null when [fullDomainName] contains no dot
 */
fun getNextLevelDomain(fullDomainName: String): String? {
    val firstDot = fullDomainName.indexOf('.')
    return if (firstDot >= 0) fullDomainName.substring(firstDot + 1) else null
}
|
||||
|
||||
/**
 * Removes the transport and the path (if present) and passes the remaining host to `Dns.extractSLD`.
 * ie:
 *   http://foo.com/index.php         --> foo.com
 *   https://www.aa.foo.com/index.php --> foo.com
 *
 * @param fullDomainName the URL or domain to inspect
 * @return whatever `Dns.extractSLD` returns for the host; always null for `file://` URLs
 */
fun cleanupAndGetSecondLevelDomain(fullDomainName: String): String? {
    // File URLs will return null at the extractSLD step, so this case is explicitly for logging purposes.
    // We want to know when the returned value is null because it's a file, vs returning null for other reasons.
    if (fullDomainName.startsWith("file://", ignoreCase = true)) {
        return null
    }

    val schemeIndex = fullDomainName.indexOf("://")
    val start = if (schemeIndex == -1) 0 else schemeIndex + 3 // 3 is the length of ://

    val slash = fullDomainName.indexOf('/', start + 3)

    val host = when {
        slash != -1 -> fullDomainName.substring(start, slash)
        start == 0  -> fullDomainName // it was already clean.
        else        -> fullDomainName.substring(start)
    }

    // for now, get the SLD as well
    return Dns.extractSLD(host)
}
|
||||
|
||||
/**
 * Cleans the input with [cleanupAndRemoveWwwAndPath] and keeps at most its last three labels.
 * ie:
 *   http://google.com/index.php                      -> google.com
 *   http://docs.google.com/index.php                 -> docs.google.com
 *   https://32.32.432.fdsa.docs.google.com/index.php -> docs.google.com
 *
 * @param fullDomainName the URL or domain to inspect
 * @return the cleaned host, shortened from the left until no more than two dots remain
 */
fun cleanupAndGetThirdLevelDomain(fullDomainName: String): String {
    var remaining = cleanupAndRemoveWwwAndPath(fullDomainName)

    val periodCount = remaining.count { it == '.' }

    // one leading label is removed for every dot beyond the second; nothing happens with two dots or fewer
    repeat(periodCount - 2) {
        remaining = remaining.substring(remaining.indexOf('.') + 1)
    }

    return remaining
}
|
||||
|
||||
/**
 * Returns the file name of a file uri, i.e. whatever follows the last `/`.
 * ie:
 *   file://Downloads/example.pdf -> example.pdf
 *   file:///media.jpg            -> media.jpg
 *
 * @param domain the file uri
 * @return the text after the last `/`, or [domain] unchanged when it contains no `/`
 */
fun cleanupFileUri(domain: String): String = domain.substringAfterLast('/')
|
||||
|
||||
|
||||
/**
 * Replaces the default socket factory and hostname verifier of [HttpsURLConnection] with ones that accept
 * every certificate and every host name.
 *
 * WARNING: this switches off TLS certificate and host name validation for every `HttpsURLConnection` that
 * uses the defaults from then on. Connections made afterwards are open to man-in-the-middle attacks, and
 * this function offers no way to restore the previous defaults.
 *
 * Works around:
 *   Exception in thread "main" javax.net.ssl.SSLHandshakeException:
 *       sun.security.validator.ValidatorException:
 *           PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException:
 *               unable to find valid certification path to requested target
 */
fun forceAcceptAllTlsCertificates() {
    val trustAllCerts = arrayOf<TrustManager>(object : X509TrustManager {
        // the X509TrustManager contract asks for a non-null (possibly empty) array, so never return null here
        override fun getAcceptedIssuers(): Array<X509Certificate> = emptyArray()

        // accept everything: returning without throwing means "trusted"
        override fun checkClientTrusted(certs: Array<X509Certificate>, authType: String) {}

        override fun checkServerTrusted(certs: Array<X509Certificate>, authType: String) {}
    })

    val sc = SSLContext.getInstance("SSL")
    sc.init(null, trustAllCerts, java.security.SecureRandom())
    HttpsURLConnection.setDefaultSSLSocketFactory(sc.socketFactory)

    // Create all-trusting host name verifier
    val allHostsValid = HostnameVerifier { _, _ -> true }

    // Install the all-trusting host verifier
    HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid)
}
|
||||
|
||||
|
||||
|
||||
// @JvmStatic
|
||||
// fun main(args: Array<String>) {
|
||||
// println(cleanupAndPreservePath("https://www.youtube.com/watch?v=YP6EaIDlmEg&t=1s", removeQueryString = true))
|
||||
// println(cleanupAndPreservePath("https://www.khanacademy.org/", removeQueryString = true))
|
||||
// println(cleanupAndRemoveWwwAndPath("https://sat184.cloud1.tds.airast.org/student/V746/Pages/TestShell.aspx"))
|
||||
// println(cleanupAndRemoveWwwAndPath("https://sat184.cloud1.tds.airast.org/student/V746/Pages/TestShell.aspx"))
|
||||
//
|
||||
// }
|
||||
|
||||
// println(WEB_URL.matcher("https://www.youtube.com/watch?v=WEVctuQTeaI").matches())
|
||||
// println(WEB_URL.matcher("www.youtube.com/watch?v=WEVctuQTeaI").matches())
|
||||
// println(WEB_URL.matcher("youtube.com/watch?v=WEVctuQTeaI").matches())
|
||||
// println(WEB_URL.matcher("youtube.com").matches())
|
||||
// println(WEB_URL.matcher("https://www.espn.com/nba/").matches())
|
||||
// println(WEB_URL.matcher("https://www.espn.com/nba").matches())
|
||||
// println(getNextLevelDomain("admin.regression.net-ref.com"))
|
||||
// println(cleanupAndGetGoogleDomain("https://www.google.com/search?rlz=1CAZGSZ_enUS848&tbm=isch&q=pretty+backgrounds&chips=q:pretty+backgrounds,g_1:iphone:lJzZkCc6kg8%3D&usg=AI4_-kSfq6w5oVz33oUhcFfHeJC-MtmIww&sa=X&ved=0ahUKEwi0hP-Sk4riAhUUpJ4KHaWJDi0Q4lYIJigA&biw=1517&bih=695&dpr=0.9&safe=active&ssui=on"));
|
||||
// println(cleanupAndRemoveWww("http://fasttmath.capousd.org:55880/fmng/loader/"))
|
||||
// println(cleanupAndRemoveWww("http://fasttmath.capousd.org:55880/fmng/loader/"))
|
||||
// println(cleanupAndRemoveWww("http://fasttmath.capousd.org:55880/fmng/loader/"))
|
||||
// println(cleanupAndRemoveWww("https://clever.com/oauth/authorize?channel=clever-portal&client_id=8c54ced0462a3fe2da0a&confirmed=true&district_id=556cc0739496cf01000003f2" +
|
||||
// "&redirect_uri=https%3A%2F%2Fapp.typingagent.com%2Fclever%2Findex%3Foauth%3Dtrue&response_type=code"))
|
||||
// println(cleanupAndRemoveWww(
|
||||
// "https://www.clever.com/oauth/authorize?channel=clever-portal&client_id=ae17f3b6f000d1bb4f2c&confirmed=true&district_id=556cc0739496cf01000003f2&redirect_uri=https%3A%2F%2Fwww" +
|
||||
// ".khanacademy.org%2Flogin%2Fclever&response_type=code"))
|
||||
// println(cleanupAndRemoveWww(cleanupAndRemoveWww("https://sat184.cloud1.tds.airast.org/student/V746/Pages/TestShell.aspx")))
|
||||
//
|
||||
// println(cleanupAndPreservePath("http://fasttmath.capousd.org:55880/fmng/loader/"))
|
||||
// println(cleanupAndPreservePath(
|
||||
// "https://www.clever.com/oauth/authorize?channel=clever-portal&client_id=ae17f3b6f000d1bb4f2c&confirmed=true&district_id=556cc0739496cf01000003f2&redirect_uri=https%3A%2F%2Fwww" +
|
||||
// ".khanacademy.org%2Flogin%2Fclever&response_type=code"))
|
||||
|
||||
// }
|
||||
|
||||
|
||||
/**
 * Requests `scheme://domain/paths...` and hands the response body to [onSuccess]. Every failure is reported
 * through [onError] instead of being thrown; only coroutine cancellation is propagated to the caller.
 *
 * The work runs inside `withContext(Dispatchers.IO)`.
 *  - 301/302 responses are followed manually (relative `Location` values are resolved against the current URL).
 *  - A 404 is retried exactly once with the other scheme (https <-> http) against the original domain and
 *    path; the attempt counter starts over for that retry.
 *  - Any other non-200 response ends the call with [onError].
 *
 * @param scheme      the scheme of the first attempt, e.g. "https"
 * @param domain      the host to contact
 * @param paths       path segments; each one is encoded and they are joined with "/"
 * @param retryCount  the maximum number of requests made before giving up (redirects count as requests)
 * @param onError     receives a description of the failure
 * @param onSuccess   receives the response stream of a 200 response; the stream is closed when it returns
 */
suspend fun fetchData(scheme: String, domain: String, vararg paths: String, retryCount: Int = 10,
                      onError: (String) -> Unit,
                      onSuccess: suspend (InputStream) -> Unit) = withContext(Dispatchers.IO) {
    val encodedPath = paths.joinToString(separator = "/") { URLEncoder.encodePathSegment(it, Charsets.UTF_8) }
    var location = "$scheme://$domain/$encodedPath"
    var alreadyTriedOtherScheme = false

    // We DO want to support redirects, in case OLD code is running in the wild.
    var visitedCount = 0

    while (true) {
        visitedCount += 1
        if (visitedCount > retryCount) {
            // at this point exactly 'retryCount' requests have been made
            onError("Stuck in a loop for '$location' --- more than $retryCount attempts")
            return@withContext
        }

        try {
            val base = URL(location)
            with(base.openConnection() as HttpURLConnection) {
                try {
                    useCaches = false
                    instanceFollowRedirects = true

                    when (responseCode) {
                        HttpURLConnection.HTTP_MOVED_PERM, HttpURLConnection.HTTP_MOVED_TEMP -> {
                            val redirect = getHeaderField("Location")
                            if (redirect == null) {
                                onError("Error '$responseCode' getting location '$location': redirect without a Location header")

                                // done
                                return@withContext
                            }

                            // java.net.URLDecoder is only valid for query parameters/headers -- NOT FOR ACTUAL URLS!
                            val decoded = URLDecoder.decode(redirect, "US-ASCII")

                            // Deal with relative URLs
                            location = URL(base, decoded).toExternalForm()

                            // loop again with the new location
                            return@with
                        }
                        HttpURLConnection.HTTP_OK -> {
                            inputStream.use {
                                onSuccess(it)
                            }

                            // done
                            return@withContext
                        }
                        HttpURLConnection.HTTP_NOT_FOUND -> {
                            if (alreadyTriedOtherScheme) {
                                onError("Error '$responseCode' getting location '$location' HTTPS option exhausted.")

                                // done
                                return@withContext
                            }

                            // if we are HTTPS, retry again as HTTP (and the other way around).
                            alreadyTriedOtherScheme = true
                            visitedCount = 0

                            location = if (location.startsWith("https")) {
                                "http://$domain/$encodedPath"
                            } else {
                                "https://$domain/$encodedPath"
                            }

                            // loop again with the new location
                            return@with
                        }
                        else -> {
                            onError("Error '$responseCode' getting location '$location'")

                            // done
                            return@withContext
                        }
                    }
                }
                finally {
                    // only the 200 branch closes its stream; release the connection on every other way out too
                    disconnect()
                }
            }
        }
        catch (e: java.util.concurrent.CancellationException) {
            // cancellation of the coroutine must reach the caller; it is not a download failure
            throw e
        }
        catch (e: UnknownHostException) {
            // TMI for what's going on. We just can't, so leave it at that.
            onError("Failed to retrieve or write icon for location: '${location}'")
            return@withContext
        }
        catch (e: Exception) {
            onError("Failed to retrieve or write icon for location: '${location}'. ${e.message}")
            return@withContext
        }
    }

    @Suppress("UNREACHABLE_CODE")
    null
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright 2023 dorkbox, llc
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.netUtil.web;
|
Loading…
Reference in New Issue