Moved TLD stuff to utils (since it's not explicitly using DNS), but is a utility for understanding/parsing DNS info
parent
797e5c6112
commit
78974edf2b
File diff suppressed because it is too large
Load Diff
|
@ -38,6 +38,9 @@ object Dns {
|
||||||
|
|
||||||
const val DEFAULT_SEARCH_DOMAIN = ""
|
const val DEFAULT_SEARCH_DOMAIN = ""
|
||||||
|
|
||||||
|
// Exception rules from the TLD list (lines that start with "!"); isTLD() consults these before any other rule.
private val exceptions = HashSet<String>()
|
||||||
|
// Regular rules from the TLD list, stored as written, including wildcard entries such as "*.jp".
private val suffixes = HashSet<String>()
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @throws IOException if the DNS resolv.conf file cannot be read
|
* @throws IOException if the DNS resolv.conf file cannot be read
|
||||||
*/
|
*/
|
||||||
|
@ -116,7 +119,156 @@ object Dns {
|
||||||
tryParse.second
|
tryParse.second
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
init {
    // The rule list is the "effective TLD names" data from Mozilla. The following all serve the same data:
    //
    //   https://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1
    //   https://publicsuffix.org/list/effective_tld_names.dat
    //   https://publicsuffix.org/list/public_suffix_list.dat
    //
    // Newer reference parsers for the publicsuffix.org list:
    //
    //   http://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk/httpclient5/src/main/java/org/apache/hc/client5/http/impl/cookie/PublicSuffixDomainFilter.java
    //   http://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk/httpclient5/src/main/java/org/apache/hc/client5/http/psl/

    // getResourceAsStream() returns null when the resource is missing; fail with a clear message instead of a bare NPE.
    val tldResource = checkNotNull(Dns.javaClass.getResourceAsStream("/effective_tld_names.dat")) {
        "Unable to load the '/effective_tld_names.dat' resource from the classpath"
    }

    // Load the whole list into memory once, so every later lookup is a plain hash-set check.
    tldResource.bufferedReader().useLines { lines ->
        for (rawLine in lines) {
            // A rule ends at the first whitespace; surrounding whitespace is tolerated so that a stray
            // trailing space cannot produce a rule that never matches.
            val entry = rawLine.trim().takeWhile { !it.isWhitespace() }

            // Skip blank lines. Entire lines can also be commented out using //
            if (entry.isEmpty() || entry.startsWith("//")) {
                continue
            }

            // A leading dot is optional
            val rule = entry.removePrefix(".")

            // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
            if (rule.startsWith("!")) {
                exceptions.add(rule.substring(1))
            } else {
                suffixes.add(rule)
            }
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Extracts the second level domain from a fully qualified domain (ie: www.aa.com, or www.amazon.co.uk).
 *
 * The domain is scanned from left to right: the left-most label is dropped one at a time until what is left
 * is a TLD according to [isTLD]. The value examined just before that point is the second level domain.
 *
 * @param domain a fully qualified domain (ie: www.aa.com, or www.amazon.co.uk)
 *
 * @return the SLD (www.aa.com -> aa.com, or www.amazon.co.uk -> amazon.co.uk), or null if there is no second
 *         level domain: the input is itself a TLD, or no TLD is found before the labels run out
 */
fun extractSLD(domain: String): String? {
    var candidate = domain

    // The value examined on the previous pass; stays null while the original input is being examined.
    var previous: String? = null

    while (!isTLD(candidate)) {
        previous = candidate

        val dot = candidate.indexOf(".")
        if (dot == -1) {
            // a single label that is not a TLD, nothing left to strip
            return null
        }

        candidate = candidate.substring(dot + 1)
        if (candidate.isEmpty()) {
            // the name ended with a dot, so there is nothing after it to test
            return null
        }
    }

    return previous
}
|
||||||
|
|
||||||
|
/**
 * Returns the domain with its TLD removed from the end.
 *
 * Every dot is tried from left to right; the first one whose remainder is a TLD according to [isTLD] is
 * where the domain is cut.
 *
 * @param domain a fully qualified domain or not, (ie: www.aa.com, or amazon.co.uk).
 *
 * @return the domain without its TLD, ie: www.aa.com -> www.aa, or google.com -> google. An empty string is
 *         returned when no dot in the domain is followed by a TLD.
 */
fun withoutTLD(domain: String): String {
    var dot = domain.indexOf('.')

    while (dot != -1) {
        // the candidate keeps its leading dot, isTLD() strips it
        if (isTLD(domain.substring(dot))) {
            return domain.substring(0, dot)
        }

        dot = domain.indexOf('.', dot + 1)
    }

    return ""
}
|
||||||
|
|
||||||
|
/**
 * Checks if the domain is a TLD, according to the rules loaded from the effective TLD list.
 *
 * @param domain the name to check; a single leading dot is ignored
 *
 * @return true if the name is listed as a suffix, or matches a wildcard rule, and is not listed as an exception
 */
fun isTLD(domain: String): Boolean {
    val name = domain.removePrefix(".")

    // An exception rule takes priority over any other matching rule.
    // Exceptions are names that are not a TLD, but would match a pattern rule.
    // e.g. bl.uk is not a TLD, but the rule *.uk means it is. Hence there is an exception rule
    // stating that bl.uk is not a TLD.
    if (name in exceptions) {
        return false
    }

    if (name in suffixes) {
        return true
    }

    // Try patterns, ie: *.jp means that boo.jp is a TLD. Only the left-most label is replaced by the wildcard.
    val firstDot = name.indexOf('.')
    return firstDot != -1 && ("*" + name.substring(firstDot)) in suffixes
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//TODO add domain TLD, etc util tools
|
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2021 dorkbox, llc
|
||||||
|
* Copyright (C) 2014 ZeroTurnaround <support@zeroturnaround.com>
|
||||||
|
* Contains fragments of code from Apache Commons Exec, rights owned
|
||||||
|
* by Apache Software Foundation (ASF).
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package dorkbox.netUtil
|
||||||
|
|
||||||
|
import org.junit.Assert
|
||||||
|
import org.junit.Test
|
||||||
|
|
||||||
|
/**
 * Tests for the TLD helpers on [Dns]: isTLD, withoutTLD and extractSLD.
 */
class TopLevelDomainTest {
    @Test
    fun isTLD() {
        // registrable names are not TLDs
        listOf("www.espn.com", "espn.com").forEach { name ->
            Assert.assertFalse(Dns.isTLD(name))
        }

        // single-label and multi-label suffixes are TLDs
        listOf("com", "co.uk").forEach { name ->
            Assert.assertTrue(Dns.isTLD(name))
        }
    }

    @Test
    fun withoutTLD() {
        // input -> expected value once the TLD is removed
        val cases = listOf(
            "espn.com" to "espn",
            "www.espn.com" to "www.espn",
            "a.b.c.www.espn.com" to "a.b.c.www.espn"
        )

        for ((input, expected) in cases) {
            Assert.assertEquals(expected, Dns.withoutTLD(input))
        }
    }

    @Test
    fun extractSLD() {
        // every input shares the same second level domain, no matter how many labels come before it
        for (input in listOf("espn.com", "www.espn.com", "a.b.c.www.espn.com")) {
            Assert.assertEquals("espn.com", Dns.extractSLD(input))
        }
    }
}
|
Loading…
Reference in New Issue