Moved TLD stuff to utils (since it's not explicitly using DNS), but is a utility for understanding/parsing DNS info
parent
797e5c6112
commit
78974edf2b
File diff suppressed because it is too large
Load Diff
|
@ -38,6 +38,9 @@ object Dns {
|
|||
|
||||
const val DEFAULT_SEARCH_DOMAIN = ""
|
||||
|
||||
private val exceptions = HashSet<String>()
|
||||
private val suffixes = HashSet<String>()
|
||||
|
||||
/**
|
||||
* @throws IOException if the DNS resolve.conf file cannot be read
|
||||
*/
|
||||
|
@ -116,7 +119,156 @@ object Dns {
|
|||
tryParse.second
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
init {
|
||||
/**
|
||||
* And the effective_tld_names.dat is from mozilla (the following are all the same data)
|
||||
*
|
||||
*
|
||||
* https://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1
|
||||
* which is...
|
||||
* https://publicsuffix.org/list/effective_tld_names.dat
|
||||
*
|
||||
*
|
||||
* also
|
||||
*
|
||||
*
|
||||
* https://publicsuffix.org/list/public_suffix_list.dat
|
||||
*
|
||||
*
|
||||
* Parses the list from publicsuffix.org
|
||||
* new one at:
|
||||
* http://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk/httpclient5/src/main/java/org/apache/hc/client5/http/impl/cookie/PublicSuffixDomainFilter.java
|
||||
* and
|
||||
* http://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk/httpclient5/src/main/java/org/apache/hc/client5/http/psl/
|
||||
*/
|
||||
|
||||
// now load this file into memory, so it's faster to process.
|
||||
val tldResource = Dns.javaClass.getResourceAsStream("/effective_tld_names.dat")
|
||||
tldResource.bufferedReader().useLines { lines ->
|
||||
lines.forEach { line ->
|
||||
var line = line
|
||||
|
||||
// entire lines can also be commented using //
|
||||
if (line.isNotEmpty() && !line.startsWith("//")) {
|
||||
|
||||
if (line.startsWith(".")) {
|
||||
line = line.substring(1) // A leading dot is optional
|
||||
}
|
||||
|
||||
// An exclamation mark (!) at the start of a rule marks an exception
|
||||
// to a previous wildcard rule
|
||||
val isException = line.startsWith("!")
|
||||
if (isException) {
|
||||
line = line.substring(1)
|
||||
}
|
||||
|
||||
if (isException) {
|
||||
exceptions.add(line)
|
||||
} else {
|
||||
suffixes.add(line)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Extracts the second level domain, from a fully qualified domain (ie: www.aa.com, or www.amazon.co.uk).
|
||||
*
|
||||
*
|
||||
* This algorithm works from left to right parsing the domain string parameter
|
||||
*
|
||||
* @param domain a fully qualified domain (ie: www.aa.com, or www.amazon.co.uk)
|
||||
*
|
||||
* @return null (if there is no second level domain) or the SLD www.aa.com -> aa.com , or www.amazon.co.uk -> amazon.co.uk
|
||||
*/
|
||||
fun extractSLD(domain: String): String? {
|
||||
var domain = domain
|
||||
var last = domain
|
||||
var anySLD = false
|
||||
|
||||
do {
|
||||
if (isTLD(domain)) {
|
||||
return if (anySLD) {
|
||||
last
|
||||
}
|
||||
else {
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
anySLD = true
|
||||
last = domain
|
||||
|
||||
val nextDot = domain.indexOf(".")
|
||||
if (nextDot == -1) {
|
||||
return null
|
||||
}
|
||||
|
||||
domain = domain.substring(nextDot + 1)
|
||||
} while (domain.isNotEmpty())
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a domain that is without its TLD at the end.
|
||||
*
|
||||
* @param domain domain a fully qualified domain or not, (ie: www.aa.com, or amazon.co.uk).
|
||||
*
|
||||
* @return a domain that is without it's TLD, ie: www.aa.com -> www.aa, or google.com -> google
|
||||
*/
|
||||
fun withoutTLD(domain: String): String {
|
||||
var index = 0
|
||||
while (index != -1) {
|
||||
index = domain.indexOf('.', index)
|
||||
|
||||
if (index != -1) {
|
||||
if (isTLD(domain.substring(index))) {
|
||||
return domain.substring(0, index)
|
||||
}
|
||||
index++
|
||||
}
|
||||
else {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the domain is a TLD.
|
||||
*/
|
||||
fun isTLD(domain: String): Boolean {
|
||||
var domain = domain
|
||||
if (domain.startsWith(".")) {
|
||||
domain = domain.substring(1)
|
||||
}
|
||||
|
||||
// An exception rule takes priority over any other matching rule.
|
||||
// Exceptions are ones that are not a TLD, but would match a pattern rule
|
||||
// e.g. bl.uk is not a TLD, but the rule *.uk means it is. Hence there is an exception rule
|
||||
// stating that bl.uk is not a TLD.
|
||||
if (exceptions.contains(domain)) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (suffixes.contains(domain)) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Try patterns. ie *.jp means that boo.jp is a TLD
|
||||
val nextdot = domain.indexOf('.')
|
||||
if (nextdot == -1) {
|
||||
return false
|
||||
}
|
||||
domain = "*" + domain.substring(nextdot)
|
||||
|
||||
return suffixes.contains(domain)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//TODO add domain TLD, etc util tools
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright 2021 dorkbox, llc
|
||||
* Copyright (C) 2014 ZeroTurnaround <support@zeroturnaround.com>
|
||||
* Contains fragments of code from Apache Commons Exec, rights owned
|
||||
* by Apache Software Foundation (ASF).
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dorkbox.netUtil
|
||||
|
||||
import org.junit.Assert
|
||||
import org.junit.Test
|
||||
|
||||
class TopLevelDomainTest {
|
||||
@Test
|
||||
fun isTLD() {
|
||||
Assert.assertFalse(Dns.isTLD("www.espn.com"))
|
||||
Assert.assertFalse(Dns.isTLD("espn.com"))
|
||||
Assert.assertTrue(Dns.isTLD("com"))
|
||||
Assert.assertTrue(Dns.isTLD("co.uk"))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun withoutTLD() {
|
||||
Assert.assertEquals("espn", Dns.withoutTLD("espn.com"))
|
||||
Assert.assertEquals("www.espn", Dns.withoutTLD("www.espn.com"))
|
||||
Assert.assertEquals("a.b.c.www.espn", Dns.withoutTLD("a.b.c.www.espn.com"))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun extractSLD() {
|
||||
Assert.assertEquals("espn.com", Dns.extractSLD("espn.com"))
|
||||
Assert.assertEquals("espn.com", Dns.extractSLD("www.espn.com"))
|
||||
Assert.assertEquals("espn.com", Dns.extractSLD("a.b.c.www.espn.com"))
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue