From 7a1341c822c8ef427436a2e787a1670f5cd3a71a Mon Sep 17 00:00:00 2001
From: will-ca <willchencontact@gmail.com>
Date: Mon, 24 Jan 2022 01:00:11 -0800
Subject: [PATCH] Check rulesets for potential typos. (#6027)

* Add fuzzy text comparison algorithm.

* Add mod validity check for probable misspellings.

* Code comment for super-corner-case/impossible error.
---
 core/src/com/unciv/models/ruleset/Ruleset.kt  | 29 +++++++-
 core/src/com/unciv/ui/utils/TextSimilarity.kt | 71 +++++++++++++++++++
 2 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 core/src/com/unciv/ui/utils/TextSimilarity.kt

diff --git a/core/src/com/unciv/models/ruleset/Ruleset.kt b/core/src/com/unciv/models/ruleset/Ruleset.kt
index e54377c276..10161c8d39 100644
--- a/core/src/com/unciv/models/ruleset/Ruleset.kt
+++ b/core/src/com/unciv/models/ruleset/Ruleset.kt
@@ -25,6 +25,7 @@ import com.unciv.models.stats.Stats
 import com.unciv.models.translations.fillPlaceholders
 import com.unciv.models.translations.tr
 import com.unciv.ui.utils.colorFromRGB
+import com.unciv.ui.utils.getRelativeTextDistance
 import kotlin.collections.set
 
 object ModOptionsConstants {
@@ -297,13 +298,39 @@ class Ruleset {
         return stringList.joinToString { it.tr() }
     }
 
+    /** Similarity below which an untyped unique can be considered a potential misspelling.
+     * Roughly corresponds to the fraction of the Unique placeholder text that can be different/misspelled, but with some extra room for [getRelativeTextDistance] idiosyncrasies. */
+    private val uniqueMisspellingThreshold = 0.15 // Tweak as needed. Simple misspellings seem to be around 0.025, so would mostly be caught by 0.05. IMO 0.1 would be good, but raising to 0.15 also seemed to catch what may be an outdated Unique.
 
     fun checkUniques(uniqueContainer:IHasUniques, lines:RulesetErrorList,
                      severityToReport: UniqueType.UniqueComplianceErrorSeverity) {
         val name = if (uniqueContainer is INamed) uniqueContainer.name else ""
 
         for (unique in uniqueContainer.uniqueObjects) {
-            if (unique.type == null) continue
+            if (unique.type == null) {
+                val similarUniques = UniqueType.values().filter { getRelativeTextDistance(it.placeholderText, unique.placeholderText) <= uniqueMisspellingThreshold }
+                val equalUniques = similarUniques.filter { it.placeholderText == unique.placeholderText }
+                if (equalUniques.isNotEmpty()) {
+                    lines.add( // This should only ever happen if a bug is or has been introduced that prevents Unique.type from being set for a valid UniqueType, I think.
+                        "$name's unique \"${unique.text}\" looks like it should be fine, but for some reason isn't recognized.",
+                        RulesetErrorSeverity.OK
+                    )
+                } else if (similarUniques.isNotEmpty()) {
+                    lines.add("$name's unique \"${unique.text}\" looks like it may be a misspelling of:\n" +
+                        similarUniques.joinToString("\n") { uniqueType ->
+                            val deprecationAnnotation = UniqueType::class.java.getField(uniqueType.name)
+                                    .getAnnotation(Deprecated::class.java)
+                            if (deprecationAnnotation == null)
+                                "\"${uniqueType.text}\""
+                            else
+                                "\"${uniqueType.text}\" (Deprecated)"
+                        }.prependIndent("\t"),
+                        RulesetErrorSeverity.OK
+                    )
+
+                }
+                continue
+            }
             val complianceErrors = unique.type.getComplianceErrors(unique, this)
             for (complianceError in complianceErrors) {
                 if (complianceError.errorSeverity == severityToReport)
diff --git a/core/src/com/unciv/ui/utils/TextSimilarity.kt b/core/src/com/unciv/ui/utils/TextSimilarity.kt
new file mode 100644
index 0000000000..e51c13d2fd
--- /dev/null
+++ b/core/src/com/unciv/ui/utils/TextSimilarity.kt
@@ -0,0 +1,71 @@
+package com.unciv.ui.utils
+
+/**
+ * Algorithm:
+ *  - Keep an index for each string.
+ *  - Iteratively advance by one character in each string.
+ *      - If the character at the index of each string is not the same, then pause.
+ *          - Try to find the minumum number of characters to skip in the first string to find the current character of the second string.
+ *          - Try to find the minimum number of characters to skip in the second string to find the current character of the first string.
+ *          - If the above condition cannot be satisifed for either string, then skip both by one character and continue advancing them together.
+ *          - Otherwise, skip ahead in either the first string or the second string, depending on which requires the lowest offset, and continue advancing both strings together.
+ *      - Stop when either one of the above steps cannot be completed or the end of either string has been reached.
+ *  - The distance returned is the apprximately total number of characters skipped, plus the total number of characters unaccounted for at the end.
+ *
+ * Meant to run in linear-ish time.
+ * Order of comparands shouldn't matter too much, but does a little.
+ * This seemed simpler than a thorough implementation of other string comparison algorithms, and maybe more performant than a naïve implementation of other string comparisons, as well as sufficient for the fairly simple use case.
+ *
+ * @param text1 String to compare.
+ * @param text2 String to compare.
+ * @return Approximate distance between them.
+ */
+fun getTextDistance(text1: String, text2: String): Int {
+    var dist = 0;
+    var i1 = 0;
+    var i2 = 0;
+
+//    fun String.debugTraversal(index: Int) = println(this.substring(0..index-1)+"["+this[index]+"]"+this.substring(index+1..this.lastIndex))
+//    /** Uncomment this and stick it at the start of the `while` if you want to see what's happening. */
+//    fun debugTraversal() { println(); text1.debugTraversal(i1); text2.debugTraversal(i2); }
+
+    fun inRange() = i1 < text1.length && i2 < text2.length // Length is O(1), apparently.
+    while (inRange()) {
+//        debugTraversal()
+        var char1 = text1[i1] // Indexing may not be, though.
+        var char2 = text2[i2]
+        if (char1 == char2) {
+            i1++
+            i2++
+        } else {
+            val firstMatchIndex1 = (i1..text1.lastIndex).firstOrNull { text1[it] == char2 }
+            val firstMatchIndex2 = (i2..text2.lastIndex).firstOrNull { text2[it] == char1 }
+            if (firstMatchIndex1 == null && firstMatchIndex2 == null) {
+                dist++
+                i1++
+                i2++
+                continue
+            }
+            val firstMatchOffset1 = firstMatchIndex1?.minus(i1)
+            val firstMatchOffset2 = firstMatchIndex2?.minus(i2)
+            when {
+                (firstMatchOffset2 == null || (firstMatchOffset1 != null && firstMatchOffset1 < firstMatchOffset2)) -> { // Preferential behaviour when the offsets are equal does make the operation slightly non-commutative, I think.
+                    dist += firstMatchOffset1!!
+                    i1 = firstMatchIndex1 + 1
+                    i2++
+                }
+                (firstMatchOffset1 == null || firstMatchOffset1 >= firstMatchOffset2) -> {
+                    dist += firstMatchOffset2
+                    i1++
+                    i2 = firstMatchIndex2 + 1
+                }
+                else -> throw IllegalStateException("Can't compare Strings:\n\t${text1}\n\t${text2}")
+            }
+        }
+    }
+    dist += ((text1.length - i1) + (text2.length - i2)) / 2
+    return dist
+}
+
+/** @return the [getTextDistance] of two strings relative to their average length. */
+fun getRelativeTextDistance(text1: String, text2: String) = getTextDistance(text1, text2).toDouble() / (text1.length + text2.length) * 2.0