From e315ee67fc4f75ec6c8432e10fbbd5350e1c5499 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sat, 13 Aug 2016 19:59:57 +1000 Subject: [PATCH] Rewrite ProfanityFilter to be faster, and also tidy up its code. --- Server/Extra/ProfanityFilter.cs | 150 +++++++++++--------------------- 1 file changed, 50 insertions(+), 100 deletions(-) diff --git a/Server/Extra/ProfanityFilter.cs b/Server/Extra/ProfanityFilter.cs index f9ba28414..e715c69ab 100644 --- a/Server/Extra/ProfanityFilter.cs +++ b/Server/Extra/ProfanityFilter.cs @@ -17,37 +17,55 @@ or implied. See the Licenses for the specific language governing permissions and limitations under the Licenses. */ -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using System.Text.RegularExpressions; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; namespace MCGalaxy { public static class ProfanityFilter { - private static Dictionary RegexReduce; - private static List BadWords; - public static void Init() - { - // Initializes the reduction dictionary and word list - RegexReduce = new Dictionary(); - RegexReduce.Add("a", "[@]"); - RegexReduce.Add("b", "i3|l3"); - RegexReduce.Add("c", "[(]"); - RegexReduce.Add("e", "[3]"); - RegexReduce.Add("f", "ph"); - RegexReduce.Add("g", "[6]"); - RegexReduce.Add("h", "#"); - // Because Is and Ls are similar, the swear list will contain a lowercase I instead of Ls. - // For example, the word "asshole" would be saved as "asshoie". - RegexReduce.Add("i", "[l!1]"); - RegexReduce.Add("o", "[0]"); - RegexReduce.Add("q", "[9]"); - RegexReduce.Add("s", "[$5]"); - RegexReduce.Add("w", "vv"); - RegexReduce.Add("z", "[2]"); + static string[] reduceKeys, reduceValues; + static List filters; + + public static void Init() { + InitReduceTable(); + LoadBadWords(); + } - // Load/create the badwords.txt file and import them into the BadWords list + public static string Parse(string text) { return FilterWords(text); } + + + // Replace any whole word containing a bad word inside it (including partial word matches) + static string FilterWords(string text) { + string[] words = text.Split(' '); + string[] reduced = Reduce(text).Split(' '); + + // Loop through each reduced word, looking for a bad word + for (int i = 0; i < reduced.Length; i++) { + bool isFiltered = false; + foreach (string filter in filters) { + if (reduced[i].Contains(filter)) { + isFiltered = true; break; + } + } + if (!isFiltered) continue; + + // If a bad word is found anywhere in the word, replace the word + int length = words[i].Length; + words[i] = new String('*', length); + } + return String.Join(" ", words); + } + + static void InitReduceTable() { + if (reduceKeys != null) return; + // Because some letters are similar (Like i and l), they are reduced to the same form. + // For example, the word "@t3$5t ll" is reduced to "atest ii"; + reduceKeys = "@|i3|l3|(|3|ph|6|#|l|!|1|0|9|$5|vv|2".Split('|'); + reduceValues= "a|b|b|c|e|f|g|h|i|i|i|o|q|s|w|z".Split('|'); + } + + static void LoadBadWords() { if (!File.Exists("text/badwords.txt")) { // No file exists yet, so let's create one StringBuilder sb = new StringBuilder(); @@ -56,88 +74,20 @@ namespace MCGalaxy { File.WriteAllText("text/badwords.txt", sb.ToString()); } - // OK the file should exist now List lines = CP437Reader.ReadAllLines("text/badwords.txt"); // Run the badwords through the reducer to ensure things like Ls become Is and everything is lowercase - // Also remove lines starting with a "#" since they are comments - BadWords = new List(); + filters = new List(); foreach (string line in lines) { if (line.StartsWith("#") || line.Trim().Length == 0) continue; string word = Reduce(line.ToLower()); - BadWords.Add(word); + filters.Add(word); } } - public static string Parse(string text) { - //return ParseMatchWholeWords(text); - return ParseMatchPartialWords(text); - } - - // Replace bad words only if the whole word matches - private static string ParseMatchWholeWords(string text) - { - var result = new List(); - var originalWords = text.Split(' '); - var reducedWords = Reduce(text).Split(' '); - for (var i = 0; i < originalWords.Length; i++) - { - if (BadWords.Contains(reducedWords[i].ToLower())) - { - // A reduced word matched a bad word from our file! - result.Add(new String('*', originalWords[i].Length)); - } - else - { - result.Add(originalWords[i]); - } - } - - return String.Join(" ", result.ToArray()); - } - - // Replace any whole word containing a bad word inside it (including partial word matches) - private static string ParseMatchPartialWords(string text) - { - var result = new List(); - var originalWords = text.Split(' '); - var reducedWords = Reduce(text).Split(' '); - - // Loop through each reduced word, looking for a badword - for(int i=0; i