From b55089cbd653ddff11fc22ddcf1c03057e7274d8 Mon Sep 17 00:00:00 2001 From: Ted Unangst Date: Wed, 24 Apr 2019 01:16:34 -0400 Subject: [PATCH] experimental text filtering to stop the zalgo --- fun.go | 2 ++ go.mod | 1 + go.sum | 2 ++ skulduggery.go | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+) create mode 100644 skulduggery.go diff --git a/fun.go b/fun.go index d234010..f4a6cb4 100644 --- a/fun.go +++ b/fun.go @@ -54,6 +54,7 @@ func reverbolate(honks []*Honk) { } } zap := make(map[*Donk]bool) + h.Noise = unpucker(h.Noise) h.HTML = cleanstring(h.Noise) emuxifier := func(e string) string { for _, d := range h.Donks { @@ -151,6 +152,7 @@ func herdofemus(noise string) []Emu { var re_bolder = regexp.MustCompile(`(^|\W)\*\*([\w\s,.!?']+)\*\*($|\W)`) var re_italicer = regexp.MustCompile(`(^|\W)\*([\w\s,.!?']+)\*($|\W)`) + func markitzero(s string) string { s = re_bolder.ReplaceAllString(s, "$1$2$3") s = re_italicer.ReplaceAllString(s, "$1$2$3") diff --git a/go.mod b/go.mod index 63d0423..9c93516 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module humungus.tedunangst.com/r/honk require ( github.com/gorilla/mux v1.7.1 + github.com/mattn/go-runewidth v0.0.4 golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 humungus.tedunangst.com/r/go-sqlite3 v1.1.2 diff --git a/go.sum b/go.sum index f4da27a..2de8af4 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/gorilla/mux v1.7.1 h1:Dw4jY2nghMMRsh1ol8dv1axHkDwMQK2DHerMNJsIpJU= github.com/gorilla/mux v1.7.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y= +github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5 h1:bselrhR0Or1vomJZC8ZIjWtbDmn9OYFLX5Ik9alpJpE= golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= diff --git a/skulduggery.go b/skulduggery.go new file mode 100644 index 0000000..fded8c3 --- /dev/null +++ b/skulduggery.go @@ -0,0 +1,88 @@ +// +// Copyright (c) 2019 Ted Unangst +// +// Permission to use, copy, modify, and distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +package main + +import ( + "regexp" + + "github.com/mattn/go-runewidth" +) + +var bigboldshitz = "๐€๐๐‚๐ƒ๐„๐…๐†๐‡๐ˆ๐‰๐Š๐‹๐Œ๐๐Ž๐๐๐‘๐’๐“๐”๐•๐–๐—๐˜๐™" +var lilboldshitz = "๐š๐›๐œ๐๐ž๐Ÿ๐ ๐ก๐ข๐ฃ๐ค๐ฅ๐ฆ๐ง๐จ๐ฉ๐ช๐ซ๐ฌ๐ญ๐ฎ๐ฏ๐ฐ๐ฑ๐ฒ๐ณ" +var biggothshitz = "๐•ฌ๐•ญ๐•ฎ๐•ฏ๐•ฐ๐•ฑ๐•ฒ๐•ณ๐•ด๐•ต๐•ถ๐•ท๐•ธ๐•น๐•บ๐•ป๐•ผ๐•ฝ๐•พ๐•ฟ๐–€๐–๐–‚๐–ƒ๐–„๐–…" +var lilgothshitz = "๐–†๐–‡๐–ˆ๐–‰๐–Š๐–‹๐–Œ๐–๐–Ž๐–๐–๐–‘๐–’๐–“๐–”๐–•๐––๐–—๐–˜๐–™๐–š๐–›๐–œ๐–๐–ž๐–Ÿ" +var bigitalshitz = "๐‘จ๐‘ฉ๐‘ช๐‘ซ๐‘ฌ๐‘ญ๐‘ฎ๐‘ฏ๐‘ฐ๐‘ฑ๐‘ฒ๐‘ณ๐‘ด๐‘ต๐‘ถ๐‘ท๐‘ธ๐‘น๐‘บ๐‘ป๐‘ผ๐‘ฝ๐‘พ๐‘ฟ๐’€๐’" +var lilitalshitz = "๐’‚๐’ƒ๐’„๐’…๐’†๐’‡๐’ˆ๐’‰๐’Š๐’‹๐’Œ๐’๐’Ž๐’๐’๐’‘๐’’๐’“๐’”๐’•๐’–๐’—๐’˜๐’™๐’š๐’›" +var bigbangshitz = "๐”ธ๐”นโ„‚๐”ป๐”ผ๐”ฝ๐”พโ„๐•€๐•๐•‚๐•ƒ๐•„โ„•๐•†โ„™โ„šโ„๐•Š๐•‹๐•Œ๐•๐•Ž๐•๐•โ„ค" +var lilbangshitz = "๐•’๐•“๐•”๐••๐•–๐•—๐•˜๐•™๐•š๐•›๐•œ๐•๐•ž๐•Ÿ๐• ๐•ก๐•ข๐•ฃ๐•ค๐•ฅ๐•ฆ๐•ง๐•จ๐•ฉ๐•ช๐•ซ" + +var re_alltheshitz = regexp.MustCompile(`[` + + bigboldshitz + lilboldshitz + + biggothshitz + lilgothshitz + + bigitalshitz + lilitalshitz + + bigbangshitz + lilbangshitz + + `]{2,}`) + +// this may not be especially fast +func unpucker(s string) string { + fixer := func(r string) string { + x := make([]byte, len(r)) + xi := 0 + loop1: + for _, c := range r { + xi++ + for _, set := range []string{bigboldshitz, biggothshitz, bigitalshitz, bigbangshitz} { + i := 0 + for _, rr := range set { + if rr == c { + x[xi] = byte('A' + i) + continue loop1 + } + i++ + } + } + for _, set := range []string{lilboldshitz, lilgothshitz, lilitalshitz, lilbangshitz} { + i := 0 + for _, rr := range set { + if rr == c { + x[xi] = byte('a' + i) + continue loop1 + } + i++ + } + } + x[xi] = '.' + } + return string(x) + } + s = re_alltheshitz.ReplaceAllStringFunc(s, fixer) + x := make([]byte, 0, len(s)) + zw := false + for _, c := range s { + if runewidth.RuneWidth(c) == 0 { + if zw { + continue + } + zw = true + } else { + zw = false + } + q := string(c) + x = append(x, []byte(q)...) + } + return string(x) +}