From 79d00b356f11324ca13b92b2257ff2459dfcfce9 Mon Sep 17 00:00:00 2001 From: Unlock Music Dev Date: Mon, 5 Dec 2022 11:04:57 +0800 Subject: [PATCH 1/3] feat(qmc): use unicode normalize to match filename --- algo/qmc/key_mmkv.go | 54 +++++++++++++++++++------------------------- go.mod | 2 +- go.sum | 11 ++++----- 3 files changed, 28 insertions(+), 39 deletions(-) diff --git a/algo/qmc/key_mmkv.go b/algo/qmc/key_mmkv.go index 81f891a..4481039 100644 --- a/algo/qmc/key_mmkv.go +++ b/algo/qmc/key_mmkv.go @@ -6,12 +6,11 @@ import ( "os" "path/filepath" "runtime" - "strings" - "github.com/hbollon/go-edlib" "github.com/samber/lo" "go.uber.org/zap" "golang.org/x/exp/slices" + "golang.org/x/text/unicode/norm" "unlock-music.dev/mmkv" ) @@ -51,45 +50,34 @@ func readKeyFromMMKV(file string, logger *zap.Logger) ([]byte, error) { } _, partName := filepath.Split(file) - buf, err := streamKeyVault.GetBytes(file) + partName = normalizeUnicode(partName) + buf, err := streamKeyVault.GetBytes(file) if buf == nil { filePaths := streamKeyVault.Keys() + fileNames := lo.Map(filePaths, func(filePath string, _ int) string { + _, name := filepath.Split(filePath) + return normalizeUnicode(name) + }) - for _, key := range filePaths { // fallback 1: match filename only - if !strings.HasSuffix(key, partName) { + logger.Debug("filename", + zap.String("partName", partName), + zap.Int("partName", len(partName)), + zap.String("fileNames", fileNames[0]), + zap.Int("fileNames", len(fileNames[0])), + ) + + for _, key := range fileNames { // fallback: match filename only + if key != partName { continue } - buf, err = streamKeyVault.GetBytes(key) + idx := slices.Index(fileNames, key) + buf, err = streamKeyVault.GetBytes(filePaths[idx]) if err != nil { - logger.Warn("read key from mmkv", zap.String("key", key), zap.Error(err)) + logger.Warn("read key from mmkv", zap.String("key", filePaths[idx]), zap.Error(err)) } } - if buf == nil { // fallback 2: match filename with edit distance - // use editorial 
judgement to select the best match - // since macOS may change some characters in the file name. - // e.g. "ぜ"(e3 81 9c) -> "ぜ"(e3 81 9b e3 82 99) - fileNames := lo.Map(filePaths, func(filePath string, _ int) string { - _, name := filepath.Split(filePath) - return name - }) - - minDisStr, err := edlib.FuzzySearch(partName, fileNames, edlib.Levenshtein) - if err != nil { - logger.Warn("fuzzy search failed", zap.Error(err)) - } - - // TODO: make distance configurable - // for now, assume only 1 character changed to 2 characters - if edlib.LevenshteinDistance(partName, minDisStr) < 3 { - idx := slices.Index(fileNames, minDisStr) - buf, err = streamKeyVault.GetBytes(filePaths[idx]) - if err != nil { - logger.Warn("read key from mmkv", zap.String("key", minDisStr), zap.Error(err)) - } - } - } } if len(buf) == 0 { @@ -135,3 +123,7 @@ func getDefaultMMKVDir() (string, error) { return mmkvDir, nil } + +func normalizeUnicode(str string) string { + return norm.NFC.String(str) +} diff --git a/go.mod b/go.mod index cbd1ab6..77ed2b1 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,12 @@ module unlock-music.dev/cli go 1.19 require ( - github.com/hbollon/go-edlib v1.6.0 github.com/samber/lo v1.36.0 github.com/urfave/cli/v2 v2.23.6 go.uber.org/zap v1.24.0 golang.org/x/crypto v0.3.0 golang.org/x/exp v0.0.0-20221204150635-6dcec336b2bb + golang.org/x/text v0.5.0 unlock-music.dev/mmkv v0.0.0-20221204231432-41a75bd29939 ) diff --git a/go.sum b/go.sum index 4a019ce..f3acf8d 100644 --- a/go.sum +++ b/go.sum @@ -4,14 +4,11 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4= github.com/golang/protobuf v1.5.0/go.mod 
h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= -github.com/hbollon/go-edlib v1.6.0 h1:ga7AwwVIvP8mHm9GsPueC0d71cfRU/52hmPJ7Tprv4E= -github.com/hbollon/go-edlib v1.6.0/go.mod h1:wnt6o6EIVEzUfgbUZY7BerzQ2uvzp354qmS2xaLkrhM= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -24,8 +21,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M= -github.com/urfave/cli/v2 v2.23.5 h1:xbrU7tAYviSpqeR3X4nEFWUdB/uDZ6DE+HxmRU7Xtyw= -github.com/urfave/cli/v2 v2.23.5/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= github.com/urfave/cli/v2 v2.23.6 h1:iWmtKD+prGo1nKUtLO0Wg4z9esfBM4rAV4QRLQiEmJ4= github.com/urfave/cli/v2 v2.23.6/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc= github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= @@ -36,14 +31,16 @@ go.uber.org/atomic v1.10.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0 go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= go.uber.org/multierr v1.8.0 h1:dg6GjLku4EH+249NNmoIciG9N/jURbDG+pFlTkhzIC8= go.uber.org/multierr v1.8.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak= -go.uber.org/zap v1.23.0 
h1:OjGQ5KQDEUawVHxNwQgPpiypGHOxo2mNZsOqTak4fFY= -go.uber.org/zap v1.23.0/go.mod h1:D+nX8jyLsMHMYrln8A0rJjFt/T/9/bGgIhAqxv5URuY= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= golang.org/x/crypto v0.3.0 h1:a06MkbcxBrEFc0w0QIZWXrH/9cCX6KJyWbBOIwAn+7A= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/exp v0.0.0-20221204150635-6dcec336b2bb h1:QIsP/NmClBICkqnJ4rSIhnrGiGR7Yv9ZORGGnmmLTPk= golang.org/x/exp v0.0.0-20221204150635-6dcec336b2bb/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= +golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= +golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -- 2.45.2 From 6f033af336026f16512d916703e3de62661162cc Mon Sep 17 00:00:00 2001 From: Unlock Music Dev Date: Mon, 5 Dec 2022 11:08:54 +0800 Subject: [PATCH 2/3] chore: remove unused debug log --- algo/qmc/key_mmkv.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/algo/qmc/key_mmkv.go b/algo/qmc/key_mmkv.go index 4481039..742b4e2 100644 --- a/algo/qmc/key_mmkv.go +++ b/algo/qmc/key_mmkv.go @@ -60,13 +60,6 @@ func readKeyFromMMKV(file string, logger *zap.Logger) ([]byte, error) { return normalizeUnicode(name) }) - logger.Debug("filename", - zap.String("partName", partName), - zap.Int("partName", len(partName)), - zap.String("fileNames", fileNames[0]), - zap.Int("fileNames", len(fileNames[0])), - ) - for _, key := range fileNames { 
// fallback: match filename only if key != partName { continue -- 2.45.2 From 12be881d42d9ca961ed37035962716ac7c8515c6 Mon Sep 17 00:00:00 2001 From: Unlock Music Dev Date: Mon, 5 Dec 2022 11:10:40 +0800 Subject: [PATCH 3/3] chore: add accidentally removed comment --- algo/qmc/key_mmkv.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/algo/qmc/key_mmkv.go b/algo/qmc/key_mmkv.go index 742b4e2..3dff9eb 100644 --- a/algo/qmc/key_mmkv.go +++ b/algo/qmc/key_mmkv.go @@ -117,6 +117,9 @@ func getDefaultMMKVDir() (string, error) { return mmkvDir, nil } +// normalizeUnicode normalizes unicode string to NFC. +// since macOS may change some characters in the file name. +// e.g. "ぜ"(e3 81 9c) -> "ぜ"(e3 81 9b e3 82 99) func normalizeUnicode(str string) string { return norm.NFC.String(str) } -- 2.45.2