mirror of
https://github.com/dolthub/dolt.git
synced 2026-01-08 00:39:48 -06:00
Clean up usernames from import and limit to top 30
This commit is contained in:
committed by
Dan Willhite
parent
f23cbe5344
commit
cc42196818
@@ -72,21 +72,17 @@ func runImport(dir, dsSpec string) error {
|
||||
|
||||
userpat := regexp.MustCompile(`^[a-zA-Z][a-zA-Z\s]*\d*$`)
|
||||
fmt.Println("Creating users")
|
||||
usermap := map[string]struct{}{}
|
||||
usermap := map[string]int{}
|
||||
outer:
|
||||
for _, msg := range msgs {
|
||||
name := strings.TrimSpace(msg.Author)
|
||||
if !userpat.MatchString(name) {
|
||||
continue outer
|
||||
}
|
||||
usermap[name] = struct{}{}
|
||||
usermap[name] += 1
|
||||
}
|
||||
|
||||
users := []string{}
|
||||
for k, _ := range usermap {
|
||||
users = append(users, k)
|
||||
}
|
||||
sort.Strings(users)
|
||||
users := topUsers(usermap)
|
||||
fmt.Println("Committing data")
|
||||
root := Root{Messages: m, Index: termDocs, Users: users}
|
||||
_, err = ds.Database().CommitValue(ds, marshal.MustMarshal(root))
|
||||
@@ -127,3 +123,30 @@ func characterName(n *html.Node) string {
|
||||
}
|
||||
return strings.TrimSpace(n.FirstChild.Data)
|
||||
}
|
||||
|
||||
// cpair associates a character (user) name with the number of messages
// counted for that name.
type cpair struct {
	character string
	cnt       int
}

// topUsers returns the lowercased names of the most frequent users in
// usermap, which maps a name to its message count.
//
// Names of length one or less, and names starting with "ANOTHER", are
// dropped. Counts for names that differ only by case are merged so each
// lowercased name appears at most once. At most 30 names are returned,
// chosen by descending count; ties are broken alphabetically so the result
// does not depend on map iteration order. The returned slice is sorted
// alphabetically and is never nil.
func topUsers(usermap map[string]int) []string {
	const limit = 30

	// Filter on the original spelling (the "ANOTHER" prefix check is
	// case-sensitive), then accumulate under the lowercased key so that
	// "Kirk" and "KIRK" count as one user rather than two duplicates.
	merged := make(map[string]int, len(usermap))
	for name, cnt := range usermap {
		if len(name) <= 1 || strings.HasPrefix(name, "ANOTHER") {
			continue
		}
		merged[strings.ToLower(name)] += cnt
	}

	pairs := make([]cpair, 0, len(merged))
	for name, cnt := range merged {
		pairs = append(pairs, cpair{character: name, cnt: cnt})
	}

	// Sort descending by cnt. sort.Slice is not stable and the input order
	// comes from map iteration, so equal counts need an explicit
	// tie-breaker to keep the cutoff below deterministic.
	sort.Slice(pairs, func(i, j int) bool {
		if pairs[i].cnt != pairs[j].cnt {
			return pairs[i].cnt > pairs[j].cnt
		}
		return pairs[i].character < pairs[j].character
	})

	if len(pairs) > limit {
		pairs = pairs[:limit]
	}

	users := make([]string, 0, len(pairs))
	for _, p := range pairs {
		users = append(users, p.character)
	}
	sort.Strings(users)
	return users
}
|
||||
|
||||
@@ -8,8 +8,6 @@ import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
|
||||
floodsub "gx/ipfs/QmZdsQf8BiCpAj61nz9NgqVeRUkw9vATvCs7UHFTxoUMDb/floodsub"
|
||||
|
||||
"github.com/attic-labs/noms/go/d"
|
||||
"github.com/attic-labs/noms/go/datas"
|
||||
"github.com/attic-labs/noms/go/hash"
|
||||
@@ -17,6 +15,7 @@ import (
|
||||
"github.com/attic-labs/noms/go/merge"
|
||||
"github.com/attic-labs/noms/go/types"
|
||||
"github.com/attic-labs/noms/samples/go/ipfs-chat/dbg"
|
||||
"gx/ipfs/QmZdsQf8BiCpAj61nz9NgqVeRUkw9vATvCs7UHFTxoUMDb/floodsub"
|
||||
)
|
||||
|
||||
func Replicate(sub *floodsub.Subscription, source, dest datas.Dataset, didChange func(ds datas.Dataset)) {
|
||||
|
||||
Reference in New Issue
Block a user