Merge pull request #9727 from fschade/toggle-unified-roles

Enhancement: Unified Roles Management
This commit is contained in:
Florian Schade
2024-08-27 20:05:27 +02:00
committed by GitHub
67 changed files with 2770 additions and 1667 deletions

View File

@@ -2084,6 +2084,7 @@ def ocisServer(storage, accounts_hash_difficulty = 4, volumes = [], depends_on =
"NATS_NATS_PORT": 9233,
"OCIS_JWT_SECRET": "some-ocis-jwt-secret",
"EVENTHISTORY_STORE": "memory",
"GRAPH_AVAILABLE_ROLES": "b1e2218d-eef8-4d4c-b82d-0f1a1b48f3b5,a8d5fe5e-96e3-418d-825b-534dbdf22b99,fb6c3e19-e378-47e5-b277-9732f9de6e21,58c63c02-1d89-4572-916a-870abc5a1b7d,2d00ce52-1fc2-4dbc-8b95-a73b73395f5a,1c996275-f1c9-4e71-abdf-a42f6495e960,312c0871-5ef7-4b3a-85b6-0e4074c64049,aa97fe03-7980-45ac-9e50-b325749fd7e6",
}
if deploy_type == "":

View File

@@ -0,0 +1,41 @@
Enhancement: Unified Roles Management
Improved management of unified roles with the introduction of default enabled/disabled states and a new command for listing available roles.
It is important to note that a disabled role does not lose previously assigned permissions;
it only means that the role is not available for new assignments.
The following roles are now enabled by default:
- UnifiedRoleViewerID
- UnifiedRoleSpaceViewer
- UnifiedRoleEditor
- UnifiedRoleSpaceEditor
- UnifiedRoleFileEditor
- UnifiedRoleEditorLite
- UnifiedRoleManager
The following roles are now disabled by default:
- UnifiedRoleSecureViewer
To enable the UnifiedRoleSecureViewer role, you must provide a list of all available roles through one of the following methods:
- Using the GRAPH_AVAILABLE_ROLES environment variable.
- Setting the available_roles configuration value.
To enable a role, include the UID of the role in the list of available roles.
A new command has been introduced to simplify the process of finding out which UID belongs to which role. The command is:
```
$ ocis graph list-unified-roles
```
The output of this command includes the following information for each role:
- uid: The unique identifier of the role.
- Description: A short description of the role.
- Enabled: Whether the role is enabled or not.
https://github.com/owncloud/ocis/pull/9727
https://github.com/owncloud/ocis/issues/9698

6
go.mod
View File

@@ -263,8 +263,8 @@ require (
github.com/longsleep/rndm v1.2.0 // indirect
github.com/mattermost/xml-roundtrip-validator v0.1.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/mattn/go-sqlite3 v1.14.22 // indirect
github.com/maxymania/go-system v0.0.0-20170110133659-647cc364bf0b // indirect
github.com/mendsley/gojwk v0.0.0-20141217222730-4d5ec6e58103 // indirect
@@ -303,7 +303,7 @@ require (
github.com/prometheus/procfs v0.15.1 // indirect
github.com/prometheus/statsd_exporter v0.22.8 // indirect
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 // indirect
github.com/rivo/uniseg v0.4.2 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/rs/xid v1.5.0 // indirect
github.com/russellhaering/goxmldsig v1.4.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect

11
go.sum
View File

@@ -818,13 +818,14 @@ github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOA
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.6/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mattn/go-tty v0.0.0-20180219170247-931426f7535a/go.mod h1:XPvLUNfbS4fJH25nqRHfWLMa1ONC8Amw+mIA639KxkE=
@@ -1042,8 +1043,8 @@ github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0/go.mod h1:bCqn
github.com/riandyrn/otelchi v0.9.0 h1:BuQxXR7/JF2yYOQl21Yyz5d52hns/96ecAaPUZiKQzc=
github.com/riandyrn/otelchi v0.9.0/go.mod h1:iX30kllzThsf8oEcEbl3GifPJZtN4cnCWUUc+UhE4yM=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.2 h1:YwD0ulJSJytLpiaWua0sBDusfsCZohxjxzVTYjwxfV8=
github.com/rivo/uniseg v0.4.2/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=

View File

@@ -45,7 +45,7 @@ l10n-push:
.PHONY: l10n-read
l10n-read: $(GO_XGETTEXT)
go-xgettext -o $(OUTPUT_DIR)/graph.pot --keyword=l10n.Template --add-comments -s pkg/service/v0/spacetemplates.go -s pkg/unifiedrole/unifiedrole.go
go-xgettext -o $(OUTPUT_DIR)/graph.pot --keyword=l10n.Template --add-comments -s pkg/service/v0/spacetemplates.go -s pkg/unifiedrole/roles.go
.PHONY: l10n-write
l10n-write:

View File

@@ -5,13 +5,14 @@ import (
"github.com/owncloud/ocis/v2/ocis-pkg/clihelper"
"github.com/owncloud/ocis/v2/services/graph/pkg/config"
"github.com/urfave/cli/v2"
"github.com/owncloud/ocis/v2/services/graph/pkg/config"
)
// GetCommands provides all commands for this service
func GetCommands(cfg *config.Config) cli.Commands {
return []*cli.Command{
return append([]*cli.Command{
// start this service
Server(cfg),
@@ -20,7 +21,7 @@ func GetCommands(cfg *config.Config) cli.Commands {
// infos about this service
Health(cfg),
Version(cfg),
}
}, UnifiedRoles(cfg)...)
}
// Execute is the entry point for the ocis-graph command.

View File

@@ -0,0 +1,80 @@
package command
import (
"os"
"slices"
"strings"
"github.com/olekukonko/tablewriter"
"github.com/urfave/cli/v2"
"github.com/owncloud/ocis/v2/ocis-pkg/config/configlog"
"github.com/owncloud/ocis/v2/services/graph/pkg/config"
"github.com/owncloud/ocis/v2/services/graph/pkg/config/parser"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
// UnifiedRoles bundles available commands for unified roles
func UnifiedRoles(cfg *config.Config) cli.Commands {
cmds := cli.Commands{
listUnifiedRoles(cfg),
}
for _, cmd := range cmds {
cmd.Category = "unified-roles"
cmd.Name = strings.Join([]string{cmd.Name, "unified-roles"}, "-")
cmd.Before = func(c *cli.Context) error {
return configlog.ReturnError(parser.ParseConfig(cfg))
}
}
return cmds
}
// unifiedRolesStatus lists available unified roles, it contains an indicator to show if the role is enabled or not
func listUnifiedRoles(cfg *config.Config) *cli.Command {
return &cli.Command{
Name: "list",
Usage: "list available unified roles",
Action: func(c *cli.Context) error {
tbl := tablewriter.NewWriter(os.Stdout)
tbl.SetRowLine(true)
tbl.SetAutoMergeCellsByColumnIndex([]int{0}) // rowspan should only affect the first column
headers := []string{"UID", "Enabled", "Description", "Condition", "Allowed resource actions"}
tbl.SetHeader(headers)
for _, definition := range unifiedrole.GetRoles(unifiedrole.RoleFilterAll()) {
const enabled = "enabled"
const disabled = "disabled"
rows := [][]string{
{definition.GetId(), disabled, definition.GetDescription()},
}
if slices.Contains(cfg.UnifiedRoles.AvailableRoles, definition.GetId()) {
rows[0][1] = enabled
}
for i, rolePermission := range definition.GetRolePermissions() {
actions := strings.Join(rolePermission.GetAllowedResourceActions(), "\n")
row := []string{rolePermission.GetCondition(), actions}
switch i {
case 0:
rows[0] = append(rows[0], row...)
default:
rows = append(rows, append(slices.Clone(rows[0][:len(rows[0])-len(row)]), row...))
}
}
for _, row := range rows {
// balance the row before adding it to the table,
// this prevents the row from having empty columns.
tbl.Append(append(row, make([]string, len(headers)-len(row))...))
}
}
tbl.Render()
return nil
},
}
}

View File

@@ -25,11 +25,12 @@ type Config struct {
TokenManager *TokenManager `yaml:"token_manager"`
GRPCClientTLS *shared.GRPCClientTLS `yaml:"grpc_client_tls"`
Application Application `yaml:"application"`
Spaces Spaces `yaml:"spaces"`
Identity Identity `yaml:"identity"`
IncludeOCMSharees bool `yaml:"include_ocm_sharees" env:"OCIS_ENABLE_OCM;GRAPH_INCLUDE_OCM_SHAREES" desc:"Include OCM sharees when listing users." introductionVersion:"5.0"`
Events Events `yaml:"events"`
Application Application `yaml:"application"`
Spaces Spaces `yaml:"spaces"`
Identity Identity `yaml:"identity"`
IncludeOCMSharees bool `yaml:"include_ocm_sharees" env:"OCIS_ENABLE_OCM;GRAPH_INCLUDE_OCM_SHAREES" desc:"Include OCM sharees when listing users." introductionVersion:"5.0"`
Events Events `yaml:"events"`
UnifiedRoles UnifiedRoles `yaml:"unified_roles"`
Keycloak Keycloak `yaml:"keycloak"`
ServiceAccount ServiceAccount `yaml:"service_account"`

View File

@@ -9,6 +9,13 @@ import (
"github.com/owncloud/ocis/v2/ocis-pkg/shared"
"github.com/owncloud/ocis/v2/ocis-pkg/structs"
"github.com/owncloud/ocis/v2/services/graph/pkg/config"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
var (
// _disabledByDefaultUnifiedRoleRoleIDs contains all roles that are not enabled by default,
// but can be enabled by the user.
_disabledByDefaultUnifiedRoleRoleIDs = []string{unifiedrole.UnifiedRoleSecureViewerID}
)
// FullDefaultConfig returns a fully initialized default configuration
@@ -107,6 +114,9 @@ func DefaultConfig() *config.Config {
Cluster: "ocis-cluster",
EnableTLS: false,
},
UnifiedRoles: config.UnifiedRoles{
AvailableRoles: nil, // will be populated with defaults in EnsureDefaults
},
}
}
@@ -164,6 +174,16 @@ func EnsureDefaults(cfg *config.Config) {
if cfg.Identity.LDAP.GroupCreateBaseDN == "" {
cfg.Identity.LDAP.GroupCreateBaseDN = cfg.Identity.LDAP.GroupBaseDN
}
// set default roles, if no roles are defined, we need to take care and provide all the default roles
if len(cfg.UnifiedRoles.AvailableRoles) == 0 {
for _, definition := range unifiedrole.GetRoles(
// filter out the roles that are disabled by default
unifiedrole.RoleFilterInvert(unifiedrole.RoleFilterIDs(_disabledByDefaultUnifiedRoleRoleIDs...)),
) {
cfg.UnifiedRoles.AvailableRoles = append(cfg.UnifiedRoles.AvailableRoles, definition.GetId())
}
}
}
// Sanitize sanitized the configuration

View File

@@ -5,13 +5,14 @@ import (
"fmt"
"github.com/go-ldap/ldap/v3"
ociscfg "github.com/owncloud/ocis/v2/ocis-pkg/config"
defaults2 "github.com/owncloud/ocis/v2/ocis-pkg/config/defaults"
"github.com/owncloud/ocis/v2/ocis-pkg/config/envdecode"
"github.com/owncloud/ocis/v2/ocis-pkg/shared"
"github.com/owncloud/ocis/v2/services/graph/pkg/config"
"github.com/owncloud/ocis/v2/services/graph/pkg/config/defaults"
"github.com/owncloud/ocis/v2/ocis-pkg/config/envdecode"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
// ParseConfig loads configuration from known paths.
@@ -72,6 +73,23 @@ func Validate(cfg *config.Config) error {
return shared.MissingServiceAccountSecret(cfg.Service.Name)
}
// validate unified roles
{
var err error
for _, uid := range cfg.UnifiedRoles.AvailableRoles {
// check if the role is known
if len(unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(uid))) == 0 {
// collect all possible errors to return them all at once
err = errors.Join(err, fmt.Errorf("%w: %s", unifiedrole.ErrUnknownRole, uid))
}
}
if err != nil {
return err
}
}
return nil
}

View File

@@ -0,0 +1,6 @@
package config
// UnifiedRoles contains all settings related to unified roles.
type UnifiedRoles struct {
AvailableRoles []string `yaml:"available_roles" env:"GRAPH_AVAILABLE_ROLES" desc:"A list of roles that are available for assignment." introductionVersion:"%%NEXT%%"`
}

View File

@@ -7,6 +7,7 @@ import (
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
"github.com/cs3org/reva/v2/pkg/storage/utils/grants"
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)

View File

@@ -25,6 +25,7 @@ import (
"github.com/go-chi/chi/v5"
"github.com/go-chi/render"
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/ocis-pkg/l10n"
l10n_pkg "github.com/owncloud/ocis/v2/services/graph/pkg/l10n"
@@ -104,7 +105,12 @@ func (s DriveItemPermissionsService) Invite(ctx context.Context, resourceId *sto
unifiedRolePermissions := []*libregraph.UnifiedRolePermission{{AllowedResourceActions: invite.LibreGraphPermissionsActions}}
for _, roleID := range invite.GetRoles() {
role, err := unifiedrole.NewUnifiedRoleFromID(roleID)
// only allow roles that are enabled in the config
if !slices.Contains(s.config.UnifiedRoles.AvailableRoles, roleID) {
return libregraph.Permission{}, unifiedrole.ErrUnknownRole
}
role, err := unifiedrole.GetRole(unifiedrole.RoleFilterIDs(roleID))
if err != nil {
s.logger.Debug().Err(err).Interface("role", invite.GetRoles()[0]).Msg("unable to convert requested role")
return libregraph.Permission{}, err
@@ -124,7 +130,8 @@ func (s DriveItemPermissionsService) Invite(ctx context.Context, resourceId *sto
cs3ResourcePermissions := unifiedrole.PermissionsToCS3ResourcePermissions(unifiedRolePermissions)
permission := &libregraph.Permission{}
if role := unifiedrole.CS3ResourcePermissionsToUnifiedRole(cs3ResourcePermissions, condition, false); role != nil {
availableRoles := unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(s.config.UnifiedRoles.AvailableRoles...))
if role := unifiedrole.CS3ResourcePermissionsToRole(availableRoles, cs3ResourcePermissions, condition, false); role != nil {
permission.Roles = []string{role.GetId()}
}
@@ -345,7 +352,8 @@ func (s DriveItemPermissionsService) ListPermissions(ctx context.Context, itemID
collectionOfPermissions = libregraph.CollectionOfPermissionsWithAllowedValues{
LibreGraphPermissionsActionsAllowedValues: allowedActions,
LibreGraphPermissionsRolesAllowedValues: conversions.ToValueSlice(
unifiedrole.GetApplicableRoleDefinitionsForActions(
unifiedrole.GetRolesByPermissions(
unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(s.config.UnifiedRoles.AvailableRoles...)),
allowedActions,
condition,
listFederatedRoles,
@@ -555,18 +563,20 @@ func (s DriveItemPermissionsService) UpdateSpaceRootPermission(ctx context.Conte
return s.UpdatePermission(ctx, rootResourceID, permissionID, newPermission)
}
// DriveItemPermissionsService is the api that registers the http endpoints which expose needed operation to the graph api.
// DriveItemPermissionsApi is the api that registers the http endpoints which expose needed operation to the graph api.
// the business logic is delegated to the permissions service and further down to the cs3 client.
type DriveItemPermissionsApi struct {
logger log.Logger
driveItemPermissionsService DriveItemPermissionsProvider
config *config.Config
}
// NewDriveItemPermissionsApi creates a new DriveItemPermissionsApi
func NewDriveItemPermissionsApi(driveItemPermissionService DriveItemPermissionsProvider, logger log.Logger) (DriveItemPermissionsApi, error) {
func NewDriveItemPermissionsApi(driveItemPermissionService DriveItemPermissionsProvider, logger log.Logger, c *config.Config) (DriveItemPermissionsApi, error) {
return DriveItemPermissionsApi{
logger: log.Logger{Logger: logger.With().Str("graph api", "DrivesDriveItemApi").Logger()},
driveItemPermissionsService: driveItemPermissionService,
config: c,
}, nil
}
@@ -586,15 +596,14 @@ func (api DriveItemPermissionsApi) Invite(w http.ResponseWriter, r *http.Request
return
}
ctx := r.Context()
ctx := validate.ContextWithAllowedRoleIDs(r.Context(), api.config.UnifiedRoles.AvailableRoles)
if err = validate.StructCtx(ctx, driveItemInvite); err != nil {
api.logger.Debug().Err(err).Interface("Body", r.Body).Msg("invalid request body")
errorcode.InvalidRequest.Render(w, r, http.StatusBadRequest, err.Error())
return
}
permission, err := api.driveItemPermissionsService.Invite(ctx, &itemID, *driveItemInvite)
permission, err := api.driveItemPermissionsService.Invite(ctx, &itemID, *driveItemInvite)
if err != nil {
errorcode.RenderError(w, r, err)
return
@@ -620,7 +629,7 @@ func (api DriveItemPermissionsApi) SpaceRootInvite(w http.ResponseWriter, r *htt
return
}
ctx := r.Context()
ctx := validate.ContextWithAllowedRoleIDs(r.Context(), api.config.UnifiedRoles.AvailableRoles)
if err = validate.StructCtx(ctx, driveItemInvite); err != nil {
api.logger.Debug().Err(err).Interface("Body", r.Body).Msg("invalid request body")
errorcode.InvalidRequest.Render(w, r, http.StatusBadRequest, err.Error())

View File

@@ -7,6 +7,7 @@ import (
"errors"
"net/http"
"net/http/httptest"
"slices"
"time"
gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1"
@@ -54,6 +55,7 @@ var _ = Describe("DriveItemPermissionsService", func() {
OpaqueId: "user",
},
}
cache identity.IdentityCache
statResponse *provider.StatResponse
driveItemId *provider.ResourceId
ctx context.Context
@@ -66,7 +68,7 @@ var _ = Describe("DriveItemPermissionsService", func() {
gatewaySelector = mocks.NewSelectable[gateway.GatewayAPIClient](GinkgoT())
gatewaySelector.On("Next").Return(gatewayClient, nil)
cache := identity.NewIdentityCache(identity.IdentityCacheWithGatewaySelector(gatewaySelector))
cache = identity.NewIdentityCache(identity.IdentityCacheWithGatewaySelector(gatewaySelector))
cfg := defaults.FullDefaultConfig()
service, err := svc.NewDriveItemPermissionsService(logger, gatewaySelector, cache, cfg)
@@ -169,13 +171,13 @@ var _ = Describe("DriveItemPermissionsService", func() {
driveItemInvite.Recipients = []libregraph.DriveRecipient{
{ObjectId: libregraph.PtrString("1"), LibreGraphRecipientType: libregraph.PtrString("user")},
}
driveItemInvite.Roles = []string{unifiedrole.NewViewerUnifiedRole().GetId()}
driveItemInvite.Roles = []string{unifiedrole.UnifiedRoleViewerID}
permission, err := driveItemPermissionsService.Invite(context.Background(), driveItemId, driveItemInvite)
Expect(err).ToNot(HaveOccurred())
Expect(permission.GetRoles()).To(HaveLen(1))
Expect(permission.GetRoles()[0]).To(Equal(unifiedrole.NewViewerUnifiedRole().GetId()))
Expect(permission.GetRoles()[0]).To(Equal(unifiedrole.UnifiedRoleViewerID))
})
It("succeeds with folder roles (happy path)", func() {
@@ -185,20 +187,20 @@ var _ = Describe("DriveItemPermissionsService", func() {
driveItemInvite.Recipients = []libregraph.DriveRecipient{
{ObjectId: libregraph.PtrString("1"), LibreGraphRecipientType: libregraph.PtrString("user")},
}
driveItemInvite.Roles = []string{unifiedrole.NewEditorUnifiedRole().GetId()}
driveItemInvite.Roles = []string{unifiedrole.UnifiedRoleEditorID}
permission, err := driveItemPermissionsService.Invite(context.Background(), driveItemId, driveItemInvite)
Expect(err).ToNot(HaveOccurred())
Expect(permission.GetRoles()).To(HaveLen(1))
Expect(permission.GetRoles()[0]).To(Equal(unifiedrole.NewEditorUnifiedRole().GetId()))
Expect(permission.GetRoles()[0]).To(Equal(unifiedrole.UnifiedRoleEditorID))
})
It("fails with when trying to set a space role on a file", func() {
driveItemInvite.Recipients = []libregraph.DriveRecipient{
{ObjectId: libregraph.PtrString("1"), LibreGraphRecipientType: libregraph.PtrString("user")},
}
driveItemInvite.Roles = []string{unifiedrole.NewManagerUnifiedRole().GetId()}
driveItemInvite.Roles = []string{unifiedrole.UnifiedRoleManagerID}
permission, err := driveItemPermissionsService.Invite(context.Background(), driveItemId, driveItemInvite)
Expect(err).To(MatchError(errorcode.New(errorcode.InvalidRequest, "role not applicable to this resource")))
@@ -209,7 +211,7 @@ var _ = Describe("DriveItemPermissionsService", func() {
driveItemInvite.Recipients = []libregraph.DriveRecipient{
{ObjectId: libregraph.PtrString("1"), LibreGraphRecipientType: libregraph.PtrString("user")},
}
driveItemInvite.Roles = []string{unifiedrole.NewEditorUnifiedRole().GetId()}
driveItemInvite.Roles = []string{unifiedrole.UnifiedRoleEditorID}
permission, err := driveItemPermissionsService.Invite(context.Background(), driveItemId, driveItemInvite)
Expect(err).To(MatchError(errorcode.New(errorcode.InvalidRequest, "role not applicable to this resource")))
@@ -221,7 +223,7 @@ var _ = Describe("DriveItemPermissionsService", func() {
driveItemInvite.Recipients = []libregraph.DriveRecipient{
{ObjectId: libregraph.PtrString("1"), LibreGraphRecipientType: libregraph.PtrString("user")},
}
driveItemInvite.Roles = []string{unifiedrole.NewFileEditorUnifiedRole().GetId()}
driveItemInvite.Roles = []string{unifiedrole.UnifiedRoleFileEditorID}
permission, err := driveItemPermissionsService.Invite(context.Background(), driveItemId, driveItemInvite)
Expect(err).To(MatchError(errorcode.New(errorcode.InvalidRequest, "role not applicable to this resource")))
@@ -245,6 +247,7 @@ var _ = Describe("DriveItemPermissionsService", func() {
Expect(permission.GetLibreGraphPermissionsActions()).To(HaveLen(1))
Expect(permission.GetLibreGraphPermissionsActions()[0]).To(Equal(unifiedrole.DriveItemContentRead))
})
It("fails with a missing driveritem", func() {
statResponse.Status = status.NewNotFound(context.Background(), "not found")
permission, err := driveItemPermissionsService.Invite(context.Background(), driveItemId, driveItemInvite)
@@ -252,6 +255,20 @@ var _ = Describe("DriveItemPermissionsService", func() {
Expect(err).To(MatchError(errorcode.New(errorcode.ItemNotFound, "not found").WithOrigin(errorcode.ErrorOriginCS3)))
Expect(permission).To(BeZero())
})
It("fails with unknown or disable role", func() {
cfg := defaults.FullDefaultConfig()
slices.DeleteFunc(cfg.UnifiedRoles.AvailableRoles, func(s string) bool {
// SecureViewer is enabled in ci, we need to remove it in the unit test
return s != unifiedrole.UnifiedRoleSecureViewerID
})
service, err := svc.NewDriveItemPermissionsService(log.NewLogger(), gatewaySelector, cache, cfg)
Expect(err).ToNot(HaveOccurred())
driveItemInvite.Roles = []string{unifiedrole.UnifiedRoleViewerID, unifiedrole.UnifiedRoleSecureViewerID}
_, err = service.Invite(context.Background(), driveItemId, driveItemInvite)
Expect(err).To(MatchError(unifiedrole.ErrUnknownRole))
})
})
Describe("SpaceRootInvite", func() {
var (
@@ -829,7 +846,7 @@ var _ = Describe("DriveItemPermissionsService", func() {
gatewayClient.On("GetUser", mock.Anything, mock.Anything).Return(getUserResponse, nil)
driveItemPermission.SetRoles([]string{unifiedrole.NewSpaceViewerUnifiedRole().GetId()})
driveItemPermission.SetRoles([]string{unifiedrole.UnifiedRoleSpaceViewerID})
res, err := driveItemPermissionsService.UpdatePermission(context.Background(), driveItemId, "permissionid", driveItemPermission)
Expect(err).To(MatchError(errorcode.New(errorcode.InvalidRequest, "role not applicable to this resource")))
Expect(res).To(BeZero())
@@ -846,7 +863,7 @@ var _ = Describe("DriveItemPermissionsService", func() {
gatewayClient.On("Stat", mock.Anything, mock.Anything).Return(statResponse, nil)
gatewayClient.On("GetUser", mock.Anything, mock.Anything).Return(getUserResponse, nil)
driveItemPermission.SetRoles([]string{unifiedrole.NewFileEditorUnifiedRole().GetId()})
driveItemPermission.SetRoles([]string{unifiedrole.UnifiedRoleFileEditorID})
spaceId := &provider.ResourceId{
StorageId: "1",
SpaceId: "2",
@@ -1015,7 +1032,7 @@ var _ = Describe("DriveItemPermissionsApi", func() {
logger := log.NewLogger()
mockProvider = mocks.NewDriveItemPermissionsProvider(GinkgoT())
api, err := svc.NewDriveItemPermissionsApi(mockProvider, logger)
api, err := svc.NewDriveItemPermissionsApi(mockProvider, logger, defaults.FullDefaultConfig())
Expect(err).ToNot(HaveOccurred())
httpAPI = api
@@ -1029,7 +1046,7 @@ var _ = Describe("DriveItemPermissionsApi", func() {
ObjectId: libregraph.PtrString("1"),
LibreGraphRecipientType: libregraph.PtrString("user")},
},
Roles: []string{unifiedrole.NewViewerUnifiedRole().GetId()},
Roles: []string{unifiedrole.UnifiedRoleViewerID},
}
})
@@ -1097,6 +1114,22 @@ var _ = Describe("DriveItemPermissionsApi", func() {
Expect(responseRecorder.Code).To(Equal(http.StatusOK))
})
It("fails with unknown or disable role", func() {
rCTX.URLParams.Add("itemID", "1$2!3")
responseRecorder := httptest.NewRecorder()
invite.Roles = []string{unifiedrole.UnifiedRoleViewerID, unifiedrole.UnifiedRoleSecureViewerID}
inviteJson, err := json.Marshal(invite)
Expect(err).ToNot(HaveOccurred())
request := httptest.NewRequest(http.MethodPost, "/", bytes.NewBuffer(inviteJson)).
WithContext(
context.WithValue(context.Background(), chi.RouteCtxKey, rCTX),
)
httpAPI.Invite(responseRecorder, request)
Expect(responseRecorder.Code).To(Equal(http.StatusBadRequest))
})
})
Describe("SpaceRootInvite", func() {
It("call the Invite provider with the correct arguments", func() {
@@ -1132,6 +1165,21 @@ var _ = Describe("DriveItemPermissionsApi", func() {
Expect(responseRecorder.Code).To(Equal(http.StatusUnprocessableEntity))
})
It("fails with unknown or disable role", func() {
responseRecorder := httptest.NewRecorder()
invite.Roles = []string{unifiedrole.UnifiedRoleViewerID, unifiedrole.UnifiedRoleSecureViewerID}
inviteJson, err := json.Marshal(invite)
Expect(err).ToNot(HaveOccurred())
request := httptest.NewRequest(http.MethodPost, "/", bytes.NewBuffer(inviteJson)).
WithContext(
context.WithValue(context.Background(), chi.RouteCtxKey, rCTX),
)
httpAPI.SpaceRootInvite(responseRecorder, request)
Expect(responseRecorder.Code).To(Equal(http.StatusBadRequest))
})
})
Describe("ListPermissions", func() {
It("calls the ListPermissions provider with the correct arguments", func() {

View File

@@ -85,7 +85,8 @@ func (g BaseGraphService) CS3ReceivedSharesToDriveItems(ctx context.Context, rec
return nil, err
}
return cs3ReceivedSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, receivedShares)
availableRoles := unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(g.config.UnifiedRoles.AvailableRoles...))
return cs3ReceivedSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, receivedShares, availableRoles)
}
func (g BaseGraphService) CS3ReceivedOCMSharesToDriveItems(ctx context.Context, receivedShares []*ocm.ReceivedShare) ([]libregraph.DriveItem, error) {
@@ -94,7 +95,8 @@ func (g BaseGraphService) CS3ReceivedOCMSharesToDriveItems(ctx context.Context,
return nil, err
}
return cs3ReceivedOCMSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, receivedShares)
availableRoles := unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(g.config.UnifiedRoles.AvailableRoles...))
return cs3ReceivedOCMSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, receivedShares, availableRoles)
}
func (g BaseGraphService) cs3SpacePermissionsToLibreGraph(ctx context.Context, space *storageprovider.StorageSpace, apiVersion APIVersion) []libregraph.Permission {
@@ -191,10 +193,16 @@ func (g BaseGraphService) cs3SpacePermissionsToLibreGraph(ctx context.Context, s
p.SetExpirationDateTime(time.Unix(int64(exp.GetSeconds()), int64(exp.GetNanos())))
}
if role := unifiedrole.CS3ResourcePermissionsToUnifiedRole(perm, unifiedrole.UnifiedRoleConditionDrive, false); role != nil {
availableRoles := unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(g.config.UnifiedRoles.AvailableRoles...))
if role := unifiedrole.CS3ResourcePermissionsToRole(
availableRoles,
perm,
unifiedrole.UnifiedRoleConditionDrive,
false,
); role != nil {
switch apiVersion {
case APIVersion_1:
if r := unifiedrole.GetLegacyName(*role); r != "" {
if r := unifiedrole.GetLegacyRoleName(*role); r != "" {
p.SetRoles([]string{r})
}
case APIVersion_1_Beta_1:
@@ -202,6 +210,12 @@ func (g BaseGraphService) cs3SpacePermissionsToLibreGraph(ctx context.Context, s
}
}
// if there is no role, we need to set the actions as a fallback
// this could happen if a role is disabled or unknown
if !p.HasRoles() {
p.SetLibreGraphPermissionsActions(unifiedrole.CS3ResourcePermissionsToLibregraphActions(perm))
}
permissions = append(permissions, p)
}
return permissions
@@ -456,7 +470,8 @@ func (g BaseGraphService) cs3UserShareToPermission(ctx context.Context, share *c
if share.GetCtime() != nil {
perm.SetCreatedDateTime(cs3TimestampToTime(share.GetCtime()))
}
role := unifiedrole.CS3ResourcePermissionsToUnifiedRole(
role := unifiedrole.CS3ResourcePermissionsToRole(
unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(g.config.UnifiedRoles.AvailableRoles...)),
share.GetPermissions().GetPermissions(),
roleCondition,
false,
@@ -540,7 +555,9 @@ func (g BaseGraphService) cs3OCMShareToPermission(ctx context.Context, share *oc
}
}
role := unifiedrole.CS3ResourcePermissionsToUnifiedRole(
availableRoles := unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(g.config.UnifiedRoles.AvailableRoles...))
role := unifiedrole.CS3ResourcePermissionsToRole(
availableRoles,
permissions,
roleCondition,
true,
@@ -844,7 +861,7 @@ func (g BaseGraphService) updateUserShare(ctx context.Context, permissionID stri
var permissionsUpdated, ok bool
if roles, ok = newPermission.GetRolesOk(); ok && len(roles) > 0 {
for _, roleID := range roles {
role, err := unifiedrole.NewUnifiedRoleFromID(roleID)
role, err := unifiedrole.GetRole(unifiedrole.RoleFilterIDs(roleID))
if err != nil {
g.logger.Debug().Err(err).Interface("role", role).Msg("unable to convert requested role")
return nil, err

View File

@@ -0,0 +1,5 @@
package svc
var (
CS3ReceivedShareToLibreGraphPermissions = cs3ReceivedShareToLibreGraphPermissions
)

View File

@@ -1,13 +1,12 @@
package svc
import (
"fmt"
"net/http"
"net/url"
"github.com/go-chi/chi/v5"
"github.com/go-chi/render"
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/services/graph/pkg/errorcode"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
@@ -15,7 +14,7 @@ import (
// GetRoleDefinitions a list of permission roles than can be used when sharing with users or groups
func (g Graph) GetRoleDefinitions(w http.ResponseWriter, r *http.Request) {
render.Status(r, http.StatusOK)
render.JSON(w, r, unifiedrole.GetBuiltinRoleDefinitionList())
render.JSON(w, r, unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(g.config.UnifiedRoles.AvailableRoles...)))
}
// GetRoleDefinition a permission role than can be used when sharing with users or groups
@@ -27,7 +26,7 @@ func (g Graph) GetRoleDefinition(w http.ResponseWriter, r *http.Request) {
errorcode.InvalidRequest.Render(w, r, http.StatusBadRequest, "unescaping role id failed")
return
}
role, err := getRoleDefinition(roleID)
role, err := unifiedrole.GetRole(unifiedrole.RoleFilterIDs(roleID))
if err != nil {
logger.Debug().Str("roleID", roleID).Msg("could not get role: not found")
errorcode.ItemNotFound.Render(w, r, http.StatusNotFound, err.Error())
@@ -36,13 +35,3 @@ func (g Graph) GetRoleDefinition(w http.ResponseWriter, r *http.Request) {
render.Status(r, http.StatusOK)
render.JSON(w, r, role)
}
func getRoleDefinition(roleID string) (*libregraph.UnifiedRoleDefinition, error) {
roleList := unifiedrole.GetBuiltinRoleDefinitionList()
for _, role := range roleList {
if role != nil && role.Id != nil && *role.Id == roleID {
return role, nil
}
}
return nil, fmt.Errorf("role not found")
}

View File

@@ -214,7 +214,7 @@ func NewService(opts ...Option) (Graph, error) { //nolint:maintidx
return svc, err
}
driveItemPermissionsApi, err := NewDriveItemPermissionsApi(driveItemPermissionsService, options.Logger)
driveItemPermissionsApi, err := NewDriveItemPermissionsApi(driveItemPermissionsService, options.Logger, options.Config)
if err != nil {
return svc, err
}

View File

@@ -10,6 +10,7 @@ import (
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/services/graph/pkg/errorcode"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
// ListSharedWithMe lists the files shared with the current user.
@@ -39,7 +40,8 @@ func (g Graph) listSharedWithMe(ctx context.Context) ([]libregraph.DriveItem, er
g.logger.Error().Err(err).Msg("listing shares failed")
return nil, err
}
driveItems, err := cs3ReceivedSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, listReceivedSharesResponse.GetShares())
availableRoles := unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(g.config.UnifiedRoles.AvailableRoles...))
driveItems, err := cs3ReceivedSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, listReceivedSharesResponse.GetShares(), availableRoles)
if err != nil {
g.logger.Error().Err(err).Msg("could not convert received shares to drive items")
return nil, err
@@ -51,7 +53,7 @@ func (g Graph) listSharedWithMe(ctx context.Context) ([]libregraph.DriveItem, er
g.logger.Error().Err(err).Msg("listing shares failed")
return nil, err
}
ocmDriveItems, err := cs3ReceivedOCMSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, listReceivedOCMSharesResponse.GetShares())
ocmDriveItems, err := cs3ReceivedOCMSharesToDriveItems(ctx, g.logger, gatewayClient, g.identityCache, listReceivedOCMSharesResponse.GetShares(), availableRoles)
if err != nil {
g.logger.Error().Err(err).Msg("could not convert received shares to drive items")
return nil, err

View File

@@ -16,11 +16,12 @@ import (
"github.com/cs3org/reva/v2/pkg/storagespace"
"github.com/cs3org/reva/v2/pkg/utils"
libregraph "github.com/owncloud/libre-graph-api-go"
"golang.org/x/sync/errgroup"
"github.com/owncloud/ocis/v2/ocis-pkg/log"
"github.com/owncloud/ocis/v2/services/graph/pkg/errorcode"
"github.com/owncloud/ocis/v2/services/graph/pkg/identity"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
"golang.org/x/sync/errgroup"
)
// StrictJSONUnmarshal is a wrapper around json.Unmarshal that returns an error if the json contains unknown fields.
@@ -172,7 +173,9 @@ func cs3ReceivedSharesToDriveItems(ctx context.Context,
logger *log.Logger,
gatewayClient gateway.GatewayAPIClient,
identityCache identity.IdentityCache,
receivedShares []*collaboration.ReceivedShare) ([]libregraph.DriveItem, error) {
receivedShares []*collaboration.ReceivedShare,
availableRoles []*libregraph.UnifiedRoleDefinition,
) ([]libregraph.DriveItem, error) {
group := new(errgroup.Group)
// Set max concurrency
@@ -214,7 +217,7 @@ func cs3ReceivedSharesToDriveItems(ctx context.Context,
return errCode
}
driveItem, err := fillDriveItemPropertiesFromReceivedShare(ctx, logger, identityCache, receivedShares, shareStat.GetInfo())
driveItem, err := fillDriveItemPropertiesFromReceivedShare(ctx, logger, identityCache, receivedShares, shareStat.GetInfo(), availableRoles)
if err != nil {
return err
}
@@ -351,7 +354,7 @@ func cs3ReceivedSharesToDriveItems(ctx context.Context,
func fillDriveItemPropertiesFromReceivedShare(ctx context.Context, logger *log.Logger,
identityCache identity.IdentityCache, receivedShares []*collaboration.ReceivedShare,
resourceInfo *storageprovider.ResourceInfo) (*libregraph.DriveItem, error) {
resourceInfo *storageprovider.ResourceInfo, availableRoles []*libregraph.UnifiedRoleDefinition) (*libregraph.DriveItem, error) {
driveItem := libregraph.NewDriveItem()
permissions := make([]libregraph.Permission, 0, len(receivedShares))
@@ -365,7 +368,7 @@ func fillDriveItemPropertiesFromReceivedShare(ctx context.Context, logger *log.L
oldestReceivedShare = receivedShare
}
permission, err := cs3ReceivedShareToLibreGraphPermissions(ctx, logger, identityCache, receivedShare, resourceInfo)
permission, err := cs3ReceivedShareToLibreGraphPermissions(ctx, logger, identityCache, receivedShare, resourceInfo, availableRoles)
if err != nil {
return driveItem, err
}
@@ -426,7 +429,7 @@ func fillDriveItemPropertiesFromReceivedShare(ctx context.Context, logger *log.L
func cs3ReceivedShareToLibreGraphPermissions(ctx context.Context, logger *log.Logger,
identityCache identity.IdentityCache, receivedShare *collaboration.ReceivedShare,
resourceInfo *storageprovider.ResourceInfo) (*libregraph.Permission, error) {
resourceInfo *storageprovider.ResourceInfo, availableRoles []*libregraph.UnifiedRoleDefinition) (*libregraph.Permission, error) {
permission := libregraph.NewPermission()
if id := receivedShare.GetShare().GetId().GetOpaqueId(); id != "" {
permission.SetId(id)
@@ -445,8 +448,13 @@ func cs3ReceivedShareToLibreGraphPermissions(ctx context.Context, logger *log.Lo
if err != nil {
return nil, err
}
role := unifiedrole.CS3ResourcePermissionsToUnifiedRole(permissionSet, condition, false)
role := unifiedrole.CS3ResourcePermissionsToRole(
availableRoles,
permissionSet,
condition,
false,
)
if role != nil {
permission.SetRoles([]string{role.GetId()})
}
@@ -515,7 +523,7 @@ func cs3ReceivedOCMSharesToDriveItems(ctx context.Context,
logger *log.Logger,
gatewayClient gateway.GatewayAPIClient,
identityCache identity.IdentityCache,
receivedShares []*ocm.ReceivedShare) ([]libregraph.DriveItem, error) {
receivedShares []*ocm.ReceivedShare, availableRoles []*libregraph.UnifiedRoleDefinition) ([]libregraph.DriveItem, error) {
group := new(errgroup.Group)
// Set max concurrency
@@ -559,7 +567,7 @@ func cs3ReceivedOCMSharesToDriveItems(ctx context.Context,
return errCode
}
driveItem, err := fillDriveItemPropertiesFromReceivedOCMShare(ctx, logger, identityCache, receivedShares, shareStat.GetInfo())
driveItem, err := fillDriveItemPropertiesFromReceivedOCMShare(ctx, logger, identityCache, receivedShares, shareStat.GetInfo(), availableRoles)
if err != nil {
return err
}
@@ -696,7 +704,7 @@ func cs3ReceivedOCMSharesToDriveItems(ctx context.Context,
func fillDriveItemPropertiesFromReceivedOCMShare(ctx context.Context, logger *log.Logger,
identityCache identity.IdentityCache, receivedShares []*ocm.ReceivedShare,
resourceInfo *storageprovider.ResourceInfo) (*libregraph.DriveItem, error) {
resourceInfo *storageprovider.ResourceInfo, availableRoles []*libregraph.UnifiedRoleDefinition) (*libregraph.DriveItem, error) {
driveItem := libregraph.NewDriveItem()
permissions := make([]libregraph.Permission, 0, len(receivedShares))
@@ -710,7 +718,7 @@ func fillDriveItemPropertiesFromReceivedOCMShare(ctx context.Context, logger *lo
oldestReceivedShare = receivedShare
}
permission, err := cs3ReceivedOCMShareToLibreGraphPermissions(ctx, logger, identityCache, receivedShare, resourceInfo)
permission, err := cs3ReceivedOCMShareToLibreGraphPermissions(ctx, logger, identityCache, receivedShare, resourceInfo, availableRoles)
if err != nil {
return driveItem, err
}
@@ -775,7 +783,7 @@ func fillDriveItemPropertiesFromReceivedOCMShare(ctx context.Context, logger *lo
func cs3ReceivedOCMShareToLibreGraphPermissions(ctx context.Context, logger *log.Logger,
identityCache identity.IdentityCache, receivedShare *ocm.ReceivedShare,
resourceInfo *storageprovider.ResourceInfo) (*libregraph.Permission, error) {
resourceInfo *storageprovider.ResourceInfo, availableRoles []*libregraph.UnifiedRoleDefinition) (*libregraph.Permission, error) {
permission := libregraph.NewPermission()
if id := receivedShare.GetId().GetOpaqueId(); id != "" {
permission.SetId(id)
@@ -799,7 +807,8 @@ func cs3ReceivedOCMShareToLibreGraphPermissions(ctx context.Context, logger *log
if err != nil {
return nil, err
}
role := unifiedrole.CS3ResourcePermissionsToUnifiedRole(
role := unifiedrole.CS3ResourcePermissionsToRole(
availableRoles,
permissions,
condition,
true,

View File

@@ -5,17 +5,22 @@ import (
"net/http"
"net/http/httptest"
collaboration "github.com/cs3org/go-cs3apis/cs3/sharing/collaboration/v1beta1"
rConversions "github.com/cs3org/reva/v2/pkg/conversions"
"github.com/cs3org/reva/v2/pkg/utils"
"github.com/go-chi/chi/v5"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
libregraph "github.com/owncloud/libre-graph-api-go"
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
"github.com/cs3org/reva/v2/pkg/storagespace"
"github.com/owncloud/ocis/v2/ocis-pkg/conversions"
"github.com/owncloud/ocis/v2/ocis-pkg/log"
"github.com/owncloud/ocis/v2/services/graph/pkg/identity"
service "github.com/owncloud/ocis/v2/services/graph/pkg/service/v0"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
var _ = Describe("Utils", func() {
@@ -104,4 +109,44 @@ var _ = Describe("Utils", func() {
SpaceId: "123",
}, false),
)
DescribeTable("_cs3ReceivedShareToLibreGraphPermissions",
func(permissionSet *provider.ResourcePermissions, match func(*libregraph.Permission)) {
permission, err := service.CS3ReceivedShareToLibreGraphPermissions(
context.Background(),
nil,
identity.IdentityCache{},
&collaboration.ReceivedShare{
Share: &collaboration.Share{
Permissions: &collaboration.SharePermissions{
Permissions: permissionSet,
},
},
}, &provider.ResourceInfo{
Type: provider.ResourceType_RESOURCE_TYPE_FILE,
},
unifiedrole.GetRoles(unifiedrole.RoleFilterAll()),
)
Expect(err).ToNot(HaveOccurred())
match(permission)
},
Entry(
"permissions match a role",
rConversions.NewViewerRole().CS3ResourcePermissions(),
func(p *libregraph.Permission) {
Expect(p.GetRoles()).To(HaveExactElements([]string{unifiedrole.UnifiedRoleViewerID}))
Expect(p.GetLibreGraphPermissionsActions()).To(BeNil())
},
),
Entry(
"permissions do not match any role",
&provider.ResourcePermissions{
AddGrant: true,
},
func(p *libregraph.Permission) {
Expect(p.GetRoles()).To(BeNil())
Expect(p.GetLibreGraphPermissionsActions()).To(HaveExactElements([]string{unifiedrole.DriveItemPermissionsCreate}))
},
),
)
})

View File

@@ -0,0 +1,224 @@
package unifiedrole
import (
"strings"
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
"github.com/cs3org/reva/v2/pkg/conversions"
libregraph "github.com/owncloud/libre-graph-api-go"
)
// PermissionsToCS3ResourcePermissions converts the provided libregraph UnifiedRolePermissions to a cs3 ResourcePermissions
func PermissionsToCS3ResourcePermissions(unifiedRolePermissions []*libregraph.UnifiedRolePermission) *provider.ResourcePermissions {
p := &provider.ResourcePermissions{}
for _, permission := range unifiedRolePermissions {
for _, allowedResourceAction := range permission.AllowedResourceActions {
switch allowedResourceAction {
case DriveItemPermissionsCreate:
p.AddGrant = true
case DriveItemChildrenCreate:
p.CreateContainer = true
case DriveItemStandardDelete:
p.Delete = true
case DriveItemPathRead:
p.GetPath = true
case DriveItemQuotaRead:
p.GetQuota = true
case DriveItemContentRead:
p.InitiateFileDownload = true
case DriveItemUploadCreate:
p.InitiateFileUpload = true
case DriveItemPermissionsRead:
p.ListGrants = true
case DriveItemChildrenRead:
p.ListContainer = true
case DriveItemVersionsRead:
p.ListFileVersions = true
case DriveItemDeletedRead:
p.ListRecycle = true
case DriveItemPathUpdate:
p.Move = true
case DriveItemPermissionsDelete:
p.RemoveGrant = true
case DriveItemDeletedDelete:
p.PurgeRecycle = true
case DriveItemVersionsUpdate:
p.RestoreFileVersion = true
case DriveItemDeletedUpdate:
p.RestoreRecycleItem = true
case DriveItemBasicRead:
p.Stat = true
case DriveItemPermissionsUpdate:
p.UpdateGrant = true
case DriveItemPermissionsDeny:
p.DenyGrant = true
}
}
}
return p
}
// CS3ResourcePermissionsToLibregraphActions converts the provided cs3 ResourcePermissions to a list of
// libregraph actions
func CS3ResourcePermissionsToLibregraphActions(p *provider.ResourcePermissions) []string {
var actions []string
if p.GetAddGrant() {
actions = append(actions, DriveItemPermissionsCreate)
}
if p.GetCreateContainer() {
actions = append(actions, DriveItemChildrenCreate)
}
if p.GetDelete() {
actions = append(actions, DriveItemStandardDelete)
}
if p.GetGetPath() {
actions = append(actions, DriveItemPathRead)
}
if p.GetGetQuota() {
actions = append(actions, DriveItemQuotaRead)
}
if p.GetInitiateFileDownload() {
actions = append(actions, DriveItemContentRead)
}
if p.GetInitiateFileUpload() {
actions = append(actions, DriveItemUploadCreate)
}
if p.GetListGrants() {
actions = append(actions, DriveItemPermissionsRead)
}
if p.GetListContainer() {
actions = append(actions, DriveItemChildrenRead)
}
if p.GetListFileVersions() {
actions = append(actions, DriveItemVersionsRead)
}
if p.GetListRecycle() {
actions = append(actions, DriveItemDeletedRead)
}
if p.GetMove() {
actions = append(actions, DriveItemPathUpdate)
}
if p.GetRemoveGrant() {
actions = append(actions, DriveItemPermissionsDelete)
}
if p.GetPurgeRecycle() {
actions = append(actions, DriveItemDeletedDelete)
}
if p.GetRestoreFileVersion() {
actions = append(actions, DriveItemVersionsUpdate)
}
if p.GetRestoreRecycleItem() {
actions = append(actions, DriveItemDeletedUpdate)
}
if p.GetStat() {
actions = append(actions, DriveItemBasicRead)
}
if p.GetUpdateGrant() {
actions = append(actions, DriveItemPermissionsUpdate)
}
if p.GetDenyGrant() {
actions = append(actions, DriveItemPermissionsDeny)
}
return actions
}
// CS3ResourcePermissionsToRole converts the provided cs3 ResourcePermissions to a libregraph UnifiedRoleDefinition
func CS3ResourcePermissionsToRole(roleSet []*libregraph.UnifiedRoleDefinition, p *provider.ResourcePermissions, constraints string, listFederatedRoles bool) *libregraph.UnifiedRoleDefinition {
actionSet := map[string]struct{}{}
for _, action := range CS3ResourcePermissionsToLibregraphActions(p) {
actionSet[action] = struct{}{}
}
var res *libregraph.UnifiedRoleDefinition
for _, uRole := range roleSet {
definitionMatch := false
for _, permission := range uRole.GetRolePermissions() {
// this is a dirty comparison because we are not really parsing the SDDL, but as long as we && the conditions we are good
isFederatedRole := strings.Contains(permission.GetCondition(), UnifiedRoleConditionFederatedUser)
switch {
case !strings.Contains(permission.GetCondition(), constraints):
continue
case listFederatedRoles && !isFederatedRole:
continue
case !listFederatedRoles && isFederatedRole:
continue
}
// if the actions converted from the ResourcePermissions equal the action the defined for the role, we have match
if resourceActionsEqual(actionSet, permission.GetAllowedResourceActions()) {
definitionMatch = true
break
}
}
if definitionMatch {
res = uRole
break
}
}
return res
}
// resourceActionsEqual checks if the provided actions are equal to the actions defined for a resource
func resourceActionsEqual(targetActionSet map[string]struct{}, actions []string) bool {
if len(targetActionSet) != len(actions) {
return false
}
for _, action := range actions {
if _, ok := targetActionSet[action]; !ok {
return false
}
}
return true
}
// cs3RoleToDisplayName converts a CS3 role to a human-readable display name
func cs3RoleToDisplayName(role *conversions.Role) string {
if role == nil {
return ""
}
switch role.Name {
case conversions.RoleViewer:
return _viewerUnifiedRoleDisplayName
case conversions.RoleSpaceViewer:
return _spaceViewerUnifiedRoleDisplayName
case conversions.RoleEditor:
return _editorUnifiedRoleDisplayName
case conversions.RoleSpaceEditor:
return _spaceEditorUnifiedRoleDisplayName
case conversions.RoleFileEditor:
return _fileEditorUnifiedRoleDisplayName
case conversions.RoleEditorLite:
return _editorLiteUnifiedRoleDisplayName
case conversions.RoleManager:
return _managerUnifiedRoleDisplayName
case conversions.RoleSecureViewer:
return _secureViewerUnifiedRoleDisplayName
default:
return ""
}
}

View File

@@ -0,0 +1,76 @@
package unifiedrole_test
import (
"testing"
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
cs3Conversions "github.com/cs3org/reva/v2/pkg/conversions"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/types"
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/ocis-pkg/conversions"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
func TestPermissionsToCS3ResourcePermissions(t *testing.T) {
tests := map[string]struct {
cs3Role *cs3Conversions.Role
unifiedRoleDefinition *libregraph.UnifiedRoleDefinition
match bool
}{
cs3Conversions.RoleViewer: {cs3Conversions.NewViewerRole(), unifiedrole.RoleViewer, true},
cs3Conversions.RoleEditor: {cs3Conversions.NewEditorRole(), unifiedrole.RoleEditor, true},
cs3Conversions.RoleFileEditor: {cs3Conversions.NewFileEditorRole(), unifiedrole.RoleFileEditor, true},
cs3Conversions.RoleManager: {cs3Conversions.NewManagerRole(), unifiedrole.RoleManager, true},
cs3Conversions.RoleSecureViewer: {cs3Conversions.NewSecureViewerRole(), unifiedrole.RoleSecureViewer, true},
"no match": {cs3Conversions.NewFileEditorRole(), unifiedrole.RoleManager, false},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
g := NewWithT(t)
permsFromCS3 := tc.cs3Role.CS3ResourcePermissions()
permsFromUnifiedRole := unifiedrole.PermissionsToCS3ResourcePermissions(
conversions.ToPointerSlice(tc.unifiedRoleDefinition.RolePermissions),
)
var matcher types.GomegaMatcher
if tc.match {
matcher = Equal(permsFromUnifiedRole)
} else {
matcher = Not(Equal(permsFromUnifiedRole))
}
g.Expect(permsFromCS3).To(matcher)
})
}
}
func TestCS3ResourcePermissionsToRole(t *testing.T) {
tests := map[string]struct {
cs3ResourcePermissions *provider.ResourcePermissions
unifiedRoleDefinition *libregraph.UnifiedRoleDefinition
constraints string
}{
cs3Conversions.RoleViewer + "1": {cs3Conversions.NewViewerRole().CS3ResourcePermissions(), unifiedrole.RoleViewer, unifiedrole.UnifiedRoleConditionFile},
cs3Conversions.RoleViewer + "2": {cs3Conversions.NewViewerRole().CS3ResourcePermissions(), unifiedrole.RoleViewer, unifiedrole.UnifiedRoleConditionFolder},
cs3Conversions.RoleEditor: {cs3Conversions.NewEditorRole().CS3ResourcePermissions(), unifiedrole.RoleEditor, unifiedrole.UnifiedRoleConditionFolder},
cs3Conversions.RoleFileEditor: {cs3Conversions.NewFileEditorRole().CS3ResourcePermissions(), unifiedrole.RoleFileEditor, unifiedrole.UnifiedRoleConditionFile},
cs3Conversions.RoleManager: {cs3Conversions.NewManagerRole().CS3ResourcePermissions(), unifiedrole.RoleManager, unifiedrole.UnifiedRoleConditionDrive},
cs3Conversions.RoleSpaceViewer: {cs3Conversions.NewSpaceViewerRole().CS3ResourcePermissions(), unifiedrole.RoleSpaceViewer, unifiedrole.UnifiedRoleConditionDrive},
cs3Conversions.RoleSpaceEditor: {cs3Conversions.NewSpaceEditorRole().CS3ResourcePermissions(), unifiedrole.RoleSpaceEditor, unifiedrole.UnifiedRoleConditionDrive},
cs3Conversions.RoleSecureViewer + "1": {cs3Conversions.NewSecureViewerRole().CS3ResourcePermissions(), unifiedrole.RoleSecureViewer, unifiedrole.UnifiedRoleConditionFile},
cs3Conversions.RoleSecureViewer + "2": {cs3Conversions.NewSecureViewerRole().CS3ResourcePermissions(), unifiedrole.RoleSecureViewer, unifiedrole.UnifiedRoleConditionFolder},
"custom 1": {&provider.ResourcePermissions{GetPath: true}, nil, unifiedrole.UnifiedRoleConditionFolder},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
NewWithT(t).Expect(
unifiedrole.CS3ResourcePermissionsToRole(unifiedrole.BuildInRoles, tc.cs3ResourcePermissions, tc.constraints, false),
).To(Equal(tc.unifiedRoleDefinition))
})
}
}

View File

@@ -0,0 +1,10 @@
package unifiedrole
import (
"errors"
)
var (
// ErrUnknownRole is returned when an unknown unified role is requested.
ErrUnknownRole = errors.New("unknown role, check if the role is enabled")
)

View File

@@ -0,0 +1,16 @@
package unifiedrole
var (
RoleViewer = roleViewer
RoleSpaceViewer = roleSpaceViewer
RoleEditor = roleEditor
RoleSpaceEditor = roleSpaceEditor
RoleFileEditor = roleFileEditor
RoleEditorLite = roleEditorLite
RoleManager = roleManager
RoleSecureViewer = roleSecureViewer
BuildInRoles = buildInRoles
WeightDefinitions = weightRoles
)

View File

@@ -0,0 +1,44 @@
package unifiedrole
import (
"slices"
libregraph "github.com/owncloud/libre-graph-api-go"
)
type (
// RoleFilter is used to filter role collections
RoleFilter func(*libregraph.UnifiedRoleDefinition) bool
)
// RoleFilterInvert inverts the provided role filter
func RoleFilterInvert(f RoleFilter) RoleFilter {
return func(r *libregraph.UnifiedRoleDefinition) bool {
return !f(r)
}
}
// RoleFilterAll returns a role filter that matches all roles
func RoleFilterAll() RoleFilter {
return func(r *libregraph.UnifiedRoleDefinition) bool {
return true
}
}
// RoleFilterIDs returns a role filter that matches the provided ids
// the filter is always OR!
func RoleFilterIDs(ids ...string) RoleFilter {
return func(r *libregraph.UnifiedRoleDefinition) bool {
return slices.Contains(ids, r.GetId())
}
}
// filterRoles filters the provided roles by the provided filter
func filterRoles(roles []*libregraph.UnifiedRoleDefinition, f RoleFilter) []*libregraph.UnifiedRoleDefinition {
return slices.DeleteFunc(
slices.Clone(roles),
func(r *libregraph.UnifiedRoleDefinition) bool {
return !f(r)
},
)
}

View File

@@ -0,0 +1,35 @@
package unifiedrole_test
import (
"testing"
. "github.com/onsi/gomega"
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
func TestRoleFilterIDs(t *testing.T) {
NewWithT(t).Expect(
unifiedrole.RoleFilterIDs(
unifiedrole.UnifiedRoleEditorLiteID,
unifiedrole.UnifiedRoleSpaceEditorID,
)(unifiedrole.RoleEditorLite),
).To(BeTrue())
}
func TestRoleFilterInvert(t *testing.T) {
NewWithT(t).Expect(
unifiedrole.RoleFilterInvert(
func(_ *libregraph.UnifiedRoleDefinition) bool {
return true
},
)(unifiedrole.RoleEditorLite),
).To(BeFalse())
}
func TestRoleFilterAll(t *testing.T) {
NewWithT(t).Expect(
unifiedrole.RoleFilterAll()(unifiedrole.RoleEditorLite),
).To(BeTrue())
}

View File

@@ -0,0 +1,435 @@
package unifiedrole
import (
"cmp"
"slices"
"strings"
libregraph "github.com/owncloud/libre-graph-api-go"
"google.golang.org/protobuf/proto"
"github.com/cs3org/reva/v2/pkg/conversions"
"github.com/owncloud/ocis/v2/ocis-pkg/l10n"
)
const (
// UnifiedRoleViewerID Unified role viewer id.
UnifiedRoleViewerID = "b1e2218d-eef8-4d4c-b82d-0f1a1b48f3b5"
// UnifiedRoleSpaceViewerID Unified role space viewer id.
UnifiedRoleSpaceViewerID = "a8d5fe5e-96e3-418d-825b-534dbdf22b99"
// UnifiedRoleEditorID Unified role editor id.
UnifiedRoleEditorID = "fb6c3e19-e378-47e5-b277-9732f9de6e21"
// UnifiedRoleSpaceEditorID Unified role space editor id.
UnifiedRoleSpaceEditorID = "58c63c02-1d89-4572-916a-870abc5a1b7d"
// UnifiedRoleFileEditorID Unified role file editor id.
UnifiedRoleFileEditorID = "2d00ce52-1fc2-4dbc-8b95-a73b73395f5a"
// UnifiedRoleEditorLiteID Unified role editor-lite id.
UnifiedRoleEditorLiteID = "1c996275-f1c9-4e71-abdf-a42f6495e960"
// UnifiedRoleManagerID Unified role manager id.
UnifiedRoleManagerID = "312c0871-5ef7-4b3a-85b6-0e4074c64049"
// UnifiedRoleSecureViewerID Unified role secure viewer id.
UnifiedRoleSecureViewerID = "aa97fe03-7980-45ac-9e50-b325749fd7e6"
// UnifiedRoleFederatedViewerID Unified role federated viewer id.
UnifiedRoleFederatedViewerID = "be531789-063c-48bf-a9fe-857e6fbee7da"
// UnifiedRoleFederatedEditorID Unified role federated editor id.
UnifiedRoleFederatedEditorID = "36279a93-e4e3-4bbb-8a23-53b05b560963"
// Wile the below conditions follow the SDDL syntax, they are not parsed anywhere. We use them as strings to
// represent the constraints that a role definition applies to. For the actual syntax, see the SDDL documentation
// at https://learn.microsoft.com/en-us/windows/win32/secauthz/security-descriptor-definition-language-for-conditional-aces-#conditional-expressions
// Some roles apply to a specific type of resource, for example, a role that applies to a file or a folder.
// @Resource is the placeholder for the resource that the role is applied to
// .Root, .Folder and .File are facets of the driveItem resource that indicate the type of the resource if they are present.
// UnifiedRoleConditionDrive defines constraint that matches a Driveroot/Spaceroot
UnifiedRoleConditionDrive = "exists @Resource.Root"
// UnifiedRoleConditionFolder defines constraints that matches a DriveItem representing a Folder
UnifiedRoleConditionFolder = "exists @Resource.Folder"
// UnifiedRoleConditionFile defines a constraint that matches a DriveItem representing a File
UnifiedRoleConditionFile = "exists @Resource.File"
// Some roles apply to a specific type of user, for example, a role that applies to a federated user.
// @Subject is the placeholder for the subject that the role is applied to. For sharing roles this is the user that the resource is shared with.
// .UserType is the type of the user: 'Member' for a member of the organization, 'Guest' for a guest user, 'Federated' for a federated user.
// UnifiedRoleConditionFederatedUser defines a constraint that matches a federated user
UnifiedRoleConditionFederatedUser = "@Subject.UserType==\"Federated\""
// For federated sharing we need roles that combine the constraints for the resource and the user.
// UnifiedRoleConditionFileFederatedUser defines a constraint that matches a File and a federated user
UnifiedRoleConditionFileFederatedUser = UnifiedRoleConditionFile + " && " + UnifiedRoleConditionFederatedUser
// UnifiedRoleConditionFolderFederatedUser defines a constraint that matches a Folder and a federated user
UnifiedRoleConditionFolderFederatedUser = UnifiedRoleConditionFolder + " && " + UnifiedRoleConditionFederatedUser
DriveItemPermissionsCreate = "libre.graph/driveItem/permissions/create"
DriveItemChildrenCreate = "libre.graph/driveItem/children/create"
DriveItemStandardDelete = "libre.graph/driveItem/standard/delete"
DriveItemPathRead = "libre.graph/driveItem/path/read"
DriveItemQuotaRead = "libre.graph/driveItem/quota/read"
DriveItemContentRead = "libre.graph/driveItem/content/read"
DriveItemUploadCreate = "libre.graph/driveItem/upload/create"
DriveItemPermissionsRead = "libre.graph/driveItem/permissions/read"
DriveItemChildrenRead = "libre.graph/driveItem/children/read"
DriveItemVersionsRead = "libre.graph/driveItem/versions/read"
DriveItemDeletedRead = "libre.graph/driveItem/deleted/read"
DriveItemPathUpdate = "libre.graph/driveItem/path/update"
DriveItemPermissionsDelete = "libre.graph/driveItem/permissions/delete"
DriveItemDeletedDelete = "libre.graph/driveItem/deleted/delete"
DriveItemVersionsUpdate = "libre.graph/driveItem/versions/update"
DriveItemDeletedUpdate = "libre.graph/driveItem/deleted/update"
DriveItemBasicRead = "libre.graph/driveItem/basic/read"
DriveItemPermissionsUpdate = "libre.graph/driveItem/permissions/update"
DriveItemPermissionsDeny = "libre.graph/driveItem/permissions/deny"
)
var (
// UnifiedRole Viewer, Role Description (resolves directly)
_viewerUnifiedRoleDescription = l10n.Template("View and download.")
// UnifiedRole Viewer, Role DisplayName (resolves directly)
_viewerUnifiedRoleDisplayName = l10n.Template("Can view")
// UnifiedRole SpaceViewer, Role Description (resolves directly)
_spaceViewerUnifiedRoleDescription = l10n.Template("View and download.")
// UnifiedRole SpaseViewer, Role DisplayName (resolves directly)
_spaceViewerUnifiedRoleDisplayName = l10n.Template("Can view")
// UnifiedRole Editor, Role Description (resolves directly)
_editorUnifiedRoleDescription = l10n.Template("View, download, upload, edit, add and delete.")
// UnifiedRole Editor, Role DisplayName (resolves directly)
_editorUnifiedRoleDisplayName = l10n.Template("Can edit")
// UnifiedRole SpaseEditor, Role Description (resolves directly)
_spaceEditorUnifiedRoleDescription = l10n.Template("View, download, upload, edit, add and delete.")
// UnifiedRole SpaseEditor, Role DisplayName (resolves directly)
_spaceEditorUnifiedRoleDisplayName = l10n.Template("Can edit")
// UnifiedRole FileEditor, Role Description (resolves directly)
_fileEditorUnifiedRoleDescription = l10n.Template("View, download and edit.")
// UnifiedRole FileEditor, Role DisplayName (resolves directly)
_fileEditorUnifiedRoleDisplayName = l10n.Template("Can edit")
// UnifiedRole EditorLite, Role Description (resolves directly)
_editorLiteUnifiedRoleDescription = l10n.Template("View, download and upload.")
// UnifiedRole EditorLite, Role DisplayName (resolves directly)
_editorLiteUnifiedRoleDisplayName = l10n.Template("Can upload")
// UnifiedRole Manager, Role Description (resolves directly)
_managerUnifiedRoleDescription = l10n.Template("View, download, upload, edit, add, delete and manage members.")
// UnifiedRole Manager, Role DisplayName (resolves directly)
_managerUnifiedRoleDisplayName = l10n.Template("Can manage")
// UnifiedRole SecureViewer, Role Description (resolves directly)
_secureViewerUnifiedRoleDescription = l10n.Template("View only documents, images and PDFs. Watermarks will be applied.")
// UnifiedRole SecureViewer, Role DisplayName (resolves directly)
_secureViewerUnifiedRoleDisplayName = l10n.Template("Can view (secure)")
// legacyNames contains the legacy role names.
legacyNames = map[string]string{
UnifiedRoleViewerID: conversions.RoleViewer,
// one V1 api the "spaceviewer" role was call "viewer" and the "spaceeditor" was "editor",
// we need to stay compatible with that
UnifiedRoleSpaceViewerID: "viewer",
UnifiedRoleSpaceEditorID: "editor",
UnifiedRoleEditorID: conversions.RoleEditor,
UnifiedRoleFileEditorID: conversions.RoleFileEditor,
UnifiedRoleEditorLiteID: conversions.RoleEditorLite,
UnifiedRoleManagerID: conversions.RoleManager,
UnifiedRoleSecureViewerID: conversions.RoleSecureViewer,
}
// buildInRoles contains the built-in roles.
buildInRoles = []*libregraph.UnifiedRoleDefinition{
roleViewer,
roleSpaceViewer,
roleEditor,
roleSpaceEditor,
roleFileEditor,
roleEditorLite,
roleManager,
roleSecureViewer,
}
// roleViewer creates a viewer role.
roleViewer = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewViewerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleViewerID),
Description: proto.String(_viewerUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFile),
},
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFolder),
},
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFileFederatedUser),
},
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFolderFederatedUser),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
// roleSpaceViewer creates a spaceviewer role
roleSpaceViewer = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewSpaceViewerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleSpaceViewerID),
Description: proto.String(_spaceViewerUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionDrive),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
// roleEditor creates an editor role.
roleEditor = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewEditorRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleEditorID),
Description: proto.String(_editorUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFolder),
},
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFolderFederatedUser),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
// roleSpaceEditor creates an editor role
roleSpaceEditor = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewSpaceEditorRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleSpaceEditorID),
Description: proto.String(_spaceEditorUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionDrive),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
// roleFileEditor creates a file-editor role
roleFileEditor = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewFileEditorRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleFileEditorID),
Description: proto.String(_fileEditorUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFile),
},
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFileFederatedUser),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
// roleEditorLite creates an editor-lite role
roleEditorLite = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewEditorLiteRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleEditorLiteID),
Description: proto.String(_editorLiteUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFolder),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
// roleManager creates a manager role
roleManager = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewManagerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleManagerID),
Description: proto.String(_managerUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionDrive),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
// roleSecureViewer creates a secure viewer role
roleSecureViewer = func() *libregraph.UnifiedRoleDefinition {
r := conversions.NewSecureViewerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleSecureViewerID),
Description: proto.String(_secureViewerUnifiedRoleDescription),
DisplayName: proto.String(cs3RoleToDisplayName(r)),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFile),
},
{
AllowedResourceActions: CS3ResourcePermissionsToLibregraphActions(r.CS3ResourcePermissions()),
Condition: proto.String(UnifiedRoleConditionFolder),
},
},
LibreGraphWeight: proto.Int32(0),
}
}()
)
// GetRoles returns a role filter that matches the provided resources
func GetRoles(f RoleFilter) []*libregraph.UnifiedRoleDefinition {
return filterRoles(buildInRoles, f)
}
// GetRole returns a role filter that matches the provided resources
func GetRole(f RoleFilter) (*libregraph.UnifiedRoleDefinition, error) {
roles := filterRoles(buildInRoles, f)
if len(roles) == 0 {
return nil, ErrUnknownRole
}
return roles[0], nil
}
// GetRolesByPermissions returns a list of role definitions
// that match the provided actions and constraints
func GetRolesByPermissions(roleSet []*libregraph.UnifiedRoleDefinition, actions []string, constraints string, listFederatedRoles, descending bool) []*libregraph.UnifiedRoleDefinition {
roles := make([]*libregraph.UnifiedRoleDefinition, 0, len(roleSet))
for _, role := range roleSet {
var match bool
for _, permission := range role.GetRolePermissions() {
// this is a dirty comparison because we are not really parsing the SDDL, but as long as we && the conditions we are good
isFederatedRole := strings.Contains(permission.GetCondition(), UnifiedRoleConditionFederatedUser)
switch {
case !strings.Contains(permission.GetCondition(), constraints):
continue
case listFederatedRoles && !isFederatedRole:
continue
case !listFederatedRoles && isFederatedRole:
continue
}
for i, action := range permission.GetAllowedResourceActions() {
if !slices.Contains(actions, action) {
break
}
if i == len(permission.GetAllowedResourceActions())-1 {
match = true
}
}
if match {
break
}
}
if match {
roles = append(roles, role)
}
}
return weightRoles(roles, constraints, descending)
}
// GetLegacyRoleName returns the legacy role name for the provided role
func GetLegacyRoleName(role libregraph.UnifiedRoleDefinition) string {
return legacyNames[role.GetId()]
}
// weightRoles sorts the provided role definitions by the number of permissions[n].actions they grant,
// the implementation is optimistic and assumes that the weight relies on the number of available actions.
// descending - false - sorts the roles from least to most permissions
// descending - true - sorts the roles from most to least permissions
func weightRoles(roleSet []*libregraph.UnifiedRoleDefinition, constraints string, descending bool) []*libregraph.UnifiedRoleDefinition {
slices.SortFunc(roleSet, func(i, j *libregraph.UnifiedRoleDefinition) int {
var ia []string
for _, rp := range i.GetRolePermissions() {
if rp.GetCondition() == constraints {
ia = append(ia, rp.GetAllowedResourceActions()...)
}
}
var ja []string
for _, rp := range j.GetRolePermissions() {
if rp.GetCondition() == constraints {
ja = append(ja, rp.GetAllowedResourceActions()...)
}
}
switch descending {
case true:
return cmp.Compare(len(ja), len(ia))
default:
return cmp.Compare(len(ia), len(ja))
}
})
for i, role := range roleSet {
role.LibreGraphWeight = libregraph.PtrInt32(int32(i) + 1)
}
// return for the sake of consistency, optional because the slice is modified in place
return roleSet
}
// GetAllowedResourceActions returns the allowed resource actions for the provided role by condition
func GetAllowedResourceActions(role *libregraph.UnifiedRoleDefinition, condition string) []string {
if role == nil {
return []string{}
}
for _, p := range role.GetRolePermissions() {
if p.GetCondition() == condition {
return p.GetAllowedResourceActions()
}
}
return []string{}
}

View File

@@ -0,0 +1,263 @@
package unifiedrole_test
import (
"slices"
"testing"
. "github.com/onsi/gomega"
libregraph "github.com/owncloud/libre-graph-api-go"
"google.golang.org/protobuf/proto"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
func TestGetDefinition(t *testing.T) {
tests := map[string]struct {
ids []string
unifiedRoleDefinition *libregraph.UnifiedRoleDefinition
expectError error
}{
"pass single": {
ids: []string{unifiedrole.UnifiedRoleViewerID},
unifiedRoleDefinition: unifiedrole.RoleViewer,
},
"pass many": {
ids: []string{unifiedrole.UnifiedRoleViewerID, unifiedrole.UnifiedRoleEditorID},
unifiedRoleDefinition: unifiedrole.RoleViewer,
},
"fail unknown": {
ids: []string{"unknown"},
expectError: unifiedrole.ErrUnknownRole,
},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
g := NewWithT(t)
definition, err := unifiedrole.GetRole(unifiedrole.RoleFilterIDs(tc.ids...))
if tc.expectError != nil {
g.Expect(err).To(MatchError(tc.expectError))
} else {
g.Expect(err).NotTo(HaveOccurred())
g.Expect(definition).To(Equal(tc.unifiedRoleDefinition))
}
})
}
}
func TestWeightDefinitions(t *testing.T) {
tests := map[string]struct {
unifiedRoleDefinition []*libregraph.UnifiedRoleDefinition
constraint string
descending bool
expectedDefinitions []*libregraph.UnifiedRoleDefinition
}{
"ascending": {
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleViewer,
unifiedrole.RoleFileEditor,
},
unifiedrole.UnifiedRoleConditionFile,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleViewer,
unifiedrole.RoleFileEditor,
},
},
"descending": {
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleViewer,
unifiedrole.RoleFileEditor,
},
unifiedrole.UnifiedRoleConditionFile,
true,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleFileEditor,
unifiedrole.RoleViewer,
},
},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
g := NewWithT(t)
for i, generatedDefinition := range unifiedrole.WeightDefinitions(tc.unifiedRoleDefinition, tc.constraint, tc.descending) {
g.Expect(generatedDefinition.Id).To(Equal(tc.expectedDefinitions[i].Id))
}
})
}
}
func TestGetRolesByPermissions(t *testing.T) {
tests := map[string]struct {
givenActions []string
constraints string
listFederatedRoles bool
unifiedRoleDefinition []*libregraph.UnifiedRoleDefinition
}{
"RoleViewer | folder": {
givenActions: getRoleActions(unifiedrole.RoleViewer),
constraints: unifiedrole.UnifiedRoleConditionFolder,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSecureViewer,
unifiedrole.RoleViewer,
},
},
"RoleViewer | file": {
givenActions: getRoleActions(unifiedrole.RoleViewer),
constraints: unifiedrole.UnifiedRoleConditionFile,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSecureViewer,
unifiedrole.RoleViewer,
},
},
"RoleViewer | file | federated": {
givenActions: getRoleActions(unifiedrole.RoleViewer),
constraints: unifiedrole.UnifiedRoleConditionFile,
listFederatedRoles: true,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleViewer,
},
},
"RoleFileEditor | file": {
givenActions: getRoleActions(unifiedrole.RoleFileEditor),
constraints: unifiedrole.UnifiedRoleConditionFile,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSecureViewer,
unifiedrole.RoleViewer,
unifiedrole.RoleFileEditor,
},
},
"RoleEditor | folder": {
givenActions: getRoleActions(unifiedrole.RoleEditor),
constraints: unifiedrole.UnifiedRoleConditionFolder,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSecureViewer,
unifiedrole.RoleViewer,
unifiedrole.RoleEditorLite,
unifiedrole.RoleEditor,
},
},
"RoleEditor | folder | federated": {
givenActions: getRoleActions(unifiedrole.RoleEditor),
constraints: unifiedrole.UnifiedRoleConditionFolder,
listFederatedRoles: true,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleViewer,
unifiedrole.RoleEditor,
},
},
"RoleEditor | file | federated": {
givenActions: getRoleActions(unifiedrole.RoleEditor),
constraints: unifiedrole.UnifiedRoleConditionFile,
listFederatedRoles: true,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleViewer,
unifiedrole.RoleFileEditor,
},
},
"BuildInRoles | file": {
givenActions: getRoleActions(unifiedrole.BuildInRoles...),
constraints: unifiedrole.UnifiedRoleConditionFile,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSecureViewer,
unifiedrole.RoleViewer,
unifiedrole.RoleFileEditor,
},
},
"BuildInRoles | folder": {
givenActions: getRoleActions(unifiedrole.BuildInRoles...),
constraints: unifiedrole.UnifiedRoleConditionFolder,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSecureViewer,
unifiedrole.RoleViewer,
unifiedrole.RoleEditorLite,
unifiedrole.RoleEditor,
},
},
"BuildInRoles | drive": {
givenActions: getRoleActions(unifiedrole.BuildInRoles...),
constraints: unifiedrole.UnifiedRoleConditionDrive,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSpaceViewer,
unifiedrole.RoleSpaceEditor,
unifiedrole.RoleManager,
},
},
"custom | file": {
givenActions: []string{unifiedrole.DriveItemQuotaRead},
constraints: unifiedrole.UnifiedRoleConditionFile,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{},
},
"RoleEditorLite and custom | folder": {
givenActions: append(getRoleActions(unifiedrole.RoleEditorLite), unifiedrole.DriveItemQuotaRead),
constraints: unifiedrole.UnifiedRoleConditionFolder,
unifiedRoleDefinition: []*libregraph.UnifiedRoleDefinition{
unifiedrole.RoleSecureViewer,
unifiedrole.RoleEditorLite,
},
},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
g := NewWithT(t)
generatedDefinitions := unifiedrole.GetRolesByPermissions(unifiedrole.BuildInRoles, tc.givenActions, tc.constraints, tc.listFederatedRoles, false)
g.Expect(len(generatedDefinitions)).To(Equal(len(tc.unifiedRoleDefinition)))
for i, generatedDefinition := range generatedDefinitions {
g.Expect(generatedDefinition.Id).To(Equal(tc.unifiedRoleDefinition[i].Id))
g.Expect(*generatedDefinition.LibreGraphWeight).To(Equal(int32(i + 1)))
}
generatedActions := getRoleActions(generatedDefinitions...)
g.Expect(len(tc.givenActions) >= len(generatedActions)).To(BeTrue())
for _, generatedAction := range generatedActions {
g.Expect(slices.Contains(tc.givenActions, generatedAction)).To(BeTrue())
}
})
}
}
func TestGetAllowedResourceActions(t *testing.T) {
tests := map[string]struct {
unifiedRoleDefinition *libregraph.UnifiedRoleDefinition
condition string
expectedActions []string
}{
"no role": {
expectedActions: []string{},
},
"no match": {
unifiedRoleDefinition: &libregraph.UnifiedRoleDefinition{
RolePermissions: []libregraph.UnifiedRolePermission{
{Condition: proto.String(unifiedrole.UnifiedRoleConditionDrive), AllowedResourceActions: []string{unifiedrole.DriveItemPermissionsCreate}},
{Condition: proto.String(unifiedrole.UnifiedRoleConditionFolder), AllowedResourceActions: []string{unifiedrole.DriveItemDeletedRead}},
},
},
condition: unifiedrole.UnifiedRoleConditionFile,
expectedActions: []string{},
},
"match": {
unifiedRoleDefinition: &libregraph.UnifiedRoleDefinition{
RolePermissions: []libregraph.UnifiedRolePermission{
{Condition: proto.String(unifiedrole.UnifiedRoleConditionDrive), AllowedResourceActions: []string{unifiedrole.DriveItemPermissionsCreate}},
{Condition: proto.String(unifiedrole.UnifiedRoleConditionFolder), AllowedResourceActions: []string{unifiedrole.DriveItemDeletedRead}},
},
},
condition: unifiedrole.UnifiedRoleConditionFolder,
expectedActions: []string{unifiedrole.DriveItemDeletedRead},
},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
NewWithT(t).
Expect(unifiedrole.GetAllowedResourceActions(tc.unifiedRoleDefinition, tc.condition)).
To(ContainElements(tc.expectedActions))
})
}
}

View File

@@ -1,623 +0,0 @@
package unifiedrole
import (
"cmp"
"errors"
"slices"
"strings"
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/ocis-pkg/l10n"
"google.golang.org/protobuf/proto"
"github.com/cs3org/reva/v2/pkg/conversions"
)
const (
// UnifiedRoleViewerID Unified role viewer id.
UnifiedRoleViewerID = "b1e2218d-eef8-4d4c-b82d-0f1a1b48f3b5"
// UnifiedRoleSpaceViewerID Unified role space viewer id.
UnifiedRoleSpaceViewerID = "a8d5fe5e-96e3-418d-825b-534dbdf22b99"
// UnifiedRoleEditorID Unified role editor id.
UnifiedRoleEditorID = "fb6c3e19-e378-47e5-b277-9732f9de6e21"
// UnifiedRoleSpaceEditorID Unified role space editor id.
UnifiedRoleSpaceEditorID = "58c63c02-1d89-4572-916a-870abc5a1b7d"
// UnifiedRoleFileEditorID Unified role file editor id.
UnifiedRoleFileEditorID = "2d00ce52-1fc2-4dbc-8b95-a73b73395f5a"
// UnifiedRoleEditorLiteID Unified role editor-lite id.
UnifiedRoleEditorLiteID = "1c996275-f1c9-4e71-abdf-a42f6495e960"
// UnifiedRoleManagerID Unified role manager id.
UnifiedRoleManagerID = "312c0871-5ef7-4b3a-85b6-0e4074c64049"
// UnifiedRoleSecureViewerID Unified role secure viewer id.
UnifiedRoleSecureViewerID = "aa97fe03-7980-45ac-9e50-b325749fd7e6"
// UnifiedRoleFederatedViewerID Unified role federated viewer id.
UnifiedRoleFederatedViewerID = "be531789-063c-48bf-a9fe-857e6fbee7da"
// UnifiedRoleFederatedEditorID Unified role federated editor id.
UnifiedRoleFederatedEditorID = "36279a93-e4e3-4bbb-8a23-53b05b560963"
// Wile the below conditions follow the SDDL syntax, they are not parsed anywhere. We use them as strings to
// represent the constraints that a role definition applies to. For the actual syntax, see the SDDL documentation
// at https://learn.microsoft.com/en-us/windows/win32/secauthz/security-descriptor-definition-language-for-conditional-aces-#conditional-expressions
// Some roles apply to a specific type of resource, for example, a role that applies to a file or a folder.
// @Resource is the placeholder for the resource that the role is applied to
// .Root, .Folder and .File are facets of the driveItem resource that indicate the type of the resource if they are present.
// UnifiedRoleConditionDrive defines constraint that matches a Driveroot/Spaceroot
UnifiedRoleConditionDrive = "exists @Resource.Root"
// UnifiedRoleConditionFolder defines constraints that matches a DriveItem representing a Folder
UnifiedRoleConditionFolder = "exists @Resource.Folder"
// UnifiedRoleConditionFile defines a constraint that matches a DriveItem representing a File
UnifiedRoleConditionFile = "exists @Resource.File"
// Some roles apply to a specific type of user, for example, a role that applies to a federated user.
// @Subject is the placeholder for the subject that the role is applied to. For sharing roles this is the user that the resource is shared with.
// .UserType is the type of the user: 'Member' for a member of the organization, 'Guest' for a guest user, 'Federated' for a federated user.
// UnifiedRoleConditionFederatedUser defines a constraint that matches a federated user
UnifiedRoleConditionFederatedUser = "@Subject.UserType==\"Federated\""
// For federated sharing we need roles that combine the constraints for the resource and the user.
// UnifiedRoleConditionFileFederatedUser defines a constraint that matches a File and a federated user
UnifiedRoleConditionFileFederatedUser = UnifiedRoleConditionFile + " && " + UnifiedRoleConditionFederatedUser
// UnifiedRoleConditionFolderFederatedUser defines a constraint that matches a Folder and a federated user
UnifiedRoleConditionFolderFederatedUser = UnifiedRoleConditionFolder + " && " + UnifiedRoleConditionFederatedUser
DriveItemPermissionsCreate = "libre.graph/driveItem/permissions/create"
DriveItemChildrenCreate = "libre.graph/driveItem/children/create"
DriveItemStandardDelete = "libre.graph/driveItem/standard/delete"
DriveItemPathRead = "libre.graph/driveItem/path/read"
DriveItemQuotaRead = "libre.graph/driveItem/quota/read"
DriveItemContentRead = "libre.graph/driveItem/content/read"
DriveItemUploadCreate = "libre.graph/driveItem/upload/create"
DriveItemPermissionsRead = "libre.graph/driveItem/permissions/read"
DriveItemChildrenRead = "libre.graph/driveItem/children/read"
DriveItemVersionsRead = "libre.graph/driveItem/versions/read"
DriveItemDeletedRead = "libre.graph/driveItem/deleted/read"
DriveItemPathUpdate = "libre.graph/driveItem/path/update"
DriveItemPermissionsDelete = "libre.graph/driveItem/permissions/delete"
DriveItemDeletedDelete = "libre.graph/driveItem/deleted/delete"
DriveItemVersionsUpdate = "libre.graph/driveItem/versions/update"
DriveItemDeletedUpdate = "libre.graph/driveItem/deleted/update"
DriveItemBasicRead = "libre.graph/driveItem/basic/read"
DriveItemPermissionsUpdate = "libre.graph/driveItem/permissions/update"
DriveItemPermissionsDeny = "libre.graph/driveItem/permissions/deny"
)
var legacyNames map[string]string = map[string]string{
UnifiedRoleViewerID: conversions.RoleViewer,
// one V1 api the "spaceviewer" role was call "viewer" and the "spaceeditor" was "editor",
// we need to stay compatible with that
UnifiedRoleSpaceViewerID: "viewer",
UnifiedRoleSpaceEditorID: "editor",
UnifiedRoleEditorID: conversions.RoleEditor,
UnifiedRoleFileEditorID: conversions.RoleFileEditor,
UnifiedRoleEditorLiteID: conversions.RoleEditorLite,
UnifiedRoleManagerID: conversions.RoleManager,
UnifiedRoleSecureViewerID: conversions.RoleSecureViewer,
}
var (
// UnifiedRole Viewer, Role Description (resolves directly)
_viewerUnifiedRoleDescription = l10n.Template("View and download.")
// UnifiedRole Viewer, Role DisplayName (resolves directly)
_viewerUnifiedRoleDisplayName = l10n.Template("Can view")
// UnifiedRole SpaceViewer, Role Description (resolves directly)
_spaceViewerUnifiedRoleDescription = l10n.Template("View and download.")
// UnifiedRole SpaseViewer, Role DisplayName (resolves directly)
_spaceViewerUnifiedRoleDisplayName = l10n.Template("Can view")
// UnifiedRole Editor, Role Description (resolves directly)
_editorUnifiedRoleDescription = l10n.Template("View, download, upload, edit, add and delete.")
// UnifiedRole Editor, Role DisplayName (resolves directly)
_editorUnifiedRoleDisplayName = l10n.Template("Can edit")
// UnifiedRole SpaseEditor, Role Description (resolves directly)
_spaceEditorUnifiedRoleDescription = l10n.Template("View, download, upload, edit, add and delete.")
// UnifiedRole SpaseEditor, Role DisplayName (resolves directly)
_spaceEditorUnifiedRoleDisplayName = l10n.Template("Can edit")
// UnifiedRole FileEditor, Role Description (resolves directly)
_fileEditorUnifiedRoleDescription = l10n.Template("View, download and edit.")
// UnifiedRole FileEditor, Role DisplayName (resolves directly)
_fileEditorUnifiedRoleDisplayName = l10n.Template("Can edit")
// UnifiedRole EditorLite, Role Description (resolves directly)
_editorLiteUnifiedRoleDescription = l10n.Template("View, download and upload.")
// UnifiedRole EditorLite, Role DisplayName (resolves directly)
_editorLiteUnifiedRoleDisplayName = l10n.Template("Can upload")
// UnifiedRole Manager, Role Description (resolves directly)
_managerUnifiedRoleDescription = l10n.Template("View, download, upload, edit, add, delete and manage members.")
// UnifiedRole Manager, Role DisplayName (resolves directly)
_managerUnifiedRoleDisplayName = l10n.Template("Can manage")
// UnifiedRole SecureViewer, Role Description (resolves directly)
_secureViewerUnifiedRoleDescription = l10n.Template("View only documents, images and PDFs. Watermarks will be applied.")
// UnifiedRole SecureViewer, Role DisplayName (resolves directly)
_secureViewerUnifiedRoleDisplayName = l10n.Template("Can view (secure)")
)
// NewViewerUnifiedRole creates a viewer role.
func NewViewerUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewViewerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleViewerID),
Description: proto.String(_viewerUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFile),
},
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFolder),
},
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFileFederatedUser),
},
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFolderFederatedUser),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewSpaceViewerUnifiedRole creates a spaceviewer role
func NewSpaceViewerUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewSpaceViewerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleSpaceViewerID),
Description: proto.String(_spaceViewerUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionDrive),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewEditorUnifiedRole creates an editor role.
func NewEditorUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewEditorRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleEditorID),
Description: proto.String(_editorUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFolder),
},
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFolderFederatedUser),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewSpaceEditorUnifiedRole creates an editor role
func NewSpaceEditorUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewSpaceEditorRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleSpaceEditorID),
Description: proto.String(_spaceEditorUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionDrive),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewFileEditorUnifiedRole creates a file-editor role
func NewFileEditorUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewFileEditorRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleFileEditorID),
Description: proto.String(_fileEditorUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFile),
},
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFileFederatedUser),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewEditorLiteUnifiedRole creates an editor-lite role
func NewEditorLiteUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewEditorLiteRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleEditorLiteID),
Description: proto.String(_editorLiteUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFolder),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewManagerUnifiedRole creates a manager role
func NewManagerUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewManagerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleManagerID),
Description: proto.String(_managerUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionDrive),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewSecureViewerUnifiedRole creates a secure viewer role
func NewSecureViewerUnifiedRole() *libregraph.UnifiedRoleDefinition {
r := conversions.NewSecureViewerRole()
return &libregraph.UnifiedRoleDefinition{
Id: proto.String(UnifiedRoleSecureViewerID),
Description: proto.String(_secureViewerUnifiedRoleDescription),
DisplayName: displayName(r),
RolePermissions: []libregraph.UnifiedRolePermission{
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFile),
},
{
AllowedResourceActions: convert(r),
Condition: proto.String(UnifiedRoleConditionFolder),
},
},
LibreGraphWeight: proto.Int32(0),
}
}
// NewUnifiedRoleFromID returns a unified role definition from the provided id
func NewUnifiedRoleFromID(id string) (*libregraph.UnifiedRoleDefinition, error) {
for _, definition := range GetBuiltinRoleDefinitionList() {
if definition.GetId() != id {
continue
}
return definition, nil
}
return nil, errors.New("role not found")
}
func GetBuiltinRoleDefinitionList() []*libregraph.UnifiedRoleDefinition {
return []*libregraph.UnifiedRoleDefinition{
NewViewerUnifiedRole(),
NewSpaceViewerUnifiedRole(),
NewEditorUnifiedRole(),
NewSpaceEditorUnifiedRole(),
NewFileEditorUnifiedRole(),
NewEditorLiteUnifiedRole(),
NewManagerUnifiedRole(),
NewSecureViewerUnifiedRole(),
}
}
// GetApplicableRoleDefinitionsForActions returns a list of role definitions
// that match the provided actions and constraints
func GetApplicableRoleDefinitionsForActions(actions []string, constraints string, listFederatedRoles, descending bool) []*libregraph.UnifiedRoleDefinition {
builtin := GetBuiltinRoleDefinitionList()
definitions := make([]*libregraph.UnifiedRoleDefinition, 0, len(builtin))
for _, definition := range builtin {
var definitionMatch bool
for _, permission := range definition.GetRolePermissions() {
// this is a dirty comparison because we are not really parsing the SDDL, but as long as we && the conditions we are good
isFederatedRole := strings.Contains(permission.GetCondition(), UnifiedRoleConditionFederatedUser)
switch {
case !strings.Contains(permission.GetCondition(), constraints):
continue
case listFederatedRoles && !isFederatedRole:
continue
case !listFederatedRoles && isFederatedRole:
continue
}
for i, action := range permission.GetAllowedResourceActions() {
if !slices.Contains(actions, action) {
break
}
if i == len(permission.GetAllowedResourceActions())-1 {
definitionMatch = true
}
}
if definitionMatch {
break
}
}
if definitionMatch {
definitions = append(definitions, definition)
}
}
return WeightRoleDefinitions(definitions, constraints, descending)
}
// WeightRoleDefinitions sorts the provided role definitions by the number of permissions[n].actions they grant,
// the implementation is optimistic and assumes that the weight relies on the number of available actions.
// descending - false - sorts the roles from least to most permissions
// descending - true - sorts the roles from most to least permissions
func WeightRoleDefinitions(roleDefinitions []*libregraph.UnifiedRoleDefinition, constraints string, descending bool) []*libregraph.UnifiedRoleDefinition {
slices.SortFunc(roleDefinitions, func(i, j *libregraph.UnifiedRoleDefinition) int {
var ia []string
for _, rp := range i.GetRolePermissions() {
if rp.GetCondition() == constraints {
ia = append(ia, rp.GetAllowedResourceActions()...)
}
}
var ja []string
for _, rp := range j.GetRolePermissions() {
if rp.GetCondition() == constraints {
ja = append(ja, rp.GetAllowedResourceActions()...)
}
}
switch descending {
case true:
return cmp.Compare(len(ja), len(ia))
default:
return cmp.Compare(len(ia), len(ja))
}
})
for i, definition := range roleDefinitions {
definition.LibreGraphWeight = libregraph.PtrInt32(int32(i) + 1)
}
// return for the sage of consistency, optional because the slice is modified in place
return roleDefinitions
}
// PermissionsToCS3ResourcePermissions converts the provided libregraph UnifiedRolePermissions to a cs3 ResourcePermissions
func PermissionsToCS3ResourcePermissions(unifiedRolePermissions []*libregraph.UnifiedRolePermission) *provider.ResourcePermissions {
p := &provider.ResourcePermissions{}
for _, permission := range unifiedRolePermissions {
for _, allowedResourceAction := range permission.AllowedResourceActions {
switch allowedResourceAction {
case DriveItemPermissionsCreate:
p.AddGrant = true
case DriveItemChildrenCreate:
p.CreateContainer = true
case DriveItemStandardDelete:
p.Delete = true
case DriveItemPathRead:
p.GetPath = true
case DriveItemQuotaRead:
p.GetQuota = true
case DriveItemContentRead:
p.InitiateFileDownload = true
case DriveItemUploadCreate:
p.InitiateFileUpload = true
case DriveItemPermissionsRead:
p.ListGrants = true
case DriveItemChildrenRead:
p.ListContainer = true
case DriveItemVersionsRead:
p.ListFileVersions = true
case DriveItemDeletedRead:
p.ListRecycle = true
case DriveItemPathUpdate:
p.Move = true
case DriveItemPermissionsDelete:
p.RemoveGrant = true
case DriveItemDeletedDelete:
p.PurgeRecycle = true
case DriveItemVersionsUpdate:
p.RestoreFileVersion = true
case DriveItemDeletedUpdate:
p.RestoreRecycleItem = true
case DriveItemBasicRead:
p.Stat = true
case DriveItemPermissionsUpdate:
p.UpdateGrant = true
case DriveItemPermissionsDeny:
p.DenyGrant = true
}
}
}
return p
}
// CS3ResourcePermissionsToLibregraphActions converts the provided cs3 ResourcePermissions to a list of
// libregraph actions
func CS3ResourcePermissionsToLibregraphActions(p *provider.ResourcePermissions) (actions []string) {
if p.GetAddGrant() {
actions = append(actions, DriveItemPermissionsCreate)
}
if p.GetCreateContainer() {
actions = append(actions, DriveItemChildrenCreate)
}
if p.GetDelete() {
actions = append(actions, DriveItemStandardDelete)
}
if p.GetGetPath() {
actions = append(actions, DriveItemPathRead)
}
if p.GetGetQuota() {
actions = append(actions, DriveItemQuotaRead)
}
if p.GetInitiateFileDownload() {
actions = append(actions, DriveItemContentRead)
}
if p.GetInitiateFileUpload() {
actions = append(actions, DriveItemUploadCreate)
}
if p.GetListGrants() {
actions = append(actions, DriveItemPermissionsRead)
}
if p.GetListContainer() {
actions = append(actions, DriveItemChildrenRead)
}
if p.GetListFileVersions() {
actions = append(actions, DriveItemVersionsRead)
}
if p.GetListRecycle() {
actions = append(actions, DriveItemDeletedRead)
}
if p.GetMove() {
actions = append(actions, DriveItemPathUpdate)
}
if p.GetRemoveGrant() {
actions = append(actions, DriveItemPermissionsDelete)
}
if p.GetPurgeRecycle() {
actions = append(actions, DriveItemDeletedDelete)
}
if p.GetRestoreFileVersion() {
actions = append(actions, DriveItemVersionsUpdate)
}
if p.GetRestoreRecycleItem() {
actions = append(actions, DriveItemDeletedUpdate)
}
if p.GetStat() {
actions = append(actions, DriveItemBasicRead)
}
if p.GetUpdateGrant() {
actions = append(actions, DriveItemPermissionsUpdate)
}
if p.GetDenyGrant() {
actions = append(actions, DriveItemPermissionsDeny)
}
return actions
}
func GetLegacyName(role libregraph.UnifiedRoleDefinition) string {
return legacyNames[role.GetId()]
}
// CS3ResourcePermissionsToUnifiedRole tries to find the UnifiedRoleDefinition that matches the supplied
// CS3 ResourcePermissions and constraints.
func CS3ResourcePermissionsToUnifiedRole(p *provider.ResourcePermissions, constraints string, listFederatedRoles bool) *libregraph.UnifiedRoleDefinition {
actionSet := map[string]struct{}{}
for _, action := range CS3ResourcePermissionsToLibregraphActions(p) {
actionSet[action] = struct{}{}
}
var res *libregraph.UnifiedRoleDefinition
for _, uRole := range GetBuiltinRoleDefinitionList() {
definitionMatch := false
for _, permission := range uRole.GetRolePermissions() {
// this is a dirty comparison because we are not really parsing the SDDL, but as long as we && the conditions we are good
isFederatedRole := strings.Contains(permission.GetCondition(), UnifiedRoleConditionFederatedUser)
switch {
case !strings.Contains(permission.GetCondition(), constraints):
continue
case listFederatedRoles && !isFederatedRole:
continue
case !listFederatedRoles && isFederatedRole:
continue
}
// if the actions converted from the ResourcePermissions equal the action the defined for the role, we have match
if resourceActionsEqual(actionSet, permission.GetAllowedResourceActions()) {
definitionMatch = true
break
}
}
if definitionMatch {
res = uRole
break
}
}
return res
}
func resourceActionsEqual(targetActionSet map[string]struct{}, actions []string) bool {
if len(targetActionSet) != len(actions) {
return false
}
for _, action := range actions {
if _, ok := targetActionSet[action]; !ok {
return false
}
}
return true
}
func displayName(role *conversions.Role) *string {
if role == nil {
return nil
}
var displayName string
switch role.Name {
case conversions.RoleViewer:
displayName = _viewerUnifiedRoleDisplayName
case conversions.RoleSpaceViewer:
displayName = _spaceViewerUnifiedRoleDisplayName
case conversions.RoleEditor:
displayName = _editorUnifiedRoleDisplayName
case conversions.RoleSpaceEditor:
displayName = _spaceEditorUnifiedRoleDisplayName
case conversions.RoleFileEditor:
displayName = _fileEditorUnifiedRoleDisplayName
case conversions.RoleEditorLite:
displayName = _editorLiteUnifiedRoleDisplayName
case conversions.RoleManager:
displayName = _managerUnifiedRoleDisplayName
case conversions.RoleSecureViewer:
displayName = _secureViewerUnifiedRoleDisplayName
default:
return nil
}
return proto.String(displayName)
}
func convert(role *conversions.Role) []string {
actions := make([]string, 0, 8)
if role == nil && role.CS3ResourcePermissions() == nil {
return actions
}
return CS3ResourcePermissionsToLibregraphActions(role.CS3ResourcePermissions())
}
func GetAllowedResourceActions(role *libregraph.UnifiedRoleDefinition, condition string) []string {
for _, p := range role.GetRolePermissions() {
if p.GetCondition() == condition {
return p.GetAllowedResourceActions()
}
}
return []string{}
}

View File

@@ -1,13 +1,24 @@
package unifiedrole_test
import (
"testing"
"slices"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
libregraph "github.com/owncloud/libre-graph-api-go"
)
func TestUnifiedrole(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Unifiedrole Suite")
func getRoleActions(definitions ...*libregraph.UnifiedRoleDefinition) []string {
var actions []string
for _, definition := range definitions {
for _, permission := range definition.GetRolePermissions() {
for _, action := range permission.GetAllowedResourceActions() {
if slices.Contains(actions, action) {
continue
}
actions = append(actions, action)
}
}
}
return actions
}

View File

@@ -1,305 +0,0 @@
package unifiedrole_test
import (
"slices"
rConversions "github.com/cs3org/reva/v2/pkg/conversions"
"github.com/owncloud/ocis/v2/ocis-pkg/conversions"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/types"
libregraph "github.com/owncloud/libre-graph-api-go"
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
var _ = Describe("unifiedroles", func() {
DescribeTable("CS3ResourcePermissionsToUnifiedRole",
func(legacyRole *rConversions.Role, unifiedRole *libregraph.UnifiedRoleDefinition, constraints string) {
cs3perm := legacyRole.CS3ResourcePermissions()
r := unifiedrole.CS3ResourcePermissionsToUnifiedRole(cs3perm, constraints, false)
Expect(r.GetId()).To(Equal(unifiedRole.GetId()))
},
Entry(rConversions.RoleViewer, rConversions.NewViewerRole(), unifiedrole.NewViewerUnifiedRole(), unifiedrole.UnifiedRoleConditionFile),
Entry(rConversions.RoleViewer, rConversions.NewViewerRole(), unifiedrole.NewViewerUnifiedRole(), unifiedrole.UnifiedRoleConditionFolder),
Entry(rConversions.RoleEditor, rConversions.NewEditorRole(), unifiedrole.NewEditorUnifiedRole(), unifiedrole.UnifiedRoleConditionFolder),
Entry(rConversions.RoleFileEditor, rConversions.NewFileEditorRole(), unifiedrole.NewFileEditorUnifiedRole(), unifiedrole.UnifiedRoleConditionFile),
Entry(rConversions.RoleManager, rConversions.NewManagerRole(), unifiedrole.NewManagerUnifiedRole(), unifiedrole.UnifiedRoleConditionDrive),
Entry(rConversions.RoleSpaceViewer, rConversions.NewSpaceViewerRole(), unifiedrole.NewSpaceViewerUnifiedRole(), unifiedrole.UnifiedRoleConditionDrive),
Entry(rConversions.RoleSpaceEditor, rConversions.NewSpaceEditorRole(), unifiedrole.NewSpaceEditorUnifiedRole(), unifiedrole.UnifiedRoleConditionDrive),
Entry(rConversions.RoleSecureViewer, rConversions.NewSecureViewerRole(), unifiedrole.NewSecureViewerUnifiedRole(), unifiedrole.UnifiedRoleConditionFile),
Entry(rConversions.RoleSecureViewer, rConversions.NewSecureViewerRole(), unifiedrole.NewSecureViewerUnifiedRole(), unifiedrole.UnifiedRoleConditionFolder),
)
DescribeTable("UnifiedRolePermissionsToCS3ResourcePermissions",
func(cs3Role *rConversions.Role, libregraphRole *libregraph.UnifiedRoleDefinition, match bool) {
permsFromCS3 := cs3Role.CS3ResourcePermissions()
permsFromUnifiedRole := unifiedrole.PermissionsToCS3ResourcePermissions(
conversions.ToPointerSlice(libregraphRole.RolePermissions),
)
var matcher types.GomegaMatcher
if match {
matcher = Equal(permsFromUnifiedRole)
} else {
matcher = Not(Equal(permsFromUnifiedRole))
}
Expect(permsFromCS3).To(matcher)
},
Entry(rConversions.RoleViewer, rConversions.NewViewerRole(), unifiedrole.NewViewerUnifiedRole(), true),
Entry(rConversions.RoleEditor, rConversions.NewEditorRole(), unifiedrole.NewEditorUnifiedRole(), true),
Entry(rConversions.RoleFileEditor, rConversions.NewFileEditorRole(), unifiedrole.NewFileEditorUnifiedRole(), true),
Entry(rConversions.RoleManager, rConversions.NewManagerRole(), unifiedrole.NewManagerUnifiedRole(), true),
Entry(rConversions.RoleSecureViewer, rConversions.NewSecureViewerRole(), unifiedrole.NewSecureViewerUnifiedRole(), true),
Entry("no match", rConversions.NewFileEditorRole(), unifiedrole.NewManagerUnifiedRole(), false),
)
DescribeTable("WeightRoleDefinitions",
func(roleDefinitions []*libregraph.UnifiedRoleDefinition, constraint string, descending bool, expectedDefinitions []*libregraph.UnifiedRoleDefinition) {
for i, generatedDefinition := range unifiedrole.WeightRoleDefinitions(roleDefinitions, constraint, descending) {
Expect(generatedDefinition.Id).To(Equal(expectedDefinitions[i].Id))
}
},
Entry("ascending",
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewFileEditorUnifiedRole(),
},
unifiedrole.UnifiedRoleConditionFile,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewFileEditorUnifiedRole(),
},
),
Entry("descending",
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewFileEditorUnifiedRole(),
},
unifiedrole.UnifiedRoleConditionFile,
true,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewFileEditorUnifiedRole(),
unifiedrole.NewViewerUnifiedRole(),
},
),
)
{
rolesToAction := func(definitions ...*libregraph.UnifiedRoleDefinition) []string {
var actions []string
for _, definition := range definitions {
for _, permission := range definition.GetRolePermissions() {
for _, action := range permission.GetAllowedResourceActions() {
if slices.Contains(actions, action) {
continue
}
actions = append(actions, action)
}
}
}
return actions
}
DescribeTable("GetApplicableRoleDefinitionsForActions",
func(givenActions []string, constraints string, listFederatedRoles bool, expectedDefinitions []*libregraph.UnifiedRoleDefinition) {
generatedDefinitions := unifiedrole.GetApplicableRoleDefinitionsForActions(givenActions, constraints, listFederatedRoles, false)
Expect(len(generatedDefinitions)).To(Equal(len(expectedDefinitions)))
for i, generatedDefinition := range generatedDefinitions {
Expect(generatedDefinition.Id).To(Equal(expectedDefinitions[i].Id))
Expect(*generatedDefinition.LibreGraphWeight).To(Equal(int32(i + 1)))
}
generatedActions := rolesToAction(generatedDefinitions...)
Expect(len(givenActions) >= len(generatedActions)).To(BeTrue())
for _, generatedAction := range generatedActions {
Expect(slices.Contains(givenActions, generatedAction)).To(BeTrue())
}
},
Entry(
"ViewerUnifiedRole",
rolesToAction(unifiedrole.NewViewerUnifiedRole()),
unifiedrole.UnifiedRoleConditionFolder,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSecureViewerUnifiedRole(),
unifiedrole.NewViewerUnifiedRole(),
},
),
Entry(
"ViewerUnifiedRole | share",
rolesToAction(unifiedrole.NewViewerUnifiedRole()),
unifiedrole.UnifiedRoleConditionFile,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSecureViewerUnifiedRole(),
unifiedrole.NewViewerUnifiedRole(),
},
),
Entry(
"ViewerUnifiedRole | share",
rolesToAction(unifiedrole.NewViewerUnifiedRole()),
unifiedrole.UnifiedRoleConditionFile,
true,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewViewerUnifiedRole(),
},
),
Entry(
"EditorUnifiedRole | share folder",
rolesToAction(unifiedrole.NewEditorUnifiedRole()),
unifiedrole.UnifiedRoleConditionFolder,
true,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewEditorUnifiedRole(),
},
),
Entry(
"EditorUnifiedRole | share file",
rolesToAction(unifiedrole.NewEditorUnifiedRole()),
unifiedrole.UnifiedRoleConditionFile,
true,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewFileEditorUnifiedRole(),
},
),
Entry(
"NewFileEditorUnifiedRole",
rolesToAction(unifiedrole.NewFileEditorUnifiedRole()),
unifiedrole.UnifiedRoleConditionFile,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSecureViewerUnifiedRole(),
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewFileEditorUnifiedRole(),
},
),
Entry(
"NewEditorUnifiedRole",
rolesToAction(unifiedrole.NewEditorUnifiedRole()),
unifiedrole.UnifiedRoleConditionFolder,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSecureViewerUnifiedRole(),
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewEditorLiteUnifiedRole(),
unifiedrole.NewEditorUnifiedRole(),
},
),
Entry(
"GetBuiltinRoleDefinitionList",
rolesToAction(unifiedrole.GetBuiltinRoleDefinitionList()...),
unifiedrole.UnifiedRoleConditionFile,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSecureViewerUnifiedRole(),
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewFileEditorUnifiedRole(),
},
),
Entry(
"GetBuiltinRoleDefinitionList",
rolesToAction(unifiedrole.GetBuiltinRoleDefinitionList()...),
unifiedrole.UnifiedRoleConditionFolder,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSecureViewerUnifiedRole(),
unifiedrole.NewViewerUnifiedRole(),
unifiedrole.NewEditorLiteUnifiedRole(),
unifiedrole.NewEditorUnifiedRole(),
},
),
Entry(
"GetBuiltinRoleDefinitionList",
rolesToAction(unifiedrole.GetBuiltinRoleDefinitionList()...),
unifiedrole.UnifiedRoleConditionDrive,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSpaceViewerUnifiedRole(),
unifiedrole.NewSpaceEditorUnifiedRole(),
unifiedrole.NewManagerUnifiedRole(),
},
),
Entry(
"single",
[]string{unifiedrole.DriveItemQuotaRead},
unifiedrole.UnifiedRoleConditionFile,
false,
[]*libregraph.UnifiedRoleDefinition{},
),
Entry(
"mixed",
append(rolesToAction(unifiedrole.NewEditorLiteUnifiedRole()), unifiedrole.DriveItemQuotaRead),
unifiedrole.UnifiedRoleConditionFolder,
false,
[]*libregraph.UnifiedRoleDefinition{
unifiedrole.NewSecureViewerUnifiedRole(),
unifiedrole.NewEditorLiteUnifiedRole(),
},
),
)
}
{
var newUnifiedRoleFromIDEntries []TableEntry
attachEntry := func(name, id string, definition *libregraph.UnifiedRoleDefinition, errors bool) {
e := Entry(
name,
id,
definition,
errors,
)
newUnifiedRoleFromIDEntries = append(newUnifiedRoleFromIDEntries, e)
}
for _, definition := range unifiedrole.GetBuiltinRoleDefinitionList() {
attachEntry(definition.GetDisplayName(), definition.GetId(), definition, false)
}
attachEntry("unknown", "123", nil, true)
DescribeTable("NewUnifiedRoleFromID",
func(id string, expectedRole *libregraph.UnifiedRoleDefinition, expectError bool) {
role, err := unifiedrole.NewUnifiedRoleFromID(id)
if expectError {
Expect(err).To(HaveOccurred())
} else {
Expect(err).NotTo(HaveOccurred())
Expect(role).To(Equal(expectedRole))
}
},
newUnifiedRoleFromIDEntries,
)
}
})

View File

@@ -1,6 +1,7 @@
package validate
import (
"context"
"slices"
"github.com/go-playground/validator/v10"
@@ -9,6 +10,12 @@ import (
"github.com/owncloud/ocis/v2/services/graph/pkg/unifiedrole"
)
type contextKey int
const (
ContextKeyRoleIDsValueKey contextKey = iota
)
// initLibregraph initializes libregraph validation
func initLibregraph(v *validator.Validate) {
for _, f := range []func(*validator.Validate){
@@ -30,10 +37,9 @@ func libregraphDriveItemInvite(v *validator.Validate) {
"ExpirationDateTime": "omitnil,gt",
}, s)
v.RegisterStructValidation(func(sl validator.StructLevel) {
v.RegisterStructValidationCtx(func(ctx context.Context, sl validator.StructLevel) {
driveItemInvite := sl.Current().Interface().(libregraph.DriveItemInvite)
rolesAndActions(sl, driveItemInvite.Roles, driveItemInvite.LibreGraphPermissionsActions, false)
rolesAndActions(ctx, sl, driveItemInvite.Roles, driveItemInvite.LibreGraphPermissionsActions, false)
}, s)
}
@@ -52,7 +58,7 @@ func libregraphPermission(v *validator.Validate) {
v.RegisterStructValidationMapRules(map[string]string{
"Roles": "max=1",
}, s)
v.RegisterStructValidation(func(sl validator.StructLevel) {
v.RegisterStructValidationCtx(func(ctx context.Context, sl validator.StructLevel) {
permission := sl.Current().Interface().(libregraph.Permission)
if _, ok := permission.GetIdOk(); ok {
@@ -63,14 +69,13 @@ func libregraphPermission(v *validator.Validate) {
sl.ReportError(permission.HasPassword, "hasPassword", "HasPassword", "readonly", "")
}
rolesAndActions(sl, permission.Roles, permission.LibreGraphPermissionsActions, true)
rolesAndActions(ctx, sl, permission.Roles, permission.LibreGraphPermissionsActions, true)
}, s)
}
func rolesAndActions(sl validator.StructLevel, roles, actions []string, allowEmpty bool) {
func rolesAndActions(ctx context.Context, sl validator.StructLevel, roles, actions []string, allowEmpty bool) {
totalRoles := len(roles)
totalActions := len(actions)
switch {
case allowEmpty && totalRoles == 0 && totalActions == 0:
break
@@ -83,10 +88,17 @@ func rolesAndActions(sl validator.StructLevel, roles, actions []string, allowEmp
var availableRoles []string
var availableActions []string
for _, definition := range append(
unifiedrole.GetBuiltinRoleDefinitionList(),
unifiedrole.GetBuiltinRoleDefinitionList()...,
) {
var definitions []*libregraph.UnifiedRoleDefinition
switch roles, ok := ctx.Value(ContextKeyRoleIDsValueKey).([]string); {
case ok:
definitions = unifiedrole.GetRoles(unifiedrole.RoleFilterIDs(roles...))
default:
// it the ctx does not contain the allowed role IDs, we need to fall back to all roles
definitions = unifiedrole.GetRoles(unifiedrole.RoleFilterAll())
}
for _, definition := range definitions {
if slices.Contains(availableRoles, definition.GetId()) {
continue
}
@@ -112,11 +124,16 @@ func rolesAndActions(sl validator.StructLevel, roles, actions []string, allowEmp
sl.ReportError(roles, "Roles", "Roles", "available_role", "")
}
for _, role := range actions {
if slices.Contains(availableActions, role) {
for _, action := range actions {
if slices.Contains(availableActions, action) {
continue
}
sl.ReportError(actions, "LibreGraphPermissionsActions", "LibreGraphPermissionsActions", "available_action", "")
}
}
// ContextWithAllowedRoleIDs returns a new context which includes the allowed role IDs.
func ContextWithAllowedRoleIDs(ctx context.Context, rolesIds []string) context.Context {
return context.WithValue(ctx, ContextKeyRoleIDsValueKey, rolesIds)
}

View File

@@ -16,10 +16,12 @@ import (
type validatableFactory[T any] func() (T, bool)
var _ = Describe("libregraph", func() {
var ctx context.Context
var driveItemInvite libregraph.DriveItemInvite
var driveRecipient libregraph.DriveRecipient
BeforeEach(func() {
ctx = context.Background()
driveRecipient = libregraph.DriveRecipient{
ObjectId: conversions.ToPointer("1"),
LibreGraphRecipientType: conversions.ToPointer("user"),
@@ -31,14 +33,13 @@ var _ = Describe("libregraph", func() {
LibreGraphPermissionsActions: []string{unifiedrole.DriveItemVersionsUpdate},
ExpirationDateTime: libregraph.PtrTime(time.Now().Add(time.Hour)),
}
})
DescribeTable("DriveItemInvite",
func(factories ...validatableFactory[libregraph.DriveItemInvite]) {
for _, factory := range factories {
s, pass := factory()
switch err := validate.StructCtx(context.Background(), s); pass {
switch err := validate.StructCtx(ctx, s); pass {
case false:
Expect(err).To(HaveOccurred())
default:
@@ -72,6 +73,14 @@ var _ = Describe("libregraph", func() {
driveItemInvite.LibreGraphPermissionsActions = nil
return driveItemInvite, false
}),
Entry("fail: disabled role", func() (libregraph.DriveItemInvite, bool) {
ctx = validate.ContextWithAllowedRoleIDs(ctx, []string{unifiedrole.UnifiedRoleEditorID})
driveItemInvite.Roles = []string{
unifiedrole.UnifiedRoleSecureViewerID,
}
driveItemInvite.LibreGraphPermissionsActions = nil
return driveItemInvite, false
}),
Entry("fail: unknown action", func() (libregraph.DriveItemInvite, bool) {
driveItemInvite.Roles = nil
driveItemInvite.LibreGraphPermissionsActions = []string{"foo"}

View File

@@ -1,6 +1,7 @@
//go:build (darwin || freebsd || openbsd || netbsd || dragonfly || hurd) && !appengine
//go:build (darwin || freebsd || openbsd || netbsd || dragonfly || hurd) && !appengine && !tinygo
// +build darwin freebsd openbsd netbsd dragonfly hurd
// +build !appengine
// +build !tinygo
package isatty

View File

@@ -1,5 +1,6 @@
//go:build appengine || js || nacl || wasm
// +build appengine js nacl wasm
//go:build (appengine || js || nacl || tinygo || wasm) && !windows
// +build appengine js nacl tinygo wasm
// +build !windows
package isatty

View File

@@ -1,6 +1,7 @@
//go:build (linux || aix || zos) && !appengine
//go:build (linux || aix || zos) && !appengine && !tinygo
// +build linux aix zos
// +build !appengine
// +build !tinygo
package isatty

View File

@@ -1,16 +0,0 @@
language: go
sudo: false
go:
- 1.13.x
- tip
before_install:
- go get -t -v ./...
script:
- go generate
- git diff --cached --exit-code
- ./go.test.sh
after_success:
- bash <(curl -s https://codecov.io/bash)

View File

@@ -1,7 +1,7 @@
go-runewidth
============
[![Build Status](https://travis-ci.org/mattn/go-runewidth.png?branch=master)](https://travis-ci.org/mattn/go-runewidth)
[![Build Status](https://github.com/mattn/go-runewidth/workflows/test/badge.svg?branch=master)](https://github.com/mattn/go-runewidth/actions?query=workflow%3Atest)
[![Codecov](https://codecov.io/gh/mattn/go-runewidth/branch/master/graph/badge.svg)](https://codecov.io/gh/mattn/go-runewidth)
[![GoDoc](https://godoc.org/github.com/mattn/go-runewidth?status.svg)](http://godoc.org/github.com/mattn/go-runewidth)
[![Go Report Card](https://goreportcard.com/badge/github.com/mattn/go-runewidth)](https://goreportcard.com/report/github.com/mattn/go-runewidth)

View File

@@ -1,12 +0,0 @@
#!/usr/bin/env bash
set -e
echo "" > coverage.txt
for d in $(go list ./... | grep -v vendor); do
go test -race -coverprofile=profile.out -covermode=atomic "$d"
if [ -f profile.out ]; then
cat profile.out >> coverage.txt
rm profile.out
fi
done

View File

@@ -2,6 +2,7 @@ package runewidth
import (
"os"
"strings"
"github.com/rivo/uniseg"
)
@@ -34,7 +35,13 @@ func handleEnv() {
EastAsianWidth = env == "1"
}
// update DefaultCondition
DefaultCondition.EastAsianWidth = EastAsianWidth
if DefaultCondition.EastAsianWidth != EastAsianWidth {
DefaultCondition.EastAsianWidth = EastAsianWidth
if len(DefaultCondition.combinedLut) > 0 {
DefaultCondition.combinedLut = DefaultCondition.combinedLut[:0]
CreateLUT()
}
}
}
type interval struct {
@@ -89,6 +96,7 @@ var nonprint = table{
// Condition have flag EastAsianWidth whether the current locale is CJK or not.
type Condition struct {
combinedLut []byte
EastAsianWidth bool
StrictEmojiNeutral bool
}
@@ -104,10 +112,16 @@ func NewCondition() *Condition {
// RuneWidth returns the number of cells in r.
// See http://www.unicode.org/reports/tr11/
func (c *Condition) RuneWidth(r rune) int {
if r < 0 || r > 0x10FFFF {
return 0
}
if len(c.combinedLut) > 0 {
return int(c.combinedLut[r>>1]>>(uint(r&1)*4)) & 3
}
// optimized version, verified by TestRuneWidthChecksums()
if !c.EastAsianWidth {
switch {
case r < 0x20 || r > 0x10FFFF:
case r < 0x20:
return 0
case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint
return 0
@@ -124,7 +138,7 @@ func (c *Condition) RuneWidth(r rune) int {
}
} else {
switch {
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining):
case inTables(r, nonprint, combining):
return 0
case inTable(r, narrow):
return 1
@@ -138,6 +152,27 @@ func (c *Condition) RuneWidth(r rune) int {
}
}
// CreateLUT will create an in-memory lookup table of 557056 bytes for faster operation.
// This should not be called concurrently with other operations on c.
// If options in c is changed, CreateLUT should be called again.
func (c *Condition) CreateLUT() {
const max = 0x110000
lut := c.combinedLut
if len(c.combinedLut) != 0 {
// Remove so we don't use it.
c.combinedLut = nil
} else {
lut = make([]byte, max/2)
}
for i := range lut {
i32 := int32(i * 2)
x0 := c.RuneWidth(i32)
x1 := c.RuneWidth(i32 + 1)
lut[i] = uint8(x0) | uint8(x1)<<4
}
c.combinedLut = lut
}
// StringWidth return width as you can see
func (c *Condition) StringWidth(s string) (width int) {
g := uniseg.NewGraphemes(s)
@@ -180,11 +215,47 @@ func (c *Condition) Truncate(s string, w int, tail string) string {
return s[:pos] + tail
}
// TruncateLeft cuts w cells from the beginning of the `s`.
func (c *Condition) TruncateLeft(s string, w int, prefix string) string {
if c.StringWidth(s) <= w {
return prefix
}
var width int
pos := len(s)
g := uniseg.NewGraphemes(s)
for g.Next() {
var chWidth int
for _, r := range g.Runes() {
chWidth = c.RuneWidth(r)
if chWidth > 0 {
break // See StringWidth() for details.
}
}
if width+chWidth > w {
if width < w {
_, pos = g.Positions()
prefix += strings.Repeat(" ", width+chWidth-w)
} else {
pos, _ = g.Positions()
}
break
}
width += chWidth
}
return prefix + s[pos:]
}
// Wrap return string wrapped with w cells
func (c *Condition) Wrap(s string, w int) string {
width := 0
out := ""
for _, r := range []rune(s) {
for _, r := range s {
cw := c.RuneWidth(r)
if r == '\n' {
out += string(r)
@@ -257,6 +328,11 @@ func Truncate(s string, w int, tail string) string {
return DefaultCondition.Truncate(s, w, tail)
}
// TruncateLeft cuts w cells from the beginning of the `s`.
func TruncateLeft(s string, w int, prefix string) string {
return DefaultCondition.TruncateLeft(s, w, prefix)
}
// Wrap return string wrapped with w cells
func Wrap(s string, w int) string {
return DefaultCondition.Wrap(s, w)
@@ -271,3 +347,12 @@ func FillLeft(s string, w int) string {
func FillRight(s string, w int) string {
return DefaultCondition.FillRight(s, w)
}
// CreateLUT will create an in-memory lookup table of 557055 bytes for faster operation.
// This should not be called concurrently with other operations.
func CreateLUT() {
if len(DefaultCondition.combinedLut) > 0 {
return
}
DefaultCondition.CreateLUT()
}

View File

@@ -1,3 +1,4 @@
//go:build appengine
// +build appengine
package runewidth

View File

@@ -1,5 +1,5 @@
// +build js
// +build !appengine
//go:build js && !appengine
// +build js,!appengine
package runewidth

View File

@@ -1,6 +1,5 @@
// +build !windows
// +build !js
// +build !appengine
//go:build !windows && !js && !appengine
// +build !windows,!js,!appengine
package runewidth

View File

@@ -1,5 +1,5 @@
// +build windows
// +build !appengine
//go:build windows && !appengine
// +build windows,!appengine
package runewidth

View File

@@ -3,7 +3,7 @@
[![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg)
[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg)
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 14.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html).
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 15.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html).
## Background
@@ -73,7 +73,7 @@ for gr.Next() {
### Using the [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) Function
This is orders of magnitude faster than the `Graphemes` class, but it requires the handling of states and boundaries:
This avoids allocating a new `Graphemes` object but it requires the handling of states and boundaries:
```go
str := "🇩🇪🏳️‍🌈"
@@ -88,29 +88,7 @@ for len(str) > 0 {
### Advanced Examples
Breaking into grapheme clusters and evaluating line breaks:
```go
str := "First line.\nSecond line."
state := -1
var (
c string
boundaries int
)
for len(str) > 0 {
c, str, boundaries, state = uniseg.StepString(str, state)
fmt.Print(c)
if boundaries&uniseg.MaskLine == uniseg.LineCanBreak {
fmt.Print("|")
} else if boundaries&uniseg.MaskLine == uniseg.LineMustBreak {
fmt.Print("‖")
}
}
// First |line.
// ‖Second |line.‖
```
If you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString):
The [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class offers the most convenient way to access all functionality of this package. But in some cases, it may be better to use the specialized functions directly. For example, if you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString):
```go
str := "Hello, world!"
@@ -133,6 +111,15 @@ Similarly, use
- [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and
- [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries).
If you're only interested in the width of characters, use [`FirstGraphemeCluster`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeCluster) or [`FirstGraphemeClusterInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeClusterInString). It is much faster than using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step), [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString), or the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class because it does not include the logic for word / sentence / line boundaries.
Finally, if you need to reverse a string while preserving grapheme clusters, use [`ReverseString`](https://pkg.go.dev/github.com/rivo/uniseg#ReverseString):
```go
fmt.Println(uniseg.ReverseString("🇩🇪🏳️‍🌈"))
// 🏳️‍🌈🇩🇪
```
## Documentation
Refer to https://pkg.go.dev/github.com/rivo/uniseg for the package's documentation.

View File

@@ -70,10 +70,10 @@ broken.
Monospace width, as referred to in this package, is the width of a string in a
monospace font. This is commonly used in terminal user interfaces or text
displays or editors that don't support proportional fonts. A width of 1
corresponds to a single character cell. The C function [wcwidth()] and its
corresponds to a single character cell. The C function [wcswidth()] and its
implementation in other programming languages is in widespread use for the same
purpose. However, there is no standard for the calculation of such widths, and
this package differs from wcwidth() in a number of ways, presumably to generate
this package differs from wcswidth() in a number of ways, presumably to generate
more visually pleasing results.
To start, we assume that every code point has a width of 1, with the following
@@ -103,6 +103,6 @@ Note that whether these widths appear correct depends on your application's
render engine, to which extent it conforms to the Unicode Standard, and its
choice of font.
[wcwidth()]: https://man7.org/linux/man-pages/man3/wcwidth.3.html
[wcswidth()]: https://man7.org/linux/man-pages/man3/wcswidth.3.html
*/
package uniseg

View File

@@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// eastAsianWidth are taken from
// https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt
// https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt
// and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var eastAsianWidth = [][3]int{
{0x0000, 0x001F, prN}, // Cc [32] <control-0000>..<control-001F>
@@ -504,6 +504,7 @@ var eastAsianWidth = [][3]int{
{0x0CE2, 0x0CE3, prN}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prN}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prN}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prN}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prN}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prN}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prN}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -565,7 +566,7 @@ var eastAsianWidth = [][3]int{
{0x0EBD, 0x0EBD, prN}, // Lo LAO SEMIVOWEL SIGN NYO
{0x0EC0, 0x0EC4, prN}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
{0x0EC6, 0x0EC6, prN}, // Lm LAO KO LA
{0x0EC8, 0x0ECD, prN}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
{0x0EC8, 0x0ECE, prN}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prN}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0EDC, 0x0EDF, prN}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO
{0x0F00, 0x0F00, prN}, // Lo TIBETAN SYLLABLE OM
@@ -1916,6 +1917,7 @@ var eastAsianWidth = [][3]int{
{0x10EAB, 0x10EAC, prN}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EAD, 0x10EAD, prN}, // Pd YEZIDI HYPHENATION MARK
{0x10EB0, 0x10EB1, prN}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prN}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prN}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F1D, 0x10F26, prN}, // No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
{0x10F27, 0x10F27, prN}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -1998,6 +2000,8 @@ var eastAsianWidth = [][3]int{
{0x11236, 0x11237, prN}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
{0x11238, 0x1123D, prN}, // Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
{0x1123E, 0x1123E, prN}, // Mn KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prN}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prN}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prN}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prN}, // Lo MULTANI LETTER GHA
{0x1128A, 0x1128D, prN}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
@@ -2160,6 +2164,7 @@ var eastAsianWidth = [][3]int{
{0x11A9E, 0x11AA2, prN}, // Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
{0x11AB0, 0x11ABF, prN}, // Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
{0x11AC0, 0x11AF8, prN}, // Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
{0x11B00, 0x11B09, prN}, // Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
{0x11C00, 0x11C08, prN}, // Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
{0x11C0A, 0x11C2E, prN}, // Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
{0x11C2F, 0x11C2F, prN}, // Mc BHAIKSUKI VOWEL SIGN AA
@@ -2205,6 +2210,19 @@ var eastAsianWidth = [][3]int{
{0x11EF3, 0x11EF4, prN}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prN}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11EF7, 0x11EF8, prN}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
{0x11F00, 0x11F01, prN}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prN}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prN}, // Mc KAWI SIGN VISARGA
{0x11F04, 0x11F10, prN}, // Lo [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prN}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prN}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prN}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prN}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prN}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prN}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prN}, // Mn KAWI CONJOINER
{0x11F43, 0x11F4F, prN}, // Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL
{0x11F50, 0x11F59, prN}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prN}, // Lo LISU LETTER YHA
{0x11FC0, 0x11FD4, prN}, // No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
{0x11FD5, 0x11FDC, prN}, // So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
@@ -2217,8 +2235,11 @@ var eastAsianWidth = [][3]int{
{0x12480, 0x12543, prN}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
{0x12F90, 0x12FF0, prN}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
{0x12FF1, 0x12FF2, prN}, // Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
{0x13000, 0x1342E, prN}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
{0x13430, 0x13438, prN}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
{0x13000, 0x1342F, prN}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x1343F, prN}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prN}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prN}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prN}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x14646, prN}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
{0x16800, 0x16A38, prN}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
{0x16A40, 0x16A5E, prN}, // Lo [31] MRO LETTER TA..MRO LETTER TEK
@@ -2263,7 +2284,9 @@ var eastAsianWidth = [][3]int{
{0x1AFFD, 0x1AFFE, prW}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B0FF, prW}, // Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
{0x1B100, 0x1B122, prW}, // Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU
{0x1B132, 0x1B132, prW}, // Lo HIRAGANA LETTER SMALL KO
{0x1B150, 0x1B152, prW}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
{0x1B155, 0x1B155, prW}, // Lo KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prW}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1B170, 0x1B2FB, prW}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
{0x1BC00, 0x1BC6A, prN}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
@@ -2294,6 +2317,7 @@ var eastAsianWidth = [][3]int{
{0x1D200, 0x1D241, prN}, // So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
{0x1D242, 0x1D244, prN}, // Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
{0x1D245, 0x1D245, prN}, // So GREEK MUSICAL LEIMMA
{0x1D2C0, 0x1D2D3, prN}, // No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN
{0x1D2E0, 0x1D2F3, prN}, // No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
{0x1D300, 0x1D356, prN}, // So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
{0x1D360, 0x1D378, prN}, // No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
@@ -2353,11 +2377,14 @@ var eastAsianWidth = [][3]int{
{0x1DF00, 0x1DF09, prN}, // Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prN}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prN}, // Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prN}, // Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prN}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prN}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prN}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prN}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prN}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prN}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prN}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prN}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@@ -2370,6 +2397,10 @@ var eastAsianWidth = [][3]int{
{0x1E2EC, 0x1E2EF, prN}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prN}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E2FF, 0x1E2FF, prN}, // Sc WANCHO NGUN SIGN
{0x1E4D0, 0x1E4EA, prN}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prN}, // Lm NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prN}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prN}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prN}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prN}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prN}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -2498,13 +2529,14 @@ var eastAsianWidth = [][3]int{
{0x1F6D0, 0x1F6D2, prW}, // So [3] PLACE OF WORSHIP..SHOPPING TROLLEY
{0x1F6D3, 0x1F6D4, prN}, // So [2] STUPA..PAGODA
{0x1F6D5, 0x1F6D7, prW}, // So [3] HINDU TEMPLE..ELEVATOR
{0x1F6DD, 0x1F6DF, prW}, // So [3] PLAYGROUND SLIDE..RING BUOY
{0x1F6DC, 0x1F6DF, prW}, // So [4] WIRELESS..RING BUOY
{0x1F6E0, 0x1F6EA, prN}, // So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE
{0x1F6EB, 0x1F6EC, prW}, // So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING
{0x1F6F0, 0x1F6F3, prN}, // So [4] SATELLITE..PASSENGER SHIP
{0x1F6F4, 0x1F6FC, prW}, // So [9] SCOOTER..ROLLER SKATE
{0x1F700, 0x1F773, prN}, // So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
{0x1F780, 0x1F7D8, prN}, // So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
{0x1F700, 0x1F776, prN}, // So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE
{0x1F77B, 0x1F77F, prN}, // So [5] HAUMEA..ORCUS
{0x1F780, 0x1F7D9, prN}, // So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR
{0x1F7E0, 0x1F7EB, prW}, // So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
{0x1F7F0, 0x1F7F0, prW}, // So HEAVY EQUALS SIGN
{0x1F800, 0x1F80B, prN}, // So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
@@ -2521,22 +2553,20 @@ var eastAsianWidth = [][3]int{
{0x1F947, 0x1F9FF, prW}, // So [185] FIRST PLACE MEDAL..NAZAR AMULET
{0x1FA00, 0x1FA53, prN}, // So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP
{0x1FA60, 0x1FA6D, prN}, // So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
{0x1FA70, 0x1FA74, prW}, // So [5] BALLET SHOES..THONG SANDAL
{0x1FA78, 0x1FA7C, prW}, // So [5] DROP OF BLOOD..CRUTCH
{0x1FA80, 0x1FA86, prW}, // So [7] YO-YO..NESTING DOLLS
{0x1FA90, 0x1FAAC, prW}, // So [29] RINGED PLANET..HAMSA
{0x1FAB0, 0x1FABA, prW}, // So [11] FLY..NEST WITH EGGS
{0x1FAC0, 0x1FAC5, prW}, // So [6] ANATOMICAL HEART..PERSON WITH CROWN
{0x1FAD0, 0x1FAD9, prW}, // So [10] BLUEBERRIES..JAR
{0x1FAE0, 0x1FAE7, prW}, // So [8] MELTING FACE..BUBBLES
{0x1FAF0, 0x1FAF6, prW}, // So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
{0x1FA70, 0x1FA7C, prW}, // So [13] BALLET SHOES..CRUTCH
{0x1FA80, 0x1FA88, prW}, // So [9] YO-YO..FLUTE
{0x1FA90, 0x1FABD, prW}, // So [46] RINGED PLANET..WING
{0x1FABF, 0x1FAC5, prW}, // So [7] GOOSE..PERSON WITH CROWN
{0x1FACE, 0x1FADB, prW}, // So [14] MOOSE..PEA POD
{0x1FAE0, 0x1FAE8, prW}, // So [9] MELTING FACE..SHAKING FACE
{0x1FAF0, 0x1FAF8, prW}, // So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
{0x1FB00, 0x1FB92, prN}, // So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
{0x1FB94, 0x1FBCA, prN}, // So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
{0x1FBF0, 0x1FBF9, prN}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x20000, 0x2A6DF, prW}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
{0x2A6E0, 0x2A6FF, prW}, // Cn [32] <reserved-2A6E0>..<reserved-2A6FF>
{0x2A700, 0x2B738, prW}, // Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
{0x2B739, 0x2B73F, prW}, // Cn [7] <reserved-2B739>..<reserved-2B73F>
{0x2A700, 0x2B739, prW}, // Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
{0x2B73A, 0x2B73F, prW}, // Cn [6] <reserved-2B73A>..<reserved-2B73F>
{0x2B740, 0x2B81D, prW}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
{0x2B81E, 0x2B81F, prW}, // Cn [2] <reserved-2B81E>..<reserved-2B81F>
{0x2B820, 0x2CEA1, prW}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
@@ -2547,7 +2577,9 @@ var eastAsianWidth = [][3]int{
{0x2FA1E, 0x2FA1F, prW}, // Cn [2] <reserved-2FA1E>..<reserved-2FA1F>
{0x2FA20, 0x2FFFD, prW}, // Cn [1502] <reserved-2FA20>..<reserved-2FFFD>
{0x30000, 0x3134A, prW}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
{0x3134B, 0x3FFFD, prW}, // Cn [60595] <reserved-3134B>..<reserved-3FFFD>
{0x3134B, 0x3134F, prW}, // Cn [5] <reserved-3134B>..<reserved-3134F>
{0x31350, 0x323AF, prW}, // Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
{0x323B0, 0x3FFFD, prW}, // Cn [56398] <reserved-323B0>..<reserved-3FFFD>
{0xE0001, 0xE0001, prN}, // Cf LANGUAGE TAG
{0xE0020, 0xE007F, prN}, // Cf [96] TAG SPACE..CANCEL TAG
{0xE0100, 0xE01EF, prA}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

View File

@@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// emojiPresentation are taken from
//
// and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var emojiPresentation = [][3]int{
{0x231A, 0x231B, prEmojiPresentation}, // E0.6 [2] (⌚..⌛) watch..hourglass done
@@ -211,6 +211,7 @@ var emojiPresentation = [][3]int{
{0x1F6D1, 0x1F6D2, prEmojiPresentation}, // E3.0 [2] (🛑..🛒) stop sign..shopping cart
{0x1F6D5, 0x1F6D5, prEmojiPresentation}, // E12.0 [1] (🛕) hindu temple
{0x1F6D6, 0x1F6D7, prEmojiPresentation}, // E13.0 [2] (🛖..🛗) hut..elevator
{0x1F6DC, 0x1F6DC, prEmojiPresentation}, // E15.0 [1] (🛜) wireless
{0x1F6DD, 0x1F6DF, prEmojiPresentation}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
{0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
{0x1F6F4, 0x1F6F6, prEmojiPresentation}, // E3.0 [3] (🛴..🛶) kick scooter..canoe
@@ -267,19 +268,28 @@ var emojiPresentation = [][3]int{
{0x1F9E7, 0x1F9FF, prEmojiPresentation}, // E11.0 [25] (🧧..🧿) red envelope..nazar amulet
{0x1FA70, 0x1FA73, prEmojiPresentation}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
{0x1FA74, 0x1FA74, prEmojiPresentation}, // E13.0 [1] (🩴) thong sandal
{0x1FA75, 0x1FA77, prEmojiPresentation}, // E15.0 [3] (🩵..🩷) light blue heart..pink heart
{0x1FA78, 0x1FA7A, prEmojiPresentation}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
{0x1FA7B, 0x1FA7C, prEmojiPresentation}, // E14.0 [2] (🩻..🩼) x-ray..crutch
{0x1FA80, 0x1FA82, prEmojiPresentation}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
{0x1FA83, 0x1FA86, prEmojiPresentation}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
{0x1FA87, 0x1FA88, prEmojiPresentation}, // E15.0 [2] (🪇..🪈) maracas..flute
{0x1FA90, 0x1FA95, prEmojiPresentation}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
{0x1FA96, 0x1FAA8, prEmojiPresentation}, // E13.0 [19] (🪖..🪨) military helmet..rock
{0x1FAA9, 0x1FAAC, prEmojiPresentation}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
{0x1FAAD, 0x1FAAF, prEmojiPresentation}, // E15.0 [3] (🪭..🪯) folding hand fan..khanda
{0x1FAB0, 0x1FAB6, prEmojiPresentation}, // E13.0 [7] (🪰..🪶) fly..feather
{0x1FAB7, 0x1FABA, prEmojiPresentation}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
{0x1FABB, 0x1FABD, prEmojiPresentation}, // E15.0 [3] (🪻..🪽) hyacinth..wing
{0x1FABF, 0x1FABF, prEmojiPresentation}, // E15.0 [1] (🪿) goose
{0x1FAC0, 0x1FAC2, prEmojiPresentation}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
{0x1FAC3, 0x1FAC5, prEmojiPresentation}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
{0x1FACE, 0x1FACF, prEmojiPresentation}, // E15.0 [2] (🫎..🫏) moose..donkey
{0x1FAD0, 0x1FAD6, prEmojiPresentation}, // E13.0 [7] (🫐..🫖) blueberries..teapot
{0x1FAD7, 0x1FAD9, prEmojiPresentation}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
{0x1FADA, 0x1FADB, prEmojiPresentation}, // E15.0 [2] (🫚..🫛) ginger root..pea pod
{0x1FAE0, 0x1FAE7, prEmojiPresentation}, // E14.0 [8] (🫠..🫧) melting face..bubbles
{0x1FAE8, 0x1FAE8, prEmojiPresentation}, // E15.0 [1] (🫨) shaking face
{0x1FAF0, 0x1FAF6, prEmojiPresentation}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
{0x1FAF7, 0x1FAF8, prEmojiPresentation}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand
}

View File

@@ -32,7 +32,7 @@ import (
// We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new tests.
const (
testCaseURL = `https://www.unicode.org/Public/14.0.0/ucd/auxiliary/%s.txt`
testCaseURL = `https://www.unicode.org/Public/15.0.0/ucd/auxiliary/%s.txt`
)
func main() {
@@ -76,9 +76,9 @@ func parse(url string) ([]byte, error) {
buf := new(bytes.Buffer)
buf.Grow(120 << 10)
buf.WriteString(`package uniseg
buf.WriteString(`// Code generated via go generate from gen_breaktest.go. DO NOT EDIT.
// Code generated via go generate from gen_breaktest.go. DO NOT EDIT.
package uniseg
// ` + os.Args[3] + ` are Grapheme testcases taken from
// ` + url + `
@@ -136,7 +136,9 @@ var (
//
// E.g. for the input b="÷ 0020 × 0308 ÷ 1F1E6 ÷"
// it will append
// "\u0020\u0308\U0001F1E6"
//
// "\u0020\u0308\U0001F1E6"
//
// and "[][]rune{{0x0020,0x0308},{0x1F1E6},}"
// to orig and exp respectively.
//

View File

@@ -41,8 +41,8 @@ import (
// We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new tests.
const (
propertyURL = `https://www.unicode.org/Public/14.0.0/ucd/%s.txt`
emojiURL = `https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt`
propertyURL = `https://www.unicode.org/Public/15.0.0/ucd/%s.txt`
emojiURL = `https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt`
)
// The regular expression for a line containing a code point range property.
@@ -178,6 +178,11 @@ func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (stri
}
}
// Avoid overflow during binary search.
if len(properties) >= 1<<31 {
return "", errors.New("too many properties")
}
// Sort properties.
sort.Slice(properties, func(i, j int) bool {
left, _ := strconv.ParseUint(properties[i][0], 16, 64)
@@ -200,9 +205,9 @@ func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (stri
// ` + emojiURL + `
// ("Extended_Pictographic" only)`
}
buf.WriteString(`package uniseg
buf.WriteString(`// Code generated via go generate from gen_properties.go. DO NOT EDIT.
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// ` + os.Args[3] + ` are taken from
// ` + propertyURL + emojiComment + `

View File

@@ -13,9 +13,10 @@ import "unicode/utf8"
// well as boundary information and character width is available via the various
// methods (see examples below).
//
// Using this class to iterate over a string is convenient but it is much slower
// than using this package's [Step] or [StepString] functions or any of the
// other specialized functions starting with "First".
// This class basically wraps the [StepString] parser and provides a convenient
// interface to it. If you are only interested in some parts of this package's
// functionality, using the specialized functions starting with "First" is
// almost always faster.
type Graphemes struct {
// The original string.
original string
@@ -163,13 +164,32 @@ func GraphemeClusterCount(s string) (n int) {
return
}
// ReverseString reverses the given string while observing grapheme cluster
// boundaries.
func ReverseString(s string) string {
str := []byte(s)
reversed := make([]byte, len(str))
state := -1
index := len(str)
for len(str) > 0 {
var cluster []byte
cluster, str, _, state = FirstGraphemeCluster(str, state)
index -= len(cluster)
copy(reversed[index:], cluster)
if index <= len(str)/2 {
break
}
}
return string(reversed)
}
// The number of bits the grapheme property must be shifted to make place for
// grapheme states.
const shiftGraphemePropState = 4
// FirstGraphemeCluster returns the first grapheme cluster found in the given
// byte slice according to the rules of Unicode Standard Annex #29, Grapheme
// Cluster Boundaries. This function can be called continuously to extract all
// byte slice according to the rules of [Unicode Standard Annex #29, Grapheme
// Cluster Boundaries]. This function can be called continuously to extract all
// grapheme clusters from a byte slice, as illustrated in the example below.
//
// If you don't know the current state, for example when calling the function
@@ -190,6 +210,8 @@ const shiftGraphemePropState = 4
// While slightly less convenient than using the Graphemes class, this function
// has much better performance and makes no allocations. It lends itself well to
// large byte slices.
//
// [Unicode Standard Annex #29, Grapheme Cluster Boundaries]: http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
@@ -201,7 +223,7 @@ func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, new
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)
} else {
prop = state >> shiftGraphemePropState
}
@@ -231,16 +253,14 @@ func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, new
return b[:length], b[length:], width, state | (prop << shiftGraphemePropState)
}
if r == vs16 {
width = 2
} else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
} else if firstProp == prExtendedPictographic {
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else {
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l
@@ -263,7 +283,7 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string,
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)
} else {
prop = state >> shiftGraphemePropState
}
@@ -293,16 +313,14 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string,
return str[:length], str[length:], width, state | (prop << shiftGraphemePropState)
}
if r == vs16 {
width = 2
} else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
} else if firstProp == prExtendedPictographic {
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else {
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l

View File

@@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// graphemeCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
// https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
// and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var graphemeCodePoints = [][3]int{
{0x0000, 0x0009, prControl}, // Cc [10] <control-0000>..<control-0009>
@@ -143,6 +143,7 @@ var graphemeCodePoints = [][3]int{
{0x0CCC, 0x0CCD, prExtend}, // Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
{0x0CD5, 0x0CD6, prExtend}, // Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
{0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CF3, 0x0CF3, prSpacingMark}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prSpacingMark}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D3B, 0x0D3C, prExtend}, // Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
@@ -172,7 +173,7 @@ var graphemeCodePoints = [][3]int{
{0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN
{0x0EB3, 0x0EB3, prSpacingMark}, // Lo LAO VOWEL SIGN AM
{0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
{0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
{0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
{0x0F35, 0x0F35, prExtend}, // Mn TIBETAN MARK NGAS BZUNG NYI ZLA
{0x0F37, 0x0F37, prExtend}, // Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
@@ -1336,6 +1337,7 @@ var graphemeCodePoints = [][3]int{
{0x10AE5, 0x10AE6, prExtend}, // Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
{0x10D24, 0x10D27, prExtend}, // Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
{0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F46, 0x10F50, prExtend}, // Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
{0x10F82, 0x10F85, prExtend}, // Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
{0x11000, 0x11000, prSpacingMark}, // Mc BRAHMI SIGN CANDRABINDU
@@ -1375,6 +1377,7 @@ var graphemeCodePoints = [][3]int{
{0x11235, 0x11235, prSpacingMark}, // Mc KHOJKI SIGN VIRAMA
{0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
{0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN
{0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x112DF, 0x112DF, prExtend}, // Mn KHUDAWADI SIGN ANUSVARA
{0x112E0, 0x112E2, prSpacingMark}, // Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
{0x112E3, 0x112EA, prExtend}, // Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
@@ -1494,7 +1497,18 @@ var graphemeCodePoints = [][3]int{
{0x11D97, 0x11D97, prExtend}, // Mn GUNJALA GONDI VIRAMA
{0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prSpacingMark}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x13430, 0x13438, prControl}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
{0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prPrepend}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prSpacingMark}, // Mc KAWI SIGN VISARGA
{0x11F34, 0x11F35, prSpacingMark}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prSpacingMark}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prSpacingMark}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER
{0x13430, 0x1343F, prControl}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x16AF0, 0x16AF4, prExtend}, // Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
{0x16B30, 0x16B36, prExtend}, // Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
{0x16F4F, 0x16F4F, prExtend}, // Mn MIAO SIGN CONSONANT MODIFIER BAR
@@ -1527,9 +1541,11 @@ var graphemeCodePoints = [][3]int{
{0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E2AE, 0x1E2AE, prExtend}, // Mn TOTO SIGN RISING TONE
{0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E8D0, 0x1E8D6, prExtend}, // Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
{0x1E944, 0x1E94A, prExtend}, // Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
{0x1F000, 0x1F003, prExtendedPictographic}, // E0.0 [4] (🀀..🀃) MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND
@@ -1780,7 +1796,8 @@ var graphemeCodePoints = [][3]int{
{0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (🛓..🛔) STUPA..PAGODA
{0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (🛕) hindu temple
{0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (🛖..🛗) hut..elevator
{0x1F6D8, 0x1F6DC, prExtendedPictographic}, // E0.0 [5] (🛘..🛜) <reserved-1F6D8>..<reserved-1F6DC>
{0x1F6D8, 0x1F6DB, prExtendedPictographic}, // E0.0 [4] (🛘..🛛) <reserved-1F6D8>..<reserved-1F6DB>
{0x1F6DC, 0x1F6DC, prExtendedPictographic}, // E15.0 [1] (🛜) wireless
{0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
{0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
{0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE
@@ -1797,7 +1814,7 @@ var graphemeCodePoints = [][3]int{
{0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (🛺) auto rickshaw
{0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate
{0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<reserved-1F6FF>
{0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) <reserved-1F774>..<reserved-1F77F>
{0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) LOT OF FORTUNE..ORCUS
{0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<reserved-1F7DF>
{0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square
{0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<reserved-1F7EF>
@@ -1856,30 +1873,37 @@ var graphemeCodePoints = [][3]int{
{0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING..<reserved-1FA6F>
{0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
{0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal
{0x1FA75, 0x1FA77, prExtendedPictographic}, // E0.0 [3] (🩵..🩷) <reserved-1FA75>..<reserved-1FA77>
{0x1FA75, 0x1FA77, prExtendedPictographic}, // E15.0 [3] (🩵..🩷) light blue heart..pink heart
{0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
{0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch
{0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<reserved-1FA7F>
{0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
{0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
{0x1FA87, 0x1FA8F, prExtendedPictographic}, // E0.0 [9] (🪇..🪏) <reserved-1FA87>..<reserved-1FA8F>
{0x1FA87, 0x1FA88, prExtendedPictographic}, // E15.0 [2] (🪇..🪈) maracas..flute
{0x1FA89, 0x1FA8F, prExtendedPictographic}, // E0.0 [7] (🪉..🪏) <reserved-1FA89>..<reserved-1FA8F>
{0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
{0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (🪖..🪨) military helmet..rock
{0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
{0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E0.0 [3] (🪭..🪯) <reserved-1FAAD>..<reserved-1FAAF>
{0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E15.0 [3] (🪭..🪯) folding hand fan..khanda
{0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (🪰..🪶) fly..feather
{0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
{0x1FABB, 0x1FABF, prExtendedPictographic}, // E0.0 [5] (🪻..🪿) <reserved-1FABB>..<reserved-1FABF>
{0x1FABB, 0x1FABD, prExtendedPictographic}, // E15.0 [3] (🪻..🪽) hyacinth..wing
{0x1FABE, 0x1FABE, prExtendedPictographic}, // E0.0 [1] (🪾) <reserved-1FABE>
{0x1FABF, 0x1FABF, prExtendedPictographic}, // E15.0 [1] (🪿) goose
{0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
{0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
{0x1FAC6, 0x1FACF, prExtendedPictographic}, // E0.0 [10] (🫆..🫏) <reserved-1FAC6>..<reserved-1FACF>
{0x1FAC6, 0x1FACD, prExtendedPictographic}, // E0.0 [8] (🫆..🫍) <reserved-1FAC6>..<reserved-1FACD>
{0x1FACE, 0x1FACF, prExtendedPictographic}, // E15.0 [2] (🫎..🫏) moose..donkey
{0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..🫖) blueberries..teapot
{0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
{0x1FADA, 0x1FADF, prExtendedPictographic}, // E0.0 [6] (🫚..🫟) <reserved-1FADA>..<reserved-1FADF>
{0x1FADA, 0x1FADB, prExtendedPictographic}, // E15.0 [2] (🫚..🫛) ginger root..pea pod
{0x1FADC, 0x1FADF, prExtendedPictographic}, // E0.0 [4] (🫜..🫟) <reserved-1FADC>..<reserved-1FADF>
{0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles
{0x1FAE8, 0x1FAEF, prExtendedPictographic}, // E0.0 [8] (🫨..🫯) <reserved-1FAE8>..<reserved-1FAEF>
{0x1FAE8, 0x1FAE8, prExtendedPictographic}, // E15.0 [1] (🫨) shaking face
{0x1FAE9, 0x1FAEF, prExtendedPictographic}, // E0.0 [7] (🫩..🫯) <reserved-1FAE9>..<reserved-1FAEF>
{0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
{0x1FAF7, 0x1FAFF, prExtendedPictographic}, // E0.0 [9] (🫷..🫿) <reserved-1FAF7>..<reserved-1FAFF>
{0x1FAF7, 0x1FAF8, prExtendedPictographic}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand
{0x1FAF9, 0x1FAFF, prExtendedPictographic}, // E0.0 [7] (🫹..🫿) <reserved-1FAF9>..<reserved-1FAFF>
{0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (🰀..🿽) <reserved-1FC00>..<reserved-1FFFD>
{0xE0000, 0xE0000, prControl}, // Cn <reserved-E0000>
{0xE0001, 0xE0001, prControl}, // Cf LANGUAGE TAG

View File

@@ -21,11 +21,12 @@ const (
grBoundary
)
// The grapheme cluster parser's state transitions. Maps (state, property) to
// (new state, breaking instruction, rule number). The breaking instruction
// always refers to the boundary between the last and next code point.
// grTransitions implements the grapheme cluster parser's state transitions.
// Maps state and property to a new state, a breaking instruction, and rule
// number. The breaking instruction always refers to the boundary between the
// last and next code point. Returns negative values if no transition is found.
//
// This map is queried as follows:
// This function is used as follows:
//
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
@@ -36,59 +37,96 @@ const (
// are equal. Stop.
// 6. Assume grAny and grBoundary.
//
// Unicode version 14.0.0.
var grTransitions = map[[2]int][3]int{
// Unicode version 15.0.0.
func grTransitions(state, prop int) (newState int, newProp int, boundary int) {
// It turns out that using a big switch statement is much faster than using
// a map.
switch uint64(state) | uint64(prop)<<32 {
// GB5
{grAny, prCR}: {grCR, grBoundary, 50},
{grAny, prLF}: {grControlLF, grBoundary, 50},
{grAny, prControl}: {grControlLF, grBoundary, 50},
case grAny | prCR<<32:
return grCR, grBoundary, 50
case grAny | prLF<<32:
return grControlLF, grBoundary, 50
case grAny | prControl<<32:
return grControlLF, grBoundary, 50
// GB4
{grCR, prAny}: {grAny, grBoundary, 40},
{grControlLF, prAny}: {grAny, grBoundary, 40},
case grCR | prAny<<32:
return grAny, grBoundary, 40
case grControlLF | prAny<<32:
return grAny, grBoundary, 40
// GB3.
{grCR, prLF}: {grAny, grNoBoundary, 30},
// GB3
case grCR | prLF<<32:
return grControlLF, grNoBoundary, 30
// GB6.
{grAny, prL}: {grL, grBoundary, 9990},
{grL, prL}: {grL, grNoBoundary, 60},
{grL, prV}: {grLVV, grNoBoundary, 60},
{grL, prLV}: {grLVV, grNoBoundary, 60},
{grL, prLVT}: {grLVTT, grNoBoundary, 60},
// GB6
case grAny | prL<<32:
return grL, grBoundary, 9990
case grL | prL<<32:
return grL, grNoBoundary, 60
case grL | prV<<32:
return grLVV, grNoBoundary, 60
case grL | prLV<<32:
return grLVV, grNoBoundary, 60
case grL | prLVT<<32:
return grLVTT, grNoBoundary, 60
// GB7.
{grAny, prLV}: {grLVV, grBoundary, 9990},
{grAny, prV}: {grLVV, grBoundary, 9990},
{grLVV, prV}: {grLVV, grNoBoundary, 70},
{grLVV, prT}: {grLVTT, grNoBoundary, 70},
// GB7
case grAny | prLV<<32:
return grLVV, grBoundary, 9990
case grAny | prV<<32:
return grLVV, grBoundary, 9990
case grLVV | prV<<32:
return grLVV, grNoBoundary, 70
case grLVV | prT<<32:
return grLVTT, grNoBoundary, 70
// GB8.
{grAny, prLVT}: {grLVTT, grBoundary, 9990},
{grAny, prT}: {grLVTT, grBoundary, 9990},
{grLVTT, prT}: {grLVTT, grNoBoundary, 80},
// GB8
case grAny | prLVT<<32:
return grLVTT, grBoundary, 9990
case grAny | prT<<32:
return grLVTT, grBoundary, 9990
case grLVTT | prT<<32:
return grLVTT, grNoBoundary, 80
// GB9.
{grAny, prExtend}: {grAny, grNoBoundary, 90},
{grAny, prZWJ}: {grAny, grNoBoundary, 90},
// GB9
case grAny | prExtend<<32:
return grAny, grNoBoundary, 90
case grAny | prZWJ<<32:
return grAny, grNoBoundary, 90
// GB9a.
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
// GB9a
case grAny | prSpacingMark<<32:
return grAny, grNoBoundary, 91
// GB9b.
{grAny, prPrepend}: {grPrepend, grBoundary, 9990},
{grPrepend, prAny}: {grAny, grNoBoundary, 92},
// GB9b
case grAny | prPrepend<<32:
return grPrepend, grBoundary, 9990
case grPrepend | prAny<<32:
return grAny, grNoBoundary, 92
// GB11.
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
// GB11
case grAny | prExtendedPictographic<<32:
return grExtendedPictographic, grBoundary, 9990
case grExtendedPictographic | prExtend<<32:
return grExtendedPictographic, grNoBoundary, 110
case grExtendedPictographic | prZWJ<<32:
return grExtendedPictographicZWJ, grNoBoundary, 110
case grExtendedPictographicZWJ | prExtendedPictographic<<32:
return grExtendedPictographic, grNoBoundary, 110
// GB12 / GB13.
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
// GB12 / GB13
case grAny | prRegionalIndicator<<32:
return grRIOdd, grBoundary, 9990
case grRIOdd | prRegionalIndicator<<32:
return grRIEven, grNoBoundary, 120
case grRIEven | prRegionalIndicator<<32:
return grRIOdd, grBoundary, 120
default:
return -1, -1, -1
}
}
// transitionGraphemeState determines the new state of the grapheme cluster
@@ -97,40 +135,40 @@ var grTransitions = map[[2]int][3]int{
// table) and whether a cluster boundary was detected.
func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) {
// Determine the property of the next character.
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)
// Find the applicable transition.
transition, ok := grTransitions[[2]int{state, prop}]
if ok {
nextState, nextProp, _ := grTransitions(state, prop)
if nextState >= 0 {
// We have a specific transition. We'll use it.
return transition[0], prop, transition[1] == grBoundary
return nextState, prop, nextProp == grBoundary
}
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := grTransitions[[2]int{state, prAny}]
transAnyState, okAnyState := grTransitions[[2]int{grAny, prop}]
if okAnyProp && okAnyState {
anyPropState, anyPropProp, anyPropRule := grTransitions(state, prAny)
anyStateState, anyStateProp, anyStateRule := grTransitions(grAny, prop)
if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState = transAnyState[0]
boundary = transAnyState[1] == grBoundary
if transAnyProp[2] < transAnyState[2] {
boundary = transAnyProp[1] == grBoundary
newState = anyStateState
boundary = anyStateProp == grBoundary
if anyPropRule < anyStateRule {
boundary = anyPropProp == grBoundary
}
return
}
if okAnyProp {
if anyPropState >= 0 {
// We only have a specific state.
return transAnyProp[0], prop, transAnyProp[1] == grBoundary
return anyPropState, prop, anyPropProp == grBoundary
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
}
if okAnyState {
if anyStateState >= 0 {
// We only have a specific property.
return transAnyState[0], prop, transAnyState[1] == grBoundary
return anyStateState, prop, anyStateProp == grBoundary
}
// No known transition. GB999: Any ÷ Any.

View File

@@ -4,7 +4,7 @@ import "unicode/utf8"
// FirstLineSegment returns the prefix of the given byte slice after which a
// decision to break the string over to the next line can or must be made,
// according to the rules of Unicode Standard Annex #14. This is used to
// according to the rules of [Unicode Standard Annex #14]. This is used to
// implement line breaking.
//
// Line breaking, also known as word wrapping, is the process of breaking a
@@ -35,7 +35,7 @@ import "unicode/utf8"
//
// Given an empty byte slice "b", the function returns nil values.
//
// Note that in accordance with UAX #14 LB3, the final segment will end with
// Note that in accordance with [UAX #14 LB3], the final segment will end with
// "mustBreak" set to true. You can choose to ignore this by checking if the
// length of the "rest" slice is 0 and calling [HasTrailingLineBreak] or
// [HasTrailingLineBreakInString] on the last rune.
@@ -43,6 +43,9 @@ import "unicode/utf8"
// Note also that this algorithm may break within grapheme clusters. This is
// addressed in Section 8.2 Example 6 of UAX #14. To avoid this, you can use
// the [Step] function instead.
//
// [Unicode Standard Annex #14]: https://www.unicode.org/reports/tr14/
// [UAX #14 LB3]: https://www.unicode.org/reports/tr14/#Algorithm
func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
@@ -77,7 +80,7 @@ func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool
}
}
// FirstLineSegmentInString is like FirstLineSegment() but its input and outputs
// FirstLineSegmentInString is like [FirstLineSegment] but its input and outputs
// are strings.
func FirstLineSegmentInString(str string, state int) (segment, rest string, mustBreak bool, newState int) {
// An empty byte slice returns nothing.
@@ -119,13 +122,13 @@ func FirstLineSegmentInString(str string, state int) (segment, rest string, must
// [UAX #14]: https://www.unicode.org/reports/tr14/#Algorithm
func HasTrailingLineBreak(b []byte) bool {
r, _ := utf8.DecodeLastRune(b)
property, _ := propertyWithGenCat(lineBreakCodePoints, r)
return property == lbBK || property == lbCR || property == lbLF || property == lbNL
property, _ := propertyLineBreak(r)
return property == prBK || property == prCR || property == prLF || property == prNL
}
// HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string.
func HasTrailingLineBreakInString(str string) bool {
r, _ := utf8.DecodeLastRuneInString(str)
property, _ := propertyWithGenCat(lineBreakCodePoints, r)
return property == lbBK || property == lbCR || property == lbLF || property == lbNL
property, _ := propertyLineBreak(r)
return property == prBK || property == prCR || property == prLF || property == prNL
}

View File

@@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// lineBreakCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt
// https://www.unicode.org/Public/15.0.0/ucd/LineBreak.txt
// and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var lineBreakCodePoints = [][4]int{
{0x0000, 0x0008, prCM, gcCc}, // [9] <control-0000>..<control-0008>
@@ -439,6 +439,7 @@ var lineBreakCodePoints = [][4]int{
{0x0CE2, 0x0CE3, prCM, gcMn}, // [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prNU, gcNd}, // [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prAL, gcLo}, // [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prCM, gcMc}, // KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prCM, gcMn}, // [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prCM, gcMc}, // [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prAL, gcLo}, // [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -500,7 +501,7 @@ var lineBreakCodePoints = [][4]int{
{0x0EBD, 0x0EBD, prSA, gcLo}, // LAO SEMIVOWEL SIGN NYO
{0x0EC0, 0x0EC4, prSA, gcLo}, // [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
{0x0EC6, 0x0EC6, prSA, gcLm}, // LAO KO LA
{0x0EC8, 0x0ECD, prSA, gcMn}, // [6] LAO TONE MAI EK..LAO NIGGAHITA
{0x0EC8, 0x0ECE, prSA, gcMn}, // [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prNU, gcNd}, // [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0EDC, 0x0EDF, prSA, gcLo}, // [4] LAO HO NO..LAO LETTER KHMU NYO
{0x0F00, 0x0F00, prAL, gcLo}, // TIBETAN SYLLABLE OM
@@ -813,7 +814,11 @@ var lineBreakCodePoints = [][4]int{
{0x1D79, 0x1D7F, prAL, gcLl}, // [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE
{0x1D80, 0x1D9A, prAL, gcLl}, // [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
{0x1D9B, 0x1DBF, prAL, gcLm}, // [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
{0x1DC0, 0x1DFF, prCM, gcMn}, // [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
{0x1DC0, 0x1DCC, prCM, gcMn}, // [13] COMBINING DOTTED GRAVE ACCENT..COMBINING MACRON-BREVE
{0x1DCD, 0x1DCD, prGL, gcMn}, // COMBINING DOUBLE CIRCUMFLEX ABOVE
{0x1DCE, 0x1DFB, prCM, gcMn}, // [46] COMBINING OGONEK ABOVE..COMBINING DELETION MARK
{0x1DFC, 0x1DFC, prGL, gcMn}, // COMBINING DOUBLE INVERTED BREVE BELOW
{0x1DFD, 0x1DFF, prCM, gcMn}, // [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
{0x1E00, 0x1EFF, prAL, gcLC}, // [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
{0x1F00, 0x1F15, prAL, gcLC}, // [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
{0x1F18, 0x1F1D, prAL, gcLu}, // [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
@@ -889,7 +894,7 @@ var lineBreakCodePoints = [][4]int{
{0x2054, 0x2054, prAL, gcPc}, // INVERTED UNDERTIE
{0x2055, 0x2055, prAL, gcPo}, // FLOWER PUNCTUATION MARK
{0x2056, 0x2056, prBA, gcPo}, // THREE DOT PUNCTUATION
{0x2057, 0x2057, prAL, gcPo}, // QUADRUPLE PRIME
{0x2057, 0x2057, prPO, gcPo}, // QUADRUPLE PRIME
{0x2058, 0x205B, prBA, gcPo}, // [4] FOUR DOT PUNCTUATION..FOUR DOT MARK
{0x205C, 0x205C, prAL, gcPo}, // DOTTED CROSS
{0x205D, 0x205E, prBA, gcPo}, // [2] TRICOLON..VERTICAL FOUR DOTS
@@ -2751,6 +2756,7 @@ var lineBreakCodePoints = [][4]int{
{0x10EAB, 0x10EAC, prCM, gcMn}, // [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EAD, 0x10EAD, prBA, gcPd}, // YEZIDI HYPHENATION MARK
{0x10EB0, 0x10EB1, prAL, gcLo}, // [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prCM, gcMn}, // [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prAL, gcLo}, // [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F1D, 0x10F26, prAL, gcNo}, // [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
{0x10F27, 0x10F27, prAL, gcLo}, // OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -2840,6 +2846,8 @@ var lineBreakCodePoints = [][4]int{
{0x1123B, 0x1123C, prBA, gcPo}, // [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
{0x1123D, 0x1123D, prAL, gcPo}, // KHOJKI ABBREVIATION SIGN
{0x1123E, 0x1123E, prCM, gcMn}, // KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prAL, gcLo}, // [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prCM, gcMn}, // KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prAL, gcLo}, // [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prAL, gcLo}, // MULTANI LETTER GHA
{0x1128A, 0x1128D, prAL, gcLo}, // [4] MULTANI LETTER CA..MULTANI LETTER JJA
@@ -3013,6 +3021,7 @@ var lineBreakCodePoints = [][4]int{
{0x11AA1, 0x11AA2, prBA, gcPo}, // [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2
{0x11AB0, 0x11ABF, prAL, gcLo}, // [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
{0x11AC0, 0x11AF8, prAL, gcLo}, // [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
{0x11B00, 0x11B09, prBB, gcPo}, // [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
{0x11C00, 0x11C08, prAL, gcLo}, // [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
{0x11C0A, 0x11C2E, prAL, gcLo}, // [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
{0x11C2F, 0x11C2F, prCM, gcMc}, // BHAIKSUKI VOWEL SIGN AA
@@ -3059,6 +3068,20 @@ var lineBreakCodePoints = [][4]int{
{0x11EF3, 0x11EF4, prCM, gcMn}, // [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prCM, gcMc}, // [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11EF7, 0x11EF8, prAL, gcPo}, // [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
{0x11F00, 0x11F01, prCM, gcMn}, // [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prAL, gcLo}, // KAWI SIGN REPHA
{0x11F03, 0x11F03, prCM, gcMc}, // KAWI SIGN VISARGA
{0x11F04, 0x11F10, prAL, gcLo}, // [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prAL, gcLo}, // [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prCM, gcMc}, // [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prCM, gcMn}, // [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prCM, gcMc}, // [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prCM, gcMn}, // KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prCM, gcMc}, // KAWI SIGN KILLER
{0x11F42, 0x11F42, prCM, gcMn}, // KAWI CONJOINER
{0x11F43, 0x11F44, prBA, gcPo}, // [2] KAWI DANDA..KAWI DOUBLE DANDA
{0x11F45, 0x11F4F, prID, gcPo}, // [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL
{0x11F50, 0x11F59, prNU, gcNd}, // [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prAL, gcLo}, // LISU LETTER YHA
{0x11FC0, 0x11FD4, prAL, gcNo}, // [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
{0x11FD5, 0x11FDC, prAL, gcSo}, // [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
@@ -3084,10 +3107,18 @@ var lineBreakCodePoints = [][4]int{
{0x1328A, 0x13378, prAL, gcLo}, // [239] EGYPTIAN HIEROGLYPH O037..EGYPTIAN HIEROGLYPH V011
{0x13379, 0x13379, prOP, gcLo}, // EGYPTIAN HIEROGLYPH V011A
{0x1337A, 0x1337B, prCL, gcLo}, // [2] EGYPTIAN HIEROGLYPH V011B..EGYPTIAN HIEROGLYPH V011C
{0x1337C, 0x1342E, prAL, gcLo}, // [179] EGYPTIAN HIEROGLYPH V012..EGYPTIAN HIEROGLYPH AA032
{0x1337C, 0x1342F, prAL, gcLo}, // [180] EGYPTIAN HIEROGLYPH V012..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x13436, prGL, gcCf}, // [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
{0x13437, 0x13437, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN SEGMENT
{0x13438, 0x13438, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END SEGMENT
{0x13439, 0x1343B, prGL, gcCf}, // [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
{0x1343C, 0x1343C, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN ENCLOSURE
{0x1343D, 0x1343D, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END ENCLOSURE
{0x1343E, 0x1343E, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN WALLED ENCLOSURE
{0x1343F, 0x1343F, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prCM, gcMn}, // EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prAL, gcLo}, // [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prCM, gcMn}, // [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x145CD, prAL, gcLo}, // [462] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A409
{0x145CE, 0x145CE, prOP, gcLo}, // ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK
{0x145CF, 0x145CF, prCL, gcLo}, // ANATOLIAN HIEROGLYPH A410A END LOGOGRAM MARK
@@ -3137,7 +3168,9 @@ var lineBreakCodePoints = [][4]int{
{0x1AFFD, 0x1AFFE, prAL, gcLm}, // [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B0FF, prID, gcLo}, // [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
{0x1B100, 0x1B122, prID, gcLo}, // [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU
{0x1B132, 0x1B132, prCJ, gcLo}, // HIRAGANA LETTER SMALL KO
{0x1B150, 0x1B152, prCJ, gcLo}, // [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
{0x1B155, 0x1B155, prCJ, gcLo}, // KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prCJ, gcLo}, // [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1B170, 0x1B2FB, prID, gcLo}, // [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
{0x1BC00, 0x1BC6A, prAL, gcLo}, // [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
@@ -3168,6 +3201,7 @@ var lineBreakCodePoints = [][4]int{
{0x1D200, 0x1D241, prAL, gcSo}, // [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
{0x1D242, 0x1D244, prCM, gcMn}, // [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
{0x1D245, 0x1D245, prAL, gcSo}, // GREEK MUSICAL LEIMMA
{0x1D2C0, 0x1D2D3, prAL, gcNo}, // [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN
{0x1D2E0, 0x1D2F3, prAL, gcNo}, // [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
{0x1D300, 0x1D356, prAL, gcSo}, // [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
{0x1D360, 0x1D378, prAL, gcNo}, // [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
@@ -3228,11 +3262,14 @@ var lineBreakCodePoints = [][4]int{
{0x1DF00, 0x1DF09, prAL, gcLl}, // [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prAL, gcLo}, // LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prAL, gcLl}, // [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prAL, gcLl}, // [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prCM, gcMn}, // [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prCM, gcMn}, // [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prCM, gcMn}, // [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prAL, gcLm}, // [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prCM, gcMn}, // COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prAL, gcLo}, // [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prCM, gcMn}, // [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prAL, gcLm}, // [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@@ -3245,6 +3282,10 @@ var lineBreakCodePoints = [][4]int{
{0x1E2EC, 0x1E2EF, prCM, gcMn}, // [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prNU, gcNd}, // [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E2FF, 0x1E2FF, prPR, gcSc}, // WANCHO NGUN SIGN
{0x1E4D0, 0x1E4EA, prAL, gcLo}, // [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prAL, gcLm}, // NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prCM, gcMn}, // [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prNU, gcNd}, // [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prAL, gcLo}, // [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prAL, gcLo}, // [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prAL, gcLo}, // [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -3412,16 +3453,18 @@ var lineBreakCodePoints = [][4]int{
{0x1F6C1, 0x1F6CB, prID, gcSo}, // [11] BATHTUB..COUCH AND LAMP
{0x1F6CC, 0x1F6CC, prEB, gcSo}, // SLEEPING ACCOMMODATION
{0x1F6CD, 0x1F6D7, prID, gcSo}, // [11] SHOPPING BAGS..ELEVATOR
{0x1F6D8, 0x1F6DC, prID, gcCn}, // [5] <reserved-1F6D8>..<reserved-1F6DC>
{0x1F6DD, 0x1F6EC, prID, gcSo}, // [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING
{0x1F6D8, 0x1F6DB, prID, gcCn}, // [4] <reserved-1F6D8>..<reserved-1F6DB>
{0x1F6DC, 0x1F6EC, prID, gcSo}, // [17] WIRELESS..AIRPLANE ARRIVING
{0x1F6ED, 0x1F6EF, prID, gcCn}, // [3] <reserved-1F6ED>..<reserved-1F6EF>
{0x1F6F0, 0x1F6FC, prID, gcSo}, // [13] SATELLITE..ROLLER SKATE
{0x1F6FD, 0x1F6FF, prID, gcCn}, // [3] <reserved-1F6FD>..<reserved-1F6FF>
{0x1F700, 0x1F773, prAL, gcSo}, // [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
{0x1F774, 0x1F77F, prID, gcCn}, // [12] <reserved-1F774>..<reserved-1F77F>
{0x1F774, 0x1F776, prID, gcSo}, // [3] LOT OF FORTUNE..LUNAR ECLIPSE
{0x1F777, 0x1F77A, prID, gcCn}, // [4] <reserved-1F777>..<reserved-1F77A>
{0x1F77B, 0x1F77F, prID, gcSo}, // [5] HAUMEA..ORCUS
{0x1F780, 0x1F7D4, prAL, gcSo}, // [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
{0x1F7D5, 0x1F7D8, prID, gcSo}, // [4] CIRCLED TRIANGLE..NEGATIVE CIRCLED SQUARE
{0x1F7D9, 0x1F7DF, prID, gcCn}, // [7] <reserved-1F7D9>..<reserved-1F7DF>
{0x1F7D5, 0x1F7D9, prID, gcSo}, // [5] CIRCLED TRIANGLE..NINE POINTED WHITE STAR
{0x1F7DA, 0x1F7DF, prID, gcCn}, // [6] <reserved-1F7DA>..<reserved-1F7DF>
{0x1F7E0, 0x1F7EB, prID, gcSo}, // [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
{0x1F7EC, 0x1F7EF, prID, gcCn}, // [4] <reserved-1F7EC>..<reserved-1F7EF>
{0x1F7F0, 0x1F7F0, prID, gcSo}, // HEAVY EQUALS SIGN
@@ -3467,33 +3510,29 @@ var lineBreakCodePoints = [][4]int{
{0x1FA54, 0x1FA5F, prID, gcCn}, // [12] <reserved-1FA54>..<reserved-1FA5F>
{0x1FA60, 0x1FA6D, prID, gcSo}, // [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
{0x1FA6E, 0x1FA6F, prID, gcCn}, // [2] <reserved-1FA6E>..<reserved-1FA6F>
{0x1FA70, 0x1FA74, prID, gcSo}, // [5] BALLET SHOES..THONG SANDAL
{0x1FA75, 0x1FA77, prID, gcCn}, // [3] <reserved-1FA75>..<reserved-1FA77>
{0x1FA78, 0x1FA7C, prID, gcSo}, // [5] DROP OF BLOOD..CRUTCH
{0x1FA70, 0x1FA7C, prID, gcSo}, // [13] BALLET SHOES..CRUTCH
{0x1FA7D, 0x1FA7F, prID, gcCn}, // [3] <reserved-1FA7D>..<reserved-1FA7F>
{0x1FA80, 0x1FA86, prID, gcSo}, // [7] YO-YO..NESTING DOLLS
{0x1FA87, 0x1FA8F, prID, gcCn}, // [9] <reserved-1FA87>..<reserved-1FA8F>
{0x1FA90, 0x1FAAC, prID, gcSo}, // [29] RINGED PLANET..HAMSA
{0x1FAAD, 0x1FAAF, prID, gcCn}, // [3] <reserved-1FAAD>..<reserved-1FAAF>
{0x1FAB0, 0x1FABA, prID, gcSo}, // [11] FLY..NEST WITH EGGS
{0x1FABB, 0x1FABF, prID, gcCn}, // [5] <reserved-1FABB>..<reserved-1FABF>
{0x1FAC0, 0x1FAC2, prID, gcSo}, // [3] ANATOMICAL HEART..PEOPLE HUGGING
{0x1FA80, 0x1FA88, prID, gcSo}, // [9] YO-YO..FLUTE
{0x1FA89, 0x1FA8F, prID, gcCn}, // [7] <reserved-1FA89>..<reserved-1FA8F>
{0x1FA90, 0x1FABD, prID, gcSo}, // [46] RINGED PLANET..WING
{0x1FABE, 0x1FABE, prID, gcCn}, // <reserved-1FABE>
{0x1FABF, 0x1FAC2, prID, gcSo}, // [4] GOOSE..PEOPLE HUGGING
{0x1FAC3, 0x1FAC5, prEB, gcSo}, // [3] PREGNANT MAN..PERSON WITH CROWN
{0x1FAC6, 0x1FACF, prID, gcCn}, // [10] <reserved-1FAC6>..<reserved-1FACF>
{0x1FAD0, 0x1FAD9, prID, gcSo}, // [10] BLUEBERRIES..JAR
{0x1FADA, 0x1FADF, prID, gcCn}, // [6] <reserved-1FADA>..<reserved-1FADF>
{0x1FAE0, 0x1FAE7, prID, gcSo}, // [8] MELTING FACE..BUBBLES
{0x1FAE8, 0x1FAEF, prID, gcCn}, // [8] <reserved-1FAE8>..<reserved-1FAEF>
{0x1FAF0, 0x1FAF6, prEB, gcSo}, // [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
{0x1FAF7, 0x1FAFF, prID, gcCn}, // [9] <reserved-1FAF7>..<reserved-1FAFF>
{0x1FAC6, 0x1FACD, prID, gcCn}, // [8] <reserved-1FAC6>..<reserved-1FACD>
{0x1FACE, 0x1FADB, prID, gcSo}, // [14] MOOSE..PEA POD
{0x1FADC, 0x1FADF, prID, gcCn}, // [4] <reserved-1FADC>..<reserved-1FADF>
{0x1FAE0, 0x1FAE8, prID, gcSo}, // [9] MELTING FACE..SHAKING FACE
{0x1FAE9, 0x1FAEF, prID, gcCn}, // [7] <reserved-1FAE9>..<reserved-1FAEF>
{0x1FAF0, 0x1FAF8, prEB, gcSo}, // [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
{0x1FAF9, 0x1FAFF, prID, gcCn}, // [7] <reserved-1FAF9>..<reserved-1FAFF>
{0x1FB00, 0x1FB92, prAL, gcSo}, // [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
{0x1FB94, 0x1FBCA, prAL, gcSo}, // [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
{0x1FBF0, 0x1FBF9, prNU, gcNd}, // [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x1FC00, 0x1FFFD, prID, gcCn}, // [1022] <reserved-1FC00>..<reserved-1FFFD>
{0x20000, 0x2A6DF, prID, gcLo}, // [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
{0x2A6E0, 0x2A6FF, prID, gcCn}, // [32] <reserved-2A6E0>..<reserved-2A6FF>
{0x2A700, 0x2B738, prID, gcLo}, // [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
{0x2B739, 0x2B73F, prID, gcCn}, // [7] <reserved-2B739>..<reserved-2B73F>
{0x2A700, 0x2B739, prID, gcLo}, // [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
{0x2B73A, 0x2B73F, prID, gcCn}, // [6] <reserved-2B73A>..<reserved-2B73F>
{0x2B740, 0x2B81D, prID, gcLo}, // [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
{0x2B81E, 0x2B81F, prID, gcCn}, // [2] <reserved-2B81E>..<reserved-2B81F>
{0x2B820, 0x2CEA1, prID, gcLo}, // [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
@@ -3504,7 +3543,9 @@ var lineBreakCodePoints = [][4]int{
{0x2FA1E, 0x2FA1F, prID, gcCn}, // [2] <reserved-2FA1E>..<reserved-2FA1F>
{0x2FA20, 0x2FFFD, prID, gcCn}, // [1502] <reserved-2FA20>..<reserved-2FFFD>
{0x30000, 0x3134A, prID, gcLo}, // [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
{0x3134B, 0x3FFFD, prID, gcCn}, // [60595] <reserved-3134B>..<reserved-3FFFD>
{0x3134B, 0x3134F, prID, gcCn}, // [5] <reserved-3134B>..<reserved-3134F>
{0x31350, 0x323AF, prID, gcLo}, // [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
{0x323B0, 0x3FFFD, prID, gcCn}, // [56398] <reserved-323B0>..<reserved-3FFFD>
{0xE0001, 0xE0001, prCM, gcCf}, // LANGUAGE TAG
{0xE0020, 0xE007F, prCM, gcCf}, // [96] TAG SPACE..CANCEL TAG
{0xE0100, 0xE01EF, prCM, gcMn}, // [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

View File

@@ -64,222 +64,381 @@ const (
LineMustBreak // You must break the line here.
)
// The line break parser's state transitions. It's anologous to grTransitions,
// see comments there for details. Unicode version 14.0.0.
var lbTransitions = map[[2]int][3]int{
// lbTransitions implements the line break parser's state transitions. It's
// anologous to [grTransitions], see comments there for details.
//
// Unicode version 15.0.0.
func lbTransitions(state, prop int) (newState, lineBreak, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// LB4.
{lbAny, prBK}: {lbBK, LineCanBreak, 310},
{lbBK, prAny}: {lbAny, LineMustBreak, 40},
case lbBK | prAny<<32:
return lbAny, LineMustBreak, 40
// LB5.
{lbAny, prCR}: {lbCR, LineCanBreak, 310},
{lbAny, prLF}: {lbLF, LineCanBreak, 310},
{lbAny, prNL}: {lbNL, LineCanBreak, 310},
{lbCR, prLF}: {lbLF, LineDontBreak, 50},
{lbCR, prAny}: {lbAny, LineMustBreak, 50},
{lbLF, prAny}: {lbAny, LineMustBreak, 50},
{lbNL, prAny}: {lbAny, LineMustBreak, 50},
case lbCR | prLF<<32:
return lbLF, LineDontBreak, 50
case lbCR | prAny<<32:
return lbAny, LineMustBreak, 50
case lbLF | prAny<<32:
return lbAny, LineMustBreak, 50
case lbNL | prAny<<32:
return lbAny, LineMustBreak, 50
// LB6.
{lbAny, prBK}: {lbBK, LineDontBreak, 60},
{lbAny, prCR}: {lbCR, LineDontBreak, 60},
{lbAny, prLF}: {lbLF, LineDontBreak, 60},
{lbAny, prNL}: {lbNL, LineDontBreak, 60},
case lbAny | prBK<<32:
return lbBK, LineDontBreak, 60
case lbAny | prCR<<32:
return lbCR, LineDontBreak, 60
case lbAny | prLF<<32:
return lbLF, LineDontBreak, 60
case lbAny | prNL<<32:
return lbNL, LineDontBreak, 60
// LB7.
{lbAny, prSP}: {lbSP, LineDontBreak, 70},
{lbAny, prZW}: {lbZW, LineDontBreak, 70},
case lbAny | prSP<<32:
return lbSP, LineDontBreak, 70
case lbAny | prZW<<32:
return lbZW, LineDontBreak, 70
// LB8.
{lbZW, prSP}: {lbZW, LineDontBreak, 70},
{lbZW, prAny}: {lbAny, LineCanBreak, 80},
case lbZW | prSP<<32:
return lbZW, LineDontBreak, 70
case lbZW | prAny<<32:
return lbAny, LineCanBreak, 80
// LB11.
{lbAny, prWJ}: {lbWJ, LineDontBreak, 110},
{lbWJ, prAny}: {lbAny, LineDontBreak, 110},
case lbAny | prWJ<<32:
return lbWJ, LineDontBreak, 110
case lbWJ | prAny<<32:
return lbAny, LineDontBreak, 110
// LB12.
{lbAny, prGL}: {lbGL, LineCanBreak, 310},
{lbGL, prAny}: {lbAny, LineDontBreak, 120},
case lbAny | prGL<<32:
return lbGL, LineCanBreak, 310
case lbGL | prAny<<32:
return lbAny, LineDontBreak, 120
// LB13 (simple transitions).
{lbAny, prCL}: {lbCL, LineCanBreak, 310},
{lbAny, prCP}: {lbCP, LineCanBreak, 310},
{lbAny, prEX}: {lbEX, LineDontBreak, 130},
{lbAny, prIS}: {lbIS, LineCanBreak, 310},
{lbAny, prSY}: {lbSY, LineCanBreak, 310},
case lbAny | prCL<<32:
return lbCL, LineCanBreak, 310
case lbAny | prCP<<32:
return lbCP, LineCanBreak, 310
case lbAny | prEX<<32:
return lbEX, LineDontBreak, 130
case lbAny | prIS<<32:
return lbIS, LineCanBreak, 310
case lbAny | prSY<<32:
return lbSY, LineCanBreak, 310
// LB14.
{lbAny, prOP}: {lbOP, LineCanBreak, 310},
{lbOP, prSP}: {lbOP, LineDontBreak, 70},
{lbOP, prAny}: {lbAny, LineDontBreak, 140},
case lbAny | prOP<<32:
return lbOP, LineCanBreak, 310
case lbOP | prSP<<32:
return lbOP, LineDontBreak, 70
case lbOP | prAny<<32:
return lbAny, LineDontBreak, 140
// LB15.
{lbQU, prSP}: {lbQUSP, LineDontBreak, 70},
{lbQU, prOP}: {lbOP, LineDontBreak, 150},
{lbQUSP, prOP}: {lbOP, LineDontBreak, 150},
case lbQU | prSP<<32:
return lbQUSP, LineDontBreak, 70
case lbQU | prOP<<32:
return lbOP, LineDontBreak, 150
case lbQUSP | prOP<<32:
return lbOP, LineDontBreak, 150
// LB16.
{lbCL, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbNUCL, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbCP, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbNUCP, prSP}: {lbCLCPSP, LineDontBreak, 70},
{lbCL, prNS}: {lbNS, LineDontBreak, 160},
{lbNUCL, prNS}: {lbNS, LineDontBreak, 160},
{lbCP, prNS}: {lbNS, LineDontBreak, 160},
{lbNUCP, prNS}: {lbNS, LineDontBreak, 160},
{lbCLCPSP, prNS}: {lbNS, LineDontBreak, 160},
case lbCL | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbNUCL | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbCP | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbNUCP | prSP<<32:
return lbCLCPSP, LineDontBreak, 70
case lbCL | prNS<<32:
return lbNS, LineDontBreak, 160
case lbNUCL | prNS<<32:
return lbNS, LineDontBreak, 160
case lbCP | prNS<<32:
return lbNS, LineDontBreak, 160
case lbNUCP | prNS<<32:
return lbNS, LineDontBreak, 160
case lbCLCPSP | prNS<<32:
return lbNS, LineDontBreak, 160
// LB17.
{lbAny, prB2}: {lbB2, LineCanBreak, 310},
{lbB2, prSP}: {lbB2SP, LineDontBreak, 70},
{lbB2, prB2}: {lbB2, LineDontBreak, 170},
{lbB2SP, prB2}: {lbB2, LineDontBreak, 170},
case lbAny | prB2<<32:
return lbB2, LineCanBreak, 310
case lbB2 | prSP<<32:
return lbB2SP, LineDontBreak, 70
case lbB2 | prB2<<32:
return lbB2, LineDontBreak, 170
case lbB2SP | prB2<<32:
return lbB2, LineDontBreak, 170
// LB18.
{lbSP, prAny}: {lbAny, LineCanBreak, 180},
{lbQUSP, prAny}: {lbAny, LineCanBreak, 180},
{lbCLCPSP, prAny}: {lbAny, LineCanBreak, 180},
{lbB2SP, prAny}: {lbAny, LineCanBreak, 180},
case lbSP | prAny<<32:
return lbAny, LineCanBreak, 180
case lbQUSP | prAny<<32:
return lbAny, LineCanBreak, 180
case lbCLCPSP | prAny<<32:
return lbAny, LineCanBreak, 180
case lbB2SP | prAny<<32:
return lbAny, LineCanBreak, 180
// LB19.
{lbAny, prQU}: {lbQU, LineDontBreak, 190},
{lbQU, prAny}: {lbAny, LineDontBreak, 190},
case lbAny | prQU<<32:
return lbQU, LineDontBreak, 190
case lbQU | prAny<<32:
return lbAny, LineDontBreak, 190
// LB20.
{lbAny, prCB}: {lbCB, LineCanBreak, 200},
{lbCB, prAny}: {lbAny, LineCanBreak, 200},
case lbAny | prCB<<32:
return lbCB, LineCanBreak, 200
case lbCB | prAny<<32:
return lbAny, LineCanBreak, 200
// LB21.
{lbAny, prBA}: {lbBA, LineDontBreak, 210},
{lbAny, prHY}: {lbHY, LineDontBreak, 210},
{lbAny, prNS}: {lbNS, LineDontBreak, 210},
{lbAny, prBB}: {lbBB, LineCanBreak, 310},
{lbBB, prAny}: {lbAny, LineDontBreak, 210},
case lbAny | prBA<<32:
return lbBA, LineDontBreak, 210
case lbAny | prHY<<32:
return lbHY, LineDontBreak, 210
case lbAny | prNS<<32:
return lbNS, LineDontBreak, 210
case lbAny | prBB<<32:
return lbBB, LineCanBreak, 310
case lbBB | prAny<<32:
return lbAny, LineDontBreak, 210
// LB21a.
{lbAny, prHL}: {lbHL, LineCanBreak, 310},
{lbHL, prHY}: {lbLB21a, LineDontBreak, 210},
{lbHL, prBA}: {lbLB21a, LineDontBreak, 210},
{lbLB21a, prAny}: {lbAny, LineDontBreak, 211},
case lbAny | prHL<<32:
return lbHL, LineCanBreak, 310
case lbHL | prHY<<32:
return lbLB21a, LineDontBreak, 210
case lbHL | prBA<<32:
return lbLB21a, LineDontBreak, 210
case lbLB21a | prAny<<32:
return lbAny, LineDontBreak, 211
// LB21b.
{lbSY, prHL}: {lbHL, LineDontBreak, 212},
{lbNUSY, prHL}: {lbHL, LineDontBreak, 212},
case lbSY | prHL<<32:
return lbHL, LineDontBreak, 212
case lbNUSY | prHL<<32:
return lbHL, LineDontBreak, 212
// LB22.
{lbAny, prIN}: {lbAny, LineDontBreak, 220},
case lbAny | prIN<<32:
return lbAny, LineDontBreak, 220
// LB23.
{lbAny, prAL}: {lbAL, LineCanBreak, 310},
{lbAny, prNU}: {lbNU, LineCanBreak, 310},
{lbAL, prNU}: {lbNU, LineDontBreak, 230},
{lbHL, prNU}: {lbNU, LineDontBreak, 230},
{lbNU, prAL}: {lbAL, LineDontBreak, 230},
{lbNU, prHL}: {lbHL, LineDontBreak, 230},
{lbNUNU, prAL}: {lbAL, LineDontBreak, 230},
{lbNUNU, prHL}: {lbHL, LineDontBreak, 230},
case lbAny | prAL<<32:
return lbAL, LineCanBreak, 310
case lbAny | prNU<<32:
return lbNU, LineCanBreak, 310
case lbAL | prNU<<32:
return lbNU, LineDontBreak, 230
case lbHL | prNU<<32:
return lbNU, LineDontBreak, 230
case lbNU | prAL<<32:
return lbAL, LineDontBreak, 230
case lbNU | prHL<<32:
return lbHL, LineDontBreak, 230
case lbNUNU | prAL<<32:
return lbAL, LineDontBreak, 230
case lbNUNU | prHL<<32:
return lbHL, LineDontBreak, 230
// LB23a.
{lbAny, prPR}: {lbPR, LineCanBreak, 310},
{lbAny, prID}: {lbIDEM, LineCanBreak, 310},
{lbAny, prEB}: {lbEB, LineCanBreak, 310},
{lbAny, prEM}: {lbIDEM, LineCanBreak, 310},
{lbPR, prID}: {lbIDEM, LineDontBreak, 231},
{lbPR, prEB}: {lbEB, LineDontBreak, 231},
{lbPR, prEM}: {lbIDEM, LineDontBreak, 231},
{lbIDEM, prPO}: {lbPO, LineDontBreak, 231},
{lbEB, prPO}: {lbPO, LineDontBreak, 231},
case lbAny | prPR<<32:
return lbPR, LineCanBreak, 310
case lbAny | prID<<32:
return lbIDEM, LineCanBreak, 310
case lbAny | prEB<<32:
return lbEB, LineCanBreak, 310
case lbAny | prEM<<32:
return lbIDEM, LineCanBreak, 310
case lbPR | prID<<32:
return lbIDEM, LineDontBreak, 231
case lbPR | prEB<<32:
return lbEB, LineDontBreak, 231
case lbPR | prEM<<32:
return lbIDEM, LineDontBreak, 231
case lbIDEM | prPO<<32:
return lbPO, LineDontBreak, 231
case lbEB | prPO<<32:
return lbPO, LineDontBreak, 231
// LB24.
{lbAny, prPO}: {lbPO, LineCanBreak, 310},
{lbPR, prAL}: {lbAL, LineDontBreak, 240},
{lbPR, prHL}: {lbHL, LineDontBreak, 240},
{lbPO, prAL}: {lbAL, LineDontBreak, 240},
{lbPO, prHL}: {lbHL, LineDontBreak, 240},
{lbAL, prPR}: {lbPR, LineDontBreak, 240},
{lbAL, prPO}: {lbPO, LineDontBreak, 240},
{lbHL, prPR}: {lbPR, LineDontBreak, 240},
{lbHL, prPO}: {lbPO, LineDontBreak, 240},
case lbAny | prPO<<32:
return lbPO, LineCanBreak, 310
case lbPR | prAL<<32:
return lbAL, LineDontBreak, 240
case lbPR | prHL<<32:
return lbHL, LineDontBreak, 240
case lbPO | prAL<<32:
return lbAL, LineDontBreak, 240
case lbPO | prHL<<32:
return lbHL, LineDontBreak, 240
case lbAL | prPR<<32:
return lbPR, LineDontBreak, 240
case lbAL | prPO<<32:
return lbPO, LineDontBreak, 240
case lbHL | prPR<<32:
return lbPR, LineDontBreak, 240
case lbHL | prPO<<32:
return lbPO, LineDontBreak, 240
// LB25 (simple transitions).
{lbPR, prNU}: {lbNU, LineDontBreak, 250},
{lbPO, prNU}: {lbNU, LineDontBreak, 250},
{lbOP, prNU}: {lbNU, LineDontBreak, 250},
{lbHY, prNU}: {lbNU, LineDontBreak, 250},
{lbNU, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNU, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNU, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNUNU, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNUNU, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNUNU, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNUSY, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNUSY, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNUSY, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNUIS, prNU}: {lbNUNU, LineDontBreak, 250},
{lbNUIS, prSY}: {lbNUSY, LineDontBreak, 250},
{lbNUIS, prIS}: {lbNUIS, LineDontBreak, 250},
{lbNU, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNU, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNUNU, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNUNU, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNUSY, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNUSY, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNUIS, prCL}: {lbNUCL, LineDontBreak, 250},
{lbNUIS, prCP}: {lbNUCP, LineDontBreak, 250},
{lbNU, prPO}: {lbPO, LineDontBreak, 250},
{lbNUNU, prPO}: {lbPO, LineDontBreak, 250},
{lbNUSY, prPO}: {lbPO, LineDontBreak, 250},
{lbNUIS, prPO}: {lbPO, LineDontBreak, 250},
{lbNUCL, prPO}: {lbPO, LineDontBreak, 250},
{lbNUCP, prPO}: {lbPO, LineDontBreak, 250},
{lbNU, prPR}: {lbPR, LineDontBreak, 250},
{lbNUNU, prPR}: {lbPR, LineDontBreak, 250},
{lbNUSY, prPR}: {lbPR, LineDontBreak, 250},
{lbNUIS, prPR}: {lbPR, LineDontBreak, 250},
{lbNUCL, prPR}: {lbPR, LineDontBreak, 250},
{lbNUCP, prPR}: {lbPR, LineDontBreak, 250},
case lbPR | prNU<<32:
return lbNU, LineDontBreak, 250
case lbPO | prNU<<32:
return lbNU, LineDontBreak, 250
case lbOP | prNU<<32:
return lbNU, LineDontBreak, 250
case lbHY | prNU<<32:
return lbNU, LineDontBreak, 250
case lbNU | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNU | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNU | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNUNU | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNUNU | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNUNU | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNUSY | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNUSY | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNUSY | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNUIS | prNU<<32:
return lbNUNU, LineDontBreak, 250
case lbNUIS | prSY<<32:
return lbNUSY, LineDontBreak, 250
case lbNUIS | prIS<<32:
return lbNUIS, LineDontBreak, 250
case lbNU | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNU | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUNU | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUNU | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUSY | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUSY | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUIS | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUIS | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNU | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUNU | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUSY | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUIS | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUCL | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUCP | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNU | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUNU | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUSY | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUIS | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUCL | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUCP | prPR<<32:
return lbPR, LineDontBreak, 250
// LB26.
{lbAny, prJL}: {lbJL, LineCanBreak, 310},
{lbAny, prJV}: {lbJV, LineCanBreak, 310},
{lbAny, prJT}: {lbJT, LineCanBreak, 310},
{lbAny, prH2}: {lbH2, LineCanBreak, 310},
{lbAny, prH3}: {lbH3, LineCanBreak, 310},
{lbJL, prJL}: {lbJL, LineDontBreak, 260},
{lbJL, prJV}: {lbJV, LineDontBreak, 260},
{lbJL, prH2}: {lbH2, LineDontBreak, 260},
{lbJL, prH3}: {lbH3, LineDontBreak, 260},
{lbJV, prJV}: {lbJV, LineDontBreak, 260},
{lbJV, prJT}: {lbJT, LineDontBreak, 260},
{lbH2, prJV}: {lbJV, LineDontBreak, 260},
{lbH2, prJT}: {lbJT, LineDontBreak, 260},
{lbJT, prJT}: {lbJT, LineDontBreak, 260},
{lbH3, prJT}: {lbJT, LineDontBreak, 260},
case lbAny | prJL<<32:
return lbJL, LineCanBreak, 310
case lbAny | prJV<<32:
return lbJV, LineCanBreak, 310
case lbAny | prJT<<32:
return lbJT, LineCanBreak, 310
case lbAny | prH2<<32:
return lbH2, LineCanBreak, 310
case lbAny | prH3<<32:
return lbH3, LineCanBreak, 310
case lbJL | prJL<<32:
return lbJL, LineDontBreak, 260
case lbJL | prJV<<32:
return lbJV, LineDontBreak, 260
case lbJL | prH2<<32:
return lbH2, LineDontBreak, 260
case lbJL | prH3<<32:
return lbH3, LineDontBreak, 260
case lbJV | prJV<<32:
return lbJV, LineDontBreak, 260
case lbJV | prJT<<32:
return lbJT, LineDontBreak, 260
case lbH2 | prJV<<32:
return lbJV, LineDontBreak, 260
case lbH2 | prJT<<32:
return lbJT, LineDontBreak, 260
case lbJT | prJT<<32:
return lbJT, LineDontBreak, 260
case lbH3 | prJT<<32:
return lbJT, LineDontBreak, 260
// LB27.
{lbJL, prPO}: {lbPO, LineDontBreak, 270},
{lbJV, prPO}: {lbPO, LineDontBreak, 270},
{lbJT, prPO}: {lbPO, LineDontBreak, 270},
{lbH2, prPO}: {lbPO, LineDontBreak, 270},
{lbH3, prPO}: {lbPO, LineDontBreak, 270},
{lbPR, prJL}: {lbJL, LineDontBreak, 270},
{lbPR, prJV}: {lbJV, LineDontBreak, 270},
{lbPR, prJT}: {lbJT, LineDontBreak, 270},
{lbPR, prH2}: {lbH2, LineDontBreak, 270},
{lbPR, prH3}: {lbH3, LineDontBreak, 270},
case lbJL | prPO<<32:
return lbPO, LineDontBreak, 270
case lbJV | prPO<<32:
return lbPO, LineDontBreak, 270
case lbJT | prPO<<32:
return lbPO, LineDontBreak, 270
case lbH2 | prPO<<32:
return lbPO, LineDontBreak, 270
case lbH3 | prPO<<32:
return lbPO, LineDontBreak, 270
case lbPR | prJL<<32:
return lbJL, LineDontBreak, 270
case lbPR | prJV<<32:
return lbJV, LineDontBreak, 270
case lbPR | prJT<<32:
return lbJT, LineDontBreak, 270
case lbPR | prH2<<32:
return lbH2, LineDontBreak, 270
case lbPR | prH3<<32:
return lbH3, LineDontBreak, 270
// LB28.
{lbAL, prAL}: {lbAL, LineDontBreak, 280},
{lbAL, prHL}: {lbHL, LineDontBreak, 280},
{lbHL, prAL}: {lbAL, LineDontBreak, 280},
{lbHL, prHL}: {lbHL, LineDontBreak, 280},
case lbAL | prAL<<32:
return lbAL, LineDontBreak, 280
case lbAL | prHL<<32:
return lbHL, LineDontBreak, 280
case lbHL | prAL<<32:
return lbAL, LineDontBreak, 280
case lbHL | prHL<<32:
return lbHL, LineDontBreak, 280
// LB29.
{lbIS, prAL}: {lbAL, LineDontBreak, 290},
{lbIS, prHL}: {lbHL, LineDontBreak, 290},
{lbNUIS, prAL}: {lbAL, LineDontBreak, 290},
{lbNUIS, prHL}: {lbHL, LineDontBreak, 290},
case lbIS | prAL<<32:
return lbAL, LineDontBreak, 290
case lbIS | prHL<<32:
return lbHL, LineDontBreak, 290
case lbNUIS | prAL<<32:
return lbAL, LineDontBreak, 290
case lbNUIS | prHL<<32:
return lbHL, LineDontBreak, 290
default:
return -1, -1, -1
}
}
// transitionLineBreakState determines the new state of the line break parser
@@ -290,7 +449,7 @@ var lbTransitions = map[[2]int][3]int{
// further lookups.
func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) {
// Determine the property of the next character.
nextProperty, generalCategory := propertyWithGenCat(lineBreakCodePoints, r)
nextProperty, generalCategory := propertyLineBreak(r)
// Prepare.
var forceNoBreak, isCPeaFWH bool
@@ -306,7 +465,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
defer func() {
// Transition into LB30.
if newState == lbCP || newState == lbNUCP {
ea := property(eastAsianWidth, r)
ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH {
newState |= lbCPeaFWHBit
}
@@ -352,30 +511,27 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
// Find the applicable transition in the table.
var rule int
transition, ok := lbTransitions[[2]int{state, nextProperty}]
if ok {
// We have a specific transition. We'll use it.
newState, lineBreak, rule = transition[0], transition[1], transition[2]
} else {
newState, lineBreak, rule = lbTransitions(state, nextProperty)
if newState < 0 {
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := lbTransitions[[2]int{state, prAny}]
transAnyState, okAnyState := lbTransitions[[2]int{lbAny, nextProperty}]
if okAnyProp && okAnyState {
anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny)
anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty)
if anyPropProp >= 0 && anyStateProp >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2]
if transAnyProp[2] < transAnyState[2] {
lineBreak, rule = transAnyProp[1], transAnyProp[2]
newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
if anyPropRule < anyStateRule {
lineBreak, rule = anyPropLineBreak, anyPropRule
}
} else if okAnyProp {
} else if anyPropProp >= 0 {
// We only have a specific state.
newState, lineBreak, rule = transAnyProp[0], transAnyProp[1], transAnyProp[2]
newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
} else if okAnyState {
} else if anyStateProp >= 0 {
// We only have a specific property.
newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2]
newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
} else {
// No known transition. LB31: ALL ÷ ALL.
newState, lineBreak, rule = lbAny, LineCanBreak, 310
@@ -414,7 +570,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
r, _ = utf8.DecodeRuneInString(str)
}
if r != utf8.RuneError {
pr, _ := propertyWithGenCat(lineBreakCodePoints, r)
pr, _ := propertyLineBreak(r)
if pr == prNU {
return lbNU, LineDontBreak
}
@@ -424,7 +580,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
// LB30 (part one).
if rule > 300 {
if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
ea := property(eastAsianWidth, r)
ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH {
return lbOP, LineDontBreak
}
@@ -460,7 +616,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
return prAny, LineDontBreak
}
}
graphemeProperty := property(graphemeCodePoints, r)
graphemeProperty := propertyGraphemes(r)
if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
return lbExtPicCn, LineCanBreak
}

View File

@@ -160,9 +160,49 @@ func property(dictionary [][3]int, r rune) int {
return propertySearch(dictionary, r)[2]
}
// propertyWithGenCat returns the Unicode property value and General Category
// (see constants above) of the given code point.
func propertyWithGenCat(dictionary [][4]int, r rune) (property, generalCategory int) {
entry := propertySearch(dictionary, r)
// propertyLineBreak returns the Unicode property value and General Category
// (see constants above) of the given code point, as listed in the line break
// code points table, while fast tracking ASCII digits and letters.
func propertyLineBreak(r rune) (property, generalCategory int) {
if r >= 'a' && r <= 'z' {
return prAL, gcLl
}
if r >= 'A' && r <= 'Z' {
return prAL, gcLu
}
if r >= '0' && r <= '9' {
return prNU, gcNd
}
entry := propertySearch(lineBreakCodePoints, r)
return entry[2], entry[3]
}
// propertyGraphemes returns the Unicode grapheme cluster property value of the
// given code point while fast tracking ASCII characters.
func propertyGraphemes(r rune) int {
if r >= 0x20 && r <= 0x7e {
return prAny
}
if r == 0x0a {
return prLF
}
if r == 0x0d {
return prCR
}
if r >= 0 && r <= 0x1f || r == 0x7f {
return prControl
}
return property(graphemeCodePoints, r)
}
// propertyEastAsianWidth returns the Unicode East Asian Width property value of
// the given code point while fast tracking ASCII characters.
func propertyEastAsianWidth(r rune) int {
if r >= 0x20 && r <= 0x7e {
return prNa
}
if r >= 0 && r <= 0x1f || r == 0x7f {
return prN
}
return property(eastAsianWidth, r)
}

View File

@@ -3,7 +3,7 @@ package uniseg
import "unicode/utf8"
// FirstSentence returns the first sentence found in the given byte slice
// according to the rules of Unicode Standard Annex #29, Sentence Boundaries.
// according to the rules of [Unicode Standard Annex #29, Sentence Boundaries].
// This function can be called continuously to extract all sentences from a byte
// slice, as illustrated in the example below.
//
@@ -17,6 +17,8 @@ import "unicode/utf8"
// slice is the sub-slice of the input slice containing the identified sentence.
//
// Given an empty byte slice "b", the function returns nil values.
//
// [Unicode Standard Annex #29, Sentence Boundaries]: http://unicode.org/reports/tr29/#Sentence_Boundaries
func FirstSentence(b []byte, state int) (sentence, rest []byte, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {

View File

@@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// sentenceBreakCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt
// https://www.unicode.org/Public/15.0.0/ucd/auxiliary/SentenceBreakProperty.txt
// and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var sentenceBreakCodePoints = [][3]int{
{0x0009, 0x0009, prSp}, // Cc <control-0009>
@@ -843,6 +843,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prOLetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prExtend}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prOLetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -896,7 +897,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x0EBD, 0x0EBD, prOLetter}, // Lo LAO SEMIVOWEL SIGN NYO
{0x0EC0, 0x0EC4, prOLetter}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
{0x0EC6, 0x0EC6, prOLetter}, // Lm LAO KO LA
{0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
{0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0EDC, 0x0EDF, prOLetter}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO
{0x0F00, 0x0F00, prOLetter}, // Lo TIBETAN SYLLABLE OM
@@ -958,7 +959,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x10C7, 0x10C7, prUpper}, // L& GEORGIAN CAPITAL LETTER YN
{0x10CD, 0x10CD, prUpper}, // L& GEORGIAN CAPITAL LETTER AEN
{0x10D0, 0x10FA, prOLetter}, // L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
{0x10FC, 0x10FC, prOLetter}, // Lm MODIFIER LETTER GEORGIAN NAR
{0x10FC, 0x10FC, prLower}, // Lm MODIFIER LETTER GEORGIAN NAR
{0x10FD, 0x10FF, prOLetter}, // L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
{0x1100, 0x1248, prOLetter}, // Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA
{0x124A, 0x124D, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
@@ -2034,7 +2035,7 @@ var sentenceBreakCodePoints = [][3]int{
{0xA7D7, 0xA7D7, prLower}, // L& LATIN SMALL LETTER MIDDLE SCOTS S
{0xA7D8, 0xA7D8, prUpper}, // L& LATIN CAPITAL LETTER SIGMOID S
{0xA7D9, 0xA7D9, prLower}, // L& LATIN SMALL LETTER SIGMOID S
{0xA7F2, 0xA7F4, prOLetter}, // Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
{0xA7F2, 0xA7F4, prLower}, // Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
{0xA7F5, 0xA7F5, prUpper}, // L& LATIN CAPITAL LETTER REVERSED HALF H
{0xA7F6, 0xA7F6, prLower}, // L& LATIN SMALL LETTER REVERSED HALF H
{0xA7F7, 0xA7F7, prOLetter}, // Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -2140,7 +2141,7 @@ var sentenceBreakCodePoints = [][3]int{
{0xAB30, 0xAB5A, prLower}, // L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
{0xAB5C, 0xAB5F, prLower}, // Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
{0xAB60, 0xAB68, prLower}, // L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
{0xAB69, 0xAB69, prOLetter}, // Lm MODIFIER LETTER SMALL TURNED W
{0xAB69, 0xAB69, prLower}, // Lm MODIFIER LETTER SMALL TURNED W
{0xAB70, 0xABBF, prLower}, // L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
{0xABC0, 0xABE2, prOLetter}, // Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
{0xABE3, 0xABE4, prExtend}, // Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
@@ -2334,6 +2335,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x10E80, 0x10EA9, prOLetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
{0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EB0, 0x10EB1, prOLetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prOLetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F27, 0x10F27, prOLetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH
{0x10F30, 0x10F45, prOLetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -2408,6 +2410,8 @@ var sentenceBreakCodePoints = [][3]int{
{0x11238, 0x11239, prSTerm}, // Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
{0x1123B, 0x1123C, prSTerm}, // Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
{0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prOLetter}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prOLetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prOLetter}, // Lo MULTANI LETTER GHA
{0x1128A, 0x1128D, prOLetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
@@ -2603,13 +2607,29 @@ var sentenceBreakCodePoints = [][3]int{
{0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11EF7, 0x11EF8, prSTerm}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
{0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prOLetter}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prExtend}, // Mc KAWI SIGN VISARGA
{0x11F04, 0x11F10, prOLetter}, // Lo [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prOLetter}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prExtend}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prExtend}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prExtend}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER
{0x11F43, 0x11F44, prSTerm}, // Po [2] KAWI DANDA..KAWI DOUBLE DANDA
{0x11F50, 0x11F59, prNumeric}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prOLetter}, // Lo LISU LETTER YHA
{0x12000, 0x12399, prOLetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
{0x12400, 0x1246E, prOLetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
{0x12480, 0x12543, prOLetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
{0x12F90, 0x12FF0, prOLetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
{0x13000, 0x1342E, prOLetter}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
{0x13430, 0x13438, prFormat}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
{0x13000, 0x1342F, prOLetter}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x1343F, prFormat}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prOLetter}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x14646, prOLetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
{0x16800, 0x16A38, prOLetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
{0x16A40, 0x16A5E, prOLetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK
@@ -2648,7 +2668,9 @@ var sentenceBreakCodePoints = [][3]int{
{0x1AFF5, 0x1AFFB, prOLetter}, // Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
{0x1AFFD, 0x1AFFE, prOLetter}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B122, prOLetter}, // Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU
{0x1B132, 0x1B132, prOLetter}, // Lo HIRAGANA LETTER SMALL KO
{0x1B150, 0x1B152, prOLetter}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
{0x1B155, 0x1B155, prOLetter}, // Lo KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prOLetter}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1B170, 0x1B2FB, prOLetter}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
{0x1BC00, 0x1BC6A, prOLetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
@@ -2738,11 +2760,14 @@ var sentenceBreakCodePoints = [][3]int{
{0x1DF00, 0x1DF09, prLower}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prOLetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prLower}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prLower}, // L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prLower}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prOLetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prOLetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@@ -2753,6 +2778,10 @@ var sentenceBreakCodePoints = [][3]int{
{0x1E2C0, 0x1E2EB, prOLetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
{0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E4D0, 0x1E4EA, prOLetter}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prOLetter}, // Lm NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prNumeric}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prOLetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prOLetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -2803,12 +2832,13 @@ var sentenceBreakCodePoints = [][3]int{
{0x1F676, 0x1F678, prClose}, // So [3] SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT..SANS-SERIF HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT
{0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x20000, 0x2A6DF, prOLetter}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
{0x2A700, 0x2B738, prOLetter}, // Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738
{0x2A700, 0x2B739, prOLetter}, // Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
{0x2B740, 0x2B81D, prOLetter}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
{0x2B820, 0x2CEA1, prOLetter}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
{0x2CEB0, 0x2EBE0, prOLetter}, // Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
{0x2F800, 0x2FA1D, prOLetter}, // Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
{0x30000, 0x3134A, prOLetter}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
{0x31350, 0x323AF, prOLetter}, // Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
{0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG
{0xE0020, 0xE007F, prExtend}, // Cf [96] TAG SPACE..CANCEL TAG
{0xE0100, 0xE01EF, prExtend}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

View File

@@ -18,104 +18,178 @@ const (
sbSB8aSp
)
// The sentence break parser's breaking instructions.
const (
sbDontBreak = iota
sbBreak
)
// The sentence break parser's state transitions. It's anologous to
// grTransitions, see comments there for details. Unicode version 14.0.0.
var sbTransitions = map[[2]int][3]int{
// sbTransitions implements the sentence break parser's state transitions. It's
// anologous to [grTransitions], see comments there for details.
//
// Unicode version 15.0.0.
func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// SB3.
{sbAny, prCR}: {sbCR, sbDontBreak, 9990},
{sbCR, prLF}: {sbParaSep, sbDontBreak, 30},
case sbAny | prCR<<32:
return sbCR, false, 9990
case sbCR | prLF<<32:
return sbParaSep, false, 30
// SB4.
{sbAny, prSep}: {sbParaSep, sbDontBreak, 9990},
{sbAny, prLF}: {sbParaSep, sbDontBreak, 9990},
{sbParaSep, prAny}: {sbAny, sbBreak, 40},
{sbCR, prAny}: {sbAny, sbBreak, 40},
case sbAny | prSep<<32:
return sbParaSep, false, 9990
case sbAny | prLF<<32:
return sbParaSep, false, 9990
case sbParaSep | prAny<<32:
return sbAny, true, 40
case sbCR | prAny<<32:
return sbAny, true, 40
// SB6.
{sbAny, prATerm}: {sbATerm, sbDontBreak, 9990},
{sbATerm, prNumeric}: {sbAny, sbDontBreak, 60},
{sbSB7, prNumeric}: {sbAny, sbDontBreak, 60}, // Because ATerm also appears in SB7.
case sbAny | prATerm<<32:
return sbATerm, false, 9990
case sbATerm | prNumeric<<32:
return sbAny, false, 60
case sbSB7 | prNumeric<<32:
return sbAny, false, 60 // Because ATerm also appears in SB7.
// SB7.
{sbAny, prUpper}: {sbUpper, sbDontBreak, 9990},
{sbAny, prLower}: {sbLower, sbDontBreak, 9990},
{sbUpper, prATerm}: {sbSB7, sbDontBreak, 70},
{sbLower, prATerm}: {sbSB7, sbDontBreak, 70},
{sbSB7, prUpper}: {sbUpper, sbDontBreak, 70},
case sbAny | prUpper<<32:
return sbUpper, false, 9990
case sbAny | prLower<<32:
return sbLower, false, 9990
case sbUpper | prATerm<<32:
return sbSB7, false, 70
case sbLower | prATerm<<32:
return sbSB7, false, 70
case sbSB7 | prUpper<<32:
return sbUpper, false, 70
// SB8a.
{sbAny, prSTerm}: {sbSTerm, sbDontBreak, 9990},
{sbATerm, prSContinue}: {sbAny, sbDontBreak, 81},
{sbATerm, prATerm}: {sbATerm, sbDontBreak, 81},
{sbATerm, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB7, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB7, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB7, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8Close, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8Close, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8Close, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8Sp, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8Sp, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8Sp, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSTerm, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSTerm, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSTerm, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8aClose, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8aClose, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8aClose, prSTerm}: {sbSTerm, sbDontBreak, 81},
{sbSB8aSp, prSContinue}: {sbAny, sbDontBreak, 81},
{sbSB8aSp, prATerm}: {sbATerm, sbDontBreak, 81},
{sbSB8aSp, prSTerm}: {sbSTerm, sbDontBreak, 81},
case sbAny | prSTerm<<32:
return sbSTerm, false, 9990
case sbATerm | prSContinue<<32:
return sbAny, false, 81
case sbATerm | prATerm<<32:
return sbATerm, false, 81
case sbATerm | prSTerm<<32:
return sbSTerm, false, 81
case sbSB7 | prSContinue<<32:
return sbAny, false, 81
case sbSB7 | prATerm<<32:
return sbATerm, false, 81
case sbSB7 | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8Close | prSContinue<<32:
return sbAny, false, 81
case sbSB8Close | prATerm<<32:
return sbATerm, false, 81
case sbSB8Close | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8Sp | prSContinue<<32:
return sbAny, false, 81
case sbSB8Sp | prATerm<<32:
return sbATerm, false, 81
case sbSB8Sp | prSTerm<<32:
return sbSTerm, false, 81
case sbSTerm | prSContinue<<32:
return sbAny, false, 81
case sbSTerm | prATerm<<32:
return sbATerm, false, 81
case sbSTerm | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8aClose | prSContinue<<32:
return sbAny, false, 81
case sbSB8aClose | prATerm<<32:
return sbATerm, false, 81
case sbSB8aClose | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8aSp | prSContinue<<32:
return sbAny, false, 81
case sbSB8aSp | prATerm<<32:
return sbATerm, false, 81
case sbSB8aSp | prSTerm<<32:
return sbSTerm, false, 81
// SB9.
{sbATerm, prClose}: {sbSB8Close, sbDontBreak, 90},
{sbSB7, prClose}: {sbSB8Close, sbDontBreak, 90},
{sbSB8Close, prClose}: {sbSB8Close, sbDontBreak, 90},
{sbATerm, prSp}: {sbSB8Sp, sbDontBreak, 90},
{sbSB7, prSp}: {sbSB8Sp, sbDontBreak, 90},
{sbSB8Close, prSp}: {sbSB8Sp, sbDontBreak, 90},
{sbSTerm, prClose}: {sbSB8aClose, sbDontBreak, 90},
{sbSB8aClose, prClose}: {sbSB8aClose, sbDontBreak, 90},
{sbSTerm, prSp}: {sbSB8aSp, sbDontBreak, 90},
{sbSB8aClose, prSp}: {sbSB8aSp, sbDontBreak, 90},
{sbATerm, prSep}: {sbParaSep, sbDontBreak, 90},
{sbATerm, prCR}: {sbParaSep, sbDontBreak, 90},
{sbATerm, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSB7, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSB7, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSB7, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSB8Close, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSB8Close, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSB8Close, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSTerm, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSTerm, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSTerm, prLF}: {sbParaSep, sbDontBreak, 90},
{sbSB8aClose, prSep}: {sbParaSep, sbDontBreak, 90},
{sbSB8aClose, prCR}: {sbParaSep, sbDontBreak, 90},
{sbSB8aClose, prLF}: {sbParaSep, sbDontBreak, 90},
case sbATerm | prClose<<32:
return sbSB8Close, false, 90
case sbSB7 | prClose<<32:
return sbSB8Close, false, 90
case sbSB8Close | prClose<<32:
return sbSB8Close, false, 90
case sbATerm | prSp<<32:
return sbSB8Sp, false, 90
case sbSB7 | prSp<<32:
return sbSB8Sp, false, 90
case sbSB8Close | prSp<<32:
return sbSB8Sp, false, 90
case sbSTerm | prClose<<32:
return sbSB8aClose, false, 90
case sbSB8aClose | prClose<<32:
return sbSB8aClose, false, 90
case sbSTerm | prSp<<32:
return sbSB8aSp, false, 90
case sbSB8aClose | prSp<<32:
return sbSB8aSp, false, 90
case sbATerm | prSep<<32:
return sbParaSep, false, 90
case sbATerm | prCR<<32:
return sbParaSep, false, 90
case sbATerm | prLF<<32:
return sbParaSep, false, 90
case sbSB7 | prSep<<32:
return sbParaSep, false, 90
case sbSB7 | prCR<<32:
return sbParaSep, false, 90
case sbSB7 | prLF<<32:
return sbParaSep, false, 90
case sbSB8Close | prSep<<32:
return sbParaSep, false, 90
case sbSB8Close | prCR<<32:
return sbParaSep, false, 90
case sbSB8Close | prLF<<32:
return sbParaSep, false, 90
case sbSTerm | prSep<<32:
return sbParaSep, false, 90
case sbSTerm | prCR<<32:
return sbParaSep, false, 90
case sbSTerm | prLF<<32:
return sbParaSep, false, 90
case sbSB8aClose | prSep<<32:
return sbParaSep, false, 90
case sbSB8aClose | prCR<<32:
return sbParaSep, false, 90
case sbSB8aClose | prLF<<32:
return sbParaSep, false, 90
// SB10.
{sbSB8Sp, prSp}: {sbSB8Sp, sbDontBreak, 100},
{sbSB8aSp, prSp}: {sbSB8aSp, sbDontBreak, 100},
{sbSB8Sp, prSep}: {sbParaSep, sbDontBreak, 100},
{sbSB8Sp, prCR}: {sbParaSep, sbDontBreak, 100},
{sbSB8Sp, prLF}: {sbParaSep, sbDontBreak, 100},
case sbSB8Sp | prSp<<32:
return sbSB8Sp, false, 100
case sbSB8aSp | prSp<<32:
return sbSB8aSp, false, 100
case sbSB8Sp | prSep<<32:
return sbParaSep, false, 100
case sbSB8Sp | prCR<<32:
return sbParaSep, false, 100
case sbSB8Sp | prLF<<32:
return sbParaSep, false, 100
// SB11.
{sbATerm, prAny}: {sbAny, sbBreak, 110},
{sbSB7, prAny}: {sbAny, sbBreak, 110},
{sbSB8Close, prAny}: {sbAny, sbBreak, 110},
{sbSB8Sp, prAny}: {sbAny, sbBreak, 110},
{sbSTerm, prAny}: {sbAny, sbBreak, 110},
{sbSB8aClose, prAny}: {sbAny, sbBreak, 110},
{sbSB8aSp, prAny}: {sbAny, sbBreak, 110},
case sbATerm | prAny<<32:
return sbAny, true, 110
case sbSB7 | prAny<<32:
return sbAny, true, 110
case sbSB8Close | prAny<<32:
return sbAny, true, 110
case sbSB8Sp | prAny<<32:
return sbAny, true, 110
case sbSTerm | prAny<<32:
return sbAny, true, 110
case sbSB8aClose | prAny<<32:
return sbAny, true, 110
case sbSB8aSp | prAny<<32:
return sbAny, true, 110
// We'll always break after ParaSep due to SB4.
default:
return -1, false, -1
}
}
// transitionSentenceBreakState determines the new state of the sentence break
@@ -141,30 +215,27 @@ func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newS
// Find the applicable transition in the table.
var rule int
transition, ok := sbTransitions[[2]int{state, nextProperty}]
if ok {
// We have a specific transition. We'll use it.
newState, sentenceBreak, rule = transition[0], transition[1] == sbBreak, transition[2]
} else {
newState, sentenceBreak, rule = sbTransitions(state, nextProperty)
if newState < 0 {
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := sbTransitions[[2]int{state, prAny}]
transAnyState, okAnyState := sbTransitions[[2]int{sbAny, nextProperty}]
if okAnyProp && okAnyState {
anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny)
anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty)
if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2]
if transAnyProp[2] < transAnyState[2] {
sentenceBreak, rule = transAnyProp[1] == sbBreak, transAnyProp[2]
newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
if anyPropRule < anyStateRule {
sentenceBreak, rule = anyPropProp, anyPropRule
}
} else if okAnyProp {
} else if anyPropState >= 0 {
// We only have a specific state.
newState, sentenceBreak, rule = transAnyProp[0], transAnyProp[1] == sbBreak, transAnyProp[2]
newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
} else if okAnyState {
} else if anyStateState >= 0 {
// We only have a specific property.
newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2]
newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
} else {
// No known transition. SB999: Any × Any.
newState, sentenceBreak, rule = sbAny, false, 9990

View File

@@ -83,10 +83,12 @@ const (
// has much better performance and makes no allocations. It lends itself well to
// large byte slices.
//
// Note that in accordance with UAX #14 LB3, the final segment will end with
// Note that in accordance with [UAX #14 LB3], the final segment will end with
// a mandatory line break (boundaries&MaskLine == LineMustBreak). You can choose
// to ignore this by checking if the length of the "rest" slice is 0 and calling
// [HasTrailingLineBreak] or [HasTrailingLineBreakInString] on the last rune.
//
// [UAX #14 LB3]: https://www.unicode.org/reports/tr14/#Algorithm
func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {
@@ -98,7 +100,7 @@ func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState i
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)
} else {
prop = state >> shiftPropState
}
@@ -148,16 +150,14 @@ func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState i
return b[:length], b[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState)
}
if r == vs16 {
width = 2
} else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
} else if firstProp == prExtendedPictographic {
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else {
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l
@@ -177,7 +177,7 @@ func StepString(str string, state int) (cluster, rest string, boundaries int, ne
// Extract the first rune.
r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
prop := property(graphemeCodePoints, r)
prop := propertyGraphemes(r)
return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState)
}
@@ -224,16 +224,14 @@ func StepString(str string, state int) (cluster, rest string, boundaries int, ne
return str[:length], str[length:], boundary, graphemeState | (wordState << shiftWordState) | (sentenceState << shiftSentenceState) | (lineState << shiftLineState) | (prop << shiftPropState)
}
if r == vs16 {
width = 2
} else if firstProp != prExtendedPictographic && firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
} else if firstProp == prExtendedPictographic {
if firstProp == prExtendedPictographic {
if r == vs15 {
width = 1
} else {
} else if r == vs16 {
width = 2
}
} else if firstProp != prRegionalIndicator && firstProp != prL {
width += runeWidth(r, prop)
}
length += l

View File

@@ -1,5 +1,10 @@
package uniseg
// EastAsianAmbiguousWidth specifies the monospace width for East Asian
// characters classified as Ambiguous. The default is 1 but some rare fonts
// render them with a width of 2.
var EastAsianAmbiguousWidth = 1
// runeWidth returns the monospace width for the given rune. The provided
// grapheme property is a value mapped by the [graphemeCodePoints] table.
//
@@ -33,9 +38,11 @@ func runeWidth(r rune, graphemeProperty int) int {
return 4
}
switch property(eastAsianWidth, r) {
switch propertyEastAsianWidth(r) {
case prW, prF:
return 2
case prA:
return EastAsianAmbiguousWidth
}
return 1

View File

@@ -3,7 +3,7 @@ package uniseg
import "unicode/utf8"
// FirstWord returns the first word found in the given byte slice according to
// the rules of Unicode Standard Annex #29, Word Boundaries. This function can
// the rules of [Unicode Standard Annex #29, Word Boundaries]. This function can
// be called continuously to extract all words from a byte slice, as illustrated
// in the example below.
//
@@ -17,6 +17,8 @@ import "unicode/utf8"
// the sub-slice of the input slice containing the identified word.
//
// Given an empty byte slice "b", the function returns nil values.
//
// [Unicode Standard Annex #29, Word Boundaries]: http://unicode.org/reports/tr29/#Word_Boundaries
func FirstWord(b []byte, state int) (word, rest []byte, newState int) {
// An empty byte slice returns nothing.
if len(b) == 0 {

View File

@@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// workBreakCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt
// https://www.unicode.org/Public/15.0.0/ucd/auxiliary/WordBreakProperty.txt
// and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode
// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement.
var workBreakCodePoints = [][3]int{
{0x000A, 0x000A, prLF}, // Cc <control-000A>
@@ -318,6 +318,7 @@ var workBreakCodePoints = [][3]int{
{0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prALetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prExtend}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prALetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -357,7 +358,7 @@ var workBreakCodePoints = [][3]int{
{0x0E50, 0x0E59, prNumeric}, // Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
{0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN
{0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
{0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA
{0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0F00, 0x0F00, prALetter}, // Lo TIBETAN SYLLABLE OM
{0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
@@ -1093,6 +1094,7 @@ var workBreakCodePoints = [][3]int{
{0x10E80, 0x10EA9, prALetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
{0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EB0, 0x10EB1, prALetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prALetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F27, 0x10F27, prALetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH
{0x10F30, 0x10F45, prALetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -1157,6 +1159,8 @@ var workBreakCodePoints = [][3]int{
{0x11235, 0x11235, prExtend}, // Mc KHOJKI SIGN VIRAMA
{0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
{0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prALetter}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prALetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prALetter}, // Lo MULTANI LETTER GHA
{0x1128A, 0x1128D, prALetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
@@ -1337,13 +1341,28 @@ var workBreakCodePoints = [][3]int{
{0x11EE0, 0x11EF2, prALetter}, // Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA
{0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prALetter}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prExtend}, // Mc KAWI SIGN VISARGA
{0x11F04, 0x11F10, prALetter}, // Lo [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prALetter}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prExtend}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prExtend}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prExtend}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER
{0x11F50, 0x11F59, prNumeric}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prALetter}, // Lo LISU LETTER YHA
{0x12000, 0x12399, prALetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
{0x12400, 0x1246E, prALetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
{0x12480, 0x12543, prALetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
{0x12F90, 0x12FF0, prALetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
{0x13000, 0x1342E, prALetter}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
{0x13430, 0x13438, prFormat}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
{0x13000, 0x1342F, prALetter}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x1343F, prFormat}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prALetter}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x14646, prALetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
{0x16800, 0x16A38, prALetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
{0x16A40, 0x16A5E, prALetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK
@@ -1374,6 +1393,7 @@ var workBreakCodePoints = [][3]int{
{0x1AFFD, 0x1AFFE, prKatakana}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B000, prKatakana}, // Lo KATAKANA LETTER ARCHAIC E
{0x1B120, 0x1B122, prKatakana}, // Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU
{0x1B155, 0x1B155, prKatakana}, // Lo KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prKatakana}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1BC00, 0x1BC6A, prALetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
{0x1BC70, 0x1BC7C, prALetter}, // Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
@@ -1431,11 +1451,14 @@ var workBreakCodePoints = [][3]int{
{0x1DF00, 0x1DF09, prALetter}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prALetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prALetter}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prALetter}, // L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prALetter}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prALetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prALetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@@ -1446,6 +1469,10 @@ var workBreakCodePoints = [][3]int{
{0x1E2C0, 0x1E2EB, prALetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
{0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E4D0, 0x1E4EA, prALetter}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prALetter}, // Lm NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prNumeric}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prALetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prALetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prALetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -1740,7 +1767,8 @@ var workBreakCodePoints = [][3]int{
{0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (🛓..🛔) STUPA..PAGODA
{0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (🛕) hindu temple
{0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (🛖..🛗) hut..elevator
{0x1F6D8, 0x1F6DC, prExtendedPictographic}, // E0.0 [5] (🛘..🛜) <reserved-1F6D8>..<reserved-1F6DC>
{0x1F6D8, 0x1F6DB, prExtendedPictographic}, // E0.0 [4] (🛘..🛛) <reserved-1F6D8>..<reserved-1F6DB>
{0x1F6DC, 0x1F6DC, prExtendedPictographic}, // E15.0 [1] (🛜) wireless
{0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
{0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
{0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE
@@ -1757,7 +1785,7 @@ var workBreakCodePoints = [][3]int{
{0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (🛺) auto rickshaw
{0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate
{0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<reserved-1F6FF>
{0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) <reserved-1F774>..<reserved-1F77F>
{0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) LOT OF FORTUNE..ORCUS
{0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<reserved-1F7DF>
{0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square
{0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<reserved-1F7EF>
@@ -1816,30 +1844,37 @@ var workBreakCodePoints = [][3]int{
{0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING..<reserved-1FA6F>
{0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
{0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal
{0x1FA75, 0x1FA77, prExtendedPictographic}, // E0.0 [3] (🩵..🩷) <reserved-1FA75>..<reserved-1FA77>
{0x1FA75, 0x1FA77, prExtendedPictographic}, // E15.0 [3] (🩵..🩷) light blue heart..pink heart
{0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
{0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch
{0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<reserved-1FA7F>
{0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
{0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
{0x1FA87, 0x1FA8F, prExtendedPictographic}, // E0.0 [9] (🪇..🪏) <reserved-1FA87>..<reserved-1FA8F>
{0x1FA87, 0x1FA88, prExtendedPictographic}, // E15.0 [2] (🪇..🪈) maracas..flute
{0x1FA89, 0x1FA8F, prExtendedPictographic}, // E0.0 [7] (🪉..🪏) <reserved-1FA89>..<reserved-1FA8F>
{0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
{0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (🪖..🪨) military helmet..rock
{0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
{0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E0.0 [3] (🪭..🪯) <reserved-1FAAD>..<reserved-1FAAF>
{0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E15.0 [3] (🪭..🪯) folding hand fan..khanda
{0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (🪰..🪶) fly..feather
{0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
{0x1FABB, 0x1FABF, prExtendedPictographic}, // E0.0 [5] (🪻..🪿) <reserved-1FABB>..<reserved-1FABF>
{0x1FABB, 0x1FABD, prExtendedPictographic}, // E15.0 [3] (🪻..🪽) hyacinth..wing
{0x1FABE, 0x1FABE, prExtendedPictographic}, // E0.0 [1] (🪾) <reserved-1FABE>
{0x1FABF, 0x1FABF, prExtendedPictographic}, // E15.0 [1] (🪿) goose
{0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
{0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
{0x1FAC6, 0x1FACF, prExtendedPictographic}, // E0.0 [10] (🫆..🫏) <reserved-1FAC6>..<reserved-1FACF>
{0x1FAC6, 0x1FACD, prExtendedPictographic}, // E0.0 [8] (🫆..🫍) <reserved-1FAC6>..<reserved-1FACD>
{0x1FACE, 0x1FACF, prExtendedPictographic}, // E15.0 [2] (🫎..🫏) moose..donkey
{0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..🫖) blueberries..teapot
{0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
{0x1FADA, 0x1FADF, prExtendedPictographic}, // E0.0 [6] (🫚..🫟) <reserved-1FADA>..<reserved-1FADF>
{0x1FADA, 0x1FADB, prExtendedPictographic}, // E15.0 [2] (🫚..🫛) ginger root..pea pod
{0x1FADC, 0x1FADF, prExtendedPictographic}, // E0.0 [4] (🫜..🫟) <reserved-1FADC>..<reserved-1FADF>
{0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles
{0x1FAE8, 0x1FAEF, prExtendedPictographic}, // E0.0 [8] (🫨..🫯) <reserved-1FAE8>..<reserved-1FAEF>
{0x1FAE8, 0x1FAE8, prExtendedPictographic}, // E15.0 [1] (🫨) shaking face
{0x1FAE9, 0x1FAEF, prExtendedPictographic}, // E0.0 [7] (🫩..🫯) <reserved-1FAE9>..<reserved-1FAEF>
{0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
{0x1FAF7, 0x1FAFF, prExtendedPictographic}, // E0.0 [9] (🫷..🫿) <reserved-1FAF7>..<reserved-1FAFF>
{0x1FAF7, 0x1FAF8, prExtendedPictographic}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand
{0x1FAF9, 0x1FAFF, prExtendedPictographic}, // E0.0 [7] (🫹..🫿) <reserved-1FAF9>..<reserved-1FAFF>
{0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (🰀..🿽) <reserved-1FC00>..<reserved-1FFFD>
{0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG

View File

@@ -22,82 +22,121 @@ const (
wbZWJBit = 16 // This bit is set for any states followed by at least one zero-width joiner (see WB4 and WB3c).
)
// The word break parser's breaking instructions.
const (
wbDontBreak = iota
wbBreak
)
// The word break parser's state transitions. It's anologous to grTransitions,
// see comments there for details. Unicode version 14.0.0.
var wbTransitions = map[[2]int][3]int{
// wbTransitions implements the word break parser's state transitions. It's
// anologous to [grTransitions], see comments there for details.
//
// Unicode version 15.0.0.
func wbTransitions(state, prop int) (newState int, wordBreak bool, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// WB3b.
{wbAny, prNewline}: {wbNewline, wbBreak, 32},
{wbAny, prCR}: {wbCR, wbBreak, 32},
{wbAny, prLF}: {wbLF, wbBreak, 32},
case wbAny | prNewline<<32:
return wbNewline, true, 32
case wbAny | prCR<<32:
return wbCR, true, 32
case wbAny | prLF<<32:
return wbLF, true, 32
// WB3a.
{wbNewline, prAny}: {wbAny, wbBreak, 31},
{wbCR, prAny}: {wbAny, wbBreak, 31},
{wbLF, prAny}: {wbAny, wbBreak, 31},
case wbNewline | prAny<<32:
return wbAny, true, 31
case wbCR | prAny<<32:
return wbAny, true, 31
case wbLF | prAny<<32:
return wbAny, true, 31
// WB3.
{wbCR, prLF}: {wbLF, wbDontBreak, 30},
case wbCR | prLF<<32:
return wbLF, false, 30
// WB3d.
{wbAny, prWSegSpace}: {wbWSegSpace, wbBreak, 9990},
{wbWSegSpace, prWSegSpace}: {wbWSegSpace, wbDontBreak, 34},
case wbAny | prWSegSpace<<32:
return wbWSegSpace, true, 9990
case wbWSegSpace | prWSegSpace<<32:
return wbWSegSpace, false, 34
// WB5.
{wbAny, prALetter}: {wbALetter, wbBreak, 9990},
{wbAny, prHebrewLetter}: {wbHebrewLetter, wbBreak, 9990},
{wbALetter, prALetter}: {wbALetter, wbDontBreak, 50},
{wbALetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50},
{wbHebrewLetter, prALetter}: {wbALetter, wbDontBreak, 50},
{wbHebrewLetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50},
case wbAny | prALetter<<32:
return wbALetter, true, 9990
case wbAny | prHebrewLetter<<32:
return wbHebrewLetter, true, 9990
case wbALetter | prALetter<<32:
return wbALetter, false, 50
case wbALetter | prHebrewLetter<<32:
return wbHebrewLetter, false, 50
case wbHebrewLetter | prALetter<<32:
return wbALetter, false, 50
case wbHebrewLetter | prHebrewLetter<<32:
return wbHebrewLetter, false, 50
// WB7. Transitions to wbWB7 handled by transitionWordBreakState().
{wbWB7, prALetter}: {wbALetter, wbDontBreak, 70},
{wbWB7, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 70},
case wbWB7 | prALetter<<32:
return wbALetter, false, 70
case wbWB7 | prHebrewLetter<<32:
return wbHebrewLetter, false, 70
// WB7a.
{wbHebrewLetter, prSingleQuote}: {wbAny, wbDontBreak, 71},
case wbHebrewLetter | prSingleQuote<<32:
return wbAny, false, 71
// WB7c. Transitions to wbWB7c handled by transitionWordBreakState().
{wbWB7c, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 73},
case wbWB7c | prHebrewLetter<<32:
return wbHebrewLetter, false, 73
// WB8.
{wbAny, prNumeric}: {wbNumeric, wbBreak, 9990},
{wbNumeric, prNumeric}: {wbNumeric, wbDontBreak, 80},
case wbAny | prNumeric<<32:
return wbNumeric, true, 9990
case wbNumeric | prNumeric<<32:
return wbNumeric, false, 80
// WB9.
{wbALetter, prNumeric}: {wbNumeric, wbDontBreak, 90},
{wbHebrewLetter, prNumeric}: {wbNumeric, wbDontBreak, 90},
case wbALetter | prNumeric<<32:
return wbNumeric, false, 90
case wbHebrewLetter | prNumeric<<32:
return wbNumeric, false, 90
// WB10.
{wbNumeric, prALetter}: {wbALetter, wbDontBreak, 100},
{wbNumeric, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 100},
case wbNumeric | prALetter<<32:
return wbALetter, false, 100
case wbNumeric | prHebrewLetter<<32:
return wbHebrewLetter, false, 100
// WB11. Transitions to wbWB11 handled by transitionWordBreakState().
{wbWB11, prNumeric}: {wbNumeric, wbDontBreak, 110},
case wbWB11 | prNumeric<<32:
return wbNumeric, false, 110
// WB13.
{wbAny, prKatakana}: {wbKatakana, wbBreak, 9990},
{wbKatakana, prKatakana}: {wbKatakana, wbDontBreak, 130},
case wbAny | prKatakana<<32:
return wbKatakana, true, 9990
case wbKatakana | prKatakana<<32:
return wbKatakana, false, 130
// WB13a.
{wbAny, prExtendNumLet}: {wbExtendNumLet, wbBreak, 9990},
{wbALetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
{wbHebrewLetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
{wbNumeric, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
{wbKatakana, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
{wbExtendNumLet, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131},
case wbAny | prExtendNumLet<<32:
return wbExtendNumLet, true, 9990
case wbALetter | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbHebrewLetter | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbNumeric | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbKatakana | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbExtendNumLet | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
// WB13b.
{wbExtendNumLet, prALetter}: {wbALetter, wbDontBreak, 132},
{wbExtendNumLet, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 132},
{wbExtendNumLet, prNumeric}: {wbNumeric, wbDontBreak, 132},
{wbExtendNumLet, prKatakana}: {prKatakana, wbDontBreak, 132},
case wbExtendNumLet | prALetter<<32:
return wbALetter, false, 132
case wbExtendNumLet | prHebrewLetter<<32:
return wbHebrewLetter, false, 132
case wbExtendNumLet | prNumeric<<32:
return wbNumeric, false, 132
case wbExtendNumLet | prKatakana<<32:
return wbKatakana, false, 132
default:
return -1, false, -1
}
}
// transitionWordBreakState determines the new state of the word break parser
@@ -141,30 +180,27 @@ func transitionWordBreakState(state int, r rune, b []byte, str string) (newState
// Find the applicable transition in the table.
var rule int
transition, ok := wbTransitions[[2]int{state, nextProperty}]
if ok {
// We have a specific transition. We'll use it.
newState, wordBreak, rule = transition[0], transition[1] == wbBreak, transition[2]
} else {
newState, wordBreak, rule = wbTransitions(state, nextProperty)
if newState < 0 {
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := wbTransitions[[2]int{state, prAny}]
transAnyState, okAnyState := wbTransitions[[2]int{wbAny, nextProperty}]
if okAnyProp && okAnyState {
anyPropState, anyPropWordBreak, anyPropRule := wbTransitions(state, prAny)
anyStateState, anyStateWordBreak, anyStateRule := wbTransitions(wbAny, nextProperty)
if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions).
newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2]
if transAnyProp[2] < transAnyState[2] {
wordBreak, rule = transAnyProp[1] == wbBreak, transAnyProp[2]
newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
if anyPropRule < anyStateRule {
wordBreak, rule = anyPropWordBreak, anyPropRule
}
} else if okAnyProp {
} else if anyPropState >= 0 {
// We only have a specific state.
newState, wordBreak, rule = transAnyProp[0], transAnyProp[1] == wbBreak, transAnyProp[2]
newState, wordBreak, rule = anyPropState, anyPropWordBreak, anyPropRule
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
} else if okAnyState {
} else if anyStateState >= 0 {
// We only have a specific property.
newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2]
newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
} else {
// No known transition. WB999: Any ÷ Any.
newState, wordBreak, rule = wbAny, true, 9990

6
vendor/modules.txt vendored
View File

@@ -1341,10 +1341,10 @@ github.com/mattermost/xml-roundtrip-validator
# github.com/mattn/go-colorable v0.1.13
## explicit; go 1.15
github.com/mattn/go-colorable
# github.com/mattn/go-isatty v0.0.19
# github.com/mattn/go-isatty v0.0.20
## explicit; go 1.15
github.com/mattn/go-isatty
# github.com/mattn/go-runewidth v0.0.13
# github.com/mattn/go-runewidth v0.0.15
## explicit; go 1.9
github.com/mattn/go-runewidth
# github.com/mattn/go-sqlite3 v1.14.22
@@ -1708,7 +1708,7 @@ github.com/rcrowley/go-metrics
# github.com/riandyrn/otelchi v0.9.0
## explicit; go 1.21
github.com/riandyrn/otelchi
# github.com/rivo/uniseg v0.4.2
# github.com/rivo/uniseg v0.4.7
## explicit; go 1.18
github.com/rivo/uniseg
# github.com/rogpeppe/go-internal v1.12.0