From bd1fc8a70b54ec52d6d31d587001787b82048e22 Mon Sep 17 00:00:00 2001 From: Florian Schade Date: Mon, 31 Mar 2025 17:29:35 +0200 Subject: [PATCH] feat(antivirus): add partial scan mode - Introduced partial mode for antivirus scanning to handle large files efficiently. - Introduced clamAV scan timeout --- deployments/examples/opencloud_full/.env | 7 + .../examples/opencloud_full/clamav.yml | 2 + services/antivirus/README.md | 27 ++- services/antivirus/pkg/command/server.go | 2 +- services/antivirus/pkg/config/config.go | 30 +++- .../pkg/config/defaults/defaultconfig.go | 14 +- services/antivirus/pkg/scanners/clamav.go | 39 +++-- .../antivirus/pkg/scanners/clamav_test.go | 120 ++++++++++++++ services/antivirus/pkg/scanners/scanners.go | 36 ++-- services/antivirus/pkg/service/service.go | 156 ++++++++++-------- 10 files changed, 332 insertions(+), 101 deletions(-) create mode 100644 services/antivirus/pkg/scanners/clamav_test.go diff --git a/deployments/examples/opencloud_full/.env b/deployments/examples/opencloud_full/.env index a437d30a77..e3eca91309 100644 --- a/deployments/examples/opencloud_full/.env +++ b/deployments/examples/opencloud_full/.env @@ -214,6 +214,13 @@ COLLABORA_SSL_VERIFICATION=false # envvar in the OpenCloud Settings above by adding 'antivirus' to the list. # Note: the leading colon is required to enable the service. #CLAMAV=:clamav.yml +# The maximum scan size the virus scanner can handle, needs adjustment in the scanner config as well. +# Usable common abbreviations: [KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB, EB, EiB], example: 2GB. +# Defaults to "100MB" +#ANTIVIRUS_MAX_SCAN_SIZE= +# Usable modes: partial, skip. +# Defaults to "partial" +#ANTIVIRUS_MAX_SCAN_SIZE_MODE= # Image version of the ClamAV container. 
# Defaults to "latest" CLAMAV_DOCKER_TAG= diff --git a/deployments/examples/opencloud_full/clamav.yml b/deployments/examples/opencloud_full/clamav.yml index 4af6d68dce..311f52bf64 100644 --- a/deployments/examples/opencloud_full/clamav.yml +++ b/deployments/examples/opencloud_full/clamav.yml @@ -4,6 +4,8 @@ services: environment: ANTIVIRUS_SCANNER_TYPE: "clamav" ANTIVIRUS_CLAMAV_SOCKET: "/var/run/clamav/clamd.sock" + ANTIVIRUS_MAX_SCAN_SIZE_MODE: ${ANTIVIRUS_MAX_SCAN_SIZE_MODE:-partial} + ANTIVIRUS_MAX_SCAN_SIZE: ${ANTIVIRUS_MAX_SCAN_SIZE:-100MB} # the antivirus service needs manual startup, see .env and opencloud.yaml for START_ADDITIONAL_SERVICES # configure the antivirus service POSTPROCESSING_STEPS: "virusscan" diff --git a/services/antivirus/README.md b/services/antivirus/README.md index 07da6b4513..2f6dc5322e 100644 --- a/services/antivirus/README.md +++ b/services/antivirus/README.md @@ -4,7 +4,10 @@ The `antivirus` service is responsible for scanning files for viruses. ## Memory Considerations -The antivirus service can consume considerably amounts of memory. This is relevant to provide or define sufficient memory for the deployment selected. To avoid out of memory (OOM) situations, the following equation gives a rough overview based on experiences made. The memory calculation comes without any guarantee, is intended as overview only and subject of change. +The antivirus service can consume considerable amounts of memory. +This is relevant to provide or define sufficient memory for the deployment selected. +To avoid out of memory (OOM) situations, the following equation gives a rough overview based on experiences made. +The memory calculation comes without any guarantee, is intended as overview only and subject of change. 
`memory limit` = `max file size` x `workers` x `factor 8 - 14` @@ -19,17 +22,31 @@ With: ### Antivirus Scanner Type -The antivirus service currently supports [ICAP](https://tools.ietf.org/html/rfc3507) and [ClamAV](http://www.clamav.net/index.html) as antivirus scanners. The `ANTIVIRUS_SCANNER_TYPE` environment variable is used to select the scanner. The detailed configuration for each scanner heavily depends on the scanner type selected. See the environment variables for more details. +The antivirus service currently supports [ICAP](https://tools.ietf.org/html/rfc3507) and [ClamAV](http://www.clamav.net/index.html) as antivirus scanners. +The `ANTIVIRUS_SCANNER_TYPE` environment variable is used to select the scanner. +The detailed configuration for each scanner heavily depends on the scanner type selected. +See the environment variables for more details. - For `icap`, only scanners using the `X-Infection-Found` header are currently supported. - For `clamav` only local sockets can currently be configured. ### Maximum Scan Size -Several factors can make it necessary to limit the maximum filesize the antivirus service will use for scanning. Use the `ANTIVIRUS_MAX_SCAN_SIZE` environment variable to scan only a given amount of bytes. Obviously, it is recommended to scan the whole file, but several factors like scanner type and version, bandwidth, performance issues, etc. might make a limit necessary. +Several factors can make it necessary to limit the maximum filesize the antivirus service uses for scanning. +Use the `ANTIVIRUS_MAX_SCAN_SIZE` environment variable to scan only a given number of bytes, +or to skip the whole resource. + +Even if it's recommended to scan the whole file, several factors like scanner type and version, +bandwidth, performance issues, etc. might make a limit necessary. + +In such cases, the max scan size mode can be handy. The following modes are available: + + - `partial`: The file is scanned up to the given size. 
The rest of the file is not scanned. This is the default mode `ANTIVIRUS_MAX_SCAN_SIZE_MODE=partial` + - `skip`: The file is skipped and not scanned. `ANTIVIRUS_MAX_SCAN_SIZE_MODE=skip` **IMPORTANT** -> Streaming of files to the virus scan service still [needs to be implemented](https://github.com/owncloud/ocis/issues/6803). To prevent OOM errors `ANTIVIRUS_MAX_SCAN_SIZE` needs to be set lower than available ram. +> Streaming of files to the virus scan service still [needs to be implemented](https://github.com/owncloud/ocis/issues/6803). +> To prevent OOM errors `ANTIVIRUS_MAX_SCAN_SIZE` needs to be set lower than available ram and/or the maximum file size that can be scanned by the virus scanner. ### Antivirus Workers @@ -41,7 +58,7 @@ The antivirus service allows three different ways of handling infected files. Th - `delete`: (default): Infected files will be deleted immediately, further postprocessing is cancelled. - `abort`: (advanced option): Infected files will be kept, further postprocessing is cancelled. Files can be manually retrieved and inspected by an admin. To identify the file for further investigation, the antivirus service logs the abort/infected state including the file ID. The file is located in the `storage/users/uploads` folder of the OpenCloud data directory and persists until it is manually deleted by the admin via the [Manage Unfinished Uploads](https://github.com/opencloud-eu/opencloud/tree/main/services/storage-users#manage-unfinished-uploads) command. - - `continue`: (obviously not recommended): Infected files will be marked via metadata as infected but postprocessing continues normally. Note: Infected Files are moved to their final destination and therefore not prevented from download which includes the risk of spreading viruses. + - `continue`: (not recommended): Infected files will be marked via metadata as infected, but postprocessing continues normally. 
Note: Infected Files are moved to their final destination and therefore not prevented from download, which includes the risk of spreading viruses. In all cases, a log entry is added declaring the infection and handling method and a notification via the `userlog` service sent. diff --git a/services/antivirus/pkg/command/server.go b/services/antivirus/pkg/command/server.go index 3b4c8b6051..4b05bd8fbe 100644 --- a/services/antivirus/pkg/command/server.go +++ b/services/antivirus/pkg/command/server.go @@ -45,7 +45,7 @@ func Server(cfg *config.Config) *cli.Command { { svc, err := service.NewAntivirus(cfg, logger, traceProvider) if err != nil { - return err + return cli.Exit(err.Error(), 1) } gr.Add(svc.Run, func(_ error) { diff --git a/services/antivirus/pkg/config/config.go b/services/antivirus/pkg/config/config.go index 67bca40677..69f5b1950e 100644 --- a/services/antivirus/pkg/config/config.go +++ b/services/antivirus/pkg/config/config.go @@ -5,6 +5,26 @@ import ( "time" ) +// ScannerType gives info which scanner is used +type ScannerType string + +const ( + // ScannerTypeClamAV defines that clamav is used + ScannerTypeClamAV ScannerType = "clamav" + // ScannerTypeICap defines that icap is used + ScannerTypeICap ScannerType = "icap" +) + +// MaxScanSizeMode defines the mode of handling files that exceed the maximum scan size +type MaxScanSizeMode string + +const ( + // MaxScanSizeModeSkip defines that files that are bigger than the max scan size will be skipped + MaxScanSizeModeSkip MaxScanSizeMode = "skip" + // MaxScanSizeModePartial defines that only the file up to the max size will be used + MaxScanSizeModePartial MaxScanSizeMode = "partial" +) + // Config combines all available configuration parts. type Config struct { File string @@ -20,8 +40,9 @@ type Config struct { Events Events Workers int `yaml:"workers" env:"ANTIVIRUS_WORKERS" desc:"The number of concurrent go routines that fetch events from the event queue." 
introductionVersion:"1.0.0"` - Scanner Scanner - MaxScanSize string `yaml:"max-scan-size" env:"ANTIVIRUS_MAX_SCAN_SIZE" desc:"The maximum scan size the virus scanner can handle. Only this many bytes of a file will be scanned. 0 means unlimited and is the default. Usable common abbreviations: [KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB, EB, EiB], example: 2GB." introductionVersion:"1.0.0"` + Scanner Scanner + MaxScanSize string `yaml:"max-scan-size" env:"ANTIVIRUS_MAX_SCAN_SIZE" desc:"The maximum scan size the virus scanner can handle. 0 means unlimited. Usable common abbreviations: [KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB, EB, EiB], example: 2GB." introductionVersion:"1.0.0"` + MaxScanSizeMode MaxScanSizeMode `yaml:"max-scan-size-mode" env:"ANTIVIRUS_MAX_SCAN_SIZE_MODE" desc:"Defines the mode of handling files that exceed the maximum scan size. Supported options are: 'skip', which skips files that are bigger than the max scan size, and 'partial' (default), which only uses the file up to the max size." introductionVersion:"2.1.0"` Context context.Context `json:"-" yaml:"-"` @@ -62,7 +83,7 @@ type Events struct { // Scanner provides configuration options for the virus scanner type Scanner struct { - Type string `yaml:"type" env:"ANTIVIRUS_SCANNER_TYPE" desc:"The antivirus scanner to use. Supported values are 'clamav' and 'icap'." introductionVersion:"1.0.0"` + Type ScannerType `yaml:"type" env:"ANTIVIRUS_SCANNER_TYPE" desc:"The antivirus scanner to use. Supported values are 'clamav' and 'icap'." introductionVersion:"1.0.0"` ClamAV ClamAV // only if Type == clamav ICAP ICAP // only if Type == icap @@ -70,7 +91,8 @@ type Scanner struct { // ClamAV provides configuration option for clamav type ClamAV struct { - Socket string `yaml:"socket" env:"ANTIVIRUS_CLAMAV_SOCKET" desc:"The socket clamav is running on. Note the default value is an example which needs adaption according your OS." 
introductionVersion:"1.0.0"` + Socket string `yaml:"socket" env:"ANTIVIRUS_CLAMAV_SOCKET" desc:"The socket clamav is running on. Note the default value is an example which needs adaption according your OS." introductionVersion:"1.0.0"` + Timeout time.Duration `yaml:"scan_timeout" env:"ANTIVIRUS_CLAMAV_SCAN_TIMEOUT" desc:"Scan timeout for the ClamAV client. Defaults to '5m' (5 minutes). See the Environment Variable Types description for more details." introductionVersion:"2.1.0"` } // ICAP provides configuration options for icap diff --git a/services/antivirus/pkg/config/defaults/defaultconfig.go b/services/antivirus/pkg/config/defaults/defaultconfig.go index f8987a8e1f..992ac9b15f 100644 --- a/services/antivirus/pkg/config/defaults/defaultconfig.go +++ b/services/antivirus/pkg/config/defaults/defaultconfig.go @@ -30,10 +30,15 @@ func DefaultConfig() *config.Config { }, Workers: 10, InfectedFileHandling: "delete", + // defaults from clamav sample conf: MaxScanSize=400M, MaxFileSize=100M, StreamMaxLength=100M + // https://github.com/Cisco-Talos/clamav/blob/main/etc/clamd.conf.sample + MaxScanSize: "100MB", + MaxScanSizeMode: config.MaxScanSizeModePartial, Scanner: config.Scanner{ - Type: "clamav", + Type: config.ScannerTypeClamAV, ClamAV: config.ClamAV{ - Socket: "/run/clamav/clamd.ctl", + Socket: "/run/clamav/clamd.ctl", + Timeout: 5 * time.Minute, }, ICAP: config.ICAP{ URL: "icap://127.0.0.1:1344", @@ -57,4 +62,9 @@ func EnsureDefaults(cfg *config.Config) { // Sanitize sanitizes the configuration func Sanitize(cfg *config.Config) { + defaultConfig := DefaultConfig() + + if cfg.MaxScanSize == "" { + cfg.MaxScanSize = defaultConfig.MaxScanSize + } } diff --git a/services/antivirus/pkg/scanners/clamav.go b/services/antivirus/pkg/scanners/clamav.go index 2706af99b5..9af56cc4a9 100644 --- a/services/antivirus/pkg/scanners/clamav.go +++ b/services/antivirus/pkg/scanners/clamav.go @@ -1,34 +1,51 @@ package scanners import ( + "fmt" "time" "github.com/dutchcoders/go-clamd" 
) // NewClamAV returns a Scanner talking to clamAV via socket -func NewClamAV(socket string) *ClamAV { - return &ClamAV{ - clamd: clamd.NewClamd(socket), +func NewClamAV(socket string, timeout time.Duration) (*ClamAV, error) { + c := clamd.NewClamd(socket) + + if err := c.Ping(); err != nil { + return nil, fmt.Errorf("%w: %w", ErrScannerNotReachable, err) } + + return &ClamAV{ + clamd: clamd.NewClamd(socket), + timeout: timeout, + }, nil } // ClamAV is a Scanner based on clamav type ClamAV struct { - clamd *clamd.Clamd + clamd *clamd.Clamd + timeout time.Duration } // Scan to fulfill Scanner interface func (s ClamAV) Scan(in Input) (Result, error) { - ch, err := s.clamd.ScanStream(in.Body, make(chan bool)) + abort := make(chan bool, 1) + defer close(abort) + + ch, err := s.clamd.ScanStream(in.Body, abort) if err != nil { return Result{}, err } - r := <-ch - return Result{ - Infected: r.Status == clamd.RES_FOUND, - Description: r.Description, - ScanTime: time.Now(), - }, nil + select { + case <-time.After(s.timeout): + abort <- true + return Result{}, fmt.Errorf("%w: %s", ErrScanTimeout, in.Url) + case s := <-ch: + return Result{ + Infected: s.Status == clamd.RES_FOUND, + Description: s.Description, + ScanTime: time.Now(), + }, nil + } } diff --git a/services/antivirus/pkg/scanners/clamav_test.go b/services/antivirus/pkg/scanners/clamav_test.go new file mode 100644 index 0000000000..9326ff72c9 --- /dev/null +++ b/services/antivirus/pkg/scanners/clamav_test.go @@ -0,0 +1,120 @@ +package scanners_test + +import ( + "context" + "net" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/opencloud-eu/opencloud/services/antivirus/pkg/scanners" +) + +func newUnixListener(t testing.TB, lc net.ListenConfig, v ...string) net.Listener { + d, err := os.MkdirTemp("", "") + assert.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, os.RemoveAll(d)) + }) + + nl, err := 
lc.Listen(context.Background(), "unix", filepath.Join(d, "sock")) + require.NoError(t, err) + + go func() { + i := 0 + for { + if len(v) == i { + break + } + + conn, err := nl.Accept() + require.NoError(t, err) + + time.Sleep(100 * time.Millisecond) + + _, err = conn.Write([]byte(v[i])) + require.NoError(t, err) + require.NoError(t, conn.Close()) + i++ + } + }() + + return nl +} + +func TestNewClamAV(t *testing.T) { + t.Run("returns a scanner", func(t *testing.T) { + ul := newUnixListener(t, net.ListenConfig{}, "PONG\n") + defer func() { + assert.NoError(t, ul.Close()) + }() + + done := make(chan bool, 1) + + go func() { + _, err := scanners.NewClamAV(ul.Addr().String(), 10*time.Second) + assert.NoError(t, err) + done <- true + }() + + assert.True(t, <-done) + }) + + t.Run("fails if scanner is not pingable", func(t *testing.T) { + _, err := scanners.NewClamAV("", 0) + assert.ErrorIs(t, err, scanners.ErrScannerNotReachable) + }) +} + +func TestNewClamAV_Scan(t *testing.T) { + t.Run("returns a result", func(t *testing.T) { + ul := newUnixListener(t, net.ListenConfig{}, "PONG\n", "stream: Win.Test.EICAR_HDB-1 FOUND\n") + defer func() { + assert.NoError(t, ul.Close()) + }() + + done := make(chan bool, 1) + + go func() { + scanner, err := scanners.NewClamAV(ul.Addr().String(), 10*time.Second) + assert.NoError(t, err) + + result, err := scanner.Scan(scanners.Input{Body: strings.NewReader("DATA")}) + assert.NoError(t, err) + + assert.Equal(t, result.Description, "Win.Test.EICAR_HDB-1") + assert.True(t, result.Infected) + done <- true + }() + + assert.True(t, <-done) + }) + + t.Run("aborts after a certain time", func(t *testing.T) { + ul := newUnixListener(t, net.ListenConfig{}, "PONG\n", "stream: Win.Test.EICAR_HDB-1 FOUND\n") + defer func() { + assert.NoError(t, ul.Close()) + }() + + done := make(chan bool, 1) + + go func() { + scanner, err := scanners.NewClamAV(ul.Addr().String(), 10*time.Second) + assert.NoError(t, err) + + result, err := 
scanner.Scan(scanners.Input{Body: strings.NewReader("DATA")}) + assert.NoError(t, err) + + assert.Equal(t, result.Description, "Win.Test.EICAR_HDB-1") + assert.True(t, result.Infected) + done <- true + }() + + assert.True(t, <-done) + }) +} diff --git a/services/antivirus/pkg/scanners/scanners.go b/services/antivirus/pkg/scanners/scanners.go index a56d0fe81e..223bd4fc1d 100644 --- a/services/antivirus/pkg/scanners/scanners.go +++ b/services/antivirus/pkg/scanners/scanners.go @@ -1,21 +1,31 @@ package scanners import ( + "errors" "io" "time" ) -// The Result is the common scan result to all scanners -type Result struct { - Infected bool - ScanTime time.Time - Description string -} +var ( + // ErrScanTimeout is returned when a scan times out + ErrScanTimeout = errors.New("time out waiting for clamav to respond while scanning") + // ErrScannerNotReachable is returned when the scanner is not reachable + ErrScannerNotReachable = errors.New("failed to reach the scanner") +) -// The Input is the common input to all scanners -type Input struct { - Body io.Reader - Size int64 - Url string - Name string -} +type ( + // The Result is the common scan result to all scanners + Result struct { + Infected bool + ScanTime time.Time + Description string + } + + // The Input is the common input to all scanners + Input struct { + Body io.Reader + Size int64 + Url string + Name string + } +) diff --git a/services/antivirus/pkg/service/service.go b/services/antivirus/pkg/service/service.go index 2831a32192..e0603132e6 100644 --- a/services/antivirus/pkg/service/service.go +++ b/services/antivirus/pkg/service/service.go @@ -9,6 +9,7 @@ import ( "io" "net/http" "os" + "slices" "sync" "time" @@ -37,38 +38,44 @@ type Scanner interface { } // NewAntivirus returns a service implementation for Service. 
-func NewAntivirus(c *config.Config, l log.Logger, tp trace.TracerProvider) (Antivirus, error) { - +func NewAntivirus(cfg *config.Config, logger log.Logger, tracerProvider trace.TracerProvider) (Antivirus, error) { var scanner Scanner var err error - switch c.Scanner.Type { + switch cfg.Scanner.Type { default: - return Antivirus{}, fmt.Errorf("unknown av scanner: '%s'", c.Scanner.Type) - case "clamav": - scanner = scanners.NewClamAV(c.Scanner.ClamAV.Socket) - case "icap": - scanner, err = scanners.NewICAP(c.Scanner.ICAP.URL, c.Scanner.ICAP.Service, c.Scanner.ICAP.Timeout) + return Antivirus{}, fmt.Errorf("unknown av scanner: '%s'", cfg.Scanner.Type) + case config.ScannerTypeClamAV: + scanner, err = scanners.NewClamAV(cfg.Scanner.ClamAV.Socket, cfg.Scanner.ClamAV.Timeout) + case config.ScannerTypeICap: + scanner, err = scanners.NewICAP(cfg.Scanner.ICAP.URL, cfg.Scanner.ICAP.Service, cfg.Scanner.ICAP.Timeout) } if err != nil { return Antivirus{}, err } - av := Antivirus{c: c, l: l, tp: tp, s: scanner, client: rhttp.GetHTTPClient(rhttp.Insecure(true))} + av := Antivirus{config: cfg, log: logger, tracerProvider: tracerProvider, scanner: scanner, client: rhttp.GetHTTPClient(rhttp.Insecure(true))} - switch o := events.PostprocessingOutcome(c.InfectedFileHandling); o { - case events.PPOutcomeContinue, events.PPOutcomeAbort, events.PPOutcomeDelete: - av.o = o + switch mode := cfg.MaxScanSizeMode; mode { + case config.MaxScanSizeModeSkip, config.MaxScanSizeModePartial: + break default: - return av, fmt.Errorf("unknown infected file handling '%s'", o) + return av, fmt.Errorf("unknown max scan size mode '%s'", cfg.MaxScanSizeMode) } - if c.MaxScanSize != "" { - b, err := bytesize.Parse(c.MaxScanSize) + switch outcome := events.PostprocessingOutcome(cfg.InfectedFileHandling); outcome { + case events.PPOutcomeContinue, events.PPOutcomeAbort, events.PPOutcomeDelete: + av.outcome = outcome + default: + return av, fmt.Errorf("unknown infected file handling '%s'", outcome) + } + + 
if cfg.MaxScanSize != "" { + b, err := bytesize.Parse(cfg.MaxScanSize) if err != nil { return av, err } - av.m = b.Bytes() + av.maxScanSize = b.Bytes() } return av, nil @@ -76,23 +83,23 @@ func NewAntivirus(c *config.Config, l log.Logger, tp trace.TracerProvider) (Anti // Antivirus defines implements the business logic for Service. type Antivirus struct { - c *config.Config - l log.Logger - s Scanner - o events.PostprocessingOutcome - m uint64 - tp trace.TracerProvider + config *config.Config + log log.Logger + scanner Scanner + outcome events.PostprocessingOutcome + maxScanSize uint64 + tracerProvider trace.TracerProvider client *http.Client } // Run runs the service func (av Antivirus) Run() error { - evtsCfg := av.c.Events + eventsCfg := av.config.Events var rootCAPool *x509.CertPool - if av.c.Events.TLSRootCACertificate != "" { - rootCrtFile, err := os.Open(evtsCfg.TLSRootCACertificate) + if av.config.Events.TLSRootCACertificate != "" { + rootCrtFile, err := os.Open(eventsCfg.TLSRootCACertificate) if err != nil { return err } @@ -104,10 +111,10 @@ func (av Antivirus) Run() error { rootCAPool = x509.NewCertPool() rootCAPool.AppendCertsFromPEM(certBytes.Bytes()) - av.c.Events.TLSInsecure = false + av.config.Events.TLSInsecure = false } - natsStream, err := stream.NatsFromConfig(av.c.Service.Name, false, stream.NatsConfig(av.c.Events)) + natsStream, err := stream.NatsFromConfig(av.config.Service.Name, false, stream.NatsConfig(av.config.Events)) if err != nil { return err } @@ -118,7 +125,7 @@ func (av Antivirus) Run() error { } wg := sync.WaitGroup{} - for i := 0; i < av.c.Workers; i++ { + for i := 0; i < av.config.Workers; i++ { wg.Add(1) go func() { defer wg.Done() @@ -127,11 +134,11 @@ func (av Antivirus) Run() error { if err != nil { switch { case errors.Is(err, ErrFatal): - av.l.Fatal().Err(err).Msg("fatal error - exiting") + av.log.Fatal().Err(err).Msg("fatal error - exiting") case errors.Is(err, ErrEvent): - av.l.Error().Err(err).Msg("continuing") + 
av.log.Error().Err(err).Msg("continuing") default: - av.l.Fatal().Err(err).Msg("unknown error - exiting") + av.log.Fatal().Err(err).Msg("unknown error - exiting") } } } @@ -143,20 +150,20 @@ func (av Antivirus) Run() error { } func (av Antivirus) processEvent(e events.Event, s events.Publisher) error { - ctx := e.GetTraceContext(context.Background()) - ctx, span := av.tp.Tracer("antivirus").Start(ctx, "processEvent") + ctx, span := av.tracerProvider.Tracer("antivirus").Start(e.GetTraceContext(context.Background()), "processEvent") defer span.End() - av.l.Info().Str("traceID", span.SpanContext().TraceID().String()).Msg("TraceID") + av.log.Info().Str("traceID", span.SpanContext().TraceID().String()).Msg("TraceID") + ev := e.Event.(events.StartPostprocessingStep) if ev.StepToStart != events.PPStepAntivirus { return nil } - if av.c.DebugScanOutcome != "" { - av.l.Warn().Str("antivir, clamav", ">>>>>>> ANTIVIRUS_DEBUG_SCAN_OUTCOME IS SET NO ACTUAL VIRUS SCAN IS PERFORMED!").Send() + if av.config.DebugScanOutcome != "" { + av.log.Warn().Str("antivir, clamav", ">>>>>>> ANTIVIRUS_DEBUG_SCAN_OUTCOME IS SET NO ACTUAL VIRUS SCAN IS PERFORMED!").Send() if err := events.Publish(ctx, s, events.PostprocessingStepFinished{ FinishedStep: events.PPStepAntivirus, - Outcome: events.PostprocessingOutcome(av.c.DebugScanOutcome), + Outcome: events.PostprocessingOutcome(av.config.DebugScanOutcome), UploadID: ev.UploadID, ExecutingUser: ev.ExecutingUser, Filename: ev.Filename, @@ -167,13 +174,14 @@ func (av Antivirus) processEvent(e events.Event, s events.Publisher) error { ResourceID: ev.ResourceID, }, }); err != nil { - av.l.Fatal().Err(err).Str("uploadid", ev.UploadID).Interface("resourceID", ev.ResourceID).Msg("cannot publish events - exiting") + av.log.Fatal().Err(err).Str("uploadid", ev.UploadID).Interface("resourceID", ev.ResourceID).Msg("cannot publish events - exiting") return fmt.Errorf("%w: cannot publish events", ErrFatal) } return fmt.Errorf("%w: no actual virus scan 
performed", ErrEvent) } - av.l.Debug().Str("uploadid", ev.UploadID).Str("filename", ev.Filename).Msg("Starting virus scan.") + av.log.Debug().Str("uploadid", ev.UploadID).Str("filename", ev.Filename).Msg("Starting virus scan.") + var errmsg string start := time.Now() res, err := av.process(ev) @@ -185,17 +193,17 @@ func (av Antivirus) processEvent(e events.Event, s events.Publisher) error { var outcome events.PostprocessingOutcome switch { case res.Infected: - outcome = av.o + outcome = av.outcome case !res.Infected && err == nil: outcome = events.PPOutcomeContinue case err != nil: outcome = events.PPOutcomeRetry default: - // Not sure what this is about. abort. + // Not sure what this is about. Abort. outcome = events.PPOutcomeAbort } - av.l.Info().Str("uploadid", ev.UploadID).Interface("resourceID", ev.ResourceID).Str("virus", res.Description).Str("outcome", string(outcome)).Str("filename", ev.Filename).Str("user", ev.ExecutingUser.GetId().GetOpaqueId()).Bool("infected", res.Infected).Dur("duration", duration).Msg("File scanned") + av.log.Info().Str("uploadid", ev.UploadID).Interface("resourceID", ev.ResourceID).Str("virus", res.Description).Str("outcome", string(outcome)).Str("filename", ev.Filename).Str("user", ev.ExecutingUser.GetId().GetOpaqueId()).Bool("infected", res.Infected).Dur("duration", duration).Msg("File scanned") if err := events.Publish(ctx, s, events.PostprocessingStepFinished{ FinishedStep: events.PPStepAntivirus, Outcome: outcome, @@ -210,7 +218,7 @@ func (av Antivirus) processEvent(e events.Event, s events.Publisher) error { ErrorMsg: errmsg, }, }); err != nil { - av.l.Fatal().Err(err).Str("uploadid", ev.UploadID).Interface("resourceID", ev.ResourceID).Msg("cannot publish events - exiting") + av.log.Fatal().Err(err).Str("uploadid", ev.UploadID).Interface("resourceID", ev.ResourceID).Msg("cannot publish events - exiting") return fmt.Errorf("%w: %s", ErrFatal, err) } return nil @@ -218,11 +226,24 @@ func (av Antivirus) processEvent(e 
events.Event, s events.Publisher) error { // process the scan func (av Antivirus) process(ev events.StartPostprocessingStep) (scanners.Result, error) { - if ev.Filesize == 0 || (0 < av.m && av.m < ev.Filesize) { - av.l.Info().Str("uploadid", ev.UploadID).Uint64("limit", av.m).Uint64("filesize", ev.Filesize).Msg("Skipping file to be virus scanned because its file size is higher than the defined limit.") - return scanners.Result{ - ScanTime: time.Now(), - }, nil + if ev.Filesize == 0 { + av.log.Info().Str("uploadid", ev.UploadID).Msg("Skipping file to be virus scanned, file size is 0.") + return scanners.Result{ScanTime: time.Now()}, nil + } + + headers := make(map[string]string) + switch { + case av.maxScanSize == 0: + // there is no size limit + break + case av.config.MaxScanSizeMode == config.MaxScanSizeModeSkip && ev.Filesize > av.maxScanSize: + // skip the file if it is bigger than the max scan size + av.log.Info().Str("uploadid", ev.UploadID).Uint64("filesize", ev.Filesize). + Msg("Skipping file to be virus scanned, file size is bigger than max scan size.") + return scanners.Result{ScanTime: time.Now()}, nil + case av.config.MaxScanSizeMode == config.MaxScanSizeModePartial && ev.Filesize > av.maxScanSize: + // set the range header to only download the first maxScanSize bytes + headers["Range"] = fmt.Sprintf("bytes=0-%d", av.maxScanSize-1) } var err error @@ -230,56 +251,61 @@ func (av Antivirus) process(ev events.StartPostprocessingStep) (scanners.Result, switch ev.UploadID { default: - rrc, err = av.downloadViaToken(ev.URL) + rrc, err = av.downloadViaToken(ev.URL, headers) case "": - rrc, err = av.downloadViaReva(ev.URL, ev.Token, ev.RevaToken) + rrc, err = av.downloadViaReva(ev.URL, ev.Token, ev.RevaToken, headers) } if err != nil { - av.l.Error().Err(err).Str("uploadid", ev.UploadID).Msg("error downloading file") + av.log.Error().Err(err).Str("uploadid", ev.UploadID).Msg("error downloading file") return scanners.Result{}, err } - defer rrc.Close() - 
av.l.Debug().Str("uploadid", ev.UploadID).Msg("Downloaded file successfully, starting virusscan") + defer func() { + _ = rrc.Close() + }() - res, err := av.s.Scan(scanners.Input{Body: rrc, Size: int64(ev.Filesize), Url: ev.URL, Name: ev.Filename}) + av.log.Debug().Str("uploadid", ev.UploadID).Msg("Downloaded file successfully, starting virusscan") + + res, err := av.scanner.Scan(scanners.Input{Body: rrc, Size: int64(ev.Filesize), Url: ev.URL, Name: ev.Filename}) if err != nil { - av.l.Error().Err(err).Str("uploadid", ev.UploadID).Msg("error scanning file") + av.log.Error().Err(err).Str("uploadid", ev.UploadID).Msg("error scanning file") } return res, err } // download will download the file -func (av Antivirus) downloadViaToken(url string) (io.ReadCloser, error) { +func (av Antivirus) downloadViaToken(url string, headers map[string]string) (io.ReadCloser, error) { req, err := http.NewRequest(http.MethodGet, url, nil) if err != nil { return nil, err } - return av.doDownload(req) + return av.doDownload(req, headers) } // download will download the file -func (av Antivirus) downloadViaReva(url string, dltoken string, revatoken string) (io.ReadCloser, error) { - ctx := ctxpkg.ContextSetToken(context.Background(), revatoken) - - req, err := rhttp.NewRequest(ctx, http.MethodGet, url, nil) +func (av Antivirus) downloadViaReva(url string, dltoken string, revatoken string, headers map[string]string) (io.ReadCloser, error) { + req, err := rhttp.NewRequest(ctxpkg.ContextSetToken(context.Background(), revatoken), http.MethodGet, url, nil) if err != nil { return nil, err } req.Header.Set("X-Reva-Transfer", dltoken) - return av.doDownload(req) + return av.doDownload(req, headers) } -func (av Antivirus) doDownload(req *http.Request) (io.ReadCloser, error) { +func (av Antivirus) doDownload(req *http.Request, headers map[string]string) (io.ReadCloser, error) { + for k, v := range headers { + req.Header.Add(k, v) + } + res, err := av.client.Do(req) if err != nil { return nil, err } 
- if res.StatusCode != http.StatusOK { - res.Body.Close() + if !slices.Contains([]int{http.StatusOK, http.StatusPartialContent}, res.StatusCode) { + _ = res.Body.Close() return nil, fmt.Errorf("unexpected status code from Download %v", res.StatusCode) }