feat(BRIDGE-396): Observability metrics for vault issues; Extension to observability service to support caching

This commit is contained in:
Atanas Janeshliev
2025-07-11 13:20:49 +02:00
parent de3fd34998
commit 7faf32d0ff
19 changed files with 452 additions and 79 deletions

2
go.mod
View File

@ -9,7 +9,7 @@ require (
github.com/Masterminds/semver/v3 v3.2.0 github.com/Masterminds/semver/v3 v3.2.0
github.com/ProtonMail/gluon v0.17.1-0.20250627102828-b014b7cc8132 github.com/ProtonMail/gluon v0.17.1-0.20250627102828-b014b7cc8132
github.com/ProtonMail/go-autostart v0.0.0-20210130080809-00ed301c8e9a github.com/ProtonMail/go-autostart v0.0.0-20210130080809-00ed301c8e9a
github.com/ProtonMail/go-proton-api v0.4.1-0.20250627135952-bf973947255c github.com/ProtonMail/go-proton-api v0.4.1-0.20250717114555-7525f013ddc1
github.com/ProtonMail/gopenpgp/v2 v2.9.0-proton github.com/ProtonMail/gopenpgp/v2 v2.9.0-proton
github.com/PuerkitoBio/goquery v1.8.1 github.com/PuerkitoBio/goquery v1.8.1
github.com/abiosoft/ishell v2.0.0+incompatible github.com/abiosoft/ishell v2.0.0+incompatible

4
go.sum
View File

@ -45,8 +45,8 @@ github.com/ProtonMail/go-message v0.13.1-0.20240919135104-3bc88e6a9423 h1:p8nBDx
github.com/ProtonMail/go-message v0.13.1-0.20240919135104-3bc88e6a9423/go.mod h1:NBAn21zgCJ/52WLDyed18YvYFm5tEoeDauubFqLokM4= github.com/ProtonMail/go-message v0.13.1-0.20240919135104-3bc88e6a9423/go.mod h1:NBAn21zgCJ/52WLDyed18YvYFm5tEoeDauubFqLokM4=
github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f h1:tCbYj7/299ekTTXpdwKYF8eBlsYsDVoggDAuAjoK66k= github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f h1:tCbYj7/299ekTTXpdwKYF8eBlsYsDVoggDAuAjoK66k=
github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f/go.mod h1:gcr0kNtGBqin9zDW9GOHcVntrwnjrK+qdJ06mWYBybw= github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f/go.mod h1:gcr0kNtGBqin9zDW9GOHcVntrwnjrK+qdJ06mWYBybw=
github.com/ProtonMail/go-proton-api v0.4.1-0.20250627135952-bf973947255c h1:FhfHIrGgehnTV/T2NkyVauKcJ3NzPq1uLcU/0eK661A= github.com/ProtonMail/go-proton-api v0.4.1-0.20250717114555-7525f013ddc1 h1:Ryqa36leoJnZA1Ya4vfaH0rcTrrqjBuz9UG8HqCROrQ=
github.com/ProtonMail/go-proton-api v0.4.1-0.20250627135952-bf973947255c/go.mod h1:9t9+oQfH+6ssa7O2nLv34Uyjv8UmqTPGbVNcFToewck= github.com/ProtonMail/go-proton-api v0.4.1-0.20250717114555-7525f013ddc1/go.mod h1:9t9+oQfH+6ssa7O2nLv34Uyjv8UmqTPGbVNcFToewck=
github.com/ProtonMail/go-smtp v0.0.0-20231109081432-2b3d50599865 h1:EP1gnxLL5Z7xBSymE9nSTM27nRYINuvssAtDmG0suD8= github.com/ProtonMail/go-smtp v0.0.0-20231109081432-2b3d50599865 h1:EP1gnxLL5Z7xBSymE9nSTM27nRYINuvssAtDmG0suD8=
github.com/ProtonMail/go-smtp v0.0.0-20231109081432-2b3d50599865/go.mod h1:qm27SGYgoIPRot6ubfQ/GpiPy/g3PaZAVRxiO/sDUgQ= github.com/ProtonMail/go-smtp v0.0.0-20231109081432-2b3d50599865/go.mod h1:qm27SGYgoIPRot6ubfQ/GpiPy/g3PaZAVRxiO/sDUgQ=
github.com/ProtonMail/go-srp v0.0.7 h1:Sos3Qk+th4tQR64vsxGIxYpN3rdnG9Wf9K4ZloC1JrI= github.com/ProtonMail/go-srp v0.0.7 h1:Sos3Qk+th4tQR64vsxGIxYpN3rdnG9Wf9K4ZloC1JrI=

View File

@ -41,6 +41,7 @@ import (
"github.com/ProtonMail/proton-bridge/v3/internal/logging" "github.com/ProtonMail/proton-bridge/v3/internal/logging"
"github.com/ProtonMail/proton-bridge/v3/internal/platform" "github.com/ProtonMail/proton-bridge/v3/internal/platform"
"github.com/ProtonMail/proton-bridge/v3/internal/sentry" "github.com/ProtonMail/proton-bridge/v3/internal/sentry"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
"github.com/ProtonMail/proton-bridge/v3/internal/unleash" "github.com/ProtonMail/proton-bridge/v3/internal/unleash"
"github.com/ProtonMail/proton-bridge/v3/internal/useragent" "github.com/ProtonMail/proton-bridge/v3/internal/useragent"
"github.com/ProtonMail/proton-bridge/v3/internal/vault" "github.com/ProtonMail/proton-bridge/v3/internal/vault"
@ -292,53 +293,56 @@ func run(c *cli.Context) error {
return withSingleInstance(settings, locations.GetLockFile(), version, func() error { return withSingleInstance(settings, locations.GetLockFile(), version, func() error {
// Look for available keychains // Look for available keychains
return WithKeychainList(crashHandler, func(keychains *keychain.List) error { return WithKeychainList(crashHandler, func(keychains *keychain.List) error {
// Unlock the encrypted vault. // Pre-init the observability service, load the cached metrics.
return WithVault(reporter, locations, keychains, featureFlags, crashHandler, func(v *vault.Vault, insecure, corrupt bool) error { return observability.WithObservability(locations, func(obsService *observability.Service) error {
if !v.Migrated() { // Unlock the encrypted vault.
// Migrate old settings into the vault. return WithVault(reporter, locations, keychains, obsService, featureFlags, crashHandler, func(v *vault.Vault, insecure, corrupt bool) error {
if err := migrateOldSettings(v); err != nil { if !v.Migrated() {
logrus.WithError(err).Error("Failed to migrate old settings") // Migrate old settings into the vault.
} if err := migrateOldSettings(v); err != nil {
logrus.WithError(err).Error("Failed to migrate old settings")
// Migrate old accounts into the vault.
if err := migrateOldAccounts(locations, keychains, v); err != nil {
logrus.WithError(err).Error("Failed to migrate old accounts")
}
// The vault has been migrated.
if err := v.SetMigrated(); err != nil {
logrus.WithError(err).Error("Failed to mark vault as migrated")
}
}
logrus.WithFields(logrus.Fields{
"lastVersion": v.GetLastVersion().String(),
"showAllMail": v.GetShowAllMail(),
"updateCh": v.GetUpdateChannel(),
"autoUpdate": v.GetAutoUpdate(),
"rollout": v.GetUpdateRollout(),
"DoH": v.GetProxyAllowed(),
}).Info("Vault loaded")
// Load the cookies from the vault.
return withCookieJar(v, func(cookieJar http.CookieJar) error {
// Create a new bridge instance.
return withBridge(c, exe, locations, version, identifier, crashHandler, reporter, v, cookieJar, keychains, func(b *bridge.Bridge, eventCh <-chan events.Event) error {
if insecure {
logrus.Warn("The vault key could not be retrieved; the vault will not be encrypted")
b.PushError(bridge.ErrVaultInsecure)
} }
if corrupt { // Migrate old accounts into the vault.
logrus.Warn("The vault is corrupt and has been wiped") if err := migrateOldAccounts(locations, keychains, v); err != nil {
b.PushError(bridge.ErrVaultCorrupt) logrus.WithError(err).Error("Failed to migrate old accounts")
} }
// Remove old updates files // The vault has been migrated.
b.RemoveOldUpdates() if err := v.SetMigrated(); err != nil {
logrus.WithError(err).Error("Failed to mark vault as migrated")
}
}
// Run the frontend. logrus.WithFields(logrus.Fields{
return runFrontend(c, crashHandler, restarter, locations, b, eventCh, quitCh, c.Int(flagParentPID)) "lastVersion": v.GetLastVersion().String(),
"showAllMail": v.GetShowAllMail(),
"updateCh": v.GetUpdateChannel(),
"autoUpdate": v.GetAutoUpdate(),
"rollout": v.GetUpdateRollout(),
"DoH": v.GetProxyAllowed(),
}).Info("Vault loaded")
// Load the cookies from the vault.
return withCookieJar(v, func(cookieJar http.CookieJar) error {
// Create a new bridge instance.
return withBridge(c, exe, locations, version, identifier, obsService, crashHandler, reporter, v, cookieJar, keychains, func(b *bridge.Bridge, eventCh <-chan events.Event) error {
if insecure {
logrus.Warn("The vault key could not be retrieved; the vault will not be encrypted")
b.PushError(bridge.ErrVaultInsecure)
}
if corrupt {
logrus.Warn("The vault is corrupt and has been wiped")
b.PushError(bridge.ErrVaultCorrupt)
}
// Remove old updates files
b.RemoveOldUpdates()
// Run the frontend.
return runFrontend(c, crashHandler, restarter, locations, b, eventCh, quitCh, c.Int(flagParentPID))
})
}) })
}) })
}) })

View File

@ -33,6 +33,7 @@ import (
"github.com/ProtonMail/proton-bridge/v3/internal/events" "github.com/ProtonMail/proton-bridge/v3/internal/events"
"github.com/ProtonMail/proton-bridge/v3/internal/locations" "github.com/ProtonMail/proton-bridge/v3/internal/locations"
"github.com/ProtonMail/proton-bridge/v3/internal/sentry" "github.com/ProtonMail/proton-bridge/v3/internal/sentry"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
"github.com/ProtonMail/proton-bridge/v3/internal/updater" "github.com/ProtonMail/proton-bridge/v3/internal/updater"
"github.com/ProtonMail/proton-bridge/v3/internal/useragent" "github.com/ProtonMail/proton-bridge/v3/internal/useragent"
"github.com/ProtonMail/proton-bridge/v3/internal/vault" "github.com/ProtonMail/proton-bridge/v3/internal/vault"
@ -52,6 +53,7 @@ func withBridge(
locations *locations.Locations, locations *locations.Locations,
version *semver.Version, version *semver.Version,
identifier *useragent.UserAgent, identifier *useragent.UserAgent,
obsService *observability.Service,
crashHandler *crash.Handler, crashHandler *crash.Handler,
reporter *sentry.Reporter, reporter *sentry.Reporter,
vault *vault.Vault, vault *vault.Vault,
@ -100,6 +102,7 @@ func withBridge(
updater, updater,
version, version,
keychains, keychains,
obsService,
// The API stuff. // The API stuff.
constants.APIHost, constants.APIHost,

View File

@ -31,18 +31,20 @@ import (
"github.com/ProtonMail/proton-bridge/v3/internal/locations" "github.com/ProtonMail/proton-bridge/v3/internal/locations"
"github.com/ProtonMail/proton-bridge/v3/internal/platform" "github.com/ProtonMail/proton-bridge/v3/internal/platform"
"github.com/ProtonMail/proton-bridge/v3/internal/sentry" "github.com/ProtonMail/proton-bridge/v3/internal/sentry"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
"github.com/ProtonMail/proton-bridge/v3/internal/unleash" "github.com/ProtonMail/proton-bridge/v3/internal/unleash"
"github.com/ProtonMail/proton-bridge/v3/internal/vault" "github.com/ProtonMail/proton-bridge/v3/internal/vault"
"github.com/ProtonMail/proton-bridge/v3/internal/vault/observabilitymetrics"
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain" "github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
func WithVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler, fn func(*vault.Vault, bool, bool) error) error { func WithVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, obsSender observability.BasicSender, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler, fn func(*vault.Vault, bool, bool) error) error {
logrus.Debug("Creating vault") logrus.Debug("Creating vault")
defer logrus.Debug("Vault stopped") defer logrus.Debug("Vault stopped")
// Create the encVault. // Create the encVault.
encVault, insecure, corrupt, err := newVault(reporter, locations, keychains, featureFlags, panicHandler) encVault, insecure, corrupt, err := newVault(reporter, locations, keychains, obsSender, featureFlags, panicHandler)
if err != nil { if err != nil {
return fmt.Errorf("could not create vault: %w", err) return fmt.Errorf("could not create vault: %w", err)
} }
@ -64,7 +66,7 @@ func WithVault(reporter *sentry.Reporter, locations *locations.Locations, keycha
return fn(encVault, insecure, corrupt != nil) return fn(encVault, insecure, corrupt != nil)
} }
func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler) (*vault.Vault, bool, error, error) { func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, obsSender observability.BasicSender, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler) (*vault.Vault, bool, error, error) {
vaultDir, err := locations.ProvideSettingsPath() vaultDir, err := locations.ProvideSettingsPath()
if err != nil { if err != nil {
return nil, false, nil, fmt.Errorf("could not get vault dir: %w", err) return nil, false, nil, fmt.Errorf("could not get vault dir: %w", err)
@ -101,6 +103,9 @@ func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychai
// We store the insecure vault in a separate directory // We store the insecure vault in a separate directory
vaultDir = path.Join(vaultDir, "insecure") vaultDir = path.Join(vaultDir, "insecure")
// Schedule the relevant observability metric for sending.
obsSender.AddMetrics(observabilitymetrics.GenerateVaultKeyFetchGenericErrorMetric())
} else { } else {
vaultKey = key vaultKey = key
lastUsedHelper = helper lastUsedHelper = helper
@ -114,9 +119,14 @@ func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychai
userVault, corrupt, err := vault.New(vaultDir, gluonCacheDir, vaultKey, panicHandler) userVault, corrupt, err := vault.New(vaultDir, gluonCacheDir, vaultKey, panicHandler)
if err != nil { if err != nil {
obsSender.AddMetrics(observabilitymetrics.GenerateVaultCreationGenericErrorMetric())
return nil, false, corrupt, fmt.Errorf("could not create vault: %w", err) return nil, false, corrupt, fmt.Errorf("could not create vault: %w", err)
} }
if corrupt != nil {
obsSender.AddMetrics(observabilitymetrics.GenerateVaultCreationCorruptErrorMetric())
}
// Remember the last successfully used keychain on Linux and store that as the user preference. // Remember the last successfully used keychain on Linux and store that as the user preference.
if runtime.GOOS == platform.LINUX { if runtime.GOOS == platform.LINUX {
if err := vault.SetHelper(vaultDir, lastUsedHelper); err != nil { if err := vault.SetHelper(vaultDir, lastUsedHelper); err != nil {

View File

@ -168,6 +168,7 @@ func New(
updater Updater, // the updater to fetch and install updates updater Updater, // the updater to fetch and install updates
curVersion *semver.Version, // the current version of the bridge curVersion *semver.Version, // the current version of the bridge
keychains *keychain.List, // usable keychains keychains *keychain.List, // usable keychains
obsService *observability.Service,
apiURL string, // the URL of the API to use apiURL string, // the URL of the API to use
cookieJar http.CookieJar, // the cookie jar to use cookieJar http.CookieJar, // the cookie jar to use
@ -206,6 +207,7 @@ func New(
keychains, keychains,
panicHandler, panicHandler,
reporter, reporter,
obsService,
api, api,
identifier, identifier,
@ -242,6 +244,7 @@ func newBridge(
keychains *keychain.List, keychains *keychain.List,
panicHandler async.PanicHandler, panicHandler async.PanicHandler,
reporter reporter.Reporter, reporter reporter.Reporter,
obsService *observability.Service,
api *proton.Manager, api *proton.Manager,
identifier identifier.Identifier, identifier identifier.Identifier,
@ -275,7 +278,7 @@ func newBridge(
unleashService := unleash.NewBridgeService(ctx, api, locator, panicHandler, vault.GetFeatureFlagStickyKey()) unleashService := unleash.NewBridgeService(ctx, api, locator, panicHandler, vault.GetFeatureFlagStickyKey())
observabilityService := observability.NewService(ctx, panicHandler) obsService.Initialize(ctx, panicHandler)
bridge := &Bridge{ bridge := &Bridge{
vault: vault, vault: vault,
@ -317,11 +320,11 @@ func newBridge(
lastVersion: lastVersion, lastVersion: lastVersion,
tasks: tasks, tasks: tasks,
syncService: syncservice.NewService(panicHandler, observabilityService), syncService: syncservice.NewService(panicHandler, obsService),
unleashService: unleashService, unleashService: unleashService,
observabilityService: observabilityService, observabilityService: obsService,
notificationStore: notifications.NewStore(locator.ProvideNotificationsCachePath), notificationStore: notifications.NewStore(locator.ProvideNotificationsCachePath),
@ -336,7 +339,7 @@ func newBridge(
reporter, reporter,
uidValidityGenerator, uidValidityGenerator,
&bridgeIMAPSMTPTelemetry{b: bridge}, &bridgeIMAPSMTPTelemetry{b: bridge},
observabilityService, obsService,
unleashService, unleashService,
) )

View File

@ -45,6 +45,7 @@ import (
"github.com/ProtonMail/proton-bridge/v3/internal/focus" "github.com/ProtonMail/proton-bridge/v3/internal/focus"
"github.com/ProtonMail/proton-bridge/v3/internal/locations" "github.com/ProtonMail/proton-bridge/v3/internal/locations"
"github.com/ProtonMail/proton-bridge/v3/internal/services/imapsmtpserver" "github.com/ProtonMail/proton-bridge/v3/internal/services/imapsmtpserver"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
"github.com/ProtonMail/proton-bridge/v3/internal/unleash" "github.com/ProtonMail/proton-bridge/v3/internal/unleash"
"github.com/ProtonMail/proton-bridge/v3/internal/updater" "github.com/ProtonMail/proton-bridge/v3/internal/updater"
"github.com/ProtonMail/proton-bridge/v3/internal/user" "github.com/ProtonMail/proton-bridge/v3/internal/user"
@ -944,6 +945,7 @@ func withBridgeNoMocks(
mocks.Updater, mocks.Updater,
v2_3_0, v2_3_0,
keychain.NewTestKeychainsList(), keychain.NewTestKeychainsList(),
observability.NewTestService(),
// The API stuff. // The API stuff.
apiURL, apiURL,

View File

@ -95,7 +95,7 @@ func TestTLSSignedCertTrustedPublicKey(t *testing.T) {
_, dialer, _, checker, _ := createClientWithPinningDialer("") _, dialer, _, checker, _ := createClientWithPinningDialer("")
copyTrustedPins(checker) copyTrustedPins(checker)
checker.trustedPins = append(checker.trustedPins, `pin-sha256="FlvTPG/nIMKtOj9nelnEjujwSZ5EDyfiKYxZgbXREls="`) checker.trustedPins = append(checker.trustedPins, `pin-sha256="coQ/Z6I+kjMViHVis67UVQDyCzXa1IVEuTKwF8wR9uQ="`)
_, err := dialer.DialTLSContext(context.Background(), "tcp", "rsa4096.badssl.com:443") _, err := dialer.DialTLSContext(context.Background(), "tcp", "rsa4096.badssl.com:443")
r.NoError(t, err, "expected dial to succeed because public key is known and cert is signed by CA") r.NoError(t, err, "expected dial to succeed because public key is known and cert is signed by CA")
} }

View File

@ -215,6 +215,14 @@ func (l *Locations) ProvideUnleashStartupCachePath() (string, error) {
return l.getUnleashStartupCachePath(), nil return l.getUnleashStartupCachePath(), nil
} }
// ProvideObservabilityMetricsCachePath makes sure the observability metrics cache
// directory exists (creating it with mode 0700 when needed) and returns its path.
// An empty string is returned together with the error if the directory cannot be created.
func (l *Locations) ProvideObservabilityMetricsCachePath() (string, error) {
	cacheDir := l.getObservabilityMetricsCachePath()

	err := os.MkdirAll(cacheDir, 0o700)
	if err != nil {
		return "", err
	}

	return cacheDir, nil
}
func (l *Locations) getGluonCachePath() string { func (l *Locations) getGluonCachePath() string {
return filepath.Join(l.userData, "gluon") return filepath.Join(l.userData, "gluon")
} }
@ -257,6 +265,10 @@ func (l *Locations) getUnleashStartupCachePath() string {
return filepath.Join(l.userCache, "unleash_startup_cache") return filepath.Join(l.userCache, "unleash_startup_cache")
} }
// getObservabilityMetricsCachePath builds the path of the "observability_cache"
// directory under l.userCache. It only joins path elements and does not touch the filesystem.
func (l *Locations) getObservabilityMetricsCachePath() string {
	const dirName = "observability_cache"

	return filepath.Join(l.userCache, dirName)
}
// Clear removes everything except the lock and update files. // Clear removes everything except the lock and update files.
func (l *Locations) Clear(except ...string) error { func (l *Locations) Clear(except ...string) error {
return files.Remove( return files.Remove(

View File

@ -19,12 +19,17 @@ package observability
import ( import (
"context" "context"
"encoding/json"
"os"
"path/filepath"
"sync" "sync"
"time" "time"
"github.com/ProtonMail/gluon/async" "github.com/ProtonMail/gluon/async"
"github.com/ProtonMail/go-proton-api" "github.com/ProtonMail/go-proton-api"
"github.com/ProtonMail/proton-bridge/v3/internal/locations"
"github.com/ProtonMail/proton-bridge/v3/internal/services/telemetry" "github.com/ProtonMail/proton-bridge/v3/internal/services/telemetry"
"github.com/bradenaw/juniper/xslices"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
@ -34,6 +39,7 @@ var throttleDuration = 5 * time.Second //nolint:gochecknoglobals
const ( const (
maxStorageSize = 5000 maxStorageSize = 5000
maxBatchSize = 1000 maxBatchSize = 1000
filename = "metric_cache.json"
) )
type client struct { type client struct {
@ -50,15 +56,23 @@ type Sender interface {
GetEmailClient() string GetEmailClient() string
} }
type BasicSender interface {
AddMetrics(metric ...proton.ObservabilityMetric)
}
type Service struct { type Service struct {
ctx context.Context ctx context.Context
cancel context.CancelFunc cancel context.CancelFunc
panicHandler async.PanicHandler panicHandler async.PanicHandler
cachePath string
lastDispatch time.Time lastDispatch time.Time
isDispatchScheduled bool isDispatchScheduled bool
wg sync.WaitGroup
signalDataArrived chan struct{} signalDataArrived chan struct{}
signalDispatch chan struct{} signalDispatch chan struct{}
@ -73,41 +87,70 @@ type Service struct {
distinctionUtility *distinctionUtility distinctionUtility *distinctionUtility
} }
func NewService(ctx context.Context, panicHandler async.PanicHandler) *Service { func newService() *Service {
ctx, cancel := context.WithCancel(ctx) return &Service{
ctx: context.Background(),
service := &Service{ metricStore: make([]proton.ObservabilityMetric, 0),
ctx: ctx, log: logrus.WithFields(logrus.Fields{"pkg": "observability"}),
cancel: cancel,
panicHandler: panicHandler,
lastDispatch: time.Now().Add(-throttleDuration),
signalDataArrived: make(chan struct{}, 1),
signalDispatch: make(chan struct{}, 1),
log: logrus.WithFields(logrus.Fields{"pkg": "observability"}),
metricStore: make([]proton.ObservabilityMetric, 0),
userClientStore: make(map[string]*client), userClientStore: make(map[string]*client),
} }
}
service.distinctionUtility = newDistinctionUtility(ctx, panicHandler, service) // NewTestService initializes a new basic observability service with the required struct fields.
// Should only be used for testing.
func NewTestService() *Service {
	// Only the fields populated by newService are set; Initialize is not called here.
	svc := newService()

	return svc
}
return service func WithObservability(locations *locations.Locations, fn func(service *Service) error) error {
service := newService()
cacheDir, err := locations.ProvideObservabilityMetricsCachePath()
if err != nil {
service.log.WithError(err).Warn("Could not obtain cache path")
return fn(service)
}
cachePath := filepath.Clean(filepath.Join(cacheDir, filename))
service.cachePath = cachePath
service.readCacheFile()
defer service.writeCacheFile()
return fn(service)
}
// Initialize sets up the observability Service. If not initialized, the service will remain inactive and emit no metrics.
// Should exclusively be called during bridge set-up.
func (s *Service) Initialize(ctx context.Context, panicHandler async.PanicHandler) {
ctx, cancel := context.WithCancel(ctx)
s.ctx = ctx
s.cancel = cancel
s.panicHandler = panicHandler
s.lastDispatch = time.Now().Add(-throttleDuration)
s.signalDataArrived = make(chan struct{}, 1)
s.signalDispatch = make(chan struct{}, 1)
s.distinctionUtility = newDistinctionUtility(ctx, panicHandler, s)
} }
// Run starts the observability service goroutine. // Run starts the observability service goroutine.
// The function also sets some utility functions to a helper struct aimed at differentiating the amount of users sending metric updates. // The function also sets some utility functions to a helper struct aimed at differentiating the amount of users sending metric updates.
func (s *Service) Run(settingsGetter settingsGetter) { func (s *Service) Run(settingsGetter settingsGetter) {
s.log.Info("Starting service") if s.log != nil {
s.log.Info("Starting service")
}
s.distinctionUtility.setSettingsGetter(settingsGetter) s.distinctionUtility.setSettingsGetter(settingsGetter)
s.distinctionUtility.runHeartbeat() s.distinctionUtility.runHeartbeat()
s.wg.Add(1)
go func() { go func() {
defer s.wg.Done()
s.start() s.start()
}() }()
} }
@ -142,6 +185,62 @@ func (s *Service) start() {
} }
} }
// readCacheFile loads the metrics stored at s.cachePath and makes them the current metric store.
//
// It does nothing when no cache path is configured. A file that cannot be opened is
// logged at info level and ignored. The content is decoded into a temporary slice and
// only swapped into the store once decoding has fully succeeded, so a damaged cache
// file cannot leave partially decoded entries behind.
func (s *Service) readCacheFile() {
	if s.cachePath == "" {
		return
	}

	file, err := os.Open(s.cachePath)
	if err != nil {
		s.log.WithError(err).Info("Unable to open cache file")
		return
	}
	defer func() {
		if err := file.Close(); err != nil {
			s.log.WithError(err).Error("Unable to close cache file after read")
		}
	}()

	// Decode outside of the store lock: file I/O does not need to block other users of the store.
	var cached []proton.ObservabilityMetric
	if err := json.NewDecoder(file).Decode(&cached); err != nil {
		s.log.WithError(err).Error("Unable to decode cache file")
		return
	}

	// Since we omit marshalling the field, we need to explicitly overwrite it.
	for i := range cached {
		cached[i].ShouldCache = true
	}

	s.withMetricStoreLock(func() {
		s.metricStore = cached
	})
}
// writeCacheFile persists every metric in the store that has ShouldCache set to the
// file at s.cachePath, replacing any previous content.
//
// It does nothing when no cache path is configured. Failures are logged and never
// propagated; if the file cannot be created the function returns right away.
func (s *Service) writeCacheFile() {
	if s.cachePath == "" {
		return
	}

	file, err := os.Create(s.cachePath)
	if err != nil {
		s.log.WithError(err).Warn("Unable to create cache file")
		// Without a file handle there is nothing to encode into or to close.
		return
	}
	defer func() {
		if err := file.Close(); err != nil {
			s.log.WithError(err).Error("Unable to close cache file after write")
		}
	}()

	s.withMetricStoreLock(func() {
		metricsToCache := xslices.Filter(s.metricStore, func(m proton.ObservabilityMetric) bool {
			return m.ShouldCache
		})

		if err := json.NewEncoder(file).Encode(metricsToCache); err != nil {
			s.log.WithError(err).Error("Unable to encode data to cache file")
		}
	})
}
func (s *Service) dispatchData() { func (s *Service) dispatchData() {
s.isDispatchScheduled = false // Only accessed via a single goroutine, so no mutexes. s.isDispatchScheduled = false // Only accessed via a single goroutine, so no mutexes.
if !s.haveMetricsAndClients() { if !s.haveMetricsAndClients() {
@ -237,6 +336,12 @@ func (s *Service) addMetrics(metric ...proton.ObservabilityMetric) {
s.sendSignal(s.signalDataArrived) s.sendSignal(s.signalDataArrived)
} }
// flushMetricsTest swaps the metric store for a fresh empty slice via withMetricStoreLock.
// As the name indicates, it exists for use from tests.
func (s *Service) flushMetricsTest() {
	reset := func() {
		s.metricStore = []proton.ObservabilityMetric{}
	}

	s.withMetricStoreLock(reset)
}
// addMetricsIfClients - will append a metric only if there are authenticated clients // addMetricsIfClients - will append a metric only if there are authenticated clients
// via which we can reach the endpoint. // via which we can reach the endpoint.
func (s *Service) addMetricsIfClients(metric ...proton.ObservabilityMetric) { func (s *Service) addMetricsIfClients(metric ...proton.ObservabilityMetric) {
@ -280,6 +385,7 @@ func (s *Service) Stop() {
s.log.Info("Stopping service") s.log.Info("Stopping service")
s.cancel() s.cancel()
s.wg.Wait()
close(s.signalDataArrived) close(s.signalDataArrived)
close(s.signalDispatch) close(s.signalDispatch)
} }

View File

@ -0,0 +1,151 @@
// Copyright (c) 2025 Proton AG
//
// This file is part of Proton Mail Bridge.
//
// Proton Mail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Proton Mail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
package observability
import (
"context"
"path/filepath"
"testing"
"time"
"github.com/ProtonMail/go-proton-api"
"github.com/stretchr/testify/require"
)
// TestService_cacheFile_NoCachePath checks that reading and writing the cache on a
// service without a cache path leaves the metric store empty.
func TestService_cacheFile_NoCachePath(t *testing.T) {
	svc := NewTestService()

	svc.readCacheFile()
	svc.writeCacheFile()

	require.Empty(t, svc.metricStore)
}
// TestService_cacheFile_ValidCachePath checks that a read followed by a write against a
// cache path inside a fresh temporary directory leaves the metric store empty.
func TestService_cacheFile_ValidCachePath(t *testing.T) {
	svc := NewTestService()
	svc.cachePath = filepath.Join(t.TempDir(), "test_cache.json")

	svc.readCacheFile()
	svc.writeCacheFile()

	require.Empty(t, svc.metricStore)
}
// TestService_cacheFile_AllMetricsCacheable verifies that metrics which all have
// ShouldCache set survive a write / flush / read round trip through the cache file.
func TestService_cacheFile_AllMetricsCacheable(t *testing.T) {
	tempDir := t.TempDir()
	cachePath := filepath.Clean(filepath.Join(tempDir, "test_cache.json"))

	s := NewTestService()
	s.cachePath = cachePath
	s.ctx = context.Background()

	now := time.Now().Unix()
	testMetrics := []proton.ObservabilityMetric{
		{Name: "test1", Version: 1, Timestamp: now, Data: nil, ShouldCache: true},
		{Name: "test2", Version: 2, Timestamp: now, Data: nil, ShouldCache: true},
		{Name: "test3", Version: 3, Timestamp: now, Data: nil, ShouldCache: true},
	}

	// Nothing has been written yet, so the first read must not populate the store.
	s.readCacheFile()
	require.Empty(t, s.metricStore)

	// require.Equal takes (expected, actual); keeping that order makes failure output read correctly.
	s.addMetrics(testMetrics...)
	require.Equal(t, testMetrics, s.metricStore)

	s.writeCacheFile()
	s.flushMetricsTest()
	require.Empty(t, s.metricStore)

	s.readCacheFile()
	require.Equal(t, testMetrics, s.metricStore)
}
// TestService_cacheFile_SomeMetricsCacheable verifies that only metrics with ShouldCache
// set are written to the cache file and restored by a subsequent read.
func TestService_cacheFile_SomeMetricsCacheable(t *testing.T) {
	tempDir := t.TempDir()
	cachePath := filepath.Clean(filepath.Join(tempDir, "test_cache.json"))

	s := NewTestService()
	s.cachePath = cachePath
	s.ctx = context.Background()

	now := time.Now().Unix()
	testMetricsCacheable := []proton.ObservabilityMetric{
		{Name: "test1", Version: 1, Timestamp: now, Data: nil, ShouldCache: true},
		{Name: "test2", Version: 2, Timestamp: now, Data: nil, ShouldCache: true},
	}
	testMetricsNonCacheable := []proton.ObservabilityMetric{
		{Name: "test3", Version: 3, Timestamp: now},
		{Name: "test2", Version: 2, Timestamp: now},
	}

	// Build the expected content in a fresh slice so the fixtures are never aliased by append.
	allMetrics := make([]proton.ObservabilityMetric, 0, len(testMetricsCacheable)+len(testMetricsNonCacheable))
	allMetrics = append(allMetrics, testMetricsCacheable...)
	allMetrics = append(allMetrics, testMetricsNonCacheable...)

	// Nothing has been written yet, so the first read must not populate the store.
	s.readCacheFile()
	require.Empty(t, s.metricStore)

	// require.Equal takes (expected, actual); keeping that order makes failure output read correctly.
	s.addMetrics(testMetricsCacheable...)
	s.addMetrics(testMetricsNonCacheable...)
	require.Equal(t, allMetrics, s.metricStore)

	s.writeCacheFile()
	s.flushMetricsTest()
	require.Empty(t, s.metricStore)

	// Only the cacheable metrics are expected to come back from the cache file.
	s.readCacheFile()
	require.Equal(t, testMetricsCacheable, s.metricStore)
}

View File

@ -168,7 +168,7 @@ func withUser(tb testing.TB, ctx context.Context, _ *server.Server, m *proton.Ma
nullSMTPServerManager, nullSMTPServerManager,
nullEventSubscription, nullEventSubscription,
nil, nil,
observability.NewService(context.Background(), nil), observability.NewTestService(),
"", "",
true, true,
notifications.NewStore(func() (string, error) { notifications.NewStore(func() (string, error) {

View File

@ -0,0 +1,56 @@
// Copyright (c) 2025 Proton AG
//
// This file is part of Proton Mail Bridge.
//
// Proton Mail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Proton Mail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
package observabilitymetrics
import (
"time"
"github.com/ProtonMail/go-proton-api"
)
const (
	// vaultErrorsSchemaName is the metric name set on every vault error metric built in this package.
	vaultErrorsSchemaName = "bridge_vault_errors_total"

	// vaultErrorsSchemaVersion is the version set alongside vaultErrorsSchemaName on those metrics.
	vaultErrorsSchemaVersion = 1
)
// generateVaultErrorObservabilityMetric builds a vault error metric flagged as cacheable,
// stamped with the current Unix time, carrying a value of 1 and the given errorType label.
func generateVaultErrorObservabilityMetric(errorType string) proton.ObservabilityMetric {
	labels := map[string]string{"errorType": errorType}
	payload := map[string]interface{}{
		"Value":  1,
		"Labels": labels,
	}

	return proton.ObservabilityMetric{
		Name:        vaultErrorsSchemaName,
		Version:     vaultErrorsSchemaVersion,
		Timestamp:   time.Now().Unix(),
		ShouldCache: true,
		Data:        payload,
	}
}
// GenerateVaultCreationCorruptErrorMetric returns the vault error metric labelled "vaultCorrupt".
func GenerateVaultCreationCorruptErrorMetric() proton.ObservabilityMetric {
	const errorType = "vaultCorrupt"

	return generateVaultErrorObservabilityMetric(errorType)
}
// GenerateVaultCreationGenericErrorMetric returns the vault error metric labelled "vaultError".
func GenerateVaultCreationGenericErrorMetric() proton.ObservabilityMetric {
	const errorType = "vaultError"

	return generateVaultErrorObservabilityMetric(errorType)
}
// GenerateVaultKeyFetchGenericErrorMetric returns the vault error metric labelled "keychainError".
func GenerateVaultKeyFetchGenericErrorMetric() proton.ObservabilityMetric {
	const errorType = "keychainError"

	return generateVaultErrorObservabilityMetric(errorType)
}

View File

@ -39,6 +39,7 @@ import (
"github.com/ProtonMail/proton-bridge/v3/internal/events" "github.com/ProtonMail/proton-bridge/v3/internal/events"
frontend "github.com/ProtonMail/proton-bridge/v3/internal/frontend/grpc" frontend "github.com/ProtonMail/proton-bridge/v3/internal/frontend/grpc"
"github.com/ProtonMail/proton-bridge/v3/internal/service" "github.com/ProtonMail/proton-bridge/v3/internal/service"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
"github.com/ProtonMail/proton-bridge/v3/internal/useragent" "github.com/ProtonMail/proton-bridge/v3/internal/useragent"
"github.com/ProtonMail/proton-bridge/v3/internal/vault" "github.com/ProtonMail/proton-bridge/v3/internal/vault"
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain" "github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
@ -172,6 +173,7 @@ func (t *testCtx) initBridge() (<-chan events.Event, error) {
t.mocks.Updater, t.mocks.Updater,
t.version, t.version,
keychain.NewTestKeychainsList(), keychain.NewTestKeychainsList(),
observability.NewTestService(),
// API stuff // API stuff
t.api.GetHostURL(), t.api.GetHostURL(),

View File

@ -50,4 +50,8 @@ Feature: Bridge send remote notification observability metrics
And the user with username "[user:user1]" sends an SMTP send request observability metric And the user with username "[user:user1]" sends an SMTP send request observability metric
Then it succeeds Then it succeeds
Scenario: Test all possible Vault/Keychain related error observability metrics
When the user logs in with username "[user:user1]" and password "password"
And the user with username "[user:user1]" sends all possible vault or keychain related error observability metrics
Then it succeeds

View File

@ -28,6 +28,7 @@ import (
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability/gluonmetrics" "github.com/ProtonMail/proton-bridge/v3/internal/services/observability/gluonmetrics"
smtpMetrics "github.com/ProtonMail/proton-bridge/v3/internal/services/smtp/observabilitymetrics" smtpMetrics "github.com/ProtonMail/proton-bridge/v3/internal/services/smtp/observabilitymetrics"
"github.com/ProtonMail/proton-bridge/v3/internal/services/syncservice/observabilitymetrics" "github.com/ProtonMail/proton-bridge/v3/internal/services/syncservice/observabilitymetrics"
vaultMetrics "github.com/ProtonMail/proton-bridge/v3/internal/vault/observabilitymetrics"
) )
// userHeartbeatPermutationsObservability corresponds to bridge_generic_user_heartbeat_total_v1.schema.json. // userHeartbeatPermutationsObservability corresponds to bridge_generic_user_heartbeat_total_v1.schema.json.
@ -234,3 +235,18 @@ func (s *scenario) GluonNewlyOpenedIMAPConnectionsExceedThreshold(username strin
return err return err
}) })
} }
// VaultKeychainErrorsObservabilityMetrics is the step implementation that
// sends the vault/keychain error metrics for the given user.
//
// It collects the key-fetch, vault-corrupt and generic vault error metrics
// into one batch and submits that batch through withClientPass, using the
// password stored for username in the test context. Any error from the
// submission is returned unchanged.
func (s *scenario) VaultKeychainErrorsObservabilityMetrics(username string) error {
	batch := proton.ObservabilityBatch{
		Metrics: []proton.ObservabilityMetric{
			vaultMetrics.GenerateVaultKeyFetchGenericErrorMetric(),
			vaultMetrics.GenerateVaultCreationCorruptErrorMetric(),
			vaultMetrics.GenerateVaultCreationGenericErrorMetric(),
		},
	}

	// sendBatch is the callback handed to withClientPass; it submits the batch
	// with whatever client it is given.
	sendBatch := func(ctx context.Context, client *proton.Client) error {
		return client.SendObservabilityBatch(ctx, batch)
	}

	return s.t.withClientPass(
		context.Background(),
		username,
		s.t.getUserByName(username).userPass,
		sendBatch,
	)
}

View File

@ -244,6 +244,8 @@ func (s *scenario) steps(ctx *godog.ScenarioContext) {
ctx.Step(`^the user with username "([^"]*)" sends SMTP send success observability metric$`, s.SMTPSendSuccessObservabilityMetric) ctx.Step(`^the user with username "([^"]*)" sends SMTP send success observability metric$`, s.SMTPSendSuccessObservabilityMetric)
// SMTP submission metric // SMTP submission metric
ctx.Step(`^the user with username "([^"]*)" sends an SMTP send request observability metric$`, s.SMTPSendRequestObservabilityMetric) ctx.Step(`^the user with username "([^"]*)" sends an SMTP send request observability metric$`, s.SMTPSendRequestObservabilityMetric)
// Vault/Keychain related error metrics.
ctx.Step(`^the user with username "([^"]*)" sends all possible vault or keychain related error observability metrics$`, s.VaultKeychainErrorsObservabilityMetrics)
// Gluon related metrics // Gluon related metrics
ctx.Step(`^the user with username "([^"]*)" sends all possible gluon error observability metrics$`, s.testGluonErrorObservabilityMetrics) ctx.Step(`^the user with username "([^"]*)" sends all possible gluon error observability metrics$`, s.testGluonErrorObservabilityMetrics)

View File

@ -24,6 +24,7 @@ import (
"github.com/ProtonMail/gluon/async" "github.com/ProtonMail/gluon/async"
"github.com/ProtonMail/proton-bridge/v3/internal/app" "github.com/ProtonMail/proton-bridge/v3/internal/app"
"github.com/ProtonMail/proton-bridge/v3/internal/locations" "github.com/ProtonMail/proton-bridge/v3/internal/locations"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
"github.com/ProtonMail/proton-bridge/v3/internal/vault" "github.com/ProtonMail/proton-bridge/v3/internal/vault"
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain" "github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
@ -62,7 +63,7 @@ func main() {
func getRollout(_ *cli.Context) error { func getRollout(_ *cli.Context) error {
return app.WithLocations(func(locations *locations.Locations) error { return app.WithLocations(func(locations *locations.Locations) error {
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error { return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
return app.WithVault(nil, locations, keychains, make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error { return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error {
fmt.Println(vault.GetUpdateRollout()) fmt.Println(vault.GetUpdateRollout())
return nil return nil
}) })
@ -73,7 +74,7 @@ func getRollout(_ *cli.Context) error {
func setRollout(c *cli.Context) error { func setRollout(c *cli.Context) error {
return app.WithLocations(func(locations *locations.Locations) error { return app.WithLocations(func(locations *locations.Locations) error {
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error { return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
return app.WithVault(nil, locations, keychains, make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error { return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error {
clamped := max(0.0, min(1.0, c.Float64("value"))) clamped := max(0.0, min(1.0, c.Float64("value")))
if err := vault.SetUpdateRollout(clamped); err != nil { if err := vault.SetUpdateRollout(clamped); err != nil {
return err return err

View File

@ -27,6 +27,7 @@ import (
"github.com/ProtonMail/gluon/async" "github.com/ProtonMail/gluon/async"
"github.com/ProtonMail/proton-bridge/v3/internal/app" "github.com/ProtonMail/proton-bridge/v3/internal/app"
"github.com/ProtonMail/proton-bridge/v3/internal/locations" "github.com/ProtonMail/proton-bridge/v3/internal/locations"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
"github.com/ProtonMail/proton-bridge/v3/internal/unleash" "github.com/ProtonMail/proton-bridge/v3/internal/unleash"
"github.com/ProtonMail/proton-bridge/v3/internal/vault" "github.com/ProtonMail/proton-bridge/v3/internal/vault"
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain" "github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
@ -53,7 +54,7 @@ func main() {
func readAction(c *cli.Context) error { func readAction(c *cli.Context) error {
return app.WithLocations(func(locations *locations.Locations) error { return app.WithLocations(func(locations *locations.Locations) error {
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error { return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
return app.WithVault(nil, locations, keychains, make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error { return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error {
if _, err := os.Stdout.Write(vault.ExportJSON()); err != nil { if _, err := os.Stdout.Write(vault.ExportJSON()); err != nil {
return fmt.Errorf("failed to write vault: %w", err) return fmt.Errorf("failed to write vault: %w", err)
} }
@ -67,7 +68,7 @@ func readAction(c *cli.Context) error {
func writeAction(c *cli.Context) error { func writeAction(c *cli.Context) error {
return app.WithLocations(func(locations *locations.Locations) error { return app.WithLocations(func(locations *locations.Locations) error {
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error { return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
return app.WithVault(nil, locations, keychains, make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error { return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error {
b, err := io.ReadAll(os.Stdin) b, err := io.ReadAll(os.Stdin)
if err != nil { if err != nil {
return fmt.Errorf("failed to read vault: %w", err) return fmt.Errorf("failed to read vault: %w", err)