forked from Silverfish/proton-bridge
feat(BRIDGE-396): Observability metrics for vault issues; Extension to observability service to support caching
This commit is contained in:
2
go.mod
2
go.mod
@ -9,7 +9,7 @@ require (
|
||||
github.com/Masterminds/semver/v3 v3.2.0
|
||||
github.com/ProtonMail/gluon v0.17.1-0.20250627102828-b014b7cc8132
|
||||
github.com/ProtonMail/go-autostart v0.0.0-20210130080809-00ed301c8e9a
|
||||
github.com/ProtonMail/go-proton-api v0.4.1-0.20250627135952-bf973947255c
|
||||
github.com/ProtonMail/go-proton-api v0.4.1-0.20250717114555-7525f013ddc1
|
||||
github.com/ProtonMail/gopenpgp/v2 v2.9.0-proton
|
||||
github.com/PuerkitoBio/goquery v1.8.1
|
||||
github.com/abiosoft/ishell v2.0.0+incompatible
|
||||
|
||||
4
go.sum
4
go.sum
@ -45,8 +45,8 @@ github.com/ProtonMail/go-message v0.13.1-0.20240919135104-3bc88e6a9423 h1:p8nBDx
|
||||
github.com/ProtonMail/go-message v0.13.1-0.20240919135104-3bc88e6a9423/go.mod h1:NBAn21zgCJ/52WLDyed18YvYFm5tEoeDauubFqLokM4=
|
||||
github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f h1:tCbYj7/299ekTTXpdwKYF8eBlsYsDVoggDAuAjoK66k=
|
||||
github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f/go.mod h1:gcr0kNtGBqin9zDW9GOHcVntrwnjrK+qdJ06mWYBybw=
|
||||
github.com/ProtonMail/go-proton-api v0.4.1-0.20250627135952-bf973947255c h1:FhfHIrGgehnTV/T2NkyVauKcJ3NzPq1uLcU/0eK661A=
|
||||
github.com/ProtonMail/go-proton-api v0.4.1-0.20250627135952-bf973947255c/go.mod h1:9t9+oQfH+6ssa7O2nLv34Uyjv8UmqTPGbVNcFToewck=
|
||||
github.com/ProtonMail/go-proton-api v0.4.1-0.20250717114555-7525f013ddc1 h1:Ryqa36leoJnZA1Ya4vfaH0rcTrrqjBuz9UG8HqCROrQ=
|
||||
github.com/ProtonMail/go-proton-api v0.4.1-0.20250717114555-7525f013ddc1/go.mod h1:9t9+oQfH+6ssa7O2nLv34Uyjv8UmqTPGbVNcFToewck=
|
||||
github.com/ProtonMail/go-smtp v0.0.0-20231109081432-2b3d50599865 h1:EP1gnxLL5Z7xBSymE9nSTM27nRYINuvssAtDmG0suD8=
|
||||
github.com/ProtonMail/go-smtp v0.0.0-20231109081432-2b3d50599865/go.mod h1:qm27SGYgoIPRot6ubfQ/GpiPy/g3PaZAVRxiO/sDUgQ=
|
||||
github.com/ProtonMail/go-srp v0.0.7 h1:Sos3Qk+th4tQR64vsxGIxYpN3rdnG9Wf9K4ZloC1JrI=
|
||||
|
||||
@ -41,6 +41,7 @@ import (
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/logging"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/platform"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/sentry"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/unleash"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/useragent"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/vault"
|
||||
@ -292,53 +293,56 @@ func run(c *cli.Context) error {
|
||||
return withSingleInstance(settings, locations.GetLockFile(), version, func() error {
|
||||
// Look for available keychains
|
||||
return WithKeychainList(crashHandler, func(keychains *keychain.List) error {
|
||||
// Unlock the encrypted vault.
|
||||
return WithVault(reporter, locations, keychains, featureFlags, crashHandler, func(v *vault.Vault, insecure, corrupt bool) error {
|
||||
if !v.Migrated() {
|
||||
// Migrate old settings into the vault.
|
||||
if err := migrateOldSettings(v); err != nil {
|
||||
logrus.WithError(err).Error("Failed to migrate old settings")
|
||||
}
|
||||
|
||||
// Migrate old accounts into the vault.
|
||||
if err := migrateOldAccounts(locations, keychains, v); err != nil {
|
||||
logrus.WithError(err).Error("Failed to migrate old accounts")
|
||||
}
|
||||
|
||||
// The vault has been migrated.
|
||||
if err := v.SetMigrated(); err != nil {
|
||||
logrus.WithError(err).Error("Failed to mark vault as migrated")
|
||||
}
|
||||
}
|
||||
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"lastVersion": v.GetLastVersion().String(),
|
||||
"showAllMail": v.GetShowAllMail(),
|
||||
"updateCh": v.GetUpdateChannel(),
|
||||
"autoUpdate": v.GetAutoUpdate(),
|
||||
"rollout": v.GetUpdateRollout(),
|
||||
"DoH": v.GetProxyAllowed(),
|
||||
}).Info("Vault loaded")
|
||||
|
||||
// Load the cookies from the vault.
|
||||
return withCookieJar(v, func(cookieJar http.CookieJar) error {
|
||||
// Create a new bridge instance.
|
||||
return withBridge(c, exe, locations, version, identifier, crashHandler, reporter, v, cookieJar, keychains, func(b *bridge.Bridge, eventCh <-chan events.Event) error {
|
||||
if insecure {
|
||||
logrus.Warn("The vault key could not be retrieved; the vault will not be encrypted")
|
||||
b.PushError(bridge.ErrVaultInsecure)
|
||||
// Pre-init the observability service, load the cached metrics.
|
||||
return observability.WithObservability(locations, func(obsService *observability.Service) error {
|
||||
// Unlock the encrypted vault.
|
||||
return WithVault(reporter, locations, keychains, obsService, featureFlags, crashHandler, func(v *vault.Vault, insecure, corrupt bool) error {
|
||||
if !v.Migrated() {
|
||||
// Migrate old settings into the vault.
|
||||
if err := migrateOldSettings(v); err != nil {
|
||||
logrus.WithError(err).Error("Failed to migrate old settings")
|
||||
}
|
||||
|
||||
if corrupt {
|
||||
logrus.Warn("The vault is corrupt and has been wiped")
|
||||
b.PushError(bridge.ErrVaultCorrupt)
|
||||
// Migrate old accounts into the vault.
|
||||
if err := migrateOldAccounts(locations, keychains, v); err != nil {
|
||||
logrus.WithError(err).Error("Failed to migrate old accounts")
|
||||
}
|
||||
|
||||
// Remove old updates files
|
||||
b.RemoveOldUpdates()
|
||||
// The vault has been migrated.
|
||||
if err := v.SetMigrated(); err != nil {
|
||||
logrus.WithError(err).Error("Failed to mark vault as migrated")
|
||||
}
|
||||
}
|
||||
|
||||
// Run the frontend.
|
||||
return runFrontend(c, crashHandler, restarter, locations, b, eventCh, quitCh, c.Int(flagParentPID))
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"lastVersion": v.GetLastVersion().String(),
|
||||
"showAllMail": v.GetShowAllMail(),
|
||||
"updateCh": v.GetUpdateChannel(),
|
||||
"autoUpdate": v.GetAutoUpdate(),
|
||||
"rollout": v.GetUpdateRollout(),
|
||||
"DoH": v.GetProxyAllowed(),
|
||||
}).Info("Vault loaded")
|
||||
|
||||
// Load the cookies from the vault.
|
||||
return withCookieJar(v, func(cookieJar http.CookieJar) error {
|
||||
// Create a new bridge instance.
|
||||
return withBridge(c, exe, locations, version, identifier, obsService, crashHandler, reporter, v, cookieJar, keychains, func(b *bridge.Bridge, eventCh <-chan events.Event) error {
|
||||
if insecure {
|
||||
logrus.Warn("The vault key could not be retrieved; the vault will not be encrypted")
|
||||
b.PushError(bridge.ErrVaultInsecure)
|
||||
}
|
||||
|
||||
if corrupt {
|
||||
logrus.Warn("The vault is corrupt and has been wiped")
|
||||
b.PushError(bridge.ErrVaultCorrupt)
|
||||
}
|
||||
|
||||
// Remove old updates files
|
||||
b.RemoveOldUpdates()
|
||||
|
||||
// Run the frontend.
|
||||
return runFrontend(c, crashHandler, restarter, locations, b, eventCh, quitCh, c.Int(flagParentPID))
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@ -33,6 +33,7 @@ import (
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/events"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/locations"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/sentry"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/updater"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/useragent"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/vault"
|
||||
@ -52,6 +53,7 @@ func withBridge(
|
||||
locations *locations.Locations,
|
||||
version *semver.Version,
|
||||
identifier *useragent.UserAgent,
|
||||
obsService *observability.Service,
|
||||
crashHandler *crash.Handler,
|
||||
reporter *sentry.Reporter,
|
||||
vault *vault.Vault,
|
||||
@ -100,6 +102,7 @@ func withBridge(
|
||||
updater,
|
||||
version,
|
||||
keychains,
|
||||
obsService,
|
||||
|
||||
// The API stuff.
|
||||
constants.APIHost,
|
||||
|
||||
@ -31,18 +31,20 @@ import (
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/locations"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/platform"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/sentry"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/unleash"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/vault"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/vault/observabilitymetrics"
|
||||
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func WithVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler, fn func(*vault.Vault, bool, bool) error) error {
|
||||
func WithVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, obsSender observability.BasicSender, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler, fn func(*vault.Vault, bool, bool) error) error {
|
||||
logrus.Debug("Creating vault")
|
||||
defer logrus.Debug("Vault stopped")
|
||||
|
||||
// Create the encVault.
|
||||
encVault, insecure, corrupt, err := newVault(reporter, locations, keychains, featureFlags, panicHandler)
|
||||
encVault, insecure, corrupt, err := newVault(reporter, locations, keychains, obsSender, featureFlags, panicHandler)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not create vault: %w", err)
|
||||
}
|
||||
@ -64,7 +66,7 @@ func WithVault(reporter *sentry.Reporter, locations *locations.Locations, keycha
|
||||
return fn(encVault, insecure, corrupt != nil)
|
||||
}
|
||||
|
||||
func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler) (*vault.Vault, bool, error, error) {
|
||||
func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychains *keychain.List, obsSender observability.BasicSender, featureFlags unleash.FeatureFlagStartupStore, panicHandler async.PanicHandler) (*vault.Vault, bool, error, error) {
|
||||
vaultDir, err := locations.ProvideSettingsPath()
|
||||
if err != nil {
|
||||
return nil, false, nil, fmt.Errorf("could not get vault dir: %w", err)
|
||||
@ -101,6 +103,9 @@ func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychai
|
||||
|
||||
// We store the insecure vault in a separate directory
|
||||
vaultDir = path.Join(vaultDir, "insecure")
|
||||
|
||||
// Schedule the relevant observability metric for sending.
|
||||
obsSender.AddMetrics(observabilitymetrics.GenerateVaultKeyFetchGenericErrorMetric())
|
||||
} else {
|
||||
vaultKey = key
|
||||
lastUsedHelper = helper
|
||||
@ -114,9 +119,14 @@ func newVault(reporter *sentry.Reporter, locations *locations.Locations, keychai
|
||||
|
||||
userVault, corrupt, err := vault.New(vaultDir, gluonCacheDir, vaultKey, panicHandler)
|
||||
if err != nil {
|
||||
obsSender.AddMetrics(observabilitymetrics.GenerateVaultCreationGenericErrorMetric())
|
||||
return nil, false, corrupt, fmt.Errorf("could not create vault: %w", err)
|
||||
}
|
||||
|
||||
if corrupt != nil {
|
||||
obsSender.AddMetrics(observabilitymetrics.GenerateVaultCreationCorruptErrorMetric())
|
||||
}
|
||||
|
||||
// Remember the last successfully used keychain on Linux and store that as the user preference.
|
||||
if runtime.GOOS == platform.LINUX {
|
||||
if err := vault.SetHelper(vaultDir, lastUsedHelper); err != nil {
|
||||
|
||||
@ -168,6 +168,7 @@ func New(
|
||||
updater Updater, // the updater to fetch and install updates
|
||||
curVersion *semver.Version, // the current version of the bridge
|
||||
keychains *keychain.List, // usable keychains
|
||||
obsService *observability.Service,
|
||||
|
||||
apiURL string, // the URL of the API to use
|
||||
cookieJar http.CookieJar, // the cookie jar to use
|
||||
@ -206,6 +207,7 @@ func New(
|
||||
keychains,
|
||||
panicHandler,
|
||||
reporter,
|
||||
obsService,
|
||||
|
||||
api,
|
||||
identifier,
|
||||
@ -242,6 +244,7 @@ func newBridge(
|
||||
keychains *keychain.List,
|
||||
panicHandler async.PanicHandler,
|
||||
reporter reporter.Reporter,
|
||||
obsService *observability.Service,
|
||||
|
||||
api *proton.Manager,
|
||||
identifier identifier.Identifier,
|
||||
@ -275,7 +278,7 @@ func newBridge(
|
||||
|
||||
unleashService := unleash.NewBridgeService(ctx, api, locator, panicHandler, vault.GetFeatureFlagStickyKey())
|
||||
|
||||
observabilityService := observability.NewService(ctx, panicHandler)
|
||||
obsService.Initialize(ctx, panicHandler)
|
||||
|
||||
bridge := &Bridge{
|
||||
vault: vault,
|
||||
@ -317,11 +320,11 @@ func newBridge(
|
||||
lastVersion: lastVersion,
|
||||
|
||||
tasks: tasks,
|
||||
syncService: syncservice.NewService(panicHandler, observabilityService),
|
||||
syncService: syncservice.NewService(panicHandler, obsService),
|
||||
|
||||
unleashService: unleashService,
|
||||
|
||||
observabilityService: observabilityService,
|
||||
observabilityService: obsService,
|
||||
|
||||
notificationStore: notifications.NewStore(locator.ProvideNotificationsCachePath),
|
||||
|
||||
@ -336,7 +339,7 @@ func newBridge(
|
||||
reporter,
|
||||
uidValidityGenerator,
|
||||
&bridgeIMAPSMTPTelemetry{b: bridge},
|
||||
observabilityService,
|
||||
obsService,
|
||||
unleashService,
|
||||
)
|
||||
|
||||
|
||||
@ -45,6 +45,7 @@ import (
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/focus"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/locations"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/imapsmtpserver"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/unleash"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/updater"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/user"
|
||||
@ -944,6 +945,7 @@ func withBridgeNoMocks(
|
||||
mocks.Updater,
|
||||
v2_3_0,
|
||||
keychain.NewTestKeychainsList(),
|
||||
observability.NewTestService(),
|
||||
|
||||
// The API stuff.
|
||||
apiURL,
|
||||
|
||||
@ -95,7 +95,7 @@ func TestTLSSignedCertTrustedPublicKey(t *testing.T) {
|
||||
|
||||
_, dialer, _, checker, _ := createClientWithPinningDialer("")
|
||||
copyTrustedPins(checker)
|
||||
checker.trustedPins = append(checker.trustedPins, `pin-sha256="FlvTPG/nIMKtOj9nelnEjujwSZ5EDyfiKYxZgbXREls="`)
|
||||
checker.trustedPins = append(checker.trustedPins, `pin-sha256="coQ/Z6I+kjMViHVis67UVQDyCzXa1IVEuTKwF8wR9uQ="`)
|
||||
_, err := dialer.DialTLSContext(context.Background(), "tcp", "rsa4096.badssl.com:443")
|
||||
r.NoError(t, err, "expected dial to succeed because public key is known and cert is signed by CA")
|
||||
}
|
||||
|
||||
@ -215,6 +215,14 @@ func (l *Locations) ProvideUnleashStartupCachePath() (string, error) {
|
||||
return l.getUnleashStartupCachePath(), nil
|
||||
}
|
||||
|
||||
func (l *Locations) ProvideObservabilityMetricsCachePath() (string, error) {
|
||||
if err := os.MkdirAll(l.getObservabilityMetricsCachePath(), 0o700); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return l.getObservabilityMetricsCachePath(), nil
|
||||
}
|
||||
|
||||
func (l *Locations) getGluonCachePath() string {
|
||||
return filepath.Join(l.userData, "gluon")
|
||||
}
|
||||
@ -257,6 +265,10 @@ func (l *Locations) getUnleashStartupCachePath() string {
|
||||
return filepath.Join(l.userCache, "unleash_startup_cache")
|
||||
}
|
||||
|
||||
func (l *Locations) getObservabilityMetricsCachePath() string {
|
||||
return filepath.Join(l.userCache, "observability_cache")
|
||||
}
|
||||
|
||||
// Clear removes everything except the lock and update files.
|
||||
func (l *Locations) Clear(except ...string) error {
|
||||
return files.Remove(
|
||||
|
||||
@ -19,12 +19,17 @@ package observability
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ProtonMail/gluon/async"
|
||||
"github.com/ProtonMail/go-proton-api"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/locations"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/telemetry"
|
||||
"github.com/bradenaw/juniper/xslices"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@ -34,6 +39,7 @@ var throttleDuration = 5 * time.Second //nolint:gochecknoglobals
|
||||
const (
|
||||
maxStorageSize = 5000
|
||||
maxBatchSize = 1000
|
||||
filename = "metric_cache.json"
|
||||
)
|
||||
|
||||
type client struct {
|
||||
@ -50,15 +56,23 @@ type Sender interface {
|
||||
GetEmailClient() string
|
||||
}
|
||||
|
||||
type BasicSender interface {
|
||||
AddMetrics(metric ...proton.ObservabilityMetric)
|
||||
}
|
||||
|
||||
type Service struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
|
||||
panicHandler async.PanicHandler
|
||||
|
||||
cachePath string
|
||||
|
||||
lastDispatch time.Time
|
||||
isDispatchScheduled bool
|
||||
|
||||
wg sync.WaitGroup
|
||||
|
||||
signalDataArrived chan struct{}
|
||||
signalDispatch chan struct{}
|
||||
|
||||
@ -73,41 +87,70 @@ type Service struct {
|
||||
distinctionUtility *distinctionUtility
|
||||
}
|
||||
|
||||
func NewService(ctx context.Context, panicHandler async.PanicHandler) *Service {
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
|
||||
service := &Service{
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
|
||||
panicHandler: panicHandler,
|
||||
|
||||
lastDispatch: time.Now().Add(-throttleDuration),
|
||||
|
||||
signalDataArrived: make(chan struct{}, 1),
|
||||
signalDispatch: make(chan struct{}, 1),
|
||||
|
||||
log: logrus.WithFields(logrus.Fields{"pkg": "observability"}),
|
||||
|
||||
metricStore: make([]proton.ObservabilityMetric, 0),
|
||||
|
||||
func newService() *Service {
|
||||
return &Service{
|
||||
ctx: context.Background(),
|
||||
metricStore: make([]proton.ObservabilityMetric, 0),
|
||||
log: logrus.WithFields(logrus.Fields{"pkg": "observability"}),
|
||||
userClientStore: make(map[string]*client),
|
||||
}
|
||||
}
|
||||
|
||||
service.distinctionUtility = newDistinctionUtility(ctx, panicHandler, service)
|
||||
// NewTestService initializes a new basic observability service with the required struct fields.
|
||||
// Should only be used for testing.
|
||||
func NewTestService() *Service {
|
||||
return newService()
|
||||
}
|
||||
|
||||
return service
|
||||
func WithObservability(locations *locations.Locations, fn func(service *Service) error) error {
|
||||
service := newService()
|
||||
|
||||
cacheDir, err := locations.ProvideObservabilityMetricsCachePath()
|
||||
if err != nil {
|
||||
service.log.WithError(err).Warn("Could not obtain cache path")
|
||||
return fn(service)
|
||||
}
|
||||
|
||||
cachePath := filepath.Clean(filepath.Join(cacheDir, filename))
|
||||
service.cachePath = cachePath
|
||||
|
||||
service.readCacheFile()
|
||||
|
||||
defer service.writeCacheFile()
|
||||
|
||||
return fn(service)
|
||||
}
|
||||
|
||||
// Initialize sets up the observability Service. If not initialized, the service will remain inactive and emit no metrics.
|
||||
// Should exclusively be called during bridge set-up.
|
||||
func (s *Service) Initialize(ctx context.Context, panicHandler async.PanicHandler) {
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
|
||||
s.ctx = ctx
|
||||
s.cancel = cancel
|
||||
s.panicHandler = panicHandler
|
||||
|
||||
s.lastDispatch = time.Now().Add(-throttleDuration)
|
||||
|
||||
s.signalDataArrived = make(chan struct{}, 1)
|
||||
s.signalDispatch = make(chan struct{}, 1)
|
||||
|
||||
s.distinctionUtility = newDistinctionUtility(ctx, panicHandler, s)
|
||||
}
|
||||
|
||||
// Run starts the observability service goroutine.
|
||||
// The function also sets some utility functions to a helper struct aimed at differentiating the amount of users sending metric updates.
|
||||
func (s *Service) Run(settingsGetter settingsGetter) {
|
||||
s.log.Info("Starting service")
|
||||
if s.log != nil {
|
||||
s.log.Info("Starting service")
|
||||
}
|
||||
|
||||
s.distinctionUtility.setSettingsGetter(settingsGetter)
|
||||
s.distinctionUtility.runHeartbeat()
|
||||
|
||||
s.wg.Add(1)
|
||||
go func() {
|
||||
defer s.wg.Done()
|
||||
s.start()
|
||||
}()
|
||||
}
|
||||
@ -142,6 +185,62 @@ func (s *Service) start() {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Service) readCacheFile() {
|
||||
if s.cachePath == "" {
|
||||
return
|
||||
}
|
||||
|
||||
file, err := os.Open(s.cachePath)
|
||||
if err != nil {
|
||||
s.log.WithError(err).Info("Unable to open cache file")
|
||||
return
|
||||
}
|
||||
|
||||
defer func(file *os.File) {
|
||||
if err := file.Close(); err != nil {
|
||||
s.log.WithError(err).Error("Unable to close cache file after read")
|
||||
}
|
||||
}(file)
|
||||
|
||||
s.withMetricStoreLock(func() {
|
||||
if err = json.NewDecoder(file).Decode(&s.metricStore); err != nil {
|
||||
s.log.WithError(err).Error("Unable to decode cache file")
|
||||
}
|
||||
|
||||
// Since we omit marshalling the field, we need to explicitly overwrite it.
|
||||
for i := range s.metricStore {
|
||||
s.metricStore[i].ShouldCache = true
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Service) writeCacheFile() {
|
||||
if s.cachePath == "" {
|
||||
return
|
||||
}
|
||||
|
||||
file, err := os.Create(s.cachePath)
|
||||
if err != nil {
|
||||
s.log.WithError(err).Warn("Unable to create cache file")
|
||||
}
|
||||
|
||||
defer func(file *os.File) {
|
||||
if err := file.Close(); err != nil {
|
||||
s.log.WithError(err).Error("Unable to close cache file after write")
|
||||
}
|
||||
}(file)
|
||||
|
||||
s.withMetricStoreLock(func() {
|
||||
metricsToCache := xslices.Filter(s.metricStore, func(m proton.ObservabilityMetric) bool {
|
||||
return m.ShouldCache
|
||||
})
|
||||
|
||||
if err = json.NewEncoder(file).Encode(metricsToCache); err != nil {
|
||||
s.log.WithError(err).Error("Unable to encode data to cache file")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Service) dispatchData() {
|
||||
s.isDispatchScheduled = false // Only accessed via a single goroutine, so no mutexes.
|
||||
if !s.haveMetricsAndClients() {
|
||||
@ -237,6 +336,12 @@ func (s *Service) addMetrics(metric ...proton.ObservabilityMetric) {
|
||||
s.sendSignal(s.signalDataArrived)
|
||||
}
|
||||
|
||||
func (s *Service) flushMetricsTest() {
|
||||
s.withMetricStoreLock(func() {
|
||||
s.metricStore = make([]proton.ObservabilityMetric, 0)
|
||||
})
|
||||
}
|
||||
|
||||
// addMetricsIfClients - will append a metric only if there are authenticated clients
|
||||
// via which we can reach the endpoint.
|
||||
func (s *Service) addMetricsIfClients(metric ...proton.ObservabilityMetric) {
|
||||
@ -280,6 +385,7 @@ func (s *Service) Stop() {
|
||||
s.log.Info("Stopping service")
|
||||
|
||||
s.cancel()
|
||||
s.wg.Wait()
|
||||
close(s.signalDataArrived)
|
||||
close(s.signalDispatch)
|
||||
}
|
||||
|
||||
151
internal/services/observability/service_test.go
Normal file
151
internal/services/observability/service_test.go
Normal file
@ -0,0 +1,151 @@
|
||||
// Copyright (c) 2025 Proton AG
|
||||
//
|
||||
// This file is part of Proton Mail Bridge.
|
||||
//
|
||||
// Proton Mail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// Proton Mail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package observability
|
||||
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ProtonMail/go-proton-api"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestService_cacheFile_NoCachePath(t *testing.T) {
|
||||
s := NewTestService()
|
||||
s.readCacheFile()
|
||||
s.writeCacheFile()
|
||||
require.Empty(t, s.metricStore)
|
||||
}
|
||||
|
||||
func TestService_cacheFile_ValidCachePath(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
cachePath := filepath.Join(tempDir, "test_cache.json")
|
||||
|
||||
s := NewTestService()
|
||||
s.cachePath = cachePath
|
||||
|
||||
s.readCacheFile()
|
||||
s.writeCacheFile()
|
||||
require.Empty(t, s.metricStore)
|
||||
}
|
||||
|
||||
func TestService_cacheFile_AllMetricsCacheable(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
cachePath := filepath.Clean(filepath.Join(tempDir, "test_cache.json"))
|
||||
|
||||
s := NewTestService()
|
||||
s.cachePath = cachePath
|
||||
s.ctx = context.Background()
|
||||
|
||||
testMetrics := []proton.ObservabilityMetric{
|
||||
{
|
||||
Name: "test1",
|
||||
Version: 1,
|
||||
Timestamp: time.Now().Unix(),
|
||||
Data: nil,
|
||||
ShouldCache: true,
|
||||
},
|
||||
{
|
||||
|
||||
Name: "test2",
|
||||
Version: 2,
|
||||
Timestamp: time.Now().Unix(),
|
||||
Data: nil,
|
||||
ShouldCache: true,
|
||||
},
|
||||
{
|
||||
|
||||
Name: "test3",
|
||||
Version: 3,
|
||||
Timestamp: time.Now().Unix(),
|
||||
Data: nil,
|
||||
ShouldCache: true,
|
||||
},
|
||||
}
|
||||
|
||||
s.readCacheFile()
|
||||
require.Empty(t, s.metricStore)
|
||||
|
||||
s.addMetrics(testMetrics...)
|
||||
require.Equal(t, s.metricStore, testMetrics)
|
||||
|
||||
s.writeCacheFile()
|
||||
s.flushMetricsTest()
|
||||
require.Empty(t, s.metricStore)
|
||||
|
||||
s.readCacheFile()
|
||||
require.Equal(t, s.metricStore, testMetrics)
|
||||
}
|
||||
|
||||
func TestService_cacheFile_SomeMetricsCacheable(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
cachePath := filepath.Clean(filepath.Join(tempDir, "test_cache.json"))
|
||||
|
||||
s := NewTestService()
|
||||
s.cachePath = cachePath
|
||||
s.ctx = context.Background()
|
||||
|
||||
testMetricsCacheable := []proton.ObservabilityMetric{
|
||||
{
|
||||
Name: "test1",
|
||||
Version: 1,
|
||||
Timestamp: time.Now().Unix(),
|
||||
Data: nil,
|
||||
ShouldCache: true,
|
||||
},
|
||||
{
|
||||
|
||||
Name: "test2",
|
||||
Version: 2,
|
||||
Timestamp: time.Now().Unix(),
|
||||
Data: nil,
|
||||
ShouldCache: true,
|
||||
},
|
||||
}
|
||||
|
||||
testMetricsNonCacheable := []proton.ObservabilityMetric{
|
||||
{
|
||||
Name: "test3",
|
||||
Version: 3,
|
||||
Timestamp: time.Now().Unix(),
|
||||
},
|
||||
{
|
||||
|
||||
Name: "test2",
|
||||
Version: 2,
|
||||
Timestamp: time.Now().Unix(),
|
||||
},
|
||||
}
|
||||
|
||||
s.readCacheFile()
|
||||
require.Empty(t, s.metricStore)
|
||||
|
||||
s.addMetrics(testMetricsCacheable...)
|
||||
s.addMetrics(testMetricsNonCacheable...)
|
||||
|
||||
require.Equal(t, s.metricStore, append(testMetricsCacheable, testMetricsNonCacheable...))
|
||||
|
||||
s.writeCacheFile()
|
||||
s.flushMetricsTest()
|
||||
require.Empty(t, s.metricStore)
|
||||
|
||||
s.readCacheFile()
|
||||
require.Equal(t, s.metricStore, testMetricsCacheable)
|
||||
}
|
||||
@ -168,7 +168,7 @@ func withUser(tb testing.TB, ctx context.Context, _ *server.Server, m *proton.Ma
|
||||
nullSMTPServerManager,
|
||||
nullEventSubscription,
|
||||
nil,
|
||||
observability.NewService(context.Background(), nil),
|
||||
observability.NewTestService(),
|
||||
"",
|
||||
true,
|
||||
notifications.NewStore(func() (string, error) {
|
||||
|
||||
56
internal/vault/observabilitymetrics/metrics.go
Normal file
56
internal/vault/observabilitymetrics/metrics.go
Normal file
@ -0,0 +1,56 @@
|
||||
// Copyright (c) 2025 Proton AG
|
||||
//
|
||||
// This file is part of Proton Mail Bridge.
|
||||
//
|
||||
// Proton Mail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// Proton Mail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package observabilitymetrics
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ProtonMail/go-proton-api"
|
||||
)
|
||||
|
||||
const (
|
||||
vaultErrorsSchemaName = "bridge_vault_errors_total"
|
||||
vaultErrorsSchemaVersion = 1
|
||||
)
|
||||
|
||||
func generateVaultErrorObservabilityMetric(errorType string) proton.ObservabilityMetric {
|
||||
return proton.ObservabilityMetric{
|
||||
Name: vaultErrorsSchemaName,
|
||||
Version: vaultErrorsSchemaVersion,
|
||||
Timestamp: time.Now().Unix(),
|
||||
ShouldCache: true,
|
||||
Data: map[string]interface{}{
|
||||
"Value": 1,
|
||||
"Labels": map[string]string{
|
||||
"errorType": errorType,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GenerateVaultCreationCorruptErrorMetric() proton.ObservabilityMetric {
|
||||
return generateVaultErrorObservabilityMetric("vaultCorrupt")
|
||||
}
|
||||
|
||||
func GenerateVaultCreationGenericErrorMetric() proton.ObservabilityMetric {
|
||||
return generateVaultErrorObservabilityMetric("vaultError")
|
||||
}
|
||||
|
||||
func GenerateVaultKeyFetchGenericErrorMetric() proton.ObservabilityMetric {
|
||||
return generateVaultErrorObservabilityMetric("keychainError")
|
||||
}
|
||||
@ -39,6 +39,7 @@ import (
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/events"
|
||||
frontend "github.com/ProtonMail/proton-bridge/v3/internal/frontend/grpc"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/service"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/useragent"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/vault"
|
||||
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
|
||||
@ -172,6 +173,7 @@ func (t *testCtx) initBridge() (<-chan events.Event, error) {
|
||||
t.mocks.Updater,
|
||||
t.version,
|
||||
keychain.NewTestKeychainsList(),
|
||||
observability.NewTestService(),
|
||||
|
||||
// API stuff
|
||||
t.api.GetHostURL(),
|
||||
|
||||
@ -50,4 +50,8 @@ Feature: Bridge send remote notification observability metrics
|
||||
And the user with username "[user:user1]" sends an SMTP send request observability metric
|
||||
Then it succeeds
|
||||
|
||||
Scenario: Test all possible Vault/Keychain related error observability metrics
|
||||
When the user logs in with username "[user:user1]" and password "password"
|
||||
And the user with username "[user:user1]" sends all possible vault or keychain related error observability metrics
|
||||
Then it succeeds
|
||||
|
||||
|
||||
@ -28,6 +28,7 @@ import (
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability/gluonmetrics"
|
||||
smtpMetrics "github.com/ProtonMail/proton-bridge/v3/internal/services/smtp/observabilitymetrics"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/syncservice/observabilitymetrics"
|
||||
vaultMetrics "github.com/ProtonMail/proton-bridge/v3/internal/vault/observabilitymetrics"
|
||||
)
|
||||
|
||||
// userHeartbeatPermutationsObservability corresponds to bridge_generic_user_heartbeat_total_v1.schema.json.
|
||||
@ -234,3 +235,18 @@ func (s *scenario) GluonNewlyOpenedIMAPConnectionsExceedThreshold(username strin
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
func (s *scenario) VaultKeychainErrorsObservabilityMetrics(username string) error {
|
||||
batch := proton.ObservabilityBatch{
|
||||
Metrics: []proton.ObservabilityMetric{
|
||||
vaultMetrics.GenerateVaultKeyFetchGenericErrorMetric(),
|
||||
vaultMetrics.GenerateVaultCreationCorruptErrorMetric(),
|
||||
vaultMetrics.GenerateVaultCreationGenericErrorMetric(),
|
||||
},
|
||||
}
|
||||
|
||||
return s.t.withClientPass(context.Background(), username, s.t.getUserByName(username).userPass, func(ctx context.Context, c *proton.Client) error {
|
||||
err := c.SendObservabilityBatch(ctx, batch)
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
@ -244,6 +244,8 @@ func (s *scenario) steps(ctx *godog.ScenarioContext) {
|
||||
ctx.Step(`^the user with username "([^"]*)" sends SMTP send success observability metric$`, s.SMTPSendSuccessObservabilityMetric)
|
||||
// SMTP submission metric
|
||||
ctx.Step(`^the user with username "([^"]*)" sends an SMTP send request observability metric$`, s.SMTPSendRequestObservabilityMetric)
|
||||
// Vault/Keychain related error metrics.
|
||||
ctx.Step(`^the user with username "([^"]*)" sends all possible vault or keychain related error observability metrics$`, s.VaultKeychainErrorsObservabilityMetrics)
|
||||
|
||||
// Gluon related metrics
|
||||
ctx.Step(`^the user with username "([^"]*)" sends all possible gluon error observability metrics$`, s.testGluonErrorObservabilityMetrics)
|
||||
|
||||
@ -24,6 +24,7 @@ import (
|
||||
"github.com/ProtonMail/gluon/async"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/app"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/locations"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/vault"
|
||||
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
|
||||
"github.com/sirupsen/logrus"
|
||||
@ -62,7 +63,7 @@ func main() {
|
||||
func getRollout(_ *cli.Context) error {
|
||||
return app.WithLocations(func(locations *locations.Locations) error {
|
||||
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
|
||||
return app.WithVault(nil, locations, keychains, make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error {
|
||||
return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error {
|
||||
fmt.Println(vault.GetUpdateRollout())
|
||||
return nil
|
||||
})
|
||||
@ -73,7 +74,7 @@ func getRollout(_ *cli.Context) error {
|
||||
func setRollout(c *cli.Context) error {
|
||||
return app.WithLocations(func(locations *locations.Locations) error {
|
||||
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
|
||||
return app.WithVault(nil, locations, keychains, make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error {
|
||||
return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(map[string]bool), async.NoopPanicHandler{}, func(vault *vault.Vault, _, _ bool) error {
|
||||
clamped := max(0.0, min(1.0, c.Float64("value")))
|
||||
if err := vault.SetUpdateRollout(clamped); err != nil {
|
||||
return err
|
||||
|
||||
@ -27,6 +27,7 @@ import (
|
||||
"github.com/ProtonMail/gluon/async"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/app"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/locations"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/unleash"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/vault"
|
||||
"github.com/ProtonMail/proton-bridge/v3/pkg/keychain"
|
||||
@ -53,7 +54,7 @@ func main() {
|
||||
func readAction(c *cli.Context) error {
|
||||
return app.WithLocations(func(locations *locations.Locations) error {
|
||||
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
|
||||
return app.WithVault(nil, locations, keychains, make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error {
|
||||
return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error {
|
||||
if _, err := os.Stdout.Write(vault.ExportJSON()); err != nil {
|
||||
return fmt.Errorf("failed to write vault: %w", err)
|
||||
}
|
||||
@ -67,7 +68,7 @@ func readAction(c *cli.Context) error {
|
||||
func writeAction(c *cli.Context) error {
|
||||
return app.WithLocations(func(locations *locations.Locations) error {
|
||||
return app.WithKeychainList(async.NoopPanicHandler{}, func(keychains *keychain.List) error {
|
||||
return app.WithVault(nil, locations, keychains, make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error {
|
||||
return app.WithVault(nil, locations, keychains, observability.NewTestService(), make(unleash.FeatureFlagStartupStore), async.NoopPanicHandler{}, func(vault *vault.Vault, insecure, corrupt bool) error {
|
||||
b, err := io.ReadAll(os.Stdin)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read vault: %w", err)
|
||||
|
||||
Reference in New Issue
Block a user