feat(BRIDGE-363): Observability metrics for IMAP connections; minor unleash service refactor;

This commit is contained in:
Atanas Janeshliev
2025-05-12 13:35:52 +02:00
parent a305ee1113
commit 89da7335b6
26 changed files with 334 additions and 64 deletions

View File

@ -19,6 +19,7 @@ package observability
import (
"github.com/ProtonMail/go-proton-api"
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability/gluonmetrics"
)
type Adapter struct {
@ -88,6 +89,15 @@ func (adapter *Adapter) AddDistinctMetrics(errType interface{}, metrics ...map[s
}
if len(typedMetrics) > 0 {
adapter.sender.AddDistinctMetrics(DistinctionErrorTypeEnum(errTypeInt), typedMetrics...)
adapter.sender.AddDistinctMetrics(DistinctionMetricTypeEnum(errTypeInt), typedMetrics...)
}
}
// AddIMAPConnectionsExceededThresholdMetric - builds the IMAP connections metric from the sender's
// email client and the bucketed connection counts, then hands it to the sender as a time-limited
// metric under the NewIMAPConnectionsExceedThreshold key.
// No threshold is evaluated here; presumably the caller only invokes this once a threshold was exceeded.
func (adapter *Adapter) AddIMAPConnectionsExceededThresholdMetric(totalOpenIMAPConnections, newIMAPConnections int) {
	emailClient := adapter.sender.GetEmailClient()
	totalBucket := BucketIMAPConnections(totalOpenIMAPConnections)
	newBucket := BucketIMAPConnections(newIMAPConnections)

	adapter.sender.AddTimeLimitedMetric(
		NewIMAPConnectionsExceedThreshold,
		gluonmetrics.GenerateNewOpenedIMAPConnectionsExceedThreshold(emailClient, totalBucket, newBucket),
	)
}

View File

@ -19,21 +19,22 @@ package observability
import "time"
// DistinctionErrorTypeEnum - maps to the specific error schema for which we
// want to send a user update.
type DistinctionErrorTypeEnum int
// DistinctionMetricTypeEnum - used to distinguish specific metrics which we want to limit over some interval.
// Most enum values are tied to a specific error schema for which we also send a specific distinction user update.
type DistinctionMetricTypeEnum int
const (
SyncError DistinctionErrorTypeEnum = iota
SyncError DistinctionMetricTypeEnum = iota
GluonImapError
GluonMessageError
GluonOtherError
SMTPError
EventLoopError // EventLoopError - should always be kept as the last error key when inserting new error keys; createLastSentMap iterates from SyncError up to and including EventLoopError.
NewIMAPConnectionsExceedThreshold // NewIMAPConnectionsExceedThreshold - key used for the time-limited IMAP connections metric; it sits after EventLoopError and is therefore outside the range createLastSentMap pre-populates.
)
// errorSchemaMap - maps between the DistinctionErrorTypeEnum and the relevant schema name.
var errorSchemaMap = map[DistinctionErrorTypeEnum]string{ //nolint:gochecknoglobals
// errorSchemaMap - maps between some DistinctionMetricTypeEnum and the relevant schema name.
var errorSchemaMap = map[DistinctionMetricTypeEnum]string{ //nolint:gochecknoglobals
SyncError: "bridge_sync_errors_users_total",
EventLoopError: "bridge_event_loop_events_errors_users_total",
GluonImapError: "bridge_gluon_imap_errors_users_total",
@ -43,9 +44,9 @@ var errorSchemaMap = map[DistinctionErrorTypeEnum]string{ //nolint:gochecknoglob
}
// createLastSentMap - needs to be updated whenever we make changes to the enum.
func createLastSentMap() map[DistinctionErrorTypeEnum]time.Time {
func createLastSentMap() map[DistinctionMetricTypeEnum]time.Time {
registerTime := time.Now().Add(-updateInterval)
lastSentMap := make(map[DistinctionErrorTypeEnum]time.Time)
lastSentMap := make(map[DistinctionMetricTypeEnum]time.Time)
for errType := SyncError; errType <= EventLoopError; errType++ {
lastSentMap[errType] = registerTime

View File

@ -40,7 +40,7 @@ type distinctionUtility struct {
panicHandler async.PanicHandler
lastSentMap map[DistinctionErrorTypeEnum]time.Time // Ensures we don't step over the limit of one user update every 5 mins.
lastSentMap map[DistinctionMetricTypeEnum]time.Time // Ensures we don't step over the limit of one user update every 5 mins.
observabilitySender observabilitySender
settingsGetter settingsGetter
@ -87,7 +87,7 @@ func (d *distinctionUtility) setSettingsGetter(getter settingsGetter) {
// checkAndUpdateLastSentMap - checks whether we have sent a relevant user update metric
// within the last 5 minutes.
func (d *distinctionUtility) checkAndUpdateLastSentMap(key DistinctionErrorTypeEnum) bool {
func (d *distinctionUtility) checkAndUpdateLastSentMap(key DistinctionMetricTypeEnum) bool {
curTime := time.Now()
val, ok := d.lastSentMap[key]
if !ok {
@ -107,7 +107,7 @@ func (d *distinctionUtility) checkAndUpdateLastSentMap(key DistinctionErrorTypeE
// and the relevant settings. In the future this will need to be expanded to support multiple
// versions of the metric if we ever decide to change them.
func (d *distinctionUtility) generateUserMetric(
metricType DistinctionErrorTypeEnum,
metricType DistinctionMetricTypeEnum,
) proton.ObservabilityMetric {
schemaName, ok := errorSchemaMap[metricType]
if !ok {
@ -138,7 +138,7 @@ func generateUserMetric(schemaName, plan, mailClient, dohEnabled, betaAccess str
}
}
func (d *distinctionUtility) generateDistinctMetrics(errType DistinctionErrorTypeEnum, metrics ...proton.ObservabilityMetric) []proton.ObservabilityMetric {
func (d *distinctionUtility) generateDistinctMetrics(errType DistinctionMetricTypeEnum, metrics ...proton.ObservabilityMetric) []proton.ObservabilityMetric {
d.updateHeartbeatData(errType)
if d.checkAndUpdateLastSentMap(errType) {

View File

@ -0,0 +1,45 @@
// Copyright (c) 2025 Proton AG
//
// This file is part of Proton Mail Bridge.
//
// Proton Mail Bridge is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Proton Mail Bridge is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
package gluonmetrics
import (
"time"
"github.com/ProtonMail/go-proton-api"
)
const (
// newIMAPConnectionThresholdExceededSchemaName - value placed in the Name field of the generated metric.
newIMAPConnectionThresholdExceededSchemaName = "bridge_imap_recently_opened_connections_total"
// newIMAPConnectionThresholdExceededVersion - value placed in the Version field of the generated metric.
newIMAPConnectionThresholdExceededVersion = 1
)
// GenerateNewOpenedIMAPConnectionsExceedThreshold - builds the observability metric for recently opened
// IMAP connections. The metric carries a fixed value of 1, is timestamped with the current Unix time and
// is labelled with the mail client plus the two connection-count arguments, which are used verbatim as
// label values (the label names suggest they are expected to be bucket strings).
func GenerateNewOpenedIMAPConnectionsExceedThreshold(emailClient, totalOpenIMAPConnectionCount, newlyOpenedIMAPConnectionCount string) proton.ObservabilityMetric {
	labels := map[string]string{
		"mailClient":                                   emailClient,
		"numberOfOpenIMAPConnectionsBuckets":           totalOpenIMAPConnectionCount,
		"numberOfRecentlyOpenedIMAPConnectionsBuckets": newlyOpenedIMAPConnectionCount,
	}

	payload := map[string]interface{}{
		"Value":  1,
		"Labels": labels,
	}

	return proton.ObservabilityMetric{
		Name:      newIMAPConnectionThresholdExceededSchemaName,
		Version:   newIMAPConnectionThresholdExceededVersion,
		Timestamp: time.Now().Unix(),
		Data:      payload,
	}
}

View File

@ -42,7 +42,7 @@ func (d *distinctionUtility) resetHeartbeatData() {
d.heartbeatData.receivedGluonError = false
}
func (d *distinctionUtility) updateHeartbeatData(errType DistinctionErrorTypeEnum) {
func (d *distinctionUtility) updateHeartbeatData(errType DistinctionMetricTypeEnum) {
d.withUpdateHeartbeatDataLock(func() {
//nolint:exhaustive
switch errType {

View File

@ -45,7 +45,9 @@ type client struct {
// so we can easily pass them down to relevant components.
type Sender interface {
AddMetrics(metrics ...proton.ObservabilityMetric)
AddDistinctMetrics(errType DistinctionErrorTypeEnum, metrics ...proton.ObservabilityMetric)
AddDistinctMetrics(errType DistinctionMetricTypeEnum, metrics ...proton.ObservabilityMetric)
// AddTimeLimitedMetric - submits a single metric keyed by metricType; the Service implementation
// only forwards it when its last-sent check for that metricType allows it.
AddTimeLimitedMetric(metricType DistinctionMetricTypeEnum, metric proton.ObservabilityMetric)
// GetEmailClient - returns the email client identifier as a string; the Service implementation
// returns the distinction utility's email client user agent.
GetEmailClient() string
}
type Service struct {
@ -325,11 +327,25 @@ func (s *Service) AddMetrics(metrics ...proton.ObservabilityMetric) {
// what number of events come from what number of users.
// As the binning interval is what allows us to do this we
// should not send these if there are no logged-in users at that moment.
func (s *Service) AddDistinctMetrics(errType DistinctionErrorTypeEnum, metrics ...proton.ObservabilityMetric) {
func (s *Service) AddDistinctMetrics(errType DistinctionMetricTypeEnum, metrics ...proton.ObservabilityMetric) {
metrics = s.distinctionUtility.generateDistinctMetrics(errType, metrics...)
s.addMetricsIfClients(metrics...)
}
// AddTimeLimitedMetric - forwards the metric for sending only when the distinction utility's
// last-sent check passes for the given metric type; otherwise the metric is dropped.
// The limiting interval itself is defined in the distinction utility.
func (s *Service) AddTimeLimitedMetric(metricType DistinctionMetricTypeEnum, metric proton.ObservabilityMetric) {
	if s.distinctionUtility.checkAndUpdateLastSentMap(metricType) {
		s.addMetricsIfClients(metric)
	}
}
// GetEmailClient - returns the email client user agent as reported by the distinction utility.
func (s *Service) GetEmailClient() string {
	userAgent := s.distinctionUtility.getEmailClientUserAgent()
	return userAgent
}
// ModifyHeartbeatInterval - should only be used for testing. Resets the heartbeat ticker.
func (s *Service) ModifyHeartbeatInterval(duration time.Duration) {
s.distinctionUtility.heartbeatTicker.Reset(duration)

View File

@ -66,3 +66,30 @@ func getEnabled(value bool) string {
}
return "enabled"
}
// BucketIMAPConnections - maps a connection count onto the label of the bucket it falls into.
// Buckets are half-open ranges: a value belongs to the first bucket whose exclusive upper bound
// is greater than it. Anything below 10 (including negative values) yields "<10", and anything
// from 3000 upwards yields "3000+".
func BucketIMAPConnections(val int) string {
	// Exclusive upper bounds in ascending order, each paired with the label of the bucket it closes.
	buckets := [...]struct {
		upperBound int
		label      string
	}{
		{10, "<10"},
		{25, "10-24"},
		{50, "25-49"},
		{100, "50-99"},
		{200, "100-199"},
		{300, "200-299"},
		{500, "300-499"},
		{1000, "500-999"},
		{2000, "1000-1999"},
		{3000, "2000-2999"},
	}

	for _, bucket := range buckets {
		if val < bucket.upperBound {
			return bucket.label
		}
	}

	return "3000+"
}