forked from Silverfish/proton-bridge
feat(BRIDGE-150): Observability service modification; user distinction utility & heartbeat; various observability metrics & relevant integration tests
This commit is contained in:
@ -160,10 +160,6 @@ func (s *childJob) onError(err error) {
|
||||
s.job.onError(err)
|
||||
}
|
||||
|
||||
func (s *childJob) userID() string {
|
||||
return s.job.userID
|
||||
}
|
||||
|
||||
func (s *childJob) chunkDivide(chunks [][]proton.FullMessage) []childJob {
|
||||
numChunks := len(chunks)
|
||||
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
// Copyright (c) 2024 Proton AG
|
||||
//
|
||||
// This file is part of Proton Mail Bridge.
|
||||
//
|
||||
// Proton Mail Bridge is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// Proton Mail Bridge is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Proton Mail Bridge. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package observabilitymetrics
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ProtonMail/go-proton-api"
|
||||
)
|
||||
|
||||
// Schema name and version used for the metric emitted when a message fails to
// build during sync.
const (
	errorCaseSchemaName    = "bridge_sync_message_build_errors_total"
	errorCaseSchemaVersion = 1
)

// Schema name and version used for the metric emitted when a message is built
// successfully during sync.
const (
	successCaseSchemaName    = "bridge_sync_message_build_success_total"
	successCaseSchemaVersion = 1
)
|
||||
|
||||
func generateStageBuildFailureObservabilityMetric(errorType string) proton.ObservabilityMetric {
|
||||
return proton.ObservabilityMetric{
|
||||
Name: errorCaseSchemaName,
|
||||
Version: errorCaseSchemaVersion,
|
||||
Timestamp: time.Now().Unix(),
|
||||
Data: map[string]interface{}{
|
||||
"Value": 1,
|
||||
"Labels": map[string]string{
|
||||
"errorType": errorType,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GenerateNoUnlockedKeyringMetric() proton.ObservabilityMetric {
|
||||
return generateStageBuildFailureObservabilityMetric("noUnlockedKeyring")
|
||||
}
|
||||
|
||||
func GenerateFailedToBuildMetric() proton.ObservabilityMetric {
|
||||
return generateStageBuildFailureObservabilityMetric("failedToBuild")
|
||||
}
|
||||
|
||||
// GenerateMessageBuiltSuccessMetric - Maybe this is incorrect, I'm not sure how metrics with no labels
|
||||
// should be dealt with. The integration tests will tell us.
|
||||
func GenerateMessageBuiltSuccessMetric() proton.ObservabilityMetric {
|
||||
return proton.ObservabilityMetric{
|
||||
Name: successCaseSchemaName,
|
||||
Version: successCaseSchemaVersion,
|
||||
Timestamp: time.Now().Unix(),
|
||||
Data: map[string]interface{}{
|
||||
"Value": 1,
|
||||
"Labels": map[string]string{},
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -21,7 +21,7 @@ import (
|
||||
"context"
|
||||
|
||||
"github.com/ProtonMail/gluon/async"
|
||||
"github.com/ProtonMail/gluon/reporter"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
)
|
||||
|
||||
// Service which mediates IMAP syncing in Bridge.
|
||||
@ -36,8 +36,9 @@ type Service struct {
|
||||
group *async.Group
|
||||
}
|
||||
|
||||
func NewService(reporter reporter.Reporter,
|
||||
func NewService(
|
||||
panicHandler async.PanicHandler,
|
||||
observabilitySender observability.Sender,
|
||||
) *Service {
|
||||
limits := newSyncLimits(2 * Gigabyte)
|
||||
|
||||
@ -50,7 +51,7 @@ func NewService(reporter reporter.Reporter,
|
||||
limits: limits,
|
||||
metadataStage: NewMetadataStage(metaCh, downloadCh, limits.DownloadRequestMem, panicHandler),
|
||||
downloadStage: NewDownloadStage(downloadCh, buildCh, limits.MaxParallelDownloads, panicHandler),
|
||||
buildStage: NewBuildStage(buildCh, applyCh, limits.MessageBuildMem, panicHandler, reporter),
|
||||
buildStage: NewBuildStage(buildCh, applyCh, limits.MessageBuildMem, panicHandler, observabilitySender),
|
||||
applyStage: NewApplyStage(applyCh),
|
||||
metaCh: metaCh,
|
||||
group: async.NewGroup(context.Background(), panicHandler),
|
||||
|
||||
@ -26,9 +26,10 @@ import (
|
||||
|
||||
"github.com/ProtonMail/gluon/async"
|
||||
"github.com/ProtonMail/gluon/logging"
|
||||
"github.com/ProtonMail/gluon/reporter"
|
||||
"github.com/ProtonMail/go-proton-api"
|
||||
"github.com/ProtonMail/gopenpgp/v2/crypto"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
obsMetrics "github.com/ProtonMail/proton-bridge/v3/internal/services/syncservice/observabilitymetrics"
|
||||
"github.com/bradenaw/juniper/parallel"
|
||||
"github.com/bradenaw/juniper/xslices"
|
||||
"github.com/sirupsen/logrus"
|
||||
@ -50,8 +51,10 @@ type BuildStage struct {
|
||||
maxBuildMem uint64
|
||||
|
||||
panicHandler async.PanicHandler
|
||||
reporter reporter.Reporter
|
||||
log *logrus.Entry
|
||||
|
||||
// Observability
|
||||
observabilitySender observability.Sender
|
||||
}
|
||||
|
||||
func NewBuildStage(
|
||||
@ -59,15 +62,15 @@ func NewBuildStage(
|
||||
output BuildStageOutput,
|
||||
maxBuildMem uint64,
|
||||
panicHandler async.PanicHandler,
|
||||
reporter reporter.Reporter,
|
||||
observabilitySender observability.Sender,
|
||||
) *BuildStage {
|
||||
return &BuildStage{
|
||||
input: input,
|
||||
output: output,
|
||||
maxBuildMem: maxBuildMem,
|
||||
log: logrus.WithField("sync-stage", "build"),
|
||||
panicHandler: panicHandler,
|
||||
reporter: reporter,
|
||||
input: input,
|
||||
output: output,
|
||||
maxBuildMem: maxBuildMem,
|
||||
log: logrus.WithField("sync-stage", "build"),
|
||||
panicHandler: panicHandler,
|
||||
observabilitySender: observabilitySender,
|
||||
}
|
||||
}
|
||||
|
||||
@ -147,35 +150,24 @@ func (b *BuildStage) run(ctx context.Context) {
|
||||
req.job.log.WithError(err).Error("Failed to add failed message ID")
|
||||
}
|
||||
|
||||
if err := b.reporter.ReportMessageWithContext("Failed to build message - no unlocked keyring (sync)", reporter.Context{
|
||||
"messageID": msg.ID,
|
||||
"userID": req.userID(),
|
||||
}); err != nil {
|
||||
req.job.log.WithError(err).Error("Failed to report message build error")
|
||||
}
|
||||
b.observabilitySender.AddDistinctMetrics(observability.SyncError, obsMetrics.GenerateNoUnlockedKeyringMetric())
|
||||
return BuildResult{}, nil
|
||||
}
|
||||
|
||||
res, err := req.job.messageBuilder.BuildMessage(req.job.labels, msg, kr, new(bytes.Buffer))
|
||||
if err != nil {
|
||||
req.job.log.WithError(err).WithField("msgID", msg.ID).Error("Failed to build message (syn)")
|
||||
req.job.log.WithError(err).WithField("msgID", msg.ID).Error("Failed to build message (sync)")
|
||||
|
||||
if err := req.job.state.AddFailedMessageID(req.getContext(), msg.ID); err != nil {
|
||||
req.job.log.WithError(err).Error("Failed to add failed message ID")
|
||||
}
|
||||
|
||||
if err := b.reporter.ReportMessageWithContext("Failed to build message (sync)", reporter.Context{
|
||||
"messageID": msg.ID,
|
||||
"error": err,
|
||||
"userID": req.userID(),
|
||||
}); err != nil {
|
||||
req.job.log.WithError(err).Error("Failed to report message build error")
|
||||
}
|
||||
|
||||
b.observabilitySender.AddDistinctMetrics(observability.SyncError, obsMetrics.GenerateFailedToBuildMetric())
|
||||
// We could sync a placeholder message here, but for now we skip it entirely.
|
||||
return BuildResult{}, nil
|
||||
}
|
||||
|
||||
b.observabilitySender.AddMetrics(obsMetrics.GenerateMessageBuiltSuccessMetric())
|
||||
return res, nil
|
||||
})
|
||||
if err != nil {
|
||||
|
||||
@ -24,10 +24,11 @@ import (
|
||||
|
||||
"github.com/ProtonMail/gluon/async"
|
||||
"github.com/ProtonMail/gluon/imap"
|
||||
"github.com/ProtonMail/gluon/reporter"
|
||||
"github.com/ProtonMail/go-proton-api"
|
||||
"github.com/ProtonMail/gopenpgp/v2/crypto"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/bridge/mocks"
|
||||
"github.com/ProtonMail/proton-bridge/v3/internal/services/observability"
|
||||
obsMetrics "github.com/ProtonMail/proton-bridge/v3/internal/services/syncservice/observabilitymetrics"
|
||||
"github.com/bradenaw/juniper/xslices"
|
||||
"github.com/golang/mock/gomock"
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -67,7 +68,6 @@ func TestBuildStage_SuccessRemovesFailedMessage(t *testing.T) {
|
||||
|
||||
input := NewChannelConsumerProducer[BuildRequest]()
|
||||
output := NewChannelConsumerProducer[ApplyRequest]()
|
||||
reporter := mocks.NewMockReporter(mockCtrl)
|
||||
|
||||
labels := getTestLabels()
|
||||
|
||||
@ -105,7 +105,10 @@ func TestBuildStage_SuccessRemovesFailedMessage(t *testing.T) {
|
||||
tj.messageBuilder.EXPECT().BuildMessage(gomock.Eq(labels), gomock.Eq(msg), gomock.Any(), gomock.Any()).Return(buildResult, nil)
|
||||
tj.state.EXPECT().RemFailedMessageID(gomock.Any(), gomock.Eq("MSG"))
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, reporter)
|
||||
observabilityService := mocks.NewMockObservabilitySender(mockCtrl)
|
||||
observabilityService.EXPECT().AddMetrics(obsMetrics.GenerateMessageBuiltSuccessMetric())
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, observabilityService)
|
||||
|
||||
go func() {
|
||||
stage.run(ctx)
|
||||
@ -125,7 +128,7 @@ func TestBuildStage_BuildFailureIsReportedButDoesNotCancelJob(t *testing.T) {
|
||||
|
||||
input := NewChannelConsumerProducer[BuildRequest]()
|
||||
output := NewChannelConsumerProducer[ApplyRequest]()
|
||||
mockReporter := mocks.NewMockReporter(mockCtrl)
|
||||
mockObservabilityService := mocks.NewMockObservabilitySender(mockCtrl)
|
||||
|
||||
labels := getTestLabels()
|
||||
|
||||
@ -156,15 +159,12 @@ func TestBuildStage_BuildFailureIsReportedButDoesNotCancelJob(t *testing.T) {
|
||||
|
||||
tj.messageBuilder.EXPECT().BuildMessage(gomock.Eq(labels), gomock.Eq(msg), gomock.Any(), gomock.Any()).Return(BuildResult{}, buildError)
|
||||
tj.state.EXPECT().AddFailedMessageID(gomock.Any(), gomock.Eq([]string{"MSG"}))
|
||||
mockReporter.EXPECT().ReportMessageWithContext(gomock.Any(), gomock.Eq(reporter.Context{
|
||||
"userID": "u",
|
||||
"messageID": "MSG",
|
||||
"error": buildError,
|
||||
})).Return(nil)
|
||||
|
||||
tj.syncReporter.EXPECT().OnProgress(gomock.Any(), gomock.Eq(int64(10)))
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mockReporter)
|
||||
mockObservabilityService.EXPECT().AddDistinctMetrics(observability.SyncError, obsMetrics.GenerateNoUnlockedKeyringMetric())
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mockObservabilityService)
|
||||
|
||||
go func() {
|
||||
stage.run(ctx)
|
||||
@ -183,7 +183,6 @@ func TestBuildStage_FailedToLocateKeyRingIsReportedButDoesNotFailBuild(t *testin
|
||||
|
||||
input := NewChannelConsumerProducer[BuildRequest]()
|
||||
output := NewChannelConsumerProducer[ApplyRequest]()
|
||||
mockReporter := mocks.NewMockReporter(mockCtrl)
|
||||
|
||||
labels := getTestLabels()
|
||||
|
||||
@ -209,14 +208,13 @@ func TestBuildStage_FailedToLocateKeyRingIsReportedButDoesNotFailBuild(t *testin
|
||||
tj.job.end()
|
||||
|
||||
tj.state.EXPECT().AddFailedMessageID(gomock.Any(), gomock.Eq([]string{"MSG"}))
|
||||
mockReporter.EXPECT().ReportMessageWithContext(gomock.Any(), gomock.Eq(reporter.Context{
|
||||
"userID": "u",
|
||||
"messageID": "MSG",
|
||||
})).Return(nil)
|
||||
|
||||
tj.syncReporter.EXPECT().OnProgress(gomock.Any(), gomock.Eq(int64(10)))
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mockReporter)
|
||||
observabilitySender := mocks.NewMockObservabilitySender(mockCtrl)
|
||||
observabilitySender.EXPECT().AddDistinctMetrics(observability.SyncError)
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, observabilitySender)
|
||||
|
||||
go func() {
|
||||
stage.run(ctx)
|
||||
@ -235,7 +233,6 @@ func TestBuildStage_OtherErrorsFailJob(t *testing.T) {
|
||||
|
||||
input := NewChannelConsumerProducer[BuildRequest]()
|
||||
output := NewChannelConsumerProducer[ApplyRequest]()
|
||||
mockReporter := mocks.NewMockReporter(mockCtrl)
|
||||
|
||||
labels := getTestLabels()
|
||||
|
||||
@ -261,7 +258,7 @@ func TestBuildStage_OtherErrorsFailJob(t *testing.T) {
|
||||
childJob := tj.job.newChildJob("f", 10)
|
||||
tj.job.end()
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mockReporter)
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mocks.NewMockObservabilitySender(mockCtrl))
|
||||
|
||||
go func() {
|
||||
stage.run(ctx)
|
||||
@ -283,7 +280,6 @@ func TestBuildStage_CancelledJobIsDiscarded(t *testing.T) {
|
||||
|
||||
input := NewChannelConsumerProducer[BuildRequest]()
|
||||
output := NewChannelConsumerProducer[ApplyRequest]()
|
||||
mockReporter := mocks.NewMockReporter(mockCtrl)
|
||||
|
||||
msg := proton.FullMessage{
|
||||
Message: proton.Message{
|
||||
@ -294,7 +290,7 @@ func TestBuildStage_CancelledJobIsDiscarded(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mockReporter)
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mocks.NewMockObservabilitySender(mockCtrl))
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
@ -327,7 +323,6 @@ func TestTask_EmptyInputDoesNotCrash(t *testing.T) {
|
||||
|
||||
input := NewChannelConsumerProducer[BuildRequest]()
|
||||
output := NewChannelConsumerProducer[ApplyRequest]()
|
||||
reporter := mocks.NewMockReporter(mockCtrl)
|
||||
|
||||
labels := getTestLabels()
|
||||
|
||||
@ -340,7 +335,7 @@ func TestTask_EmptyInputDoesNotCrash(t *testing.T) {
|
||||
childJob := tj.job.newChildJob("f", 10)
|
||||
tj.job.end()
|
||||
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, reporter)
|
||||
stage := NewBuildStage(input, output, 1024, &async.NoopPanicHandler{}, mocks.NewMockObservabilitySender(mockCtrl))
|
||||
|
||||
go func() {
|
||||
stage.run(ctx)
|
||||
|
||||
Reference in New Issue
Block a user