GODT-1356 GODT-1302: Cache on disk concurrency and API retries

- GODT-1302: Change maximum resty retries from 0 to 30
- GODT-1302: Make sure we are closing the GetAttachment io.ReadCloser on error
- GODT-1356: Do not use attachmentPool - it was useless anyway
- GODT-1356: Increase cache watcher interval to 10 min
- GODT-1356: Start cache watcher right after startup (do not wait for the first 10 min)
- GODT-1356: Limit the number of build jobs (and hence memory allocation) in BuildAndCacheMessage
- Other: Pass context from job options (message builder) to fetcher (both message and attachments)
- Other: BuildJob stores the same done function as the returned buildDone (proper map locking)
Jakub
2021-10-19 17:08:12 +02:00
parent db7ead3901
commit a93a8e7be9
8 changed files with 118 additions and 48 deletions

View File

@@ -32,6 +32,7 @@ import (
     "github.com/ProtonMail/proton-bridge/internal/frontend/types"
     "github.com/ProtonMail/proton-bridge/internal/imap"
     "github.com/ProtonMail/proton-bridge/internal/smtp"
+    "github.com/ProtonMail/proton-bridge/internal/store"
     "github.com/ProtonMail/proton-bridge/internal/store/cache"
     "github.com/ProtonMail/proton-bridge/internal/updater"
     "github.com/ProtonMail/proton-bridge/pkg/message"
@@ -74,7 +75,7 @@ func run(b *base.Base, c *cli.Context) error { // nolint[funlen]
         return err
     }
-    cache, err := loadCache(b)
+    cache, err := loadMessageCache(b)
     if err != nil {
         return err
     }
@@ -247,16 +248,20 @@ func checkAndHandleUpdate(u types.Updater, f frontend.Frontend, autoUpdate bool)
     f.NotifySilentUpdateInstalled()
 }
-// NOTE(GODT-1158): How big should in-memory cache be?
-// NOTE(GODT-1158): How to handle cache location migration if user changes custom path?
-func loadCache(b *base.Base) (cache.Cache, error) {
+func loadMessageCache(b *base.Base) (cache.Cache, error) {
     if !b.Settings.GetBool(settings.CacheEnabledKey) {
-        return cache.NewInMemoryCache(100 * (1 << 20)), nil
+        // The in-memory cache size was estimated from empirical usage and
+        // set to 100MB.
+        // NOTE: This value must not be less than the maximal size of one
+        // email (~30MB).
+        return cache.NewInMemoryCache(100 << 20), nil
     }
     var compressor cache.Compressor
-    // NOTE(GODT-1158): If user changes compression setting we have to nuke the cache.
+    // NOTE(GODT-1158): Changing compression is not currently exposed to the
+    // user but, if the compression setting ever changes, we have to nuke
+    // the cache.
     if b.Settings.GetBool(settings.CacheCompressionKey) {
         compressor = &cache.GZipCompressor{}
     } else {
@@ -269,10 +274,15 @@ func loadCache(b *base.Base) (cache.Cache, error) {
         path = customPath
     } else {
         path = b.Cache.GetDefaultMessageCacheDir()
-        // Store path so it will allways persist if default location will be changed in new version.
+        // Store the path so it always persists even if the default location
+        // is changed in a new version.
         b.Settings.Set(settings.CacheLocationKey, path)
     }
+    // To prevent memory peaks we set the maximal write concurrency for store
+    // build jobs.
+    store.SetBuildAndCacheJobLimit(b.Settings.GetInt(settings.CacheConcurrencyWrite))
     return cache.NewOnDiskCache(path, compressor, cache.Options{
         MinFreeAbs: uint64(b.Settings.GetInt(settings.CacheMinFreeAbsKey)),
         MinFreeRat: b.Settings.GetFloat64(settings.CacheMinFreeRatKey),

View File

@@ -89,6 +89,24 @@ func (store *Store) clearCachePassphrase() error {
     })
 }
+// buildAndCacheJobs limits the number of parallel background build jobs by
+// using a buffered channel. While the channel blocks, the goroutine is
+// running but the download has not started yet, so no memory needs to be
+// allocated. Once other jobs finish, the blocked job can continue. The
+// bottleneck is `store.cache.Set`, which can take some time to write all
+// the downloaded bytes. It is therefore not effective to start fetching and
+// building more messages than the maximum number of parallel cache writers.
+//
+// The default capacity of buildAndCacheJobs is 16; it can be changed with
+// SetBuildAndCacheJobLimit.
+var (
+    buildAndCacheJobs = make(chan struct{}, 16) //nolint[gochecknoglobals]
+)
+func SetBuildAndCacheJobLimit(maxJobs int) {
+    buildAndCacheJobs = make(chan struct{}, maxJobs)
+}
 func (store *Store) getCachedMessage(messageID string) ([]byte, error) {
     if store.cache.Has(store.user.ID(), messageID) {
         return store.cache.Get(store.user.ID(), messageID)
@@ -118,6 +136,9 @@ func (store *Store) IsCached(messageID string) bool {
 // BuildAndCacheMessage builds the given message (with background priority) and puts it in the cache.
 // It builds with background priority.
 func (store *Store) BuildAndCacheMessage(messageID string) error {
+    buildAndCacheJobs <- struct{}{}
+    defer func() { <-buildAndCacheJobs }()
     job, done := store.newBuildJob(messageID, message.BackgroundPriority)
     defer done()
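The buffered channel above works as a counting semaphore: sending takes a slot, the deferred receive frees it, and senders block while all slots are in use. A minimal standalone sketch of the same pattern, with illustrative names that are not part of the bridge code:

package main

import (
    "fmt"
    "sync"
)

// jobSlots allows at most four of the expensive sections to run at once.
var jobSlots = make(chan struct{}, 4)

func buildAndCache(id int, wg *sync.WaitGroup) {
    defer wg.Done()

    jobSlots <- struct{}{}        // blocks while every slot is taken
    defer func() { <-jobSlots }() // frees the slot when the work is done

    fmt.Println("building and caching message", id) // stands in for fetch + cache.Set
}

func main() {
    var wg sync.WaitGroup

    for i := 0; i < 16; i++ {
        wg.Add(1)
        go buildAndCache(i, &wg)
    }

    wg.Wait()
}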

View File

@@ -23,12 +23,10 @@ func (store *Store) StartWatcher() {
     store.done = make(chan struct{})
     go func() {
-        ticker := time.NewTicker(3 * time.Minute)
+        ticker := time.NewTicker(10 * time.Minute)
         defer ticker.Stop()
         for {
-            select {
-            case <-ticker.C:
             // NOTE(GODT-1158): Race condition here? What if DB was already closed?
             messageIDs, err := store.getAllMessageIDs()
             if err != nil {
@@ -41,8 +39,11 @@ func (store *Store) StartWatcher() {
                 }
             }
+            select {
             case <-store.done:
                 return
+            case <-ticker.C:
+                continue
             }
         }
     }()
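The reshuffled loop runs the check once right away and only then waits for either the next tick or shutdown. A compact sketch of that shape, with illustrative names:

package main

import (
    "fmt"
    "time"
)

func startWatcher(done <-chan struct{}, work func()) {
    go func() {
        ticker := time.NewTicker(10 * time.Minute)
        defer ticker.Stop()

        for {
            work() // first pass happens immediately, not after 10 minutes

            select {
            case <-done:
                return
            case <-ticker.C:
                continue
            }
        }
    }()
}

func main() {
    done := make(chan struct{})
    startWatcher(done, func() { fmt.Println("checking cached messages") })

    time.Sleep(100 * time.Millisecond)
    close(done)
}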

View File

@@ -58,10 +58,10 @@ type Fetcher interface {
 // The returned builder is ready to handle jobs -- see (*Builder).NewJob for more information.
 //
 // Call (*Builder).Done to shut down the builder and stop all workers.
-func NewBuilder(fetchWorkers, attachWorkers int) *Builder {
-    attacherPool := pool.New(attachWorkers, newAttacherWorkFunc())
-    fetcherPool := pool.New(fetchWorkers, newFetcherWorkFunc(attacherPool))
+func NewBuilder(fetchWorkers, attachmentWorkers int) *Builder {
+    attachmentPool := pool.New(attachmentWorkers, newAttacherWorkFunc())
+    fetcherPool := pool.New(fetchWorkers, newFetcherWorkFunc(attachmentPool))
     return &Builder{
         pool: fetcherPool,
@@ -87,6 +87,7 @@ func (builder *Builder) NewJobWithOptions(ctx context.Context, fetcher Fetcher,
     job, done := builder.pool.NewJob(
         &fetchReq{
+            ctx:       ctx,
             fetcher:   fetcher,
             messageID: messageID,
             options:   opts,
@@ -94,14 +95,7 @@ func (builder *Builder) NewJobWithOptions(ctx context.Context, fetcher Fetcher,
         prio,
     )
-    buildJob := &Job{
-        Job:  job,
-        done: done,
-    }
-    builder.jobs[messageID] = buildJob
-    return buildJob, func() {
+    buildDone := func() {
         builder.lock.Lock()
         defer builder.lock.Unlock()
@@ -111,6 +105,15 @@ func (builder *Builder) NewJobWithOptions(ctx context.Context, fetcher Fetcher,
         // And mark it as done.
         done()
     }
+    buildJob := &Job{
+        Job:  job,
+        done: buildDone,
+    }
+    builder.jobs[messageID] = buildJob
+    return buildJob, buildDone
 }
 func (builder *Builder) Done() {
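The point of the buildDone change is that the function stored on the Job and the one returned to the caller are now the same closure, so the jobs map is always cleaned up under the builder lock regardless of which side triggers it. A reduced sketch of that pattern, with illustrative types and names:

package main

import (
    "fmt"
    "sync"
)

type registry struct {
    lock sync.Mutex
    jobs map[string]*job
}

type job struct {
    id   string
    done func()
}

func (r *registry) newJob(id string) (*job, func()) {
    r.lock.Lock()
    defer r.lock.Unlock()

    j := &job{id: id}

    // One closure removes the job from the map under the lock; it is stored
    // on the job and also handed back, so both paths behave identically.
    done := func() {
        r.lock.Lock()
        defer r.lock.Unlock()

        delete(r.jobs, id)
    }

    j.done = done
    r.jobs[id] = j

    return j, done
}

func main() {
    r := &registry{jobs: make(map[string]*job)}

    j, done := r.newJob("message-1")
    fmt.Println(j.id, len(r.jobs)) // message-1 1

    done()
    fmt.Println(len(r.jobs)) // 0
}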
@@ -118,12 +121,14 @@ func (builder *Builder) Done() {
 }
 type fetchReq struct {
+    ctx       context.Context
     fetcher   Fetcher
     messageID string
     options   JobOptions
 }
 type attachReq struct {
+    ctx     context.Context
     fetcher Fetcher
     message *pmapi.Message
 }
@@ -143,6 +148,10 @@ func (job *Job) GetResult() ([]byte, error) {
     return res.([]byte), nil
 }
+// NOTE: This is not used because it does not do what was expected: it
+// downloads all the attachments belonging to one message sequentially
+// within one goroutine. We should have one job per attachment. This doesn't
+// look like a bottleneck right now.
 func newAttacherWorkFunc() pool.WorkFunc {
     return func(payload interface{}, prio int) (interface{}, error) {
         req, ok := payload.(*attachReq)
@@ -153,7 +162,7 @@ func newAttacherWorkFunc() pool.WorkFunc {
         res := make(map[string][]byte)
         for _, att := range req.message.Attachments {
-            rc, err := req.fetcher.GetAttachment(context.Background(), att.ID)
+            rc, err := req.fetcher.GetAttachment(req.ctx, att.ID)
             if err != nil {
                 return nil, err
             }
@@ -174,32 +183,43 @@ func newAttacherWorkFunc() pool.WorkFunc {
     }
 }
-func newFetcherWorkFunc(attacherPool *pool.Pool) pool.WorkFunc {
+func newFetcherWorkFunc(attachmentPool *pool.Pool) pool.WorkFunc {
     return func(payload interface{}, prio int) (interface{}, error) {
         req, ok := payload.(*fetchReq)
         if !ok {
            panic("bad payload type")
         }
-        msg, err := req.fetcher.GetMessage(context.Background(), req.messageID)
+        msg, err := req.fetcher.GetMessage(req.ctx, req.messageID)
         if err != nil {
             return nil, err
         }
-        attJob, attDone := attacherPool.NewJob(&attachReq{
-            fetcher: req.fetcher,
-            message: msg,
-        }, prio)
-        defer attDone()
-        val, err := attJob.GetResult()
+        attData := make(map[string][]byte)
+        for _, att := range msg.Attachments {
+            // NOTE: Potential place for optimization: use attachmentPool to
+            // download each attachment in a separate parallel job. It is not
+            // straightforward, because we need to make sure the
+            // attachment-job-done function is called on any error or after
+            // all attachment bytes have been collected asynchronously.
+            rc, err := req.fetcher.GetAttachment(req.ctx, att.ID)
             if err != nil {
                 return nil, err
             }
-        attData, ok := val.(map[string][]byte)
-        if !ok {
-            panic("bad response type")
+            b, err := ioutil.ReadAll(rc)
+            if err != nil {
+                _ = rc.Close()
+                return nil, err
+            }
+            if err := rc.Close(); err != nil {
+                return nil, err
+            }
+            attData[att.ID] = b
         }
         kr, err := req.fetcher.KeyRingForAddressID(msg.AddressID)
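The new attachment loop replaces the attacher pool with direct fetches, and it is careful to close the io.ReadCloser on the error path and to surface a failing Close instead of dropping it. A small self-contained sketch of that read-then-close pattern (the helper name is illustrative, not part of the bridge code):

package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "strings"
)

// readAllAndClose drains rc and always closes it, even when reading fails.
func readAllAndClose(rc io.ReadCloser) ([]byte, error) {
    b, err := ioutil.ReadAll(rc)
    if err != nil {
        _ = rc.Close() // best effort: the read error is the one to report
        return nil, err
    }

    if err := rc.Close(); err != nil {
        return nil, err
    }

    return b, nil
}

func main() {
    b, err := readAllAndClose(ioutil.NopCloser(strings.NewReader("attachment bytes")))
    fmt.Println(string(b), err)
}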

View File

@@ -84,7 +84,7 @@ func (c *client) r(ctx context.Context) (*resty.Request, error) {
     return r, nil
 }
-// do executes fn and may repeate it in case "401 Unauthorized" error is returned.
+// do executes fn and may repeat it after a "401 Unauthorized" error.
 // Note: fn may be called more than once.
 func (c *client) do(ctx context.Context, fn func(*resty.Request) (*resty.Response, error)) (*resty.Response, error) {
     r, err := c.r(ctx)

View File

@@ -66,6 +66,13 @@ func newManager(cfg Config) *manager {
     m.rc.OnError(m.handleRequestFailure)
     // Configure retry mechanism.
+    //
+    // SetRetryCount(30): The most probable value of Retry-After is 1s (max
+    // 10s). Retrying up to 30 times will most probably cause a delay of
+    // about 30s. The worst case scenario is 5min, which is OK for background
+    // requests. We should use a context to control foreground requests,
+    // which should finish or fail sooner.
+    m.rc.SetRetryCount(30)
     m.rc.SetRetryMaxWaitTime(time.Minute)
     m.rc.SetRetryAfter(catchRetryAfter)
     m.rc.AddRetryCondition(shouldRetry)
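For reference, a hedged sketch of a resty client configured along the same lines (30 retries, a one-minute cap between attempts, a Retry-After-driven backoff), exercised against a throw-away test server. The retry-after and retry-condition callbacks below are illustrative stand-ins, not the bridge's catchRetryAfter and shouldRetry:

package main

import (
    "fmt"
    "net/http"
    "net/http/httptest"
    "strconv"
    "time"

    "github.com/go-resty/resty/v2"
)

func newRetryingClient() *resty.Client {
    rc := resty.New()

    rc.SetRetryCount(30)
    rc.SetRetryMaxWaitTime(time.Minute)

    // Honour the server-provided Retry-After header when present and fall
    // back to a short fixed delay otherwise.
    rc.SetRetryAfter(func(c *resty.Client, res *resty.Response) (time.Duration, error) {
        if secs, err := strconv.Atoi(res.Header().Get("Retry-After")); err == nil {
            return time.Duration(secs) * time.Second, nil
        }
        return time.Second, nil
    })

    // Retry on 429 and on transport-level failures without any response.
    rc.AddRetryCondition(func(res *resty.Response, err error) bool {
        return res.StatusCode() == http.StatusTooManyRequests ||
            (res.RawResponse == nil && err != nil)
    })

    return rc
}

func main() {
    var calls int

    // Test server: two 429 responses with Retry-After, then a 200.
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        calls++
        if calls < 3 {
            w.Header().Set("Retry-After", "1")
            w.WriteHeader(http.StatusTooManyRequests)
            return
        }
        w.WriteHeader(http.StatusOK)
    }))
    defer srv.Close()

    res, err := newRetryingClient().R().Get(srv.URL)
    fmt.Println(res.StatusCode(), calls, err) // 200 3 <nil>
}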

View File

@@ -34,6 +34,11 @@ const testForceUpgradeBody = `{
     "Error":"Upgrade!"
 }`
+const testTooManyAPIRequests = `{
+    "Code":85131,
+    "Error":"Too many recent API requests"
+}`
 func TestHandleTooManyRequests(t *testing.T) {
     var numCalls int
@@ -42,6 +47,8 @@ func TestHandleTooManyRequests(t *testing.T) {
         if numCalls < 5 {
             w.WriteHeader(http.StatusTooManyRequests)
+            w.Header().Set("content-type", "application/json;charset=utf-8")
+            fmt.Fprint(w, testTooManyAPIRequests)
         } else {
             w.WriteHeader(http.StatusOK)
         }

View File

@@ -129,6 +129,10 @@ func isTooManyRequest(res *resty.Response) bool {
 }
 func isNoResponse(res *resty.Response, err error) bool {
+    // Do not retry TLS failures
+    if errors.Is(err, ErrTLSMismatch) {
+        return false
+    }
     return res.RawResponse == nil && err != nil
 }
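errors.Is is used rather than a direct comparison so that the sentinel is still recognised when the transport wraps it in another error. A tiny illustration; the sentinel below is made up for the example and is not the pmapi ErrTLSMismatch:

package main

import (
    "errors"
    "fmt"
)

var errTLSMismatch = errors.New("tls: certificate pin mismatch")

func main() {
    wrapped := fmt.Errorf("request failed: %w", errTLSMismatch)

    fmt.Println(errors.Is(wrapped, errTLSMismatch)) // true: matched through the wrap, do not retry
    fmt.Println(wrapped == errTLSMismatch)          // false: a plain comparison misses it
}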