We build too many walls and not enough bridges

2020-04-08 12:59:16 +02:00
commit 17f4d6097a
494 changed files with 62753 additions and 0 deletions
--- a/test/benchmarks/bench_results/human-table.py
+++ b/test/benchmarks/bench_results/human-table.py
@ -0,0 +1,104 @@
+#!/usr/bin/env python
+
+import glob
+import pandas as pd
+import re
+
+
+def print_header(report_file):
+    print('\n======== %s ========' %
+          (report_file.replace("./bench-", "").replace(".log", "")))
+
+
+# Line patterns tried by parse_line, keyed by the kind of line they recognise.
+# Every active pattern ends in `\n`, so a line without a trailing newline
+# (e.g. an unterminated last line of a log) will not match.
+rx_line = {
+    # `Res`, an optional letter, then `: * <digits> EXISTS`; the digits are
+    # captured as `mails`.
+    'exists': re.compile(r'.*Res[A-Za-z]?: [*] (?P<mails>\d+) EXISTS.*\n'),
+    # `Benchmark<name> <rpts> <ns> ns/op`; captures `name`, `rpts` and `ns`.
+    'bench': re.compile(r'Benchmark(?P<name>[^ \t]+)[ \t]+(?P<rpts>\d+)[ \t]+(?P<ns>\d+) ns/op.*\n'),
+    # Disabled pattern for the `ok <pkg> <time>` summary line.
+    # 'total' : re.compile(r'ok[ \t]+(?P<pkg>[^ \t]+)[ \t]+(?P<time>[^ \t\n]+)[ \t]*\n'),
+}
+
+
+def parse_line(line):
+    for key, rx in rx_line.items():
+        match = rx.search(line)
+        if match:
+            return key, match
+    # if there are no matches
+    return None, None
+
+
+# Extracts the upper bound <count> from a benchmark name containing
+# `Fetch/1:<count>-`; parse_file searches benchmark names with it.
+rx_count = re.compile(r'Fetch/1:(?P<count>\d+)-')
+
+
+def parse_file(filepath):
+    data = []  # create an empty list to collect the data
+    # open the file and read through it line by line
+    with open(filepath, 'r') as file_object:
+        line = file_object.readline()
+        last_count = 0
+        mails = 1
+        while line:
+            # at each line check for a match with a regex
+            key, match = parse_line(line)
+            # print(line, key, match)
+            if key != None:
+                row = match.groupdict()
+                if key == 'exists':
+                    mails = int(row['mails'])
+                    last_count = 0
+                if key == 'bench':
+                    match = rx_count.search(row['name'])
+                    row['mails'] = mails - last_count
+                    if match:
+                        count = int(match.group('count'))
+                        if count < mails:
+                            row['mails'] = count - last_count
+                        last_count = count
+                    row['rpts'] = int(row['rpts'])
+                    row['ns'] = int(row['ns'])
+                    row['time/op'] = human_duration(row['ns'])
+                    if row['mails'] > 0:
+                        row['time/mails'] = human_duration(
+                            row['ns']/row['mails']
+                        )
+                    data.append(row)
+                if key == 'total':
+                    row['name'] = key
+                    data.append(row)
+            line = file_object.readline()
+
+    return data
+
+
+def human_duration(duration_ns):
+    unit = 'ns'
+    factor = 1.
+    unit_factors = [
+        ('us', 1.e3),
+        ('ms', 1.e3),
+        ('s ', 1.e3),
+        ('m ', 60.),
+        ('h ', 60.),
+        ('d ', 24.),
+        ('w ', 7.),
+        ('m ', 30./7.),
+        ('y ', 12.),
+    ]
+    for unit_factor in unit_factors:
+        if (abs(duration_ns) / factor / unit_factor[1]) < 1.0:
+            break
+        unit = unit_factor[0]
+        factor *= unit_factor[1]
+    return "%4.2f%s" % (duration_ns/factor, unit)
+
+
+def print_table(data):
+    data = pd.DataFrame(data)
+    data.set_index('name', inplace=True)
+    print(data)
+
+
+if __name__ == "__main__":
+    # for d in [ 0.5, 1, 2, 5, 1e3, 5e3, 1e4, 1e5, 1e6, 1e9, 2e9, 1e10, 1e11, 1e12, ]: print(human_duration(int(d)))
+    for report_file in glob.glob("./*.log"):
+        print_header(report_file)
+        print_table(parse_file(report_file))
--- a/test/benchmarks/bench_test.go
+++ b/test/benchmarks/bench_test.go
@ -0,0 +1,201 @@
+// Copyright (c) 2020 Proton Technologies AG
+//
+// This file is part of ProtonMail Bridge.
+//
+// ProtonMail Bridge is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// ProtonMail Bridge is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with ProtonMail Bridge.  If not, see <https://www.gnu.org/licenses/>.
+
+package benchmarks
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/ProtonMail/proton-bridge/test/context"
+	"github.com/ProtonMail/proton-bridge/test/mocks"
+)
+
+func benchTestContext() (*context.TestContext, *mocks.IMAPClient) {
+	ctx := context.New()
+
+	username := "user"
+	account := ctx.GetTestAccount(username)
+	if account == nil {
+		panic("account " + username + " does not exist")
+	}
+
+	_ = ctx.GetPMAPIController().AddUser(account.User(), account.Addresses(), account.Password(), account.IsTwoFAEnabled())
+	if err := ctx.LoginUser(account.Username(), account.Password(), account.MailboxPassword()); err != nil {
+		panic(err)
+	}
+
+	imapClient := ctx.GetIMAPClient("client")
+	imapClient.Login(account.Address(), account.BridgePassword())
+
+	// waitForSync between bridge and API. There is no way to know precisely
+	// from the outside when the bridge is synced. We could wait for first
+	// response from any fetch, but we don't know how many messages should be
+	// there. Unless we hard code the number of messages.
+	// Please, check this time is enough when doing benchmarks and don't forget
+	// to exclude this time from total time.
+	time.Sleep(10 * time.Second)
+
+	return ctx, imapClient
+}
+
+// BenchmarkIMAPFetch measures fetching rfc822.size for growing sequence
+// ranges of the selected mailbox, one sub-benchmark per range.
+func BenchmarkIMAPFetch(b *testing.B) {
+	tc, c := benchTestContext()
+	defer tc.Cleanup()
+
+	// NOTE(review): the mailbox name is passed unquoted here, while
+	// BenchmarkCachingFetch passes it with explicit quotes - confirm which
+	// form Select expects.
+	c.Select("All Mail").AssertOK()
+
+	cases := []struct{ ids, args string }{
+		{"1:10", "rfc822.size"},
+		{"1:100", "rfc822.size"},
+		{"1:1000", "rfc822.size"},
+		{"1:*", "rfc822.size"},
+	}
+
+	for idx := range cases {
+		bc := cases[idx] // per-iteration copy captured by the closure
+		b.Run(bc.ids+"-"+bc.args, func(b *testing.B) {
+			for n := 0; n < b.N; n++ {
+				c.Fetch(bc.ids, bc.args)
+			}
+		})
+	}
+}
+
+func BenchmarkCachingFetch(b *testing.B) {
+	tc, c := benchTestContext()
+	defer tc.Cleanup()
+
+	c.Select("\"All Mail\"").AssertOK()
+
+	ids := "1:100"
+	args := "body.peek[]"
+	tries := []string{"long", "short"}
+
+	for _, try := range tries {
+		b.Run(strings.Join([]string{ids, args, try}, "-"), func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				c.Fetch(ids, args)
+			}
+		})
+	}
+}
+
+// BenchmarkIMAPAppleMail replays the raw IMAP command sequence recorded
+// from Apple Mail and asserts that every command is answered with OK.
+func BenchmarkIMAPAppleMail(b *testing.B) {
+	tc, c := benchTestContext()
+	defer tc.Cleanup()
+
+	// Assume we have at least 50 messages in INBOX.
+	idRange := "1:50"
+	newUID := "50" // assume that Apple Mail does not know about this mail yet
+
+	// Raw commands are used to reproduce the calls exactly (including
+	// quotation and case sensitivity). The list is built once, outside the
+	// measured loop, so its construction is not part of the timing.
+	commands := []string{
+		"CAPABILITY",
+		"ID (" +
+			`"name" "Mac OS X Mail" ` +
+			`"version" "11.5 (3445.9.1)" ` +
+			`"os" "Mac OS X" ` +
+			`"os-version" "10.13.6 (17G3025)" ` +
+			`"vendor" "Apple Inc."` +
+			")",
+		`LIST "" ""`,
+		`STATUS INBOX (MESSAGES UIDNEXT UIDVALIDITY UNSEEN)`,
+		`SELECT INBOX`,
+		`FETCH ` + idRange + ` (FLAGS UID)`,
+		`FETCH ` + idRange + " " +
+			`(` +
+			`INTERNALDATE UID RFC822.SIZE FLAGS ` +
+			`BODY.PEEK[` +
+			`HEADER.FIELDS (` +
+			`date subject from to cc message-id in-reply-to references ` +
+			`x-priority x-uniform-type-identifier x-universally-unique-identifier ` +
+			`list-id list-unsubscribe` +
+			`)])`,
+		`UID FETCH ` + newUID + ` (BODYSTRUCTURE BODY.PEEK[HEADER])`,
+		// If the email has an attachment, it is split into several fetches:
+		//`UID FETCH 133 (BODY.PEEK[3]<0.5877469> BODY.PEEK[1] BODY.PEEK[2])`,
+		//`UID FETCH 133 BODY.PEEK[3]<5877469.2925661>`,
+		// Here only the section download used by Apple Mail is replayed.
+		`UID FETCH ` + newUID + ` BODY.PEEK[1]`,
+		// Here only the partial download used by Apple Mail is replayed.
+		`UID FETCH ` + newUID + ` BODY.PEEK[]<0.2000>`,
+	}
+
+	b.Run("default", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			for _, command := range commands {
+				c.SendCommand(command).AssertOK()
+			}
+		}
+	})
+}
+
+// BenchmarkIMAPOutlook replays the raw IMAP command sequence recorded from
+// Outlook and asserts that every command is answered with OK.
+func BenchmarkIMAPOutlook(b *testing.B) {
+	tc, c := benchTestContext()
+	defer tc.Cleanup()
+
+	// Assume we have at least 50 messages in INBOX.
+	idRange := "1:50"
+
+	// Raw commands are used to reproduce the calls exactly (including
+	// quotation and case sensitivity). The list is built once, outside the
+	// measured loop, so its construction is not part of the timing.
+	commands := []string{
+
+		/*
+			"ID ("+
+				`"name" "Mac OS X Mail" `+
+				`"version" "11.5 (3445.9.1)" `+
+				`"os" "Mac OS X" `+
+				`"os-version" "10.13.6 (17G3025)" `+
+				`"vendor" "Apple Inc."`+
+				")",
+		*/
+
+		`SELECT "INBOX"`,
+		`UID SEARCH ` + idRange + ` SINCE 01-Sep-2019`,
+		`UID FETCH 1:* (UID FLAGS)`,
+		`UID FETCH ` + idRange + ` (UID FLAGS RFC822.SIZE BODY.PEEK[] INTERNALDATE)`,
+	}
+
+	b.Run("default", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			for _, command := range commands {
+				c.SendCommand(command).AssertOK()
+			}
+		}
+	})
+}
+
+// BenchmarkIMAPThunderbird replays the raw IMAP command sequence recorded
+// from Thunderbird and asserts that every command is answered with OK.
+func BenchmarkIMAPThunderbird(b *testing.B) {
+	tc, c := benchTestContext()
+	defer tc.Cleanup()
+
+	// Raw commands are used to reproduce the calls exactly (including
+	// quotation and case sensitivity). The list is built once, outside the
+	// measured loop, so its construction is not part of the timing.
+	commands := []string{
+		`capability`,
+		`ID ("name" "Thunderbird" "version" "68.2.0")`,
+		`select "INBOX"`,
+		`getquotaroot "INBOX"`,
+		`UID fetch 1:* (FLAGS)`,
+	}
+
+	b.Run("default", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			for _, command := range commands {
+				c.SendCommand(command).AssertOK()
+			}
+		}
+	})
+}