263 lines
12 KiB
Diff
263 lines
12 KiB
Diff
From 915a38faca33caf04cab2398a52d743dea554359 Mon Sep 17 00:00:00 2001
|
|
From: Michael Gratton <mike@vee.net>
|
|
Date: Fri, 11 Sep 2020 00:00:02 +1000
|
|
Subject: [PATCH 008/124] Geary.ImapDB.Account: Slice up search table
|
|
population to work better
|
|
|
|
Although populating the search table had been broken up into batches
|
|
of 50 emails, it was still searching for and loading every single message
|
|
id in both the MessageTable and MessageSearchTable, doing a manual
|
|
join, and then updating the batch, for *each* batch, and in a RW
|
|
transaction.
|
|
|
|
Break this up so that the ids are loaded and joined only once, the
|
|
queries happen in a RO transaction, the manual join happens in a side
|
|
thread, leaving each RW transaction only having to load the messages
|
|
and update the search index for up to 50 messages.
|
|
---
|
|
src/engine/imap-db/imap-db-account.vala | 205 ++++++++++++++----------
|
|
1 file changed, 120 insertions(+), 85 deletions(-)
|
|
|
|
diff --git a/src/engine/imap-db/imap-db-account.vala b/src/engine/imap-db/imap-db-account.vala
|
|
index 99244dc2..54522b90 100644
|
|
--- a/src/engine/imap-db/imap-db-account.vala
|
|
+++ b/src/engine/imap-db/imap-db-account.vala
|
|
@@ -955,8 +955,78 @@ private class Geary.ImapDB.Account : BaseObject {
|
|
|
|
public async void populate_search_table(Cancellable? cancellable) {
|
|
debug("%s: Populating search table", account_information.id);
|
|
+ // Since all queries involved can be quite extensive and this
|
|
+ // is not a time-critical operation, split them up
|
|
+
|
|
+ var search_ids = new Gee.HashSet<int64?>(
|
|
+ Collection.int64_hash_func,
|
|
+ Collection.int64_equal_func
|
|
+ );
|
|
+ var message_ids = new Gee.HashSet<int64?>(
|
|
+ Collection.int64_hash_func,
|
|
+ Collection.int64_equal_func
|
|
+ );
|
|
+ var unindexed_message_ids = new Gee.HashSet<int64?>(
|
|
+ Collection.int64_hash_func,
|
|
+ Collection.int64_equal_func
|
|
+ );
|
|
+
|
|
try {
|
|
- while (!yield populate_search_table_batch_async(50, cancellable)) {
|
|
+ yield this.db.exec_transaction_async(
|
|
+ RO,
|
|
+ (cx, cancellable) => {
|
|
+ // Embedding a SELECT within a SELECT is painfully slow
|
|
+ // with SQLite, and a LEFT OUTER JOIN will still take in
|
|
+ // the order of seconds, so manually perform the operation
|
|
+
|
|
+ var result = cx.prepare(
|
|
+ "SELECT docid FROM MessageSearchTable"
|
|
+ ).exec(cancellable);
|
|
+ while (!result.finished) {
|
|
+ search_ids.add(result.rowid_at(0));
|
|
+ result.next(cancellable);
|
|
+ }
|
|
+
|
|
+ var stmt = cx.prepare(
|
|
+ "SELECT id FROM MessageTable WHERE (fields & ?) = ?"
|
|
+ );
|
|
+ stmt.bind_uint(0, Geary.ImapDB.Folder.REQUIRED_FTS_FIELDS);
|
|
+ stmt.bind_uint(1, Geary.ImapDB.Folder.REQUIRED_FTS_FIELDS);
|
|
+ result = stmt.exec(cancellable);
|
|
+ while (!result.finished) {
|
|
+ message_ids.add(result.rowid_at(0));
|
|
+ result.next(cancellable);
|
|
+ }
|
|
+
|
|
+ return DONE;
|
|
+ },
|
|
+ cancellable
|
|
+ );
|
|
+
|
|
+ // Run this in a separate thread since it could be quite a
|
|
+ // substantial process for large accounts
|
|
+ yield Nonblocking.Concurrent.global.schedule_async(
|
|
+ () => {
|
|
+ foreach (int64 message_id in message_ids) {
|
|
+ if (!search_ids.contains(message_id)) {
|
|
+ unindexed_message_ids.add(message_id);
|
|
+ }
|
|
+ }
|
|
+ },
|
|
+ cancellable
|
|
+ );
|
|
+
|
|
+ debug("%s: Found %d missing messages to populate",
|
|
+ this.account_information.id,
|
|
+ unindexed_message_ids.size
|
|
+ );
|
|
+
|
|
+ // Do the actual updating in batches since these require
|
|
+ // RW transactions
|
|
+ while (!unindexed_message_ids.is_empty) {
|
|
+ yield populate_search_table_batch_async(
|
|
+ 50, unindexed_message_ids, cancellable
|
|
+ );
|
|
// With multiple accounts, meaning multiple background threads
|
|
// doing such CPU- and disk-heavy work, this process can cause
|
|
// the main thread to slow to a crawl. This delay means the
|
|
@@ -965,105 +1035,70 @@ private class Geary.ImapDB.Account : BaseObject {
|
|
yield Geary.Scheduler.sleep_ms_async(50);
|
|
}
|
|
} catch (Error e) {
|
|
- debug("Error populating %s search table: %s", account_information.id, e.message);
|
|
+ debug("%s: Error populating search table: %s", account_information.id, e.message);
|
|
}
|
|
|
|
debug("%s: Done populating search table", account_information.id);
|
|
}
|
|
|
|
- private static Gee.HashSet<int64?> do_build_rowid_set(Db.Result result, Cancellable? cancellable)
|
|
- throws Error {
|
|
- Gee.HashSet<int64?> rowid_set = new Gee.HashSet<int64?>(Collection.int64_hash_func,
|
|
- Collection.int64_equal_func);
|
|
- while (!result.finished) {
|
|
- rowid_set.add(result.rowid_at(0));
|
|
- result.next(cancellable);
|
|
- }
|
|
-
|
|
- return rowid_set;
|
|
- }
|
|
-
|
|
- private async bool populate_search_table_batch_async(int limit, Cancellable? cancellable)
|
|
- throws Error {
|
|
+ private async void populate_search_table_batch_async(
|
|
+ int limit,
|
|
+ Gee.HashSet<int64?> unindexed_message_ids,
|
|
+ GLib.Cancellable? cancellable
|
|
+ ) throws GLib.Error {
|
|
check_open();
|
|
- debug("%s: Searching for up to %d missing indexed messages...", account_information.id,
|
|
- limit);
|
|
-
|
|
- int count = 0, total_unindexed = 0;
|
|
- yield db.exec_transaction_async(Db.TransactionType.RW, (cx, cancellable) => {
|
|
- // Embedding a SELECT within a SELECT is painfully slow
|
|
- // with SQLite, and a LEFT OUTER JOIN will still take in
|
|
- // the order of seconds, so manually perform the operation
|
|
|
|
- Db.Statement stmt = cx.prepare("""
|
|
- SELECT docid FROM MessageSearchTable
|
|
- """);
|
|
- Gee.HashSet<int64?> search_ids = do_build_rowid_set(stmt.exec(cancellable), cancellable);
|
|
-
|
|
- stmt = cx.prepare("""
|
|
- SELECT id FROM MessageTable WHERE (fields & ?) = ?
|
|
- """);
|
|
- stmt.bind_uint(0, Geary.ImapDB.Folder.REQUIRED_FTS_FIELDS);
|
|
- stmt.bind_uint(1, Geary.ImapDB.Folder.REQUIRED_FTS_FIELDS);
|
|
- Gee.HashSet<int64?> message_ids = do_build_rowid_set(stmt.exec(cancellable), cancellable);
|
|
-
|
|
- // This is hard to calculate correctly without doing a
|
|
- // join (which we should above, but is currently too
|
|
- // slow), and if we do get it wrong the progress monitor
|
|
- // will crash and burn, so just something too big to fail
|
|
- // for now. See Bug 776383.
|
|
- total_unindexed = message_ids.size;
|
|
-
|
|
- // chaff out any MessageTable entries not present in the MessageSearchTable ... since
|
|
- // we're given a limit, stuff messages req'ing search into separate set and stop when limit
|
|
- // reached
|
|
- Gee.HashSet<int64?> unindexed_message_ids = new Gee.HashSet<int64?>(Collection.int64_hash_func,
|
|
- Collection.int64_equal_func);
|
|
- foreach (int64 message_id in message_ids) {
|
|
- if (search_ids.contains(message_id))
|
|
- continue;
|
|
-
|
|
- unindexed_message_ids.add(message_id);
|
|
- if (unindexed_message_ids.size >= limit)
|
|
- break;
|
|
- }
|
|
-
|
|
- // For all remaining MessageTable rowid's, generate search table entry
|
|
- foreach (int64 message_id in unindexed_message_ids) {
|
|
- try {
|
|
- Geary.Email.Field search_fields = Geary.Email.REQUIRED_FOR_MESSAGE |
|
|
- Geary.Email.Field.ORIGINATORS | Geary.Email.Field.RECEIVERS |
|
|
- Geary.Email.Field.SUBJECT;
|
|
+ uint count = 0;
|
|
+ var iter = unindexed_message_ids.iterator();
|
|
+ yield this.db.exec_transaction_async(
|
|
+ RW,
|
|
+ (cx, cancellable) => {
|
|
+ while (iter.has_next() && count < limit) {
|
|
+ iter.next();
|
|
+ int64 message_id = iter.get();
|
|
+ try {
|
|
+ Email.Field search_fields = (
|
|
+ Email.REQUIRED_FOR_MESSAGE |
|
|
+ Email.Field.ORIGINATORS |
|
|
+ Email.Field.RECEIVERS |
|
|
+ Email.Field.SUBJECT
|
|
+ );
|
|
|
|
- Geary.Email.Field db_fields;
|
|
- MessageRow row = Geary.ImapDB.Folder.do_fetch_message_row(
|
|
- cx, message_id, search_fields, out db_fields, cancellable);
|
|
- Geary.Email email = row.to_email(new Geary.ImapDB.EmailIdentifier(message_id, null));
|
|
- Attachment.add_attachments(
|
|
- cx, this.db.attachments_path, email, message_id, cancellable
|
|
- );
|
|
+ Email.Field db_fields;
|
|
+ MessageRow row = Geary.ImapDB.Folder.do_fetch_message_row(
|
|
+ cx, message_id, search_fields, out db_fields, cancellable
|
|
+ );
|
|
+ Email email = row.to_email(
|
|
+ new Geary.ImapDB.EmailIdentifier(message_id, null)
|
|
+ );
|
|
+ Attachment.add_attachments(
|
|
+ cx, this.db.attachments_path, email, message_id, cancellable
|
|
+ );
|
|
+ Geary.ImapDB.Folder.do_add_email_to_search_table(
|
|
+ cx, message_id, email, cancellable
|
|
+ );
|
|
+ } catch (GLib.Error e) {
|
|
+ // This is a somewhat serious issue since we rely on
|
|
+ // there always being a row in the search table for
|
|
+ // every message.
|
|
+ warning(
|
|
+ "Error populating message %s for indexing: %s",
|
|
+ message_id.to_string(),
|
|
+ e.message
|
|
+ );
|
|
+ }
|
|
|
|
- Geary.ImapDB.Folder.do_add_email_to_search_table(cx, message_id, email, cancellable);
|
|
- } catch (Error e) {
|
|
- // This is a somewhat serious issue since we rely on
|
|
- // there always being a row in the search table for
|
|
- // every message.
|
|
- warning("Error adding message %s to the search table: %s", message_id.to_string(),
|
|
- e.message);
|
|
+ iter.remove();
|
|
+ ++count;
|
|
}
|
|
|
|
- ++count;
|
|
- }
|
|
-
|
|
- return Db.TransactionOutcome.DONE;
|
|
+ return COMMIT;
|
|
}, cancellable);
|
|
|
|
if (count > 0) {
|
|
- debug("%s: Found %d/%d missing indexed messages, %d remaining...",
|
|
- account_information.id, count, limit, total_unindexed);
|
|
+ debug("%s: Populated %u missing indexed messages...",
|
|
+ account_information.id, count);
|
|
}
|
|
-
|
|
- return (count < limit);
|
|
}
|
|
|
|
//
|
|
--
|
|
2.29.2
|
|
|