{% if include.truncate %} {% if page.content contains '' %} diff --git a/3rdParty/rocksdb/v5.16.X/docs/_includes/powered_by.html b/3rdParty/rocksdb/v5.18.X/docs/_includes/powered_by.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_includes/powered_by.html rename to 3rdParty/rocksdb/v5.18.X/docs/_includes/powered_by.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_includes/social_plugins.html b/3rdParty/rocksdb/v5.18.X/docs/_includes/social_plugins.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_includes/social_plugins.html rename to 3rdParty/rocksdb/v5.18.X/docs/_includes/social_plugins.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_includes/ui/button.html b/3rdParty/rocksdb/v5.18.X/docs/_includes/ui/button.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_includes/ui/button.html rename to 3rdParty/rocksdb/v5.18.X/docs/_includes/ui/button.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/basic.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/basic.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/basic.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/basic.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/blog.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/blog.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/blog.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/blog.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/blog_default.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/blog_default.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/blog_default.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/blog_default.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/default.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/default.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/default.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/default.html diff --git 
a/3rdParty/rocksdb/v5.16.X/docs/_layouts/doc_default.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/doc_default.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/doc_default.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/doc_default.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/doc_page.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/doc_page.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/doc_page.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/doc_page.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/docs.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/docs.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/docs.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/docs.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/home.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/home.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/home.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/home.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/page.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/page.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/page.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/page.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/plain.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/plain.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/plain.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/plain.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/post.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/post.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/post.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/post.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/redirect.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/redirect.html similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/docs/_layouts/redirect.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/redirect.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_layouts/top-level.html b/3rdParty/rocksdb/v5.18.X/docs/_layouts/top-level.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_layouts/top-level.html rename to 3rdParty/rocksdb/v5.18.X/docs/_layouts/top-level.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown rename 
to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-05-14-lock.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-05-14-lock.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-05-14-lock.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-05-14-lock.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown rename to 
3rdParty/rocksdb/v5.18.X/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-09-12-cuckoo.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-09-12-cuckoo.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-09-12-cuckoo.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-09-12-cuckoo.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-09-12-new-bloom-filter-format.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-09-12-new-bloom-filter-format.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-09-12-new-bloom-filter-format.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-09-12-new-bloom-filter-format.markdown diff --git 
a/3rdParty/rocksdb/v5.16.X/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-02-27-write-batch-with-index.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-02-27-write-batch-with-index.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-02-27-write-batch-with-index.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-02-27-write-batch-with-index.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown rename to 
3rdParty/rocksdb/v5.18.X/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-23-dynamic-level.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-23-dynamic-level.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-07-23-dynamic-level.markdown rename to 
3rdParty/rocksdb/v5.18.X/docs/_posts/2015-07-23-dynamic-level.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-10-27-getthreadlist.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-10-27-getthreadlist.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-10-27-getthreadlist.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-10-27-getthreadlist.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2016-01-29-compaction_pri.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2016-01-29-compaction_pri.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2016-01-29-compaction_pri.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2016-01-29-compaction_pri.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown rename to 
3rdParty/rocksdb/v5.18.X/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2016-02-25-rocksdb-ama.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2016-02-25-rocksdb-ama.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2016-02-25-rocksdb-ama.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2016-02-25-rocksdb-ama.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2016-03-07-rocksdb-options-file.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2016-03-07-rocksdb-options-file.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2016-03-07-rocksdb-options-file.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2016-03-07-rocksdb-options-file.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown diff --git 
a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-02-07-rocksdb-5-1-2-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-02-07-rocksdb-5-1-2-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-02-07-rocksdb-5-1-2-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-02-07-rocksdb-5-1-2-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-05-12-partitioned-index-filter.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-05-12-partitioned-index-filter.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-05-12-partitioned-index-filter.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-05-12-partitioned-index-filter.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-05-14-core-local-stats.markdown 
b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-05-14-core-local-stats.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-05-14-core-local-stats.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-05-14-core-local-stats.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-06-26-17-level-based-changes.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-06-26-17-level-based-changes.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-06-26-17-level-based-changes.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-06-26-17-level-based-changes.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-08-24-pinnableslice.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-08-24-pinnableslice.markdown similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/docs/_posts/2017-08-24-pinnableslice.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-08-24-pinnableslice.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-08-25-flushwal.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-08-25-flushwal.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-08-25-flushwal.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-08-25-flushwal.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-12-19-write-prepared-txn.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-12-19-write-prepared-txn.markdown similarity index 88% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2017-12-19-write-prepared-txn.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2017-12-19-write-prepared-txn.markdown index d592b6f7b1..439b3f83cc 100644 --- a/3rdParty/rocksdb/v5.16.X/docs/_posts/2017-12-19-write-prepared-txn.markdown +++ b/3rdParty/rocksdb/v5.18.X/docs/_posts/2017-12-19-write-prepared-txn.markdown @@ -7,8 +7,6 @@ category: blog RocksDB supports both optimistic and pessimistic concurrency controls. The pessimistic transactions make use of locks to provide isolation between the transactions. 
The default write policy in pessimistic transactions is _WriteCommitted_, which means that the data is written to the DB, i.e., the memtable, only after the transaction is committed. This policy simplified the implementation but came with some limitations in throughput, transaction size, and variety in supported isolation levels. In the below, we explain these in detail and present the other write policies, _WritePrepared_ and _WriteUnprepared_. We then dive into the design of _WritePrepared_ transactions. -> _WritePrepared_ are to be announced as production-ready soon. - ### WriteCommitted, Pros and Cons With _WriteCommitted_ write policy, the data is written to the memtable only after the transaction commits. This greatly simplifies the read path as any data that is read by other transactions can be assumed to be committed. This write policy, however, implies that the writes are buffered in memory in the meanwhile. This makes memory a bottleneck for large transactions. The delay of the commit phase in 2PC (two-phase commit) also becomes noticeable since most of the work, i.e., writing to memtable, is done at the commit phase. When the commit of multiple transactions are done in a serial fashion, such as in 2PC implementation of MySQL, the lengthy commit latency becomes a major contributor to lower throughput. Moreover this write policy cannot provide weaker isolation levels, such as READ UNCOMMITTED, that could potentially provide higher throughput for some applications. @@ -28,10 +26,16 @@ With _WritePrepared_, a transaction still buffers the writes in a write batch ob The _CommitCache_ is a lock-free data structure that caches the recent commit entries. Looking up the entries in the cache must be enough for almost all th transactions that commit in a timely manner. When evicting the older entries from the cache, it still maintains some other data structures to cover the corner cases for transactions that takes abnormally too long to finish. 
We will cover them in the design details below. -### Preliminary Results -The full experimental results are to be reported soon. Here we present the improvement in tps observed in some preliminary experiments with MyRocks: -* sysbench update-noindex: 25% -* sysbench read-write: 7.6% -* linkbench: 3.7% +### Benchmark Results +Here we presents the improvements observed in MyRocks with sysbench and linkbench: +* benchmark...........tps.........p95 latency....cpu/query +* insert...................68% +* update-noindex...30%......38% +* update-index.......61%.......28% +* read-write............6%........3.5% +* read-only...........-1.2%.....-1.8% +* linkbench.............1.9%......+overall........0.6% + +Here are also the detailed results for [In-Memory Sysbench](https://gist.github.com/maysamyabandeh/bdb868091b2929a6d938615fdcf58424) and [SSD Sysbench](https://gist.github.com/maysamyabandeh/ff94f378ab48925025c34c47eff99306) curtesy of [@mdcallag](https://github.com/mdcallag). Learn more [here](https://github.com/facebook/rocksdb/wiki/WritePrepared-Transactions). 
diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown diff --git a/3rdParty/rocksdb/v5.16.X/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown rename to 3rdParty/rocksdb/v5.18.X/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown diff --git a/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-08-23-data-block-hash-index.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-08-23-data-block-hash-index.markdown new file mode 100644 index 0000000000..c4b24ec2ac --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-08-23-data-block-hash-index.markdown @@ -0,0 +1,118 @@ +--- +title: Improving Point-Lookup Using Data Block Hash Index +layout: post +author: fgwu +category: blog +--- +We've designed and implemented a _data block hash index_ in RocksDB that has the benefit of both reducing the CPU util and increasing the throughput for point lookup queries with a reasonable and tunable space overhead. + +Specifially, we append a compact hash table to the end of the data block for efficient indexing. It is backward compatible with the data base created without this feature. After turned on the hash index feature, existing data will be gradually converted to the hash index format. 
+ +Benchmarks with `db_bench` show the CPU utilization of one of the main functions in the point lookup code path, `DataBlockIter::Seek()`, is reduced by 21.8%, and the overall RocksDB throughput is increased by 10% under purely cached workloads, at an overhead of 4.6% more space. Shadow testing with Facebook production traffic shows good CPU improvements too. + + +### How to use it +Two new options are added as part of this feature: `BlockBasedTableOptions::data_block_index_type` and `BlockBasedTableOptions::data_block_hash_table_util_ratio`. + +The hash index is disabled by default unless `BlockBasedTableOptions::data_block_index_type` is set to `data_block_index_type = kDataBlockBinaryAndHash`. The hash table utilization ratio is adjustable using `BlockBasedTableOptions::data_block_hash_table_util_ratio`, which is valid only if `data_block_index_type = kDataBlockBinaryAndHash`. + + +``` +// the definitions can be found in include/rocksdb/table.h + +// The index type that will be used for the data block. +enum DataBlockIndexType : char { + kDataBlockBinarySearch = 0, // traditional block type + kDataBlockBinaryAndHash = 1, // additional hash index +}; + +// Set to kDataBlockBinaryAndHash to enable hash index +DataBlockIndexType data_block_index_type = kDataBlockBinarySearch; + +// #entries/#buckets. It is valid only when data_block_hash_index_type is +// kDataBlockBinaryAndHash. +double data_block_hash_table_util_ratio = 0.75; + +``` + + +### Data Block Hash Index Design + +Current data block format groups adjacent keys together as a restart interval. One block consists of multiple restart intervals. The byte offset of the beginning of each restart interval, i.e. a restart point, is stored in an array called restart interval index or binary seek index. RocksDB does a binary search when performing point lookup for keys in data blocks to find the right restart interval the key may reside. We will use binary seek and binary search interchangeably in this post. 
+ +In order to find the right location where the key may reside using binary search, multiple key parsing and comparison are needed. Each binary search branching triggers CPU cache miss, causing much CPU utilization. We have seen that this binary search takes up considerable CPU in production use-cases. + +![](/static/images/data-block-hash-index/block-format-binary-seek.png) + +We implemented a hash map at the end of the block to index the key to reduce the CPU overhead of the binary search. The hash index is just an array of pointers pointing into the binary seek index. + +![](/static/images/data-block-hash-index/block-format-hash-index.png) + + +Each array element is considered as a hash bucket when storing the location of a key (or more precisely, the restart index of the restart interval where the key resides). When multiple keys happen to hash into the same bucket (hash collision), we just mark the bucket as “collision”. So that when later querying on that key, the hash table lookup knows that there was a hash collision happened so it can fall back to the traditional binary search to find the location of the key. + +We define hash table utilization ratio as the #keys/#buckets. If a utilization ratio is 0.5 and there are 100 buckets, 50 keys are stored in the bucket. The less the util ratio, the less hash collision, and the less chance for a point lookup falls back to binary seek (fall back ratio) due to the collision. So a small util ratio has more benefit to reduce the CPU time but introduces more space overhead. + +Space overhead depends on the util ratio. Each bucket is a `uint8_t` (i.e. one byte). For a util ratio of 1, the space overhead is 1Byte per key, the fall back ratio observed is ~52%. + +![](/static/images/data-block-hash-index/hash-index-data-structure.png) + +### Things that Need Attention + +**Customized Comparator** + +Hash index will hash different keys (keys with different content, or byte sequence) into different hash values. 
This assumes the comparator will not treat different keys as equal if they have different content. + +The default bytewise comparator orders the keys in alphabetical order and works well with hash index, as different keys will never be regarded as equal. However, some specially crafted comparators will do so. For example, say, a `StringToIntComparator` can convert a string into an integer, and use the integer to perform the comparison. Key strings “16” and “0x10” are equal to each other as seen by this `StringToIntComparator`, but they probably hash to different values. Later queries to one form of the key will not be able to find the existing key stored in the other format. + +We add a new function member to the comparator interface: + +``` +virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; } +``` + + +Every comparator implementation should override this function and specify the behavior of the comparator. If a comparator can regard different keys as equal, the function returns true, and as a result the hash index feature will not be enabled, and vice versa. + +NOTE: to use the hash index feature, one should 1) have a comparator that can never treat different keys as equal; and 2) override the `CanKeysWithDifferentByteContentsBeEqual()` function to return `false`, so the hash index can be enabled. + + +**Util Ratio's Impact on Data Block Cache** + +Adding the hash index to the end of the data block essentially takes up the data block cache space, making the effective data block cache size smaller and increasing the data block cache miss ratio. Therefore, a very small util ratio will result in a large data block cache miss ratio, and the extra I/O may drag down the throughput gain achieved by the hash index lookup. Besides, when compression is enabled, cache miss also incurs data block decompression, which is CPU-consuming. Therefore the CPU may even increase if the util ratio is too small. 
The best util ratio depends on workloads, cache to data ratio, disk bandwidth/latency etc. In our experiment, we found util ratio = 0.5 ~ 1 is a good range to explore that brings both CPU and throughput gains. + + +### Limitations + +As we use `uint8_t` to store binary seek index, i.e. restart interval index, the total number of restart intervals cannot be more than 253 (we reserved 255 and 254 as special flags). For blocks having a larger number of restart intervals, the hash index will not be created and the point lookup will be done by traditional binary seek. + +Data block hash index only supports point lookup. We do not support range lookup. Range lookup request will fall back to BinarySeek. + +RocksDB supports many types of records, such as `Put`, `Delete`, `Merge`, etc (visit [here](https://github.com/facebook/rocksdb/wiki/rocksdb-basics) for more information). Currently we only support `Put` and `Delete`, but not `Merge`. Internally we have a limited set of supported record types: + + +``` +kPutRecord, <=== supported +kDeleteRecord, <=== supported +kSingleDeleteRecord, <=== supported +kTypeBlobIndex, <=== supported +``` + +For records not supported, the searching process will fall back to the traditional binary seek. + + + +### Evaluation +To evaluate the CPU util reduction and isolate other factors such as disk I/O and block decompression, we first evaluate the hash index in a purely cached workload. We observe that the CPU utilization of one of the main functions in the point lookup code path, DataBlockIter::Seek(), is reduced by 21.8% and the overall throughput is increased by 10% at an overhead of 4.6% more space. + +However, a general workload is not always purely cached. So we also evaluate the performance under different cache space pressure. In the following test, we use `db_bench` with RocksDB deployed on SSDs. The total DB size is 5~6GB, and it is about 14GB if decompressed. 
Different block cache sizes are used, ranging from 14GB down to 2GB, with an increasing cache miss ratio. + +Orange bars represent our hash index performance. We use a hash util ratio of 1.0 in this test. Block size is set to 16KiB with the restart interval as 16. + +![](/static/images/data-block-hash-index/perf-throughput.png) +![](/static/images/data-block-hash-index/perf-cache-miss.png) + +We can see that if cache size is greater than 8GB, hash index can bring throughput gain. Cache size greater than 8GB can be translated to a cache miss ratio smaller than 40%. So if the workload has a cache miss ratio smaller than 40%, hash index is able to increase the throughput. + +Besides, shadow testing with Facebook production traffic shows good CPU improvements too. + diff --git a/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-11-21-delete-range.markdown b/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-11-21-delete-range.markdown new file mode 100644 index 0000000000..96fc3562d1 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/docs/_posts/2018-11-21-delete-range.markdown @@ -0,0 +1,292 @@ +--- +title: "DeleteRange: A New Native RocksDB Operation" +layout: post +author: +- abhimadan +- ajkr +category: blog +--- +## Motivation + +### Deletion patterns in LSM + +Deleting a range of keys is a common pattern in RocksDB. Most systems built on top of +RocksDB have multi-component key schemas, where keys sharing a common prefix are +logically related. Here are some examples. + +MyRocks is a MySQL fork using RocksDB as its storage engine. Each key's first +four bytes identify the table or index to which that key belongs. Thus dropping +a table or index involves deleting all the keys with that prefix. + +Rockssandra is a Cassandra variant that uses RocksDB as its storage engine. One +of its admin tool commands, `nodetool cleanup`, removes key-ranges that have been migrated +to other nodes in the cluster. + +Marketplace uses RocksDB to store product data. 
Its key begins with product ID, +and it stores various data associated with the product in separate keys. When a +product is removed, all these keys must be deleted. + +When we decide what to improve, we try to find a use case that's common across +users, since we want to build a generally useful system, not one that has many +one-off features for individual users. The range deletion pattern is common as +illustrated above, so from this perspective it's a good target for optimization. + +### Existing mechanisms: challenges and opportunities + +The most common pattern we see is scan-and-delete, i.e., advance an iterator +through the to-be-deleted range, and issue a `Delete` for each key. This is +slow (involves read I/O) so cannot be done in any critical path. Additionally, +it creates many tombstones, which slows down iterators and doesn't offer a deadline +for space reclamation. + +Another common pattern is using a custom compaction filter that drops keys in +the deleted range(s). This deletes the range asynchronously, so cannot be used +in cases where readers must not see keys in deleted ranges. Further, it has the +disadvantage of outputting tombstones to all but the bottom level. That's +because compaction cannot detect whether dropping a key would cause an older +version at a lower level to reappear. + +If space reclamation time is important, or it is important that the deleted +range not affect iterators, the user can trigger `CompactRange` on the deleted +range. This can involve arbitrarily long waits in the compaction queue, and +increases write-amp. By the time it's finished, however, the range is completely +gone from the LSM. + +`DeleteFilesInRange` can be used prior to compacting the deleted range as long +as snapshot readers do not need to access them. It drops files that are +completely contained in the deleted range. 
That saves write-amp because, in +`CompactRange`, the file data would have to be rewritten several times before it +reaches the bottom of the LSM, where tombstones can finally be dropped. + +In addition to the above approaches having various drawbacks, they are quite +complicated to reason about and implement. In an ideal world, deleting a range +of keys would be (1) simple, i.e., a single API call; (2) synchronous, i.e., +when the call finishes, the keys are guaranteed to be wiped from the DB; (3) low +latency so it can be used in critical paths; and (4) a first-class operation +with all the guarantees of any other write, like atomicity, crash-recovery, etc. + +## v1: Getting it to work + +### Where to persist them? + +The first place we thought about storing them is inline with the data blocks. +We could not think of a good way to do it, however, since the start of a range +tombstone covering a key could be anywhere, making binary search impossible. +So, we decided to investigate segregated storage. + +A second solution we considered is appending to the manifest. This file is +append-only, periodically compacted, and stores metadata like the level to which +each SST belongs. This is tempting because it leverages an existing file, which +is maintained in the background and fully read when the DB is opened. However, +it conceptually violates the manifest's purpose, which is to store metadata. It +also has no way to detect when a range tombstone no longer covers anything and +is droppable. Further, it'd be possible for keys above a range tombstone to disappear +when they have their seqnums zeroed upon compaction to the bottommost level. + +A third candidate is using a separate column family. This has similar problems +to the manifest approach. That is, we cannot easily detect when a range +tombstone is obsolete, and seqnum zeroing can cause a key +to go from above a range tombstone to below, i.e., disappearing. 
The upside is +we can reuse logic for memory buffering, consistent reads/writes, etc. + +The problems with the second and third solutions indicate a need for range +tombstones to be aware of flush/compaction. An easy way to achieve this is put +them in the SST files themselves - but not in the data blocks, as explained for +the first solution. So, we introduced a separate meta-block for range tombstones. +This resolved the problem of when to obsolete range tombstones, as it's simple: +when they're compacted to the bottom level. We also reused the LSM invariants +that newer versions of a key are always in a higher level to prevent the seqnum +zeroing problem. This approach has the side benefit of constraining the range +tombstones seen during reads to ones in a similar key-range. + +![](/static/images/delrange/delrange_sst_blocks.png) +{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} + +*When there are range tombstones in an SST, they are segregated in a separate meta-block* +{: style="text-align: center"} + +![](/static/images/delrange/delrange_key_schema.png) +{: style="display: block; margin-left: auto; margin-right: auto; width: 80%"} + +*Logical range tombstones (left) and their corresponding physical key-value representation (right)* +{: style="text-align: center"} + +### Write path + +`WriteBatch` stores range tombstones in its buffer which are logged to the WAL and +then applied to a dedicated range tombstone memtable during `Write`. Later in +the background the range tombstone memtable and its corresponding data memtable +are flushed together into a single SST with a range tombstone meta-block. SSTs +periodically undergo compaction which rewrites SSTs with point data and range +tombstones dropped or merged wherever possible. + +We chose to use a dedicated memtable for range tombstones. 
The memtable +representation is always skiplist in order to minimize overhead in the usual +case, which is the memtable contains zero or a small number of range tombstones. +The range tombstones are segregated to a separate memtable for the same reason +we segregated range tombstones in SSTs. That is, we did not know how to +interleave the range tombstone with point data in a way that we would be able to +find it for arbitrary keys that it covers. + +![](/static/images/delrange/delrange_write_path.png) +{: style="display: block; margin-left: auto; margin-right: auto; width: 70%"} + +*Lifetime of point keys and range tombstones in RocksDB* +{: style="text-align: center"} + +During flush and compaction, we chose to write out all non-obsolete range +tombstones unsorted. Sorting by a single dimension is easy to implement, but +doesn't bring asymptotic improvement to queries over range data. Ideally, we +want to store skylines (see “Read Path” subsection below) computed over our ranges so we can binary search. +However, a couple of concerns cause doing this in flush and compaction to feel +unsatisfactory: (1) we need to store multiple skylines, one for each snapshot, +which further complicates the range tombstone meta-block encoding; and (2) even +if we implement this, the range tombstone memtable still needs to be linearly +scanned. Given these concerns we decided to defer collapsing work to the read +side, hoping a good caching strategy could optimize this at some future point. + + +### Read path + +In point lookups, we aggregate range tombstones in an unordered vector as we +search through live memtable, immutable memtables, and then SSTs. When a key is +found that matches the lookup key, we do a scan through the vector, checking +whether the key is deleted. + +In iterators, we aggregate range tombstones into a skyline as we visit live +memtable, immutable memtables, and SSTs. The skyline is expensive to construct but fast to determine whether a key is covered. 
The skyline keeps track of the most recent range tombstone found to optimize `Next` and `Prev`. + +|![](/static/images/delrange/delrange_uncollapsed.png) |![](/static/images/delrange/delrange_collapsed.png) | + +*([Image source: Leetcode](https://leetcode.com/problems/the-skyline-problem/description/)) The skyline problem involves taking building location/height data in the +unsearchable form of A and converting it to the form of B, which is +binary-searchable. With overlapping range tombstones, to achieve efficient +searching we need to solve an analogous problem, where the x-axis is the +key-space and the y-axis is the sequence number.* +{: style="text-align: center"} + +### Performance characteristics + +For the v1 implementation, writes are much faster compared to the scan and +delete (optionally within a transaction) pattern. `DeleteRange` only logs to WAL +and applies to memtable. Logging to WAL always `fflush`es, and optionally +`fsync`s or `fdatasync`s. Applying to memtable is always an in-memory operation. +Since range tombstones have a dedicated skiplist memtable, the complexity of inserting is O(log(T)), where T is the number of existing buffered range tombstones. + +Reading in the presence of v1 range tombstones, however, is much slower than reads +in a database where scan-and-delete has happened, due to the linear scan over +range tombstone memtables/meta-blocks. + +Iterating in a database with v1 range tombstones is usually slower than in a +scan-and-delete database, although the gap lessens as iterations grow longer. +When an iterator is first created and seeked, we construct a skyline over its +tombstones. This operation is O(T\*log(T)) where T is the number of tombstones +found across live memtable, immutable memtable, L0 files, and one file from each +of the L1+ levels. However, moving the iterator forwards or backwards is simply +a constant-time operation (excluding edge cases, e.g., many range tombstones +between consecutive point keys). 
+ +## v2: Making it fast + +`DeleteRange`’s negative impact on read perf is a barrier to its adoption. The +root cause is range tombstones are not stored or cached in a format that can be +efficiently searched. We needed to design DeleteRange so that we could maintain +write performance while making read performance competitive with workarounds +used in production (e.g., scan-and-delete). + +### Representations + +The key idea of the redesign is that, instead of globally collapsing range tombstones, + we can locally “fragment” them for each SST file and memtable to guarantee that: + +* no range tombstones overlap; and +* range tombstones are ordered by start key. + +Combined, these properties make range tombstones binary searchable. This + fragmentation will happen on the read path, but unlike the previous design, we can + easily cache many of these range tombstone fragments on the read path. + +### Write path + +The write path remains unchanged. + +### Read path + +When an SST file is opened, its range tombstones are fragmented and cached. For point + lookups, we binary search each file's fragmented range tombstones for one that covers + the lookup key. Unlike the old design, once we find a tombstone, we no longer need to + search for the key in lower levels, since we know that any keys on those levels will be + covered (though we do still check the current level since there may be keys written after + the range tombstone). + +For range scans, we create iterators over all the fragmented range + tombstones and store them in a list, seeking each one to cover the start key of the range + scan (if possible), and query each encountered key in this structure as in the old design, + advancing range tombstone iterators as necessary. In effect, we implicitly create a skyline. 
+ This requires significantly less work on iterator creation, but since each memtable/SST has +its own range tombstone iterator, querying range tombstones requires key comparisons (and +possibly iterator increments) for several iterators (as opposed to v1, where we had a global +collapsed representation of all range tombstones). As a result, very long range scans may become + slower than before, but short range scans are an order of magnitude faster, which are the + more common class of range scan. + +## Benchmarks + +To understand the performance of this new design, we used `db_bench` to compare point lookup, short range scan, + and long range scan performance across: + +* the v1 DeleteRange design, +* the scan-and-delete workaround, and +* the v2 DeleteRange design. + +In these benchmarks, we used a database with 5 million data keys, and 10000 range tombstones (ignoring +those dropped during compaction) that were written in regular intervals after 4.5 million data keys were written. +Writing the range tombstones ensures that most of them are not compacted away, and we have more tombstones +in higher levels that cover keys in lower levels, which allows the benchmarks to exercise more interesting behavior +when reading deleted keys. + +Point lookup benchmarks read 100000 keys from a database using `readwhilewriting`. Range scan benchmarks used +`seekrandomwhilewriting` and seeked 100000 times, and advanced up to 10 keys away from the seek position for short range scans, and advanced up to 1000 keys away from the seek position for long range scans. + +The results are summarized in the tables below, averaged over 10 runs (note the +different SHAs for v1 benchmarks are due to a new `db_bench` flag that was added in order to compare performance with databases with no tombstones; for brevity, those results are not reported here). 
Also note that the block cache was large enough to hold the entire db, so the large throughput is due to limited I/Os and little time spent on decompression. The range tombstone blocks are always pinned uncompressed in memory. We believe these setup details should not affect relative performance between versions. + +### Point Lookups + +|Name |SHA |avg micros/op |avg ops/sec | +|v1 |35cd754a6 |1.3179 |759,830.90 | +|scan-del |7528130e3 |0.6036 |1,667,237.70 | +|v2 |7528130e3 |0.6128 |1,634,633.40 | + +### Short Range Scans + +|Name |SHA |avg micros/op |avg ops/sec | +|v1 |0ed738fdd |6.23 |176,562.00 | +|scan-del |PR 4677 |2.6844 |377,313.00 | +|v2 |PR 4677 |2.8226 |361,249.70 | + +### Long Range scans + +|Name |SHA |avg micros/op |avg ops/sec | +|v1 |0ed738fdd |52.7066 |19,074.00 | +|scan-del |PR 4677 |38.0325 |26,648.60 | +|v2 |PR 4677 |41.2882 |24,714.70 | + +## Future Work + +Note that memtable range tombstones are fragmented every read; for now this is acceptable, + since we expect there to be relatively few range tombstones in memtables (and users can + enforce this by keeping track of the number of memtable range deletions and manually flushing + after it passes a threshold). In the future, a specialized data structure can be used for storing + range tombstones in memory to avoid this work. + +Another future optimization is to create a new format version that requires range tombstones to + be stored in a fragmented form. This would save time when opening SST files, and when `max_open_files` +is not -1 (i.e., files may be opened several times). + +## Acknowledgements + +Special thanks to Peter Mattis and Nikhil Benesch from Cockroach Labs, who were early users of +DeleteRange v1 in production, contributed the cleanest/most efficient v1 aggregation implementation, found and fixed bugs, and provided initial DeleteRange v2 design and continued help. 
+ +Thanks to Huachao Huang and Jinpeng Zhang from PingCAP for early DeleteRange v1 adoption, bug reports, and fixes. diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_base.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_base.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_base.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_base.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_blog.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_blog.scss similarity index 85% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_blog.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_blog.scss index 74335d10b4..12a73c1fcd 100644 --- a/3rdParty/rocksdb/v5.16.X/docs/_sass/_blog.scss +++ b/3rdParty/rocksdb/v5.18.X/docs/_sass/_blog.scss @@ -35,11 +35,13 @@ border-radius: 50%; height: 50px; left: 50%; - margin-left: -25px; + margin-left: auto; + margin-right: auto; + display: inline-block; overflow: hidden; - position: absolute; + position: static; top: -25px; width: 50px; } } -} \ No newline at end of file +} diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_buttons.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_buttons.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_buttons.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_buttons.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_footer.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_footer.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_footer.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_footer.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_gridBlock.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_gridBlock.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_gridBlock.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_gridBlock.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_header.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_header.scss similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/docs/_sass/_header.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_header.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_poweredby.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_poweredby.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_poweredby.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_poweredby.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_promo.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_promo.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_promo.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_promo.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_react_docs_nav.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_react_docs_nav.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_react_docs_nav.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_react_docs_nav.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_react_header_nav.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_react_header_nav.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_react_header_nav.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_react_header_nav.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_reset.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_reset.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_reset.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_reset.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_search.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_search.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_search.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_search.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_slideshow.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_slideshow.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_slideshow.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_slideshow.scss diff --git 
a/3rdParty/rocksdb/v5.16.X/docs/_sass/_syntax-highlighting.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_syntax-highlighting.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_syntax-highlighting.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_syntax-highlighting.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_sass/_tables.scss b/3rdParty/rocksdb/v5.18.X/docs/_sass/_tables.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_sass/_tables.scss rename to 3rdParty/rocksdb/v5.18.X/docs/_sass/_tables.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/_top-level/support.md b/3rdParty/rocksdb/v5.18.X/docs/_top-level/support.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/_top-level/support.md rename to 3rdParty/rocksdb/v5.18.X/docs/_top-level/support.md diff --git a/3rdParty/rocksdb/v5.16.X/docs/blog/all.html b/3rdParty/rocksdb/v5.18.X/docs/blog/all.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/blog/all.html rename to 3rdParty/rocksdb/v5.18.X/docs/blog/all.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/blog/index.html b/3rdParty/rocksdb/v5.18.X/docs/blog/index.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/blog/index.html rename to 3rdParty/rocksdb/v5.18.X/docs/blog/index.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/css/main.scss b/3rdParty/rocksdb/v5.18.X/docs/css/main.scss similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/css/main.scss rename to 3rdParty/rocksdb/v5.18.X/docs/css/main.scss diff --git a/3rdParty/rocksdb/v5.16.X/docs/doc-type-examples/2016-04-07-blog-post-example.md b/3rdParty/rocksdb/v5.18.X/docs/doc-type-examples/2016-04-07-blog-post-example.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/doc-type-examples/2016-04-07-blog-post-example.md rename to 3rdParty/rocksdb/v5.18.X/docs/doc-type-examples/2016-04-07-blog-post-example.md diff --git a/3rdParty/rocksdb/v5.16.X/docs/doc-type-examples/docs-hello-world.md 
b/3rdParty/rocksdb/v5.18.X/docs/doc-type-examples/docs-hello-world.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/doc-type-examples/docs-hello-world.md rename to 3rdParty/rocksdb/v5.18.X/docs/doc-type-examples/docs-hello-world.md diff --git a/3rdParty/rocksdb/v5.16.X/docs/doc-type-examples/top-level-example.md b/3rdParty/rocksdb/v5.18.X/docs/doc-type-examples/top-level-example.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/doc-type-examples/top-level-example.md rename to 3rdParty/rocksdb/v5.18.X/docs/doc-type-examples/top-level-example.md diff --git a/3rdParty/rocksdb/v5.16.X/docs/docs/index.html b/3rdParty/rocksdb/v5.18.X/docs/docs/index.html similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/docs/index.html rename to 3rdParty/rocksdb/v5.18.X/docs/docs/index.html diff --git a/3rdParty/rocksdb/v5.16.X/docs/feed.xml b/3rdParty/rocksdb/v5.18.X/docs/feed.xml similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/docs/feed.xml rename to 3rdParty/rocksdb/v5.18.X/docs/feed.xml index 590cbd9d76..725f00566c 100644 --- a/3rdParty/rocksdb/v5.16.X/docs/feed.xml +++ b/3rdParty/rocksdb/v5.18.X/docs/feed.xml @@ -6,7 +6,7 @@ layout: null {{ site.title | xml_escape }} {{ site.description | xml_escape }} - {{ absolute_url }}/ + https://rocksdb.org/feed.xml {{ site.time | date_to_rfc822 }} {{ site.time | date_to_rfc822 }} diff --git a/3rdParty/rocksdb/v5.16.X/docs/index.md b/3rdParty/rocksdb/v5.18.X/docs/index.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/index.md rename to 3rdParty/rocksdb/v5.18.X/docs/index.md diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/favicon.png b/3rdParty/rocksdb/v5.18.X/docs/static/favicon.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/favicon.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/favicon.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Black.woff b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Black.woff 
similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Black.woff rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Black.woff diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Black.woff2 b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Black.woff2 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Black.woff2 rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Black.woff2 diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-BlackItalic.woff b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-BlackItalic.woff similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-BlackItalic.woff rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-BlackItalic.woff diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-BlackItalic.woff2 b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-BlackItalic.woff2 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-BlackItalic.woff2 rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-BlackItalic.woff2 diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Italic.woff b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Italic.woff similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Italic.woff rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Italic.woff diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Italic.woff2 b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Italic.woff2 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Italic.woff2 rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Italic.woff2 diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Light.woff b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Light.woff similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Light.woff rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Light.woff diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Light.woff2 b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Light.woff2 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Light.woff2 rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Light.woff2 diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Regular.woff b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Regular.woff similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Regular.woff rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Regular.woff diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Regular.woff2 b/3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Regular.woff2 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/fonts/LatoLatin-Regular.woff2 rename to 3rdParty/rocksdb/v5.18.X/docs/static/fonts/LatoLatin-Regular.woff2 diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/Resize-of-20140327_200754-300x225.jpg b/3rdParty/rocksdb/v5.18.X/docs/static/images/Resize-of-20140327_200754-300x225.jpg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/Resize-of-20140327_200754-300x225.jpg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/Resize-of-20140327_200754-300x225.jpg diff --git a/3rdParty/rocksdb/v5.18.X/docs/static/images/binaryseek.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/binaryseek.png new file mode 100644 index 0000000000..0e213f0482 Binary files /dev/null and b/3rdParty/rocksdb/v5.18.X/docs/static/images/binaryseek.png differ diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/full-range.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/full-range.png similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/full-range.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/full-range.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/l0-l1-contend.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/l0-l1-contend.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/l0-l1-contend.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/l0-l1-contend.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/l1-l2-contend.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/l1-l2-contend.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/l1-l2-contend.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/l1-l2-contend.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/part-range-old.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/part-range-old.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/compaction/part-range-old.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/compaction/part-range-old.png diff --git a/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_collapsed.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_collapsed.png new file mode 100644 index 0000000000..52246c2c1d Binary files /dev/null and b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_collapsed.png differ diff --git a/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_key_schema.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_key_schema.png new file mode 100644 index 0000000000..0a14d4a3a5 Binary files /dev/null and b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_key_schema.png differ diff --git a/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_sst_blocks.png 
b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_sst_blocks.png new file mode 100644 index 0000000000..6003e42ae8 Binary files /dev/null and b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_sst_blocks.png differ diff --git a/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_uncollapsed.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_uncollapsed.png new file mode 100644 index 0000000000..39c7097af9 Binary files /dev/null and b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_uncollapsed.png differ diff --git a/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_write_path.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_write_path.png new file mode 100644 index 0000000000..229dfb349a Binary files /dev/null and b/3rdParty/rocksdb/v5.18.X/docs/static/images/delrange/delrange_write_path.png differ diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-blockindex.jpg b/3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-blockindex.jpg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-blockindex.jpg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-blockindex.jpg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-fileindex.jpg b/3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-fileindex.jpg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-fileindex.jpg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-fileindex.jpg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-filelayout.jpg b/3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-filelayout.jpg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-filelayout.jpg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-filelayout.jpg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-readiopath.jpg 
b/3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-readiopath.jpg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-readiopath.jpg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-readiopath.jpg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-tieredstorage.jpg b/3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-tieredstorage.jpg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-tieredstorage.jpg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-tieredstorage.jpg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-writeiopath.jpg b/3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-writeiopath.jpg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/pcache-writeiopath.jpg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/pcache-writeiopath.jpg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/promo-adapt.svg b/3rdParty/rocksdb/v5.18.X/docs/static/images/promo-adapt.svg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/promo-adapt.svg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/promo-adapt.svg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/promo-flash.svg b/3rdParty/rocksdb/v5.18.X/docs/static/images/promo-flash.svg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/promo-flash.svg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/promo-flash.svg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/promo-operations.svg b/3rdParty/rocksdb/v5.18.X/docs/static/images/promo-operations.svg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/promo-operations.svg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/promo-operations.svg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/promo-performance.svg b/3rdParty/rocksdb/v5.18.X/docs/static/images/promo-performance.svg similarity index 100% rename 
from 3rdParty/rocksdb/v5.16.X/docs/static/images/promo-performance.svg rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/promo-performance.svg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/rate-limiter/write-KBps-cdf.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/rate-limiter/write-KBps-cdf.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/rate-limiter/write-KBps-cdf.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/rate-limiter/write-KBps-cdf.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/rate-limiter/write-KBps-series.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/rate-limiter/write-KBps-series.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/rate-limiter/write-KBps-series.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/rate-limiter/write-KBps-series.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/images/tree_example1.png b/3rdParty/rocksdb/v5.18.X/docs/static/images/tree_example1.png similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/images/tree_example1.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/images/tree_example1.png diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/logo.svg b/3rdParty/rocksdb/v5.18.X/docs/static/logo.svg similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/docs/static/logo.svg rename to 3rdParty/rocksdb/v5.18.X/docs/static/logo.svg diff --git a/3rdParty/rocksdb/v5.16.X/docs/static/og_image.png b/3rdParty/rocksdb/v5.18.X/docs/static/og_image.png similarity index 100% rename 
from 3rdParty/rocksdb/v5.16.X/docs/static/og_image.png rename to 3rdParty/rocksdb/v5.18.X/docs/static/og_image.png diff --git a/3rdParty/rocksdb/v5.16.X/env/env.cc b/3rdParty/rocksdb/v5.18.X/env/env.cc similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/env/env.cc rename to 3rdParty/rocksdb/v5.18.X/env/env.cc index 9b7f5e40de..a41feaf00e 100644 --- a/3rdParty/rocksdb/v5.16.X/env/env.cc +++ b/3rdParty/rocksdb/v5.18.X/env/env.cc @@ -43,7 +43,7 @@ uint64_t Env::GetThreadID() const { Status Env::ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) { Status s = RenameFile(old_fname, fname); if (!s.ok()) { @@ -242,11 +242,11 @@ void Fatal(Logger* info_log, const char* format, ...) { va_end(ap); } -void LogFlush(const shared_ptr& info_log) { +void LogFlush(const std::shared_ptr& info_log) { LogFlush(info_log.get()); } -void Log(const InfoLogLevel log_level, const shared_ptr& info_log, +void Log(const InfoLogLevel log_level, const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); @@ -254,49 +254,49 @@ void Log(const InfoLogLevel log_level, const shared_ptr& info_log, va_end(ap); } -void Header(const shared_ptr& info_log, const char* format, ...) { +void Header(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Headerv(info_log.get(), format, ap); va_end(ap); } -void Debug(const shared_ptr& info_log, const char* format, ...) { +void Debug(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Debugv(info_log.get(), format, ap); va_end(ap); } -void Info(const shared_ptr& info_log, const char* format, ...) { +void Info(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Infov(info_log.get(), format, ap); va_end(ap); } -void Warn(const shared_ptr& info_log, const char* format, ...) 
{ +void Warn(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Warnv(info_log.get(), format, ap); va_end(ap); } -void Error(const shared_ptr& info_log, const char* format, ...) { +void Error(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Errorv(info_log.get(), format, ap); va_end(ap); } -void Fatal(const shared_ptr& info_log, const char* format, ...) { +void Fatal(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Fatalv(info_log.get(), format, ap); va_end(ap); } -void Log(const shared_ptr& info_log, const char* format, ...) { +void Log(const std::shared_ptr& info_log, const char* format, ...) { va_list ap; va_start(ap, format); Logv(info_log.get(), format, ap); @@ -305,7 +305,7 @@ void Log(const shared_ptr& info_log, const char* format, ...) { Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname, bool should_sync) { - unique_ptr file; + std::unique_ptr file; EnvOptions soptions; Status s = env->NewWritableFile(fname, &file, soptions); if (!s.ok()) { @@ -324,7 +324,7 @@ Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname, Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { EnvOptions soptions; data->clear(); - unique_ptr file; + std::unique_ptr file; Status s = env->NewSequentialFile(fname, &file, soptions); if (!s.ok()) { return s; diff --git a/3rdParty/rocksdb/v5.16.X/env/env_basic_test.cc b/3rdParty/rocksdb/v5.18.X/env/env_basic_test.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/env/env_basic_test.cc rename to 3rdParty/rocksdb/v5.18.X/env/env_basic_test.cc index e05f61aa65..22983dbecd 100644 --- a/3rdParty/rocksdb/v5.16.X/env/env_basic_test.cc +++ b/3rdParty/rocksdb/v5.18.X/env/env_basic_test.cc @@ -133,7 +133,7 @@ INSTANTIATE_TEST_CASE_P(CustomEnv, EnvMoreTestWithParam, TEST_P(EnvBasicTestWithParam, Basics) { uint64_t file_size; - 
unique_ptr writable_file; + std::unique_ptr writable_file; std::vector children; // Check that the directory is empty. @@ -186,8 +186,8 @@ TEST_P(EnvBasicTestWithParam, Basics) { ASSERT_EQ(0U, file_size); // Check that opening non-existent file fails. - unique_ptr seq_file; - unique_ptr rand_file; + std::unique_ptr seq_file; + std::unique_ptr rand_file; ASSERT_TRUE(!env_->NewSequentialFile(test_dir_ + "/non_existent", &seq_file, soptions_) .ok()); @@ -208,9 +208,9 @@ TEST_P(EnvBasicTestWithParam, Basics) { } TEST_P(EnvBasicTestWithParam, ReadWrite) { - unique_ptr writable_file; - unique_ptr seq_file; - unique_ptr rand_file; + std::unique_ptr writable_file; + std::unique_ptr seq_file; + std::unique_ptr rand_file; Slice result; char scratch[100]; @@ -247,7 +247,7 @@ TEST_P(EnvBasicTestWithParam, ReadWrite) { } TEST_P(EnvBasicTestWithParam, Misc) { - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile(test_dir_ + "/b", &writable_file, soptions_)); // These are no-ops, but we test they return success. @@ -266,14 +266,14 @@ TEST_P(EnvBasicTestWithParam, LargeWrite) { write_data.append(1, static_cast(i)); } - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile(test_dir_ + "/f", &writable_file, soptions_)); ASSERT_OK(writable_file->Append("foo")); ASSERT_OK(writable_file->Append(write_data)); ASSERT_OK(writable_file->Close()); writable_file.reset(); - unique_ptr seq_file; + std::unique_ptr seq_file; Slice result; ASSERT_OK(env_->NewSequentialFile(test_dir_ + "/f", &seq_file, soptions_)); ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". 
@@ -340,7 +340,7 @@ TEST_P(EnvMoreTestWithParam, GetChildren) { // if dir is a file, returns IOError ASSERT_OK(env_->CreateDir(test_dir_)); - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK( env_->NewWritableFile(test_dir_ + "/file", &writable_file, soptions_)); ASSERT_OK(writable_file->Close()); diff --git a/3rdParty/rocksdb/v5.16.X/env/env_chroot.cc b/3rdParty/rocksdb/v5.18.X/env/env_chroot.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/env/env_chroot.cc rename to 3rdParty/rocksdb/v5.18.X/env/env_chroot.cc index 6a1fda8a83..f6236c81b2 100644 --- a/3rdParty/rocksdb/v5.16.X/env/env_chroot.cc +++ b/3rdParty/rocksdb/v5.18.X/env/env_chroot.cc @@ -50,7 +50,7 @@ class ChrootEnv : public EnvWrapper { } virtual Status NewRandomAccessFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { @@ -61,7 +61,7 @@ class ChrootEnv : public EnvWrapper { } virtual Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { @@ -73,7 +73,7 @@ class ChrootEnv : public EnvWrapper { virtual Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { @@ -89,7 +89,7 @@ class ChrootEnv : public EnvWrapper { } virtual Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { @@ -100,7 +100,7 @@ class ChrootEnv : public EnvWrapper { } virtual Status 
NewDirectory(const std::string& dir, - unique_ptr* result) override { + std::unique_ptr* result) override { auto status_and_enc_path = EncodePathWithNewBasename(dir); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; @@ -238,7 +238,7 @@ class ChrootEnv : public EnvWrapper { } virtual Status NewLogger(const std::string& fname, - shared_ptr* result) override { + std::shared_ptr* result) override { auto status_and_enc_path = EncodePathWithNewBasename(fname); if (!status_and_enc_path.first.ok()) { return status_and_enc_path.first; diff --git a/3rdParty/rocksdb/v5.16.X/env/env_chroot.h b/3rdParty/rocksdb/v5.18.X/env/env_chroot.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/env/env_chroot.h rename to 3rdParty/rocksdb/v5.18.X/env/env_chroot.h diff --git a/3rdParty/rocksdb/v5.16.X/env/env_encryption.cc b/3rdParty/rocksdb/v5.18.X/env/env_encryption.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/env/env_encryption.cc rename to 3rdParty/rocksdb/v5.18.X/env/env_encryption.cc index e80796fe0c..e38693e3ce 100644 --- a/3rdParty/rocksdb/v5.16.X/env/env_encryption.cc +++ b/3rdParty/rocksdb/v5.18.X/env/env_encryption.cc @@ -422,7 +422,7 @@ class EncryptedEnv : public EnvWrapper { // NewRandomAccessFile opens a file for random read access. virtual Status NewRandomAccessFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { result->reset(); if (options.use_mmap_reads) { @@ -456,10 +456,10 @@ class EncryptedEnv : public EnvWrapper { (*result) = std::unique_ptr(new EncryptedRandomAccessFile(underlying.release(), stream.release(), prefixLength)); return Status::OK(); } - + // NewWritableFile opens a file for sequential writing. 
virtual Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { result->reset(); if (options.use_mmap_writes) { @@ -505,8 +505,8 @@ class EncryptedEnv : public EnvWrapper { // // The returned file will only be accessed by one thread at a time. virtual Status ReopenWritableFile(const std::string& fname, - unique_ptr* result, - const EnvOptions& options) override { + std::unique_ptr* result, + const EnvOptions& options) override { result->reset(); if (options.use_mmap_writes) { return Status::InvalidArgument(); @@ -546,7 +546,7 @@ class EncryptedEnv : public EnvWrapper { // Reuse an existing file by renaming it and opening it as writable. virtual Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { result->reset(); if (options.use_mmap_writes) { @@ -590,7 +590,7 @@ class EncryptedEnv : public EnvWrapper { // // The returned file will only be accessed by one thread at a time. virtual Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { result->reset(); if (options.use_mmap_reads || options.use_mmap_writes) { @@ -692,7 +692,7 @@ Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t auto blockSize = BlockSize(); uint64_t blockIndex = fileOffset / blockSize; size_t blockOffset = fileOffset % blockSize; - unique_ptr blockBuffer; + std::unique_ptr blockBuffer; std::string scratch; AllocateScratch(scratch); @@ -705,8 +705,8 @@ Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t // We're not encrypting a full block. 
// Copy data to blockBuffer if (!blockBuffer.get()) { - // Allocate buffer - blockBuffer = unique_ptr(new char[blockSize]); + // Allocate buffer + blockBuffer = std::unique_ptr(new char[blockSize]); } block = blockBuffer.get(); // Copy plain data to block buffer @@ -737,7 +737,7 @@ Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t auto blockSize = BlockSize(); uint64_t blockIndex = fileOffset / blockSize; size_t blockOffset = fileOffset % blockSize; - unique_ptr blockBuffer; + std::unique_ptr blockBuffer; std::string scratch; AllocateScratch(scratch); @@ -750,8 +750,8 @@ Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t // We're not decrypting a full block. // Copy data to blockBuffer if (!blockBuffer.get()) { - // Allocate buffer - blockBuffer = unique_ptr(new char[blockSize]); + // Allocate buffer + blockBuffer = std::unique_ptr(new char[blockSize]); } block = blockBuffer.get(); // Copy encrypted data to block buffer @@ -882,7 +882,9 @@ size_t CTREncryptionProvider::PopulateSecretPrefixPart(char* /*prefix*/, return 0; } -Status CTREncryptionProvider::CreateCipherStream(const std::string& fname, const EnvOptions& options, Slice &prefix, unique_ptr* result) { +Status CTREncryptionProvider::CreateCipherStream( + const std::string& fname, const EnvOptions& options, Slice& prefix, + std::unique_ptr* result) { // Read plain text part of prefix. 
auto blockSize = cipher_.BlockSize(); uint64_t initialCounter; @@ -905,8 +907,9 @@ Status CTREncryptionProvider::CreateCipherStream(const std::string& fname, const Status CTREncryptionProvider::CreateCipherStreamFromPrefix( const std::string& /*fname*/, const EnvOptions& /*options*/, uint64_t initialCounter, const Slice& iv, const Slice& /*prefix*/, - unique_ptr* result) { - (*result) = unique_ptr(new CTRCipherStream(cipher_, iv.data(), initialCounter)); + std::unique_ptr* result) { + (*result) = std::unique_ptr( + new CTRCipherStream(cipher_, iv.data(), initialCounter)); return Status::OK(); } diff --git a/3rdParty/rocksdb/v5.16.X/env/env_hdfs.cc b/3rdParty/rocksdb/v5.18.X/env/env_hdfs.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/env/env_hdfs.cc rename to 3rdParty/rocksdb/v5.18.X/env/env_hdfs.cc index 1eaea3a1ce..14fb902f0d 100644 --- a/3rdParty/rocksdb/v5.16.X/env/env_hdfs.cc +++ b/3rdParty/rocksdb/v5.18.X/env/env_hdfs.cc @@ -381,7 +381,7 @@ const std::string HdfsEnv::pathsep = "/"; // open a file for sequential reading Status HdfsEnv::NewSequentialFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) { result->reset(); HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname); @@ -396,7 +396,7 @@ Status HdfsEnv::NewSequentialFile(const std::string& fname, // open a file for random reading Status HdfsEnv::NewRandomAccessFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) { result->reset(); HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname); @@ -411,7 +411,7 @@ Status HdfsEnv::NewRandomAccessFile(const std::string& fname, // create a new file for writing Status HdfsEnv::NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) { result->reset(); Status s; @@ -437,7 +437,7 @@ class HdfsDirectory : public Directory { }; Status HdfsEnv::NewDirectory(const std::string& name, - 
unique_ptr* result) { + std::unique_ptr* result) { int value = hdfsExists(fileSys_, name.c_str()); switch (value) { case HDFS_EXISTS: @@ -581,7 +581,7 @@ Status HdfsEnv::UnlockFile(FileLock* lock) { } Status HdfsEnv::NewLogger(const std::string& fname, - shared_ptr* result) { + std::shared_ptr* result) { HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname); if (f == nullptr || !f->isValid()) { delete f; @@ -610,10 +610,10 @@ Status NewHdfsEnv(Env** hdfs_env, const std::string& fsname) { // dummy placeholders used when HDFS is not available namespace rocksdb { Status HdfsEnv::NewSequentialFile(const std::string& /*fname*/, - unique_ptr* /*result*/, + std::unique_ptr* /*result*/, const EnvOptions& /*options*/) { return Status::NotSupported("Not compiled with hdfs support"); - } +} Status NewHdfsEnv(Env** /*hdfs_env*/, const std::string& /*fsname*/) { return Status::NotSupported("Not compiled with hdfs support"); diff --git a/3rdParty/rocksdb/v5.16.X/env/env_posix.cc b/3rdParty/rocksdb/v5.18.X/env/env_posix.cc similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/env/env_posix.cc rename to 3rdParty/rocksdb/v5.18.X/env/env_posix.cc index ae8088f657..c2e456a661 100644 --- a/3rdParty/rocksdb/v5.16.X/env/env_posix.cc +++ b/3rdParty/rocksdb/v5.18.X/env/env_posix.cc @@ -25,6 +25,7 @@ #include #include #endif +#include #include #include #include @@ -102,6 +103,18 @@ class PosixFileLock : public FileLock { std::string filename; }; +int cloexec_flags(int flags, const EnvOptions* options) { + // If the system supports opening the file with cloexec enabled, + // do so, as this avoids a race condition if a db is opened around + // the same time that a child process is forked +#ifdef O_CLOEXEC + if (options == nullptr || options->set_fd_cloexec) { + flags |= O_CLOEXEC; + } +#endif + return flags; +} + class PosixEnv : public Env { public: PosixEnv(); @@ -129,11 +142,11 @@ class PosixEnv : public Env { } virtual Status NewSequentialFile(const std::string& fname, - 
unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { result->reset(); int fd = -1; - int flags = O_RDONLY; + int flags = cloexec_flags(O_RDONLY, &options); FILE* file = nullptr; if (options.use_direct_reads && !options.use_mmap_reads) { @@ -179,12 +192,13 @@ class PosixEnv : public Env { } virtual Status NewRandomAccessFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { result->reset(); Status s; int fd; - int flags = O_RDONLY; + int flags = cloexec_flags(O_RDONLY, &options); + if (options.use_direct_reads && !options.use_mmap_reads) { #ifdef ROCKSDB_LITE return Status::IOError(fname, "Direct I/O not supported in RocksDB lite"); @@ -235,7 +249,7 @@ class PosixEnv : public Env { } virtual Status OpenWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options, bool reopen = false) { result->reset(); @@ -266,6 +280,8 @@ class PosixEnv : public Env { flags |= O_WRONLY; } + flags = cloexec_flags(flags, &options); + do { IOSTATS_TIMER_GUARD(open_nanos); fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_)); @@ -317,20 +333,20 @@ class PosixEnv : public Env { } virtual Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { return OpenWritableFile(fname, result, options, false); } virtual Status ReopenWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { return OpenWritableFile(fname, result, options, true); } virtual Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { result->reset(); Status s; @@ -354,6 +370,8 @@ class PosixEnv : public Env { flags |= O_WRONLY; } + flags = cloexec_flags(flags, &options); + do { 
IOSTATS_TIMER_GUARD(open_nanos); fd = open(old_fname.c_str(), flags, @@ -412,12 +430,15 @@ class PosixEnv : public Env { } virtual Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { int fd = -1; + int flags = cloexec_flags(O_RDWR, &options); + while (fd < 0) { IOSTATS_TIMER_GUARD(open_nanos); - fd = open(fname.c_str(), O_RDWR, GetDBFileMode(allow_non_owner_access_)); + + fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_)); if (fd < 0) { // Error while opening the file if (errno == EINTR) { @@ -434,12 +455,14 @@ class PosixEnv : public Env { virtual Status NewMemoryMappedFileBuffer( const std::string& fname, - unique_ptr* result) override { + std::unique_ptr* result) override { int fd = -1; Status status; + int flags = cloexec_flags(O_RDWR, nullptr); + while (fd < 0) { IOSTATS_TIMER_GUARD(open_nanos); - fd = open(fname.c_str(), O_RDWR, 0644); + fd = open(fname.c_str(), flags, 0644); if (fd < 0) { // Error while opening the file if (errno == EINTR) { @@ -474,12 +497,13 @@ class PosixEnv : public Env { } virtual Status NewDirectory(const std::string& name, - unique_ptr* result) override { + std::unique_ptr* result) override { result->reset(); int fd; + int flags = cloexec_flags(0, nullptr); { IOSTATS_TIMER_GUARD(open_nanos); - fd = open(name.c_str(), 0); + fd = open(name.c_str(), flags); } if (fd < 0) { return IOError("While open directory", name, errno); @@ -496,7 +520,8 @@ class PosixEnv : public Env { return Status::OK(); } - switch (errno) { + int err = errno; + switch (err) { case EACCES: case ELOOP: case ENAMETOOLONG: @@ -504,8 +529,8 @@ class PosixEnv : public Env { case ENOTDIR: return Status::NotFound(); default: - assert(result == EIO || result == ENOMEM); - return Status::IOError("Unexpected error(" + ToString(result) + + assert(err == EIO || err == ENOMEM); + return Status::IOError("Unexpected error(" + ToString(err) + ") accessing file `" + fname + "' 
"); } } @@ -663,9 +688,11 @@ class PosixEnv : public Env { } int fd; + int flags = cloexec_flags(O_RDWR | O_CREAT, nullptr); + { IOSTATS_TIMER_GUARD(open_nanos); - fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644); + fd = open(fname.c_str(), flags, 0644); } if (fd < 0) { result = IOError("while open a file for lock", fname, errno); @@ -751,12 +778,30 @@ class PosixEnv : public Env { return gettid(pthread_self()); } + virtual Status GetFreeSpace(const std::string& fname, + uint64_t* free_space) override { + struct statvfs sbuf; + + if (statvfs(fname.c_str(), &sbuf) < 0) { + return IOError("While doing statvfs", fname, errno); + } + + *free_space = ((uint64_t)sbuf.f_bsize * sbuf.f_bfree); + return Status::OK(); + } + virtual Status NewLogger(const std::string& fname, - shared_ptr* result) override { + std::shared_ptr* result) override { FILE* f; { IOSTATS_TIMER_GUARD(open_nanos); - f = fopen(fname.c_str(), "w"); + f = fopen(fname.c_str(), "w" +#ifdef __GLIBC_PREREQ +#if __GLIBC_PREREQ(2, 7) + "e" // glibc extension to enable O_CLOEXEC +#endif +#endif + ); } if (f == nullptr) { result->reset(); diff --git a/3rdParty/rocksdb/v5.16.X/env/env_test.cc b/3rdParty/rocksdb/v5.18.X/env/env_test.cc similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/env/env_test.cc rename to 3rdParty/rocksdb/v5.18.X/env/env_test.cc index eda6b9d5d7..36cbd735d7 100644 --- a/3rdParty/rocksdb/v5.16.X/env/env_test.cc +++ b/3rdParty/rocksdb/v5.18.X/env/env_test.cc @@ -181,11 +181,11 @@ TEST_F(EnvPosixTest, DISABLED_FilePermission) { std::vector fileNames{ test::PerThreadDBPath(env_, "testfile"), test::PerThreadDBPath(env_, "testfile1")}; - unique_ptr wfile; + std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fileNames[0], &wfile, soptions)); ASSERT_OK(env_->NewWritableFile(fileNames[1], &wfile, soptions)); wfile.reset(); - unique_ptr rwfile; + std::unique_ptr rwfile; ASSERT_OK(env_->NewRandomRWFile(fileNames[1], &rwfile, soptions)); struct stat sb; @@ -217,7 +217,7 @@ TEST_F(EnvPosixTest, 
MemoryMappedFileBuffer) { std::string expected_data; std::string fname = test::PerThreadDBPath(env_, "testfile"); { - unique_ptr wfile; + std::unique_ptr wfile; const EnvOptions soptions; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); @@ -812,7 +812,7 @@ class IoctlFriendlyTmpdir { #ifndef ROCKSDB_LITE TEST_F(EnvPosixTest, PositionedAppend) { - unique_ptr writable_file; + std::unique_ptr writable_file; EnvOptions options; options.use_direct_writes = true; options.use_mmap_writes = false; @@ -832,7 +832,7 @@ TEST_F(EnvPosixTest, PositionedAppend) { // The file now has 1 sector worth of a followed by a page worth of b // Verify the above - unique_ptr seq_file; + std::unique_ptr seq_file; ASSERT_OK(env_->NewSequentialFile(ift.name() + "/f", &seq_file, options)); char scratch[kPageSize * 2]; Slice result; @@ -851,10 +851,10 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) { soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; IoctlFriendlyTmpdir ift; std::string fname = ift.name() + "/testfile"; - unique_ptr wfile; + std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); - unique_ptr file; + std::unique_ptr file; // Get Unique ID ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); @@ -921,7 +921,7 @@ TEST_P(EnvPosixTestWithParam, AllocateTest) { EnvOptions soptions; soptions.use_mmap_writes = false; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; - unique_ptr wfile; + std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); // allocate 100 MB @@ -990,14 +990,14 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDConcurrent) { fnames.push_back(ift.name() + "/" + "testfile" + ToString(i)); // Create file. - unique_ptr wfile; + std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile, soptions)); } // Collect and check whether the IDs are unique. 
std::unordered_set ids; for (const std::string fname : fnames) { - unique_ptr file; + std::unique_ptr file; std::string unique_id; ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); @@ -1033,14 +1033,14 @@ TEST_P(EnvPosixTestWithParam, RandomAccessUniqueIDDeletes) { for (int i = 0; i < 1000; ++i) { // Create file. { - unique_ptr wfile; + std::unique_ptr wfile; ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions)); } // Get Unique ID std::string unique_id; { - unique_ptr file; + std::unique_ptr file; ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions)); size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE); ASSERT_TRUE(id_size > 0); @@ -1076,7 +1076,7 @@ TEST_P(EnvPosixTestWithParam, InvalidateCache) { // Create file. { - unique_ptr wfile; + std::unique_ptr wfile; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) if (soptions.use_direct_writes) { soptions.use_direct_writes = false; @@ -1090,7 +1090,7 @@ TEST_P(EnvPosixTestWithParam, InvalidateCache) { // Random Read { - unique_ptr file; + std::unique_ptr file; auto scratch = NewAligned(kSectorSize, 0); Slice result; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) @@ -1107,7 +1107,7 @@ TEST_P(EnvPosixTestWithParam, InvalidateCache) { // Sequential Read { - unique_ptr file; + std::unique_ptr file; auto scratch = NewAligned(kSectorSize, 0); Slice result; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) @@ -1252,7 +1252,7 @@ TEST_P(EnvPosixTestWithParam, LogBufferMaxSizeTest) { TEST_P(EnvPosixTestWithParam, Preallocation) { rocksdb::SyncPoint::GetInstance()->EnableProcessing(); const std::string src = test::PerThreadDBPath(env_, "testfile"); - unique_ptr srcfile; + std::unique_ptr srcfile; EnvOptions soptions; soptions.use_direct_reads = soptions.use_direct_writes = direct_io_; #if !defined(OS_MACOSX) && !defined(OS_WIN) 
&& !defined(OS_SOLARIS) && !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD) @@ -1315,7 +1315,7 @@ TEST_P(EnvPosixTestWithParam, ConsistentChildrenAttributes) { for (int i = 0; i < kNumChildren; ++i) { const std::string path = test::TmpDir(env_) + "/" + "testfile_" + std::to_string(i); - unique_ptr file; + std::unique_ptr file; #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD) if (soptions.use_direct_writes) { rocksdb::SyncPoint::GetInstance()->SetCallBack( @@ -1368,50 +1368,110 @@ TEST_P(EnvPosixTestWithParam, WritableFileWrapper) { inc(1); return Status::OK(); } - Status Truncate(uint64_t /*size*/) override { return Status::OK(); } - Status Close() override { inc(2); return Status::OK(); } - Status Flush() override { inc(3); return Status::OK(); } - Status Sync() override { inc(4); return Status::OK(); } - Status Fsync() override { inc(5); return Status::OK(); } - void SetIOPriority(Env::IOPriority /*pri*/) override { inc(6); } - uint64_t GetFileSize() override { inc(7); return 0; } - void GetPreallocationStatus(size_t* /*block_size*/, - size_t* /*last_allocated_block*/) override { - inc(8); - } - size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { - inc(9); - return 0; - } - Status InvalidateCache(size_t /*offset*/, size_t /*length*/) override { - inc(10); + + Status PositionedAppend(const Slice& /*data*/, + uint64_t /*offset*/) override { + inc(2); return Status::OK(); } - protected: - Status Allocate(uint64_t /*offset*/, uint64_t /*len*/) override { - inc(11); + Status Truncate(uint64_t /*size*/) override { + inc(3); return Status::OK(); } - Status RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/) override { + + Status Close() override { + inc(4); + return Status::OK(); + } + + Status Flush() override { + inc(5); + return Status::OK(); + } + + Status Sync() override { + inc(6); + return Status::OK(); + } + + Status Fsync() override { + 
inc(7); + return Status::OK(); + } + + bool IsSyncThreadSafe() const override { + inc(8); + return true; + } + + bool use_direct_io() const override { + inc(9); + return true; + } + + size_t GetRequiredBufferAlignment() const override { + inc(10); + return 0; + } + + void SetIOPriority(Env::IOPriority /*pri*/) override { inc(11); } + + Env::IOPriority GetIOPriority() override { inc(12); + return Env::IOPriority::IO_LOW; + } + + void SetWriteLifeTimeHint(Env::WriteLifeTimeHint /*hint*/) override { + inc(13); + } + + Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { + inc(14); + return Env::WriteLifeTimeHint::WLTH_NOT_SET; + } + + uint64_t GetFileSize() override { + inc(15); + return 0; + } + + void SetPreallocationBlockSize(size_t /*size*/) override { inc(16); } + + void GetPreallocationStatus(size_t* /*block_size*/, + size_t* /*last_allocated_block*/) override { + inc(17); + } + + size_t GetUniqueId(char* /*id*/, size_t /*max_size*/) const override { + inc(18); + return 0; + } + + Status InvalidateCache(size_t /*offset*/, size_t /*length*/) override { + inc(19); + return Status::OK(); + } + + Status RangeSync(uint64_t /*offset*/, uint64_t /*nbytes*/) override { + inc(20); + return Status::OK(); + } + + void PrepareWrite(size_t /*offset*/, size_t /*len*/) override { inc(21); } + + Status Allocate(uint64_t /*offset*/, uint64_t /*len*/) override { + inc(22); return Status::OK(); } public: - ~Base() { - inc(13); - } + ~Base() { inc(23); } }; class Wrapper : public WritableFileWrapper { public: explicit Wrapper(WritableFile* target) : WritableFileWrapper(target) {} - - void CallProtectedMethods() { - Allocate(0, 0); - RangeSync(0, 0); - } }; int step = 0; @@ -1420,19 +1480,30 @@ TEST_P(EnvPosixTestWithParam, WritableFileWrapper) { Base b(&step); Wrapper w(&b); w.Append(Slice()); + w.PositionedAppend(Slice(), 0); + w.Truncate(0); w.Close(); w.Flush(); w.Sync(); w.Fsync(); + w.IsSyncThreadSafe(); + w.use_direct_io(); + w.GetRequiredBufferAlignment(); 
w.SetIOPriority(Env::IOPriority::IO_HIGH); + w.GetIOPriority(); + w.SetWriteLifeTimeHint(Env::WriteLifeTimeHint::WLTH_NOT_SET); + w.GetWriteLifeTimeHint(); w.GetFileSize(); + w.SetPreallocationBlockSize(0); w.GetPreallocationStatus(nullptr, nullptr); w.GetUniqueId(nullptr, 0); w.InvalidateCache(0, 0); - w.CallProtectedMethods(); + w.RangeSync(0, 0); + w.PrepareWrite(0, 0); + w.Allocate(0, 0); } - EXPECT_EQ(14, step); + EXPECT_EQ(24, step); } TEST_P(EnvPosixTestWithParam, PosixRandomRWFile) { @@ -1567,7 +1638,7 @@ TEST_P(EnvPosixTestWithParam, PosixRandomRWFileRandomized) { const std::string path = test::PerThreadDBPath(env_, "random_rw_file_rand"); env_->DeleteFile(path); - unique_ptr file; + std::unique_ptr file; #ifdef OS_LINUX // Cannot open non-existing file. @@ -1641,7 +1712,7 @@ class TestEnv : public EnvWrapper { int GetCloseCount() { return close_count; } virtual Status NewLogger(const std::string& /*fname*/, - shared_ptr* result) { + std::shared_ptr* result) { result->reset(new TestLogger(this)); return Status::OK(); } @@ -1685,8 +1756,8 @@ INSTANTIATE_TEST_CASE_P(DefaultEnvWithDirectIO, EnvPosixTestWithParam, #endif // !defined(ROCKSDB_LITE) #if !defined(ROCKSDB_LITE) && !defined(OS_WIN) -static unique_ptr chroot_env(NewChrootEnv(Env::Default(), - test::TmpDir(Env::Default()))); +static std::unique_ptr chroot_env( + NewChrootEnv(Env::Default(), test::TmpDir(Env::Default()))); INSTANTIATE_TEST_CASE_P( ChrootEnvWithoutDirectIO, EnvPosixTestWithParam, ::testing::Values(std::pair(chroot_env.get(), false))); diff --git a/3rdParty/rocksdb/v5.16.X/env/io_posix.cc b/3rdParty/rocksdb/v5.18.X/env/io_posix.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/env/io_posix.cc rename to 3rdParty/rocksdb/v5.18.X/env/io_posix.cc diff --git a/3rdParty/rocksdb/v5.16.X/env/io_posix.h b/3rdParty/rocksdb/v5.18.X/env/io_posix.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/env/io_posix.h rename to 3rdParty/rocksdb/v5.18.X/env/io_posix.h diff --git 
a/3rdParty/rocksdb/v5.16.X/env/mock_env.cc b/3rdParty/rocksdb/v5.18.X/env/mock_env.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/env/mock_env.cc rename to 3rdParty/rocksdb/v5.18.X/env/mock_env.cc index 9b019260dd..84b3060717 100644 --- a/3rdParty/rocksdb/v5.16.X/env/mock_env.cc +++ b/3rdParty/rocksdb/v5.18.X/env/mock_env.cc @@ -201,7 +201,7 @@ class MockSequentialFile : public SequentialFile { if (n > available) { n = available; } - pos_ += n; + pos_ += static_cast(n); return Status::OK(); } @@ -319,7 +319,7 @@ class TestMemLogger : public Logger { static const uint64_t flush_every_seconds_ = 5; std::atomic_uint_fast64_t last_flush_micros_; Env* env_; - bool flush_pending_; + std::atomic flush_pending_; public: TestMemLogger(std::unique_ptr f, Env* env, @@ -424,7 +424,7 @@ MockEnv::~MockEnv() { // Partial implementation of the Env interface. Status MockEnv::NewSequentialFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& /*soptions*/) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); @@ -441,7 +441,7 @@ Status MockEnv::NewSequentialFile(const std::string& fname, } Status MockEnv::NewRandomAccessFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& /*soptions*/) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); @@ -458,7 +458,7 @@ Status MockEnv::NewRandomAccessFile(const std::string& fname, } Status MockEnv::NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& /*soptions*/) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); @@ -476,7 +476,7 @@ Status MockEnv::NewRandomRWFile(const std::string& fname, Status MockEnv::ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) { auto s = RenameFile(old_fname, fname); if (!s.ok()) { @@ -487,7 +487,7 @@ Status MockEnv::ReuseWritableFile(const 
std::string& fname, } Status MockEnv::NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& env_options) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); @@ -503,7 +503,7 @@ Status MockEnv::NewWritableFile(const std::string& fname, } Status MockEnv::NewDirectory(const std::string& /*name*/, - unique_ptr* result) { + std::unique_ptr* result) { result->reset(new MockEnvDirectory()); return Status::OK(); } @@ -660,7 +660,7 @@ Status MockEnv::LinkFile(const std::string& src, const std::string& dest) { } Status MockEnv::NewLogger(const std::string& fname, - shared_ptr* result) { + std::shared_ptr* result) { auto fn = NormalizePath(fname); MutexLock lock(&mutex_); auto iter = file_map_.find(fn); diff --git a/3rdParty/rocksdb/v5.16.X/env/mock_env.h b/3rdParty/rocksdb/v5.18.X/env/mock_env.h similarity index 87% rename from 3rdParty/rocksdb/v5.16.X/env/mock_env.h rename to 3rdParty/rocksdb/v5.18.X/env/mock_env.h index 816256ab08..87b8deaf8c 100644 --- a/3rdParty/rocksdb/v5.16.X/env/mock_env.h +++ b/3rdParty/rocksdb/v5.18.X/env/mock_env.h @@ -28,28 +28,28 @@ class MockEnv : public EnvWrapper { // Partial implementation of the Env interface. 
virtual Status NewSequentialFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& soptions) override; virtual Status NewRandomAccessFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& soptions) override; virtual Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override; virtual Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override; virtual Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& env_options) override; virtual Status NewDirectory(const std::string& name, - unique_ptr* result) override; + std::unique_ptr* result) override; virtual Status FileExists(const std::string& fname) override; @@ -81,7 +81,7 @@ class MockEnv : public EnvWrapper { const std::string& target) override; virtual Status NewLogger(const std::string& fname, - shared_ptr* result) override; + std::shared_ptr* result) override; virtual Status LockFile(const std::string& fname, FileLock** flock) override; diff --git a/3rdParty/rocksdb/v5.16.X/env/mock_env_test.cc b/3rdParty/rocksdb/v5.18.X/env/mock_env_test.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/env/mock_env_test.cc rename to 3rdParty/rocksdb/v5.18.X/env/mock_env_test.cc index 19e259ccd8..abd5b89f0b 100644 --- a/3rdParty/rocksdb/v5.16.X/env/mock_env_test.cc +++ b/3rdParty/rocksdb/v5.18.X/env/mock_env_test.cc @@ -29,7 +29,7 @@ TEST_F(MockEnvTest, Corrupt) { const std::string kGood = "this is a good string, synced to disk"; const std::string kCorrupted = "this part may be corrupted"; const std::string kFileName = "/dir/f"; - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile(kFileName, &writable_file, soptions_)); 
ASSERT_OK(writable_file->Append(kGood)); ASSERT_TRUE(writable_file->GetFileSize() == kGood.size()); @@ -37,7 +37,7 @@ TEST_F(MockEnvTest, Corrupt) { std::string scratch; scratch.resize(kGood.size() + kCorrupted.size() + 16); Slice result; - unique_ptr rand_file; + std::unique_ptr rand_file; ASSERT_OK(env_->NewRandomAccessFile(kFileName, &rand_file, soptions_)); ASSERT_OK(rand_file->Read(0, kGood.size(), &result, &(scratch[0]))); ASSERT_EQ(result.compare(kGood), 0); diff --git a/3rdParty/rocksdb/v5.16.X/env/posix_logger.h b/3rdParty/rocksdb/v5.18.X/env/posix_logger.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/env/posix_logger.h rename to 3rdParty/rocksdb/v5.18.X/env/posix_logger.h index e983ba704e..401df6a3ff 100644 --- a/3rdParty/rocksdb/v5.16.X/env/posix_logger.h +++ b/3rdParty/rocksdb/v5.18.X/env/posix_logger.h @@ -165,7 +165,6 @@ class PosixLogger : public Logger { size_t sz = fwrite(base, 1, write_size, file_); flush_pending_ = true; - assert(sz == write_size); if (sz > 0) { log_size_ += write_size; } diff --git a/3rdParty/rocksdb/v5.16.X/examples/.gitignore b/3rdParty/rocksdb/v5.18.X/examples/.gitignore similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/.gitignore rename to 3rdParty/rocksdb/v5.18.X/examples/.gitignore diff --git a/3rdParty/rocksdb/v5.16.X/examples/Makefile b/3rdParty/rocksdb/v5.18.X/examples/Makefile similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/Makefile rename to 3rdParty/rocksdb/v5.18.X/examples/Makefile diff --git a/3rdParty/rocksdb/v5.16.X/examples/README.md b/3rdParty/rocksdb/v5.18.X/examples/README.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/README.md rename to 3rdParty/rocksdb/v5.18.X/examples/README.md diff --git a/3rdParty/rocksdb/v5.16.X/examples/c_simple_example.c b/3rdParty/rocksdb/v5.18.X/examples/c_simple_example.c similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/c_simple_example.c rename to 
3rdParty/rocksdb/v5.18.X/examples/c_simple_example.c diff --git a/3rdParty/rocksdb/v5.16.X/examples/column_families_example.cc b/3rdParty/rocksdb/v5.18.X/examples/column_families_example.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/column_families_example.cc rename to 3rdParty/rocksdb/v5.18.X/examples/column_families_example.cc diff --git a/3rdParty/rocksdb/v5.16.X/examples/compact_files_example.cc b/3rdParty/rocksdb/v5.18.X/examples/compact_files_example.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/compact_files_example.cc rename to 3rdParty/rocksdb/v5.18.X/examples/compact_files_example.cc diff --git a/3rdParty/rocksdb/v5.16.X/examples/compaction_filter_example.cc b/3rdParty/rocksdb/v5.18.X/examples/compaction_filter_example.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/compaction_filter_example.cc rename to 3rdParty/rocksdb/v5.18.X/examples/compaction_filter_example.cc diff --git a/3rdParty/rocksdb/v5.16.X/examples/optimistic_transaction_example.cc b/3rdParty/rocksdb/v5.18.X/examples/optimistic_transaction_example.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/optimistic_transaction_example.cc rename to 3rdParty/rocksdb/v5.18.X/examples/optimistic_transaction_example.cc diff --git a/3rdParty/rocksdb/v5.16.X/examples/options_file_example.cc b/3rdParty/rocksdb/v5.18.X/examples/options_file_example.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/options_file_example.cc rename to 3rdParty/rocksdb/v5.18.X/examples/options_file_example.cc diff --git a/3rdParty/rocksdb/v5.16.X/examples/rocksdb_option_file_example.ini b/3rdParty/rocksdb/v5.18.X/examples/rocksdb_option_file_example.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/rocksdb_option_file_example.ini rename to 3rdParty/rocksdb/v5.18.X/examples/rocksdb_option_file_example.ini diff --git a/3rdParty/rocksdb/v5.16.X/examples/simple_example.cc 
b/3rdParty/rocksdb/v5.18.X/examples/simple_example.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/simple_example.cc rename to 3rdParty/rocksdb/v5.18.X/examples/simple_example.cc diff --git a/3rdParty/rocksdb/v5.16.X/examples/transaction_example.cc b/3rdParty/rocksdb/v5.18.X/examples/transaction_example.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/examples/transaction_example.cc rename to 3rdParty/rocksdb/v5.18.X/examples/transaction_example.cc diff --git a/3rdParty/rocksdb/v5.16.X/hdfs/README b/3rdParty/rocksdb/v5.18.X/hdfs/README similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/hdfs/README rename to 3rdParty/rocksdb/v5.18.X/hdfs/README diff --git a/3rdParty/rocksdb/v5.16.X/hdfs/env_hdfs.h b/3rdParty/rocksdb/v5.18.X/hdfs/env_hdfs.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/hdfs/env_hdfs.h rename to 3rdParty/rocksdb/v5.18.X/hdfs/env_hdfs.h index b0c9e33fd7..a77c42e0af 100644 --- a/3rdParty/rocksdb/v5.16.X/hdfs/env_hdfs.h +++ b/3rdParty/rocksdb/v5.18.X/hdfs/env_hdfs.h @@ -255,23 +255,24 @@ class HdfsEnv : public Env { } virtual Status NewSequentialFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override; - virtual Status NewRandomAccessFile(const std::string& /*fname*/, - unique_ptr* /*result*/, - const EnvOptions& /*options*/) override { + virtual Status NewRandomAccessFile( + const std::string& /*fname*/, + std::unique_ptr* /*result*/, + const EnvOptions& /*options*/) override { return notsup; } virtual Status NewWritableFile(const std::string& /*fname*/, - unique_ptr* /*result*/, + std::unique_ptr* /*result*/, const EnvOptions& /*options*/) override { return notsup; } virtual Status NewDirectory(const std::string& /*name*/, - unique_ptr* /*result*/) override { + std::unique_ptr* /*result*/) override { return notsup; } @@ -328,7 +329,7 @@ class HdfsEnv : public Env { virtual Status UnlockFile(FileLock* /*lock*/) override { return notsup; } 
virtual Status NewLogger(const std::string& /*fname*/, - shared_ptr* /*result*/) override { + std::shared_ptr* /*result*/) override { return notsup; } diff --git a/3rdParty/rocksdb/v5.16.X/hdfs/setup.sh b/3rdParty/rocksdb/v5.18.X/hdfs/setup.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/hdfs/setup.sh rename to 3rdParty/rocksdb/v5.18.X/hdfs/setup.sh diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/advanced_options.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/advanced_options.h similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/advanced_options.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/advanced_options.h index 940a6f6b74..fe331482e2 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/advanced_options.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/advanced_options.h @@ -413,6 +413,7 @@ struct AdvancedColumnFamilyOptions { // of the level. // At the same time max_bytes_for_level_multiplier and // max_bytes_for_level_multiplier_additional are still satisfied. + // (When L0 is too large, we make some adjustment. See below.) // // With this option on, from an empty DB, we make last level the base level, // which means merging L0 data into the last level, until it exceeds @@ -451,6 +452,29 @@ struct AdvancedColumnFamilyOptions { // max_bytes_for_level_base, for a more predictable LSM tree shape. It is // useful to limit worse case space amplification. // + // + // If the compaction from L0 is lagged behind, a special mode will be turned + // on to prioritize write amplification against max_bytes_for_level_multiplier + // or max_bytes_for_level_base. The L0 compaction is lagged behind by looking + // at number of L0 files and total L0 size. If number of L0 files is at least + // the double of level0_file_num_compaction_trigger, or the total size is + // at least max_bytes_for_level_base, this mode is on. 
The target of L1 grows + // to the actual data size in L0, and then determine the target for each level + // so that each level will have the same level multiplier. + // + // For example, when L0 size is 100MB, the size of last level is 1600MB, + // max_bytes_for_level_base = 80MB, and max_bytes_for_level_multiplier = 10. + // Since L0 size is larger than max_bytes_for_level_base, this is a L0 + // compaction backlogged mode. So that the L1 size is determined to be 100MB. + // Based on max_bytes_for_level_multiplier = 10, at least 3 non-0 levels will + // be needed. The level multiplier will be calculated to be 4 and the three + // levels' target to be [100MB, 400MB, 1600MB]. + // + // In this mode, The number of levels will be no more than the normal mode, + // and the level multiplier will be lower. The write amplification will + // likely to be reduced. + // + // // max_bytes_for_level_multiplier_additional is ignored with this flag on. // // Turning this feature on or off for an existing DB can cause unexpected @@ -478,19 +502,25 @@ struct AdvancedColumnFamilyOptions { // threshold. But it's not guaranteed. // Value 0 will be sanitized. // - // Default: result.target_file_size_base * 25 + // Default: target_file_size_base * 25 + // + // Dynamically changeable through SetOptions() API uint64_t max_compaction_bytes = 0; // All writes will be slowed down to at least delayed_write_rate if estimated // bytes needed to be compaction exceed this threshold. // // Default: 64GB + // + // Dynamically changeable through SetOptions() API uint64_t soft_pending_compaction_bytes_limit = 64 * 1073741824ull; // All writes are stopped if estimated bytes needed to be compaction exceed // this threshold. // // Default: 256GB + // + // Dynamically changeable through SetOptions() API uint64_t hard_pending_compaction_bytes_limit = 256 * 1073741824ull; // The compaction style. 
Default: kCompactionStyleLevel @@ -502,13 +532,17 @@ struct AdvancedColumnFamilyOptions { CompactionPri compaction_pri = kByCompensatedSize; // The options needed to support Universal Style compactions + // + // Dynamically changeable through SetOptions() API + // Dynamic change example: + // SetOptions("compaction_options_universal", "{size_ratio=2;}") CompactionOptionsUniversal compaction_options_universal; // The options for FIFO compaction style // // Dynamically changeable through SetOptions() API // Dynamic change example: - // SetOption("compaction_options_fifo", "{max_table_files_size=100;ttl=2;}") + // SetOptions("compaction_options_fifo", "{max_table_files_size=100;ttl=2;}") CompactionOptionsFIFO compaction_options_fifo; // An iteration->Next() sequentially skips over keys with the same @@ -578,7 +612,10 @@ struct AdvancedColumnFamilyOptions { bool optimize_filters_for_hits = false; // After writing every SST file, reopen it and read all the keys. + // // Default: false + // + // Dynamically changeable through SetOptions() API bool paranoid_file_checks = false; // In debug mode, RocksDB run consistency checks on the LSM every time the LSM @@ -588,7 +625,10 @@ struct AdvancedColumnFamilyOptions { bool force_consistency_checks = false; // Measure IO stats in compactions and flushes, if true. + // // Default: false + // + // Dynamically changeable through SetOptions() API bool report_bg_io_stats = false; // Non-bottom-level files older than TTL will go through the compaction diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/c.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/c.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/c.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/c.h index d86b9737d9..cf46054aa3 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/c.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/c.h @@ -42,9 +42,6 @@ (5) All of the pointer arguments must be non-NULL. 
*/ -#ifndef STORAGE_ROCKSDB_INCLUDE_C_H_ -#define STORAGE_ROCKSDB_INCLUDE_C_H_ - #pragma once #ifdef _WIN32 @@ -126,6 +123,8 @@ typedef struct rocksdb_transaction_t rocksdb_transaction_t; typedef struct rocksdb_checkpoint_t rocksdb_checkpoint_t; typedef struct rocksdb_wal_iterator_t rocksdb_wal_iterator_t; typedef struct rocksdb_wal_readoptions_t rocksdb_wal_readoptions_t; +typedef struct rocksdb_memory_consumers_t rocksdb_memory_consumers_t; +typedef struct rocksdb_memory_usage_t rocksdb_memory_usage_t; /* DB operations */ @@ -1423,6 +1422,10 @@ extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_smallestkey( const rocksdb_livefiles_t*, int index, size_t* size); extern ROCKSDB_LIBRARY_API const char* rocksdb_livefiles_largestkey( const rocksdb_livefiles_t*, int index, size_t* size); +extern ROCKSDB_LIBRARY_API uint64_t rocksdb_livefiles_entries( + const rocksdb_livefiles_t*, int index); +extern ROCKSDB_LIBRARY_API uint64_t rocksdb_livefiles_deletions( + const rocksdb_livefiles_t*, int index); extern ROCKSDB_LIBRARY_API void rocksdb_livefiles_destroy( const rocksdb_livefiles_t*); @@ -1675,8 +1678,33 @@ extern ROCKSDB_LIBRARY_API void rocksdb_pinnableslice_destroy( extern ROCKSDB_LIBRARY_API const char* rocksdb_pinnableslice_value( const rocksdb_pinnableslice_t* t, size_t* vlen); +extern ROCKSDB_LIBRARY_API rocksdb_memory_consumers_t* + rocksdb_memory_consumers_create(); +extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_add_db( + rocksdb_memory_consumers_t* consumers, rocksdb_t* db); +extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_add_cache( + rocksdb_memory_consumers_t* consumers, rocksdb_cache_t* cache); +extern ROCKSDB_LIBRARY_API void rocksdb_memory_consumers_destroy( + rocksdb_memory_consumers_t* consumers); +extern ROCKSDB_LIBRARY_API rocksdb_memory_usage_t* +rocksdb_approximate_memory_usage_create(rocksdb_memory_consumers_t* consumers, + char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_approximate_memory_usage_destroy( + 
rocksdb_memory_usage_t* usage); + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_mem_table_total( + rocksdb_memory_usage_t* memory_usage); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_mem_table_unflushed( + rocksdb_memory_usage_t* memory_usage); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_mem_table_readers_total( + rocksdb_memory_usage_t* memory_usage); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_approximate_memory_usage_get_cache_total( + rocksdb_memory_usage_t* memory_usage); + #ifdef __cplusplus } /* end extern "C" */ #endif - -#endif /* STORAGE_ROCKSDB_INCLUDE_C_H_ */ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/cache.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/cache.h similarity index 89% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/cache.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/cache.h index da3b934d83..190112b37e 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/cache.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/cache.h @@ -25,6 +25,7 @@ #include #include #include +#include "rocksdb/memory_allocator.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" @@ -58,13 +59,24 @@ struct LRUCacheOptions { // BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority. double high_pri_pool_ratio = 0.0; + // If non-nullptr will use this allocator instead of system allocator when + // allocating memory for cache blocks. Call this method before you start using + // the cache! + // + // Caveat: when the cache is used as block cache, the memory allocator is + // ignored when dealing with compression libraries that allocate memory + // internally (currently only XPRESS). 
+ std::shared_ptr memory_allocator; + LRUCacheOptions() {} LRUCacheOptions(size_t _capacity, int _num_shard_bits, - bool _strict_capacity_limit, double _high_pri_pool_ratio) + bool _strict_capacity_limit, double _high_pri_pool_ratio, + std::shared_ptr _memory_allocator = nullptr) : capacity(_capacity), num_shard_bits(_num_shard_bits), strict_capacity_limit(_strict_capacity_limit), - high_pri_pool_ratio(_high_pri_pool_ratio) {} + high_pri_pool_ratio(_high_pri_pool_ratio), + memory_allocator(std::move(_memory_allocator)) {} }; // Create a new cache with a fixed size capacity. The cache is sharded @@ -75,10 +87,10 @@ struct LRUCacheOptions { // high_pri_pool_pct. // num_shard_bits = -1 means it is automatically determined: every shard // will be at least 512KB and number of shard bits will not exceed 6. -extern std::shared_ptr NewLRUCache(size_t capacity, - int num_shard_bits = -1, - bool strict_capacity_limit = false, - double high_pri_pool_ratio = 0.0); +extern std::shared_ptr NewLRUCache( + size_t capacity, int num_shard_bits = -1, + bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.0, + std::shared_ptr memory_allocator = nullptr); extern std::shared_ptr NewLRUCache(const LRUCacheOptions& cache_opts); @@ -97,7 +109,8 @@ class Cache { // likely to get evicted than low priority entries. enum class Priority { HIGH, LOW }; - Cache() {} + Cache(std::shared_ptr allocator = nullptr) + : memory_allocator_(std::move(allocator)) {} // Destroys all existing entries by calling the "deleter" // function that was passed via the Insert() function. 
@@ -228,10 +241,14 @@ class Cache { virtual void TEST_mark_as_data_block(const Slice& /*key*/, size_t /*charge*/) {} + MemoryAllocator* memory_allocator() const { return memory_allocator_.get(); } + private: // No copying allowed Cache(const Cache&); Cache& operator=(const Cache&); + + std::shared_ptr memory_allocator_; }; } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/cleanable.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/cleanable.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/cleanable.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/cleanable.h index ee4ee44241..6dba8d9531 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/cleanable.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/cleanable.h @@ -16,8 +16,7 @@ // non-const method, all threads accessing the same Iterator must use // external synchronization. -#ifndef INCLUDE_ROCKSDB_CLEANABLE_H_ -#define INCLUDE_ROCKSDB_CLEANABLE_H_ +#pragma once namespace rocksdb { @@ -78,5 +77,3 @@ class Cleanable { }; } // namespace rocksdb - -#endif // INCLUDE_ROCKSDB_CLEANABLE_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/compaction_filter.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/compaction_filter.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/compaction_filter.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/compaction_filter.h index 29b7e50771..98f86c2812 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/compaction_filter.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/compaction_filter.h @@ -6,8 +6,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#ifndef STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_ -#define STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_ +#pragma once #include #include @@ -206,5 +205,3 @@ class CompactionFilterFactory { }; } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_COMPACTION_FILTER_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/compaction_job_stats.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/compaction_job_stats.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/compaction_job_stats.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/compaction_job_stats.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/comparator.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/comparator.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/comparator.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/comparator.h index b048ebaf42..12e05ffee7 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/comparator.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/comparator.h @@ -6,8 +6,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#ifndef STORAGE_ROCKSDB_INCLUDE_COMPARATOR_H_ -#define STORAGE_ROCKSDB_INCLUDE_COMPARATOR_H_ +#pragma once #include @@ -92,5 +91,3 @@ extern const Comparator* BytewiseComparator(); extern const Comparator* ReverseBytewiseComparator(); } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_COMPARATOR_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/convenience.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/convenience.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/convenience.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/convenience.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/db.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/db.h index 746770836e..6a37084c52 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/db.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/db.h @@ -6,8 +6,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef STORAGE_ROCKSDB_INCLUDE_DB_H_ -#define STORAGE_ROCKSDB_INCLUDE_DB_H_ +#pragma once #include #include @@ -288,16 +287,12 @@ class DB { // a non-OK status on error. It is not an error if no keys exist in the range // ["begin_key", "end_key"). // - // This feature is currently an experimental performance optimization for - // deleting very large ranges of contiguous keys. Invoking it many times or on - // small ranges may severely degrade read performance; in particular, the - // resulting performance can be worse than calling Delete() for each key in - // the range. Note also the degraded read performance affects keys outside the - // deleted ranges, and affects database operations involving scans, like flush - // and compaction. 
- // - // Consider setting ReadOptions::ignore_range_deletions = true to speed - // up reads for key(s) that are known to be unaffected by range deletions. + // This feature is now usable in production, with the following caveats: + // 1) Accumulating many range tombstones in the memtable will degrade read + // performance; this can be avoided by manually flushing occasionally. + // 2) Limiting the maximum number of open files in the presence of range + // tombstones can degrade read performance. To avoid this problem, set + // max_open_files to -1 whenever possible. virtual Status DeleteRange(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& begin_key, const Slice& end_key); @@ -573,6 +568,11 @@ class DB { // log files that should be kept. static const std::string kMinLogNumberToKeep; + // "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file + // number for an obsolete SST to be kept. The max value of `uint64_t` + // will be returned if all obsolete files can be deleted. + static const std::string kMinObsoleteSstNumberToKeep; + // "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST // files. // WARNING: may slow down online queries if there are too many files. @@ -671,6 +671,7 @@ class DB { // "rocksdb.current-super-version-number" // "rocksdb.estimate-live-data-size" // "rocksdb.min-log-number-to-keep" + // "rocksdb.min-obsolete-sst-number-to-keep" // "rocksdb.total-sst-files-size" // "rocksdb.live-sst-files-size" // "rocksdb.base-level" @@ -901,11 +902,22 @@ class DB { virtual DBOptions GetDBOptions() const = 0; // Flush all mem-table data. + // Flush a single column family, even when atomic flush is enabled. To flush + // multiple column families, use Flush(options, column_families). 
virtual Status Flush(const FlushOptions& options, ColumnFamilyHandle* column_family) = 0; virtual Status Flush(const FlushOptions& options) { return Flush(options, DefaultColumnFamily()); } + // Flushes multiple column families. + // If atomic flush is not enabled, Flush(options, column_families) is + // equivalent to calling Flush(options, column_family) multiple times. + // If atomic flush is enabled, Flush(options, column_families) will flush all + // column families specified in 'column_families' up to the latest sequence + // number at the time when flush is requested. + virtual Status Flush( + const FlushOptions& options, + const std::vector& column_families) = 0; // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL // afterwards. @@ -950,14 +962,14 @@ class DB { // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup // Retrieve the list of all files in the database. The files are - // relative to the dbname and are not absolute paths. The valid size of the - // manifest file is returned in manifest_file_size. The manifest file is an - // ever growing file, but only the portion specified by manifest_file_size is - // valid for this snapshot. - // Setting flush_memtable to true does Flush before recording the live files. - // Setting flush_memtable to false is useful when we don't want to wait for - // flush which may have to wait for compaction to complete taking an - // indeterminate time. + // relative to the dbname and are not absolute paths. Despite being relative + // paths, the file names begin with "/". The valid size of the manifest file + // is returned in manifest_file_size. The manifest file is an ever growing + // file, but only the portion specified by manifest_file_size is valid for + // this snapshot. Setting flush_memtable to true does Flush before recording + // the live files. 
Setting flush_memtable to false is useful when we don't + // want to wait for flush which may have to wait for compaction to complete + // taking an indeterminate time. // // In case you have multiple column families, even if flush_memtable is true, // you still need to call GetSortedWalFiles after GetLiveFiles to compensate @@ -980,9 +992,9 @@ class DB { // cleared aggressively and the iterator might keep getting invalid before // an update is read. virtual Status GetUpdatesSince( - SequenceNumber seq_number, unique_ptr* iter, - const TransactionLogIterator::ReadOptions& - read_options = TransactionLogIterator::ReadOptions()) = 0; + SequenceNumber seq_number, std::unique_ptr* iter, + const TransactionLogIterator::ReadOptions& read_options = + TransactionLogIterator::ReadOptions()) = 0; // Windows API macro interference #undef DeleteFile @@ -1221,5 +1233,3 @@ Status RepairDB(const std::string& dbname, const Options& options); #endif } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_DB_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/db_bench_tool.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/db_bench_tool.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/db_bench_tool.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/db_bench_tool.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/db_dump_tool.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/db_dump_tool.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/db_dump_tool.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/db_dump_tool.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/env.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/env.h similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/env.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/env.h index c6ca725c52..bc439ac1c4 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/env.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/env.h @@ -14,8 +14,7 @@ // All Env 
implementations are safe for concurrent access from // multiple threads without any external synchronization. -#ifndef STORAGE_ROCKSDB_INCLUDE_ENV_H_ -#define STORAGE_ROCKSDB_INCLUDE_ENV_H_ +#pragma once #include #include @@ -138,9 +137,8 @@ class Env { // // The returned file will only be accessed by one thread at a time. virtual Status NewSequentialFile(const std::string& fname, - unique_ptr* result, - const EnvOptions& options) - = 0; + std::unique_ptr* result, + const EnvOptions& options) = 0; // Create a brand new random access read-only file with the // specified name. On success, stores a pointer to the new file in @@ -150,9 +148,8 @@ class Env { // // The returned file may be concurrently accessed by multiple threads. virtual Status NewRandomAccessFile(const std::string& fname, - unique_ptr* result, - const EnvOptions& options) - = 0; + std::unique_ptr* result, + const EnvOptions& options) = 0; // These values match Linux definition // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56 enum WriteLifeTimeHint { @@ -172,7 +169,7 @@ class Env { // // The returned file will only be accessed by one thread at a time. virtual Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) = 0; // Create an object that writes to a new file with the specified @@ -183,7 +180,7 @@ class Env { // // The returned file will only be accessed by one thread at a time. virtual Status ReopenWritableFile(const std::string& /*fname*/, - unique_ptr* /*result*/, + std::unique_ptr* /*result*/, const EnvOptions& /*options*/) { return Status::NotSupported(); } @@ -191,7 +188,7 @@ class Env { // Reuse an existing file by renaming it and opening it as writable. 
virtual Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options); // Open `fname` for random read and write, if file doesn't exist the file @@ -200,7 +197,7 @@ class Env { // // The returned file will only be accessed by one thread at a time. virtual Status NewRandomRWFile(const std::string& /*fname*/, - unique_ptr* /*result*/, + std::unique_ptr* /*result*/, const EnvOptions& /*options*/) { return Status::NotSupported("RandomRWFile is not implemented in this Env"); } @@ -210,7 +207,7 @@ class Env { // file in `*result`. The file must exist prior to this call. virtual Status NewMemoryMappedFileBuffer( const std::string& /*fname*/, - unique_ptr* /*result*/) { + std::unique_ptr* /*result*/) { return Status::NotSupported( "MemoryMappedFileBuffer is not implemented in this Env"); } @@ -223,7 +220,7 @@ class Env { // *result and returns OK. On failure stores nullptr in *result and // returns non-OK. virtual Status NewDirectory(const std::string& name, - unique_ptr* result) = 0; + std::unique_ptr* result) = 0; // Returns OK if the named file exists. // NotFound if the named file does not exist, @@ -371,7 +368,7 @@ class Env { // Create and return a log file for storing informational messages. virtual Status NewLogger(const std::string& fname, - shared_ptr* result) = 0; + std::shared_ptr* result) = 0; // Returns the number of micro-seconds since some fixed point in time. // It is often used as system time such as in GenericRateLimiter @@ -478,6 +475,15 @@ class Env { // Returns the ID of the current thread. 
virtual uint64_t GetThreadID() const; +// This seems to clash with a macro on Windows, so #undef it here +#undef GetFreeSpace + + // Get the amount of free disk space + virtual Status GetFreeSpace(const std::string& /*path*/, + uint64_t* /*diskfree*/) { + return Status::NotSupported(); + } + protected: // The pointer to an internal structure that will update the // status of each thread. @@ -934,24 +940,32 @@ class FileLock { void operator=(const FileLock&); }; -extern void LogFlush(const shared_ptr& info_log); +extern void LogFlush(const std::shared_ptr& info_log); extern void Log(const InfoLogLevel log_level, - const shared_ptr& info_log, const char* format, ...); + const std::shared_ptr& info_log, const char* format, + ...); // a set of log functions with different log levels. -extern void Header(const shared_ptr& info_log, const char* format, ...); -extern void Debug(const shared_ptr& info_log, const char* format, ...); -extern void Info(const shared_ptr& info_log, const char* format, ...); -extern void Warn(const shared_ptr& info_log, const char* format, ...); -extern void Error(const shared_ptr& info_log, const char* format, ...); -extern void Fatal(const shared_ptr& info_log, const char* format, ...); +extern void Header(const std::shared_ptr& info_log, const char* format, + ...); +extern void Debug(const std::shared_ptr& info_log, const char* format, + ...); +extern void Info(const std::shared_ptr& info_log, const char* format, + ...); +extern void Warn(const std::shared_ptr& info_log, const char* format, + ...); +extern void Error(const std::shared_ptr& info_log, const char* format, + ...); +extern void Fatal(const std::shared_ptr& info_log, const char* format, + ...); // Log the specified data to *info_log if info_log is non-nullptr. // The default info log level is InfoLogLevel::INFO_LEVEL. -extern void Log(const shared_ptr& info_log, const char* format, ...) +extern void Log(const std::shared_ptr& info_log, const char* format, + ...) 
# if defined(__GNUC__) || defined(__clang__) - __attribute__((__format__ (__printf__, 2, 3))) + __attribute__((__format__(__printf__, 2, 3))) # endif ; @@ -997,37 +1011,38 @@ class EnvWrapper : public Env { Env* target() const { return target_; } // The following text is boilerplate that forwards all methods to target() - Status NewSequentialFile(const std::string& f, unique_ptr* r, + Status NewSequentialFile(const std::string& f, + std::unique_ptr* r, const EnvOptions& options) override { return target_->NewSequentialFile(f, r, options); } Status NewRandomAccessFile(const std::string& f, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) override { return target_->NewRandomAccessFile(f, r, options); } - Status NewWritableFile(const std::string& f, unique_ptr* r, + Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override { return target_->NewWritableFile(f, r, options); } Status ReopenWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { return target_->ReopenWritableFile(fname, result, options); } Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) override { return target_->ReuseWritableFile(fname, old_fname, r, options); } Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { return target_->NewRandomRWFile(fname, result, options); } Status NewDirectory(const std::string& name, - unique_ptr* result) override { + std::unique_ptr* result) override { return target_->NewDirectory(name, result); } Status FileExists(const std::string& f) override { @@ -1105,7 +1120,7 @@ class EnvWrapper : public Env { return target_->GetTestDirectory(path); } Status NewLogger(const std::string& fname, - shared_ptr* result) override { + std::shared_ptr* result) override { return 
target_->NewLogger(fname, result); } uint64_t NowMicros() override { return target_->NowMicros(); } @@ -1216,36 +1231,57 @@ class WritableFileWrapper : public WritableFile { Status Sync() override { return target_->Sync(); } Status Fsync() override { return target_->Fsync(); } bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); } + + bool use_direct_io() const override { return target_->use_direct_io(); } + + size_t GetRequiredBufferAlignment() const override { + return target_->GetRequiredBufferAlignment(); + } + void SetIOPriority(Env::IOPriority pri) override { target_->SetIOPriority(pri); } + Env::IOPriority GetIOPriority() override { return target_->GetIOPriority(); } + + void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override { + target_->SetWriteLifeTimeHint(hint); + } + + Env::WriteLifeTimeHint GetWriteLifeTimeHint() override { + return target_->GetWriteLifeTimeHint(); + } + uint64_t GetFileSize() override { return target_->GetFileSize(); } - void GetPreallocationStatus(size_t* block_size, - size_t* last_allocated_block) override { - target_->GetPreallocationStatus(block_size, last_allocated_block); - } - size_t GetUniqueId(char* id, size_t max_size) const override { - return target_->GetUniqueId(id, max_size); - } - Status InvalidateCache(size_t offset, size_t length) override { - return target_->InvalidateCache(offset, length); - } void SetPreallocationBlockSize(size_t size) override { target_->SetPreallocationBlockSize(size); } + + void GetPreallocationStatus(size_t* block_size, + size_t* last_allocated_block) override { + target_->GetPreallocationStatus(block_size, last_allocated_block); + } + + size_t GetUniqueId(char* id, size_t max_size) const override { + return target_->GetUniqueId(id, max_size); + } + + Status InvalidateCache(size_t offset, size_t length) override { + return target_->InvalidateCache(offset, length); + } + + Status RangeSync(uint64_t offset, uint64_t nbytes) override { + return 
target_->RangeSync(offset, nbytes); + } + void PrepareWrite(size_t offset, size_t len) override { target_->PrepareWrite(offset, len); } - protected: Status Allocate(uint64_t offset, uint64_t len) override { return target_->Allocate(offset, len); } - Status RangeSync(uint64_t offset, uint64_t nbytes) override { - return target_->RangeSync(offset, nbytes); - } private: WritableFile* target_; @@ -1267,5 +1303,3 @@ Status NewHdfsEnv(Env** hdfs_env, const std::string& fsname); Env* NewTimedEnv(Env* base_env); } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_ENV_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/env_encryption.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/env_encryption.h similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/env_encryption.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/env_encryption.h index 70dce616a6..a6e9195465 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/env_encryption.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/env_encryption.h @@ -142,8 +142,9 @@ class EncryptionProvider { // CreateCipherStream creates a block access cipher stream for a file given // given name and options. - virtual Status CreateCipherStream(const std::string& fname, const EnvOptions& options, - Slice& prefix, unique_ptr* result) = 0; + virtual Status CreateCipherStream( + const std::string& fname, const EnvOptions& options, Slice& prefix, + std::unique_ptr* result) = 0; }; // This encryption provider uses a CTR cipher stream, with a given block cipher @@ -174,10 +175,11 @@ class CTREncryptionProvider : public EncryptionProvider { // CreateCipherStream creates a block access cipher stream for a file given // given name and options. 
- virtual Status CreateCipherStream(const std::string& fname, const EnvOptions& options, - Slice& prefix, unique_ptr* result) override; + virtual Status CreateCipherStream( + const std::string& fname, const EnvOptions& options, Slice& prefix, + std::unique_ptr* result) override; - protected: + protected: // PopulateSecretPrefixPart initializes the data into a new prefix block // that will be encrypted. This function will store the data in plain text. // It will be encrypted later (before written to disk). @@ -187,8 +189,10 @@ class CTREncryptionProvider : public EncryptionProvider { // CreateCipherStreamFromPrefix creates a block access cipher stream for a file given // given name and options. The given prefix is already decrypted. - virtual Status CreateCipherStreamFromPrefix(const std::string& fname, const EnvOptions& options, - uint64_t initialCounter, const Slice& iv, const Slice& prefix, unique_ptr* result); + virtual Status CreateCipherStreamFromPrefix( + const std::string& fname, const EnvOptions& options, + uint64_t initialCounter, const Slice& iv, const Slice& prefix, + std::unique_ptr* result); }; } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/experimental.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/experimental.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/experimental.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/experimental.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/filter_policy.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/filter_policy.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/filter_policy.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/filter_policy.h index 4706f38c30..9c0904456f 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/filter_policy.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/filter_policy.h @@ -17,8 +17,7 @@ // Most people will want to use the builtin bloom filter support (see // NewBloomFilterPolicy() 
below). -#ifndef STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_ -#define STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_ +#pragma once #include #include @@ -146,8 +145,6 @@ class FilterPolicy { // ignores trailing spaces, it would be incorrect to use a // FilterPolicy (like NewBloomFilterPolicy) that does not ignore // trailing spaces in keys. -extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key, - bool use_block_based_builder = true); +extern const FilterPolicy* NewBloomFilterPolicy( + int bits_per_key, bool use_block_based_builder = false); } - -#endif // STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/flush_block_policy.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/flush_block_policy.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/flush_block_policy.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/flush_block_policy.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/iostats_context.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/iostats_context.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/iostats_context.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/iostats_context.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/iterator.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/iterator.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/iterator.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/iterator.h index 4475eb396f..e99b434a01 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/iterator.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/iterator.h @@ -16,8 +16,7 @@ // non-const method, all threads accessing the same Iterator must use // external synchronization. 
-#ifndef STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_ -#define STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_ +#pragma once #include #include "rocksdb/cleanable.h" @@ -119,5 +118,3 @@ extern Iterator* NewEmptyIterator(); extern Iterator* NewErrorIterator(const Status& status); } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_ITERATOR_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/ldb_tool.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/ldb_tool.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/ldb_tool.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/ldb_tool.h index 0ec2da9fc0..0dbc65c4bb 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/ldb_tool.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/ldb_tool.h @@ -2,8 +2,8 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#ifndef ROCKSDB_LITE #pragma once +#ifndef ROCKSDB_LITE #include #include #include "rocksdb/db.h" diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/listener.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/listener.h similarity index 87% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/listener.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/listener.h index 601951cd0f..8ceb2ed4c0 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/listener.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/listener.h @@ -27,6 +27,7 @@ enum class TableFileCreationReason { kFlush, kCompaction, kRecovery, + kMisc, }; struct TableFileCreationBriefInfo { @@ -103,6 +104,7 @@ enum class FlushReason : int { kDeleteFiles = 0x08, kAutoCompaction = 0x09, kManualFlush = 0x0a, + kErrorRecovery = 0xb, }; enum class BackgroundErrorReason { @@ -141,6 +143,16 @@ struct TableFileDeletionInfo { Status status; }; +struct FileOperationInfo { + const std::string& path; + uint64_t offset; + size_t length; + time_t start_timestamp; + time_t finish_timestamp; 
+ Status status; + FileOperationInfo(const std::string& _path) : path(_path) {} +}; + struct FlushJobInfo { // the name of the column family std::string cf_name; @@ -175,6 +187,8 @@ struct CompactionJobInfo { explicit CompactionJobInfo(const CompactionJobStats& _stats) : stats(_stats) {} + // the id of the column family where the compaction happened. + uint32_t cf_id; // the name of the column family where the compaction happened. std::string cf_name; // the status indicating whether the compaction was successful or not. @@ -295,6 +309,16 @@ class EventListener { // returned value. virtual void OnTableFileDeleted(const TableFileDeletionInfo& /*info*/) {} + // A callback function to RocksDB which will be called before a + // RocksDB starts to compact. The default implementation is + // no-op. + // + // Note that the this function must be implemented in a way such that + // it should not run for an extended period of time before the function + // returns. Otherwise, RocksDB may be blocked. + virtual void OnCompactionBegin(DB* /*db*/, + const CompactionJobInfo& /*ci*/) {} + // A callback function for RocksDB which will be called whenever // a registered RocksDB compacts a file. The default implementation // is a no-op. @@ -393,6 +417,33 @@ class EventListener { // returns. Otherwise, RocksDB may be blocked. virtual void OnStallConditionsChanged(const WriteStallInfo& /*info*/) {} + // A callback function for RocksDB which will be called whenever a file read + // operation finishes. + virtual void OnFileReadFinish(const FileOperationInfo& /* info */) {} + + // A callback function for RocksDB which will be called whenever a file write + // operation finishes. + virtual void OnFileWriteFinish(const FileOperationInfo& /* info */) {} + + // If true, the OnFileReadFinish and OnFileWriteFinish will be called. If + // false, then they won't be called. 
+ virtual bool ShouldBeNotifiedOnFileIO() { return false; } + + // A callback function for RocksDB which will be called just before + // starting the automatic recovery process for recoverable background + // errors, such as NoSpace(). The callback can suppress the automatic + // recovery by setting *auto_recovery to false. The database will then + // have to be transitioned out of read-only mode by calling DB::Resume() + virtual void OnErrorRecoveryBegin(BackgroundErrorReason /* reason */, + Status /* bg_error */, + bool* /* auto_recovery */) {} + + // A callback function for RocksDB which will be called once the database + // is recovered from read-only mode after an error. When this is called, it + // means normal writes to the database can be issued and the user can + // initiate any further recovery actions needed + virtual void OnErrorRecoveryCompleted(Status /* old_bg_error */) {} + virtual ~EventListener() {} }; diff --git a/3rdParty/rocksdb/v5.18.X/include/rocksdb/memory_allocator.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/memory_allocator.h new file mode 100644 index 0000000000..889c0e9218 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/memory_allocator.h @@ -0,0 +1,77 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/status.h" + +#include + +namespace rocksdb { + +// MemoryAllocator is an interface that a client can implement to supply custom +// memory allocation and deallocation methods. See rocksdb/cache.h for more +// information. +// All methods should be thread-safe. +class MemoryAllocator { + public: + virtual ~MemoryAllocator() = default; + + // Name of the cache allocator, printed in the log + virtual const char* Name() const = 0; + + // Allocate a block of at least size. 
Has to be thread-safe. + virtual void* Allocate(size_t size) = 0; + + // Deallocate previously allocated block. Has to be thread-safe. + virtual void Deallocate(void* p) = 0; + + // Returns the memory size of the block allocated at p. The default + // implementation that just returns the original allocation_size is fine. + virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const { + // default implementation just returns the allocation size + return allocation_size; + } +}; + +struct JemallocAllocatorOptions { + // Jemalloc tcache cache allocations by size class. For each size class, + // it caches between 20 (for large size classes) to 200 (for small size + // classes). To reduce tcache memory usage in case the allocator is access + // by large number of threads, we can control whether to cache an allocation + // by its size. + bool limit_tcache_size = false; + + // Lower bound of allocation size to use tcache, if limit_tcache_size=true. + // When used with block cache, it is recommneded to set it to block_size/4. + size_t tcache_size_lower_bound = 1024; + + // Upper bound of allocation size to use tcache, if limit_tcache_size=true. + // When used with block cache, it is recommneded to set it to block_size. + size_t tcache_size_upper_bound = 16 * 1024; +}; + +// Generate memory allocators which allocates through Jemalloc and utilize +// MADV_DONTDUMP through madvice to exclude cache items from core dump. +// Applications can use the allocator with block cache to exclude block cache +// usage from core dump. +// +// Implementation details: +// The JemallocNodumpAllocator creates a delicated jemalloc arena, and all +// allocations of the JemallocNodumpAllocator is through the same arena. +// The memory allocator hooks memory allocation of the arena, and call +// madvice() with MADV_DONTDUMP flag to exclude the piece of memory from +// core dump. Side benefit of using single arena would be reduce of jemalloc +// metadata for some workload. 
+// +// To mitigate mutex contention for using one single arena, jemalloc tcache +// (thread-local cache) is enabled to cache unused allocations for future use. +// The tcache normally incur 0.5M extra memory usage per-thread. The usage +// can be reduce by limitting allocation sizes to cache. +extern Status NewJemallocNodumpAllocator( + JemallocAllocatorOptions& options, + std::shared_ptr* memory_allocator); + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/memtablerep.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/memtablerep.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/memtablerep.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/memtablerep.h index 4b6e897a6d..4c2a23e0a8 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/memtablerep.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/memtablerep.h @@ -144,6 +144,14 @@ class MemTableRep { // or any writes done directly to entries accessed through the iterator.) virtual void MarkReadOnly() { } + // Notify this table rep that it has been flushed to stable storage. + // By default, does nothing. + // + // Invariant: MarkReadOnly() is called, before MarkFlushed(). + // Note that this method if overridden, should not run for an extended period + // of time. Otherwise, RocksDB may be blocked. 
+ virtual void MarkFlushed() { } + // Look up key from the mem table, since the first key in the mem table whose // user_key matches the one given k, call the function callback_func(), with // callback_args directly forwarded as the first parameter, and the mem table diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/merge_operator.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/merge_operator.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/merge_operator.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/merge_operator.h index 8406d4a74f..b90f3d72f1 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/merge_operator.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/merge_operator.h @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#ifndef STORAGE_ROCKSDB_INCLUDE_MERGE_OPERATOR_H_ -#define STORAGE_ROCKSDB_INCLUDE_MERGE_OPERATOR_H_ +#pragma once #include #include @@ -241,5 +240,3 @@ class AssociativeMergeOperator : public MergeOperator { }; } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_MERGE_OPERATOR_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/metadata.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/metadata.h similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/metadata.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/metadata.h index f6be889ba9..e62d4f4098 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/metadata.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/metadata.h @@ -63,9 +63,12 @@ struct SstFileMetaData { smallestkey(""), largestkey(""), num_reads_sampled(0), - being_compacted(false) {} + being_compacted(false), + num_entries(0), + num_deletions(0) {} + SstFileMetaData(const std::string& _file_name, const std::string& _path, - uint64_t _size, SequenceNumber _smallest_seqno, + size_t _size, SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno, const std::string& _smallestkey, 
const std::string& _largestkey, uint64_t _num_reads_sampled, @@ -78,10 +81,12 @@ struct SstFileMetaData { smallestkey(_smallestkey), largestkey(_largestkey), num_reads_sampled(_num_reads_sampled), - being_compacted(_being_compacted) {} + being_compacted(_being_compacted), + num_entries(0), + num_deletions(0) {} // File size in bytes. - uint64_t size; + size_t size; // The name of the file. std::string name; // The full path where the file locates. @@ -93,11 +98,15 @@ struct SstFileMetaData { std::string largestkey; // Largest user defined key in the file. uint64_t num_reads_sampled; // How many times the file is read. bool being_compacted; // true if the file is currently being compacted. + + uint64_t num_entries; + uint64_t num_deletions; }; // The full set of metadata associated with each SST file. struct LiveFileMetaData : SstFileMetaData { std::string column_family_name; // Name of the column family int level; // Level at which this file resides. + LiveFileMetaData() : column_family_name(), level(0) {} }; } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/options.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/options.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/options.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/options.h index 5499595d5e..c3ba448394 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/options.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/options.h @@ -6,8 +6,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef STORAGE_ROCKSDB_INCLUDE_OPTIONS_H_ -#define STORAGE_ROCKSDB_INCLUDE_OPTIONS_H_ +#pragma once #include #include @@ -189,8 +188,7 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions { // Dynamically changeable through SetOptions() API size_t write_buffer_size = 64 << 20; - // Compress blocks using the specified compression algorithm. 
This - // parameter can be changed dynamically. + // Compress blocks using the specified compression algorithm. // // Default: kSnappyCompression, if it's supported. If snappy is not linked // with the library, the default is kNoCompression. @@ -213,6 +211,8 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions { // - kZlibCompression: Z_DEFAULT_COMPRESSION (currently -1) // - kLZ4HCCompression: 0 // - For all others, we do not specify a compression level + // + // Dynamically changeable through SetOptions() API CompressionType compression; // Compression algorithm that will be used for the bottommost level that @@ -417,7 +417,10 @@ struct DBOptions { // files opened are always kept open. You can estimate number of files based // on target_file_size_base and target_file_size_multiplier for level-based // compaction. For universal-style compaction, you can usually set it to -1. + // // Default: -1 + // + // Dynamically changeable through SetDBOptions() API. int max_open_files = -1; // If max_open_files is -1, DB will open all files on DB::Open(). You can @@ -432,7 +435,10 @@ struct DBOptions { // [sum of all write_buffer_size * max_write_buffer_number] * 4 // This option takes effect only when there are more than one column family as // otherwise the wal size is dictated by the write_buffer_size. + // // Default: 0 + // + // Dynamically changeable through SetDBOptions() API. uint64_t max_total_wal_size = 0; // If non-null, then we should collect metrics about database operations @@ -493,13 +499,23 @@ struct DBOptions { // value is 6 hours. The files that get out of scope by compaction // process will still get automatically delete on every compaction, // regardless of this setting + // + // Default: 6 hours + // + // Dynamically changeable through SetDBOptions() API. uint64_t delete_obsolete_files_period_micros = 6ULL * 60 * 60 * 1000000; // Maximum number of concurrent background jobs (compactions and flushes). 
+ // + // Default: 2 + // + // Dynamically changeable through SetDBOptions() API. int max_background_jobs = 2; // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the // value of max_background_jobs. This option is ignored. + // + // Dynamically changeable through SetDBOptions() API. int base_background_compactions = -1; // NOT SUPPORTED ANYMORE: RocksDB automatically decides this based on the @@ -514,7 +530,10 @@ struct DBOptions { // If you're increasing this, also consider increasing number of threads in // LOW priority thread pool. For more information, see // Env::SetBackgroundThreads + // // Default: -1 + // + // Dynamically changeable through SetDBOptions() API. int max_background_compactions = -1; // This value represents the maximum number of threads that will @@ -643,7 +662,10 @@ struct DBOptions { bool skip_log_error_on_recovery = false; // if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec + // // Default: 600 (10 min) + // + // Dynamically changeable through SetDBOptions() API. unsigned int stats_dump_period_sec = 600; // If set true, will hint the underlying file system that the file @@ -710,6 +732,8 @@ struct DBOptions { // true. // // Default: 0 + // + // Dynamically changeable through SetDBOptions() API. size_t compaction_readahead_size = 0; // This is a maximum buffer size that is used by WinMmapReadableFile in @@ -736,6 +760,8 @@ struct DBOptions { // write requests if the logical sector size is unusual // // Default: 1024 * 1024 (1 MB) + // + // Dynamically changeable through SetDBOptions() API. size_t writable_file_max_buffer_size = 1024 * 1024; @@ -758,17 +784,23 @@ struct DBOptions { // to smooth out write I/Os over time. Users shouldn't rely on it for // persistency guarantee. // Issue one request for every bytes_per_sync written. 0 turns it off. - // Default: 0 // // You may consider using rate_limiter to regulate write rate to device. 
// When rate limiter is enabled, it automatically enables bytes_per_sync // to 1MB. // // This option applies to table files + // + // Default: 0, turned off + // + // Dynamically changeable through SetDBOptions() API. uint64_t bytes_per_sync = 0; // Same as bytes_per_sync, but applies to WAL files + // // Default: 0, turned off + // + // Dynamically changeable through SetDBOptions() API. uint64_t wal_bytes_per_sync = 0; // A vector of EventListeners which callback functions will be called @@ -795,6 +827,8 @@ struct DBOptions { // Unit: byte per second. // // Default: 0 + // + // Dynamically changeable through SetDBOptions() API. uint64_t delayed_write_rate = 0; // By default, a single write thread queue is maintained. The thread gets @@ -944,6 +978,20 @@ struct DBOptions { // relies on manual invocation of FlushWAL to write the WAL buffer to its // file. bool manual_wal_flush = false; + + // If true, RocksDB supports flushing multiple column families and committing + // their results atomically to MANIFEST. Note that it is not + // necessary to set atomic_flush to true if WAL is always enabled since WAL + // allows the database to be restored to the last persistent state in WAL. + // This option is useful when there are column families with writes NOT + // protected by WAL. + // For manual flush, application has to specify which column families to + // flush atomically in DB::Flush. + // For auto-triggered flush, RocksDB atomically flushes ALL column families. + // + // Currently, any WAL-enabled writes after atomic flush may be replayed + // independently if the process crashes later and tries to recover. + bool atomic_flush = false; }; // Options to control the behavior of a database (passed to DB::Open) @@ -1183,8 +1231,13 @@ struct FlushOptions { // If true, the flush will wait until the flush is done. 
// Default: true bool wait; - - FlushOptions() : wait(true) {} + // If true, the flush would proceed immediately even it means writes will + // stall for the duration of the flush; if false the operation will wait + // until it's possible to do flush w/o causing stall or until required flush + // is performed by someone else (foreground call or background thread). + // Default: false + bool allow_write_stall; + FlushOptions() : wait(true), allow_write_stall(false) {} }; // Create a Logger from provided DBOptions @@ -1196,6 +1249,9 @@ extern Status CreateLoggerFromOptions(const std::string& dbname, struct CompactionOptions { // Compaction output compression type // Default: snappy + // If set to `kDisableCompressionOption`, RocksDB will choose compression type + // according to the `ColumnFamilyOptions`, taking into account the output + // level if `compression_per_level` is specified. CompressionType compression; // Compaction will create files of size `output_file_size_limit`. // Default: MAX, which means that compaction will create a single file @@ -1281,8 +1337,11 @@ struct IngestExternalFileOptions { bool write_global_seqno = true; }; -struct TraceOptions {}; +// TraceOptions is used for StartTrace +struct TraceOptions { + // To avoid the trace file size grows large than the storage space, + // user can set the max trace file size in Bytes. 
Default is 64GB + uint64_t max_trace_file_size = uint64_t{64} * 1024 * 1024 * 1024; +}; } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_OPTIONS_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/perf_context.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/perf_context.h similarity index 85% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/perf_context.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/perf_context.h index c3d61a3f62..1b11fa3cc2 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/perf_context.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/perf_context.h @@ -3,9 +3,9 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#ifndef STORAGE_ROCKSDB_INCLUDE_PERF_CONTEXT_H -#define STORAGE_ROCKSDB_INCLUDE_PERF_CONTEXT_H +#pragma once +#include #include #include @@ -17,12 +17,44 @@ namespace rocksdb { // and transparently. // Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats. +// Break down performance counters by level and store per-level perf context in +// PerfContextByLevel +struct PerfContextByLevel { + // # of times bloom filter has avoided file reads, i.e., negatives. + uint64_t bloom_filter_useful = 0; + // # of times bloom FullFilter has not avoided the reads. + uint64_t bloom_filter_full_positive = 0; + // # of times bloom FullFilter has not avoided the reads and data actually + // exist. 
+ uint64_t bloom_filter_full_true_positive = 0; + + // total number of user key returned (only include keys that are found, does + // not include keys that are deleted or merged without a final put + uint64_t user_key_return_count; + + // total nanos spent on reading data from SST files + uint64_t get_from_table_nanos; + + void Reset(); // reset all performance counters to zero +}; + struct PerfContext { + ~PerfContext(); + void Reset(); // reset all performance counters to zero std::string ToString(bool exclude_zero_counters = false) const; + // enable per level perf context and allocate storage for PerfContextByLevel + void EnablePerLevelPerfContext(); + + // temporarily disable per level perf contxt by setting the flag to false + void DisablePerLevelPerfContext(); + + // free the space for PerfContextByLevel, also disable per level perf context + void ClearPerLevelPerfContext(); + uint64_t user_key_comparison_count; // total number of user key comparisons uint64_t block_cache_hit_count; // total number of block cache hits uint64_t block_read_count; // total number of block reads (with IO) @@ -169,6 +201,8 @@ struct PerfContext { uint64_t env_lock_file_nanos; uint64_t env_unlock_file_nanos; uint64_t env_new_logger_nanos; + std::map* level_to_perf_context; + bool per_level_perf_context_enabled; }; // Get Thread-local PerfContext object pointer @@ -176,5 +210,3 @@ struct PerfContext { PerfContext* get_perf_context(); } - -#endif diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/perf_level.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/perf_level.h similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/perf_level.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/perf_level.h index 84a331c355..218c6015f8 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/perf_level.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/perf_level.h @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file 
in the root directory). -#ifndef INCLUDE_ROCKSDB_PERF_LEVEL_H_ -#define INCLUDE_ROCKSDB_PERF_LEVEL_H_ +#pragma once #include #include @@ -29,5 +28,3 @@ void SetPerfLevel(PerfLevel level); PerfLevel GetPerfLevel(); } // namespace rocksdb - -#endif // INCLUDE_ROCKSDB_PERF_LEVEL_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/persistent_cache.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/persistent_cache.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/persistent_cache.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/persistent_cache.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/rate_limiter.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/rate_limiter.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/rate_limiter.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/rate_limiter.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/slice.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/slice.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/slice.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/slice.h index 2eeedb9cf6..9ccbdc51e5 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/slice.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/slice.h @@ -16,8 +16,7 @@ // non-const method, all threads accessing the same Slice must use // external synchronization. 
-#ifndef STORAGE_ROCKSDB_INCLUDE_SLICE_H_ -#define STORAGE_ROCKSDB_INCLUDE_SLICE_H_ +#pragma once #include #include @@ -48,7 +47,7 @@ class Slice { #ifdef __cpp_lib_string_view // Create a slice that refers to the same contents as "sv" /* implicit */ - Slice(std::string_view sv) : data_(sv.data()), size_(sv.size()) { } + Slice(std::string_view sv) : data_(sv.data()), size_(sv.size()) {} #endif // Create a slice that refers to s[0,strlen(s)-1] @@ -256,6 +255,4 @@ inline size_t Slice::difference_offset(const Slice& b) const { return off; } -} // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_SLICE_H_ +} // namespace rocksdb \ No newline at end of file diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/slice_transform.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/slice_transform.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/slice_transform.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/slice_transform.h index 5a461b7769..2bbe06153a 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/slice_transform.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/slice_transform.h @@ -12,8 +12,7 @@ // define InDomain and InRange to determine which slices are in either // of these sets respectively. 
-#ifndef STORAGE_ROCKSDB_INCLUDE_SLICE_TRANSFORM_H_ -#define STORAGE_ROCKSDB_INCLUDE_SLICE_TRANSFORM_H_ +#pragma once #include @@ -100,5 +99,3 @@ extern const SliceTransform* NewCappedPrefixTransform(size_t cap_len); extern const SliceTransform* NewNoopTransform(); } - -#endif // STORAGE_ROCKSDB_INCLUDE_SLICE_TRANSFORM_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/snapshot.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/snapshot.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/snapshot.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/snapshot.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/sst_dump_tool.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_dump_tool.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/sst_dump_tool.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_dump_tool.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/sst_file_manager.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_manager.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/sst_file_manager.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_manager.h diff --git a/3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_reader.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_reader.h new file mode 100644 index 0000000000..e58c84792e --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_reader.h @@ -0,0 +1,45 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#ifndef ROCKSDB_LITE + +#include "rocksdb/slice.h" +#include "rocksdb/options.h" +#include "rocksdb/iterator.h" +#include "rocksdb/table_properties.h" + +namespace rocksdb { + +// SstFileReader is used to read sst files that are generated by DB or +// SstFileWriter. +class SstFileReader { + public: + SstFileReader(const Options& options); + + ~SstFileReader(); + + // Prepares to read from the file located at "file_path". + Status Open(const std::string& file_path); + + // Returns a new iterator over the table contents. + // Most read options provide the same control as we read from DB. + // If "snapshot" is nullptr, the iterator returns only the latest keys. + Iterator* NewIterator(const ReadOptions& options); + + std::shared_ptr GetTableProperties() const; + + // Verifies whether there is corruption in this table. + Status VerifyChecksum(); + + private: + struct Rep; + std::unique_ptr rep_; +}; + +} // namespace rocksdb + +#endif // !ROCKSDB_LITE diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/sst_file_writer.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_writer.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/sst_file_writer.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_writer.h index 97e3e30d70..24bcdbd135 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/sst_file_writer.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/sst_file_writer.h @@ -3,10 +3,10 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-#ifndef ROCKSDB_LITE - #pragma once +#ifndef ROCKSDB_LITE + #include #include diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/statistics.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/statistics.h similarity index 58% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/statistics.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/statistics.h index 30e79b0998..14e6195fae 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/statistics.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/statistics.h @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#ifndef STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_ -#define STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_ +#pragma once #include #include @@ -156,7 +155,8 @@ enum Tickers : uint32_t { // Disabled by default. To enable it set stats level to kAll DB_MUTEX_WAIT_MICROS, RATE_LIMIT_DELAY_MILLIS, - NO_ITERATORS, // number of iterators currently open + // DEPRECATED number of iterators currently open + NO_ITERATORS, // Number of MultiGet calls, keys read, and bytes read NUMBER_MULTIGET_CALLS, @@ -323,159 +323,15 @@ enum Tickers : uint32_t { // Number of keys actually found in MultiGet calls (vs number requested by caller) // NUMBER_MULTIGET_KEYS_READ gives the number requested by caller NUMBER_MULTIGET_KEYS_FOUND, + + NO_ITERATOR_CREATED, // number of iterators created + NO_ITERATOR_DELETED, // number of iterators deleted TICKER_ENUM_MAX }; // The order of items listed in Tickers should be the same as // the order listed in TickersNameMap -const std::vector> TickersNameMap = { - {BLOCK_CACHE_MISS, "rocksdb.block.cache.miss"}, - {BLOCK_CACHE_HIT, "rocksdb.block.cache.hit"}, - {BLOCK_CACHE_ADD, "rocksdb.block.cache.add"}, - {BLOCK_CACHE_ADD_FAILURES, "rocksdb.block.cache.add.failures"}, - {BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss"}, - {BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit"}, - {BLOCK_CACHE_INDEX_ADD, 
"rocksdb.block.cache.index.add"}, - {BLOCK_CACHE_INDEX_BYTES_INSERT, "rocksdb.block.cache.index.bytes.insert"}, - {BLOCK_CACHE_INDEX_BYTES_EVICT, "rocksdb.block.cache.index.bytes.evict"}, - {BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss"}, - {BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit"}, - {BLOCK_CACHE_FILTER_ADD, "rocksdb.block.cache.filter.add"}, - {BLOCK_CACHE_FILTER_BYTES_INSERT, - "rocksdb.block.cache.filter.bytes.insert"}, - {BLOCK_CACHE_FILTER_BYTES_EVICT, "rocksdb.block.cache.filter.bytes.evict"}, - {BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss"}, - {BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit"}, - {BLOCK_CACHE_DATA_ADD, "rocksdb.block.cache.data.add"}, - {BLOCK_CACHE_DATA_BYTES_INSERT, "rocksdb.block.cache.data.bytes.insert"}, - {BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"}, - {BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"}, - {BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"}, - {BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"}, - {BLOOM_FILTER_FULL_TRUE_POSITIVE, - "rocksdb.bloom.filter.full.true.positive"}, - {PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"}, - {PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"}, - {SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"}, - {SIM_BLOCK_CACHE_MISS, "rocksdb.sim.block.cache.miss"}, - {MEMTABLE_HIT, "rocksdb.memtable.hit"}, - {MEMTABLE_MISS, "rocksdb.memtable.miss"}, - {GET_HIT_L0, "rocksdb.l0.hit"}, - {GET_HIT_L1, "rocksdb.l1.hit"}, - {GET_HIT_L2_AND_UP, "rocksdb.l2andup.hit"}, - {COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new"}, - {COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete"}, - {COMPACTION_KEY_DROP_RANGE_DEL, "rocksdb.compaction.key.drop.range_del"}, - {COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user"}, - {COMPACTION_RANGE_DEL_DROP_OBSOLETE, - "rocksdb.compaction.range_del.drop.obsolete"}, - {COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, - 
"rocksdb.compaction.optimized.del.drop.obsolete"}, - {COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"}, - {NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"}, - {NUMBER_KEYS_READ, "rocksdb.number.keys.read"}, - {NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"}, - {BYTES_WRITTEN, "rocksdb.bytes.written"}, - {BYTES_READ, "rocksdb.bytes.read"}, - {NUMBER_DB_SEEK, "rocksdb.number.db.seek"}, - {NUMBER_DB_NEXT, "rocksdb.number.db.next"}, - {NUMBER_DB_PREV, "rocksdb.number.db.prev"}, - {NUMBER_DB_SEEK_FOUND, "rocksdb.number.db.seek.found"}, - {NUMBER_DB_NEXT_FOUND, "rocksdb.number.db.next.found"}, - {NUMBER_DB_PREV_FOUND, "rocksdb.number.db.prev.found"}, - {ITER_BYTES_READ, "rocksdb.db.iter.bytes.read"}, - {NO_FILE_CLOSES, "rocksdb.no.file.closes"}, - {NO_FILE_OPENS, "rocksdb.no.file.opens"}, - {NO_FILE_ERRORS, "rocksdb.no.file.errors"}, - {STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros"}, - {STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros"}, - {STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros"}, - {STALL_MICROS, "rocksdb.stall.micros"}, - {DB_MUTEX_WAIT_MICROS, "rocksdb.db.mutex.wait.micros"}, - {RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis"}, - {NO_ITERATORS, "rocksdb.num.iterators"}, - {NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get"}, - {NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read"}, - {NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read"}, - {NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered"}, - {NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures"}, - {BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked"}, - {BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful"}, - {NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration"}, - {GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"}, - {BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss"}, - {BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit"}, 
- {BLOCK_CACHE_COMPRESSED_ADD, "rocksdb.block.cachecompressed.add"}, - {BLOCK_CACHE_COMPRESSED_ADD_FAILURES, - "rocksdb.block.cachecompressed.add.failures"}, - {WAL_FILE_SYNCED, "rocksdb.wal.synced"}, - {WAL_FILE_BYTES, "rocksdb.wal.bytes"}, - {WRITE_DONE_BY_SELF, "rocksdb.write.self"}, - {WRITE_DONE_BY_OTHER, "rocksdb.write.other"}, - {WRITE_TIMEDOUT, "rocksdb.write.timeout"}, - {WRITE_WITH_WAL, "rocksdb.write.wal"}, - {COMPACT_READ_BYTES, "rocksdb.compact.read.bytes"}, - {COMPACT_WRITE_BYTES, "rocksdb.compact.write.bytes"}, - {FLUSH_WRITE_BYTES, "rocksdb.flush.write.bytes"}, - {NUMBER_DIRECT_LOAD_TABLE_PROPERTIES, - "rocksdb.number.direct.load.table.properties"}, - {NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"}, - {NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"}, - {NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"}, - {NUMBER_BLOCK_COMPRESSED, "rocksdb.number.block.compressed"}, - {NUMBER_BLOCK_DECOMPRESSED, "rocksdb.number.block.decompressed"}, - {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, - {MERGE_OPERATION_TOTAL_TIME, "rocksdb.merge.operation.time.nanos"}, - {FILTER_OPERATION_TOTAL_TIME, "rocksdb.filter.operation.time.nanos"}, - {ROW_CACHE_HIT, "rocksdb.row.cache.hit"}, - {ROW_CACHE_MISS, "rocksdb.row.cache.miss"}, - {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, - {READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"}, - {NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"}, - {NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"}, - {BLOB_DB_NUM_PUT, "rocksdb.blobdb.num.put"}, - {BLOB_DB_NUM_WRITE, "rocksdb.blobdb.num.write"}, - {BLOB_DB_NUM_GET, "rocksdb.blobdb.num.get"}, - {BLOB_DB_NUM_MULTIGET, "rocksdb.blobdb.num.multiget"}, - {BLOB_DB_NUM_SEEK, "rocksdb.blobdb.num.seek"}, - {BLOB_DB_NUM_NEXT, "rocksdb.blobdb.num.next"}, - {BLOB_DB_NUM_PREV, "rocksdb.blobdb.num.prev"}, - {BLOB_DB_NUM_KEYS_WRITTEN, 
"rocksdb.blobdb.num.keys.written"}, - {BLOB_DB_NUM_KEYS_READ, "rocksdb.blobdb.num.keys.read"}, - {BLOB_DB_BYTES_WRITTEN, "rocksdb.blobdb.bytes.written"}, - {BLOB_DB_BYTES_READ, "rocksdb.blobdb.bytes.read"}, - {BLOB_DB_WRITE_INLINED, "rocksdb.blobdb.write.inlined"}, - {BLOB_DB_WRITE_INLINED_TTL, "rocksdb.blobdb.write.inlined.ttl"}, - {BLOB_DB_WRITE_BLOB, "rocksdb.blobdb.write.blob"}, - {BLOB_DB_WRITE_BLOB_TTL, "rocksdb.blobdb.write.blob.ttl"}, - {BLOB_DB_BLOB_FILE_BYTES_WRITTEN, "rocksdb.blobdb.blob.file.bytes.written"}, - {BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file.bytes.read"}, - {BLOB_DB_BLOB_FILE_SYNCED, "rocksdb.blobdb.blob.file.synced"}, - {BLOB_DB_BLOB_INDEX_EXPIRED_COUNT, - "rocksdb.blobdb.blob.index.expired.count"}, - {BLOB_DB_BLOB_INDEX_EXPIRED_SIZE, "rocksdb.blobdb.blob.index.expired.size"}, - {BLOB_DB_BLOB_INDEX_EVICTED_COUNT, - "rocksdb.blobdb.blob.index.evicted.count"}, - {BLOB_DB_BLOB_INDEX_EVICTED_SIZE, "rocksdb.blobdb.blob.index.evicted.size"}, - {BLOB_DB_GC_NUM_FILES, "rocksdb.blobdb.gc.num.files"}, - {BLOB_DB_GC_NUM_NEW_FILES, "rocksdb.blobdb.gc.num.new.files"}, - {BLOB_DB_GC_FAILURES, "rocksdb.blobdb.gc.failures"}, - {BLOB_DB_GC_NUM_KEYS_OVERWRITTEN, "rocksdb.blobdb.gc.num.keys.overwritten"}, - {BLOB_DB_GC_NUM_KEYS_EXPIRED, "rocksdb.blobdb.gc.num.keys.expired"}, - {BLOB_DB_GC_NUM_KEYS_RELOCATED, "rocksdb.blobdb.gc.num.keys.relocated"}, - {BLOB_DB_GC_BYTES_OVERWRITTEN, "rocksdb.blobdb.gc.bytes.overwritten"}, - {BLOB_DB_GC_BYTES_EXPIRED, "rocksdb.blobdb.gc.bytes.expired"}, - {BLOB_DB_GC_BYTES_RELOCATED, "rocksdb.blobdb.gc.bytes.relocated"}, - {BLOB_DB_FIFO_NUM_FILES_EVICTED, "rocksdb.blobdb.fifo.num.files.evicted"}, - {BLOB_DB_FIFO_NUM_KEYS_EVICTED, "rocksdb.blobdb.fifo.num.keys.evicted"}, - {BLOB_DB_FIFO_BYTES_EVICTED, "rocksdb.blobdb.fifo.bytes.evicted"}, - {TXN_PREPARE_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.prepare"}, - {TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD, - "rocksdb.txn.overhead.mutex.old.commit.map"}, - 
{TXN_DUPLICATE_KEY_OVERHEAD, "rocksdb.txn.overhead.duplicate.key"}, - {TXN_SNAPSHOT_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.snapshot"}, - {NUMBER_MULTIGET_KEYS_FOUND, "rocksdb.number.multiget.keys.found"}, -}; +extern const std::vector> TickersNameMap; /** * Keep adding histogram's here. @@ -558,57 +414,10 @@ enum Histograms : uint32_t { // Time spent flushing memtable to disk FLUSH_TIME, - HISTOGRAM_ENUM_MAX, // TODO(ldemailly): enforce HistogramsNameMap match + HISTOGRAM_ENUM_MAX, }; -const std::vector> HistogramsNameMap = { - {DB_GET, "rocksdb.db.get.micros"}, - {DB_WRITE, "rocksdb.db.write.micros"}, - {COMPACTION_TIME, "rocksdb.compaction.times.micros"}, - {SUBCOMPACTION_SETUP_TIME, "rocksdb.subcompaction.setup.times.micros"}, - {TABLE_SYNC_MICROS, "rocksdb.table.sync.micros"}, - {COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros"}, - {WAL_FILE_SYNC_MICROS, "rocksdb.wal.file.sync.micros"}, - {MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros"}, - {TABLE_OPEN_IO_MICROS, "rocksdb.table.open.io.micros"}, - {DB_MULTIGET, "rocksdb.db.multiget.micros"}, - {READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros"}, - {READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros"}, - {WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros"}, - {STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"}, - {STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"}, - {STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"}, - {HARD_RATE_LIMIT_DELAY_COUNT, "rocksdb.hard.rate.limit.delay.count"}, - {SOFT_RATE_LIMIT_DELAY_COUNT, "rocksdb.soft.rate.limit.delay.count"}, - {NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction"}, - {DB_SEEK, "rocksdb.db.seek.micros"}, - {WRITE_STALL, "rocksdb.db.write.stall"}, - {SST_READ_MICROS, "rocksdb.sst.read.micros"}, - {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"}, - {BYTES_PER_READ, "rocksdb.bytes.per.read"}, - {BYTES_PER_WRITE, 
"rocksdb.bytes.per.write"}, - {BYTES_PER_MULTIGET, "rocksdb.bytes.per.multiget"}, - {BYTES_COMPRESSED, "rocksdb.bytes.compressed"}, - {BYTES_DECOMPRESSED, "rocksdb.bytes.decompressed"}, - {COMPRESSION_TIMES_NANOS, "rocksdb.compression.times.nanos"}, - {DECOMPRESSION_TIMES_NANOS, "rocksdb.decompression.times.nanos"}, - {READ_NUM_MERGE_OPERANDS, "rocksdb.read.num.merge_operands"}, - {BLOB_DB_KEY_SIZE, "rocksdb.blobdb.key.size"}, - {BLOB_DB_VALUE_SIZE, "rocksdb.blobdb.value.size"}, - {BLOB_DB_WRITE_MICROS, "rocksdb.blobdb.write.micros"}, - {BLOB_DB_GET_MICROS, "rocksdb.blobdb.get.micros"}, - {BLOB_DB_MULTIGET_MICROS, "rocksdb.blobdb.multiget.micros"}, - {BLOB_DB_SEEK_MICROS, "rocksdb.blobdb.seek.micros"}, - {BLOB_DB_NEXT_MICROS, "rocksdb.blobdb.next.micros"}, - {BLOB_DB_PREV_MICROS, "rocksdb.blobdb.prev.micros"}, - {BLOB_DB_BLOB_FILE_WRITE_MICROS, "rocksdb.blobdb.blob.file.write.micros"}, - {BLOB_DB_BLOB_FILE_READ_MICROS, "rocksdb.blobdb.blob.file.read.micros"}, - {BLOB_DB_BLOB_FILE_SYNC_MICROS, "rocksdb.blobdb.blob.file.sync.micros"}, - {BLOB_DB_GC_MICROS, "rocksdb.blobdb.gc.micros"}, - {BLOB_DB_COMPRESSION_MICROS, "rocksdb.blobdb.compression.micros"}, - {BLOB_DB_DECOMPRESSION_MICROS, "rocksdb.blobdb.decompression.micros"}, - {FLUSH_TIME, "rocksdb.db.flush.micros"}, -}; +extern const std::vector> HistogramsNameMap; struct HistogramData { double median; @@ -673,5 +482,3 @@ class Statistics { std::shared_ptr CreateDBStatistics(); } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_STATISTICS_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/status.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/status.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/status.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/status.h index db41c3efdb..40b374ecf6 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/status.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/status.h @@ -14,8 +14,7 @@ // non-const method, all threads accessing the same 
Status must use // external synchronization. -#ifndef STORAGE_ROCKSDB_INCLUDE_STATUS_H_ -#define STORAGE_ROCKSDB_INCLUDE_STATUS_H_ +#pragma once #include #include "rocksdb/slice.h" @@ -348,5 +347,3 @@ inline bool Status::operator!=(const Status& rhs) const { } } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_STATUS_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/table.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/table.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/table.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/table.h index 701fff8b4a..a99c8bf6e7 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/table.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/table.h @@ -16,6 +16,7 @@ // https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats#wiki-examples #pragma once + #include #include #include @@ -46,6 +47,7 @@ enum ChecksumType : char { kNoChecksum = 0x0, kCRC32c = 0x1, kxxHash = 0x2, + kxxHash64 = 0x3, }; // For advanced user only @@ -136,6 +138,8 @@ struct BlockBasedTableOptions { // If non-NULL use the specified cache for compressed blocks. // If NULL, rocksdb will not use a compressed block cache. + // Note: though it looks similar to `block_cache`, RocksDB doesn't put the + // same type of object there. std::shared_ptr block_cache_compressed = nullptr; // Approximate size of user data packed per block. Note that the @@ -238,6 +242,12 @@ struct BlockBasedTableOptions { // version 5.15, you should probably use this. // This option only affects newly written tables. When reading existing // tables, the information about version is read from the footer. + // 4 -- Can be read by RocksDB's versions since 5.16. Changes the way we + // encode the values in index blocks. If you don't plan to run RocksDB before + // version 5.16 and you are using index_block_restart_interval > 1, you should + // probably use this as it would reduce the index size. 
+ // This option only affects newly written tables. When reading existing + // tables, the information about version is read from the footer. uint32_t format_version = 2; // Store index blocks on disk in compressed format. Changing this option to @@ -442,7 +452,7 @@ class TableFactory { // NewTableReader() is called in three places: // (1) TableCache::FindTable() calls the function when table cache miss // and cache the table object returned. - // (2) SstFileReader (for SST Dump) opens the table and dump the table + // (2) SstFileDumper (for SST Dump) opens the table and dump the table // contents using the iterator of the table. // (3) DBImpl::IngestExternalFile() calls this function to read the contents of // the sst file it's attempting to add @@ -454,8 +464,8 @@ class TableFactory { // table_reader is the output table reader. virtual Status NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table_reader, bool prefetch_index_and_filter_in_cache = true) const = 0; // Return a table builder to write to a file for this table type. 
diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/table_properties.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/table_properties.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/table_properties.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/table_properties.h index d545e455ff..75c180ff4f 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/table_properties.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/table_properties.h @@ -40,6 +40,8 @@ struct TablePropertiesNames { static const std::string kRawValueSize; static const std::string kNumDataBlocks; static const std::string kNumEntries; + static const std::string kDeletedKeys; + static const std::string kMergeOperands; static const std::string kNumRangeDeletions; static const std::string kFormatVersion; static const std::string kFixedKeyLen; @@ -152,6 +154,10 @@ struct TableProperties { uint64_t num_data_blocks = 0; // the number of entries in this table uint64_t num_entries = 0; + // the number of deletions in the table + uint64_t num_deletions = 0; + // the number of merge operands in the table + uint64_t num_merge_operands = 0; // the number of range deletions in this table uint64_t num_range_deletions = 0; // format version, reserved for backward compatibility @@ -216,6 +222,10 @@ struct TableProperties { // Below is a list of non-basic properties that are collected by database // itself. Especially some properties regarding to the internal keys (which // is unknown to `table`). +// +// DEPRECATED: these properties now belong as TableProperties members. Please +// use TableProperties::num_deletions and TableProperties::num_merge_operands, +// respectively. 
extern uint64_t GetDeletedKeys(const UserCollectedProperties& props); extern uint64_t GetMergeOperands(const UserCollectedProperties& props, bool* property_present); diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/thread_status.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/thread_status.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/thread_status.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/thread_status.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/threadpool.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/threadpool.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/threadpool.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/threadpool.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/trace_reader_writer.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/trace_reader_writer.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/trace_reader_writer.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/trace_reader_writer.h index 31226487b8..28919a0fad 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/trace_reader_writer.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/trace_reader_writer.h @@ -24,6 +24,7 @@ class TraceWriter { virtual Status Write(const Slice& data) = 0; virtual Status Close() = 0; + virtual uint64_t GetFileSize() = 0; }; // TraceReader allows reading RocksDB traces from any system, one operation at diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/transaction_log.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/transaction_log.h similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/transaction_log.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/transaction_log.h index 7fc46ae264..cf80a633f1 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/transaction_log.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/transaction_log.h @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the 
LICENSE.Apache file in the root directory). -#ifndef STORAGE_ROCKSDB_INCLUDE_TRANSACTION_LOG_ITERATOR_H_ -#define STORAGE_ROCKSDB_INCLUDE_TRANSACTION_LOG_ITERATOR_H_ +#pragma once #include "rocksdb/status.h" #include "rocksdb/types.h" @@ -61,7 +60,7 @@ struct BatchResult { // Add empty __ctor and __dtor for the rule of five // However, preserve the original semantics and prohibit copying - // as the unique_ptr member does not copy. + // as the std::unique_ptr member does not copy. BatchResult() {} ~BatchResult() {} @@ -121,5 +120,3 @@ class TransactionLogIterator { }; }; } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_TRANSACTION_LOG_ITERATOR_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/types.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/types.h similarity index 91% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/types.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/types.h index 3a73b7d96c..0868a74157 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/types.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/types.h @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-#ifndef STORAGE_ROCKSDB_INCLUDE_TYPES_H_ -#define STORAGE_ROCKSDB_INCLUDE_TYPES_H_ +#pragma once #include #include "rocksdb/slice.h" @@ -53,5 +52,3 @@ struct FullKey { bool ParseFullKey(const Slice& internal_key, FullKey* result); } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_TYPES_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/universal_compaction.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/universal_compaction.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/universal_compaction.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/universal_compaction.h index ed2220873c..04e2c849f9 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/universal_compaction.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/universal_compaction.h @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#ifndef STORAGE_ROCKSDB_UNIVERSAL_COMPACTION_OPTIONS_H -#define STORAGE_ROCKSDB_UNIVERSAL_COMPACTION_OPTIONS_H +#pragma once #include #include @@ -86,5 +85,3 @@ class CompactionOptionsUniversal { }; } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_UNIVERSAL_COMPACTION_OPTIONS_H diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/backupable_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/backupable_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/backupable_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/backupable_db.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/checkpoint.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/checkpoint.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/checkpoint.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/checkpoint.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/convenience.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/convenience.h 
similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/convenience.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/convenience.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/date_tiered_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/date_tiered_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/date_tiered_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/date_tiered_db.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/db_ttl.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/db_ttl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/db_ttl.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/db_ttl.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/debug.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/debug.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/debug.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/debug.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/document_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/document_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/document_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/document_db.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/env_librados.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/env_librados.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/env_librados.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/env_librados.h index 272365f0c6..82a1f0ba5a 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/env_librados.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/env_librados.h @@ -2,8 +2,8 @@ // This source code is licensed under both the GPLv2 (found in 
the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#ifndef ROCKSDB_UTILITIES_ENV_LIBRADOS_H -#define ROCKSDB_UTILITIES_ENV_LIBRADOS_H + +#pragma once #include #include @@ -173,4 +173,3 @@ class EnvLibrados : public EnvWrapper { friend class LibradosWritableFile; }; } -#endif diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/env_mirror.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/env_mirror.h similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/env_mirror.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/env_mirror.h index bc27cdc488..40e9411ffa 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/env_mirror.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/env_mirror.h @@ -48,20 +48,21 @@ class EnvMirror : public EnvWrapper { delete b_; } - Status NewSequentialFile(const std::string& f, unique_ptr* r, + Status NewSequentialFile(const std::string& f, + std::unique_ptr* r, const EnvOptions& options) override; Status NewRandomAccessFile(const std::string& f, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) override; - Status NewWritableFile(const std::string& f, unique_ptr* r, + Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& options) override; Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) override; virtual Status NewDirectory(const std::string& name, - unique_ptr* result) override { - unique_ptr br; + std::unique_ptr* result) override { + std::unique_ptr br; Status as = a_->NewDirectory(name, result); Status bs = b_->NewDirectory(name, &br); assert(as == bs); diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/geo_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/geo_db.h similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/geo_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/geo_db.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/info_log_finder.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/info_log_finder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/info_log_finder.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/info_log_finder.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/json_document.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/json_document.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/json_document.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/json_document.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/ldb_cmd.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/ldb_cmd.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/ldb_cmd.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/ldb_cmd.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/ldb_cmd_execute_result.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/ldb_cmd_execute_result.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/ldb_cmd_execute_result.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/ldb_cmd_execute_result.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/leveldb_options.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/leveldb_options.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/leveldb_options.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/leveldb_options.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/lua/rocks_lua_compaction_filter.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/lua/rocks_lua_compaction_filter.h similarity index 100% 
rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/lua/rocks_lua_compaction_filter.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/lua/rocks_lua_compaction_filter.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/lua/rocks_lua_custom_library.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/lua/rocks_lua_custom_library.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/lua/rocks_lua_custom_library.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/lua/rocks_lua_custom_library.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/lua/rocks_lua_util.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/lua/rocks_lua_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/lua/rocks_lua_util.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/lua/rocks_lua_util.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/memory_util.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/memory_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/memory_util.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/memory_util.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/object_registry.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/object_registry.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/object_registry.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/object_registry.h index b046ba7c1f..86a51b92ea 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/object_registry.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/object_registry.h @@ -27,8 +27,8 @@ namespace rocksdb { template T* NewCustomObject(const std::string& target, std::unique_ptr* res_guard); -// Returns a new T when called with a string. 
Populates the unique_ptr argument -// if granting ownership to caller. +// Returns a new T when called with a string. Populates the std::unique_ptr +// argument if granting ownership to caller. template using FactoryFunc = std::function*)>; diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/optimistic_transaction_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/optimistic_transaction_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/optimistic_transaction_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/optimistic_transaction_db.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/option_change_migration.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/option_change_migration.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/option_change_migration.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/option_change_migration.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/options_util.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/options_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/options_util.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/options_util.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/sim_cache.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/sim_cache.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/sim_cache.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/sim_cache.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/spatial_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/spatial_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/spatial_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/spatial_db.h diff --git 
a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/stackable_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/stackable_db.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/stackable_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/stackable_db.h index 721203f7ce..eae3a85ea1 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/stackable_db.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/stackable_db.h @@ -278,6 +278,11 @@ class StackableDB : public DB { ColumnFamilyHandle* column_family) override { return db_->Flush(fopts, column_family); } + virtual Status Flush( + const FlushOptions& fopts, + const std::vector& column_families) override { + return db_->Flush(fopts, column_families); + } virtual Status SyncWAL() override { return db_->SyncWAL(); @@ -364,7 +369,7 @@ class StackableDB : public DB { } virtual Status GetUpdatesSince( - SequenceNumber seq_number, unique_ptr* iter, + SequenceNumber seq_number, std::unique_ptr* iter, const TransactionLogIterator::ReadOptions& read_options) override { return db_->GetUpdatesSince(seq_number, iter, read_options); } diff --git a/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/table_properties_collectors.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/table_properties_collectors.h new file mode 100644 index 0000000000..c74f89bc9a --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/table_properties_collectors.h @@ -0,0 +1,77 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once +#ifndef ROCKSDB_LITE +#include +#include + +#include "rocksdb/table_properties.h" + +namespace rocksdb { + +// A factory of a table property collector that marks a SST +// file as need-compaction when it observe at least "D" deletion +// entries in any "N" consecutive entires. +class CompactOnDeletionCollectorFactory + : public TablePropertiesCollectorFactory { + public: + virtual ~CompactOnDeletionCollectorFactory() {} + + virtual TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context context) override; + + // Change the value of sliding_window_size "N" + // Setting it to 0 disables the delete triggered compaction + void SetWindowSize(size_t sliding_window_size) { + sliding_window_size_.store(sliding_window_size); + } + + // Change the value of deletion_trigger "D" + void SetDeletionTrigger(size_t deletion_trigger) { + deletion_trigger_.store(deletion_trigger); + } + + virtual const char* Name() const override { + return "CompactOnDeletionCollector"; + } + + private: + friend std::shared_ptr + NewCompactOnDeletionCollectorFactory( + size_t sliding_window_size, + size_t deletion_trigger); + // A factory of a table property collector that marks a SST + // file as need-compaction when it observe at least "D" deletion + // entries in any "N" consecutive entires. + // + // @param sliding_window_size "N" + // @param deletion_trigger "D" + CompactOnDeletionCollectorFactory( + size_t sliding_window_size, + size_t deletion_trigger) : + sliding_window_size_(sliding_window_size), + deletion_trigger_(deletion_trigger) {} + + std::atomic sliding_window_size_; + std::atomic deletion_trigger_; +}; + +// Creates a factory of a table property collector that marks a SST +// file as need-compaction when it observe at least "D" deletion +// entries in any "N" consecutive entires. +// +// @param sliding_window_size "N". 
Note that this number will be +// round up to the smallest multiple of 128 that is no less +// than the specified size. +// @param deletion_trigger "D". Note that even when "N" is changed, +// the specified number for "D" will not be changed. +extern std::shared_ptr + NewCompactOnDeletionCollectorFactory( + size_t sliding_window_size, + size_t deletion_trigger); +} // namespace rocksdb + +#endif // !ROCKSDB_LITE diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction.h index d6974ca6e2..c1e2441bc3 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction.h @@ -239,14 +239,15 @@ class Transaction { // An overload of the above method that receives a PinnableSlice // For backward compatibility a default implementation is provided virtual Status GetForUpdate(const ReadOptions& options, - ColumnFamilyHandle* /*column_family*/, + ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val, - bool /*exclusive*/ = true) { + bool exclusive = true) { if (pinnable_val == nullptr) { std::string* null_str = nullptr; - return GetForUpdate(options, key, null_str); + return GetForUpdate(options, column_family, key, null_str, exclusive); } else { - auto s = GetForUpdate(options, key, pinnable_val->GetSelf()); + auto s = GetForUpdate(options, column_family, key, + pinnable_val->GetSelf(), exclusive); pinnable_val->PinSelf(); return s; } @@ -437,12 +438,6 @@ class Transaction { virtual void SetLogNumber(uint64_t log) { log_number_ = log; } virtual uint64_t GetLogNumber() const { return log_number_; } - - // Sequence number in WAL where operations start, only valid after - // a successfull commit with the WRITE_COMMITTED 
db txn policy - virtual SequenceNumber GetCommitedSeqNumber() const { - return 0; - } virtual Status SetName(const TransactionName& name) = 0; diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction_db.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction_db.h index 92b2d98a59..1a692f2a7a 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction_db.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction_db.h @@ -137,6 +137,15 @@ struct TransactionOptions { // The maximum number of bytes used for the write batch. 0 means no limit. size_t max_write_batch_size = 0; + + // Skip Concurrency Control. This could be as an optimization if the + // application knows that the transaction would not have any conflict with + // concurrent transactions. It could also be used during recovery if (i) + // application guarantees no conflict between prepared transactions in the WAL + // (ii) application guarantees that recovered transactions will be rolled + // back/commit before new transactions start. + // Default: false + bool skip_concurrency_control = false; }; // The per-write optimizations that do not involve transactions. 
TransactionDB @@ -162,8 +171,8 @@ struct KeyLockInfo { struct DeadlockInfo { TransactionID m_txn_id; uint32_t m_cf_id; - std::string m_waiting_key; bool m_exclusive; + std::string m_waiting_key; }; struct DeadlockPath { diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction_db_mutex.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction_db_mutex.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/transaction_db_mutex.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/transaction_db_mutex.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/utility_db.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/utility_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/utility_db.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/utility_db.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/write_batch_with_index.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/write_batch_with_index.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/utilities/write_batch_with_index.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/utilities/write_batch_with_index.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/version.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/version.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/version.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/version.h index a2a6426fb5..89802521d9 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/version.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/version.h @@ -5,7 +5,7 @@ #pragma once #define ROCKSDB_MAJOR 5 -#define ROCKSDB_MINOR 16 +#define ROCKSDB_MINOR 18 #define ROCKSDB_PATCH 0 // Do not use these. 
We made the mistake of declaring macros starting with diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/wal_filter.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/wal_filter.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/wal_filter.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/wal_filter.h index a22dca9237..b8be77b232 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/wal_filter.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/wal_filter.h @@ -4,6 +4,7 @@ // (found in the LICENSE.Apache file in the root directory). #pragma once + #include #include diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/write_batch.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/write_batch.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/write_batch.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/write_batch.h index 7db177f866..c40c448fdd 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/write_batch.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/write_batch.h @@ -22,8 +22,7 @@ // non-const method, all threads accessing the same WriteBatch must use // external synchronization. 
-#ifndef STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_ -#define STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_ +#pragma once #include #include @@ -367,5 +366,3 @@ class WriteBatch : public WriteBatchBase { }; } // namespace rocksdb - -#endif // STORAGE_ROCKSDB_INCLUDE_WRITE_BATCH_H_ diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/write_batch_base.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/write_batch_base.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/write_batch_base.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/write_batch_base.h diff --git a/3rdParty/rocksdb/v5.16.X/include/rocksdb/write_buffer_manager.h b/3rdParty/rocksdb/v5.18.X/include/rocksdb/write_buffer_manager.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/include/rocksdb/write_buffer_manager.h rename to 3rdParty/rocksdb/v5.18.X/include/rocksdb/write_buffer_manager.h index 856cf4b246..dea904c187 100644 --- a/3rdParty/rocksdb/v5.16.X/include/rocksdb/write_buffer_manager.h +++ b/3rdParty/rocksdb/v5.18.X/include/rocksdb/write_buffer_manager.h @@ -30,6 +30,8 @@ class WriteBufferManager { bool enabled() const { return buffer_size_ != 0; } + bool cost_to_cache() const { return cache_rep_ != nullptr; } + // Only valid if enabled() size_t memory_usage() const { return memory_used_.load(std::memory_order_relaxed); diff --git a/3rdParty/rocksdb/v5.16.X/issue_template.md b/3rdParty/rocksdb/v5.18.X/issue_template.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/issue_template.md rename to 3rdParty/rocksdb/v5.18.X/issue_template.md diff --git a/3rdParty/rocksdb/v5.16.X/java/CMakeLists.txt b/3rdParty/rocksdb/v5.18.X/java/CMakeLists.txt similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/java/CMakeLists.txt rename to 3rdParty/rocksdb/v5.18.X/java/CMakeLists.txt index 96c08b2318..8f4ec9a568 100644 --- a/3rdParty/rocksdb/v5.16.X/java/CMakeLists.txt +++ b/3rdParty/rocksdb/v5.18.X/java/CMakeLists.txt @@ -25,6 +25,7 @@ set(JNI_NATIVE_SOURCES 
rocksjni/jnicallback.cc rocksjni/loggerjnicallback.cc rocksjni/lru_cache.cc + rocksjni/memory_util.cc rocksjni/memtablejni.cc rocksjni/merge_operator.cc rocksjni/native_comparator_wrapper_test.cc @@ -57,6 +58,7 @@ set(JNI_NATIVE_SOURCES rocksjni/writebatchhandlerjnicallback.cc rocksjni/write_batch_test.cc rocksjni/write_batch_with_index.cc + rocksjni/write_buffer_manager.cc ) set(NATIVE_JAVA_CLASSES @@ -96,6 +98,7 @@ set(NATIVE_JAVA_CLASSES org.rocksdb.IngestExternalFileOptions org.rocksdb.Logger org.rocksdb.LRUCache + org.rocksdb.MemoryUtil org.rocksdb.MemTableConfig org.rocksdb.NativeComparatorWrapper org.rocksdb.NativeLibraryLoader @@ -130,6 +133,7 @@ set(NATIVE_JAVA_CLASSES org.rocksdb.TransactionLogIterator org.rocksdb.TransactionOptions org.rocksdb.TtlDB + org.rocksdb.UInt64AddOperator org.rocksdb.VectorMemTableConfig org.rocksdb.WBWIRocksIterator org.rocksdb.WriteBatch @@ -142,6 +146,7 @@ set(NATIVE_JAVA_CLASSES org.rocksdb.SnapshotTest org.rocksdb.WriteBatchTest org.rocksdb.WriteBatchTestInternalHelper + org.rocksdb.WriteBufferManager ) include(FindJava) @@ -222,6 +227,8 @@ add_jar( src/main/java/org/rocksdb/IngestExternalFileOptions.java src/main/java/org/rocksdb/Logger.java src/main/java/org/rocksdb/LRUCache.java + src/main/java/org/rocksdb/MemoryUsageType.java + src/main/java/org/rocksdb/MemoryUtil.java src/main/java/org/rocksdb/MemTableConfig.java src/main/java/org/rocksdb/MergeOperator.java src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java @@ -278,6 +285,7 @@ add_jar( src/main/java/org/rocksdb/WriteBatch.java src/main/java/org/rocksdb/WriteBatchWithIndex.java src/main/java/org/rocksdb/WriteOptions.java + src/main/java/org/rocksdb/WriteBufferManager.java src/main/java/org/rocksdb/util/BytewiseComparator.java src/main/java/org/rocksdb/util/DirectBytewiseComparator.java src/main/java/org/rocksdb/util/Environment.java @@ -290,6 +298,7 @@ add_jar( src/test/java/org/rocksdb/RocksDBExceptionTest.java 
src/test/java/org/rocksdb/RocksMemoryResource.java src/test/java/org/rocksdb/SnapshotTest.java + src/main/java/org/rocksdb/UInt64AddOperator.java src/test/java/org/rocksdb/WriteBatchTest.java src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java src/test/java/org/rocksdb/util/WriteBatchGetter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/HISTORY-JAVA.md b/3rdParty/rocksdb/v5.18.X/java/HISTORY-JAVA.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/HISTORY-JAVA.md rename to 3rdParty/rocksdb/v5.18.X/java/HISTORY-JAVA.md diff --git a/3rdParty/rocksdb/v5.16.X/java/Makefile b/3rdParty/rocksdb/v5.18.X/java/Makefile similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/java/Makefile rename to 3rdParty/rocksdb/v5.18.X/java/Makefile index f58fff06e5..b3b89eb837 100644 --- a/3rdParty/rocksdb/v5.16.X/java/Makefile +++ b/3rdParty/rocksdb/v5.18.X/java/Makefile @@ -30,6 +30,8 @@ NATIVE_JAVA_CLASSES = org.rocksdb.AbstractCompactionFilter\ org.rocksdb.HashSkipListMemTableConfig\ org.rocksdb.Logger\ org.rocksdb.LRUCache\ + org.rocksdb.MemoryUsageType\ + org.rocksdb.MemoryUtil\ org.rocksdb.MergeOperator\ org.rocksdb.NativeComparatorWrapper\ org.rocksdb.OptimisticTransactionDB\ @@ -60,10 +62,12 @@ NATIVE_JAVA_CLASSES = org.rocksdb.AbstractCompactionFilter\ org.rocksdb.VectorMemTableConfig\ org.rocksdb.Snapshot\ org.rocksdb.StringAppendOperator\ + org.rocksdb.UInt64AddOperator\ org.rocksdb.WriteBatch\ org.rocksdb.WriteBatch.Handler\ org.rocksdb.WriteOptions\ org.rocksdb.WriteBatchWithIndex\ + org.rocksdb.WriteBufferManager\ org.rocksdb.WBWIRocksIterator NATIVE_JAVA_TEST_CLASSES = org.rocksdb.RocksDBExceptionTest\ @@ -111,6 +115,7 @@ JAVA_TESTS = org.rocksdb.BackupableDBOptionsTest\ org.rocksdb.KeyMayExistTest\ org.rocksdb.LoggerTest\ org.rocksdb.LRUCacheTest\ + org.rocksdb.MemoryUtilTest\ org.rocksdb.MemTableTest\ org.rocksdb.MergeTest\ org.rocksdb.MixedOptionsTest\ diff --git a/3rdParty/rocksdb/v5.16.X/java/RELEASE.md 
b/3rdParty/rocksdb/v5.18.X/java/RELEASE.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/RELEASE.md rename to 3rdParty/rocksdb/v5.18.X/java/RELEASE.md diff --git a/3rdParty/rocksdb/v5.16.X/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java b/3rdParty/rocksdb/v5.18.X/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java rename to 3rdParty/rocksdb/v5.18.X/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java diff --git a/3rdParty/rocksdb/v5.16.X/java/crossbuild/Vagrantfile b/3rdParty/rocksdb/v5.18.X/java/crossbuild/Vagrantfile similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/crossbuild/Vagrantfile rename to 3rdParty/rocksdb/v5.18.X/java/crossbuild/Vagrantfile diff --git a/3rdParty/rocksdb/v5.16.X/java/crossbuild/build-linux-centos.sh b/3rdParty/rocksdb/v5.18.X/java/crossbuild/build-linux-centos.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/crossbuild/build-linux-centos.sh rename to 3rdParty/rocksdb/v5.18.X/java/crossbuild/build-linux-centos.sh diff --git a/3rdParty/rocksdb/v5.16.X/java/crossbuild/build-linux.sh b/3rdParty/rocksdb/v5.18.X/java/crossbuild/build-linux.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/crossbuild/build-linux.sh rename to 3rdParty/rocksdb/v5.18.X/java/crossbuild/build-linux.sh diff --git a/3rdParty/rocksdb/v5.16.X/java/crossbuild/docker-build-linux-centos.sh b/3rdParty/rocksdb/v5.18.X/java/crossbuild/docker-build-linux-centos.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/crossbuild/docker-build-linux-centos.sh rename to 3rdParty/rocksdb/v5.18.X/java/crossbuild/docker-build-linux-centos.sh diff --git a/3rdParty/rocksdb/v5.16.X/java/jdb_bench.sh b/3rdParty/rocksdb/v5.18.X/java/jdb_bench.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/jdb_bench.sh rename to 
3rdParty/rocksdb/v5.18.X/java/jdb_bench.sh diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni.pom b/3rdParty/rocksdb/v5.18.X/java/rocksjni.pom similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni.pom rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni.pom diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/backupablejni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/backupablejni.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/backupablejni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/backupablejni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/backupenginejni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/backupenginejni.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/backupenginejni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/backupenginejni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/cassandra_compactionfilterjni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/cassandra_compactionfilterjni.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/cassandra_compactionfilterjni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/cassandra_compactionfilterjni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/cassandra_value_operator.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/cassandra_value_operator.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/cassandra_value_operator.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/cassandra_value_operator.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/checkpoint.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/checkpoint.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/checkpoint.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/checkpoint.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/clock_cache.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/clock_cache.cc similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/rocksjni/clock_cache.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/clock_cache.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/columnfamilyhandle.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/columnfamilyhandle.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/columnfamilyhandle.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/columnfamilyhandle.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compact_range_options.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compact_range_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/compact_range_options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compact_range_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter_factory.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter_factory.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter_factory.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter_factory.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter_factory_jnicallback.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter_factory_jnicallback.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter_factory_jnicallback.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter_factory_jnicallback.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter_factory_jnicallback.h b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter_factory_jnicallback.h similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_filter_factory_jnicallback.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_filter_factory_jnicallback.h diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_options_fifo.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_options_fifo.cc similarity index 56% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_options_fifo.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_options_fifo.cc index 95bbfc621d..00761b6ac5 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_options_fifo.cc +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_options_fifo.cc @@ -46,6 +46,53 @@ jlong Java_org_rocksdb_CompactionOptionsFIFO_maxTableFilesSize(JNIEnv* /*env*/, return static_cast(opt->max_table_files_size); } +/* + * Class: org_rocksdb_CompactionOptionsFIFO + * Method: setTtl + * Signature: (JJ)V + */ +void Java_org_rocksdb_CompactionOptionsFIFO_setTtl(JNIEnv* /*env*/, + jobject /*jobj*/, + jlong jhandle, jlong ttl) { + auto* opt = reinterpret_cast(jhandle); + opt->ttl = static_cast(ttl); +} + +/* + * Class: org_rocksdb_CompactionOptionsFIFO + * Method: ttl + * Signature: (J)J + */ +jlong Java_org_rocksdb_CompactionOptionsFIFO_ttl(JNIEnv* /*env*/, + jobject /*jobj*/, + jlong jhandle) { + auto* opt = reinterpret_cast(jhandle); + return static_cast(opt->ttl); +} + +/* + * Class: org_rocksdb_CompactionOptionsFIFO + * Method: setAllowCompaction + * Signature: (JZ)V + */ +void Java_org_rocksdb_CompactionOptionsFIFO_setAllowCompaction( + JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, + jboolean allow_compaction) { + auto* opt = reinterpret_cast(jhandle); + opt->allow_compaction = static_cast(allow_compaction); +} + +/* + * Class: org_rocksdb_CompactionOptionsFIFO + * Method: allowCompaction + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_CompactionOptionsFIFO_allowCompaction( + JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { + auto* opt = 
reinterpret_cast(jhandle); + return static_cast(opt->allow_compaction); +} + /* * Class: org_rocksdb_CompactionOptionsFIFO * Method: disposeInternal diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_options_universal.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_options_universal.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/compaction_options_universal.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compaction_options_universal.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/comparator.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/comparator.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/comparator.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/comparator.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/comparatorjnicallback.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/comparatorjnicallback.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/comparatorjnicallback.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/comparatorjnicallback.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/comparatorjnicallback.h b/3rdParty/rocksdb/v5.18.X/java/rocksjni/comparatorjnicallback.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/comparatorjnicallback.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/comparatorjnicallback.h diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/compression_options.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/compression_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/compression_options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/compression_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/env.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/env.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/env.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/env.cc diff --git 
a/3rdParty/rocksdb/v5.16.X/java/rocksjni/env_options.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/env_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/env_options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/env_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/filter.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/filter.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/filter.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/filter.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/ingest_external_file_options.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/ingest_external_file_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/ingest_external_file_options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/ingest_external_file_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/iterator.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/iterator.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/iterator.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/iterator.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/jnicallback.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/jnicallback.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/jnicallback.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/jnicallback.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/jnicallback.h b/3rdParty/rocksdb/v5.18.X/java/rocksjni/jnicallback.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/jnicallback.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/jnicallback.h diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/loggerjnicallback.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/loggerjnicallback.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/loggerjnicallback.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/loggerjnicallback.cc diff --git 
a/3rdParty/rocksdb/v5.16.X/java/rocksjni/loggerjnicallback.h b/3rdParty/rocksdb/v5.18.X/java/rocksjni/loggerjnicallback.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/loggerjnicallback.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/loggerjnicallback.h diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/lru_cache.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/lru_cache.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/lru_cache.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/lru_cache.cc diff --git a/3rdParty/rocksdb/v5.18.X/java/rocksjni/memory_util.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/memory_util.cc new file mode 100644 index 0000000000..9c2bfd04e2 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/memory_util.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include +#include +#include +#include +#include + +#include "include/org_rocksdb_MemoryUtil.h" + +#include "rocksjni/portal.h" + +#include "rocksdb/utilities/memory_util.h" + + +/* + * Class: org_rocksdb_MemoryUtil + * Method: getApproximateMemoryUsageByType + * Signature: ([J[J)Ljava/util/Map; + */ +jobject Java_org_rocksdb_MemoryUtil_getApproximateMemoryUsageByType( + JNIEnv *env, jclass /*jclazz*/, jlongArray jdb_handles, jlongArray jcache_handles) { + + std::vector dbs; + jsize db_handle_count = env->GetArrayLength(jdb_handles); + if(db_handle_count > 0) { + jlong *ptr_jdb_handles = env->GetLongArrayElements(jdb_handles, nullptr); + if (ptr_jdb_handles == nullptr) { + // exception thrown: OutOfMemoryError + return nullptr; + } + for (jsize i = 0; i < db_handle_count; i++) { + dbs.push_back(reinterpret_cast(ptr_jdb_handles[i])); + } + env->ReleaseLongArrayElements(jdb_handles, ptr_jdb_handles, JNI_ABORT); + } + + std::unordered_set cache_set; + jsize cache_handle_count = env->GetArrayLength(jcache_handles); + if(cache_handle_count > 0) { + jlong *ptr_jcache_handles = env->GetLongArrayElements(jcache_handles, nullptr); + if (ptr_jcache_handles == nullptr) { + // exception thrown: OutOfMemoryError + return nullptr; + } + for (jsize i = 0; i < cache_handle_count; i++) { + auto *cache_ptr = + reinterpret_cast *>(ptr_jcache_handles[i]); + cache_set.insert(cache_ptr->get()); + } + env->ReleaseLongArrayElements(jcache_handles, ptr_jcache_handles, JNI_ABORT); + } + + std::map usage_by_type; + if(rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set, &usage_by_type) != rocksdb::Status::OK()) { + // Non-OK status + return nullptr; + } + + jobject jusage_by_type = rocksdb::HashMapJni::construct( + env, static_cast(usage_by_type.size())); + if (jusage_by_type == nullptr) { + // exception occurred + return nullptr; + } + const rocksdb::HashMapJni::FnMapKV + fn_map_kv = + [env](const std::pair& pair) { + // Construct key + const jobject jusage_type = + 
rocksdb::ByteJni::valueOf(env, rocksdb::MemoryUsageTypeJni::toJavaMemoryUsageType(pair.first)); + if (jusage_type == nullptr) { + // an error occurred + return std::unique_ptr>(nullptr); + } + // Construct value + const jobject jusage_value = + rocksdb::LongJni::valueOf(env, pair.second); + if (jusage_value == nullptr) { + // an error occurred + return std::unique_ptr>(nullptr); + } + // Construct and return pointer to pair of jobjects + return std::unique_ptr>( + new std::pair(jusage_type, + jusage_value)); + }; + + if (!rocksdb::HashMapJni::putAll(env, jusage_by_type, usage_by_type.begin(), + usage_by_type.end(), fn_map_kv)) { + // exception occcurred + jusage_by_type = nullptr; + } + + return jusage_by_type; + +} diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/memtablejni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/memtablejni.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/memtablejni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/memtablejni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/merge_operator.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/merge_operator.cc similarity index 65% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/merge_operator.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/merge_operator.cc index 782153f571..e06a06f7e3 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/merge_operator.cc +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/merge_operator.cc @@ -13,6 +13,7 @@ #include #include "include/org_rocksdb_StringAppendOperator.h" +#include "include/org_rocksdb_UInt64AddOperator.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" @@ -47,3 +48,28 @@ void Java_org_rocksdb_StringAppendOperator_disposeInternal(JNIEnv* /*env*/, reinterpret_cast*>(jhandle); delete sptr_string_append_op; // delete std::shared_ptr } + +/* + * Class: org_rocksdb_UInt64AddOperator + * Method: newSharedUInt64AddOperator + * Signature: ()J + */ +jlong 
Java_org_rocksdb_UInt64AddOperator_newSharedUInt64AddOperator( + JNIEnv* /*env*/, jclass /*jclazz*/) { + auto* sptr_uint64_add_op = new std::shared_ptr( + rocksdb::MergeOperators::CreateUInt64AddOperator()); + return reinterpret_cast(sptr_uint64_add_op); +} + +/* + * Class: org_rocksdb_UInt64AddOperator + * Method: disposeInternal + * Signature: (J)V + */ +void Java_org_rocksdb_UInt64AddOperator_disposeInternal(JNIEnv* /*env*/, + jobject /*jobj*/, + jlong jhandle) { + auto* sptr_uint64_add_op = + reinterpret_cast*>(jhandle); + delete sptr_uint64_add_op; // delete std::shared_ptr +} diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/native_comparator_wrapper_test.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/native_comparator_wrapper_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/native_comparator_wrapper_test.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/native_comparator_wrapper_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/optimistic_transaction_db.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/optimistic_transaction_db.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/optimistic_transaction_db.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/optimistic_transaction_db.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/optimistic_transaction_options.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/optimistic_transaction_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/optimistic_transaction_options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/optimistic_transaction_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/options.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/options.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/options.cc index 9aed80e1e6..342ee3e9e4 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/options.cc +++ 
b/3rdParty/rocksdb/v5.18.X/java/rocksjni/options.cc @@ -250,6 +250,20 @@ void Java_org_rocksdb_Options_setWriteBufferSize(JNIEnv* env, jobject /*jobj*/, } } +/* + * Class: org_rocksdb_Options + * Method: setWriteBufferManager + * Signature: (JJ)V + */ +void Java_org_rocksdb_Options_setWriteBufferManager(JNIEnv* /*env*/, jobject /*jobj*/, + jlong joptions_handle, + jlong jwrite_buffer_manager_handle) { + auto* write_buffer_manager = + reinterpret_cast *>(jwrite_buffer_manager_handle); + reinterpret_cast(joptions_handle)->write_buffer_manager = + *write_buffer_manager; +} + /* * Class: org_rocksdb_Options * Method: writeBufferSize @@ -1956,8 +1970,8 @@ jbyte Java_org_rocksdb_Options_compressionType(JNIEnv* /*env*/, * @param jcompression_levels A reference to a java byte array * where each byte indicates a compression level * - * @return A unique_ptr to the vector, or unique_ptr(nullptr) if a JNI exception - * occurs + * @return A std::unique_ptr to the vector, or std::unique_ptr(nullptr) if a JNI + * exception occurs */ std::unique_ptr> rocksdb_compression_vector_helper(JNIEnv* env, jbyteArray jcompression_levels) { @@ -5518,6 +5532,20 @@ void Java_org_rocksdb_DBOptions_setDbWriteBufferSize( opt->db_write_buffer_size = static_cast(jdb_write_buffer_size); } +/* + * Class: org_rocksdb_DBOptions + * Method: setWriteBufferManager + * Signature: (JJ)V + */ +void Java_org_rocksdb_DBOptions_setWriteBufferManager(JNIEnv* /*env*/, jobject /*jobj*/, + jlong jdb_options_handle, + jlong jwrite_buffer_manager_handle) { + auto* write_buffer_manager = + reinterpret_cast *>(jwrite_buffer_manager_handle); + reinterpret_cast(jdb_options_handle)->write_buffer_manager = + *write_buffer_manager; +} + /* * Class: org_rocksdb_DBOptions * Method: dbWriteBufferSize @@ -6525,6 +6553,31 @@ jlong Java_org_rocksdb_ReadOptions_iterateUpperBound(JNIEnv* /*env*/, return reinterpret_cast(upper_bound_slice_handle); } +/* + * Class: org_rocksdb_ReadOptions + * Method: setIterateLowerBound + * 
Signature: (JJ)I + */ +void Java_org_rocksdb_ReadOptions_setIterateLowerBound( + JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, + jlong jlower_bound_slice_handle) { + reinterpret_cast(jhandle)->iterate_lower_bound = + reinterpret_cast(jlower_bound_slice_handle); +} + +/* + * Class: org_rocksdb_ReadOptions + * Method: iterateLowerBound + * Signature: (J)J + */ +jlong Java_org_rocksdb_ReadOptions_iterateLowerBound(JNIEnv* /*env*/, + jobject /*jobj*/, + jlong jhandle) { + auto& lower_bound_slice_handle = + reinterpret_cast(jhandle)->iterate_lower_bound; + return reinterpret_cast(lower_bound_slice_handle); +} + ///////////////////////////////////////////////////////////////////// // rocksdb::ComparatorOptions diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/options_util.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/options_util.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/options_util.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/options_util.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/portal.h b/3rdParty/rocksdb/v5.18.X/java/rocksjni/portal.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/portal.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/portal.h index a0d1846a65..0bf2867c1c 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/portal.h +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/portal.h @@ -26,6 +26,7 @@ #include "rocksdb/rate_limiter.h" #include "rocksdb/status.h" #include "rocksdb/utilities/backupable_db.h" +#include "rocksdb/utilities/memory_util.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" #include "rocksjni/compaction_filter_factory_jnicallback.h" @@ -2251,7 +2252,7 @@ class ByteJni : public JavaClass { * @param env A pointer to the Java environment * * @return The Java Method ID or nullptr if the class or method id could not - * be retieved + * be retrieved */ static jmethodID getByteValueMethod(JNIEnv* env) { jclass 
clazz = getJClass(env); @@ -2264,6 +2265,39 @@ class ByteJni : public JavaClass { assert(mid != nullptr); return mid; } + + /** + * Calls the Java Method: Byte#valueOf, returning a constructed Byte jobject + * + * @param env A pointer to the Java environment + * + * @return A constructing Byte object or nullptr if the class or method id could not + * be retrieved, or an exception occurred + */ + static jobject valueOf(JNIEnv* env, jbyte jprimitive_byte) { + jclass clazz = getJClass(env); + if (clazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + static jmethodID mid = + env->GetStaticMethodID(clazz, "valueOf", "(B)Ljava/lang/Byte;"); + if (mid == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + const jobject jbyte_obj = + env->CallStaticObjectMethod(clazz, mid, jprimitive_byte); + if (env->ExceptionCheck()) { + // exception occurred + return nullptr; + } + + return jbyte_obj; + } + }; // The portal class for java.lang.StringBuilder @@ -3345,8 +3379,12 @@ class TickerTypeJni { return 0x5D; case rocksdb::Tickers::NUMBER_MULTIGET_KEYS_FOUND: return 0x5E; - case rocksdb::Tickers::TICKER_ENUM_MAX: + case rocksdb::Tickers::NO_ITERATOR_CREATED: return 0x5F; + case rocksdb::Tickers::NO_ITERATOR_DELETED: + return 0x60; + case rocksdb::Tickers::TICKER_ENUM_MAX: + return 0x61; default: // undefined/default @@ -3549,6 +3587,10 @@ class TickerTypeJni { case 0x5E: return rocksdb::Tickers::NUMBER_MULTIGET_KEYS_FOUND; case 0x5F: + return rocksdb::Tickers::NO_ITERATOR_CREATED; + case 0x60: + return rocksdb::Tickers::NO_ITERATOR_DELETED; + case 0x61: return rocksdb::Tickers::TICKER_ENUM_MAX; default: @@ -3795,6 +3837,48 @@ class RateLimiterModeJni { } }; +// The portal class for org.rocksdb.MemoryUsageType +class MemoryUsageTypeJni { +public: + // Returns the equivalent org.rocksdb.MemoryUsageType for the provided + // C++ rocksdb::MemoryUtil::UsageType enum + static jbyte toJavaMemoryUsageType( + 
const rocksdb::MemoryUtil::UsageType& usage_type) { + switch(usage_type) { + case rocksdb::MemoryUtil::UsageType::kMemTableTotal: + return 0x0; + case rocksdb::MemoryUtil::UsageType::kMemTableUnFlushed: + return 0x1; + case rocksdb::MemoryUtil::UsageType::kTableReadersTotal: + return 0x2; + case rocksdb::MemoryUtil::UsageType::kCacheTotal: + return 0x3; + default: + // undefined: use kNumUsageTypes + return 0x4; + } + } + + // Returns the equivalent C++ rocksdb::MemoryUtil::UsageType enum for the + // provided Java org.rocksdb.MemoryUsageType + static rocksdb::MemoryUtil::UsageType toCppMemoryUsageType( + jbyte usage_type) { + switch(usage_type) { + case 0x0: + return rocksdb::MemoryUtil::UsageType::kMemTableTotal; + case 0x1: + return rocksdb::MemoryUtil::UsageType::kMemTableUnFlushed; + case 0x2: + return rocksdb::MemoryUtil::UsageType::kTableReadersTotal; + case 0x3: + return rocksdb::MemoryUtil::UsageType::kCacheTotal; + default: + // undefined/default: use kNumUsageTypes + return rocksdb::MemoryUtil::UsageType::kNumUsageTypes; + } + } +}; + // The portal class for org.rocksdb.Transaction class TransactionJni : public JavaClass { public: diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/ratelimiterjni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/ratelimiterjni.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/ratelimiterjni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/ratelimiterjni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/restorejni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/restorejni.cc similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/rocksjni/restorejni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/restorejni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/rocks_callback_object.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/rocks_callback_object.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/rocks_callback_object.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/rocks_callback_object.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/rocksdb_exception_test.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/rocksdb_exception_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/rocksdb_exception_test.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/rocksdb_exception_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/rocksjni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/rocksjni.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/rocksjni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/rocksjni.cc index c5b9fd591c..6e50c32f7a 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/rocksjni.cc +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/rocksjni.cc @@ -1844,6 +1844,32 @@ jlong Java_org_rocksdb_RocksDB_getLongProperty__JJLjava_lang_String_2I( return 0; } +/* + * Class: org_rocksdb_RocksDB + * Method: getAggregatedLongProperty + * Signature: (JLjava/lang/String;I)J + */ +jlong Java_org_rocksdb_RocksDB_getAggregatedLongProperty( + JNIEnv* env, jobject, jlong db_handle, jstring jproperty, jint jproperty_len) { + const char* property = env->GetStringUTFChars(jproperty, nullptr); + if (property == nullptr) { + return 0; + } + rocksdb::Slice property_slice(property, jproperty_len); + auto* db = reinterpret_cast(db_handle); + uint64_t property_value = 0; + bool retCode = db->GetAggregatedIntProperty(property_slice, &property_value); + env->ReleaseStringUTFChars(jproperty, property); + + if (retCode) { + return property_value; + } + + rocksdb::RocksDBExceptionJni::ThrowNew(env, 
rocksdb::Status::NotFound()); + return 0; +} + + ////////////////////////////////////////////////////////////////////////////// // rocksdb::DB::Flush diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/slice.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/slice.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/slice.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/slice.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/snapshot.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/snapshot.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/snapshot.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/snapshot.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/sst_file_manager.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/sst_file_manager.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/sst_file_manager.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/sst_file_manager.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/sst_file_writerjni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/sst_file_writerjni.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/sst_file_writerjni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/sst_file_writerjni.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/statistics.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/statistics.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/statistics.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/statistics.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/statisticsjni.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/statisticsjni.cc similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/statisticsjni.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/statisticsjni.cc index 3ac1e5b413..8fddc437a0 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/statisticsjni.cc +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/statisticsjni.cc @@ -11,11 +11,11 @@ 
namespace rocksdb { StatisticsJni::StatisticsJni(std::shared_ptr stats) - : StatisticsImpl(stats, false), m_ignore_histograms() { + : StatisticsImpl(stats), m_ignore_histograms() { } StatisticsJni::StatisticsJni(std::shared_ptr stats, - const std::set ignore_histograms) : StatisticsImpl(stats, false), + const std::set ignore_histograms) : StatisticsImpl(stats), m_ignore_histograms(ignore_histograms) { } diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/statisticsjni.h b/3rdParty/rocksdb/v5.18.X/java/rocksjni/statisticsjni.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/statisticsjni.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/statisticsjni.h diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/table.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/table.cc similarity index 89% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/table.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/table.cc index 5f5f8cd2ab..3dbd13280a 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/table.cc +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/table.cc @@ -37,7 +37,7 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( /* * Class: org_rocksdb_BlockBasedTableConfig * Method: newTableFactoryHandle - * Signature: (ZJIJJIIZIZZZJIBBI)J + * Signature: (ZJIJJIIZJZZZZJZZJIBBI)J */ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( JNIEnv * /*env*/, jobject /*jobj*/, jboolean no_block_cache, @@ -45,7 +45,10 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( jlong block_size, jint block_size_deviation, jint block_restart_interval, jboolean whole_key_filtering, jlong jfilter_policy, jboolean cache_index_and_filter_blocks, + jboolean cache_index_and_filter_blocks_with_high_priority, jboolean pin_l0_filter_and_index_blocks_in_cache, + jboolean partition_filters, jlong metadata_block_size, + jboolean pin_top_level_index_and_filter, jboolean hash_index_allow_collision, jlong block_cache_compressed_size, jint 
block_cache_compressd_num_shard_bits, jbyte jchecksum_type, jbyte jindex_type, jint jformat_version) { @@ -77,8 +80,13 @@ jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( options.filter_policy = *pFilterPolicy; } options.cache_index_and_filter_blocks = cache_index_and_filter_blocks; + options.cache_index_and_filter_blocks_with_high_priority = + cache_index_and_filter_blocks_with_high_priority; options.pin_l0_filter_and_index_blocks_in_cache = pin_l0_filter_and_index_blocks_in_cache; + options.partition_filters = partition_filters; + options.metadata_block_size = metadata_block_size; + options.pin_top_level_index_and_filter = pin_top_level_index_and_filter; options.hash_index_allow_collision = hash_index_allow_collision; if (block_cache_compressed_size > 0) { if (block_cache_compressd_num_shard_bits > 0) { diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction.cc index 43be095600..a29736df2d 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction.cc +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction.cc @@ -612,7 +612,11 @@ void txn_write_kv_parts_helper(JNIEnv* env, const jint& jkey_parts_len, const jobjectArray& jvalue_parts, const jint& jvalue_parts_len) { +#ifndef DEBUG + (void) jvalue_parts_len; +#else assert(jkey_parts_len == jvalue_parts_len); +#endif auto key_parts = std::vector(); auto value_parts = std::vector(); diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_db.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_db.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_db.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_db.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_db_options.cc 
b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_db_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_db_options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_db_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_log.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_log.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_log.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_log.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_notifier.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_notifier.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_notifier.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_notifier.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_notifier_jnicallback.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_notifier_jnicallback.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_notifier_jnicallback.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_notifier_jnicallback.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_notifier_jnicallback.h b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_notifier_jnicallback.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_notifier_jnicallback.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_notifier_jnicallback.h diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_options.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/transaction_options.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/transaction_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/ttl.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/ttl.cc similarity index 100% rename 
from 3rdParty/rocksdb/v5.16.X/java/rocksjni/ttl.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/ttl.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/write_batch.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/write_batch.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/write_batch.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/write_batch.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/write_batch_test.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/write_batch_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/write_batch_test.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/write_batch_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/write_batch_with_index.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/write_batch_with_index.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/write_batch_with_index.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/write_batch_with_index.cc diff --git a/3rdParty/rocksdb/v5.18.X/java/rocksjni/write_buffer_manager.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/write_buffer_manager.cc new file mode 100644 index 0000000000..043f69031c --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/write_buffer_manager.cc @@ -0,0 +1,38 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include + +#include "include/org_rocksdb_WriteBufferManager.h" + +#include "rocksdb/cache.h" +#include "rocksdb/write_buffer_manager.h" + +/* + * Class: org_rocksdb_WriteBufferManager + * Method: newWriteBufferManager + * Signature: (JJ)J + */ +jlong Java_org_rocksdb_WriteBufferManager_newWriteBufferManager( + JNIEnv* /*env*/, jclass /*jclazz*/, jlong jbuffer_size, jlong jcache_handle) { + auto* cache_ptr = + reinterpret_cast *>(jcache_handle); + auto* write_buffer_manager = new std::shared_ptr( + std::make_shared(jbuffer_size, *cache_ptr)); + return reinterpret_cast(write_buffer_manager); +} + +/* + * Class: org_rocksdb_WriteBufferManager + * Method: disposeInternal + * Signature: (J)V + */ +void Java_org_rocksdb_WriteBufferManager_disposeInternal( + JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { + auto* write_buffer_manager = + reinterpret_cast *>(jhandle); + assert(write_buffer_manager != nullptr); + delete write_buffer_manager; +} diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/writebatchhandlerjnicallback.cc b/3rdParty/rocksdb/v5.18.X/java/rocksjni/writebatchhandlerjnicallback.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/writebatchhandlerjnicallback.cc rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/writebatchhandlerjnicallback.cc index 69fe876590..bf9001110a 100644 --- a/3rdParty/rocksdb/v5.16.X/java/rocksjni/writebatchhandlerjnicallback.cc +++ b/3rdParty/rocksdb/v5.18.X/java/rocksjni/writebatchhandlerjnicallback.cc @@ -306,7 +306,11 @@ rocksdb::Status WriteBatchHandlerJniCallback::PutBlobIndexCF(uint32_t column_fam } rocksdb::Status WriteBatchHandlerJniCallback::MarkBeginPrepare(bool unprepare) { +#ifndef DEBUG + (void) unprepare; +#else assert(!unprepare); +#endif m_env->CallVoidMethod(m_jcallback_obj, m_jMarkBeginPrepareMethodId); // check for Exception, in-particular RocksDBException diff --git a/3rdParty/rocksdb/v5.16.X/java/rocksjni/writebatchhandlerjnicallback.h 
b/3rdParty/rocksdb/v5.18.X/java/rocksjni/writebatchhandlerjnicallback.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/rocksjni/writebatchhandlerjnicallback.h rename to 3rdParty/rocksdb/v5.18.X/java/rocksjni/writebatchhandlerjnicallback.h diff --git a/3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/OptimisticTransactionSample.java b/3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/OptimisticTransactionSample.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/OptimisticTransactionSample.java rename to 3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/OptimisticTransactionSample.java diff --git a/3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/RocksDBColumnFamilySample.java b/3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/RocksDBColumnFamilySample.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/RocksDBColumnFamilySample.java rename to 3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/RocksDBColumnFamilySample.java diff --git a/3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/RocksDBSample.java b/3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/RocksDBSample.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/RocksDBSample.java rename to 3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/RocksDBSample.java diff --git a/3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/TransactionSample.java b/3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/TransactionSample.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/samples/src/main/java/TransactionSample.java rename to 3rdParty/rocksdb/v5.18.X/java/samples/src/main/java/TransactionSample.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractComparator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractComparator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractComparator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractComparator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractNativeReference.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractNativeReference.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractNativeReference.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractNativeReference.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractRocksIterator.java 
b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractRocksIterator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractRocksIterator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractRocksIterator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractSlice.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractSlice.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractSlice.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractSlice.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractWriteBatch.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractWriteBatch.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AbstractWriteBatch.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AbstractWriteBatch.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AccessHint.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AccessHint.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AccessHint.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AccessHint.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java 
b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BackupEngine.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BackupEngine.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BackupEngine.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BackupEngine.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BackupInfo.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BackupInfo.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BackupInfo.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BackupInfo.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BackupableDBOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BackupableDBOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BackupableDBOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BackupableDBOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java 
b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java similarity index 78% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java index 2dbbc64d35..1032be6e79 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java @@ -22,7 +22,11 @@ public class BlockBasedTableConfig extends TableFormatConfig { wholeKeyFiltering_ = true; filter_ = null; cacheIndexAndFilterBlocks_ = false; + cacheIndexAndFilterBlocksWithHighPriority_ = false; pinL0FilterAndIndexBlocksInCache_ = false; + partitionFilters_ = false; + metadataBlockSize_ = 4096; + pinTopLevelIndexAndFilter_ = true; hashIndexAllowCollision_ = true; blockCacheCompressedSize_ = 0; blockCacheCompressedNumShardBits_ = 0; @@ -246,6 +250,31 @@ public class BlockBasedTableConfig extends TableFormatConfig { return this; } + /** + * Indicates if index and filter blocks will be treated as high-priority in the block cache. + * See note below about applicability. If not specified, defaults to false. + * + * @return if index and filter blocks will be treated as high-priority. + */ + public boolean cacheIndexAndFilterBlocksWithHighPriority() { + return cacheIndexAndFilterBlocksWithHighPriority_; + } + + /** + * If true, cache index and filter blocks with high priority. If set to true, + * depending on implementation of block cache, index and filter blocks may be + * less likely to be evicted than data blocks. + * + * @param cacheIndexAndFilterBlocksWithHighPriority if index and filter blocks + * will be treated as high-priority. + * @return the reference to the current config. 
+ */ + public BlockBasedTableConfig setCacheIndexAndFilterBlocksWithHighPriority( + final boolean cacheIndexAndFilterBlocksWithHighPriority) { + cacheIndexAndFilterBlocksWithHighPriority_ = cacheIndexAndFilterBlocksWithHighPriority; + return this; + } + /** * Indicating if we'd like to pin L0 index/filter blocks to the block cache. If not specified, defaults to false. @@ -269,6 +298,70 @@ public class BlockBasedTableConfig extends TableFormatConfig { return this; } + /** + * Indicating if we're using partitioned filters. Defaults to false. + * + * @return if we're using partition filters. + */ + public boolean partitionFilters() { + return partitionFilters_; + } + + /** + * Use partitioned full filters for each SST file. This option is incompatible with + * block-based filters. + * + * @param partitionFilters use partition filters. + * @return the reference to the current config. + */ + public BlockBasedTableConfig setPartitionFilters(final boolean partitionFilters) { + partitionFilters_ = partitionFilters; + return this; + } + + /** + * @return block size for partitioned metadata. + */ + public long metadataBlockSize() { + return metadataBlockSize_; + } + + /** + * Set block size for partitioned metadata. + * + * @param metadataBlockSize Partitioned metadata block size. + * @return the reference to the current config. + */ + public BlockBasedTableConfig setMetadataBlockSize( + final long metadataBlockSize) { + metadataBlockSize_ = metadataBlockSize; + return this; + } + + /** + * Indicates if top-level index and filter blocks should be pinned. + * + * @return if top-level index and filter blocks should be pinned. 
+ */ + public boolean pinTopLevelIndexAndFilter() { + return pinTopLevelIndexAndFilter_; + } + + /** + * If cacheIndexAndFilterBlocks is true and the below is true, then + * the top-level index of partitioned filter and index blocks are stored in + * the cache, but a reference is held in the "table reader" object so the + * blocks are pinned and only evicted from cache when the table reader is + * freed. This is not limited to l0 in LSM tree. + * + * @param pinTopLevelIndexAndFilter if top-level index and filter blocks should be pinned. + * @return the reference to the current config. + */ + public BlockBasedTableConfig setPinTopLevelIndexAndFilter(final boolean pinTopLevelIndexAndFilter) { + pinTopLevelIndexAndFilter_ = pinTopLevelIndexAndFilter; + return this; + } + /** * Influence the behavior when kHashSearch is used. if false, stores a precise prefix to block range mapping @@ -440,20 +533,27 @@ public class BlockBasedTableConfig extends TableFormatConfig { return newTableFactoryHandle(noBlockCache_, blockCacheSize_, blockCacheNumShardBits_, blockCacheHandle, blockSize_, blockSizeDeviation_, blockRestartInterval_, wholeKeyFiltering_, filterHandle, cacheIndexAndFilterBlocks_, - pinL0FilterAndIndexBlocksInCache_, hashIndexAllowCollision_, blockCacheCompressedSize_, - blockCacheCompressedNumShardBits_, checksumType_.getValue(), indexType_.getValue(), - formatVersion_); + cacheIndexAndFilterBlocksWithHighPriority_, pinL0FilterAndIndexBlocksInCache_, + partitionFilters_, metadataBlockSize_, pinTopLevelIndexAndFilter_, + hashIndexAllowCollision_, blockCacheCompressedSize_, blockCacheCompressedNumShardBits_, + checksumType_.getValue(), indexType_.getValue(), formatVersion_); } private native long newTableFactoryHandle(boolean noBlockCache, long blockCacheSize, int blockCacheNumShardBits, long blockCacheHandle, long blockSize, int blockSizeDeviation, int blockRestartInterval, boolean wholeKeyFiltering, long filterPolicyHandle, - boolean cacheIndexAndFilterBlocks, 
boolean pinL0FilterAndIndexBlocksInCache, - boolean hashIndexAllowCollision, long blockCacheCompressedSize, - int blockCacheCompressedNumShardBits, byte checkSumType, byte indexType, int formatVersion); + boolean cacheIndexAndFilterBlocks, boolean cacheIndexAndFilterBlocksWithHighPriority, + boolean pinL0FilterAndIndexBlocksInCache, boolean partitionFilters, long metadataBlockSize, + boolean pinTopLevelIndexAndFilter, boolean hashIndexAllowCollision, + long blockCacheCompressedSize, int blockCacheCompressedNumShardBits, + byte checkSumType, byte indexType, int formatVersion); private boolean cacheIndexAndFilterBlocks_; + private boolean cacheIndexAndFilterBlocksWithHighPriority_; private boolean pinL0FilterAndIndexBlocksInCache_; + private boolean partitionFilters_; + private long metadataBlockSize_; + private boolean pinTopLevelIndexAndFilter_; private IndexType indexType_; private boolean hashIndexAllowCollision_; private ChecksumType checksumType_; diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BloomFilter.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BloomFilter.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BloomFilter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BloomFilter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BuiltinComparator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BuiltinComparator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/BuiltinComparator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/BuiltinComparator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Cache.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Cache.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Cache.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Cache.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Checkpoint.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Checkpoint.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Checkpoint.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Checkpoint.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ChecksumType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ChecksumType.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ChecksumType.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ChecksumType.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ClockCache.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ClockCache.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ClockCache.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ClockCache.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactRangeOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactRangeOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactRangeOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactRangeOptions.java diff --git 
a/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java new file mode 100644 index 0000000000..36d78fe6e6 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java @@ -0,0 +1,119 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * Options for FIFO Compaction + */ +public class CompactionOptionsFIFO extends RocksObject { + + public CompactionOptionsFIFO() { + super(newCompactionOptionsFIFO()); + } + + /** + * Once the total sum of table files reaches this, we will delete the oldest + * table file + * + * Default: 1GB + * + * @param maxTableFilesSize The maximum size of the table files + * + * @return the reference to the current options. + */ + public CompactionOptionsFIFO setMaxTableFilesSize( + final long maxTableFilesSize) { + setMaxTableFilesSize(nativeHandle_, maxTableFilesSize); + return this; + } + + /** + * Once the total sum of table files reaches this, we will delete the oldest + * table file + * + * Default: 1GB + * + * @return max table file size in bytes + */ + public long maxTableFilesSize() { + return maxTableFilesSize(nativeHandle_); + } + + /** + * Drop files older than TTL. TTL based deletion will take precedence over + * size based deletion if ttl > 0. + * delete if sst_file_creation_time < (current_time - ttl). + * unit: seconds. Ex: 1 day = 1 * 24 * 60 * 60 + * + * Default: 0 (disabled) + * + * @param ttl The ttl for the table files in seconds + * + * @return the reference to the current options. + */ + public CompactionOptionsFIFO setTtl(final long ttl) { + setTtl(nativeHandle_, ttl); + return this; + } + + /** + * The current ttl value. 
+ * Drop files older than TTL. TTL based deletion will take precedence over + * size based deletion if ttl > 0. + * delete if sst_file_creation_time < (current_time - ttl). + * + * Default: 0 (disabled) + * + * @return the ttl in seconds + */ + public long ttl() { + return ttl(nativeHandle_); + } + + /** + * If true, try to do compaction to compact smaller files into larger ones. + * Minimum files to compact follows options.level0_file_num_compaction_trigger + * and compaction won't trigger if average compact bytes per del file is + * larger than options.write_buffer_size. This is to protect large files + * from being compacted again. + * + * Default: false + * + * @param allowCompaction should allow intra-L0 compaction? + * + * @return the reference to the current options. + */ + public CompactionOptionsFIFO setAllowCompaction(final boolean allowCompaction) { + setAllowCompaction(nativeHandle_, allowCompaction); + return this; + } + + /** + * Check if intra-L0 compaction is enabled. + * If true, try to do compaction to compact smaller files into larger ones. + * Minimum files to compact follows options.level0_file_num_compaction_trigger + * and compaction won't trigger if average compact bytes per del file is + * larger than options.write_buffer_size. This is to protect large files + * from being compacted again. 
+ * + * Default: false + * + * @return a boolean value indicating whether intra-L0 compaction is enabled + */ + public boolean allowCompaction() { + return allowCompaction(nativeHandle_); + } + + private native void setMaxTableFilesSize(long handle, long maxTableFilesSize); + private native long maxTableFilesSize(long handle); + private native void setTtl(long handle, long ttl); + private native long ttl(long handle); + private native void setAllowCompaction(long handle, boolean allowCompaction); + private native boolean allowCompaction(long handle); + + private native static long newCompactionOptionsFIFO(); + @Override protected final native void disposeInternal(final long handle); +} diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionPriority.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionPriority.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionPriority.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionPriority.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionStopStyle.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionStopStyle.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionStopStyle.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionStopStyle.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionStyle.java 
b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionStyle.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompactionStyle.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompactionStyle.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Comparator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Comparator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Comparator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Comparator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ComparatorOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ComparatorOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ComparatorOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ComparatorOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ComparatorType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ComparatorType.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ComparatorType.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ComparatorType.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompressionOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompressionOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompressionOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompressionOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompressionType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompressionType.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/CompressionType.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/CompressionType.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DBOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DBOptions.java similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DBOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DBOptions.java index c323293889..280623a208 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DBOptions.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DBOptions.java @@ -46,6 +46,7 @@ public class DBOptions this.numShardBits_ = other.numShardBits_; this.rateLimiter_ = other.rateLimiter_; this.rowCache_ = other.rowCache_; + this.writeBufferManager_ = other.writeBufferManager_; } /** @@ -668,6 +669,20 @@ public class DBOptions } @Override + public DBOptions setWriteBufferManager(final WriteBufferManager writeBufferManager) { + assert(isOwningHandle()); + setWriteBufferManager(nativeHandle_, writeBufferManager.nativeHandle_); + this.writeBufferManager_ = writeBufferManager; + return this; + } + + @Override + public WriteBufferManager writeBufferManager() { + assert(isOwningHandle()); + return this.writeBufferManager_; + } + + @Override public long dbWriteBufferSize() { assert(isOwningHandle()); return dbWriteBufferSize(nativeHandle_); @@ -1087,6 +1102,8 @@ public class DBOptions private native boolean adviseRandomOnOpen(long handle); private native void setDbWriteBufferSize(final long handle, final long dbWriteBufferSize); + private native void setWriteBufferManager(final long dbOptionsHandle, + final long writeBufferManagerHandle); private native long dbWriteBufferSize(final long handle); private native void setAccessHintOnCompactionStart(final long handle, final byte accessHintOnCompactionStart); @@ -1158,4 +1175,5 @@ public class DBOptions 
private int numShardBits_; private RateLimiter rateLimiter_; private Cache rowCache_; + private WriteBufferManager writeBufferManager_; } diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DBOptionsInterface.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DBOptionsInterface.java similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DBOptionsInterface.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DBOptionsInterface.java index 7c406eaf8a..accfb4c59a 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DBOptionsInterface.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DBOptionsInterface.java @@ -991,6 +991,28 @@ public interface DBOptionsInterface { */ T setDbWriteBufferSize(long dbWriteBufferSize); + /** + * Use passed {@link WriteBufferManager} to control memory usage across + * multiple column families and/or DB instances. + * + * Check + * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager + * for more details on when to use it + * + * @param writeBufferManager The WriteBufferManager to use + * @return the reference of the current options. + */ + T setWriteBufferManager(final WriteBufferManager writeBufferManager); + + /** + * Reference to {@link WriteBufferManager} used by it.
+ * + * Default: null (Disabled) + * + * @return a reference to WriteBufferManager + */ + WriteBufferManager writeBufferManager(); + /** * Amount of data to build up in memtables across all column * families before writing to disk. diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DbPath.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DbPath.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DbPath.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DbPath.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DirectComparator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DirectComparator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DirectComparator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DirectComparator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DirectSlice.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DirectSlice.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/DirectSlice.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/DirectSlice.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/EncodingType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/EncodingType.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/EncodingType.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/EncodingType.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Env.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Env.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Env.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Env.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/EnvOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/EnvOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/EnvOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/EnvOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Experimental.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Experimental.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Experimental.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Experimental.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Filter.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Filter.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Filter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Filter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/FlushOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/FlushOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/FlushOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/FlushOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java similarity 
index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HistogramData.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HistogramData.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HistogramData.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HistogramData.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HistogramType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HistogramType.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/HistogramType.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/HistogramType.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/IndexType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/IndexType.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/IndexType.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/IndexType.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/InfoLogLevel.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/InfoLogLevel.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/InfoLogLevel.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/InfoLogLevel.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/LRUCache.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/LRUCache.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/LRUCache.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/LRUCache.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Logger.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Logger.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Logger.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Logger.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MemTableConfig.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemTableConfig.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MemTableConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemTableConfig.java diff --git a/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemoryUsageType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemoryUsageType.java new file mode 100644 index 0000000000..3523cd0ee6 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemoryUsageType.java @@ -0,0 +1,72 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * MemoryUsageType + * + *

The value will be used as a key to indicate the type of memory usage + * described

+ */ +public enum MemoryUsageType { + /** + * Memory usage of all the mem-tables. + */ + kMemTableTotal((byte) 0), + /** + * Memory usage of those un-flushed mem-tables. + */ + kMemTableUnFlushed((byte) 1), + /** + * Memory usage of all the table readers. + */ + kTableReadersTotal((byte) 2), + /** + * Memory usage by Cache. + */ + kCacheTotal((byte) 3), + /** + * Max usage types - copied to keep 1:1 with native. + */ + kNumUsageTypes((byte) 4); + + /** + * Returns the byte value of the enumerations value + * + * @return byte representation + */ + public byte getValue() { + return value_; + } + + /** + *

Get the MemoryUsageType enumeration value by + * passing the byte identifier to this method.

+ * + * @param byteIdentifier of MemoryUsageType. + * + * @return MemoryUsageType instance. + * + * @throws IllegalArgumentException if the usage type for the byteIdentifier + * cannot be found + */ + public static MemoryUsageType getMemoryUsageType(final byte byteIdentifier) { + for (final MemoryUsageType MemoryUsageType : MemoryUsageType.values()) { + if (MemoryUsageType.getValue() == byteIdentifier) { + return MemoryUsageType; + } + } + + throw new IllegalArgumentException( + "Illegal value provided for MemoryUsageType."); + } + + private MemoryUsageType(byte value) { + value_ = value; + } + + private final byte value_; +} diff --git a/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemoryUtil.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemoryUtil.java new file mode 100644 index 0000000000..52b2175e6b --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MemoryUtil.java @@ -0,0 +1,60 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +import java.util.*; + +/** + * JNI passthrough for MemoryUtil. + */ +public class MemoryUtil { + + /** + *

Returns the approximate memory usage of different types in the input + * list of DBs and Cache set. For instance, in the output map the key + * kMemTableTotal will be associated with the memory + * usage of all the mem-tables from all the input rocksdb instances.

+ * + *

Note that for memory usage inside Cache class, we will + * only report the usage of the input "cache_set" without + * including those Cache usage inside the input list "dbs" + * of DBs.

+ * + * @param dbs List of dbs to collect memory usage for. + * @param caches Set of caches to collect memory usage for. + * @return Map from {@link MemoryUsageType} to memory usage as a {@link Long}. + */ + public static Map getApproximateMemoryUsageByType(final List dbs, final Set caches) { + int dbCount = (dbs == null) ? 0 : dbs.size(); + int cacheCount = (caches == null) ? 0 : caches.size(); + long[] dbHandles = new long[dbCount]; + long[] cacheHandles = new long[cacheCount]; + if (dbCount > 0) { + ListIterator dbIter = dbs.listIterator(); + while (dbIter.hasNext()) { + dbHandles[dbIter.nextIndex()] = dbIter.next().nativeHandle_; + } + } + if (cacheCount > 0) { + // NOTE: This index handling is super ugly but I couldn't get a clean way to track both the + // index and the iterator simultaneously within a Set. + int i = 0; + for (Cache cache : caches) { + cacheHandles[i] = cache.nativeHandle_; + i++; + } + } + Map byteOutput = getApproximateMemoryUsageByType(dbHandles, cacheHandles); + Map output = new HashMap<>(); + for(Map.Entry longEntry : byteOutput.entrySet()) { + output.put(MemoryUsageType.getMemoryUsageType(longEntry.getKey()), longEntry.getValue()); + } + return output; + } + + private native static Map getApproximateMemoryUsageByType(final long[] dbHandles, + final long[] cacheHandles); +} diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MergeOperator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MergeOperator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MergeOperator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MergeOperator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java 
rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/NativeLibraryLoader.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/NativeLibraryLoader.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/NativeLibraryLoader.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/NativeLibraryLoader.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Options.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Options.java similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Options.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Options.java index cac4fc5a36..2ff4ec1204 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Options.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Options.java @@ -70,6 +70,7 @@ public class Options extends RocksObject this.compactionOptionsFIFO_ = other.compactionOptionsFIFO_; this.compressionOptions_ = other.compressionOptions_; this.rowCache_ = other.rowCache_; + this.writeBufferManager_ = other.writeBufferManager_; } @Override @@ -724,6 +725,20 @@ public class Options extends RocksObject } @Override + public Options setWriteBufferManager(final WriteBufferManager writeBufferManager) { + assert(isOwningHandle()); + setWriteBufferManager(nativeHandle_, writeBufferManager.nativeHandle_); + this.writeBufferManager_ = writeBufferManager; + return this; + } + + @Override + public WriteBufferManager writeBufferManager() { + assert(isOwningHandle()); + return this.writeBufferManager_; + } + + @Override public long dbWriteBufferSize() { assert(isOwningHandle()); return dbWriteBufferSize(nativeHandle_); @@ -1690,6 +1705,8 @@ public class Options extends RocksObject private native boolean adviseRandomOnOpen(long handle); private native void setDbWriteBufferSize(final long handle, final long dbWriteBufferSize); + private native void setWriteBufferManager(final long handle, + final long writeBufferManagerHandle); private native long dbWriteBufferSize(final long handle); private native void setAccessHintOnCompactionStart(final long 
handle, final byte accessHintOnCompactionStart); @@ -1909,4 +1926,5 @@ public class Options extends RocksObject private CompactionOptionsFIFO compactionOptionsFIFO_; private CompressionOptions compressionOptions_; private Cache rowCache_; + private WriteBufferManager writeBufferManager_; } diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/OptionsUtil.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/OptionsUtil.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/OptionsUtil.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/OptionsUtil.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/PlainTableConfig.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/PlainTableConfig.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/PlainTableConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/PlainTableConfig.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RateLimiter.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RateLimiter.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RateLimiter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RateLimiter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RateLimiterMode.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RateLimiterMode.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RateLimiterMode.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RateLimiterMode.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ReadOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ReadOptions.java similarity index 87% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ReadOptions.java 
rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ReadOptions.java index be8aec6b32..f176d249b0 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ReadOptions.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ReadOptions.java @@ -27,6 +27,7 @@ public class ReadOptions extends RocksObject { public ReadOptions(ReadOptions other) { super(copyReadOptions(other.nativeHandle_)); iterateUpperBoundSlice_ = other.iterateUpperBoundSlice_; + iterateLowerBoundSlice_ = other.iterateLowerBoundSlice_; } /** @@ -423,15 +424,65 @@ public class ReadOptions extends RocksObject { return null; } + /** + * Defines the smallest key at which the backward iterator can return an + * entry. Once the bound is passed, Valid() will be false. + * `iterate_lower_bound` is inclusive ie the bound value is a valid entry. + * + * If prefix_extractor is not null, the Seek target and `iterate_lower_bound` + * need to have the same prefix. This is because ordering is not guaranteed + * outside of prefix domain. + * + * Default: nullptr + * + * @param iterateLowerBound Slice representing the lower bound + * @return the reference to the current ReadOptions. + */ + public ReadOptions setIterateLowerBound(final Slice iterateLowerBound) { + assert(isOwningHandle()); + if (iterateLowerBound != null) { + // Hold onto a reference so it doesn't get garbaged collected out from under us. + iterateLowerBoundSlice_ = iterateLowerBound; + setIterateLowerBound(nativeHandle_, iterateLowerBoundSlice_.getNativeHandle()); + } + return this; + } + + /** + * Defines the smallest key at which the backward iterator can return an + * entry. Once the bound is passed, Valid() will be false. + * `iterate_lower_bound` is inclusive ie the bound value is a valid entry. + * + * If prefix_extractor is not null, the Seek target and `iterate_lower_bound` + * need to have the same prefix. This is because ordering is not guaranteed + * outside of prefix domain. 
+ * + * Default: nullptr + * + * @return Slice representing current iterate_lower_bound setting, or null if + * one does not exist. + */ + public Slice iterateLowerBound() { + assert(isOwningHandle()); + long lowerBoundSliceHandle = iterateLowerBound(nativeHandle_); + if (lowerBoundSliceHandle != 0) { + // Disown the new slice - it's owned by the C++ side of the JNI boundary + // from the perspective of this method. + return new Slice(lowerBoundSliceHandle, false); + } + return null; + } + // instance variables // NOTE: If you add new member variables, please update the copy constructor above! // - // Hold a reference to any iterate upper bound that was set on this object - // until we're destroyed or it's overwritten. That way the caller can freely + // Hold a reference to any iterate upper/lower bound that was set on this object + // until we're destroyed or it's overwritten. That way the caller can freely // leave scope without us losing the Java Slice object, which during close() // would also reap its associated rocksdb::Slice native object since it's // possibly (likely) to be an owning handle. 
protected Slice iterateUpperBoundSlice_; + protected Slice iterateLowerBoundSlice_; private native static long newReadOptions(); private native static long copyReadOptions(long handle); @@ -465,6 +516,9 @@ public class ReadOptions extends RocksObject { private native void setIterateUpperBound(final long handle, final long upperBoundSliceHandle); private native long iterateUpperBound(final long handle); + private native void setIterateLowerBound(final long handle, + final long upperBoundSliceHandle); + private native long iterateLowerBound(final long handle); @Override protected final native void disposeInternal(final long handle); diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ReadTier.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ReadTier.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/ReadTier.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/ReadTier.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RestoreOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RestoreOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RestoreOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RestoreOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksCallbackObject.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksCallbackObject.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksCallbackObject.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksCallbackObject.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksDB.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksDB.java similarity index 76% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksDB.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksDB.java index 3e2ba7b560..7ac08fdf05 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksDB.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksDB.java @@ -439,6 +439,12 @@ public class RocksDB extends RocksObject { options_ = options; } + private static void checkBounds(int offset, int len, int size) { + if ((offset | len | (offset + len) | (size - (offset + len))) < 0) { + throw new IndexOutOfBoundsException(String.format("offset(%d), len(%d), size(%d)", offset, len, size)); + } + } + /** * Set the database entry for "key" to "value". 
* @@ -453,6 +459,28 @@ public class RocksDB extends RocksObject { put(nativeHandle_, key, 0, key.length, value, 0, value.length); } + /** + * Set the database entry for "key" to "value" + * + * @param key The specified key to be inserted + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * must be non-negative and no larger than ("key".length - offset) + * @param value the value associated with the specified key + * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if errors happens in underlying native library. + */ + public void put(final byte[] key, int offset, int len, final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + put(nativeHandle_, key, offset, len, value, vOffset, vLen); + } + /** * Set the database entry for "key" to "value" in the specified * column family. @@ -473,6 +501,32 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_); } + /** + * Set the database entry for "key" to "value" in the specified + * column family. 
+ * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param key The specified key to be inserted + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * must be non-negative and no larger than ("key".length - offset) + * @param value the value associated with the specified key + * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if errors happens in underlying native library. + */ + public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, int offset, int len, final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + put(nativeHandle_, key, offset, len, value, vOffset, vLen, + columnFamilyHandle.nativeHandle_); + } + /** * Set the database entry for "key" to "value". * @@ -489,6 +543,32 @@ public class RocksDB extends RocksObject { key, 0, key.length, value, 0, value.length); } + /** + * Set the database entry for "key" to "value". + * + * @param writeOpts {@link org.rocksdb.WriteOptions} instance. 
+ * @param key The specified key to be inserted + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * must be non-negative and no larger than ("key".length - offset) + * @param value the value associated with the specified key + * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void put(final WriteOptions writeOpts, byte[] key, int offset, int len, byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + put(nativeHandle_, writeOpts.nativeHandle_, + key, offset, len, value, vOffset, vLen); + } + + /** * Set the database entry for "key" to "value" for the specified * column family. @@ -512,6 +592,36 @@ public class RocksDB extends RocksObject { 0, value.length, columnFamilyHandle.nativeHandle_); } + /** + * Set the database entry for "key" to "value" for the specified + * column family. + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param writeOpts {@link org.rocksdb.WriteOptions} instance. 
+ * @param key The specified key to be inserted + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * must be non-negative and no larger than ("key".length - offset) + * @param value the value associated with the specified key + * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void put(final ColumnFamilyHandle columnFamilyHandle, + final WriteOptions writeOpts, final byte[] key, int offset, int len, + final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + put(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, + vOffset, vLen, columnFamilyHandle.nativeHandle_); + } + /** * If the key definitely does not exist in the database, then this method * returns false, else true. @@ -528,6 +638,27 @@ public class RocksDB extends RocksObject { return keyMayExist(nativeHandle_, key, 0, key.length, value); } + /** + * If the key definitely does not exist in the database, then this method + * returns false, else true. + * + * This check is potentially lighter-weight than invoking DB::Get(). One way + * to make this lighter weight is to avoid doing any IOs. 
+ * + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value StringBuilder instance which is a out parameter if a value is + * found in block-cache. + * + * @return boolean value indicating if key does not exist or might exist. + */ + public boolean keyMayExist(final byte[] key, int offset, int len, final StringBuilder value) { + checkBounds(offset, len, key.length); + return keyMayExist(nativeHandle_, key, offset, len, value); + } + /** * If the key definitely does not exist in the database, then this method * returns false, else true. @@ -547,6 +678,30 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_, value); } + /** + * If the key definitely does not exist in the database, then this method + * returns false, else true. + * + * This check is potentially lighter-weight than invoking DB::Get(). One way + * to make this lighter weight is to avoid doing any IOs. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value StringBuilder instance which is a out parameter if a value is + * found in block-cache. + * @return boolean value indicating if key does not exist or might exist. 
+ */ + public boolean keyMayExist(final ColumnFamilyHandle columnFamilyHandle, + final byte[] key, int offset, int len, final StringBuilder value) { + checkBounds(offset, len, key.length); + return keyMayExist(nativeHandle_, key, offset, len, + columnFamilyHandle.nativeHandle_, value); + } + + /** * If the key definitely does not exist in the database, then this method * returns false, else true. @@ -566,6 +721,29 @@ public class RocksDB extends RocksObject { key, 0, key.length, value); } + /** + * If the key definitely does not exist in the database, then this method + * returns false, else true. + * + * This check is potentially lighter-weight than invoking DB::Get(). One way + * to make this lighter weight is to avoid doing any IOs. + * + * @param readOptions {@link ReadOptions} instance + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value StringBuilder instance which is a out parameter if a value is + * found in block-cache. + * @return boolean value indicating if key does not exist or might exist. + */ + public boolean keyMayExist(final ReadOptions readOptions, + final byte[] key, int offset, int len, final StringBuilder value) { + checkBounds(offset, len, key.length); + return keyMayExist(nativeHandle_, readOptions.nativeHandle_, + key, offset, len, value); + } + /** * If the key definitely does not exist in the database, then this method * returns false, else true. @@ -588,6 +766,32 @@ public class RocksDB extends RocksObject { value); } + /** + * If the key definitely does not exist in the database, then this method + * returns false, else true. + * + * This check is potentially lighter-weight than invoking DB::Get(). One way + * to make this lighter weight is to avoid doing any IOs. 
+ * + * @param readOptions {@link ReadOptions} instance + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value StringBuilder instance which is a out parameter if a value is + * found in block-cache. + * @return boolean value indicating if key does not exist or might exist. + */ + public boolean keyMayExist(final ReadOptions readOptions, + final ColumnFamilyHandle columnFamilyHandle, final byte[] key, int offset, int len, + final StringBuilder value) { + checkBounds(offset, len, key.length); + return keyMayExist(nativeHandle_, readOptions.nativeHandle_, + key, offset, len, columnFamilyHandle.nativeHandle_, + value); + } + /** * Apply the specified updates to the database. * @@ -631,6 +835,30 @@ public class RocksDB extends RocksObject { merge(nativeHandle_, key, 0, key.length, value, 0, value.length); } + /** + * Add merge operand for key/value pair. + * + * @param key the specified key to be merged. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the value to be merged with the current value for the specified key. + * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if error happens in underlying + * native library. 
+ */ + public void merge(final byte[] key, int offset, int len, final byte[] value, int vOffset, int vLen) + throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + merge(nativeHandle_, key, offset, len, value, vOffset, vLen); + } + + /** * Add merge operand for key/value pair in a ColumnFamily. * @@ -648,6 +876,32 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_); } + /** + * Add merge operand for key/value pair in a ColumnFamily. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param key the specified key to be merged. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the value to be merged with the current value for + * the specified key. + * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void merge(final ColumnFamilyHandle columnFamilyHandle, + final byte[] key, int offset, int len, final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + merge(nativeHandle_, key, offset, len, value, vOffset, vLen, + columnFamilyHandle.nativeHandle_); + } + /** * Add merge operand for key/value pair. * @@ -665,6 +919,32 @@ public class RocksDB extends RocksObject { key, 0, key.length, value, 0, value.length); } + /** + * Add merge operand for key/value pair. + * + * @param writeOpts {@link WriteOptions} for this write. + * @param key the specified key to be merged. 
+ * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the value to be merged with the current value for + * the specified key. + * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void merge(final WriteOptions writeOpts, final byte[] key, int offset, int len, + final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + merge(nativeHandle_, writeOpts.nativeHandle_, + key, offset, len, value, vOffset, vLen); + } + /** * Add merge operand for key/value pair. * @@ -685,13 +965,44 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_); } + /** + * Add merge operand for key/value pair. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param writeOpts {@link WriteOptions} for this write. + * @param key the specified key to be merged. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the value to be merged with the current value for + * the specified key. 
+ * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void merge(final ColumnFamilyHandle columnFamilyHandle, + final WriteOptions writeOpts, final byte[] key, int offset, int len, + final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + merge(nativeHandle_, writeOpts.nativeHandle_, + key, offset, len, value, vOffset, vLen, + columnFamilyHandle.nativeHandle_); + } + // TODO(AR) we should improve the #get() API, returning -1 (RocksDB.NOT_FOUND) is not very nice // when we could communicate better status into, also the C++ code show that -2 could be returned /** * Get the value associated with the specified key within column family* + * * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. + * * @return The size of the actual value that matches the specified * {@code key} in byte. If the return value is greater than the * length of {@code value}, then it indicates that the size of the @@ -706,6 +1017,35 @@ public class RocksDB extends RocksObject { return get(nativeHandle_, key, 0, key.length, value, 0, value.length); } + /** + * Get the value associated with the specified key within column family* + * + * @param key the key to retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the out-value to receive the retrieved value. 
+ * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @return The size of the actual value that matches the specified + * {@code key} in byte. If the return value is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and partial result will + * be returned. RocksDB.NOT_FOUND will be returned if the value not + * found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public int get(final byte[] key, int offset, int len, final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + return get(nativeHandle_, key, offset, len, value, vOffset, vLen); + } + /** * Get the value associated with the specified key within column family. * @@ -729,6 +1069,39 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_); } + /** + * Get the value associated with the specified key within column family. + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param key the key to retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the out-value to receive the retrieved value. 
+ * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * + * @return The size of the actual value that matches the specified + * {@code key} in byte. If the return value is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and partial result will + * be returned. RocksDB.NOT_FOUND will be returned if the value not + * found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, int offset, int len, + final byte[] value, int vOffset, int vLen) throws RocksDBException, IllegalArgumentException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + return get(nativeHandle_, key, offset, len, value, vOffset, vLen, + columnFamilyHandle.nativeHandle_); + } + /** * Get the value associated with the specified key. * @@ -750,6 +1123,38 @@ public class RocksDB extends RocksObject { return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length, value, 0, value.length); } + + /** + * Get the value associated with the specified key. + * + * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param key the key to retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the out-value to receive the retrieved value. 
+ * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * @return The size of the actual value that matches the specified + * {@code key} in byte. If the return value is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and partial result will + * be returned. RocksDB.NOT_FOUND will be returned if the value not + * found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public int get(final ReadOptions opt, final byte[] key, int offset, int len, + final byte[] value, int vOffset, int vLen) throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + return get(nativeHandle_, opt.nativeHandle_, + key, offset, len, value, vOffset, vLen); + } + /** * Get the value associated with the specified key within column family. * @@ -775,6 +1180,40 @@ public class RocksDB extends RocksObject { 0, value.length, columnFamilyHandle.nativeHandle_); } + /** + * Get the value associated with the specified key within column family. + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param key the key to retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param value the out-value to receive the retrieved value. 
+ * @param vOffset the offset of the "value" array to be used, must be non-negative and + * no longer than "key".length + * @param vLen the length of the "value" array to be used, must be non-negative and + * must be non-negative and no larger than ("value".length - offset) + * @return The size of the actual value that matches the specified + * {@code key} in byte. If the return value is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and partial result will + * be returned. RocksDB.NOT_FOUND will be returned if the value not + * found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public int get(final ColumnFamilyHandle columnFamilyHandle, + final ReadOptions opt, final byte[] key, int offset, int len, final byte[] value, int vOffset, int vLen) + throws RocksDBException { + checkBounds(offset, len, key.length); + checkBounds(vOffset, vLen, value.length); + return get(nativeHandle_, opt.nativeHandle_, key, offset, len, value, + vOffset, vLen, columnFamilyHandle.nativeHandle_); + } + /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be @@ -791,6 +1230,26 @@ public class RocksDB extends RocksObject { return get(nativeHandle_, key, 0, key.length); } + /** + * The simplified version of get which returns a new byte array storing + * the value associated with the specified input key if any. null will be + * returned if the specified key is not found. + * + * @param key the key retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @return a byte array storing the value associated with the input key if + * any. null if it does not find the specified key. 
+ * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public byte[] get(final byte[] key, int offset, int len) throws RocksDBException { + checkBounds(offset, len, key.length); + return get(nativeHandle_, key, offset, len); + } + /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be @@ -811,6 +1270,30 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_); } + /** + * The simplified version of get which returns a new byte array storing + * the value associated with the specified input key if any. null will be + * returned if the specified key is not found. + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param key the key retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @return a byte array storing the value associated with the input key if + * any. null if it does not find the specified key. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public byte[] get(final ColumnFamilyHandle columnFamilyHandle, + final byte[] key, int offset, int len) throws RocksDBException { + checkBounds(offset, len, key.length); + return get(nativeHandle_, key, offset, len, + columnFamilyHandle.nativeHandle_); + } + /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be @@ -829,6 +1312,28 @@ public class RocksDB extends RocksObject { return get(nativeHandle_, opt.nativeHandle_, key, 0, key.length); } + /** + * The simplified version of get which returns a new byte array storing + * the value associated with the specified input key if any. 
null will be + * returned if the specified key is not found. + * + * @param key the key retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param opt Read options. + * @return a byte array storing the value associated with the input key if + * any. null if it does not find the specified key. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public byte[] get(final ReadOptions opt, final byte[] key, int offset, int len) + throws RocksDBException { + checkBounds(offset, len, key.length); + return get(nativeHandle_, opt.nativeHandle_, key, offset, len); + } + /** * The simplified version of get which returns a new byte array storing * the value associated with the specified input key if any. null will be @@ -850,6 +1355,31 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_); } + /** + * The simplified version of get which returns a new byte array storing + * the value associated with the specified input key if any. null will be + * returned if the specified key is not found. + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param key the key retrieve the value. + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * @param opt Read options. + * @return a byte array storing the value associated with the input key if + * any. null if it does not find the specified key. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. 
+ */ + public byte[] get(final ColumnFamilyHandle columnFamilyHandle, + final ReadOptions opt, final byte[] key, int offset, int len) throws RocksDBException { + checkBounds(offset, len, key.length); + return get(nativeHandle_, opt.nativeHandle_, key, offset, len, + columnFamilyHandle.nativeHandle_); + } + /** * Returns a map of keys for which values were found in DB. * @@ -1073,6 +1603,23 @@ public class RocksDB extends RocksObject { delete(nativeHandle_, key, 0, key.length); } + /** + * Delete the database entry (if any) for "key". Returns OK on + * success, and a non-OK status on error. It is not an error if "key" + * did not exist in the database. + * + * @param key Key to delete within database + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void delete(final byte[] key, int offset, int len) throws RocksDBException { + delete(nativeHandle_, key, offset, len); + } + /** * Remove the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" @@ -1110,6 +1657,26 @@ public class RocksDB extends RocksObject { delete(nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_); } + /** + * Delete the database entry (if any) for "key". Returns OK on + * success, and a non-OK status on error. It is not an error if "key" + * did not exist in the database. 
+ * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param key Key to delete within database + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void delete(final ColumnFamilyHandle columnFamilyHandle, + final byte[] key, int offset, int len) throws RocksDBException { + delete(nativeHandle_, key, offset, len, columnFamilyHandle.nativeHandle_); + } + /** * Remove the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" @@ -1145,6 +1712,25 @@ public class RocksDB extends RocksObject { delete(nativeHandle_, writeOpt.nativeHandle_, key, 0, key.length); } + /** + * Delete the database entry (if any) for "key". Returns OK on + * success, and a non-OK status on error. It is not an error if "key" + * did not exist in the database. + * + * @param writeOpt WriteOptions to be used with delete operation + * @param key Key to delete within database + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void delete(final WriteOptions writeOpt, final byte[] key, int offset, int len) + throws RocksDBException { + delete(nativeHandle_, writeOpt.nativeHandle_, key, offset, len); + } + /** * Remove the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" @@ -1187,6 +1773,29 @@ public class RocksDB extends RocksObject { columnFamilyHandle.nativeHandle_); } + /** + * Delete the database entry (if any) for "key". 
Returns OK on + * success, and a non-OK status on error. It is not an error if "key" + * did not exist in the database. + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * @param writeOpt WriteOptions to be used with delete operation + * @param key Key to delete within database + * @param offset the offset of the "key" array to be used, must be non-negative and + * no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative and + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void delete(final ColumnFamilyHandle columnFamilyHandle, + final WriteOptions writeOpt, final byte[] key, int offset, int len) + throws RocksDBException { + delete(nativeHandle_, writeOpt.nativeHandle_, key, offset, len, + columnFamilyHandle.nativeHandle_); + } + /** * Remove the database entry for {@code key}. Requires that the key exists * and was not overwritten. It is not an error if the key did not exist @@ -1518,6 +2127,31 @@ public class RocksDB extends RocksObject { property, property.length()); } + /** + *

Return sum of the getLongProperty of all the column families

+ * + *

Note: As the returned property is of type + * {@code uint64_t} on C++ side the returning value can be negative + * because Java supports in Java 7 only signed long values.

+ * + *

Java 7: To mitigate the problem of the non + * existent unsigned long tpye, values should be encapsulated using + * {@link java.math.BigInteger} to reflect the correct value. The correct + * behavior is guaranteed if {@code 2^64} is added to negative values.

+ * + *

Java 8: In Java 8 the value should be treated as + * unsigned long using provided methods of type {@link Long}.

+ * + * @param property to be fetched. + * + * @return numerical property value + * + * @throws RocksDBException if an error happens in the underlying native code. + */ + public long getAggregatedLongProperty(final String property) throws RocksDBException { + return getAggregatedLongProperty(nativeHandle_, property, property.length()); + } + /** *

Return a heap-allocated iterator over the contents of the * database. The result of newIterator() is initially invalid @@ -2383,6 +3017,8 @@ public class RocksDB extends RocksObject { int propertyLength) throws RocksDBException; protected native long getLongProperty(long nativeHandle, long cfHandle, String property, int propertyLength) throws RocksDBException; + protected native long getAggregatedLongProperty(long nativeHandle, String property, + int propertyLength) throws RocksDBException; protected native long iterator(long handle); protected native long iterator(long handle, long readOptHandle); protected native long iteratorCF(long handle, long cfHandle); diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksDBException.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksDBException.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksDBException.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksDBException.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksEnv.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksEnv.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksEnv.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksEnv.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksIterator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksIterator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksIterator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksIterator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksIteratorInterface.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksIteratorInterface.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksIteratorInterface.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksIteratorInterface.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksMemEnv.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksMemEnv.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksMemEnv.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksMemEnv.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksMutableObject.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksMutableObject.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksMutableObject.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksMutableObject.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksObject.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksObject.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/RocksObject.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/RocksObject.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Slice.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Slice.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Slice.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Slice.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Snapshot.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Snapshot.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Snapshot.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Snapshot.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/SstFileManager.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/SstFileManager.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/SstFileManager.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/SstFileManager.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/SstFileWriter.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/SstFileWriter.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/SstFileWriter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/SstFileWriter.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Statistics.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Statistics.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Statistics.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Statistics.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatisticsCollector.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatisticsCollector.java similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatisticsCollector.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatisticsCollector.java index 48cf8af88e..fb3f57150f 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatisticsCollector.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatisticsCollector.java @@ -93,9 
+93,9 @@ public class StatisticsCollector { statsCallback.histogramCallback(histogramType, histogramData); } } - - Thread.sleep(_statsCollectionInterval); } + + Thread.sleep(_statsCollectionInterval); } catch (final InterruptedException e) { Thread.currentThread().interrupt(); diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatsCollectorInput.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatsCollectorInput.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatsCollectorInput.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatsCollectorInput.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatsLevel.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatsLevel.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StatsLevel.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StatsLevel.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Status.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Status.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Status.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Status.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StringAppendOperator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StringAppendOperator.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/StringAppendOperator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/StringAppendOperator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TableFormatConfig.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TableFormatConfig.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TableFormatConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TableFormatConfig.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TickerType.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TickerType.java similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TickerType.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TickerType.java index fdcf62ff8a..08ed18fb3e 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TickerType.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TickerType.java @@ -304,9 +304,9 @@ public enum TickerType { RATE_LIMIT_DELAY_MILLIS((byte) 0x37), /** - * Number of iterators currently open. + * Number of iterators created. */ - NO_ITERATORS((byte) 0x38), + NO_ITERATOR_CREATED((byte) 0x38), /** * Number of MultiGet calls. @@ -475,7 +475,12 @@ public enum TickerType { */ NUMBER_MULTIGET_KEYS_FOUND((byte) 0x5E), - TICKER_ENUM_MAX((byte) 0x5F); + /** + * Number of iterators deleted. 
+ */ + NO_ITERATOR_DELETED((byte) 0x5F), + + TICKER_ENUM_MAX((byte) 0x60); private final byte value; diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Transaction.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Transaction.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/Transaction.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/Transaction.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionDB.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionDB.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionDB.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionDB.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionDBOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionDBOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionDBOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionDBOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionLogIterator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionLogIterator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionLogIterator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionLogIterator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionOptions.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionalDB.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionalDB.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionalDB.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionalDB.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionalOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionalOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TransactionalOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TransactionalOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TtlDB.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TtlDB.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TtlDB.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TtlDB.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java diff --git a/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/UInt64AddOperator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/UInt64AddOperator.java new file mode 100644 index 0000000000..cce9b298d8 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/UInt64AddOperator.java @@ -0,0 +1,19 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * Uint64AddOperator is a merge operator that accumlates a long + * integer value. + */ +public class UInt64AddOperator extends MergeOperator { + public UInt64AddOperator() { + super(newSharedUInt64AddOperator()); + } + + private native static long newSharedUInt64AddOperator(); + @Override protected final native void disposeInternal(final long handle); +} diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/VectorMemTableConfig.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/VectorMemTableConfig.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/VectorMemTableConfig.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/VectorMemTableConfig.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WALRecoveryMode.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WALRecoveryMode.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WALRecoveryMode.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WALRecoveryMode.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WBWIRocksIterator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WBWIRocksIterator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WBWIRocksIterator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WBWIRocksIterator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteBatch.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBatch.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteBatch.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBatch.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteBatchInterface.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBatchInterface.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteBatchInterface.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBatchInterface.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java diff --git a/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBufferManager.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBufferManager.java new file mode 100644 index 0000000000..a5f80644fb --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteBufferManager.java @@ -0,0 +1,30 @@ +package org.rocksdb; + +import org.rocksdb.Cache; + +/** + * Java wrapper over native write_buffer_manager class + */ +public class WriteBufferManager extends RocksObject { + static { + RocksDB.loadLibrary(); + } + + /** + * Construct a new instance of WriteBufferManager. 
+ * + * Check + * https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager + * for more details on when to use it + * + * @param bufferSizeBytes buffer size(in bytes) to use for native write_buffer_manager + * @param cache cache whose memory should be bounded by this write buffer manager + */ + public WriteBufferManager(final long bufferSizeBytes, final Cache cache){ + super(newWriteBufferManager(bufferSizeBytes, cache.nativeHandle_)); + } + + private native static long newWriteBufferManager(final long bufferSizeBytes, final long cacheHandle); + @Override + protected native void disposeInternal(final long handle); +} diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteOptions.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteOptions.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/WriteOptions.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/WriteOptions.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/BytewiseComparator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/BytewiseComparator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/BytewiseComparator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/BytewiseComparator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/DirectBytewiseComparator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/DirectBytewiseComparator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/DirectBytewiseComparator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/DirectBytewiseComparator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/Environment.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/Environment.java similarity index 100% 
rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/Environment.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/Environment.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/SizeUnit.java b/3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/SizeUnit.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/main/java/org/rocksdb/util/SizeUnit.java rename to 3rdParty/rocksdb/v5.18.X/java/src/main/java/org/rocksdb/util/SizeUnit.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/AbstractComparatorTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/AbstractComparatorTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/AbstractComparatorTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/AbstractComparatorTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/AbstractTransactionTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/AbstractTransactionTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/AbstractTransactionTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/AbstractTransactionTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/BackupEngineTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/BackupEngineTest.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/BackupEngineTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/BackupEngineTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java similarity index 83% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java index 2b15b69f81..754cf11c03 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java @@ -95,6 +95,46 @@ public class BlockBasedTableConfigTest { } + @Test + public void cacheIndexAndFilterBlocksWithHighPriority() { + BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); + blockBasedTableConfig.setCacheIndexAndFilterBlocksWithHighPriority(true); + assertThat(blockBasedTableConfig.cacheIndexAndFilterBlocksWithHighPriority()). + isTrue(); + } + + @Test + public void pinL0FilterAndIndexBlocksInCache() { + BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); + blockBasedTableConfig.setPinL0FilterAndIndexBlocksInCache(true); + assertThat(blockBasedTableConfig.pinL0FilterAndIndexBlocksInCache()). 
+ isTrue(); + } + + @Test + public void partitionFilters() { + BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); + blockBasedTableConfig.setPartitionFilters(true); + assertThat(blockBasedTableConfig.partitionFilters()). + isTrue(); + } + + @Test + public void metadataBlockSize() { + BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); + blockBasedTableConfig.setMetadataBlockSize(1024); + assertThat(blockBasedTableConfig.metadataBlockSize()). + isEqualTo(1024); + } + + @Test + public void pinTopLevelIndexAndFilter() { + BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); + blockBasedTableConfig.setPinTopLevelIndexAndFilter(false); + assertThat(blockBasedTableConfig.pinTopLevelIndexAndFilter()). + isFalse(); + } + @Test public void hashIndexAllowCollision() { BlockBasedTableConfig blockBasedTableConfig = new BlockBasedTableConfig(); diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CheckPointTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CheckPointTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CheckPointTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CheckPointTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ClockCacheTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ClockCacheTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ClockCacheTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ClockCacheTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ColumnFamilyTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ColumnFamilyTest.java similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ColumnFamilyTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ColumnFamilyTest.java index 3df63c65ff..0b943ac96e 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ColumnFamilyTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ColumnFamilyTest.java @@ -404,6 +404,10 @@ public class ColumnFamilyTest { "rocksdb.stats")).isNotNull(); assertThat(db.getProperty(columnFamilyHandleList.get(1), "rocksdb.sstables")).isNotNull(); + assertThat(db.getAggregatedLongProperty("rocksdb.estimate-num-keys")). + isNotNull(); + assertThat(db.getAggregatedLongProperty("rocksdb.estimate-num-keys")). + isGreaterThanOrEqualTo(0); } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java similarity index 52% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java index 370a28e819..df4c98ec14 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java @@ -18,9 +18,27 @@ public class CompactionOptionsFIFOTest { @Test public void maxTableFilesSize() { final long size = 500 * 1024 * 1026; - try(final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { + try (final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { opt.setMaxTableFilesSize(size); assertThat(opt.maxTableFilesSize()).isEqualTo(size); } } + + @Test + public void ttl() { + final long ttl = 7 * 24 * 60 * 60; // 7 days + try (final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { + opt.setTtl(ttl); + assertThat(opt.ttl()).isEqualTo(ttl); + } + } + + @Test + public void allowCompaction() { + final boolean allowCompaction = true; + try (final CompactionOptionsFIFO opt = new CompactionOptionsFIFO()) { + opt.setAllowCompaction(allowCompaction); + assertThat(opt.allowCompaction()).isEqualTo(allowCompaction); + } + } } diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionPriorityTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionPriorityTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionPriorityTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionPriorityTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ComparatorTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ComparatorTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ComparatorTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ComparatorTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompressionOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompressionOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompressionOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompressionOptionsTest.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompressionTypesTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompressionTypesTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/CompressionTypesTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/CompressionTypesTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/DBOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/DBOptionsTest.java similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/DBOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/DBOptionsTest.java index 453639d574..bad01c4354 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/DBOptionsTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/DBOptionsTest.java @@ -424,6 +424,26 @@ public class DBOptionsTest { } } + @Test + public void setWriteBufferManager() throws RocksDBException { + try (final DBOptions opt = new DBOptions(); + final Cache cache = new LRUCache(1 * 1024 * 1024); + final WriteBufferManager writeBufferManager = new WriteBufferManager(2000l, cache)) { + opt.setWriteBufferManager(writeBufferManager); + assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); + } + } + + @Test + public void setWriteBufferManagerWithZeroBufferSize() throws RocksDBException { + try (final DBOptions opt = new DBOptions(); + final Cache cache = new LRUCache(1 * 1024 * 1024); + final WriteBufferManager writeBufferManager = new WriteBufferManager(0l, cache)) { + opt.setWriteBufferManager(writeBufferManager); + assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); + } + } + @Test public void accessHintOnCompactionStart() { try(final DBOptions opt = new DBOptions()) { diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/DirectComparatorTest.java 
b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/DirectComparatorTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/DirectComparatorTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/DirectComparatorTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/DirectSliceTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/DirectSliceTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/DirectSliceTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/DirectSliceTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/EnvOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/EnvOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/EnvOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/EnvOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/FilterTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/FilterTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/FilterTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/FilterTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/FlushTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/FlushTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/FlushTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/FlushTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/InfoLogLevelTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/InfoLogLevelTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/InfoLogLevelTest.java 
rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/InfoLogLevelTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/KeyMayExistTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/KeyMayExistTest.java similarity index 64% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/KeyMayExistTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/KeyMayExistTest.java index 8092270eb2..577fe2eadf 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/KeyMayExistTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/KeyMayExistTest.java @@ -48,12 +48,33 @@ public class KeyMayExistTest { assertThat(exists).isTrue(); assertThat(retValue.toString()).isEqualTo("value"); + // Slice key + StringBuilder builder = new StringBuilder("prefix"); + int offset = builder.toString().length(); + builder.append("slice key 0"); + int len = builder.toString().length() - offset; + builder.append("suffix"); + + byte[] sliceKey = builder.toString().getBytes(); + byte[] sliceValue = "slice value 0".getBytes(); + db.put(sliceKey, offset, len, sliceValue, 0, sliceValue.length); + + retValue = new StringBuilder(); + exists = db.keyMayExist(sliceKey, offset, len, retValue); + assertThat(exists).isTrue(); + assertThat(retValue.toString().getBytes()).isEqualTo(sliceValue); + // Test without column family but with readOptions try (final ReadOptions readOptions = new ReadOptions()) { retValue = new StringBuilder(); exists = db.keyMayExist(readOptions, "key".getBytes(), 
retValue); assertThat(exists).isTrue(); assertThat(retValue.toString()).isEqualTo("value"); + + retValue = new StringBuilder(); + exists = db.keyMayExist(readOptions, sliceKey, offset, len, retValue); + assertThat(exists).isTrue(); + assertThat(retValue.toString().getBytes()).isEqualTo(sliceValue); } // Test with column family @@ -63,6 +84,13 @@ public class KeyMayExistTest { assertThat(exists).isTrue(); assertThat(retValue.toString()).isEqualTo("value"); + // Test slice sky with column family + retValue = new StringBuilder(); + exists = db.keyMayExist(columnFamilyHandleList.get(0), sliceKey, offset, len, + retValue); + assertThat(exists).isTrue(); + assertThat(retValue.toString().getBytes()).isEqualTo(sliceValue); + // Test with column family and readOptions try (final ReadOptions readOptions = new ReadOptions()) { retValue = new StringBuilder(); @@ -71,11 +99,23 @@ public class KeyMayExistTest { retValue); assertThat(exists).isTrue(); assertThat(retValue.toString()).isEqualTo("value"); + + // Test slice key with column family and read options + retValue = new StringBuilder(); + exists = db.keyMayExist(readOptions, + columnFamilyHandleList.get(0), sliceKey, offset, len, + retValue); + assertThat(exists).isTrue(); + assertThat(retValue.toString().getBytes()).isEqualTo(sliceValue); } // KeyMayExist in CF1 must return false assertThat(db.keyMayExist(columnFamilyHandleList.get(1), "key".getBytes(), retValue)).isFalse(); + + // slice key + assertThat(db.keyMayExist(columnFamilyHandleList.get(1), + sliceKey, 1, 3, retValue)).isFalse(); } finally { for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/LRUCacheTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/LRUCacheTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/LRUCacheTest.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/LRUCacheTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/LoggerTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/LoggerTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/LoggerTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/LoggerTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MemTableTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MemTableTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MemTableTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MemTableTest.java diff --git a/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MemoryUtilTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MemoryUtilTest.java new file mode 100644 index 0000000000..73fcc87c32 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MemoryUtilTest.java @@ -0,0 +1,143 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +package org.rocksdb; + +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.nio.charset.StandardCharsets; +import java.util.*; + +import static org.assertj.core.api.Assertions.assertThat; + +public class MemoryUtilTest { + + private static final String MEMTABLE_SIZE = "rocksdb.size-all-mem-tables"; + private static final String UNFLUSHED_MEMTABLE_SIZE = "rocksdb.cur-size-all-mem-tables"; + private static final String TABLE_READERS = "rocksdb.estimate-table-readers-mem"; + + private final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8); + private final byte[] value = "some-value".getBytes(StandardCharsets.UTF_8); + + @ClassRule + public static final RocksMemoryResource rocksMemoryResource = + new RocksMemoryResource(); + + @Rule public TemporaryFolder dbFolder1 = new TemporaryFolder(); + @Rule public TemporaryFolder dbFolder2 = new TemporaryFolder(); + + /** + * Test MemoryUtil.getApproximateMemoryUsageByType before and after a put + get + */ + @Test + public void getApproximateMemoryUsageByType() throws RocksDBException { + try (final Cache cache = new LRUCache(8 * 1024 * 1024); + final Options options = + new Options() + .setCreateIfMissing(true) + .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache)); + final FlushOptions flushOptions = + new FlushOptions().setWaitForFlush(true); + final RocksDB db = + RocksDB.open(options, dbFolder1.getRoot().getAbsolutePath())) { + + List dbs = new ArrayList<>(1); + dbs.add(db); + Set caches = new HashSet<>(1); + caches.add(cache); + Map usage = MemoryUtil.getApproximateMemoryUsageByType(dbs, caches); + + assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo( + db.getAggregatedLongProperty(MEMTABLE_SIZE)); + assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo( + db.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE)); + assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo( + 
db.getAggregatedLongProperty(TABLE_READERS)); + assertThat(usage.get(MemoryUsageType.kCacheTotal)).isEqualTo(0); + + db.put(key, value); + db.flush(flushOptions); + db.get(key); + + usage = MemoryUtil.getApproximateMemoryUsageByType(dbs, caches); + assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isGreaterThan(0); + assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo( + db.getAggregatedLongProperty(MEMTABLE_SIZE)); + assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isGreaterThan(0); + assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo( + db.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE)); + assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isGreaterThan(0); + assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo( + db.getAggregatedLongProperty(TABLE_READERS)); + assertThat(usage.get(MemoryUsageType.kCacheTotal)).isGreaterThan(0); + + } + } + + /** + * Test MemoryUtil.getApproximateMemoryUsageByType with null inputs + */ + @Test + public void getApproximateMemoryUsageByTypeNulls() throws RocksDBException { + Map usage = MemoryUtil.getApproximateMemoryUsageByType(null, null); + + assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo(null); + assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo(null); + assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo(null); + assertThat(usage.get(MemoryUsageType.kCacheTotal)).isEqualTo(null); + } + + /** + * Test MemoryUtil.getApproximateMemoryUsageByType with two DBs and two caches + */ + @Test + public void getApproximateMemoryUsageByTypeMultiple() throws RocksDBException { + try (final Cache cache1 = new LRUCache(1 * 1024 * 1024); + final Options options1 = + new Options() + .setCreateIfMissing(true) + .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache1)); + final RocksDB db1 = + RocksDB.open(options1, dbFolder1.getRoot().getAbsolutePath()); + final Cache cache2 = new LRUCache(1 * 1024 * 1024); + 
final Options options2 = + new Options() + .setCreateIfMissing(true) + .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(cache2)); + final RocksDB db2 = + RocksDB.open(options2, dbFolder2.getRoot().getAbsolutePath()); + final FlushOptions flushOptions = + new FlushOptions().setWaitForFlush(true); + + ) { + List dbs = new ArrayList<>(1); + dbs.add(db1); + dbs.add(db2); + Set caches = new HashSet<>(1); + caches.add(cache1); + caches.add(cache2); + + for (RocksDB db: dbs) { + db.put(key, value); + db.flush(flushOptions); + db.get(key); + } + + Map usage = MemoryUtil.getApproximateMemoryUsageByType(dbs, caches); + assertThat(usage.get(MemoryUsageType.kMemTableTotal)).isEqualTo( + db1.getAggregatedLongProperty(MEMTABLE_SIZE) + db2.getAggregatedLongProperty(MEMTABLE_SIZE)); + assertThat(usage.get(MemoryUsageType.kMemTableUnFlushed)).isEqualTo( + db1.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE) + db2.getAggregatedLongProperty(UNFLUSHED_MEMTABLE_SIZE)); + assertThat(usage.get(MemoryUsageType.kTableReadersTotal)).isEqualTo( + db1.getAggregatedLongProperty(TABLE_READERS) + db2.getAggregatedLongProperty(TABLE_READERS)); + assertThat(usage.get(MemoryUsageType.kCacheTotal)).isGreaterThan(0); + + } + } + +} diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MergeTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MergeTest.java similarity index 53% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MergeTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MergeTest.java index 73b90869cf..b2ec62635a 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MergeTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MergeTest.java @@ -5,6 +5,7 @@ package org.rocksdb; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.ArrayList; @@ -44,6 +45,38 @@ public class MergeTest { } } + private byte[] longToByteArray(long l) { + 
ByteBuffer buf = ByteBuffer.allocate(Long.BYTES); + buf.putLong(l); + return buf.array(); + } + + private long longFromByteArray(byte[] a) { + ByteBuffer buf = ByteBuffer.allocate(Long.BYTES); + buf.put(a); + buf.flip(); + return buf.getLong(); + } + + @Test + public void uint64AddOption() + throws InterruptedException, RocksDBException { + try (final Options opt = new Options() + .setCreateIfMissing(true) + .setMergeOperatorName("uint64add"); + final RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath())) { + // writing (long)100 under key + db.put("key".getBytes(), longToByteArray(100)); + // merge (long)1 under key + db.merge("key".getBytes(), longToByteArray(1)); + + final byte[] value = db.get("key".getBytes()); + final long longValue = longFromByteArray(value); + assertThat(longValue).isEqualTo(101); + } + } + @Test public void cFStringOption() throws InterruptedException, RocksDBException { @@ -86,6 +119,48 @@ public class MergeTest { } } + @Test + public void cFUInt64AddOption() + throws InterruptedException, RocksDBException { + + try (final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() + .setMergeOperatorName("uint64add"); + final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() + .setMergeOperatorName("uint64add") + ) { + final List cfDescriptors = Arrays.asList( + new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), + new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt2) + ); + + final List columnFamilyHandleList = new ArrayList<>(); + try (final DBOptions opt = new DBOptions() + .setCreateIfMissing(true) + .setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath(), cfDescriptors, + columnFamilyHandleList)) { + try { + // writing (long)100 under key + db.put(columnFamilyHandleList.get(1), + "cfkey".getBytes(), longToByteArray(100)); + // merge (long)1 under key + db.merge(columnFamilyHandleList.get(1), + "cfkey".getBytes(), 
longToByteArray(1)); + + byte[] value = db.get(columnFamilyHandleList.get(1), + "cfkey".getBytes()); + long longValue = longFromByteArray(value); + assertThat(longValue).isEqualTo(101); + } finally { + for (final ColumnFamilyHandle handle : columnFamilyHandleList) { + handle.close(); + } + } + } + } + } + @Test public void operatorOption() throws InterruptedException, RocksDBException { @@ -108,6 +183,28 @@ public class MergeTest { } } + @Test + public void uint64AddOperatorOption() + throws InterruptedException, RocksDBException { + try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); + final Options opt = new Options() + .setCreateIfMissing(true) + .setMergeOperator(uint64AddOperator); + final RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath())) { + // Writing (long)100 under key + db.put("key".getBytes(), longToByteArray(100)); + + // Writing (long)1 under key + db.merge("key".getBytes(), longToByteArray(1)); + + final byte[] value = db.get("key".getBytes()); + final long longValue = longFromByteArray(value); + + assertThat(longValue).isEqualTo(101); + } + } + @Test public void cFOperatorOption() throws InterruptedException, RocksDBException { @@ -170,6 +267,68 @@ public class MergeTest { } } + @Test + public void cFUInt64AddOperatorOption() + throws InterruptedException, RocksDBException { + try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); + final ColumnFamilyOptions cfOpt1 = new ColumnFamilyOptions() + .setMergeOperator(uint64AddOperator); + final ColumnFamilyOptions cfOpt2 = new ColumnFamilyOptions() + .setMergeOperator(uint64AddOperator) + ) { + final List cfDescriptors = Arrays.asList( + new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), + new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpt2) + ); + final List columnFamilyHandleList = new ArrayList<>(); + try (final DBOptions opt = new DBOptions() + .setCreateIfMissing(true) + .setCreateMissingColumnFamilies(true); + final 
RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath(), cfDescriptors, + columnFamilyHandleList) + ) { + try { + // writing (long)100 under key + db.put(columnFamilyHandleList.get(1), + "cfkey".getBytes(), longToByteArray(100)); + // merge (long)1 under key + db.merge(columnFamilyHandleList.get(1), + "cfkey".getBytes(), longToByteArray(1)); + byte[] value = db.get(columnFamilyHandleList.get(1), + "cfkey".getBytes()); + long longValue = longFromByteArray(value); + + // Test also with createColumnFamily + try (final ColumnFamilyOptions cfHandleOpts = + new ColumnFamilyOptions() + .setMergeOperator(uint64AddOperator); + final ColumnFamilyHandle cfHandle = + db.createColumnFamily( + new ColumnFamilyDescriptor("new_cf2".getBytes(), + cfHandleOpts)) + ) { + // writing (long)200 under cfkey2 + db.put(cfHandle, "cfkey2".getBytes(), longToByteArray(200)); + // merge (long)50 under cfkey2 + db.merge(cfHandle, new WriteOptions(), "cfkey2".getBytes(), + longToByteArray(50)); + value = db.get(cfHandle, "cfkey2".getBytes()); + long longValueTmpCf = longFromByteArray(value); + + assertThat(longValue).isEqualTo(101); + assertThat(longValueTmpCf).isEqualTo(250); + } + } finally { + for (final ColumnFamilyHandle columnFamilyHandle : + columnFamilyHandleList) { + columnFamilyHandle.close(); + } + } + } + } + } + @Test public void operatorGcBehaviour() throws RocksDBException { @@ -182,7 +341,6 @@ public class MergeTest { //no-op } - // test reuse try (final Options opt = new Options() .setMergeOperator(stringAppendOperator); @@ -213,6 +371,48 @@ public class MergeTest { } } + @Test + public void uint64AddOperatorGcBehaviour() + throws RocksDBException { + try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator()) { + try (final Options opt = new Options() + .setCreateIfMissing(true) + .setMergeOperator(uint64AddOperator); + final RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath())) { + //no-op + } + + // test reuse + try (final Options 
opt = new Options() + .setMergeOperator(uint64AddOperator); + final RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath())) { + //no-op + } + + // test param init + try (final UInt64AddOperator uint64AddOperator2 = new UInt64AddOperator(); + final Options opt = new Options() + .setMergeOperator(uint64AddOperator2); + final RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath())) { + //no-op + } + + // test replace one with another merge operator instance + try (final Options opt = new Options() + .setMergeOperator(uint64AddOperator); + final UInt64AddOperator newUInt64AddOperator = new UInt64AddOperator()) { + opt.setMergeOperator(newUInt64AddOperator); + try (final RocksDB db = RocksDB.open(opt, + dbFolder.getRoot().getAbsolutePath())) { + //no-op + } + } + } + } + @Test public void emptyStringInSetMergeOperatorByName() { try (final Options opt = new Options() diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MixedOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MixedOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MixedOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MixedOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptionsTest.java 
b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptionsTest.java similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptionsTest.java index 7f7679d732..2571c3e26f 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptionsTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptionsTest.java @@ -645,6 +645,26 @@ public class OptionsTest { } } + @Test + public void setWriteBufferManager() throws RocksDBException { + try (final Options opt = new Options(); + final Cache cache = new LRUCache(1 * 1024 * 1024); + final WriteBufferManager writeBufferManager = new WriteBufferManager(2000l, cache)) { + opt.setWriteBufferManager(writeBufferManager); + assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); + } + } + + @Test + public void setWriteBufferManagerWithZeroBufferSize() throws RocksDBException { + try (final Options opt = new Options(); + final Cache cache = new LRUCache(1 * 1024 * 1024); + final WriteBufferManager writeBufferManager = new WriteBufferManager(0l, cache)) { + opt.setWriteBufferManager(writeBufferManager); + assertThat(opt.writeBufferManager()).isEqualTo(writeBufferManager); + } + } + @Test public void accessHintOnCompactionStart() { try (final Options opt = new Options()) { diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptionsUtilTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptionsUtilTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/OptionsUtilTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/OptionsUtilTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/PlainTableConfigTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/PlainTableConfigTest.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/PlainTableConfigTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/PlainTableConfigTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/PlatformRandomHelper.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/PlatformRandomHelper.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/PlatformRandomHelper.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/PlatformRandomHelper.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RateLimiterTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RateLimiterTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RateLimiterTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RateLimiterTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ReadOnlyTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ReadOnlyTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ReadOnlyTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ReadOnlyTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ReadOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ReadOptionsTest.java similarity index 86% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ReadOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ReadOptionsTest.java index f7d799909d..4e860ae4cc 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/ReadOptionsTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/ReadOptionsTest.java @@ -144,16 +144,34 @@ public class ReadOptionsTest { } } + @Test + public void iterateLowerBound() { + try (final ReadOptions opt = new 
ReadOptions()) { + Slice lowerBound = buildRandomSlice(); + opt.setIterateLowerBound(lowerBound); + assertThat(Arrays.equals(lowerBound.data(), opt.iterateLowerBound().data())).isTrue(); + } + } + + @Test + public void iterateLowerBoundNull() { + try (final ReadOptions opt = new ReadOptions()) { + assertThat(opt.iterateLowerBound()).isNull(); + } + } + @Test public void copyConstructor() { try (final ReadOptions opt = new ReadOptions()) { opt.setVerifyChecksums(false); opt.setFillCache(false); opt.setIterateUpperBound(buildRandomSlice()); + opt.setIterateLowerBound(buildRandomSlice()); ReadOptions other = new ReadOptions(opt); assertThat(opt.verifyChecksums()).isEqualTo(other.verifyChecksums()); assertThat(opt.fillCache()).isEqualTo(other.fillCache()); assertThat(Arrays.equals(opt.iterateUpperBound().data(), other.iterateUpperBound().data())).isTrue(); + assertThat(Arrays.equals(opt.iterateLowerBound().data(), other.iterateLowerBound().data())).isTrue(); } } @@ -237,6 +255,22 @@ public class ReadOptionsTest { } } + @Test + public void failSetIterateLowerBoundUninitialized() { + try (final ReadOptions readOptions = + setupUninitializedReadOptions(exception)) { + readOptions.setIterateLowerBound(null); + } + } + + @Test + public void failIterateLowerBoundUninitialized() { + try (final ReadOptions readOptions = + setupUninitializedReadOptions(exception)) { + readOptions.iterateLowerBound(); + } + } + private ReadOptions setupUninitializedReadOptions( ExpectedException exception) { final ReadOptions readOptions = new ReadOptions(); diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java diff --git 
a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksDBTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksDBTest.java similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksDBTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksDBTest.java index 158b8d56a8..66ebc69db8 100644 --- a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -4,6 +4,7 @@ // (found in the LICENSE.Apache file in the root directory). package org.rocksdb; +import org.junit.Assert; import org.junit.Assume; import org.junit.ClassRule; import org.junit.Rule; @@ -11,6 +12,7 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; +import java.nio.ByteBuffer; import java.util.*; import static org.assertj.core.api.Assertions.assertThat; @@ -70,6 +72,57 @@ public class RocksDBTest { "value".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo( "12345678".getBytes()); + + + // put + Segment key3 = sliceSegment("key3"); + Segment key4 = sliceSegment("key4"); + Segment value0 = sliceSegment("value 0"); + Segment value1 = sliceSegment("value 1"); + db.put(key3.data, key3.offset, key3.len, value0.data, value0.offset, value0.len); + db.put(opt, key4.data, key4.offset, key4.len, value1.data, value1.offset, value1.len); + + // compare + Assert.assertTrue(value0.isSamePayload(db.get(key3.data, key3.offset, key3.len))); + Assert.assertTrue(value1.isSamePayload(db.get(key4.data, key4.offset, key4.len))); + } + } + + private static Segment sliceSegment(String key) { + ByteBuffer rawKey = ByteBuffer.allocate(key.length() + 4); + rawKey.put((byte)0); + rawKey.put((byte)0); + rawKey.put(key.getBytes()); + + return new Segment(rawKey.array(), 2, key.length()); + } + + private static class Segment { + final byte[] data; + final int offset; + final int len; + + 
public boolean isSamePayload(byte[] value) { + if (value == null) { + return false; + } + if (value.length != len) { + return false; + } + + for (int i = 0; i < value.length; i++) { + if (data[i + offset] != value[i]) { + return false; + } + } + + return true; + } + + public Segment(byte[] value, int offset, int len) { + this.data = value; + this.offset = offset; + this.len = len; } } @@ -242,6 +295,18 @@ public class RocksDBTest { db.merge(wOpt, "key2".getBytes(), "xxxx".getBytes()); assertThat(db.get("key2".getBytes())).isEqualTo( "xxxx".getBytes()); + + Segment key3 = sliceSegment("key3"); + Segment key4 = sliceSegment("key4"); + Segment value0 = sliceSegment("value 0"); + Segment value1 = sliceSegment("value 1"); + + db.merge(key3.data, key3.offset, key3.len, value0.data, value0.offset, value0.len); + db.merge(wOpt, key4.data, key4.offset, key4.len, value1.data, value1.offset, value1.len); + + // compare + Assert.assertTrue(value0.isSamePayload(db.get(key3.data, key3.offset, key3.len))); + Assert.assertTrue(value1.isSamePayload(db.get(key4.data, key4.offset, key4.len))); } } @@ -259,6 +324,18 @@ public class RocksDBTest { db.delete(wOpt, "key2".getBytes()); assertThat(db.get("key1".getBytes())).isNull(); assertThat(db.get("key2".getBytes())).isNull(); + + + Segment key3 = sliceSegment("key3"); + Segment key4 = sliceSegment("key4"); + db.put("key3".getBytes(), "key3 value".getBytes()); + db.put("key4".getBytes(), "key4 value".getBytes()); + + db.delete(key3.data, key3.offset, key3.len); + db.delete(wOpt, key4.data, key4.offset, key4.len); + + assertThat(db.get("key3".getBytes())).isNull(); + assertThat(db.get("key4".getBytes())).isNull(); } } diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksEnvTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksEnvTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksEnvTest.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksEnvTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksIteratorTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksIteratorTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksIteratorTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksIteratorTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksMemEnvTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksMemEnvTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksMemEnvTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksMemEnvTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksMemoryResource.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksMemoryResource.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/RocksMemoryResource.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/RocksMemoryResource.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SliceTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SliceTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SliceTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SliceTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SnapshotTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SnapshotTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SnapshotTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SnapshotTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SstFileManagerTest.java 
b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SstFileManagerTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SstFileManagerTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SstFileManagerTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SstFileWriterTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SstFileWriterTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/SstFileWriterTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/SstFileWriterTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/StatisticsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/StatisticsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/StatisticsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/StatisticsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/StatsCallbackMock.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/StatsCallbackMock.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/StatsCallbackMock.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/StatsCallbackMock.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java similarity 
index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionDBTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionDBTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionDBTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionDBTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TransactionTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TransactionTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TtlDBTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TtlDBTest.java similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/TtlDBTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/TtlDBTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/Types.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/Types.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/Types.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/Types.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteOptionsTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteOptionsTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/WriteOptionsTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/WriteOptionsTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java rename to 
3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/EnvironmentTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/EnvironmentTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/EnvironmentTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/EnvironmentTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/SizeUnitTest.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/SizeUnitTest.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/SizeUnitTest.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/SizeUnitTest.java diff --git a/3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java b/3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java rename to 3rdParty/rocksdb/v5.18.X/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java diff --git a/3rdParty/rocksdb/v5.16.X/memtable/alloc_tracker.cc b/3rdParty/rocksdb/v5.18.X/memtable/alloc_tracker.cc similarity index 86% rename from 3rdParty/rocksdb/v5.16.X/memtable/alloc_tracker.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/alloc_tracker.cc index 9889cc4230..a1fa4938c5 100644 --- a/3rdParty/rocksdb/v5.16.X/memtable/alloc_tracker.cc +++ b/3rdParty/rocksdb/v5.18.X/memtable/alloc_tracker.cc @@ -24,7 +24,8 @@ AllocTracker::~AllocTracker() { FreeMem(); } void AllocTracker::Allocate(size_t bytes) { assert(write_buffer_manager_ != nullptr); - if (write_buffer_manager_->enabled()) { + if (write_buffer_manager_->enabled() || + write_buffer_manager_->cost_to_cache()) { bytes_allocated_.fetch_add(bytes, std::memory_order_relaxed); 
write_buffer_manager_->ReserveMem(bytes); } @@ -32,7 +33,8 @@ void AllocTracker::Allocate(size_t bytes) { void AllocTracker::DoneAllocating() { if (write_buffer_manager_ != nullptr && !done_allocating_) { - if (write_buffer_manager_->enabled()) { + if (write_buffer_manager_->enabled() || + write_buffer_manager_->cost_to_cache()) { write_buffer_manager_->ScheduleFreeMem( bytes_allocated_.load(std::memory_order_relaxed)); } else { @@ -47,7 +49,8 @@ void AllocTracker::FreeMem() { DoneAllocating(); } if (write_buffer_manager_ != nullptr && !freed_) { - if (write_buffer_manager_->enabled()) { + if (write_buffer_manager_->enabled() || + write_buffer_manager_->cost_to_cache()) { write_buffer_manager_->FreeMem( bytes_allocated_.load(std::memory_order_relaxed)); } else { diff --git a/3rdParty/rocksdb/v5.16.X/memtable/hash_cuckoo_rep.cc b/3rdParty/rocksdb/v5.18.X/memtable/hash_cuckoo_rep.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/memtable/hash_cuckoo_rep.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/hash_cuckoo_rep.cc index 39078633f6..aa6e3dbf3b 100644 --- a/3rdParty/rocksdb/v5.16.X/memtable/hash_cuckoo_rep.cc +++ b/3rdParty/rocksdb/v5.18.X/memtable/hash_cuckoo_rep.cc @@ -408,6 +408,7 @@ bool HashCuckooRep::QuickInsert(const char* internal_key, const Slice& user_key, const auto bucket_user_key = UserKey(stored_key); if (bucket_user_key.compare(user_key) == 0) { cuckoo_bucket_id = bucket_ids[hid]; + assert(cuckoo_bucket_id != -1); break; } } diff --git a/3rdParty/rocksdb/v5.16.X/memtable/hash_cuckoo_rep.h b/3rdParty/rocksdb/v5.18.X/memtable/hash_cuckoo_rep.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/hash_cuckoo_rep.h rename to 3rdParty/rocksdb/v5.18.X/memtable/hash_cuckoo_rep.h diff --git a/3rdParty/rocksdb/v5.16.X/memtable/hash_linklist_rep.cc b/3rdParty/rocksdb/v5.18.X/memtable/hash_linklist_rep.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/hash_linklist_rep.cc rename to 
3rdParty/rocksdb/v5.18.X/memtable/hash_linklist_rep.cc diff --git a/3rdParty/rocksdb/v5.16.X/memtable/hash_linklist_rep.h b/3rdParty/rocksdb/v5.18.X/memtable/hash_linklist_rep.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/hash_linklist_rep.h rename to 3rdParty/rocksdb/v5.18.X/memtable/hash_linklist_rep.h diff --git a/3rdParty/rocksdb/v5.16.X/memtable/hash_skiplist_rep.cc b/3rdParty/rocksdb/v5.18.X/memtable/hash_skiplist_rep.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/memtable/hash_skiplist_rep.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/hash_skiplist_rep.cc index 93082b1ec2..a5c46011e3 100644 --- a/3rdParty/rocksdb/v5.16.X/memtable/hash_skiplist_rep.cc +++ b/3rdParty/rocksdb/v5.18.X/memtable/hash_skiplist_rep.cc @@ -168,7 +168,7 @@ class HashSkipListRep : public MemTableRep { Bucket* list_; Bucket::Iterator iter_; // here we track if we own list_. If we own it, we are also - // responsible for it's cleaning. This is a poor man's shared_ptr + // responsible for it's cleaning. 
This is a poor man's std::shared_ptr bool own_list_; std::unique_ptr arena_; std::string tmp_; // For passing to EncodeKey diff --git a/3rdParty/rocksdb/v5.16.X/memtable/hash_skiplist_rep.h b/3rdParty/rocksdb/v5.18.X/memtable/hash_skiplist_rep.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/hash_skiplist_rep.h rename to 3rdParty/rocksdb/v5.18.X/memtable/hash_skiplist_rep.h diff --git a/3rdParty/rocksdb/v5.16.X/memtable/inlineskiplist.h b/3rdParty/rocksdb/v5.18.X/memtable/inlineskiplist.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/inlineskiplist.h rename to 3rdParty/rocksdb/v5.18.X/memtable/inlineskiplist.h diff --git a/3rdParty/rocksdb/v5.16.X/memtable/inlineskiplist_test.cc b/3rdParty/rocksdb/v5.18.X/memtable/inlineskiplist_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/inlineskiplist_test.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/inlineskiplist_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/memtable/memtablerep_bench.cc b/3rdParty/rocksdb/v5.18.X/memtable/memtablerep_bench.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/memtablerep_bench.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/memtablerep_bench.cc diff --git a/3rdParty/rocksdb/v5.16.X/memtable/skiplist.h b/3rdParty/rocksdb/v5.18.X/memtable/skiplist.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/skiplist.h rename to 3rdParty/rocksdb/v5.18.X/memtable/skiplist.h diff --git a/3rdParty/rocksdb/v5.16.X/memtable/skiplist_test.cc b/3rdParty/rocksdb/v5.18.X/memtable/skiplist_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/skiplist_test.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/skiplist_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/memtable/skiplistrep.cc b/3rdParty/rocksdb/v5.18.X/memtable/skiplistrep.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/skiplistrep.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/skiplistrep.cc diff --git 
a/3rdParty/rocksdb/v5.16.X/memtable/stl_wrappers.h b/3rdParty/rocksdb/v5.18.X/memtable/stl_wrappers.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/stl_wrappers.h rename to 3rdParty/rocksdb/v5.18.X/memtable/stl_wrappers.h diff --git a/3rdParty/rocksdb/v5.16.X/memtable/vectorrep.cc b/3rdParty/rocksdb/v5.18.X/memtable/vectorrep.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/vectorrep.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/vectorrep.cc diff --git a/3rdParty/rocksdb/v5.16.X/memtable/write_buffer_manager.cc b/3rdParty/rocksdb/v5.18.X/memtable/write_buffer_manager.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/memtable/write_buffer_manager.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/write_buffer_manager.cc index 21b18c8f76..7f2e664ab5 100644 --- a/3rdParty/rocksdb/v5.16.X/memtable/write_buffer_manager.cc +++ b/3rdParty/rocksdb/v5.18.X/memtable/write_buffer_manager.cc @@ -79,7 +79,7 @@ WriteBufferManager::~WriteBufferManager() { void WriteBufferManager::ReserveMemWithCache(size_t mem) { #ifndef ROCKSDB_LITE assert(cache_rep_ != nullptr); - // Use a mutex to protect various data structures. Can be optimzied to a + // Use a mutex to protect various data structures. Can be optimized to a // lock-free solution if it ends up with a performance bottleneck. std::lock_guard lock(cache_rep_->cache_mutex_); @@ -102,14 +102,14 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) { void WriteBufferManager::FreeMemWithCache(size_t mem) { #ifndef ROCKSDB_LITE assert(cache_rep_ != nullptr); - // Use a mutex to protect various data structures. Can be optimzied to a + // Use a mutex to protect various data structures. Can be optimized to a // lock-free solution if it ends up with a performance bottleneck. 
std::lock_guard lock(cache_rep_->cache_mutex_); size_t new_mem_used = memory_used_.load(std::memory_order_relaxed) - mem; memory_used_.store(new_mem_used, std::memory_order_relaxed); // Gradually shrink memory costed in the block cache if the actual // usage is less than 3/4 of what we reserve from the block cache. - // We do this becausse: + // We do this because: // 1. we don't pay the cost of the block cache immediately a memtable is // freed, as block cache insert is expensive; // 2. eventually, if we walk away from a temporary memtable size increase, diff --git a/3rdParty/rocksdb/v5.16.X/memtable/write_buffer_manager_test.cc b/3rdParty/rocksdb/v5.18.X/memtable/write_buffer_manager_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/memtable/write_buffer_manager_test.cc rename to 3rdParty/rocksdb/v5.18.X/memtable/write_buffer_manager_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/file_read_sample.h b/3rdParty/rocksdb/v5.18.X/monitoring/file_read_sample.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/file_read_sample.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/file_read_sample.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/histogram.cc b/3rdParty/rocksdb/v5.18.X/monitoring/histogram.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/histogram.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/histogram.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/histogram.h b/3rdParty/rocksdb/v5.18.X/monitoring/histogram.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/histogram.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/histogram.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/histogram_test.cc b/3rdParty/rocksdb/v5.18.X/monitoring/histogram_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/histogram_test.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/histogram_test.cc diff --git 
a/3rdParty/rocksdb/v5.16.X/monitoring/histogram_windowing.cc b/3rdParty/rocksdb/v5.18.X/monitoring/histogram_windowing.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/monitoring/histogram_windowing.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/histogram_windowing.cc index 5c49fcd16b..ecd6f090a5 100644 --- a/3rdParty/rocksdb/v5.16.X/monitoring/histogram_windowing.cc +++ b/3rdParty/rocksdb/v5.18.X/monitoring/histogram_windowing.cc @@ -17,7 +17,7 @@ namespace rocksdb { HistogramWindowingImpl::HistogramWindowingImpl() { env_ = Env::Default(); - window_stats_.reset(new HistogramStat[num_windows_]); + window_stats_.reset(new HistogramStat[static_cast(num_windows_)]); Clear(); } @@ -29,7 +29,7 @@ HistogramWindowingImpl::HistogramWindowingImpl( micros_per_window_(micros_per_window), min_num_per_window_(min_num_per_window) { env_ = Env::Default(); - window_stats_.reset(new HistogramStat[num_windows_]); + window_stats_.reset(new HistogramStat[static_cast(num_windows_)]); Clear(); } diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/histogram_windowing.h b/3rdParty/rocksdb/v5.18.X/monitoring/histogram_windowing.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/histogram_windowing.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/histogram_windowing.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/instrumented_mutex.cc b/3rdParty/rocksdb/v5.18.X/monitoring/instrumented_mutex.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/instrumented_mutex.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/instrumented_mutex.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/instrumented_mutex.h b/3rdParty/rocksdb/v5.18.X/monitoring/instrumented_mutex.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/instrumented_mutex.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/instrumented_mutex.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/iostats_context.cc b/3rdParty/rocksdb/v5.18.X/monitoring/iostats_context.cc 
similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/iostats_context.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/iostats_context.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/iostats_context_imp.h b/3rdParty/rocksdb/v5.18.X/monitoring/iostats_context_imp.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/iostats_context_imp.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/iostats_context_imp.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/iostats_context_test.cc b/3rdParty/rocksdb/v5.18.X/monitoring/iostats_context_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/iostats_context_test.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/iostats_context_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/perf_context.cc b/3rdParty/rocksdb/v5.18.X/monitoring/perf_context.cc similarity index 78% rename from 3rdParty/rocksdb/v5.16.X/monitoring/perf_context.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/perf_context.cc index 9bba841f8f..423443869b 100644 --- a/3rdParty/rocksdb/v5.16.X/monitoring/perf_context.cc +++ b/3rdParty/rocksdb/v5.18.X/monitoring/perf_context.cc @@ -15,7 +15,7 @@ PerfContext perf_context; #if defined(OS_SOLARIS) __thread PerfContext perf_context_; #else -__thread PerfContext perf_context; +thread_local PerfContext perf_context; #endif #endif @@ -31,6 +31,12 @@ PerfContext* get_perf_context() { #endif } +PerfContext::~PerfContext() { +#if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL) && !defined(OS_SOLARIS) + ClearPerLevelPerfContext(); +#endif +} + void PerfContext::Reset() { #ifndef NPERF_CONTEXT user_key_comparison_count = 0; @@ -104,6 +110,11 @@ void PerfContext::Reset() { env_lock_file_nanos = 0; env_unlock_file_nanos = 0; env_new_logger_nanos = 0; + if (per_level_perf_context_enabled && level_to_perf_context) { + for (auto& kv : *level_to_perf_context) { + kv.second.Reset(); + } + } #endif } @@ -112,6 +123,25 @@ void PerfContext::Reset() { ss 
<< #counter << " = " << counter << ", "; \ } +#define PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(counter) \ + if (per_level_perf_context_enabled && \ + level_to_perf_context) { \ + ss << #counter << " = "; \ + for (auto& kv : *level_to_perf_context) { \ + if (!exclude_zero_counters || (kv.second.counter > 0)) { \ + ss << kv.second.counter << "@level" << kv.first << ", "; \ + } \ + } \ + } + +void PerfContextByLevel::Reset() { +#ifndef NPERF_CONTEXT + bloom_filter_useful = 0; + bloom_filter_full_positive = 0; + bloom_filter_full_true_positive = 0; +#endif +} + std::string PerfContext::ToString(bool exclude_zero_counters) const { #ifdef NPERF_CONTEXT return ""; @@ -186,8 +216,30 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const { PERF_CONTEXT_OUTPUT(env_lock_file_nanos); PERF_CONTEXT_OUTPUT(env_unlock_file_nanos); PERF_CONTEXT_OUTPUT(env_new_logger_nanos); + PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful); + PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive); + PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive); return ss.str(); #endif } +void PerfContext::EnablePerLevelPerfContext() { + if (!level_to_perf_context) { + level_to_perf_context = new std::map(); + } + per_level_perf_context_enabled = true; +} + +void PerfContext::DisablePerLevelPerfContext(){ + per_level_perf_context_enabled = false; +} + +void PerfContext::ClearPerLevelPerfContext(){ + if (level_to_perf_context) { + delete level_to_perf_context; + level_to_perf_context = nullptr; + } + per_level_perf_context_enabled = false; +} + } diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/perf_context_imp.h b/3rdParty/rocksdb/v5.18.X/monitoring/perf_context_imp.h similarity index 64% rename from 3rdParty/rocksdb/v5.16.X/monitoring/perf_context_imp.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/perf_context_imp.h index cfcded1c96..d67654914e 100644 --- a/3rdParty/rocksdb/v5.16.X/monitoring/perf_context_imp.h +++ 
b/3rdParty/rocksdb/v5.18.X/monitoring/perf_context_imp.h @@ -16,7 +16,7 @@ extern PerfContext perf_context; extern __thread PerfContext perf_context_; #define perf_context (*get_perf_context()) #else -extern __thread PerfContext perf_context; +extern thread_local PerfContext perf_context; #endif #endif @@ -59,6 +59,22 @@ extern __thread PerfContext perf_context; perf_context.metric += value; \ } +// Increase metric value +#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \ + if (perf_level >= PerfLevel::kEnableCount && \ + perf_context.per_level_perf_context_enabled && \ + perf_context.level_to_perf_context) { \ + if ((*(perf_context.level_to_perf_context)).find(level) != \ + (*(perf_context.level_to_perf_context)).end()) { \ + (*(perf_context.level_to_perf_context))[level].metric += value; \ + } \ + else { \ + PerfContextByLevel empty_context; \ + (*(perf_context.level_to_perf_context))[level] = empty_context; \ + (*(perf_context.level_to_perf_context))[level].metric += value; \ + } \ + } \ + #endif } diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/perf_level.cc b/3rdParty/rocksdb/v5.18.X/monitoring/perf_level.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/perf_level.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/perf_level.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/perf_level_imp.h b/3rdParty/rocksdb/v5.18.X/monitoring/perf_level_imp.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/perf_level_imp.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/perf_level_imp.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/perf_step_timer.h b/3rdParty/rocksdb/v5.18.X/monitoring/perf_step_timer.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/perf_step_timer.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/perf_step_timer.h diff --git a/3rdParty/rocksdb/v5.18.X/monitoring/statistics.cc b/3rdParty/rocksdb/v5.18.X/monitoring/statistics.cc new file mode 100644 index 0000000000..cba427ae4b --- 
/dev/null +++ b/3rdParty/rocksdb/v5.18.X/monitoring/statistics.cc @@ -0,0 +1,379 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include "monitoring/statistics.h" + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include +#include "rocksdb/statistics.h" +#include "port/likely.h" +#include +#include + +namespace rocksdb { + +// The order of items listed in Tickers should be the same as +// the order listed in TickersNameMap +const std::vector> TickersNameMap = { + {BLOCK_CACHE_MISS, "rocksdb.block.cache.miss"}, + {BLOCK_CACHE_HIT, "rocksdb.block.cache.hit"}, + {BLOCK_CACHE_ADD, "rocksdb.block.cache.add"}, + {BLOCK_CACHE_ADD_FAILURES, "rocksdb.block.cache.add.failures"}, + {BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss"}, + {BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit"}, + {BLOCK_CACHE_INDEX_ADD, "rocksdb.block.cache.index.add"}, + {BLOCK_CACHE_INDEX_BYTES_INSERT, "rocksdb.block.cache.index.bytes.insert"}, + {BLOCK_CACHE_INDEX_BYTES_EVICT, "rocksdb.block.cache.index.bytes.evict"}, + {BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss"}, + {BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit"}, + {BLOCK_CACHE_FILTER_ADD, "rocksdb.block.cache.filter.add"}, + {BLOCK_CACHE_FILTER_BYTES_INSERT, + "rocksdb.block.cache.filter.bytes.insert"}, + {BLOCK_CACHE_FILTER_BYTES_EVICT, "rocksdb.block.cache.filter.bytes.evict"}, + {BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss"}, + {BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit"}, + {BLOCK_CACHE_DATA_ADD, "rocksdb.block.cache.data.add"}, + {BLOCK_CACHE_DATA_BYTES_INSERT, "rocksdb.block.cache.data.bytes.insert"}, + {BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"}, + {BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"}, + 
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"}, + {BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"}, + {BLOOM_FILTER_FULL_TRUE_POSITIVE, + "rocksdb.bloom.filter.full.true.positive"}, + {PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"}, + {PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"}, + {SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"}, + {SIM_BLOCK_CACHE_MISS, "rocksdb.sim.block.cache.miss"}, + {MEMTABLE_HIT, "rocksdb.memtable.hit"}, + {MEMTABLE_MISS, "rocksdb.memtable.miss"}, + {GET_HIT_L0, "rocksdb.l0.hit"}, + {GET_HIT_L1, "rocksdb.l1.hit"}, + {GET_HIT_L2_AND_UP, "rocksdb.l2andup.hit"}, + {COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new"}, + {COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete"}, + {COMPACTION_KEY_DROP_RANGE_DEL, "rocksdb.compaction.key.drop.range_del"}, + {COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user"}, + {COMPACTION_RANGE_DEL_DROP_OBSOLETE, + "rocksdb.compaction.range_del.drop.obsolete"}, + {COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, + "rocksdb.compaction.optimized.del.drop.obsolete"}, + {COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"}, + {NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"}, + {NUMBER_KEYS_READ, "rocksdb.number.keys.read"}, + {NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"}, + {BYTES_WRITTEN, "rocksdb.bytes.written"}, + {BYTES_READ, "rocksdb.bytes.read"}, + {NUMBER_DB_SEEK, "rocksdb.number.db.seek"}, + {NUMBER_DB_NEXT, "rocksdb.number.db.next"}, + {NUMBER_DB_PREV, "rocksdb.number.db.prev"}, + {NUMBER_DB_SEEK_FOUND, "rocksdb.number.db.seek.found"}, + {NUMBER_DB_NEXT_FOUND, "rocksdb.number.db.next.found"}, + {NUMBER_DB_PREV_FOUND, "rocksdb.number.db.prev.found"}, + {ITER_BYTES_READ, "rocksdb.db.iter.bytes.read"}, + {NO_FILE_CLOSES, "rocksdb.no.file.closes"}, + {NO_FILE_OPENS, "rocksdb.no.file.opens"}, + {NO_FILE_ERRORS, "rocksdb.no.file.errors"}, + {STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros"}, + 
{STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros"}, + {STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros"}, + {STALL_MICROS, "rocksdb.stall.micros"}, + {DB_MUTEX_WAIT_MICROS, "rocksdb.db.mutex.wait.micros"}, + {RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis"}, + {NO_ITERATORS, "rocksdb.num.iterators"}, + {NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get"}, + {NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read"}, + {NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read"}, + {NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered"}, + {NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures"}, + {BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked"}, + {BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful"}, + {NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration"}, + {GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls"}, + {BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss"}, + {BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit"}, + {BLOCK_CACHE_COMPRESSED_ADD, "rocksdb.block.cachecompressed.add"}, + {BLOCK_CACHE_COMPRESSED_ADD_FAILURES, + "rocksdb.block.cachecompressed.add.failures"}, + {WAL_FILE_SYNCED, "rocksdb.wal.synced"}, + {WAL_FILE_BYTES, "rocksdb.wal.bytes"}, + {WRITE_DONE_BY_SELF, "rocksdb.write.self"}, + {WRITE_DONE_BY_OTHER, "rocksdb.write.other"}, + {WRITE_TIMEDOUT, "rocksdb.write.timeout"}, + {WRITE_WITH_WAL, "rocksdb.write.wal"}, + {COMPACT_READ_BYTES, "rocksdb.compact.read.bytes"}, + {COMPACT_WRITE_BYTES, "rocksdb.compact.write.bytes"}, + {FLUSH_WRITE_BYTES, "rocksdb.flush.write.bytes"}, + {NUMBER_DIRECT_LOAD_TABLE_PROPERTIES, + "rocksdb.number.direct.load.table.properties"}, + {NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"}, + {NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"}, + {NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"}, + 
{NUMBER_BLOCK_COMPRESSED, "rocksdb.number.block.compressed"}, + {NUMBER_BLOCK_DECOMPRESSED, "rocksdb.number.block.decompressed"}, + {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, + {MERGE_OPERATION_TOTAL_TIME, "rocksdb.merge.operation.time.nanos"}, + {FILTER_OPERATION_TOTAL_TIME, "rocksdb.filter.operation.time.nanos"}, + {ROW_CACHE_HIT, "rocksdb.row.cache.hit"}, + {ROW_CACHE_MISS, "rocksdb.row.cache.miss"}, + {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, + {READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"}, + {NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"}, + {NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"}, + {BLOB_DB_NUM_PUT, "rocksdb.blobdb.num.put"}, + {BLOB_DB_NUM_WRITE, "rocksdb.blobdb.num.write"}, + {BLOB_DB_NUM_GET, "rocksdb.blobdb.num.get"}, + {BLOB_DB_NUM_MULTIGET, "rocksdb.blobdb.num.multiget"}, + {BLOB_DB_NUM_SEEK, "rocksdb.blobdb.num.seek"}, + {BLOB_DB_NUM_NEXT, "rocksdb.blobdb.num.next"}, + {BLOB_DB_NUM_PREV, "rocksdb.blobdb.num.prev"}, + {BLOB_DB_NUM_KEYS_WRITTEN, "rocksdb.blobdb.num.keys.written"}, + {BLOB_DB_NUM_KEYS_READ, "rocksdb.blobdb.num.keys.read"}, + {BLOB_DB_BYTES_WRITTEN, "rocksdb.blobdb.bytes.written"}, + {BLOB_DB_BYTES_READ, "rocksdb.blobdb.bytes.read"}, + {BLOB_DB_WRITE_INLINED, "rocksdb.blobdb.write.inlined"}, + {BLOB_DB_WRITE_INLINED_TTL, "rocksdb.blobdb.write.inlined.ttl"}, + {BLOB_DB_WRITE_BLOB, "rocksdb.blobdb.write.blob"}, + {BLOB_DB_WRITE_BLOB_TTL, "rocksdb.blobdb.write.blob.ttl"}, + {BLOB_DB_BLOB_FILE_BYTES_WRITTEN, "rocksdb.blobdb.blob.file.bytes.written"}, + {BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file.bytes.read"}, + {BLOB_DB_BLOB_FILE_SYNCED, "rocksdb.blobdb.blob.file.synced"}, + {BLOB_DB_BLOB_INDEX_EXPIRED_COUNT, + "rocksdb.blobdb.blob.index.expired.count"}, + {BLOB_DB_BLOB_INDEX_EXPIRED_SIZE, "rocksdb.blobdb.blob.index.expired.size"}, + {BLOB_DB_BLOB_INDEX_EVICTED_COUNT, + "rocksdb.blobdb.blob.index.evicted.count"}, + 
{BLOB_DB_BLOB_INDEX_EVICTED_SIZE, "rocksdb.blobdb.blob.index.evicted.size"}, + {BLOB_DB_GC_NUM_FILES, "rocksdb.blobdb.gc.num.files"}, + {BLOB_DB_GC_NUM_NEW_FILES, "rocksdb.blobdb.gc.num.new.files"}, + {BLOB_DB_GC_FAILURES, "rocksdb.blobdb.gc.failures"}, + {BLOB_DB_GC_NUM_KEYS_OVERWRITTEN, "rocksdb.blobdb.gc.num.keys.overwritten"}, + {BLOB_DB_GC_NUM_KEYS_EXPIRED, "rocksdb.blobdb.gc.num.keys.expired"}, + {BLOB_DB_GC_NUM_KEYS_RELOCATED, "rocksdb.blobdb.gc.num.keys.relocated"}, + {BLOB_DB_GC_BYTES_OVERWRITTEN, "rocksdb.blobdb.gc.bytes.overwritten"}, + {BLOB_DB_GC_BYTES_EXPIRED, "rocksdb.blobdb.gc.bytes.expired"}, + {BLOB_DB_GC_BYTES_RELOCATED, "rocksdb.blobdb.gc.bytes.relocated"}, + {BLOB_DB_FIFO_NUM_FILES_EVICTED, "rocksdb.blobdb.fifo.num.files.evicted"}, + {BLOB_DB_FIFO_NUM_KEYS_EVICTED, "rocksdb.blobdb.fifo.num.keys.evicted"}, + {BLOB_DB_FIFO_BYTES_EVICTED, "rocksdb.blobdb.fifo.bytes.evicted"}, + {TXN_PREPARE_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.prepare"}, + {TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD, + "rocksdb.txn.overhead.mutex.old.commit.map"}, + {TXN_DUPLICATE_KEY_OVERHEAD, "rocksdb.txn.overhead.duplicate.key"}, + {TXN_SNAPSHOT_MUTEX_OVERHEAD, "rocksdb.txn.overhead.mutex.snapshot"}, + {NUMBER_MULTIGET_KEYS_FOUND, "rocksdb.number.multiget.keys.found"}, + {NO_ITERATOR_CREATED, "rocksdb.num.iterator.created"}, + {NO_ITERATOR_DELETED, "rocksdb.num.iterator.deleted"}, +}; + +const std::vector> HistogramsNameMap = { + {DB_GET, "rocksdb.db.get.micros"}, + {DB_WRITE, "rocksdb.db.write.micros"}, + {COMPACTION_TIME, "rocksdb.compaction.times.micros"}, + {SUBCOMPACTION_SETUP_TIME, "rocksdb.subcompaction.setup.times.micros"}, + {TABLE_SYNC_MICROS, "rocksdb.table.sync.micros"}, + {COMPACTION_OUTFILE_SYNC_MICROS, "rocksdb.compaction.outfile.sync.micros"}, + {WAL_FILE_SYNC_MICROS, "rocksdb.wal.file.sync.micros"}, + {MANIFEST_FILE_SYNC_MICROS, "rocksdb.manifest.file.sync.micros"}, + {TABLE_OPEN_IO_MICROS, "rocksdb.table.open.io.micros"}, + {DB_MULTIGET, 
"rocksdb.db.multiget.micros"}, + {READ_BLOCK_COMPACTION_MICROS, "rocksdb.read.block.compaction.micros"}, + {READ_BLOCK_GET_MICROS, "rocksdb.read.block.get.micros"}, + {WRITE_RAW_BLOCK_MICROS, "rocksdb.write.raw.block.micros"}, + {STALL_L0_SLOWDOWN_COUNT, "rocksdb.l0.slowdown.count"}, + {STALL_MEMTABLE_COMPACTION_COUNT, "rocksdb.memtable.compaction.count"}, + {STALL_L0_NUM_FILES_COUNT, "rocksdb.num.files.stall.count"}, + {HARD_RATE_LIMIT_DELAY_COUNT, "rocksdb.hard.rate.limit.delay.count"}, + {SOFT_RATE_LIMIT_DELAY_COUNT, "rocksdb.soft.rate.limit.delay.count"}, + {NUM_FILES_IN_SINGLE_COMPACTION, "rocksdb.numfiles.in.singlecompaction"}, + {DB_SEEK, "rocksdb.db.seek.micros"}, + {WRITE_STALL, "rocksdb.db.write.stall"}, + {SST_READ_MICROS, "rocksdb.sst.read.micros"}, + {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"}, + {BYTES_PER_READ, "rocksdb.bytes.per.read"}, + {BYTES_PER_WRITE, "rocksdb.bytes.per.write"}, + {BYTES_PER_MULTIGET, "rocksdb.bytes.per.multiget"}, + {BYTES_COMPRESSED, "rocksdb.bytes.compressed"}, + {BYTES_DECOMPRESSED, "rocksdb.bytes.decompressed"}, + {COMPRESSION_TIMES_NANOS, "rocksdb.compression.times.nanos"}, + {DECOMPRESSION_TIMES_NANOS, "rocksdb.decompression.times.nanos"}, + {READ_NUM_MERGE_OPERANDS, "rocksdb.read.num.merge_operands"}, + {BLOB_DB_KEY_SIZE, "rocksdb.blobdb.key.size"}, + {BLOB_DB_VALUE_SIZE, "rocksdb.blobdb.value.size"}, + {BLOB_DB_WRITE_MICROS, "rocksdb.blobdb.write.micros"}, + {BLOB_DB_GET_MICROS, "rocksdb.blobdb.get.micros"}, + {BLOB_DB_MULTIGET_MICROS, "rocksdb.blobdb.multiget.micros"}, + {BLOB_DB_SEEK_MICROS, "rocksdb.blobdb.seek.micros"}, + {BLOB_DB_NEXT_MICROS, "rocksdb.blobdb.next.micros"}, + {BLOB_DB_PREV_MICROS, "rocksdb.blobdb.prev.micros"}, + {BLOB_DB_BLOB_FILE_WRITE_MICROS, "rocksdb.blobdb.blob.file.write.micros"}, + {BLOB_DB_BLOB_FILE_READ_MICROS, "rocksdb.blobdb.blob.file.read.micros"}, + {BLOB_DB_BLOB_FILE_SYNC_MICROS, "rocksdb.blobdb.blob.file.sync.micros"}, + {BLOB_DB_GC_MICROS, 
"rocksdb.blobdb.gc.micros"}, + {BLOB_DB_COMPRESSION_MICROS, "rocksdb.blobdb.compression.micros"}, + {BLOB_DB_DECOMPRESSION_MICROS, "rocksdb.blobdb.decompression.micros"}, + {FLUSH_TIME, "rocksdb.db.flush.micros"}, +}; + +std::shared_ptr CreateDBStatistics() { + return std::make_shared(nullptr); +} + +StatisticsImpl::StatisticsImpl(std::shared_ptr stats) + : stats_(std::move(stats)) {} + +StatisticsImpl::~StatisticsImpl() {} + +uint64_t StatisticsImpl::getTickerCount(uint32_t tickerType) const { + MutexLock lock(&aggregate_lock_); + return getTickerCountLocked(tickerType); +} + +uint64_t StatisticsImpl::getTickerCountLocked(uint32_t tickerType) const { + assert(tickerType < TICKER_ENUM_MAX); + uint64_t res = 0; + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + res += per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType]; + } + return res; +} + +void StatisticsImpl::histogramData(uint32_t histogramType, + HistogramData* const data) const { + MutexLock lock(&aggregate_lock_); + getHistogramImplLocked(histogramType)->Data(data); +} + +std::unique_ptr StatisticsImpl::getHistogramImplLocked( + uint32_t histogramType) const { + assert(histogramType < HISTOGRAM_ENUM_MAX); + std::unique_ptr res_hist(new HistogramImpl()); + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + res_hist->Merge( + per_core_stats_.AccessAtCore(core_idx)->histograms_[histogramType]); + } + return res_hist; +} + +std::string StatisticsImpl::getHistogramString(uint32_t histogramType) const { + MutexLock lock(&aggregate_lock_); + return getHistogramImplLocked(histogramType)->ToString(); +} + +void StatisticsImpl::setTickerCount(uint32_t tickerType, uint64_t count) { + { + MutexLock lock(&aggregate_lock_); + setTickerCountLocked(tickerType, count); + } + if (stats_ && tickerType < TICKER_ENUM_MAX) { + stats_->setTickerCount(tickerType, count); + } +} + +void StatisticsImpl::setTickerCountLocked(uint32_t tickerType, uint64_t count) { + 
assert(tickerType < TICKER_ENUM_MAX); + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + if (core_idx == 0) { + per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType] = count; + } else { + per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType] = 0; + } + } +} + +uint64_t StatisticsImpl::getAndResetTickerCount(uint32_t tickerType) { + uint64_t sum = 0; + { + MutexLock lock(&aggregate_lock_); + assert(tickerType < TICKER_ENUM_MAX); + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + sum += + per_core_stats_.AccessAtCore(core_idx)->tickers_[tickerType].exchange( + 0, std::memory_order_relaxed); + } + } + if (stats_ && tickerType < TICKER_ENUM_MAX) { + stats_->setTickerCount(tickerType, 0); + } + return sum; +} + +void StatisticsImpl::recordTick(uint32_t tickerType, uint64_t count) { + assert(tickerType < TICKER_ENUM_MAX); + per_core_stats_.Access()->tickers_[tickerType].fetch_add( + count, std::memory_order_relaxed); + if (stats_ && tickerType < TICKER_ENUM_MAX) { + stats_->recordTick(tickerType, count); + } +} + +void StatisticsImpl::measureTime(uint32_t histogramType, uint64_t value) { + assert(histogramType < HISTOGRAM_ENUM_MAX); + per_core_stats_.Access()->histograms_[histogramType].Add(value); + if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) { + stats_->measureTime(histogramType, value); + } +} + +Status StatisticsImpl::Reset() { + MutexLock lock(&aggregate_lock_); + for (uint32_t i = 0; i < TICKER_ENUM_MAX; ++i) { + setTickerCountLocked(i, 0); + } + for (uint32_t i = 0; i < HISTOGRAM_ENUM_MAX; ++i) { + for (size_t core_idx = 0; core_idx < per_core_stats_.Size(); ++core_idx) { + per_core_stats_.AccessAtCore(core_idx)->histograms_[i].Clear(); + } + } + return Status::OK(); +} + +namespace { + +// a buffer size used for temp string buffers +const int kTmpStrBufferSize = 200; + +} // namespace + +std::string StatisticsImpl::ToString() const { + MutexLock lock(&aggregate_lock_); + std::string res; 
+ res.reserve(20000); + for (const auto& t : TickersNameMap) { + assert(t.first < TICKER_ENUM_MAX); + char buffer[kTmpStrBufferSize]; + snprintf(buffer, kTmpStrBufferSize, "%s COUNT : %" PRIu64 "\n", + t.second.c_str(), getTickerCountLocked(t.first)); + res.append(buffer); + } + for (const auto& h : HistogramsNameMap) { + assert(h.first < HISTOGRAM_ENUM_MAX); + char buffer[kTmpStrBufferSize]; + HistogramData hData; + getHistogramImplLocked(h.first)->Data(&hData); + // don't handle failures - buffer should always be big enough and arguments + // should be provided correctly + int ret = snprintf( + buffer, kTmpStrBufferSize, + "%s P50 : %f P95 : %f P99 : %f P100 : %f COUNT : %" PRIu64 " SUM : %" + PRIu64 "\n", h.second.c_str(), hData.median, hData.percentile95, + hData.percentile99, hData.max, hData.count, hData.sum); + if (ret < 0 || ret >= kTmpStrBufferSize) { + assert(false); + continue; + } + res.append(buffer); + } + res.shrink_to_fit(); + return res; +} + +bool StatisticsImpl::HistEnabledForType(uint32_t type) const { + return type < HISTOGRAM_ENUM_MAX; +} + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/statistics.h b/3rdParty/rocksdb/v5.18.X/monitoring/statistics.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/monitoring/statistics.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/statistics.h index 4427c8c546..dcd5f7a010 100644 --- a/3rdParty/rocksdb/v5.16.X/monitoring/statistics.h +++ b/3rdParty/rocksdb/v5.18.X/monitoring/statistics.h @@ -41,8 +41,7 @@ enum HistogramsInternal : uint32_t { class StatisticsImpl : public Statistics { public: - StatisticsImpl(std::shared_ptr stats, - bool enable_internal_stats); + StatisticsImpl(std::shared_ptr stats); virtual ~StatisticsImpl(); virtual uint64_t getTickerCount(uint32_t ticker_type) const override; @@ -62,8 +61,6 @@ class StatisticsImpl : public Statistics { private: // If non-nullptr, forwards updates to the object pointed to by `stats_`. 
std::shared_ptr stats_; - // TODO(ajkr): clean this up since there are no internal stats anymore - bool enable_internal_stats_; // Synchronizes anything that operates across other cores' local data, // such that operations like Reset() can be performed atomically. mutable port::Mutex aggregate_lock_; diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/statistics_test.cc b/3rdParty/rocksdb/v5.18.X/monitoring/statistics_test.cc similarity index 67% rename from 3rdParty/rocksdb/v5.16.X/monitoring/statistics_test.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/statistics_test.cc index 43aacde9c1..a77022bfb3 100644 --- a/3rdParty/rocksdb/v5.16.X/monitoring/statistics_test.cc +++ b/3rdParty/rocksdb/v5.18.X/monitoring/statistics_test.cc @@ -16,7 +16,7 @@ class StatisticsTest : public testing::Test {}; // Sanity check to make sure that contents and order of TickersNameMap // match Tickers enum -TEST_F(StatisticsTest, Sanity) { +TEST_F(StatisticsTest, SanityTickers) { EXPECT_EQ(static_cast(Tickers::TICKER_ENUM_MAX), TickersNameMap.size()); @@ -26,6 +26,18 @@ TEST_F(StatisticsTest, Sanity) { } } +// Sanity check to make sure that contents and order of HistogramsNameMap +// match Tickers enum +TEST_F(StatisticsTest, SanityHistograms) { + EXPECT_EQ(static_cast(Histograms::HISTOGRAM_ENUM_MAX), + HistogramsNameMap.size()); + + for (uint32_t h = 0; h < Histograms::HISTOGRAM_ENUM_MAX; h++) { + auto pair = HistogramsNameMap[static_cast(h)]; + ASSERT_EQ(pair.first, h) << "Miss match at " << pair.second; + } +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/thread_status_impl.cc b/3rdParty/rocksdb/v5.18.X/monitoring/thread_status_impl.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/thread_status_impl.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/thread_status_impl.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/thread_status_updater.cc 
b/3rdParty/rocksdb/v5.18.X/monitoring/thread_status_updater.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/thread_status_updater.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/thread_status_updater.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/thread_status_updater.h b/3rdParty/rocksdb/v5.18.X/monitoring/thread_status_updater.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/thread_status_updater.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/thread_status_updater.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/thread_status_updater_debug.cc b/3rdParty/rocksdb/v5.18.X/monitoring/thread_status_updater_debug.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/thread_status_updater_debug.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/thread_status_updater_debug.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/thread_status_util.cc b/3rdParty/rocksdb/v5.18.X/monitoring/thread_status_util.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/thread_status_util.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/thread_status_util.cc diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/thread_status_util.h b/3rdParty/rocksdb/v5.18.X/monitoring/thread_status_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/thread_status_util.h rename to 3rdParty/rocksdb/v5.18.X/monitoring/thread_status_util.h diff --git a/3rdParty/rocksdb/v5.16.X/monitoring/thread_status_util_debug.cc b/3rdParty/rocksdb/v5.18.X/monitoring/thread_status_util_debug.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/monitoring/thread_status_util_debug.cc rename to 3rdParty/rocksdb/v5.18.X/monitoring/thread_status_util_debug.cc diff --git a/3rdParty/rocksdb/v5.16.X/options/cf_options.cc b/3rdParty/rocksdb/v5.18.X/options/cf_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/options/cf_options.cc rename to 3rdParty/rocksdb/v5.18.X/options/cf_options.cc diff 
--git a/3rdParty/rocksdb/v5.16.X/options/cf_options.h b/3rdParty/rocksdb/v5.18.X/options/cf_options.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/options/cf_options.h rename to 3rdParty/rocksdb/v5.18.X/options/cf_options.h index 1658bf427a..69b0b0105a 100644 --- a/3rdParty/rocksdb/v5.16.X/options/cf_options.h +++ b/3rdParty/rocksdb/v5.18.X/options/cf_options.h @@ -18,7 +18,7 @@ namespace rocksdb { // ImmutableCFOptions is a data struct used by RocksDB internal. It contains a // subset of Options that should not be changed during the entire lifetime // of DB. Raw pointers defined in this struct do not have ownership to the data -// they point to. Options contains shared_ptr to these data. +// they point to. Options contains std::shared_ptr to these data. struct ImmutableCFOptions { ImmutableCFOptions(); explicit ImmutableCFOptions(const Options& options); diff --git a/3rdParty/rocksdb/v5.16.X/options/db_options.cc b/3rdParty/rocksdb/v5.18.X/options/db_options.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/options/db_options.cc rename to 3rdParty/rocksdb/v5.18.X/options/db_options.cc index fd3cdcccd6..4e8134511b 100644 --- a/3rdParty/rocksdb/v5.16.X/options/db_options.cc +++ b/3rdParty/rocksdb/v5.18.X/options/db_options.cc @@ -85,7 +85,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) allow_ingest_behind(options.allow_ingest_behind), preserve_deletes(options.preserve_deletes), two_write_queues(options.two_write_queues), - manual_wal_flush(options.manual_wal_flush) { + manual_wal_flush(options.manual_wal_flush), + atomic_flush(options.atomic_flush) { } void ImmutableDBOptions::Dump(Logger* log) const { diff --git a/3rdParty/rocksdb/v5.16.X/options/db_options.h b/3rdParty/rocksdb/v5.18.X/options/db_options.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/options/db_options.h rename to 3rdParty/rocksdb/v5.18.X/options/db_options.h index 107d35c877..2cd83b55d4 100644 --- 
a/3rdParty/rocksdb/v5.16.X/options/db_options.h +++ b/3rdParty/rocksdb/v5.18.X/options/db_options.h @@ -78,6 +78,7 @@ struct ImmutableDBOptions { bool preserve_deletes; bool two_write_queues; bool manual_wal_flush; + bool atomic_flush; }; struct MutableDBOptions { diff --git a/3rdParty/rocksdb/v5.16.X/options/options.cc b/3rdParty/rocksdb/v5.18.X/options/options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/options/options.cc rename to 3rdParty/rocksdb/v5.18.X/options/options.cc diff --git a/3rdParty/rocksdb/v5.16.X/options/options_helper.cc b/3rdParty/rocksdb/v5.18.X/options/options_helper.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/options/options_helper.cc rename to 3rdParty/rocksdb/v5.18.X/options/options_helper.cc index f4c59ff06e..27a2252a02 100644 --- a/3rdParty/rocksdb/v5.16.X/options/options_helper.cc +++ b/3rdParty/rocksdb/v5.18.X/options/options_helper.cc @@ -126,6 +126,7 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, immutable_db_options.preserve_deletes; options.two_write_queues = immutable_db_options.two_write_queues; options.manual_wal_flush = immutable_db_options.manual_wal_flush; + options.atomic_flush = immutable_db_options.atomic_flush; return options; } @@ -215,7 +216,8 @@ std::map std::unordered_map OptionsHelper::checksum_type_string_map = {{"kNoChecksum", kNoChecksum}, {"kCRC32c", kCRC32c}, - {"kxxHash", kxxHash}}; + {"kxxHash", kxxHash}, + {"kxxHash64", kxxHash64}}; std::unordered_map OptionsHelper::compression_type_string_map = { @@ -1554,7 +1556,11 @@ std::unordered_map offsetof(struct ImmutableDBOptions, manual_wal_flush)}}, {"seq_per_batch", {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false, - 0}}}; + 0}}, + {"atomic_flush", + {offsetof(struct DBOptions, atomic_flush), OptionType::kBoolean, + OptionVerificationType::kNormal, false, + offsetof(struct ImmutableDBOptions, atomic_flush)}}}; std::unordered_map OptionsHelper::block_base_table_index_type_string_map 
= { diff --git a/3rdParty/rocksdb/v5.16.X/options/options_helper.h b/3rdParty/rocksdb/v5.18.X/options/options_helper.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/options/options_helper.h rename to 3rdParty/rocksdb/v5.18.X/options/options_helper.h diff --git a/3rdParty/rocksdb/v5.16.X/options/options_parser.cc b/3rdParty/rocksdb/v5.18.X/options/options_parser.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/options/options_parser.cc rename to 3rdParty/rocksdb/v5.18.X/options/options_parser.cc index 35bbc82cd2..32cfb8d531 100644 --- a/3rdParty/rocksdb/v5.16.X/options/options_parser.cc +++ b/3rdParty/rocksdb/v5.18.X/options/options_parser.cc @@ -48,8 +48,8 @@ Status PersistRocksDBOptions(const DBOptions& db_opt, if (!s.ok()) { return s; } - unique_ptr writable; - writable.reset(new WritableFileWriter(std::move(wf), EnvOptions(), + std::unique_ptr writable; + writable.reset(new WritableFileWriter(std::move(wf), file_name, EnvOptions(), nullptr /* statistics */)); std::string options_file_content; diff --git a/3rdParty/rocksdb/v5.16.X/options/options_parser.h b/3rdParty/rocksdb/v5.18.X/options/options_parser.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/options/options_parser.h rename to 3rdParty/rocksdb/v5.18.X/options/options_parser.h diff --git a/3rdParty/rocksdb/v5.16.X/options/options_sanity_check.cc b/3rdParty/rocksdb/v5.18.X/options/options_sanity_check.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/options/options_sanity_check.cc rename to 3rdParty/rocksdb/v5.18.X/options/options_sanity_check.cc diff --git a/3rdParty/rocksdb/v5.16.X/options/options_sanity_check.h b/3rdParty/rocksdb/v5.18.X/options/options_sanity_check.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/options/options_sanity_check.h rename to 3rdParty/rocksdb/v5.18.X/options/options_sanity_check.h diff --git a/3rdParty/rocksdb/v5.16.X/options/options_settable_test.cc b/3rdParty/rocksdb/v5.18.X/options/options_settable_test.cc 
similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/options/options_settable_test.cc rename to 3rdParty/rocksdb/v5.18.X/options/options_settable_test.cc index ded152ba99..cad1af3d76 100644 --- a/3rdParty/rocksdb/v5.16.X/options/options_settable_test.cc +++ b/3rdParty/rocksdb/v5.18.X/options/options_settable_test.cc @@ -291,7 +291,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "concurrent_prepare=false;" "two_write_queues=false;" "manual_wal_flush=false;" - "seq_per_batch=false;", + "seq_per_batch=false;" + "atomic_flush=false", new_options)); ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), diff --git a/3rdParty/rocksdb/v5.16.X/options/options_test.cc b/3rdParty/rocksdb/v5.18.X/options/options_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/options/options_test.cc rename to 3rdParty/rocksdb/v5.18.X/options/options_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/port/README b/3rdParty/rocksdb/v5.18.X/port/README similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/README rename to 3rdParty/rocksdb/v5.18.X/port/README diff --git a/3rdParty/rocksdb/v5.16.X/port/dirent.h b/3rdParty/rocksdb/v5.18.X/port/dirent.h similarity index 89% rename from 3rdParty/rocksdb/v5.16.X/port/dirent.h rename to 3rdParty/rocksdb/v5.18.X/port/dirent.h index 7bcc356978..cb1adbe129 100644 --- a/3rdParty/rocksdb/v5.16.X/port/dirent.h +++ b/3rdParty/rocksdb/v5.18.X/port/dirent.h @@ -9,8 +9,7 @@ // // See port_example.h for documentation for the following types/functions. 
-#ifndef STORAGE_LEVELDB_PORT_DIRENT_H_ -#define STORAGE_LEVELDB_PORT_DIRENT_H_ +#pragma once #ifdef ROCKSDB_PLATFORM_POSIX #include @@ -43,5 +42,3 @@ using port::closedir; } // namespace rocksdb #endif // OS_WIN - -#endif // STORAGE_LEVELDB_PORT_DIRENT_H_ diff --git a/3rdParty/rocksdb/v5.16.X/port/likely.h b/3rdParty/rocksdb/v5.18.X/port/likely.h similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/port/likely.h rename to 3rdParty/rocksdb/v5.18.X/port/likely.h index e5ef786f2e..397d757133 100644 --- a/3rdParty/rocksdb/v5.16.X/port/likely.h +++ b/3rdParty/rocksdb/v5.18.X/port/likely.h @@ -7,8 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef PORT_LIKELY_H_ -#define PORT_LIKELY_H_ +#pragma once #if defined(__GNUC__) && __GNUC__ >= 4 #define LIKELY(x) (__builtin_expect((x), 1)) @@ -17,5 +16,3 @@ #define LIKELY(x) (x) #define UNLIKELY(x) (x) #endif - -#endif // PORT_LIKELY_H_ diff --git a/3rdParty/rocksdb/v5.16.X/port/port.h b/3rdParty/rocksdb/v5.18.X/port/port.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/port.h rename to 3rdParty/rocksdb/v5.18.X/port/port.h diff --git a/3rdParty/rocksdb/v5.16.X/port/port_example.h b/3rdParty/rocksdb/v5.18.X/port/port_example.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/port/port_example.h rename to 3rdParty/rocksdb/v5.18.X/port/port_example.h index 05b3240669..a94dc93c26 100644 --- a/3rdParty/rocksdb/v5.16.X/port/port_example.h +++ b/3rdParty/rocksdb/v5.18.X/port/port_example.h @@ -12,8 +12,7 @@ // specific port_.h file. Use this file as a reference for // how to port this package to a new platform. 
-#ifndef STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ -#define STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ +#pragma once namespace rocksdb { namespace port { @@ -100,5 +99,3 @@ extern bool Snappy_Uncompress(const char* input_data, size_t input_length, } // namespace port } // namespace rocksdb - -#endif // STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ diff --git a/3rdParty/rocksdb/v5.16.X/port/port_posix.cc b/3rdParty/rocksdb/v5.18.X/port/port_posix.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/port_posix.cc rename to 3rdParty/rocksdb/v5.18.X/port/port_posix.cc diff --git a/3rdParty/rocksdb/v5.16.X/port/port_posix.h b/3rdParty/rocksdb/v5.18.X/port/port_posix.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/port_posix.h rename to 3rdParty/rocksdb/v5.18.X/port/port_posix.h diff --git a/3rdParty/rocksdb/v5.16.X/port/stack_trace.cc b/3rdParty/rocksdb/v5.18.X/port/stack_trace.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/stack_trace.cc rename to 3rdParty/rocksdb/v5.18.X/port/stack_trace.cc diff --git a/3rdParty/rocksdb/v5.16.X/port/stack_trace.h b/3rdParty/rocksdb/v5.18.X/port/stack_trace.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/stack_trace.h rename to 3rdParty/rocksdb/v5.18.X/port/stack_trace.h diff --git a/3rdParty/rocksdb/v5.16.X/port/sys_time.h b/3rdParty/rocksdb/v5.18.X/port/sys_time.h similarity index 89% rename from 3rdParty/rocksdb/v5.16.X/port/sys_time.h rename to 3rdParty/rocksdb/v5.18.X/port/sys_time.h index 1e2ad0f5d6..2f83da8b3e 100644 --- a/3rdParty/rocksdb/v5.16.X/port/sys_time.h +++ b/3rdParty/rocksdb/v5.18.X/port/sys_time.h @@ -10,8 +10,7 @@ // This file is a portable substitute for sys/time.h which does not exist on // Windows -#ifndef STORAGE_LEVELDB_PORT_SYS_TIME_H_ -#define STORAGE_LEVELDB_PORT_SYS_TIME_H_ +#pragma once #if defined(OS_WIN) && defined(_MSC_VER) @@ -44,5 +43,3 @@ using port::localtime_r; #include #include #endif - -#endif // STORAGE_LEVELDB_PORT_SYS_TIME_H_ diff --git 
a/3rdParty/rocksdb/v5.16.X/port/util_logger.h b/3rdParty/rocksdb/v5.18.X/port/util_logger.h similarity index 85% rename from 3rdParty/rocksdb/v5.16.X/port/util_logger.h rename to 3rdParty/rocksdb/v5.18.X/port/util_logger.h index a8255ad6d6..ba424705b2 100644 --- a/3rdParty/rocksdb/v5.16.X/port/util_logger.h +++ b/3rdParty/rocksdb/v5.18.X/port/util_logger.h @@ -7,8 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#ifndef STORAGE_LEVELDB_PORT_UTIL_LOGGER_H_ -#define STORAGE_LEVELDB_PORT_UTIL_LOGGER_H_ +#pragma once // Include the appropriate platform specific file below. If you are // porting to a new platform, see "port_example.h" for documentation @@ -19,5 +18,3 @@ #elif defined(OS_WIN) #include "port/win/win_logger.h" #endif - -#endif // STORAGE_LEVELDB_PORT_UTIL_LOGGER_H_ diff --git a/3rdParty/rocksdb/v5.16.X/port/win/env_default.cc b/3rdParty/rocksdb/v5.18.X/port/win/env_default.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/win/env_default.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/env_default.cc diff --git a/3rdParty/rocksdb/v5.16.X/port/win/env_win.cc b/3rdParty/rocksdb/v5.18.X/port/win/env_win.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/port/win/env_win.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/env_win.cc index daa4789dc9..d301390670 100644 --- a/3rdParty/rocksdb/v5.16.X/port/win/env_win.cc +++ b/3rdParty/rocksdb/v5.18.X/port/win/env_win.cc @@ -236,7 +236,7 @@ Status WinEnvIO::NewRandomAccessFile(const std::string& fname, MapViewOfFileEx(hMap, FILE_MAP_READ, 0, // High DWORD of access start 0, // Low DWORD - fileSize, + static_cast(fileSize), NULL); // Let the OS choose the mapping if (!mapped_region) { @@ -247,7 +247,7 @@ Status WinEnvIO::NewRandomAccessFile(const std::string& fname, } result->reset(new WinMmapReadableFile(fname, hFile, hMap, mapped_region, - fileSize)); + static_cast(fileSize))); 
mapGuard.release(); fileGuard.release(); @@ -449,7 +449,7 @@ Status WinEnvIO::NewMemoryMappedFileBuffer(const std::string & fname, void* base = MapViewOfFileEx(hMap, FILE_MAP_WRITE, 0, // High DWORD of access start 0, // Low DWORD - fileSize, + static_cast(fileSize), NULL); // Let the OS choose the mapping if (!base) { @@ -707,6 +707,9 @@ Status WinEnvIO::LinkFile(const std::string& src, if (!RX_CreateHardLink(RX_FN(target).c_str(), RX_FN(src).c_str(), NULL)) { DWORD lastError = GetLastError(); + if (lastError == ERROR_NOT_SAME_DEVICE) { + return Status::NotSupported("No cross FS links allowed"); + } std::string text("Failed to link: "); text.append(src).append(" to: ").append(target); @@ -717,30 +720,28 @@ Status WinEnvIO::LinkFile(const std::string& src, return result; } -Status WinEnvIO::NumFileLinks(const std::string& fname, - uint64_t* count) { +Status WinEnvIO::NumFileLinks(const std::string& fname, uint64_t* count) { Status s; - HANDLE handle = RX_CreateFile(RX_FN(fname).c_str(), 0, - FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, - OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, - NULL); + HANDLE handle = RX_CreateFile( + RX_FN(fname).c_str(), 0, + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); if (INVALID_HANDLE_VALUE == handle) { auto lastError = GetLastError(); - s = IOErrorFromWindowsError( - "NumFileLinks: " + fname, lastError); + s = IOErrorFromWindowsError("NumFileLinks: " + fname, lastError); return s; } UniqueCloseHandlePtr handle_guard(handle, CloseHandleFunc); FILE_STANDARD_INFO standard_info; - if (0 != GetFileInformationByHandleEx(handle, FileStandardInfo, - &standard_info, sizeof(standard_info))) { + if (0 != GetFileInformationByHandleEx(handle, FileStandardInfo, + &standard_info, + sizeof(standard_info))) { *count = standard_info.NumberOfLinks; } else { auto lastError = GetLastError(); - s = IOErrorFromWindowsError("GetFileInformationByHandleEx: " + fname, 
lastError); + s = IOErrorFromWindowsError("GetFileInformationByHandleEx: " + fname, + lastError); } return s; } diff --git a/3rdParty/rocksdb/v5.16.X/port/win/env_win.h b/3rdParty/rocksdb/v5.18.X/port/win/env_win.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/port/win/env_win.h rename to 3rdParty/rocksdb/v5.18.X/port/win/env_win.h index 08c1534f34..d61ac3acd6 100644 --- a/3rdParty/rocksdb/v5.16.X/port/win/env_win.h +++ b/3rdParty/rocksdb/v5.18.X/port/win/env_win.h @@ -109,8 +109,8 @@ public: // The returned file will only be accessed by one thread at a time. virtual Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, - const EnvOptions& options); + std::unique_ptr* result, + const EnvOptions& options); virtual Status NewMemoryMappedFileBuffer( const std::string& fname, @@ -145,7 +145,7 @@ public: const std::string& target); virtual Status NumFileLinks(const std::string& /*fname*/, - uint64_t* /*count*/); + uint64_t* /*count*/); virtual Status AreFilesSame(const std::string& first, const std::string& second, bool* res); @@ -271,8 +271,7 @@ public: Status LinkFile(const std::string& src, const std::string& target) override; - Status NumFileLinks(const std::string& fname, - uint64_t* count) override; + Status NumFileLinks(const std::string& fname, uint64_t* count) override; Status AreFilesSame(const std::string& first, const std::string& second, bool* res) override; diff --git a/3rdParty/rocksdb/v5.16.X/port/win/io_win.cc b/3rdParty/rocksdb/v5.18.X/port/win/io_win.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/port/win/io_win.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/io_win.cc index 66fe8a11e6..128cb60b9f 100644 --- a/3rdParty/rocksdb/v5.16.X/port/win/io_win.cc +++ b/3rdParty/rocksdb/v5.18.X/port/win/io_win.cc @@ -260,7 +260,7 @@ Status WinMmapReadableFile::Read(uint64_t offset, size_t n, Slice* result, *result = Slice(); return IOError(filename_, EINVAL); } else if (offset + n > length_) { - n = length_ - offset; + n 
= length_ - static_cast(offset); } *result = Slice(reinterpret_cast(mapped_region_)+offset, n); @@ -317,7 +317,7 @@ Status WinMmapFile::MapNewRegion() { assert(mapped_begin_ == nullptr); - size_t minDiskSize = file_offset_ + view_size_; + size_t minDiskSize = static_cast(file_offset_) + view_size_; if (minDiskSize > reserved_size_) { status = Allocate(file_offset_, view_size_); @@ -579,7 +579,7 @@ Status WinMmapFile::Allocate(uint64_t offset, uint64_t len) { // Make sure that we reserve an aligned amount of space // since the reservation block size is driven outside so we want // to check if we are ok with reservation here - size_t spaceToReserve = Roundup(offset + len, view_size_); + size_t spaceToReserve = Roundup(static_cast(offset + len), view_size_); // Nothing to do if (spaceToReserve <= reserved_size_) { return status; @@ -656,14 +656,14 @@ Status WinSequentialFile::PositionedRead(uint64_t offset, size_t n, Slice* resul return Status::NotSupported("This function is only used for direct_io"); } - if (!IsSectorAligned(offset) || + if (!IsSectorAligned(static_cast(offset)) || !IsSectorAligned(n)) { return Status::InvalidArgument( "WinSequentialFile::PositionedRead: offset is not properly aligned"); } size_t bytes_read = 0; // out param - s = PositionedReadInternal(scratch, n, offset, bytes_read); + s = PositionedReadInternal(scratch, static_cast(n), offset, bytes_read); *result = Slice(scratch, bytes_read); return s; } @@ -721,7 +721,7 @@ Status WinRandomAccessImpl::ReadImpl(uint64_t offset, size_t n, Slice* result, // Check buffer alignment if (file_base_->use_direct_io()) { - if (!IsSectorAligned(offset) || + if (!IsSectorAligned(static_cast(offset)) || !IsAligned(alignment_, scratch)) { return Status::InvalidArgument( "WinRandomAccessImpl::ReadImpl: offset or scratch is not properly aligned"); @@ -818,7 +818,7 @@ Status WinWritableImpl::AppendImpl(const Slice& data) { // to the end of the file assert(IsSectorAligned(next_write_offset_)); if 
(!IsSectorAligned(data.size()) || - !IsAligned(GetAlignement(), data.data())) { + !IsAligned(static_cast(GetAlignement()), data.data())) { s = Status::InvalidArgument( "WriteData must be page aligned, size must be sector aligned"); } else { @@ -857,9 +857,9 @@ inline Status WinWritableImpl::PositionedAppendImpl(const Slice& data, uint64_t offset) { if(file_data_->use_direct_io()) { - if (!IsSectorAligned(offset) || + if (!IsSectorAligned(static_cast(offset)) || !IsSectorAligned(data.size()) || - !IsAligned(GetAlignement(), data.data())) { + !IsAligned(static_cast(GetAlignement()), data.data())) { return Status::InvalidArgument( "Data and offset must be page aligned, size must be sector aligned"); } @@ -944,7 +944,7 @@ Status WinWritableImpl::AllocateImpl(uint64_t offset, uint64_t len) { // Make sure that we reserve an aligned amount of space // since the reservation block size is driven outside so we want // to check if we are ok with reservation here - size_t spaceToReserve = Roundup(offset + len, alignment_); + size_t spaceToReserve = Roundup(static_cast(offset + len), static_cast(alignment_)); // Nothing to do if (spaceToReserve <= reservedsize_) { return status; @@ -977,7 +977,7 @@ WinWritableFile::~WinWritableFile() { bool WinWritableFile::use_direct_io() const { return WinFileData::use_direct_io(); } size_t WinWritableFile::GetRequiredBufferAlignment() const { - return GetAlignement(); + return static_cast(GetAlignement()); } Status WinWritableFile::Append(const Slice& data) { @@ -1037,7 +1037,7 @@ WinRandomRWFile::WinRandomRWFile(const std::string& fname, HANDLE hFile, bool WinRandomRWFile::use_direct_io() const { return WinFileData::use_direct_io(); } size_t WinRandomRWFile::GetRequiredBufferAlignment() const { - return GetAlignement(); + return static_cast(GetAlignement()); } Status WinRandomRWFile::Write(uint64_t offset, const Slice & data) { diff --git a/3rdParty/rocksdb/v5.16.X/port/win/io_win.h b/3rdParty/rocksdb/v5.18.X/port/win/io_win.h similarity 
index 99% rename from 3rdParty/rocksdb/v5.16.X/port/win/io_win.h rename to 3rdParty/rocksdb/v5.18.X/port/win/io_win.h index 3b08c394f4..c46876b8c0 100644 --- a/3rdParty/rocksdb/v5.16.X/port/win/io_win.h +++ b/3rdParty/rocksdb/v5.18.X/port/win/io_win.h @@ -58,7 +58,7 @@ class WinFileData { protected: const std::string filename_; HANDLE hFile_; - // If ture, the I/O issued would be direct I/O which the buffer + // If true, the I/O issued would be direct I/O which the buffer // will need to be aligned (not sure there is a guarantee that the buffer // passed in is aligned). const bool use_direct_io_; diff --git a/3rdParty/rocksdb/v5.16.X/port/win/port_win.cc b/3rdParty/rocksdb/v5.18.X/port/win/port_win.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/port/win/port_win.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/port_win.cc index a6587ab01a..6ca5bba3b9 100644 --- a/3rdParty/rocksdb/v5.16.X/port/win/port_win.cc +++ b/3rdParty/rocksdb/v5.18.X/port/win/port_win.cc @@ -38,6 +38,7 @@ namespace rocksdb { namespace port { +#ifdef ROCKSDB_WINDOWS_UTF8_FILENAMES std::string utf16_to_utf8(const std::wstring& utf16) { std::wstring_convert,wchar_t> convert; return convert.to_bytes(utf16); @@ -47,6 +48,7 @@ std::wstring utf8_to_utf16(const std::string& utf8) { std::wstring_convert> converter; return converter.from_bytes(utf8); } +#endif void gettimeofday(struct timeval* tv, struct timezone* /* tz */) { using namespace std::chrono; diff --git a/3rdParty/rocksdb/v5.16.X/port/win/port_win.h b/3rdParty/rocksdb/v5.18.X/port/win/port_win.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/port/win/port_win.h rename to 3rdParty/rocksdb/v5.18.X/port/win/port_win.h index 19ce244349..9b8ba9ff89 100644 --- a/3rdParty/rocksdb/v5.16.X/port/win/port_win.h +++ b/3rdParty/rocksdb/v5.18.X/port/win/port_win.h @@ -9,8 +9,7 @@ // // See port_example.h for documentation for the following types/functions. 
-#ifndef STORAGE_LEVELDB_PORT_PORT_WIN_H_ -#define STORAGE_LEVELDB_PORT_PORT_WIN_H_ +#pragma once // Always want minimum headers #ifndef WIN32_LEAN_AND_MEAN @@ -392,5 +391,3 @@ using port::pthread_getspecific; using port::truncate; } // namespace rocksdb - -#endif // STORAGE_LEVELDB_PORT_PORT_WIN_H_ diff --git a/3rdParty/rocksdb/v5.16.X/port/win/win_jemalloc.cc b/3rdParty/rocksdb/v5.18.X/port/win/win_jemalloc.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/win/win_jemalloc.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/win_jemalloc.cc diff --git a/3rdParty/rocksdb/v5.16.X/port/win/win_logger.cc b/3rdParty/rocksdb/v5.18.X/port/win/win_logger.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/win/win_logger.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/win_logger.cc diff --git a/3rdParty/rocksdb/v5.16.X/port/win/win_logger.h b/3rdParty/rocksdb/v5.18.X/port/win/win_logger.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/win/win_logger.h rename to 3rdParty/rocksdb/v5.18.X/port/win/win_logger.h diff --git a/3rdParty/rocksdb/v5.16.X/port/win/win_thread.cc b/3rdParty/rocksdb/v5.18.X/port/win/win_thread.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/port/win/win_thread.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/win_thread.cc index b48af2370f..9a976e2c6b 100644 --- a/3rdParty/rocksdb/v5.16.X/port/win/win_thread.cc +++ b/3rdParty/rocksdb/v5.18.X/port/win/win_thread.cc @@ -40,7 +40,7 @@ struct WindowsThread::Data { void WindowsThread::Init(std::function&& func) { data_ = std::make_shared(std::move(func)); - // We create another instance of shared_ptr to get an additional ref + // We create another instance of std::shared_ptr to get an additional ref // since we may detach and destroy this instance before the threadproc // may start to run. 
We choose to allocate this additional ref on the heap // so we do not need to synchronize and allow this thread to proceed diff --git a/3rdParty/rocksdb/v5.16.X/port/win/win_thread.h b/3rdParty/rocksdb/v5.18.X/port/win/win_thread.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/win/win_thread.h rename to 3rdParty/rocksdb/v5.18.X/port/win/win_thread.h diff --git a/3rdParty/rocksdb/v5.16.X/port/win/xpress_win.cc b/3rdParty/rocksdb/v5.18.X/port/win/xpress_win.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/win/xpress_win.cc rename to 3rdParty/rocksdb/v5.18.X/port/win/xpress_win.cc diff --git a/3rdParty/rocksdb/v5.16.X/port/win/xpress_win.h b/3rdParty/rocksdb/v5.18.X/port/win/xpress_win.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/win/xpress_win.h rename to 3rdParty/rocksdb/v5.18.X/port/win/xpress_win.h diff --git a/3rdParty/rocksdb/v5.16.X/port/xpress.h b/3rdParty/rocksdb/v5.18.X/port/xpress.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/port/xpress.h rename to 3rdParty/rocksdb/v5.18.X/port/xpress.h diff --git a/3rdParty/rocksdb/v5.16.X/src.mk b/3rdParty/rocksdb/v5.18.X/src.mk similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/src.mk rename to 3rdParty/rocksdb/v5.18.X/src.mk index 68aa8618c6..5ba7f4b7c0 100644 --- a/3rdParty/rocksdb/v5.16.X/src.mk +++ b/3rdParty/rocksdb/v5.18.X/src.mk @@ -11,6 +11,7 @@ LIB_SOURCES = \ db/compaction_iterator.cc \ db/compaction_job.cc \ db/compaction_picker.cc \ + db/compaction_picker_fifo.cc \ db/compaction_picker_universal.cc \ db/convenience.cc \ db/db_filesnapshot.cc \ @@ -43,6 +44,8 @@ LIB_SOURCES = \ db/merge_helper.cc \ db/merge_operator.cc \ db/range_del_aggregator.cc \ + db/range_del_aggregator_v2.cc \ + db/range_tombstone_fragmenter.cc \ db/repair.cc \ db/snapshot_impl.cc \ db/table_cache.cc \ @@ -120,6 +123,7 @@ LIB_SOURCES = \ table/plain_table_index.cc \ table/plain_table_key_coding.cc \ table/plain_table_reader.cc \ + table/sst_file_reader.cc \ 
table/sst_file_writer.cc \ table/table_properties.cc \ table/two_level_iterator.cc \ @@ -142,6 +146,7 @@ LIB_SOURCES = \ util/filename.cc \ util/filter_policy.cc \ util/hash.cc \ + util/jemalloc_nodump_allocator.cc \ util/log_buffer.cc \ util/murmurhash.cc \ util/random.cc \ @@ -161,6 +166,7 @@ LIB_SOURCES = \ utilities/blob_db/blob_compaction_filter.cc \ utilities/blob_db/blob_db.cc \ utilities/blob_db/blob_db_impl.cc \ + utilities/blob_db/blob_db_impl_filesnapshot.cc \ utilities/blob_db/blob_file.cc \ utilities/blob_db/blob_log_format.cc \ utilities/blob_db/blob_log_reader.cc \ @@ -327,6 +333,9 @@ MAIN_SOURCES = \ db/redis_test.cc \ db/repair_test.cc \ db/range_del_aggregator_test.cc \ + db/range_del_aggregator_bench.cc \ + db/range_del_aggregator_v2_test.cc \ + db/range_tombstone_fragmenter_test.cc \ db/table_properties_collector_test.cc \ db/util_merge_operators_test.cc \ db/version_builder_test.cc \ @@ -355,6 +364,7 @@ MAIN_SOURCES = \ table/data_block_hash_index_test.cc \ table/full_filter_block_test.cc \ table/merger_test.cc \ + table/sst_file_reader_test.cc \ table/table_reader_bench.cc \ table/table_test.cc \ third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc \ @@ -376,6 +386,7 @@ MAIN_SOURCES = \ util/filelock_test.cc \ util/log_write_bench.cc \ util/rate_limiter_test.cc \ + util/repeatable_thread_test.cc \ util/slice_transform_test.cc \ util/timer_queue_test.cc \ util/thread_list_test.cc \ @@ -434,6 +445,7 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/loggerjnicallback.cc \ java/rocksjni/lru_cache.cc \ java/rocksjni/memtablejni.cc \ + java/rocksjni/memory_util.cc \ java/rocksjni/merge_operator.cc \ java/rocksjni/native_comparator_wrapper_test.cc \ java/rocksjni/optimistic_transaction_db.cc \ @@ -466,4 +478,5 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/write_batch.cc \ java/rocksjni/writebatchhandlerjnicallback.cc \ java/rocksjni/write_batch_test.cc \ - java/rocksjni/write_batch_with_index.cc + java/rocksjni/write_batch_with_index.cc \ + 
java/rocksjni/write_buffer_manager.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/adaptive_table_factory.cc b/3rdParty/rocksdb/v5.18.X/table/adaptive_table_factory.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/adaptive_table_factory.cc rename to 3rdParty/rocksdb/v5.18.X/table/adaptive_table_factory.cc index 0a3e9415ad..bbba3b9193 100644 --- a/3rdParty/rocksdb/v5.16.X/table/adaptive_table_factory.cc +++ b/3rdParty/rocksdb/v5.18.X/table/adaptive_table_factory.cc @@ -42,8 +42,8 @@ extern const uint64_t kCuckooTableMagicNumber; Status AdaptiveTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, bool /*prefetch_index_and_filter_in_cache*/) const { Footer footer; auto s = ReadFooterFromFile(file.get(), nullptr /* prefetch_buffer */, diff --git a/3rdParty/rocksdb/v5.16.X/table/adaptive_table_factory.h b/3rdParty/rocksdb/v5.18.X/table/adaptive_table_factory.h similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/table/adaptive_table_factory.h rename to 3rdParty/rocksdb/v5.18.X/table/adaptive_table_factory.h index 00af6a76e9..2a82dbfa98 100644 --- a/3rdParty/rocksdb/v5.16.X/table/adaptive_table_factory.h +++ b/3rdParty/rocksdb/v5.18.X/table/adaptive_table_factory.h @@ -35,8 +35,8 @@ class AdaptiveTableFactory : public TableFactory { Status NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, bool prefetch_index_and_filter_in_cache = true) const override; TableBuilder* NewTableBuilder( diff --git a/3rdParty/rocksdb/v5.16.X/table/block.cc b/3rdParty/rocksdb/v5.18.X/table/block.cc similarity index 87% rename from 3rdParty/rocksdb/v5.16.X/table/block.cc rename to 3rdParty/rocksdb/v5.18.X/table/block.cc index f26416e1b7..4e8d6e5ca5 100644 --- 
a/3rdParty/rocksdb/v5.16.X/table/block.cc +++ b/3rdParty/rocksdb/v5.18.X/table/block.cc @@ -235,7 +235,7 @@ void DataBlockIter::Seek(const Slice& target) { // // If the return value is FALSE, iter location is undefined, and it means: // 1) there is no key in this block falling into the range: -// ["seek_user_key @ type | seqno", "seek_user_key @ type | 0"], +// ["seek_user_key @ type | seqno", "seek_user_key @ kTypeDeletion | 0"], // inclusive; AND // 2) the last key of this block has a greater user_key from seek_user_key // @@ -243,13 +243,21 @@ void DataBlockIter::Seek(const Slice& target) { // 1) If iter is valid, it is set to a location as if set by BinarySeek. In // this case, it points to the first key_ with a larger user_key or a // matching user_key with a seqno no greater than the seeking seqno. -// 2) If the iter is invalid, it means either the block has no such user_key, -// or the block ends with a matching user_key but with a larger seqno. +// 2) If the iter is invalid, it means that either all the user_key is less +// than the seek_user_key, or the block ends with a matching user_key but +// with a smaller [ type | seqno ] (i.e. a larger seqno, or the same seqno +// but larger type). bool DataBlockIter::SeekForGetImpl(const Slice& target) { Slice user_key = ExtractUserKey(target); uint32_t map_offset = restarts_ + num_restarts_ * sizeof(uint32_t); uint8_t entry = data_block_hash_index_->Lookup(data_, map_offset, user_key); + if (entry == kCollision) { + // HashSeek not effective, falling back + Seek(target); + return true; + } + if (entry == kNoEntry) { // Even if we cannot find the user_key in this block, the result may // exist in the next block. Consider this exmpale: @@ -260,16 +268,13 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) { // // If seek_key = axy@60, the search will starts from Block N. // Even if the user_key is not found in the hash map, the caller still - // have to conntinue searching the next block. 
So we invalidate the - // iterator to tell the caller to go on. - current_ = restarts_; // Invalidate the iter - return true; - } - - if (entry == kCollision) { - // HashSeek not effective, falling back - Seek(target); - return true; + // have to conntinue searching the next block. + // + // In this case, we pretend the key is the the last restart interval. + // The while-loop below will search the last restart interval for the + // key. It will stop at the first key that is larger than the seek_key, + // or to the end of the block if no one is larger. + entry = static_cast(num_restarts_ - 1); } uint32_t restart_index = entry; @@ -299,24 +304,26 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) { } if (current_ == restarts_) { - // Search reaches to the end of the block. There are two possibilites; + // Search reaches to the end of the block. There are three possibilites: // 1) there is only one user_key match in the block (otherwise collsion). - // the matching user_key resides in the last restart interval. - // it is the last key of the restart interval and of the block too. - // ParseNextDataKey() skiped it as its seqno is newer. + // the matching user_key resides in the last restart interval, and it + // is the last key of the restart interval and of the block as well. + // ParseNextDataKey() skiped it as its [ type | seqno ] is smaller. // - // 2) The seek_key is a false positive and got hashed to the last restart - // interval. - // All existing keys in the restart interval are less than seek_key. + // 2) The seek_key is not found in the HashIndex Lookup(), i.e. kNoEntry, + // AND all existing user_keys in the restart interval are smaller than + // seek_user_key. // - // The result may exist in the next block in either case, so may_exist is - // returned as true. + // 3) The seek_key is a false positive and happens to be hashed to the + // last restart interval, AND all existing user_keys in the restart + // interval are smaller than seek_user_key. 
+ // + // The result may exist in the next block each case, so we return true. return true; } if (user_comparator_->Compare(key_.GetUserKey(), user_key) != 0) { // the key is not in this block and cannot be at the next block either. - // return false to tell the caller to break from the top-level for-loop return false; } @@ -349,10 +356,10 @@ void IndexBlockIter::Seek(const Slice& target) { ok = PrefixSeek(target, &index); } else if (value_delta_encoded_) { ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, - active_comparator_); + comparator_); } else { ok = BinarySeek(seek_key, 0, num_restarts_ - 1, &index, - active_comparator_); + comparator_); } if (!ok) { @@ -725,13 +732,16 @@ uint32_t Block::NumRestarts() const { uint32_t block_footer = DecodeFixed32(data_ + size_ - sizeof(uint32_t)); uint32_t num_restarts = block_footer; if (size_ > kMaxBlockSizeSupportedByHashIndex) { - // We ensure a block with HashIndex is less than 64KiB in BlockBuilder. - // Therefore the footer cannot be encoded as a packed index type and + // In BlockBuilder, we have ensured a block with HashIndex is less than + // kMaxBlockSizeSupportedByHashIndex (64KiB). + // + // Therefore, if we encounter a block with a size > 64KiB, the block + // cannot have HashIndex. So the footer will directly interpreted as // num_restarts. - // Such check can ensure legacy block with a vary large num_restarts - // i.e. >= 0x10000000 can be interpreted correctly as no HashIndex. - // If a legacy block hash a num_restarts >= 0x10000000, size_ will be - // much large than 64KiB. + // + // Such check is for backward compatibility. We can ensure legacy block + // with a vary large num_restarts i.e. >= 0x80000000 can be interpreted + // correctly as no HashIndex even if the MSB of num_restarts is set. 
return num_restarts; } BlockBasedTableOptions::DataBlockIndexType index_type; @@ -752,7 +762,11 @@ BlockBasedTableOptions::DataBlockIndexType Block::IndexType() const { return index_type; } -Block::~Block() { TEST_SYNC_POINT("Block::~Block"); } +Block::~Block() { + // This sync point can be re-enabled if RocksDB can control the + // initialization order of any/all static options created by the user. + // TEST_SYNC_POINT("Block::~Block"); +} Block::Block(BlockContents&& contents, SequenceNumber _global_seqno, size_t read_amp_bytes_per_bit, Statistics* statistics) @@ -767,47 +781,45 @@ Block::Block(BlockContents&& contents, SequenceNumber _global_seqno, size_ = 0; // Error marker } else { // Should only decode restart points for uncompressed blocks - if (compression_type() == kNoCompression) { - num_restarts_ = NumRestarts(); - switch (IndexType()) { - case BlockBasedTableOptions::kDataBlockBinarySearch: - restart_offset_ = static_cast(size_) - - (1 + num_restarts_) * sizeof(uint32_t); - if (restart_offset_ > size_ - sizeof(uint32_t)) { - // The size is too small for NumRestarts() and therefore - // restart_offset_ wrapped around. - size_ = 0; - } + num_restarts_ = NumRestarts(); + switch (IndexType()) { + case BlockBasedTableOptions::kDataBlockBinarySearch: + restart_offset_ = static_cast(size_) - + (1 + num_restarts_) * sizeof(uint32_t); + if (restart_offset_ > size_ - sizeof(uint32_t)) { + // The size is too small for NumRestarts() and therefore + // restart_offset_ wrapped around. 
+ size_ = 0; + } + break; + case BlockBasedTableOptions::kDataBlockBinaryAndHash: + if (size_ < sizeof(uint32_t) /* block footer */ + + sizeof(uint16_t) /* NUM_BUCK */) { + size_ = 0; break; - case BlockBasedTableOptions::kDataBlockBinaryAndHash: - if (size_ < sizeof(uint32_t) /* block footer */ + - sizeof(uint16_t) /* NUM_BUCK */) { - size_ = 0; - break; - } + } - uint16_t map_offset; - data_block_hash_index_.Initialize( - contents.data.data(), - static_cast(contents.data.size() - - sizeof(uint32_t)), /*chop off - NUM_RESTARTS*/ - &map_offset); + uint16_t map_offset; + data_block_hash_index_.Initialize( + contents.data.data(), + static_cast(contents.data.size() - + sizeof(uint32_t)), /*chop off + NUM_RESTARTS*/ + &map_offset); - restart_offset_ = map_offset - num_restarts_ * sizeof(uint32_t); + restart_offset_ = map_offset - num_restarts_ * sizeof(uint32_t); - if (restart_offset_ > map_offset) { - // map_offset is too small for NumRestarts() and - // therefore restart_offset_ wrapped around. - size_ = 0; - break; - } + if (restart_offset_ > map_offset) { + // map_offset is too small for NumRestarts() and + // therefore restart_offset_ wrapped around. 
+ size_ = 0; break; - default: - size_ = 0; // Error marker - } + } + break; + default: + size_ = 0; // Error marker + } } - } if (read_amp_bytes_per_bit != 0 && statistics && size_ != 0) { read_amp_bitmap_.reset(new BlockReadAmpBitmap( restart_offset_, read_amp_bytes_per_bit, statistics)); @@ -820,6 +832,7 @@ DataBlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp, bool /*total_order_seek*/, bool /*key_includes_seq*/, bool /*value_is_full*/, + bool block_contents_pinned, BlockPrefixIndex* /*prefix_index*/) { DataBlockIter* ret_iter; if (iter != nullptr) { @@ -838,7 +851,7 @@ DataBlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp, } else { ret_iter->Initialize( cmp, ucmp, data_, restart_offset_, num_restarts_, global_seqno_, - read_amp_bitmap_.get(), cachable(), + read_amp_bitmap_.get(), block_contents_pinned, data_block_hash_index_.Valid() ? &data_block_hash_index_ : nullptr); if (read_amp_bitmap_) { if (read_amp_bitmap_->GetStatistics() != stats) { @@ -856,6 +869,7 @@ IndexBlockIter* Block::NewIterator(const Comparator* cmp, const Comparator* ucmp, IndexBlockIter* iter, Statistics* /*stats*/, bool total_order_seek, bool key_includes_seq, bool value_is_full, + bool block_contents_pinned, BlockPrefixIndex* prefix_index) { IndexBlockIter* ret_iter; if (iter != nullptr) { @@ -876,7 +890,8 @@ IndexBlockIter* Block::NewIterator(const Comparator* cmp, total_order_seek ? 
nullptr : prefix_index; ret_iter->Initialize(cmp, ucmp, data_, restart_offset_, num_restarts_, prefix_index_ptr, key_includes_seq, value_is_full, - cachable(), nullptr /* data_block_hash_index */); + block_contents_pinned, + nullptr /* data_block_hash_index */); } return ret_iter; diff --git a/3rdParty/rocksdb/v5.16.X/table/block.h b/3rdParty/rocksdb/v5.18.X/table/block.h similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/table/block.h rename to 3rdParty/rocksdb/v5.18.X/table/block.h index ee3f07b43b..1a8073203b 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block.h +++ b/3rdParty/rocksdb/v5.18.X/table/block.h @@ -153,14 +153,12 @@ class Block { size_t size() const { return size_; } const char* data() const { return data_; } - bool cachable() const { return contents_.cachable; } // The additional memory space taken by the block data. size_t usable_size() const { return contents_.usable_size(); } uint32_t NumRestarts() const; + bool own_bytes() const { return contents_.own_bytes(); } + BlockBasedTableOptions::DataBlockIndexType IndexType() const; - CompressionType compression_type() const { - return contents_.compression_type; - } // If comparator is InternalKeyComparator, user_comparator is its user // comparator; they are equal otherwise. @@ -170,7 +168,7 @@ class Block { // // key_includes_seq, default true, means that the keys are in internal key // format. - // value_is_full, default ture, means that no delta encoding is + // value_is_full, default true, means that no delta encoding is // applied to values. // // NewIterator @@ -180,6 +178,14 @@ class Block { // If `prefix_index` is not nullptr this block will do hash lookup for the key // prefix. If total_order_seek is true, prefix_index_ is ignored. // + // If `block_contents_pinned` is true, the caller will guarantee that when + // the cleanup functions are transferred from the iterator to other + // classes, e.g. PinnableSlice, the pointer to the bytes will still be + // valid. 
Either the iterator holds cache handle or ownership of some resource + // and release them in a release function, or caller is sure that the data + // will not go away (for example, it's from mmapped file which will not be + // closed). + // // NOTE: for the hash based lookup, if a key prefix doesn't match any key, // the iterator will simply be set as "invalid", rather than returning // the key that is just pass the target key. @@ -188,7 +194,8 @@ class Block { const Comparator* comparator, const Comparator* user_comparator, TBlockIter* iter = nullptr, Statistics* stats = nullptr, bool total_order_seek = true, bool key_includes_seq = true, - bool value_is_full = true, BlockPrefixIndex* prefix_index = nullptr); + bool value_is_full = true, bool block_contents_pinned = false, + BlockPrefixIndex* prefix_index = nullptr); // Report an approximation of how much memory has been used. size_t ApproximateMemoryUsage() const; @@ -295,7 +302,9 @@ class BlockIter : public InternalIteratorBase { Slice value_; Status status_; bool key_pinned_; - // whether the block data is guaranteed to outlive this iterator + // Whether the block data is guaranteed to outlive this iterator, and + // as long as the cleanup functions are transferred to another class, + // e.g. PinnableSlice, the pointer to the bytes will still be valid. bool block_contents_pinned_; SequenceNumber global_seqno_; @@ -449,7 +458,7 @@ class IndexBlockIter final : public BlockIter { } // key_includes_seq, default true, means that the keys are in internal key // format. - // value_is_full, default ture, means that no delta encoding is + // value_is_full, default true, means that no delta encoding is // applied to values. 
IndexBlockIter(const Comparator* comparator, const Comparator* user_comparator, const char* data, @@ -468,10 +477,10 @@ class IndexBlockIter final : public BlockIter { BlockPrefixIndex* prefix_index, bool key_includes_seq, bool value_is_full, bool block_contents_pinned, DataBlockHashIndex* /*data_block_hash_index*/) { - InitializeBase(comparator, data, restarts, num_restarts, - kDisableGlobalSequenceNumber, block_contents_pinned); + InitializeBase(key_includes_seq ? comparator : user_comparator, data, + restarts, num_restarts, kDisableGlobalSequenceNumber, + block_contents_pinned); key_includes_seq_ = key_includes_seq; - active_comparator_ = key_includes_seq_ ? comparator_ : user_comparator; key_.SetIsUserKey(!key_includes_seq_); prefix_index_ = prefix_index; value_delta_encoded_ = !value_is_full; @@ -517,8 +526,6 @@ class IndexBlockIter final : public BlockIter { // Key is in InternalKey format bool key_includes_seq_; bool value_delta_encoded_; - // key_includes_seq_ ? comparator_ : user_comparator_ - const Comparator* active_comparator_; BlockPrefixIndex* prefix_index_; // Whether the value is delta encoded. In that case the value is assumed to be // BlockHandle. 
The first value in each restart interval is the full encoded @@ -535,11 +542,11 @@ class IndexBlockIter final : public BlockIter { inline int CompareBlockKey(uint32_t block_index, const Slice& target); inline int Compare(const Slice& a, const Slice& b) const { - return active_comparator_->Compare(a, b); + return comparator_->Compare(a, b); } inline int Compare(const IterKey& ikey, const Slice& b) const { - return active_comparator_->Compare(ikey.GetKey(), b); + return comparator_->Compare(ikey.GetKey(), b); } inline bool ParseNextIndexKey(); diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_filter_block.cc b/3rdParty/rocksdb/v5.18.X/table/block_based_filter_block.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_filter_block.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_based_filter_block.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_filter_block.h b/3rdParty/rocksdb/v5.18.X/table/block_based_filter_block.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_filter_block.h rename to 3rdParty/rocksdb/v5.18.X/table/block_based_filter_block.h diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_filter_block_test.cc b/3rdParty/rocksdb/v5.18.X/table/block_based_filter_block_test.cc similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_filter_block_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_based_filter_block_test.cc index 8de857f4ef..3cba09847a 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_based_filter_block_test.cc +++ b/3rdParty/rocksdb/v5.18.X/table/block_based_filter_block_test.cc @@ -55,7 +55,7 @@ class FilterBlockTest : public testing::Test { TEST_F(FilterBlockTest, EmptyBuilder) { BlockBasedFilterBlockBuilder builder(nullptr, table_options_); - BlockContents block(builder.Finish(), false, kNoCompression); + BlockContents block(builder.Finish()); ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data)); BlockBasedFilterBlockReader 
reader(nullptr, table_options_, true, std::move(block), nullptr); @@ -75,7 +75,7 @@ TEST_F(FilterBlockTest, SingleChunk) { builder.StartBlock(300); builder.Add("hello"); ASSERT_EQ(5, builder.NumAdded()); - BlockContents block(builder.Finish(), false, kNoCompression); + BlockContents block(builder.Finish()); BlockBasedFilterBlockReader reader(nullptr, table_options_, true, std::move(block), nullptr); ASSERT_TRUE(reader.KeyMayMatch("foo", nullptr, 100)); @@ -107,7 +107,7 @@ TEST_F(FilterBlockTest, MultiChunk) { builder.Add("box"); builder.Add("hello"); - BlockContents block(builder.Finish(), false, kNoCompression); + BlockContents block(builder.Finish()); BlockBasedFilterBlockReader reader(nullptr, table_options_, true, std::move(block), nullptr); @@ -152,7 +152,7 @@ class BlockBasedFilterBlockTest : public testing::Test { TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { FilterBlockBuilder* builder = new BlockBasedFilterBlockBuilder( nullptr, table_options_); - BlockContents block(builder->Finish(), false, kNoCompression); + BlockContents block(builder->Finish()); ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block.data)); FilterBlockReader* reader = new BlockBasedFilterBlockReader( nullptr, table_options_, true, std::move(block), nullptr); @@ -174,7 +174,7 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) { builder->Add("box"); builder->StartBlock(300); builder->Add("hello"); - BlockContents block(builder->Finish(), false, kNoCompression); + BlockContents block(builder->Finish()); FilterBlockReader* reader = new BlockBasedFilterBlockReader( nullptr, table_options_, true, std::move(block), nullptr); ASSERT_TRUE(reader->KeyMayMatch("foo", nullptr, 100)); @@ -210,7 +210,7 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) { builder->Add("box"); builder->Add("hello"); - BlockContents block(builder->Finish(), false, kNoCompression); + BlockContents block(builder->Finish()); FilterBlockReader* reader = new BlockBasedFilterBlockReader( 
nullptr, table_options_, true, std::move(block), nullptr); diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_table_builder.cc b/3rdParty/rocksdb/v5.18.X/table/block_based_table_builder.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_table_builder.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_based_table_builder.cc index 59c385d65a..a4007b07a2 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_based_table_builder.cc +++ b/3rdParty/rocksdb/v5.18.X/table/block_based_table_builder.cc @@ -42,6 +42,7 @@ #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" +#include "util/memory_allocator.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/xxhash.h" @@ -449,6 +450,11 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { r->props.num_entries++; r->props.raw_key_size += key.size(); r->props.raw_value_size += value.size(); + if (value_type == kTypeDeletion || value_type == kTypeSingleDeletion) { + r->props.num_deletions++; + } else if (value_type == kTypeMerge) { + r->props.num_merge_operands++; + } r->index_builder->OnKeyAdded(key); NotifyCollectTableCollectorsOnAdd(key, value, r->offset, @@ -609,6 +615,18 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents, EncodeFixed32(trailer_without_type, XXH32_digest(xxh)); break; } + case kxxHash64: { + XXH64_state_t* const state = XXH64_createState(); + XXH64_reset(state, 0); + XXH64_update(state, block_contents.data(), + static_cast(block_contents.size())); + XXH64_update(state, trailer, 1); // Extend to cover block type + EncodeFixed32(trailer_without_type, + static_cast(XXH64_digest(state) & // lower 32 bits + uint64_t{0xffffffff})); + XXH64_freeState(state); + break; + } } assert(r->status.ok()); @@ -636,9 +654,9 @@ Status BlockBasedTableBuilder::status() const { return rep_->status; } -static void DeleteCachedBlock(const Slice& /*key*/, void* value) { - Block* block = reinterpret_cast(value); - 
delete block; +static void DeleteCachedBlockContents(const Slice& /*key*/, void* value) { + BlockContents* bc = reinterpret_cast(value); + delete bc; } // @@ -654,13 +672,16 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents, size_t size = block_contents.size(); - std::unique_ptr ubuf(new char[size + 1]); + auto ubuf = + AllocateBlock(size + 1, block_cache_compressed->memory_allocator()); memcpy(ubuf.get(), block_contents.data(), size); ubuf[size] = type; - BlockContents results(std::move(ubuf), size, true, type); - - Block* block = new Block(std::move(results), kDisableGlobalSequenceNumber); + BlockContents* block_contents_to_cache = + new BlockContents(std::move(ubuf), size); +#ifndef NDEBUG + block_contents_to_cache->is_raw_block = true; +#endif // NDEBUG // make cache key by appending the file offset to the cache prefix id char* end = EncodeVarint64( @@ -671,8 +692,10 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents, (end - r->compressed_cache_key_prefix)); // Insert into compressed block cache. - block_cache_compressed->Insert(key, block, block->ApproximateMemoryUsage(), - &DeleteCachedBlock); + block_cache_compressed->Insert( + key, block_contents_to_cache, + block_contents_to_cache->ApproximateMemoryUsage(), + &DeleteCachedBlockContents); // Invalidate OS cache. 
r->file->InvalidateCache(static_cast(r->offset), size); diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_table_builder.h b/3rdParty/rocksdb/v5.18.X/table/block_based_table_builder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_table_builder.h rename to 3rdParty/rocksdb/v5.18.X/table/block_based_table_builder.h diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_table_factory.cc b/3rdParty/rocksdb/v5.18.X/table/block_based_table_factory.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_table_factory.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_based_table_factory.cc index 485aed8704..fbb7406a3d 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_based_table_factory.cc +++ b/3rdParty/rocksdb/v5.18.X/table/block_based_table_factory.cc @@ -194,8 +194,8 @@ BlockBasedTableFactory::BlockBasedTableFactory( Status BlockBasedTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table_reader, bool prefetch_index_and_filter_in_cache) const { return BlockBasedTable::Open( table_reader_options.ioptions, table_reader_options.env_options, diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_table_factory.h b/3rdParty/rocksdb/v5.18.X/table/block_based_table_factory.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_table_factory.h rename to 3rdParty/rocksdb/v5.18.X/table/block_based_table_factory.h index b30bd6232a..cde6f65357 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_based_table_factory.h +++ b/3rdParty/rocksdb/v5.18.X/table/block_based_table_factory.h @@ -53,8 +53,8 @@ class BlockBasedTableFactory : public TableFactory { Status NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader, + std::unique_ptr&& file, uint64_t file_size, + 
std::unique_ptr* table_reader, bool prefetch_index_and_filter_in_cache = true) const override; TableBuilder* NewTableBuilder( diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_table_reader.cc b/3rdParty/rocksdb/v5.18.X/table/block_based_table_reader.cc similarity index 89% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_table_reader.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_based_table_reader.cc index f240b84b10..a126de88c0 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_based_table_reader.cc +++ b/3rdParty/rocksdb/v5.18.X/table/block_based_table_reader.cc @@ -78,13 +78,14 @@ Status ReadBlockFromFile( RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ReadOptions& options, const BlockHandle& handle, std::unique_ptr* result, const ImmutableCFOptions& ioptions, - bool do_uncompress, const Slice& compression_dict, + bool do_uncompress, bool maybe_compressed, const Slice& compression_dict, const PersistentCacheOptions& cache_options, SequenceNumber global_seqno, - size_t read_amp_bytes_per_bit, const bool immortal_file = false) { + size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator) { BlockContents contents; BlockFetcher block_fetcher(file, prefetch_buffer, footer, options, handle, &contents, ioptions, do_uncompress, - compression_dict, cache_options, immortal_file); + maybe_compressed, compression_dict, cache_options, + memory_allocator); Status s = block_fetcher.ReadBlockContents(); if (s.ok()) { result->reset(new Block(std::move(contents), global_seqno, @@ -94,6 +95,20 @@ Status ReadBlockFromFile( return s; } +inline MemoryAllocator* GetMemoryAllocator( + const BlockBasedTableOptions& table_options) { + return table_options.block_cache.get() + ? table_options.block_cache->memory_allocator() + : nullptr; +} + +inline MemoryAllocator* GetMemoryAllocatorForCompressedBlock( + const BlockBasedTableOptions& table_options) { + return table_options.block_cache_compressed.get() + ? 
table_options.block_cache_compressed->memory_allocator() + : nullptr; +} + // Delete the resource that is held by the iterator. template void DeleteHeldResource(void* arg, void* /*ignored*/) { @@ -215,13 +230,15 @@ class PartitionIndexReader : public IndexReader, public Cleanable { IndexReader** index_reader, const PersistentCacheOptions& cache_options, const int level, const bool index_key_includes_seq, - const bool index_value_is_full) { + const bool index_value_is_full, + MemoryAllocator* memory_allocator) { std::unique_ptr index_block; auto s = ReadBlockFromFile( file, prefetch_buffer, footer, ReadOptions(), index_handle, &index_block, ioptions, true /* decompress */, - Slice() /*compression dict*/, cache_options, - kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */); + true /*maybe_compressed*/, Slice() /*compression dict*/, cache_options, + kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */, + memory_allocator); if (s.ok()) { *index_reader = new PartitionIndexReader( @@ -239,6 +256,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable { Statistics* kNullStats = nullptr; // Filters are already checked before seeking the index if (!partition_map_.empty()) { + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. return NewTwoLevelIterator( new BlockBasedTable::PartitionedIndexIteratorState( table_, &partition_map_, index_key_includes_seq_, @@ -250,6 +269,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable { auto ro = ReadOptions(); ro.fill_cache = fill_cache; bool kIsIndex = true; + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. 
return new BlockBasedTableIterator( table_, ro, *icomparator_, index_block_->NewIterator( @@ -270,6 +291,8 @@ class PartitionIndexReader : public IndexReader, public Cleanable { IndexBlockIter biter; BlockHandle handle; Statistics* kNullStats = nullptr; + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. index_block_->NewIterator( icomparator_, icomparator_->user_comparator(), &biter, kNullStats, true, index_key_includes_seq_, index_value_is_full_); @@ -312,7 +335,7 @@ class PartitionIndexReader : public IndexReader, public Cleanable { const bool is_index = true; // TODO: Support counter batch update for partitioned index and // filter blocks - s = table_->MaybeLoadDataBlockToCache( + s = table_->MaybeReadBlockAndLoadToCache( prefetch_buffer.get(), rep, ro, handle, compression_dict, &block, is_index, nullptr /* get_context */); @@ -388,13 +411,15 @@ class BinarySearchIndexReader : public IndexReader { IndexReader** index_reader, const PersistentCacheOptions& cache_options, const bool index_key_includes_seq, - const bool index_value_is_full) { + const bool index_value_is_full, + MemoryAllocator* memory_allocator) { std::unique_ptr index_block; auto s = ReadBlockFromFile( file, prefetch_buffer, footer, ReadOptions(), index_handle, &index_block, ioptions, true /* decompress */, - Slice() /*compression dict*/, cache_options, - kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */); + true /*maybe_compressed*/, Slice() /*compression dict*/, cache_options, + kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */, + memory_allocator); if (s.ok()) { *index_reader = new BinarySearchIndexReader( @@ -409,6 +434,8 @@ class BinarySearchIndexReader : public IndexReader { IndexBlockIter* iter = nullptr, bool /*dont_care*/ = true, bool /*dont_care*/ = true) override { Statistics* kNullStats = nullptr; + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. 
return index_block_->NewIterator( icomparator_, icomparator_->user_comparator(), iter, kNullStats, true, index_key_includes_seq_, index_value_is_full_); @@ -458,13 +485,15 @@ class HashIndexReader : public IndexReader { InternalIterator* meta_index_iter, IndexReader** index_reader, bool /*hash_index_allow_collision*/, const PersistentCacheOptions& cache_options, - const bool index_key_includes_seq, const bool index_value_is_full) { + const bool index_key_includes_seq, const bool index_value_is_full, + MemoryAllocator* memory_allocator) { std::unique_ptr index_block; auto s = ReadBlockFromFile( file, prefetch_buffer, footer, ReadOptions(), index_handle, &index_block, ioptions, true /* decompress */, - Slice() /*compression dict*/, cache_options, - kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */); + true /*maybe_compressed*/, Slice() /*compression dict*/, cache_options, + kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */, + memory_allocator); if (!s.ok()) { return s; @@ -502,8 +531,9 @@ class HashIndexReader : public IndexReader { BlockContents prefixes_contents; BlockFetcher prefixes_block_fetcher( file, prefetch_buffer, footer, ReadOptions(), prefixes_handle, - &prefixes_contents, ioptions, true /* decompress */, - dummy_comp_dict /*compression dict*/, cache_options); + &prefixes_contents, ioptions, true /*decompress*/, + true /*maybe_compressed*/, dummy_comp_dict /*compression dict*/, + cache_options, memory_allocator); s = prefixes_block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; @@ -511,8 +541,9 @@ class HashIndexReader : public IndexReader { BlockContents prefixes_meta_contents; BlockFetcher prefixes_meta_block_fetcher( file, prefetch_buffer, footer, ReadOptions(), prefixes_meta_handle, - &prefixes_meta_contents, ioptions, true /* decompress */, - dummy_comp_dict /*compression dict*/, cache_options); + &prefixes_meta_contents, ioptions, true /*decompress*/, + true /*maybe_compressed*/, dummy_comp_dict /*compression dict*/, + 
cache_options, memory_allocator); s = prefixes_meta_block_fetcher.ReadBlockContents(); if (!s.ok()) { // TODO: log error @@ -534,10 +565,12 @@ class HashIndexReader : public IndexReader { IndexBlockIter* iter = nullptr, bool total_order_seek = true, bool /*dont_care*/ = true) override { Statistics* kNullStats = nullptr; + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. return index_block_->NewIterator( icomparator_, icomparator_->user_comparator(), iter, kNullStats, total_order_seek, index_key_includes_seq_, index_value_is_full_, - prefix_index_.get()); + false /* block_contents_pinned */, prefix_index_.get()); } virtual size_t size() const override { return index_block_->size(); } @@ -572,8 +605,7 @@ class HashIndexReader : public IndexReader { assert(index_block_ != nullptr); } - ~HashIndexReader() { - } + ~HashIndexReader() {} std::unique_ptr index_block_; std::unique_ptr prefix_index_; @@ -737,9 +769,9 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, - unique_ptr&& file, + std::unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader, + std::unique_ptr* table_reader, const SliceTransform* prefix_extractor, const bool prefetch_index_and_filter_in_cache, const bool skip_filters, const int level, @@ -807,7 +839,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, // raw pointer will be used to create HashIndexReader, whose reset may // access a dangling pointer. 
Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options, - internal_comparator, skip_filters, + internal_comparator, skip_filters, level, immortal_table); rep->file = std::move(file); rep->footer = footer; @@ -818,7 +850,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, rep->internal_prefix_transform.reset( new InternalKeySliceTransform(prefix_extractor)); SetupCacheKeyPrefix(rep, file_size); - unique_ptr new_table(new BlockBasedTable(rep)); + std::unique_ptr new_table(new BlockBasedTable(rep)); // page cache options rep->persistent_cache_options = @@ -878,7 +910,9 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, if (s.ok()) { s = ReadProperties(meta_iter->value(), rep->file.get(), prefetch_buffer.get(), rep->footer, rep->ioptions, - &table_properties, false /* compression_type_missing */); + &table_properties, + false /* compression_type_missing */, + nullptr /* memory_allocator */); } if (!s.ok()) { @@ -921,9 +955,10 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, ReadOptions read_options; read_options.verify_checksums = false; BlockFetcher compression_block_fetcher( - rep->file.get(), prefetch_buffer.get(), rep->footer, read_options, - compression_dict_handle, compression_dict_cont.get(), rep->ioptions, false /* decompress */, - Slice() /*compression dict*/, cache_options); + rep->file.get(), prefetch_buffer.get(), rep->footer, read_options, + compression_dict_handle, compression_dict_cont.get(), rep->ioptions, + false /* decompress */, false /*maybe_compressed*/, + Slice() /*compression dict*/, cache_options); s = compression_block_fetcher.ReadBlockContents(); if (!s.ok()) { @@ -964,20 +999,22 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, rep->ioptions.info_log, "Error when seeking to range delete tombstones block from file: %s", s.ToString().c_str()); - } else { - if (found_range_del_block && !rep->range_del_handle.IsNull()) { - ReadOptions read_options; - s = 
MaybeLoadDataBlockToCache( - prefetch_buffer.get(), rep, read_options, rep->range_del_handle, - Slice() /* compression_dict */, &rep->range_del_entry, - false /* is_index */, nullptr /* get_context */); - if (!s.ok()) { - ROCKS_LOG_WARN( - rep->ioptions.info_log, - "Encountered error while reading data from range del block %s", - s.ToString().c_str()); - } + } else if (found_range_del_block && !rep->range_del_handle.IsNull()) { + ReadOptions read_options; + s = MaybeReadBlockAndLoadToCache( + prefetch_buffer.get(), rep, read_options, rep->range_del_handle, + Slice() /* compression_dict */, &rep->range_del_entry, + false /* is_index */, nullptr /* get_context */); + if (!s.ok()) { + ROCKS_LOG_WARN( + rep->ioptions.info_log, + "Encountered error while reading data from range del block %s", + s.ToString().c_str()); } + auto iter = std::unique_ptr( + new_table->NewUnfragmentedRangeTombstoneIterator(read_options)); + rep->fragmented_range_dels = std::make_shared( + std::move(iter), internal_comparator); } bool need_upper_bound_check = @@ -1019,7 +1056,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, bool disable_prefix_seek = rep->index_type == BlockBasedTableOptions::kHashSearch && need_upper_bound_check; - unique_ptr> iter( + std::unique_ptr> iter( new_table->NewIndexIterator(ReadOptions(), disable_prefix_seek, nullptr, &index_entry)); s = iter->status(); @@ -1094,7 +1131,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, if (tail_prefetch_stats != nullptr) { assert(prefetch_buffer->min_offset_read() < file_size); tail_prefetch_stats->RecordEffectiveSize( - file_size - prefetch_buffer->min_offset_read()); + static_cast(file_size) - prefetch_buffer->min_offset_read()); } *table_reader = std::move(new_table); } @@ -1148,9 +1185,10 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep, Status s = ReadBlockFromFile( rep->file.get(), prefetch_buffer, rep->footer, ReadOptions(), rep->footer.metaindex_handle(), &meta, rep->ioptions, - true 
/* decompress */, Slice() /*compression dict*/, - rep->persistent_cache_options, kDisableGlobalSequenceNumber, - 0 /* read_amp_bytes_per_bit */); + true /* decompress */, true /*maybe_compressed*/, + Slice() /*compression dict*/, rep->persistent_cache_options, + kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */, + GetMemoryAllocator(rep->table_options)); if (!s.ok()) { ROCKS_LOG_ERROR(rep->ioptions.info_log, @@ -1169,15 +1207,14 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep, Status BlockBasedTable::GetDataBlockFromCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, - Cache* block_cache, Cache* block_cache_compressed, - const ImmutableCFOptions& ioptions, const ReadOptions& read_options, - BlockBasedTable::CachableEntry* block, uint32_t format_version, - const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index, - GetContext* get_context) { + Cache* block_cache, Cache* block_cache_compressed, Rep* rep, + const ReadOptions& read_options, + BlockBasedTable::CachableEntry* block, const Slice& compression_dict, + size_t read_amp_bytes_per_bit, bool is_index, GetContext* get_context) { Status s; - Block* compressed_block = nullptr; + BlockContents* compressed_block = nullptr; Cache::Handle* block_cache_compressed_handle = nullptr; - Statistics* statistics = ioptions.statistics; + Statistics* statistics = rep->ioptions.statistics; // Lookup uncompressed cache first if (block_cache != nullptr) { @@ -1220,32 +1257,34 @@ Status BlockBasedTable::GetDataBlockFromCache( // found compressed block RecordTick(statistics, BLOCK_CACHE_COMPRESSED_HIT); - compressed_block = reinterpret_cast( + compressed_block = reinterpret_cast( block_cache_compressed->Value(block_cache_compressed_handle)); - assert(compressed_block->compression_type() != kNoCompression); + CompressionType compression_type = compressed_block->get_compression_type(); + assert(compression_type != kNoCompression); // Retrieve the uncompressed contents into a new 
buffer BlockContents contents; - UncompressionContext uncompresssion_ctx(compressed_block->compression_type(), - compression_dict); - s = UncompressBlockContents(uncompresssion_ctx, compressed_block->data(), - compressed_block->size(), &contents, - format_version, ioptions); + UncompressionContext uncompresssion_ctx(compression_type, compression_dict); + s = UncompressBlockContents(uncompresssion_ctx, compressed_block->data.data(), + compressed_block->data.size(), &contents, + rep->table_options.format_version, rep->ioptions, + GetMemoryAllocator(rep->table_options)); // Insert uncompressed block into block cache if (s.ok()) { block->value = - new Block(std::move(contents), compressed_block->global_seqno(), + new Block(std::move(contents), rep->get_global_seqno(is_index), read_amp_bytes_per_bit, statistics); // uncompressed block - assert(block->value->compression_type() == kNoCompression); - if (block_cache != nullptr && block->value->cachable() && + if (block_cache != nullptr && block->value->own_bytes() && read_options.fill_cache) { size_t charge = block->value->ApproximateMemoryUsage(); s = block_cache->Insert(block_cache_key, block->value, charge, &DeleteCachedEntry, &(block->cache_handle)); +#ifndef NDEBUG block_cache->TEST_mark_as_data_block(block_cache_key, charge); +#endif // NDEBUG if (s.ok()) { if (get_context != nullptr) { get_context->get_context_stats_.num_cache_add++; @@ -1290,64 +1329,77 @@ Status BlockBasedTable::PutDataBlockToCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, const ReadOptions& /*read_options*/, const ImmutableCFOptions& ioptions, - CachableEntry* block, Block* raw_block, uint32_t format_version, - const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index, - Cache::Priority priority, GetContext* get_context) { - assert(raw_block->compression_type() == kNoCompression || + CachableEntry* cached_block, BlockContents* raw_block_contents, + 
CompressionType raw_block_comp_type, uint32_t format_version, + const Slice& compression_dict, SequenceNumber seq_no, + size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator, + bool is_index, Cache::Priority priority, GetContext* get_context) { + assert(raw_block_comp_type == kNoCompression || block_cache_compressed != nullptr); Status s; // Retrieve the uncompressed contents into a new buffer - BlockContents contents; + BlockContents uncompressed_block_contents; Statistics* statistics = ioptions.statistics; - if (raw_block->compression_type() != kNoCompression) { - UncompressionContext uncompression_ctx(raw_block->compression_type(), + if (raw_block_comp_type != kNoCompression) { + UncompressionContext uncompression_ctx(raw_block_comp_type, compression_dict); - s = UncompressBlockContents(uncompression_ctx, raw_block->data(), - raw_block->size(), &contents, format_version, - ioptions); + s = UncompressBlockContents( + uncompression_ctx, raw_block_contents->data.data(), + raw_block_contents->data.size(), &uncompressed_block_contents, + format_version, ioptions, memory_allocator); } if (!s.ok()) { - delete raw_block; return s; } - if (raw_block->compression_type() != kNoCompression) { - block->value = new Block(std::move(contents), raw_block->global_seqno(), - read_amp_bytes_per_bit, - statistics); // uncompressed block + if (raw_block_comp_type != kNoCompression) { + cached_block->value = new Block(std::move(uncompressed_block_contents), + seq_no, read_amp_bytes_per_bit, + statistics); // uncompressed block } else { - block->value = raw_block; - raw_block = nullptr; + cached_block->value = + new Block(std::move(*raw_block_contents), seq_no, + read_amp_bytes_per_bit, ioptions.statistics); } // Insert compressed block into compressed block cache. // Release the hold on the compressed cache entry immediately. 
- if (block_cache_compressed != nullptr && raw_block != nullptr && - raw_block->cachable()) { - s = block_cache_compressed->Insert(compressed_block_cache_key, raw_block, - raw_block->ApproximateMemoryUsage(), - &DeleteCachedEntry); + if (block_cache_compressed != nullptr && + raw_block_comp_type != kNoCompression && raw_block_contents != nullptr && + raw_block_contents->own_bytes()) { +#ifndef NDEBUG + assert(raw_block_contents->is_raw_block); +#endif // NDEBUG + + // We cannot directly put raw_block_contents because this could point to + // an object in the stack. + BlockContents* block_cont_for_comp_cache = + new BlockContents(std::move(*raw_block_contents)); + s = block_cache_compressed->Insert( + compressed_block_cache_key, block_cont_for_comp_cache, + block_cont_for_comp_cache->ApproximateMemoryUsage(), + &DeleteCachedEntry); if (s.ok()) { // Avoid the following code to delete this cached block. - raw_block = nullptr; RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD); } else { RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); + delete block_cont_for_comp_cache; } } - delete raw_block; // insert into uncompressed block cache - assert((block->value->compression_type() == kNoCompression)); - if (block_cache != nullptr && block->value->cachable()) { - size_t charge = block->value->ApproximateMemoryUsage(); - s = block_cache->Insert(block_cache_key, block->value, charge, - &DeleteCachedEntry, &(block->cache_handle), - priority); + if (block_cache != nullptr && cached_block->value->own_bytes()) { + size_t charge = cached_block->value->ApproximateMemoryUsage(); + s = block_cache->Insert(block_cache_key, cached_block->value, charge, + &DeleteCachedEntry, + &(cached_block->cache_handle), priority); +#ifndef NDEBUG block_cache->TEST_mark_as_data_block(block_cache_key, charge); +#endif // NDEBUG if (s.ok()) { - assert(block->cache_handle != nullptr); + assert(cached_block->cache_handle != nullptr); if (get_context != nullptr) { 
get_context->get_context_stats_.num_cache_add++; get_context->get_context_stats_.num_cache_bytes_write += charge; @@ -1373,12 +1425,12 @@ Status BlockBasedTable::PutDataBlockToCache( RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, charge); } } - assert(reinterpret_cast( - block_cache->Value(block->cache_handle)) == block->value); + assert(reinterpret_cast(block_cache->Value( + cached_block->cache_handle)) == cached_block->value); } else { RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); - delete block->value; - block->value = nullptr; + delete cached_block->value; + cached_block->value = nullptr; } } @@ -1399,10 +1451,11 @@ FilterBlockReader* BlockBasedTable::ReadFilter( Slice dummy_comp_dict; - BlockFetcher block_fetcher(rep->file.get(), prefetch_buffer, rep->footer, - ReadOptions(), filter_handle, &block, - rep->ioptions, false /* decompress */, - dummy_comp_dict, rep->persistent_cache_options); + BlockFetcher block_fetcher( + rep->file.get(), prefetch_buffer, rep->footer, ReadOptions(), + filter_handle, &block, rep->ioptions, false /* decompress */, + false /*maybe_compressed*/, dummy_comp_dict, + rep->persistent_cache_options, GetMemoryAllocator(rep->table_options)); Status s = block_fetcher.ReadBlockContents(); if (!s.ok()) { @@ -1551,12 +1604,16 @@ InternalIteratorBase* BlockBasedTable::NewIndexIterator( GetContext* get_context) { // index reader has already been pre-populated. if (rep_->index_reader) { + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. return rep_->index_reader->NewIterator( input_iter, read_options.total_order_seek || disable_prefix_seek, read_options.fill_cache); } // we have a pinned index block if (rep_->index_entry.IsSet()) { + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. 
return rep_->index_entry.value->NewIterator( input_iter, read_options.total_order_seek || disable_prefix_seek, read_options.fill_cache); @@ -1639,6 +1696,8 @@ InternalIteratorBase* BlockBasedTable::NewIndexIterator( } assert(cache_handle); + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. auto* iter = index_reader->NewIterator( input_iter, read_options.total_order_seek || disable_prefix_seek); @@ -1673,9 +1732,9 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( if (rep->compression_dict_block) { compression_dict = rep->compression_dict_block->data; } - s = MaybeLoadDataBlockToCache(prefetch_buffer, rep, ro, handle, - compression_dict, &block, is_index, - get_context); + s = MaybeReadBlockAndLoadToCache(prefetch_buffer, rep, ro, handle, + compression_dict, &block, is_index, + get_context); } TBlockIter* iter; @@ -1697,10 +1756,13 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( READ_BLOCK_GET_MICROS); s = ReadBlockFromFile( rep->file.get(), prefetch_buffer, rep->footer, ro, handle, - &block_value, rep->ioptions, rep->blocks_maybe_compressed, - compression_dict, rep->persistent_cache_options, + &block_value, rep->ioptions, + rep->blocks_maybe_compressed /*do_decompress*/, + rep->blocks_maybe_compressed, compression_dict, + rep->persistent_cache_options, is_index ? kDisableGlobalSequenceNumber : rep->global_seqno, - rep->table_options.read_amp_bytes_per_bit, rep->immortal_table); + rep->table_options.read_amp_bytes_per_bit, + GetMemoryAllocator(rep->table_options)); } if (s.ok()) { block.value = block_value.release(); @@ -1710,10 +1772,20 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( if (s.ok()) { assert(block.value != nullptr); const bool kTotalOrderSeek = true; + // Block contents are pinned and it is still pinned after the iterator + // is destoryed as long as cleanup functions are moved to another object, + // when: + // 1. block cache handle is set to be released in cleanup function, or + // 2. 
it's pointing to immortable source. If own_bytes is true then we are + // not reading data from the original source, weather immortal or not. + // Otherwise, the block is pinned iff the source is immortal. + bool block_contents_pinned = + (block.cache_handle != nullptr || + (!block.value->own_bytes() && rep->immortal_table)); iter = block.value->NewIterator( &rep->internal_comparator, rep->internal_comparator.user_comparator(), iter, rep->ioptions.statistics, kTotalOrderSeek, key_includes_seq, - index_key_is_full); + index_key_is_full, block_contents_pinned); if (block.cache_handle != nullptr) { iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, block.cache_handle); @@ -1722,7 +1794,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( // insert a dummy record to block cache to track the memory usage Cache::Handle* cache_handle; // There are two other types of cache keys: 1) SST cache key added in - // `MaybeLoadDataBlockToCache` 2) dummy cache key added in + // `MaybeReadBlockAndLoadToCache` 2) dummy cache key added in // `write_buffer_manager`. Use longer prefix (41 bytes) to differentiate // from SST cache key(31 bytes), and use non-zero prefix to // differentiate from `write_buffer_manager` @@ -1758,25 +1830,28 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( return iter; } -Status BlockBasedTable::MaybeLoadDataBlockToCache( +Status BlockBasedTable::MaybeReadBlockAndLoadToCache( FilePrefetchBuffer* prefetch_buffer, Rep* rep, const ReadOptions& ro, const BlockHandle& handle, Slice compression_dict, CachableEntry* block_entry, bool is_index, GetContext* get_context) { assert(block_entry != nullptr); const bool no_io = (ro.read_tier == kBlockCacheTier); Cache* block_cache = rep->table_options.block_cache.get(); - Cache* block_cache_compressed = - rep->table_options.block_cache_compressed.get(); + // No point to cache compressed blocks if it never goes away + Cache* block_cache_compressed = + rep->immortal_table ? 
nullptr + : rep->table_options.block_cache_compressed.get(); + + // First, try to get the block from the cache + // // If either block cache is enabled, we'll try to read from it. Status s; + char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; + char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; + Slice key /* key to the block cache */; + Slice ckey /* key to the compressed block cache */; if (block_cache != nullptr || block_cache_compressed != nullptr) { - Statistics* statistics = rep->ioptions.statistics; - char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length]; - Slice key, /* key to the block cache */ - ckey /* key to the compressed block cache */; - // create key for block cache if (block_cache != nullptr) { key = GetCacheKey(rep->cache_key_prefix, rep->cache_key_prefix_size, @@ -1789,30 +1864,42 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache( compressed_cache_key); } - s = GetDataBlockFromCache( - key, ckey, block_cache, block_cache_compressed, rep->ioptions, ro, - block_entry, rep->table_options.format_version, compression_dict, - rep->table_options.read_amp_bytes_per_bit, is_index, get_context); + s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed, + rep, ro, block_entry, compression_dict, + rep->table_options.read_amp_bytes_per_bit, + is_index, get_context); + // Can't find the block from the cache. If I/O is allowed, read from the + // file. 
if (block_entry->value == nullptr && !no_io && ro.fill_cache) { - std::unique_ptr raw_block; + Statistics* statistics = rep->ioptions.statistics; + bool do_decompress = + block_cache_compressed == nullptr && rep->blocks_maybe_compressed; + CompressionType raw_block_comp_type; + BlockContents raw_block_contents; { StopWatch sw(rep->ioptions.env, statistics, READ_BLOCK_GET_MICROS); - s = ReadBlockFromFile( + BlockFetcher block_fetcher( rep->file.get(), prefetch_buffer, rep->footer, ro, handle, - &raw_block, rep->ioptions, - block_cache_compressed == nullptr && rep->blocks_maybe_compressed, + &raw_block_contents, rep->ioptions, + do_decompress /* do uncompress */, rep->blocks_maybe_compressed, compression_dict, rep->persistent_cache_options, - is_index ? kDisableGlobalSequenceNumber : rep->global_seqno, - rep->table_options.read_amp_bytes_per_bit, rep->immortal_table); + GetMemoryAllocator(rep->table_options), + GetMemoryAllocatorForCompressedBlock(rep->table_options)); + s = block_fetcher.ReadBlockContents(); + raw_block_comp_type = block_fetcher.get_compression_type(); } if (s.ok()) { + SequenceNumber seq_no = rep->get_global_seqno(is_index); + // If filling cache is allowed and a cache is configured, try to put the + // block to the cache. s = PutDataBlockToCache( key, ckey, block_cache, block_cache_compressed, ro, rep->ioptions, - block_entry, raw_block.release(), rep->table_options.format_version, - compression_dict, rep->table_options.read_amp_bytes_per_bit, - is_index, + block_entry, &raw_block_contents, raw_block_comp_type, + rep->table_options.format_version, compression_dict, seq_no, + rep->table_options.read_amp_bytes_per_bit, + GetMemoryAllocator(rep->table_options), is_index, is_index && rep->table_options .cache_index_and_filter_blocks_with_high_priority ? 
Cache::Priority::HIGH @@ -1855,6 +1942,8 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( RecordTick(rep->ioptions.statistics, BLOCK_CACHE_BYTES_READ, block_cache->GetUsage(block->second.cache_handle)); Statistics* kNullStats = nullptr; + // We don't return pinned datat from index blocks, so no need + // to set `block_contents_pinned`. return block->second.value->NewIterator( &rep->internal_comparator, rep->internal_comparator.user_comparator(), nullptr, kNullStats, true, index_key_includes_seq_, index_key_is_full_); @@ -1933,7 +2022,7 @@ bool BlockBasedTable::PrefixMayMatch( // Then, try find it within each block // we already know prefix_extractor and prefix_extractor_name must match // because `CheckPrefixMayMatch` first checks `check_filter_ == true` - unique_ptr> iiter( + std::unique_ptr> iiter( NewIndexIterator(no_io_read_options, /* need_upper_bound_check */ false)); iiter->Seek(internal_prefix); @@ -2249,7 +2338,20 @@ InternalIterator* BlockBasedTable::NewIterator( } } -InternalIterator* BlockBasedTable::NewRangeTombstoneIterator( +FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( + const ReadOptions& read_options) { + if (rep_->fragmented_range_dels == nullptr) { + return nullptr; + } + SequenceNumber snapshot = kMaxSequenceNumber; + if (read_options.snapshot != nullptr) { + snapshot = read_options.snapshot->GetSequenceNumber(); + } + return new FragmentedRangeTombstoneIterator( + rep_->fragmented_range_dels, rep_->internal_comparator, snapshot); +} + +InternalIterator* BlockBasedTable::NewUnfragmentedRangeTombstoneIterator( const ReadOptions& read_options) { if (rep_->range_del_handle.IsNull()) { // The block didn't exist, nullptr indicates no range tombstones. 
@@ -2302,6 +2404,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch( } if (may_match) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_positive, 1, rep_->level); } return may_match; } @@ -2326,6 +2429,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, if (!FullFilterKeyMayMatch(read_options, filter, key, no_io, prefix_extractor)) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); } else { IndexBlockIter iiter_on_stack; // if prefix_extractor found in block differs from options, disable @@ -2358,6 +2462,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, // TODO: think about interaction with Merge. If a user key cannot // cross one data block, we should be fine. RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_useful, 1, rep_->level); break; } else { DataBlockIter biter; @@ -2410,6 +2515,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, } if (matched && filter != nullptr && !filter->IsBlockBased()) { RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE); + PERF_COUNTER_BY_LEVEL_ADD(bloom_filter_full_true_positive, 1, + rep_->level); } if (s.ok()) { s = iiter->status(); @@ -2524,11 +2631,11 @@ Status BlockBasedTable::VerifyChecksumInBlocks( BlockHandle handle = index_iter->value(); BlockContents contents; Slice dummy_comp_dict; - BlockFetcher block_fetcher(rep_->file.get(), nullptr /* prefetch buffer */, - rep_->footer, ReadOptions(), handle, &contents, - rep_->ioptions, false /* decompress */, - dummy_comp_dict /*compression dict*/, - rep_->persistent_cache_options); + BlockFetcher block_fetcher( + rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer, + ReadOptions(), handle, &contents, rep_->ioptions, + false /* decompress */, false 
/*maybe_compressed*/, + dummy_comp_dict /*compression dict*/, rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { break; @@ -2550,11 +2657,11 @@ Status BlockBasedTable::VerifyChecksumInBlocks( s = handle.DecodeFrom(&input); BlockContents contents; Slice dummy_comp_dict; - BlockFetcher block_fetcher(rep_->file.get(), nullptr /* prefetch buffer */, - rep_->footer, ReadOptions(), handle, &contents, - rep_->ioptions, false /* decompress */, - dummy_comp_dict /*compression dict*/, - rep_->persistent_cache_options); + BlockFetcher block_fetcher( + rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer, + ReadOptions(), handle, &contents, rep_->ioptions, + false /* decompress */, false /*maybe_compressed*/, + dummy_comp_dict /*compression dict*/, rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { break; @@ -2583,8 +2690,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, Status s; s = GetDataBlockFromCache( - cache_key, ckey, block_cache, nullptr, rep_->ioptions, options, &block, - rep_->table_options.format_version, + cache_key, ckey, block_cache, nullptr, rep_, options, &block, rep_->compression_dict_block ? 
rep_->compression_dict_block->data : Slice(), 0 /* read_amp_bytes_per_bit */); @@ -2644,7 +2750,8 @@ Status BlockBasedTable::CreateIndexReader( rep_->table_properties == nullptr || rep_->table_properties->index_key_is_user_key == 0, rep_->table_properties == nullptr || - rep_->table_properties->index_value_is_delta_encoded == 0); + rep_->table_properties->index_value_is_delta_encoded == 0, + GetMemoryAllocator(rep_->table_options)); } case BlockBasedTableOptions::kBinarySearch: { return BinarySearchIndexReader::Create( @@ -2653,7 +2760,8 @@ Status BlockBasedTable::CreateIndexReader( rep_->table_properties == nullptr || rep_->table_properties->index_key_is_user_key == 0, rep_->table_properties == nullptr || - rep_->table_properties->index_value_is_delta_encoded == 0); + rep_->table_properties->index_value_is_delta_encoded == 0, + GetMemoryAllocator(rep_->table_options)); } case BlockBasedTableOptions::kHashSearch: { std::unique_ptr meta_guard; @@ -2675,7 +2783,8 @@ Status BlockBasedTable::CreateIndexReader( rep_->table_properties == nullptr || rep_->table_properties->index_key_is_user_key == 0, rep_->table_properties == nullptr || - rep_->table_properties->index_value_is_delta_encoded == 0); + rep_->table_properties->index_value_is_delta_encoded == 0, + GetMemoryAllocator(rep_->table_options)); } meta_index_iter = meta_iter_guard.get(); } @@ -2688,7 +2797,8 @@ Status BlockBasedTable::CreateIndexReader( rep_->table_properties == nullptr || rep_->table_properties->index_key_is_user_key == 0, rep_->table_properties == nullptr || - rep_->table_properties->index_value_is_delta_encoded == 0); + rep_->table_properties->index_value_is_delta_encoded == 0, + GetMemoryAllocator(rep_->table_options)); } default: { std::string error_message = @@ -2699,7 +2809,7 @@ Status BlockBasedTable::CreateIndexReader( } uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) { - unique_ptr> index_iter( + std::unique_ptr> index_iter( NewIndexIterator(ReadOptions())); 
index_iter->Seek(key); @@ -2857,7 +2967,8 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file, BlockFetcher block_fetcher( rep_->file.get(), nullptr /* prefetch_buffer */, rep_->footer, ReadOptions(), handle, &block, rep_->ioptions, - false /*decompress*/, dummy_comp_dict /*compression dict*/, + false /*decompress*/, false /*maybe_compressed*/, + dummy_comp_dict /*compression dict*/, rep_->persistent_cache_options); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { diff --git a/3rdParty/rocksdb/v5.16.X/table/block_based_table_reader.h b/3rdParty/rocksdb/v5.18.X/table/block_based_table_reader.h similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/table/block_based_table_reader.h rename to 3rdParty/rocksdb/v5.18.X/table/block_based_table_reader.h index 3cada0c2c2..cb6a865660 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_based_table_reader.h +++ b/3rdParty/rocksdb/v5.18.X/table/block_based_table_reader.h @@ -16,6 +16,7 @@ #include #include +#include "db/range_tombstone_fragmenter.h" #include "options/cf_options.h" #include "rocksdb/options.h" #include "rocksdb/persistent_cache.h" @@ -88,8 +89,9 @@ class BlockBasedTable : public TableReader { const EnvOptions& env_options, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_key_comparator, - unique_ptr&& file, - uint64_t file_size, unique_ptr* table_reader, + std::unique_ptr&& file, + uint64_t file_size, + std::unique_ptr* table_reader, const SliceTransform* prefix_extractor = nullptr, bool prefetch_index_and_filter_in_cache = true, bool skip_filters = false, int level = -1, @@ -112,7 +114,7 @@ class BlockBasedTable : public TableReader { bool skip_filters = false, bool for_compaction = false) override; - InternalIterator* NewRangeTombstoneIterator( + FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& read_options) override; // @param skip_filters Disables loading/accessing the filter block @@ -255,13 +257,11 @@ class BlockBasedTable : 
public TableReader { // @param block_entry value is set to the uncompressed block if found. If // in uncompressed block cache, also sets cache_handle to reference that // block. - static Status MaybeLoadDataBlockToCache(FilePrefetchBuffer* prefetch_buffer, - Rep* rep, const ReadOptions& ro, - const BlockHandle& handle, - Slice compression_dict, - CachableEntry* block_entry, - bool is_index = false, - GetContext* get_context = nullptr); + static Status MaybeReadBlockAndLoadToCache( + FilePrefetchBuffer* prefetch_buffer, Rep* rep, const ReadOptions& ro, + const BlockHandle& handle, Slice compression_dict, + CachableEntry* block_entry, bool is_index = false, + GetContext* get_context = nullptr); // For the following two functions: // if `no_io == true`, we will not try to read filter/index from sst file @@ -299,9 +299,9 @@ class BlockBasedTable : public TableReader { // dictionary. static Status GetDataBlockFromCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, - Cache* block_cache, Cache* block_cache_compressed, - const ImmutableCFOptions& ioptions, const ReadOptions& read_options, - BlockBasedTable::CachableEntry* block, uint32_t format_version, + Cache* block_cache, Cache* block_cache_compressed, Rep* rep, + const ReadOptions& read_options, + BlockBasedTable::CachableEntry* block, const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index = false, GetContext* get_context = nullptr); @@ -311,16 +311,18 @@ class BlockBasedTable : public TableReader { // On success, Status::OK will be returned; also @block will be populated with // uncompressed block and its cache handle. // - // REQUIRES: raw_block is heap-allocated. PutDataBlockToCache() will be - // responsible for releasing its memory if error occurs. + // Allocated memory managed by raw_block_contents will be transferred to + // PutDataBlockToCache(). After the call, the object will be invalid. 
// @param compression_dict Data for presetting the compression library's // dictionary. static Status PutDataBlockToCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, const ReadOptions& read_options, const ImmutableCFOptions& ioptions, - CachableEntry* block, Block* raw_block, uint32_t format_version, - const Slice& compression_dict, size_t read_amp_bytes_per_bit, + CachableEntry* block, BlockContents* raw_block_contents, + CompressionType raw_block_comp_type, uint32_t format_version, + const Slice& compression_dict, SequenceNumber seq_no, + size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator, bool is_index = false, Cache::Priority pri = Cache::Priority::LOW, GetContext* get_context = nullptr); @@ -383,6 +385,9 @@ class BlockBasedTable : public TableReader { friend class PartitionedFilterBlockReader; friend class PartitionedFilterBlockTest; + + InternalIterator* NewUnfragmentedRangeTombstoneIterator( + const ReadOptions& read_options); }; // Maitaning state of a two-level iteration on a partitioned index structure @@ -431,7 +436,7 @@ struct BlockBasedTable::Rep { Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options, const BlockBasedTableOptions& _table_opt, const InternalKeyComparator& _internal_comparator, bool skip_filters, - const bool _immortal_table) + int _level, const bool _immortal_table) : ioptions(_ioptions), env_options(_env_options), table_options(_table_opt), @@ -444,6 +449,7 @@ struct BlockBasedTable::Rep { prefix_filtering(true), range_del_handle(BlockHandle::NullBlockHandle()), global_seqno(kDisableGlobalSequenceNumber), + level(_level), immortal_table(_immortal_table) {} const ImmutableCFOptions& ioptions; @@ -452,7 +458,7 @@ struct BlockBasedTable::Rep { const FilterPolicy* const filter_policy; const InternalKeyComparator& internal_comparator; Status status; - unique_ptr file; + std::unique_ptr file; char 
cache_key_prefix[kMaxCacheKeyPrefixSize]; size_t cache_key_prefix_size = 0; char persistent_cache_key_prefix[kMaxCacheKeyPrefixSize]; @@ -468,8 +474,8 @@ struct BlockBasedTable::Rep { // index_reader and filter will be populated and used only when // options.block_cache is nullptr; otherwise we will get the index block via // the block cache. - unique_ptr index_reader; - unique_ptr filter; + std::unique_ptr index_reader; + std::unique_ptr filter; enum class FilterType { kNoFilter, @@ -494,7 +500,7 @@ struct BlockBasedTable::Rep { // module should not be relying on db module. However to make things easier // and compatible with existing code, we introduce a wrapper that allows // block to extract prefix without knowing if a key is internal or not. - unique_ptr internal_prefix_transform; + std::unique_ptr internal_prefix_transform; std::shared_ptr table_prefix_extractor; // only used in level 0 files when pin_l0_filter_and_index_blocks_in_cache is @@ -509,6 +515,7 @@ struct BlockBasedTable::Rep { // cache is enabled. CachableEntry range_del_entry; BlockHandle range_del_handle; + std::shared_ptr fragmented_range_dels; // If global_seqno is used, all Keys in this file will have the same // seqno with value `global_seqno`. @@ -517,12 +524,20 @@ struct BlockBasedTable::Rep { // and every key have it's own seqno. SequenceNumber global_seqno; + // the level when the table is opened, could potentially change when trivial + // move is involved + int level; + // If false, blocks in this file are definitely all uncompressed. Knowing this // before reading individual blocks enables certain optimizations. bool blocks_maybe_compressed = true; bool closed = false; const bool immortal_table; + + SequenceNumber get_global_seqno(bool is_index) const { + return is_index ? 
kDisableGlobalSequenceNumber : global_seqno; + } }; template diff --git a/3rdParty/rocksdb/v5.16.X/table/block_builder.cc b/3rdParty/rocksdb/v5.18.X/table/block_builder.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/block_builder.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_builder.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/block_builder.h b/3rdParty/rocksdb/v5.18.X/table/block_builder.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/block_builder.h rename to 3rdParty/rocksdb/v5.18.X/table/block_builder.h index 52b9571162..3b7fc1768b 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_builder.h +++ b/3rdParty/rocksdb/v5.18.X/table/block_builder.h @@ -60,7 +60,7 @@ class BlockBuilder { private: const int block_restart_interval_; - //TODO(myabandeh): put it into a separate IndexBlockBuilder + // TODO(myabandeh): put it into a separate IndexBlockBuilder const bool use_delta_encoding_; // Refer to BlockIter::DecodeCurrentValue for format of delta encoded values const bool use_value_delta_encoding_; diff --git a/3rdParty/rocksdb/v5.16.X/table/block_fetcher.cc b/3rdParty/rocksdb/v5.18.X/table/block_fetcher.cc similarity index 79% rename from 3rdParty/rocksdb/v5.16.X/table/block_fetcher.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_fetcher.cc index ea97066ec4..9ad254a59f 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_fetcher.cc +++ b/3rdParty/rocksdb/v5.18.X/table/block_fetcher.cc @@ -17,13 +17,14 @@ #include "rocksdb/env.h" #include "table/block.h" #include "table/block_based_table_reader.h" -#include "table/persistent_cache_helper.h" #include "table/format.h" +#include "table/persistent_cache_helper.h" #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" #include "util/file_reader_writer.h" #include "util/logging.h" +#include "util/memory_allocator.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/xxhash.h" @@ -48,6 +49,12 @@ void BlockFetcher::CheckBlockChecksum() 
{ case kxxHash: actual = XXH32(data, static_cast(block_size_) + 1, 0); break; + case kxxHash64: + actual =static_cast ( + XXH64(data, static_cast(block_size_) + 1, 0) & + uint64_t{0xffffffff} + ); + break; default: status_ = Status::Corruption( "unknown checksum type " + ToString(footer_.checksum()) + " in " + @@ -107,9 +114,11 @@ bool BlockFetcher::TryGetCompressedBlockFromPersistentCache() { if (cache_options_.persistent_cache && cache_options_.persistent_cache->IsCompressed()) { // lookup uncompressed cache mode p-cache + std::unique_ptr raw_data; status_ = PersistentCacheHelper::LookupRawPage( - cache_options_, handle_, &heap_buf_, block_size_ + kBlockTrailerSize); + cache_options_, handle_, &raw_data, block_size_ + kBlockTrailerSize); if (status_.ok()) { + heap_buf_ = CacheAllocationPtr(raw_data.release()); used_buf_ = heap_buf_.get(); slice_ = Slice(heap_buf_.get(), block_size_); return true; @@ -131,8 +140,13 @@ void BlockFetcher::PrepareBufferForBlockFromFile() { // If we've got a small enough hunk of data, read it in to the // trivially allocated stack buffer instead of needing a full malloc() used_buf_ = &stack_buf_[0]; + } else if (maybe_compressed_ && !do_uncompress_) { + compressed_buf_ = AllocateBlock(block_size_ + kBlockTrailerSize, + memory_allocator_compressed_); + used_buf_ = compressed_buf_.get(); } else { - heap_buf_.reset(new char[block_size_ + kBlockTrailerSize]); + heap_buf_ = + AllocateBlock(block_size_ + kBlockTrailerSize, memory_allocator_); used_buf_ = heap_buf_.get(); } } @@ -159,29 +173,45 @@ void BlockFetcher::InsertUncompressedBlockToPersistentCacheIfNeeded() { } } +inline void BlockFetcher::CopyBufferToHeap() { + assert(used_buf_ != heap_buf_.get()); + heap_buf_ = AllocateBlock(block_size_ + kBlockTrailerSize, memory_allocator_); + memcpy(heap_buf_.get(), used_buf_, block_size_ + kBlockTrailerSize); +} + inline void BlockFetcher::GetBlockContents() { if (slice_.data() != used_buf_) { // the slice content is not the buffer provided - 
*contents_ = BlockContents(Slice(slice_.data(), block_size_), - immortal_source_, compression_type); + *contents_ = BlockContents(Slice(slice_.data(), block_size_)); } else { // page can be either uncompressed or compressed, the buffer either stack // or heap provided. Refer to https://github.com/facebook/rocksdb/pull/4096 if (got_from_prefetch_buffer_ || used_buf_ == &stack_buf_[0]) { - assert(used_buf_ != heap_buf_.get()); - heap_buf_.reset(new char[block_size_ + kBlockTrailerSize]); - memcpy(heap_buf_.get(), used_buf_, block_size_ + kBlockTrailerSize); + CopyBufferToHeap(); + } else if (used_buf_ == compressed_buf_.get()) { + if (compression_type_ == kNoCompression && + memory_allocator_ != memory_allocator_compressed_) { + CopyBufferToHeap(); + } else { + heap_buf_ = std::move(compressed_buf_); + } } - *contents_ = BlockContents(std::move(heap_buf_), block_size_, true, - compression_type); + *contents_ = BlockContents(std::move(heap_buf_), block_size_); } +#ifndef NDEBUG + contents_->is_raw_block = true; +#endif } Status BlockFetcher::ReadBlockContents() { block_size_ = static_cast(handle_.size()); if (TryGetUncompressBlockFromPersistentCache()) { + compression_type_ = kNoCompression; +#ifndef NDEBUG + contents_->is_raw_block = true; +#endif // NDEBUG return Status::OK(); } if (TryGetFromPrefetchBuffer()) { @@ -222,15 +252,16 @@ Status BlockFetcher::ReadBlockContents() { PERF_TIMER_GUARD(block_decompress_time); - compression_type = - static_cast(slice_.data()[block_size_]); + compression_type_ = get_block_compression_type(slice_.data(), block_size_); - if (do_uncompress_ && compression_type != kNoCompression) { + if (do_uncompress_ && compression_type_ != kNoCompression) { // compressed page, uncompress, update cache - UncompressionContext uncompression_ctx(compression_type, compression_dict_); - status_ = - UncompressBlockContents(uncompression_ctx, slice_.data(), block_size_, - contents_, footer_.version(), ioptions_); + UncompressionContext 
uncompression_ctx(compression_type_, + compression_dict_); + status_ = UncompressBlockContents(uncompression_ctx, slice_.data(), + block_size_, contents_, footer_.version(), + ioptions_, memory_allocator_); + compression_type_ = kNoCompression; } else { GetBlockContents(); } diff --git a/3rdParty/rocksdb/v5.16.X/table/block_fetcher.h b/3rdParty/rocksdb/v5.18.X/table/block_fetcher.h similarity index 75% rename from 3rdParty/rocksdb/v5.16.X/table/block_fetcher.h rename to 3rdParty/rocksdb/v5.18.X/table/block_fetcher.h index 9e0d2448dd..aed73a3925 100644 --- a/3rdParty/rocksdb/v5.16.X/table/block_fetcher.h +++ b/3rdParty/rocksdb/v5.18.X/table/block_fetcher.h @@ -10,6 +10,7 @@ #pragma once #include "table/block.h" #include "table/format.h" +#include "util/memory_allocator.h" namespace rocksdb { class BlockFetcher { @@ -24,9 +25,11 @@ class BlockFetcher { FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ReadOptions& read_options, const BlockHandle& handle, BlockContents* contents, const ImmutableCFOptions& ioptions, - bool do_uncompress, const Slice& compression_dict, + bool do_uncompress, bool maybe_compressed, + const Slice& compression_dict, const PersistentCacheOptions& cache_options, - const bool immortal_source = false) + MemoryAllocator* memory_allocator = nullptr, + MemoryAllocator* memory_allocator_compressed = nullptr) : file_(file), prefetch_buffer_(prefetch_buffer), footer_(footer), @@ -35,10 +38,13 @@ class BlockFetcher { contents_(contents), ioptions_(ioptions), do_uncompress_(do_uncompress), - immortal_source_(immortal_source), + maybe_compressed_(maybe_compressed), compression_dict_(compression_dict), - cache_options_(cache_options) {} + cache_options_(cache_options), + memory_allocator_(memory_allocator), + memory_allocator_compressed_(memory_allocator_compressed) {} Status ReadBlockContents(); + CompressionType get_compression_type() const { return compression_type_; } private: static const uint32_t kDefaultStackBufferSize = 5000; @@ 
-51,17 +57,20 @@ class BlockFetcher { BlockContents* contents_; const ImmutableCFOptions& ioptions_; bool do_uncompress_; - const bool immortal_source_; + bool maybe_compressed_; const Slice& compression_dict_; const PersistentCacheOptions& cache_options_; + MemoryAllocator* memory_allocator_; + MemoryAllocator* memory_allocator_compressed_; Status status_; Slice slice_; char* used_buf_ = nullptr; size_t block_size_; - std::unique_ptr heap_buf_; + CacheAllocationPtr heap_buf_; + CacheAllocationPtr compressed_buf_; char stack_buf_[kDefaultStackBufferSize]; bool got_from_prefetch_buffer_ = false; - rocksdb::CompressionType compression_type; + rocksdb::CompressionType compression_type_; // return true if found bool TryGetUncompressBlockFromPersistentCache(); @@ -69,6 +78,8 @@ class BlockFetcher { bool TryGetFromPrefetchBuffer(); bool TryGetCompressedBlockFromPersistentCache(); void PrepareBufferForBlockFromFile(); + // Copy content from used_buf_ to new heap buffer. + void CopyBufferToHeap(); void GetBlockContents(); void InsertCompressedBlockToPersistentCacheIfNeeded(); void InsertUncompressedBlockToPersistentCacheIfNeeded(); diff --git a/3rdParty/rocksdb/v5.16.X/table/block_prefix_index.cc b/3rdParty/rocksdb/v5.18.X/table/block_prefix_index.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/block_prefix_index.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_prefix_index.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/block_prefix_index.h b/3rdParty/rocksdb/v5.18.X/table/block_prefix_index.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/block_prefix_index.h rename to 3rdParty/rocksdb/v5.18.X/table/block_prefix_index.h diff --git a/3rdParty/rocksdb/v5.16.X/table/block_test.cc b/3rdParty/rocksdb/v5.18.X/table/block_test.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/block_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/block_test.cc index 009740a283..5ac9ffb214 100644 --- 
a/3rdParty/rocksdb/v5.16.X/table/block_test.cc +++ b/3rdParty/rocksdb/v5.18.X/table/block_test.cc @@ -70,10 +70,10 @@ void GenerateRandomKVs(std::vector *keys, // Same as GenerateRandomKVs but the values are BlockHandle void GenerateRandomKBHs(std::vector *keys, - std::vector *values, const int from, - const int len, const int step = 1, - const int padding_size = 0, - const int keys_share_prefix = 1) { + std::vector *values, const int from, + const int len, const int step = 1, + const int padding_size = 0, + const int keys_share_prefix = 1) { Random rnd(302); uint64_t offset = 0; @@ -117,7 +117,6 @@ TEST_F(BlockTest, SimpleTest) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); // read contents of block sequentially @@ -188,7 +187,6 @@ TEST_F(BlockTest, ValueDeltaEncodingTest) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); const bool kTotalOrderSeek = true; @@ -247,7 +245,6 @@ BlockContents GetBlockContents(std::unique_ptr *builder, BlockContents contents; contents.data = rawblock; - contents.cachable = false; return contents; } @@ -257,8 +254,7 @@ void CheckBlockContents(BlockContents contents, const int max_key, const std::vector &values) { const size_t prefix_size = 6; // create block reader - BlockContents contents_ref(contents.data, contents.cachable, - contents.compression_type); + BlockContents contents_ref(contents.data); Block reader1(std::move(contents), kDisableGlobalSequenceNumber); Block reader2(std::move(contents_ref), kDisableGlobalSequenceNumber); @@ -486,7 +482,6 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = true; Block reader(std::move(contents), kDisableGlobalSequenceNumber, kBytesPerBit, stats.get()); @@ -521,7 
+516,6 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = true; Block reader(std::move(contents), kDisableGlobalSequenceNumber, kBytesPerBit, stats.get()); @@ -558,7 +552,6 @@ TEST_F(BlockTest, BlockWithReadAmpBitmap) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = true; Block reader(std::move(contents), kDisableGlobalSequenceNumber, kBytesPerBit, stats.get()); diff --git a/3rdParty/rocksdb/v5.16.X/table/bloom_block.cc b/3rdParty/rocksdb/v5.18.X/table/bloom_block.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/bloom_block.cc rename to 3rdParty/rocksdb/v5.18.X/table/bloom_block.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/bloom_block.h b/3rdParty/rocksdb/v5.18.X/table/bloom_block.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/bloom_block.h rename to 3rdParty/rocksdb/v5.18.X/table/bloom_block.h diff --git a/3rdParty/rocksdb/v5.16.X/table/cleanable_test.cc b/3rdParty/rocksdb/v5.18.X/table/cleanable_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/cleanable_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/cleanable_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder.cc b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder.cc similarity index 91% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder.cc rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder.cc index 0da4d84ddc..f590e6ad40 100644 --- a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder.cc +++ b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder.cc @@ -164,9 +164,9 @@ bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const { Slice CuckooTableBuilder::GetKey(uint64_t idx) const { assert(closed_); if (IsDeletedKey(idx)) { - return Slice(&deleted_keys_[(idx - num_values_) * key_size_], key_size_); + return Slice(&deleted_keys_[static_cast((idx - num_values_) * 
key_size_)], static_cast(key_size_)); } - return Slice(&kvs_[idx * (key_size_ + value_size_)], key_size_); + return Slice(&kvs_[static_cast(idx * (key_size_ + value_size_))], static_cast(key_size_)); } Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const { @@ -177,14 +177,14 @@ Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const { Slice CuckooTableBuilder::GetValue(uint64_t idx) const { assert(closed_); if (IsDeletedKey(idx)) { - static std::string empty_value(value_size_, 'a'); + static std::string empty_value(static_cast(value_size_), 'a'); return Slice(empty_value); } - return Slice(&kvs_[idx * (key_size_ + value_size_) + key_size_], value_size_); + return Slice(&kvs_[static_cast(idx * (key_size_ + value_size_) + key_size_)], static_cast(value_size_)); } Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { - buckets->resize(hash_table_size_ + cuckoo_block_size_ - 1); + buckets->resize(static_cast(hash_table_size_ + cuckoo_block_size_ - 1)); uint32_t make_space_for_key_call_id = 0; for (uint32_t vector_idx = 0; vector_idx < num_entries_; vector_idx++) { uint64_t bucket_id = 0; @@ -200,13 +200,13 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { // stop searching and proceed for next hash function. 
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++hash_val) { - if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { + if ((*buckets)[static_cast(hash_val)].vector_idx == kMaxVectorIdx) { bucket_id = hash_val; bucket_found = true; break; } else { if (ucomp_->Compare(user_key, - GetUserKey((*buckets)[hash_val].vector_idx)) == 0) { + GetUserKey((*buckets)[static_cast(hash_val)].vector_idx)) == 0) { return Status::NotSupported("Same key is being inserted again."); } hash_vals.push_back(hash_val); @@ -226,7 +226,7 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { ++num_hash_func_; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++hash_val) { - if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { + if ((*buckets)[static_cast(hash_val)].vector_idx == kMaxVectorIdx) { bucket_found = true; bucket_id = hash_val; break; @@ -235,7 +235,7 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { } } } - (*buckets)[bucket_id].vector_idx = vector_idx; + (*buckets)[static_cast(bucket_id)].vector_idx = vector_idx; } return Status::OK(); } @@ -289,13 +289,14 @@ Status CuckooTableBuilder::Finish() { } } properties_.num_entries = num_entries_; + properties_.num_deletions = num_entries_ - num_values_; properties_.fixed_key_len = key_size_; properties_.user_collected_properties[ CuckooTablePropertyNames::kValueLength].assign( reinterpret_cast(&value_size_), sizeof(value_size_)); uint64_t bucket_size = key_size_ + value_size_; - unused_bucket.resize(bucket_size, 'a'); + unused_bucket.resize(static_cast(bucket_size), 'a'); // Write the table. 
uint32_t num_added = 0; for (auto& bucket : buckets) { @@ -320,7 +321,7 @@ Status CuckooTableBuilder::Finish() { uint64_t offset = buckets.size() * bucket_size; properties_.data_size = offset; - unused_bucket.resize(properties_.fixed_key_len); + unused_bucket.resize(static_cast(properties_.fixed_key_len)); properties_.user_collected_properties[ CuckooTablePropertyNames::kEmptyKey] = unused_bucket; properties_.user_collected_properties[ @@ -456,7 +457,7 @@ bool CuckooTableBuilder::MakeSpaceForKey( // no. of times this will be called is <= max_num_hash_func_ + num_entries_. for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { uint64_t bid = hash_vals[hash_cnt]; - (*buckets)[bid].make_space_for_key_call_id = make_space_for_key_call_id; + (*buckets)[static_cast(bid)].make_space_for_key_call_id = make_space_for_key_call_id; tree.push_back(CuckooNode(bid, 0, 0)); } bool null_found = false; @@ -467,7 +468,7 @@ bool CuckooTableBuilder::MakeSpaceForKey( if (curr_depth >= max_search_depth_) { break; } - CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id]; + CuckooBucket& curr_bucket = (*buckets)[static_cast(curr_node.bucket_id)]; for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) { uint64_t child_bucket_id = CuckooHash(GetUserKey(curr_bucket.vector_idx), @@ -476,15 +477,15 @@ bool CuckooTableBuilder::MakeSpaceForKey( // Iterate inside Cuckoo Block. 
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++child_bucket_id) { - if ((*buckets)[child_bucket_id].make_space_for_key_call_id == + if ((*buckets)[static_cast(child_bucket_id)].make_space_for_key_call_id == make_space_for_key_call_id) { continue; } - (*buckets)[child_bucket_id].make_space_for_key_call_id = + (*buckets)[static_cast(child_bucket_id)].make_space_for_key_call_id = make_space_for_key_call_id; tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, curr_pos)); - if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) { + if ((*buckets)[static_cast(child_bucket_id)].vector_idx == kMaxVectorIdx) { null_found = true; break; } @@ -502,8 +503,8 @@ bool CuckooTableBuilder::MakeSpaceForKey( uint32_t bucket_to_replace_pos = static_cast(tree.size()) - 1; while (bucket_to_replace_pos >= num_hash_func_) { CuckooNode& curr_node = tree[bucket_to_replace_pos]; - (*buckets)[curr_node.bucket_id] = - (*buckets)[tree[curr_node.parent_pos].bucket_id]; + (*buckets)[static_cast(curr_node.bucket_id)] = + (*buckets)[static_cast(tree[curr_node.parent_pos].bucket_id)]; bucket_to_replace_pos = curr_node.parent_pos; } *bucket_id = tree[bucket_to_replace_pos].bucket_id; diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder.h b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder.h rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder.h diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder_test.cc b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder_test.cc similarity index 83% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder_test.cc index 0d24da7194..c1e350327f 100644 --- a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_builder_test.cc +++ b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_builder_test.cc @@ -43,8 +43,15 @@ class CuckooBuilderTest : 
public testing::Test { std::string expected_unused_bucket, uint64_t expected_table_size, uint32_t expected_num_hash_func, bool expected_is_last_level, uint32_t expected_cuckoo_block_size = 1) { + uint64_t num_deletions = 0; + for (const auto& key : keys) { + ParsedInternalKey parsed; + if (ParseInternalKey(key, &parsed) && parsed.type == kTypeDeletion) { + num_deletions++; + } + } // Read file - unique_ptr read_file; + std::unique_ptr read_file; ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_)); uint64_t read_file_size; ASSERT_OK(env_->GetFileSize(fname, &read_file_size)); @@ -56,7 +63,7 @@ class CuckooBuilderTest : public testing::Test { // Assert Table Properties. TableProperties* props = nullptr; - unique_ptr file_reader( + std::unique_ptr file_reader( new RandomAccessFileReader(std::move(read_file), fname)); ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size, kCuckooTableMagicNumber, ioptions, @@ -90,6 +97,7 @@ class CuckooBuilderTest : public testing::Test { ASSERT_EQ(expected_is_last_level, is_last_level_found); ASSERT_EQ(props->num_entries, keys.size()); + ASSERT_EQ(props->num_deletions, num_deletions); ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); ASSERT_EQ(props->data_size, expected_unused_bucket.size() * (expected_table_size + expected_cuckoo_block_size - 1)); @@ -126,9 +134,10 @@ class CuckooBuilderTest : public testing::Test { } } - std::string GetInternalKey(Slice user_key, bool zero_seqno) { + std::string GetInternalKey(Slice user_key, bool zero_seqno, + ValueType type = kTypeValue) { IterKey ikey; - ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, kTypeValue); + ikey.SetInternalKey(user_key, zero_seqno ? 
0 : 1000, type); return ikey.GetInternalKey().ToString(); } @@ -152,11 +161,11 @@ class CuckooBuilderTest : public testing::Test { }; TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) { - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("EmptyFile"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, 4, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, @@ -169,50 +178,57 @@ TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) { } TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { - uint32_t num_hash_fun = 4; - std::vector user_keys = {"key01", "key02", "key03", "key04"}; - std::vector values = {"v01", "v02", "v03", "v04"}; - // Need to have a temporary variable here as VS compiler does not currently - // support operator= with initializer_list as a parameter - std::unordered_map> hm = { - {user_keys[0], {0, 1, 2, 3}}, - {user_keys[1], {1, 2, 3, 4}}, - {user_keys[2], {2, 3, 4, 5}}, - {user_keys[3], {3, 4, 5, 6}}}; - hash_map = std::move(hm); + for (auto type : {kTypeValue, kTypeDeletion}) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values; + if (type == kTypeValue) { + values = {"v01", "v02", "v03", "v04"}; + } else { + values = {"", "", "", ""}; + } + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {1, 2, 3, 4}}, + {user_keys[2], {2, 3, 4, 5}}, + {user_keys[3], {3, 4, 5, 6}}}; + hash_map = std::move(hm); - std::vector expected_locations = {0, 1, 2, 3}; - std::vector keys; - for (auto& 
user_key : user_keys) { - keys.push_back(GetInternalKey(user_key, false)); - } - uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + std::vector expected_locations = {0, 1, 2, 3}; + std::vector keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false, type)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); - unique_ptr writable_file; - fname = test::PerThreadDBPath("NoCollisionFullKey"); - ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); - CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, - 100, BytewiseComparator(), 1, false, false, - GetSliceHash, 0 /* column_family_id */, - kDefaultColumnFamilyName); - ASSERT_OK(builder.status()); - for (uint32_t i = 0; i < user_keys.size(); i++) { - builder.Add(Slice(keys[i]), Slice(values[i])); - ASSERT_EQ(builder.NumEntries(), i + 1); + std::unique_ptr writable_file; + fname = test::PerThreadDBPath("NoCollisionFullKey"); + ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); ASSERT_OK(builder.status()); - } - size_t bucket_size = keys[0].size() + values[0].size(); - ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); - ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); - ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + 
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); - std::string expected_unused_bucket = GetInternalKey("key00", true); - expected_unused_bucket += std::string(values[0].size(), 'a'); - CheckFileContents(keys, values, expected_locations, - expected_unused_bucket, expected_table_size, 2, false); + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 2, false); + } } TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { @@ -236,11 +252,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionFullKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, @@ -284,12 +300,12 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); - unique_ptr writable_file; + std::unique_ptr writable_file; uint32_t cuckoo_block_size = 2; fname = test::PerThreadDBPath("WithCollisionFullKey2"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new 
WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder( file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, false, false, GetSliceHash, @@ -338,11 +354,11 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) { } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathFullKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, @@ -388,11 +404,11 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { } uint64_t expected_table_size = GetExpectedTableSize(keys.size()); - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathFullKeyAndCuckooBlock"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 2, false, false, GetSliceHash, 0 /* column_family_id */, @@ -431,11 +447,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { std::vector expected_locations = {0, 1, 2, 3}; uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("NoCollisionUserKey"); ASSERT_OK(env_->NewWritableFile(fname, 
&writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, @@ -475,11 +491,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { std::vector expected_locations = {0, 1, 2, 3}; uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionUserKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, @@ -521,11 +537,11 @@ TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) { std::vector expected_locations = {0, 1, 3, 4, 2}; uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathUserKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, @@ -566,11 +582,11 @@ TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) { }; hash_map = std::move(hm); - 
unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("WithCollisionPathUserKey"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, @@ -594,11 +610,11 @@ TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) { uint32_t num_hash_fun = 4; std::string user_key = "repeatedkey"; - unique_ptr writable_file; + std::unique_ptr writable_file; fname = test::PerThreadDBPath("FailWhenSameKeyInserted"); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), EnvOptions())); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, EnvOptions())); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash, 0 /* column_family_id */, diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_factory.cc b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_factory.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_factory.cc rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_factory.cc index 84d22468eb..74d18d5121 100644 --- a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_factory.cc +++ b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_factory.cc @@ -14,7 +14,7 @@ namespace rocksdb { Status CuckooTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, + std::unique_ptr&& file, uint64_t file_size, std::unique_ptr* table, bool /*prefetch_index_and_filter_in_cache*/) const { std::unique_ptr new_reader(new 
CuckooTableReader( diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_factory.h b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_factory.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_factory.h rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_factory.h index a96635de57..eb3c5e5176 100644 --- a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_factory.h +++ b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_factory.h @@ -60,8 +60,8 @@ class CuckooTableFactory : public TableFactory { Status NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, bool prefetch_index_and_filter_in_cache = true) const override; TableBuilder* NewTableBuilder( diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader.cc b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader.cc rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader.cc index fb14b17595..be7b1ffa9d 100644 --- a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader.cc +++ b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader.cc @@ -136,7 +136,7 @@ CuckooTableReader::CuckooTableReader( cuckoo_block_size_ = *reinterpret_cast( cuckoo_block_size->second.data()); cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1; - status_ = file_->Read(0, file_size, &file_data_, nullptr); + status_ = file_->Read(0, static_cast(file_size), &file_data_, nullptr); } Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/, @@ -268,7 +268,7 @@ void CuckooTableIterator::InitIfNeeded() { if (initialized_) { return; } - sorted_bucket_ids_.reserve(reader_->GetTableProperties()->num_entries); + sorted_bucket_ids_.reserve(static_cast(reader_->GetTableProperties()->num_entries)); uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1; 
assert(num_buckets < kInvalidIndex); const char* bucket = reader_->file_data_.data(); diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader.h b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader.h rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader.h diff --git a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader_test.cc b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader_test.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader_test.cc index 3aeca5e132..74fb52e6c7 100644 --- a/3rdParty/rocksdb/v5.16.X/table/cuckoo_table_reader_test.cc +++ b/3rdParty/rocksdb/v5.18.X/table/cuckoo_table_reader_test.cc @@ -95,8 +95,8 @@ class CuckooReaderTest : public testing::Test { const Comparator* ucomp = BytewiseComparator()) { std::unique_ptr writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), env_options)); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, env_options)); CuckooTableBuilder builder( file_writer.get(), 0.9, kNumHashFunc, 100, ucomp, 2, false, false, @@ -115,7 +115,7 @@ class CuckooReaderTest : public testing::Test { // Check reader now. 
std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); - unique_ptr file_reader( + std::unique_ptr file_reader( new RandomAccessFileReader(std::move(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp, @@ -144,7 +144,7 @@ class CuckooReaderTest : public testing::Test { void CheckIterator(const Comparator* ucomp = BytewiseComparator()) { std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); - unique_ptr file_reader( + std::unique_ptr file_reader( new RandomAccessFileReader(std::move(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp, @@ -323,7 +323,7 @@ TEST_F(CuckooReaderTest, WhenKeyNotFound) { CreateCuckooFileAndCheckReader(); std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); - unique_ptr file_reader( + std::unique_ptr file_reader( new RandomAccessFileReader(std::move(read_file), fname)); const ImmutableCFOptions ioptions(options); CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucmp, @@ -411,8 +411,8 @@ void WriteFile(const std::vector& keys, std::unique_ptr writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); - unique_ptr file_writer( - new WritableFileWriter(std::move(writable_file), env_options)); + std::unique_ptr file_writer( + new WritableFileWriter(std::move(writable_file), fname, env_options)); CuckooTableBuilder builder( file_writer.get(), hash_ratio, 64, 1000, test::Uint64Comparator(), 5, false, FLAGS_identity_as_first_hash, nullptr, 0 /* column_family_id */, @@ -432,7 +432,7 @@ void WriteFile(const std::vector& keys, env->GetFileSize(fname, &file_size); std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); - unique_ptr file_reader( + std::unique_ptr 
file_reader( new RandomAccessFileReader(std::move(read_file), fname)); const ImmutableCFOptions ioptions(options); @@ -464,7 +464,7 @@ void ReadKeys(uint64_t num, uint32_t batch_size) { env->GetFileSize(fname, &file_size); std::unique_ptr read_file; ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); - unique_ptr file_reader( + std::unique_ptr file_reader( new RandomAccessFileReader(std::move(read_file), fname)); const ImmutableCFOptions ioptions(options); diff --git a/3rdParty/rocksdb/v5.16.X/table/data_block_footer.cc b/3rdParty/rocksdb/v5.18.X/table/data_block_footer.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/data_block_footer.cc rename to 3rdParty/rocksdb/v5.18.X/table/data_block_footer.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/data_block_footer.h b/3rdParty/rocksdb/v5.18.X/table/data_block_footer.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/data_block_footer.h rename to 3rdParty/rocksdb/v5.18.X/table/data_block_footer.h diff --git a/3rdParty/rocksdb/v5.16.X/table/data_block_hash_index.cc b/3rdParty/rocksdb/v5.18.X/table/data_block_hash_index.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/data_block_hash_index.cc rename to 3rdParty/rocksdb/v5.18.X/table/data_block_hash_index.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/data_block_hash_index.h b/3rdParty/rocksdb/v5.18.X/table/data_block_hash_index.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/data_block_hash_index.h rename to 3rdParty/rocksdb/v5.18.X/table/data_block_hash_index.h diff --git a/3rdParty/rocksdb/v5.16.X/table/data_block_hash_index_test.cc b/3rdParty/rocksdb/v5.18.X/table/data_block_hash_index_test.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/data_block_hash_index_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/data_block_hash_index_test.cc index c2b059893f..ac12bbf935 100644 --- a/3rdParty/rocksdb/v5.16.X/table/data_block_hash_index_test.cc +++ 
b/3rdParty/rocksdb/v5.18.X/table/data_block_hash_index_test.cc @@ -7,12 +7,14 @@ #include #include +#include "db/table_properties_collector.h" #include "rocksdb/slice.h" #include "table/block.h" #include "table/block_based_table_reader.h" #include "table/block_builder.h" #include "table/data_block_hash_index.h" #include "table/get_context.h" +#include "table/table_builder.h" #include "util/testharness.h" #include "util/testutil.h" @@ -282,7 +284,6 @@ TEST(DataBlockHashIndex, BlockRestartIndexExceedMax) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); ASSERT_EQ(reader.IndexType(), @@ -305,7 +306,6 @@ TEST(DataBlockHashIndex, BlockRestartIndexExceedMax) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); ASSERT_EQ(reader.IndexType(), @@ -337,7 +337,6 @@ TEST(DataBlockHashIndex, BlockSizeExceedMax) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); ASSERT_EQ(reader.IndexType(), @@ -362,7 +361,6 @@ TEST(DataBlockHashIndex, BlockSizeExceedMax) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); // the index type have fallen back to binary when build finish. 
@@ -390,7 +388,6 @@ TEST(DataBlockHashIndex, BlockTestSingleKey) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); const InternalKeyComparator icmp(BytewiseComparator()); @@ -472,7 +469,6 @@ TEST(DataBlockHashIndex, BlockTestLarge) { // create block reader BlockContents contents; contents.data = rawblock; - contents.cachable = false; Block reader(std::move(contents), kDisableGlobalSequenceNumber); const InternalKeyComparator icmp(BytewiseComparator()); @@ -540,9 +536,9 @@ TEST(DataBlockHashIndex, BlockTestLarge) { void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, std::string& v2, InternalKey& seek_ikey, GetContext& get_context, Options& options) { - unique_ptr file_writer; - unique_ptr file_reader; - unique_ptr table_reader; + std::unique_ptr file_writer; + std::unique_ptr file_reader; + std::unique_ptr table_reader; int level_ = -1; std::vector keys; @@ -553,8 +549,9 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, EnvOptions soptions; soptions.use_mmap_reads = ioptions.allow_mmap_reads; - file_writer.reset(test::GetWritableFileWriter(new test::StringSink())); - unique_ptr builder; + file_writer.reset( + test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */)); + std::unique_ptr builder; std::vector> int_tbl_prop_collector_factories; std::string column_family_name; diff --git a/3rdParty/rocksdb/v5.16.X/table/filter_block.h b/3rdParty/rocksdb/v5.18.X/table/filter_block.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/filter_block.h rename to 3rdParty/rocksdb/v5.18.X/table/filter_block.h diff --git a/3rdParty/rocksdb/v5.16.X/table/flush_block_policy.cc b/3rdParty/rocksdb/v5.18.X/table/flush_block_policy.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/flush_block_policy.cc rename to 3rdParty/rocksdb/v5.18.X/table/flush_block_policy.cc diff --git 
a/3rdParty/rocksdb/v5.16.X/table/format.cc b/3rdParty/rocksdb/v5.18.X/table/format.cc similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/table/format.cc rename to 3rdParty/rocksdb/v5.18.X/table/format.cc index a4e448870b..0e43e82433 100644 --- a/3rdParty/rocksdb/v5.16.X/table/format.cc +++ b/3rdParty/rocksdb/v5.18.X/table/format.cc @@ -24,6 +24,7 @@ #include "util/crc32c.h" #include "util/file_reader_writer.h" #include "util/logging.h" +#include "util/memory_allocator.h" #include "util/stop_watch.h" #include "util/string_util.h" #include "util/xxhash.h" @@ -54,13 +55,6 @@ void BlockHandle::EncodeTo(std::string* dst) const { PutVarint64Varint64(dst, offset_, size_); } -void BlockHandle::EncodeSizeTo(std::string* dst) const { - // Sanity check that all fields have been set - assert(offset_ != ~static_cast(0)); - assert(size_ != ~static_cast(0)); - PutVarint64(dst, size_); -} - Status BlockHandle::DecodeFrom(Slice* input) { if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) { @@ -286,8 +280,8 @@ Status ReadFooterFromFile(RandomAccessFileReader* file, Status UncompressBlockContentsForCompressionType( const UncompressionContext& uncompression_ctx, const char* data, size_t n, BlockContents* contents, uint32_t format_version, - const ImmutableCFOptions& ioptions) { - std::unique_ptr ubuf; + const ImmutableCFOptions& ioptions, MemoryAllocator* allocator) { + CacheAllocationPtr ubuf; assert(uncompression_ctx.type() != kNoCompression && "Invalid compression type"); @@ -303,81 +297,82 @@ Status UncompressBlockContentsForCompressionType( if (!Snappy_GetUncompressedLength(data, n, &ulength)) { return Status::Corruption(snappy_corrupt_msg); } - ubuf.reset(new char[ulength]); + ubuf = AllocateBlock(ulength, allocator); if (!Snappy_Uncompress(data, n, ubuf.get())) { return Status::Corruption(snappy_corrupt_msg); } - *contents = BlockContents(std::move(ubuf), ulength, true, kNoCompression); + *contents = BlockContents(std::move(ubuf), ulength); break; } case 
kZlibCompression: - ubuf.reset(Zlib_Uncompress( + ubuf = Zlib_Uncompress( uncompression_ctx, data, n, &decompress_size, - GetCompressFormatForVersion(kZlibCompression, format_version))); + GetCompressFormatForVersion(kZlibCompression, format_version), + allocator); if (!ubuf) { static char zlib_corrupt_msg[] = "Zlib not supported or corrupted Zlib compressed block contents"; return Status::Corruption(zlib_corrupt_msg); } - *contents = - BlockContents(std::move(ubuf), decompress_size, true, kNoCompression); + *contents = BlockContents(std::move(ubuf), decompress_size); break; case kBZip2Compression: - ubuf.reset(BZip2_Uncompress( + ubuf = BZip2_Uncompress( data, n, &decompress_size, - GetCompressFormatForVersion(kBZip2Compression, format_version))); + GetCompressFormatForVersion(kBZip2Compression, format_version), + allocator); if (!ubuf) { static char bzip2_corrupt_msg[] = "Bzip2 not supported or corrupted Bzip2 compressed block contents"; return Status::Corruption(bzip2_corrupt_msg); } - *contents = - BlockContents(std::move(ubuf), decompress_size, true, kNoCompression); + *contents = BlockContents(std::move(ubuf), decompress_size); break; case kLZ4Compression: - ubuf.reset(LZ4_Uncompress( + ubuf = LZ4_Uncompress( uncompression_ctx, data, n, &decompress_size, - GetCompressFormatForVersion(kLZ4Compression, format_version))); + GetCompressFormatForVersion(kLZ4Compression, format_version), + allocator); if (!ubuf) { static char lz4_corrupt_msg[] = "LZ4 not supported or corrupted LZ4 compressed block contents"; return Status::Corruption(lz4_corrupt_msg); } - *contents = - BlockContents(std::move(ubuf), decompress_size, true, kNoCompression); + *contents = BlockContents(std::move(ubuf), decompress_size); break; case kLZ4HCCompression: - ubuf.reset(LZ4_Uncompress( + ubuf = LZ4_Uncompress( uncompression_ctx, data, n, &decompress_size, - GetCompressFormatForVersion(kLZ4HCCompression, format_version))); + GetCompressFormatForVersion(kLZ4HCCompression, format_version), + 
allocator); if (!ubuf) { static char lz4hc_corrupt_msg[] = "LZ4HC not supported or corrupted LZ4HC compressed block contents"; return Status::Corruption(lz4hc_corrupt_msg); } - *contents = - BlockContents(std::move(ubuf), decompress_size, true, kNoCompression); + *contents = BlockContents(std::move(ubuf), decompress_size); break; case kXpressCompression: + // XPRESS allocates memory internally, thus no support for custom + // allocator. ubuf.reset(XPRESS_Uncompress(data, n, &decompress_size)); if (!ubuf) { static char xpress_corrupt_msg[] = "XPRESS not supported or corrupted XPRESS compressed block contents"; return Status::Corruption(xpress_corrupt_msg); } - *contents = - BlockContents(std::move(ubuf), decompress_size, true, kNoCompression); + *contents = BlockContents(std::move(ubuf), decompress_size); break; case kZSTD: case kZSTDNotFinalCompression: - ubuf.reset(ZSTD_Uncompress(uncompression_ctx, data, n, &decompress_size)); + ubuf = ZSTD_Uncompress(uncompression_ctx, data, n, &decompress_size, + allocator); if (!ubuf) { static char zstd_corrupt_msg[] = "ZSTD not supported or corrupted ZSTD compressed block contents"; return Status::Corruption(zstd_corrupt_msg); } - *contents = - BlockContents(std::move(ubuf), decompress_size, true, kNoCompression); + *contents = BlockContents(std::move(ubuf), decompress_size); break; default: return Status::Corruption("bad block type"); @@ -403,11 +398,13 @@ Status UncompressBlockContentsForCompressionType( Status UncompressBlockContents(const UncompressionContext& uncompression_ctx, const char* data, size_t n, BlockContents* contents, uint32_t format_version, - const ImmutableCFOptions& ioptions) { + const ImmutableCFOptions& ioptions, + MemoryAllocator* allocator) { assert(data[n] != kNoCompression); assert(data[n] == uncompression_ctx.type()); - return UncompressBlockContentsForCompressionType( - uncompression_ctx, data, n, contents, format_version, ioptions); + return 
UncompressBlockContentsForCompressionType(uncompression_ctx, data, n, + contents, format_version, + ioptions, allocator); } } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/table/format.h b/3rdParty/rocksdb/v5.18.X/table/format.h similarity index 85% rename from 3rdParty/rocksdb/v5.16.X/table/format.h rename to 3rdParty/rocksdb/v5.18.X/table/format.h index ebc9c25397..ae2bdafcc1 100644 --- a/3rdParty/rocksdb/v5.16.X/table/format.h +++ b/3rdParty/rocksdb/v5.18.X/table/format.h @@ -26,6 +26,7 @@ #include "port/port.h" // noexcept #include "table/persistent_cache_options.h" #include "util/file_reader_writer.h" +#include "util/memory_allocator.h" namespace rocksdb { @@ -55,7 +56,6 @@ class BlockHandle { void EncodeTo(std::string* dst) const; Status DecodeFrom(Slice* input); Status DecodeSizeFrom(uint64_t offset, Slice* input); - void EncodeSizeTo(std::string* dst) const; // Return a string that contains the copy of handle. std::string ToString(bool hex = true) const; @@ -189,28 +189,50 @@ Status ReadFooterFromFile(RandomAccessFileReader* file, // 1-byte type + 32-bit crc static const size_t kBlockTrailerSize = 5; +inline CompressionType get_block_compression_type(const char* block_data, + size_t block_size) { + return static_cast(block_data[block_size]); +} + struct BlockContents { Slice data; // Actual contents of data - bool cachable; // True iff data can be cached - CompressionType compression_type; - std::unique_ptr allocation; + CacheAllocationPtr allocation; - BlockContents() : cachable(false), compression_type(kNoCompression) {} +#ifndef NDEBUG + // Whether the block is a raw block, which contains compression type + // byte. It is only used for assertion. 
+ bool is_raw_block = false; +#endif // NDEBUG - BlockContents(const Slice& _data, bool _cachable, - CompressionType _compression_type) - : data(_data), cachable(_cachable), compression_type(_compression_type) {} + BlockContents() {} - BlockContents(std::unique_ptr&& _data, size_t _size, bool _cachable, - CompressionType _compression_type) - : data(_data.get(), _size), - cachable(_cachable), - compression_type(_compression_type), - allocation(std::move(_data)) {} + BlockContents(const Slice& _data) : data(_data) {} + + BlockContents(CacheAllocationPtr&& _data, size_t _size) + : data(_data.get(), _size), allocation(std::move(_data)) {} + + BlockContents(std::unique_ptr&& _data, size_t _size) + : data(_data.get(), _size) { + allocation.reset(_data.release()); + } + + bool own_bytes() const { return allocation.get() != nullptr; } + + // It's the caller's responsibility to make sure that this is + // for raw block contents, which contains the compression + // byte in the end. + CompressionType get_compression_type() const { + assert(is_raw_block); + return get_block_compression_type(data.data(), data.size()); + } // The additional memory space taken by the block data. 
size_t usable_size() const { if (allocation.get() != nullptr) { + auto allocator = allocation.get_deleter().allocator; + if (allocator) { + return allocator->UsableSize(allocation.get(), data.size()); + } #ifdef ROCKSDB_MALLOC_USABLE_SIZE return malloc_usable_size(allocation.get()); #else @@ -221,15 +243,20 @@ struct BlockContents { } } + size_t ApproximateMemoryUsage() const { + return usable_size() + sizeof(*this); + } + BlockContents(BlockContents&& other) ROCKSDB_NOEXCEPT { *this = std::move(other); } BlockContents& operator=(BlockContents&& other) { data = std::move(other.data); - cachable = other.cachable; - compression_type = other.compression_type; allocation = std::move(other.allocation); +#ifndef NDEBUG + is_raw_block = other.is_raw_block; +#endif // NDEBUG return *this; } }; @@ -253,7 +280,7 @@ extern Status ReadBlockContents( extern Status UncompressBlockContents( const UncompressionContext& uncompression_ctx, const char* data, size_t n, BlockContents* contents, uint32_t compress_format_version, - const ImmutableCFOptions& ioptions); + const ImmutableCFOptions& ioptions, MemoryAllocator* allocator = nullptr); // This is an extension to UncompressBlockContents that accepts // a specific compression type. This is used by un-wrapped blocks @@ -261,7 +288,7 @@ extern Status UncompressBlockContents( extern Status UncompressBlockContentsForCompressionType( const UncompressionContext& uncompression_ctx, const char* data, size_t n, BlockContents* contents, uint32_t compress_format_version, - const ImmutableCFOptions& ioptions); + const ImmutableCFOptions& ioptions, MemoryAllocator* allocator = nullptr); // Implementation details follow. 
Clients should ignore, diff --git a/3rdParty/rocksdb/v5.16.X/table/full_filter_bits_builder.h b/3rdParty/rocksdb/v5.18.X/table/full_filter_bits_builder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/full_filter_bits_builder.h rename to 3rdParty/rocksdb/v5.18.X/table/full_filter_bits_builder.h diff --git a/3rdParty/rocksdb/v5.16.X/table/full_filter_block.cc b/3rdParty/rocksdb/v5.18.X/table/full_filter_block.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/full_filter_block.cc rename to 3rdParty/rocksdb/v5.18.X/table/full_filter_block.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/full_filter_block.h b/3rdParty/rocksdb/v5.18.X/table/full_filter_block.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/full_filter_block.h rename to 3rdParty/rocksdb/v5.18.X/table/full_filter_block.h diff --git a/3rdParty/rocksdb/v5.16.X/table/full_filter_block_test.cc b/3rdParty/rocksdb/v5.18.X/table/full_filter_block_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/full_filter_block_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/full_filter_block_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/get_context.cc b/3rdParty/rocksdb/v5.18.X/table/get_context.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/get_context.cc rename to 3rdParty/rocksdb/v5.18.X/table/get_context.cc index 0aa75b6079..6f0bd2ebbc 100644 --- a/3rdParty/rocksdb/v5.16.X/table/get_context.cc +++ b/3rdParty/rocksdb/v5.18.X/table/get_context.cc @@ -43,7 +43,7 @@ GetContext::GetContext(const Comparator* ucmp, Statistics* statistics, GetState init_state, const Slice& user_key, PinnableSlice* pinnable_val, bool* value_found, MergeContext* merge_context, - RangeDelAggregator* _range_del_agg, Env* env, + SequenceNumber* _max_covering_tombstone_seq, Env* env, SequenceNumber* seq, PinnedIteratorsManager* _pinned_iters_mgr, ReadCallback* callback, bool* is_blob_index) @@ -56,7 +56,7 @@ GetContext::GetContext(const Comparator* 
ucmp, pinnable_val_(pinnable_val), value_found_(value_found), merge_context_(merge_context), - range_del_agg_(_range_del_agg), + max_covering_tombstone_seq_(_max_covering_tombstone_seq), env_(env), seq_(seq), replay_log_(nullptr), @@ -185,7 +185,8 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, auto type = parsed_key.type; // Key matches. Process it if ((type == kTypeValue || type == kTypeMerge || type == kTypeBlobIndex) && - range_del_agg_ != nullptr && range_del_agg_->ShouldDelete(parsed_key)) { + max_covering_tombstone_seq_ != nullptr && + *max_covering_tombstone_seq_ > parsed_key.sequence) { type = kTypeRangeDeletion; } switch (type) { diff --git a/3rdParty/rocksdb/v5.16.X/table/get_context.h b/3rdParty/rocksdb/v5.18.X/table/get_context.h similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/table/get_context.h rename to 3rdParty/rocksdb/v5.18.X/table/get_context.h index 066be104ba..407473808f 100644 --- a/3rdParty/rocksdb/v5.16.X/table/get_context.h +++ b/3rdParty/rocksdb/v5.18.X/table/get_context.h @@ -6,7 +6,6 @@ #pragma once #include #include "db/merge_context.h" -#include "db/range_del_aggregator.h" #include "db/read_callback.h" #include "rocksdb/env.h" #include "rocksdb/statistics.h" @@ -52,8 +51,9 @@ class GetContext { GetContext(const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger, Statistics* statistics, GetState init_state, const Slice& user_key, PinnableSlice* value, bool* value_found, - MergeContext* merge_context, RangeDelAggregator* range_del_agg, - Env* env, SequenceNumber* seq = nullptr, + MergeContext* merge_context, + SequenceNumber* max_covering_tombstone_seq, Env* env, + SequenceNumber* seq = nullptr, PinnedIteratorsManager* _pinned_iters_mgr = nullptr, ReadCallback* callback = nullptr, bool* is_blob_index = nullptr); @@ -76,7 +76,9 @@ class GetContext { GetState State() const { return state_; } - RangeDelAggregator* range_del_agg() { return range_del_agg_; } + SequenceNumber* 
max_covering_tombstone_seq() { + return max_covering_tombstone_seq_; + } PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; } @@ -111,7 +113,7 @@ class GetContext { PinnableSlice* pinnable_val_; bool* value_found_; // Is value set correctly? Used by KeyMayExist MergeContext* merge_context_; - RangeDelAggregator* range_del_agg_; + SequenceNumber* max_covering_tombstone_seq_; Env* env_; // If a key is found, seq_ will be set to the SequenceNumber of most recent // write to the key or kMaxSequenceNumber if unknown diff --git a/3rdParty/rocksdb/v5.16.X/table/index_builder.cc b/3rdParty/rocksdb/v5.18.X/table/index_builder.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/table/index_builder.cc rename to 3rdParty/rocksdb/v5.18.X/table/index_builder.cc index 5eaecbad10..cd28c42a8b 100644 --- a/3rdParty/rocksdb/v5.16.X/table/index_builder.cc +++ b/3rdParty/rocksdb/v5.18.X/table/index_builder.cc @@ -210,7 +210,5 @@ Status PartitionedIndexBuilder::Finish( } } -size_t PartitionedIndexBuilder::NumPartitions() const { - return partition_cnt_; -} +size_t PartitionedIndexBuilder::NumPartitions() const { return partition_cnt_; } } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/table/index_builder.h b/3rdParty/rocksdb/v5.18.X/table/index_builder.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/index_builder.h rename to 3rdParty/rocksdb/v5.18.X/table/index_builder.h index 147108d3b1..87d7b7a71b 100644 --- a/3rdParty/rocksdb/v5.16.X/table/index_builder.h +++ b/3rdParty/rocksdb/v5.18.X/table/index_builder.h @@ -181,9 +181,7 @@ class ShortenedIndexBuilder : public IndexBuilder { return Status::OK(); } - virtual size_t IndexSize() const override { - return index_size_; - } + virtual size_t IndexSize() const override { return index_size_; } virtual bool seperator_is_key_plus_seq() override { return seperator_is_key_plus_seq_; @@ -353,12 +351,8 @@ class PartitionedIndexBuilder : public IndexBuilder { IndexBlocks* index_blocks, 
const BlockHandle& last_partition_block_handle) override; - virtual size_t IndexSize() const override { - return index_size_; - } - size_t TopLevelIndexSize(uint64_t) const { - return top_level_index_size_; - } + virtual size_t IndexSize() const override { return index_size_; } + size_t TopLevelIndexSize(uint64_t) const { return top_level_index_size_; } size_t NumPartitions() const; inline bool ShouldCutFilterBlock() { diff --git a/3rdParty/rocksdb/v5.16.X/table/internal_iterator.h b/3rdParty/rocksdb/v5.18.X/table/internal_iterator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/internal_iterator.h rename to 3rdParty/rocksdb/v5.18.X/table/internal_iterator.h diff --git a/3rdParty/rocksdb/v5.16.X/table/iter_heap.h b/3rdParty/rocksdb/v5.18.X/table/iter_heap.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/iter_heap.h rename to 3rdParty/rocksdb/v5.18.X/table/iter_heap.h diff --git a/3rdParty/rocksdb/v5.16.X/table/iterator.cc b/3rdParty/rocksdb/v5.18.X/table/iterator.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/table/iterator.cc rename to 3rdParty/rocksdb/v5.18.X/table/iterator.cc index 97c47fb285..3a1063f6ef 100644 --- a/3rdParty/rocksdb/v5.16.X/table/iterator.cc +++ b/3rdParty/rocksdb/v5.18.X/table/iterator.cc @@ -103,7 +103,7 @@ Status Iterator::GetProperty(std::string prop_name, std::string* prop) { *prop = "0"; return Status::OK(); } - return Status::InvalidArgument("Undentified property."); + return Status::InvalidArgument("Unidentified property."); } namespace { diff --git a/3rdParty/rocksdb/v5.16.X/table/iterator_wrapper.h b/3rdParty/rocksdb/v5.18.X/table/iterator_wrapper.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/iterator_wrapper.h rename to 3rdParty/rocksdb/v5.18.X/table/iterator_wrapper.h diff --git a/3rdParty/rocksdb/v5.16.X/table/merger_test.cc b/3rdParty/rocksdb/v5.18.X/table/merger_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/merger_test.cc rename to 
3rdParty/rocksdb/v5.18.X/table/merger_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/merging_iterator.cc b/3rdParty/rocksdb/v5.18.X/table/merging_iterator.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/merging_iterator.cc rename to 3rdParty/rocksdb/v5.18.X/table/merging_iterator.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/merging_iterator.h b/3rdParty/rocksdb/v5.18.X/table/merging_iterator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/merging_iterator.h rename to 3rdParty/rocksdb/v5.18.X/table/merging_iterator.h diff --git a/3rdParty/rocksdb/v5.16.X/table/meta_blocks.cc b/3rdParty/rocksdb/v5.18.X/table/meta_blocks.cc similarity index 85% rename from 3rdParty/rocksdb/v5.16.X/table/meta_blocks.cc rename to 3rdParty/rocksdb/v5.18.X/table/meta_blocks.cc index 256730bfa7..fdf8a56120 100644 --- a/3rdParty/rocksdb/v5.16.X/table/meta_blocks.cc +++ b/3rdParty/rocksdb/v5.18.X/table/meta_blocks.cc @@ -79,6 +79,8 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { Add(TablePropertiesNames::kIndexValueIsDeltaEncoded, props.index_value_is_delta_encoded); Add(TablePropertiesNames::kNumEntries, props.num_entries); + Add(TablePropertiesNames::kDeletedKeys, props.num_deletions); + Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands); Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions); Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); Add(TablePropertiesNames::kFilterSize, props.filter_size); @@ -173,7 +175,8 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ImmutableCFOptions& ioptions, TableProperties** table_properties, - bool compression_type_missing) { + bool /*compression_type_missing*/, + MemoryAllocator* memory_allocator) { assert(table_properties); Slice v = handle_value; @@ -189,15 +192,13 @@ Status ReadProperties(const Slice& handle_value, 
RandomAccessFileReader* file, Slice compression_dict; PersistentCacheOptions cache_options; - BlockFetcher block_fetcher( - file, prefetch_buffer, footer, read_options, handle, &block_contents, - ioptions, false /* decompress */, compression_dict, cache_options); + BlockFetcher block_fetcher(file, prefetch_buffer, footer, read_options, + handle, &block_contents, ioptions, + false /* decompress */, false /*maybe_compressed*/, + compression_dict, cache_options, memory_allocator); s = block_fetcher.ReadBlockContents(); - // override compression_type when table file is known to contain undefined - // value at compression type marker - if (compression_type_missing) { - block_contents.compression_type = kNoCompression; - } + // property block is never compressed. Need to add uncompress logic if we are + // to compress it.. if (!s.ok()) { return s; @@ -229,6 +230,10 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, {TablePropertiesNames::kNumDataBlocks, &new_table_properties->num_data_blocks}, {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, + {TablePropertiesNames::kDeletedKeys, + &new_table_properties->num_deletions}, + {TablePropertiesNames::kMergeOperands, + &new_table_properties->num_merge_operands}, {TablePropertiesNames::kNumRangeDeletions, &new_table_properties->num_range_deletions}, {TablePropertiesNames::kFormatVersion, @@ -263,6 +268,12 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, {key, handle.offset() + iter.ValueOffset()}); if (pos != predefined_uint64_properties.end()) { + if (key == TablePropertiesNames::kDeletedKeys || + key == TablePropertiesNames::kMergeOperands) { + // Insert in user-collected properties for API backwards compatibility + new_table_properties->user_collected_properties.insert( + {key, raw_val.ToString()}); + } // handle predefined rocksdb properties uint64_t val; if (!GetVarint64(&raw_val, &val)) { @@ -305,9 +316,10 @@ Status ReadProperties(const 
Slice& handle_value, RandomAccessFileReader* file, Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, uint64_t table_magic_number, - const ImmutableCFOptions &ioptions, + const ImmutableCFOptions& ioptions, TableProperties** properties, - bool compression_type_missing) { + bool compression_type_missing, + MemoryAllocator* memory_allocator) { // -- Read metaindex block Footer footer; auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size, @@ -323,19 +335,17 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, Slice compression_dict; PersistentCacheOptions cache_options; - BlockFetcher block_fetcher( - file, nullptr /* prefetch_buffer */, footer, read_options, - metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, - compression_dict, cache_options); + BlockFetcher block_fetcher(file, nullptr /* prefetch_buffer */, footer, + read_options, metaindex_handle, + &metaindex_contents, ioptions, + false /* decompress */, false /*maybe_compressed*/, + compression_dict, cache_options, memory_allocator); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; } - // override compression_type when table file is known to contain undefined - // value at compression type marker - if (compression_type_missing) { - metaindex_contents.compression_type = kNoCompression; - } + // property blocks are never compressed. Need to add uncompress logic if we + // are to compress it. 
Block metaindex_block(std::move(metaindex_contents), kDisableGlobalSequenceNumber); std::unique_ptr meta_iter( @@ -352,7 +362,8 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, TableProperties table_properties; if (found_properties_block == true) { s = ReadProperties(meta_iter->value(), file, nullptr /* prefetch_buffer */, - footer, ioptions, properties, compression_type_missing); + footer, ioptions, properties, compression_type_missing, + memory_allocator); } else { s = Status::NotFound(); } @@ -375,10 +386,11 @@ Status FindMetaBlock(InternalIterator* meta_index_iter, Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, uint64_t table_magic_number, - const ImmutableCFOptions &ioptions, + const ImmutableCFOptions& ioptions, const std::string& meta_block_name, BlockHandle* block_handle, - bool compression_type_missing) { + bool /*compression_type_missing*/, + MemoryAllocator* memory_allocator) { Footer footer; auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size, &footer, table_magic_number); @@ -395,16 +407,14 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, BlockFetcher block_fetcher( file, nullptr /* prefetch_buffer */, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, - false /* do decompression */, compression_dict, cache_options); + false /* do decompression */, false /*maybe_compressed*/, + compression_dict, cache_options, memory_allocator); s = block_fetcher.ReadBlockContents(); if (!s.ok()) { return s; } - // override compression_type when table file is known to contain undefined - // value at compression type marker - if (compression_type_missing) { - metaindex_contents.compression_type = kNoCompression; - } + // meta blocks are never compressed. Need to add uncompress logic if we are to + // compress it. 
Block metaindex_block(std::move(metaindex_contents), kDisableGlobalSequenceNumber); @@ -420,7 +430,8 @@ Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t table_magic_number, const ImmutableCFOptions& ioptions, const std::string& meta_block_name, - BlockContents* contents, bool compression_type_missing) { + BlockContents* contents, bool /*compression_type_missing*/, + MemoryAllocator* memory_allocator) { Status status; Footer footer; status = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer, @@ -439,17 +450,14 @@ Status ReadMetaBlock(RandomAccessFileReader* file, BlockFetcher block_fetcher(file, prefetch_buffer, footer, read_options, metaindex_handle, &metaindex_contents, ioptions, - false /* decompress */, compression_dict, - cache_options); + false /* decompress */, false /*maybe_compressed*/, + compression_dict, cache_options, memory_allocator); status = block_fetcher.ReadBlockContents(); if (!status.ok()) { return status; } - // override compression_type when table file is known to contain undefined - // value at compression type marker - if (compression_type_missing) { - metaindex_contents.compression_type = kNoCompression; - } + // meta block is never compressed. Need to add uncompress logic if we are to + // compress it. 
// Finding metablock Block metaindex_block(std::move(metaindex_contents), @@ -469,7 +477,8 @@ Status ReadMetaBlock(RandomAccessFileReader* file, // Reading metablock BlockFetcher block_fetcher2( file, prefetch_buffer, footer, read_options, block_handle, contents, - ioptions, false /* decompress */, compression_dict, cache_options); + ioptions, false /* decompress */, false /*maybe_compressed*/, + compression_dict, cache_options, memory_allocator); return block_fetcher2.ReadBlockContents(); } diff --git a/3rdParty/rocksdb/v5.16.X/table/meta_blocks.h b/3rdParty/rocksdb/v5.18.X/table/meta_blocks.h similarity index 89% rename from 3rdParty/rocksdb/v5.16.X/table/meta_blocks.h rename to 3rdParty/rocksdb/v5.18.X/table/meta_blocks.h index a18c8edc47..1c8fe686ca 100644 --- a/3rdParty/rocksdb/v5.16.X/table/meta_blocks.h +++ b/3rdParty/rocksdb/v5.18.X/table/meta_blocks.h @@ -11,12 +11,13 @@ #include "db/builder.h" #include "db/table_properties_collector.h" -#include "util/kv_map.h" #include "rocksdb/comparator.h" +#include "rocksdb/memory_allocator.h" #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "table/block_builder.h" #include "table/format.h" +#include "util/kv_map.h" namespace rocksdb { @@ -96,7 +97,8 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, FilePrefetchBuffer* prefetch_buffer, const Footer& footer, const ImmutableCFOptions& ioptions, TableProperties** table_properties, - bool compression_type_missing = false); + bool compression_type_missing = false, + MemoryAllocator* memory_allocator = nullptr); // Directly read the properties from the properties block of a plain table. // @returns a status to indicate if the operation succeeded. 
On success, @@ -108,9 +110,10 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, // `ReadProperties`, `FindMetaBlock`, and `ReadMetaBlock` Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, uint64_t table_magic_number, - const ImmutableCFOptions &ioptions, + const ImmutableCFOptions& ioptions, TableProperties** properties, - bool compression_type_missing = false); + bool compression_type_missing = false, + MemoryAllocator* memory_allocator = nullptr); // Find the meta block from the meta index block. Status FindMetaBlock(InternalIterator* meta_index_iter, @@ -120,10 +123,11 @@ Status FindMetaBlock(InternalIterator* meta_index_iter, // Find the meta block Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, uint64_t table_magic_number, - const ImmutableCFOptions &ioptions, + const ImmutableCFOptions& ioptions, const std::string& meta_block_name, BlockHandle* block_handle, - bool compression_type_missing = false); + bool compression_type_missing = false, + MemoryAllocator* memory_allocator = nullptr); // Read the specified meta block with name meta_block_name // from `file` and initialize `contents` with contents of this block. 
@@ -134,6 +138,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file, const ImmutableCFOptions& ioptions, const std::string& meta_block_name, BlockContents* contents, - bool compression_type_missing = false); + bool compression_type_missing = false, + MemoryAllocator* memory_allocator = nullptr); } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/table/mock_table.cc b/3rdParty/rocksdb/v5.18.X/table/mock_table.cc similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/table/mock_table.cc rename to 3rdParty/rocksdb/v5.18.X/table/mock_table.cc index 54bab73d8a..65a4361696 100644 --- a/3rdParty/rocksdb/v5.16.X/table/mock_table.cc +++ b/3rdParty/rocksdb/v5.18.X/table/mock_table.cc @@ -60,8 +60,8 @@ MockTableFactory::MockTableFactory() : next_id_(1) {} Status MockTableFactory::NewTableReader( const TableReaderOptions& /*table_reader_options*/, - unique_ptr&& file, uint64_t /*file_size*/, - unique_ptr* table_reader, + std::unique_ptr&& file, uint64_t /*file_size*/, + std::unique_ptr* table_reader, bool /*prefetch_index_and_filter_in_cache*/) const { uint32_t id = GetIDFromFile(file.get()); @@ -93,7 +93,7 @@ Status MockTableFactory::CreateMockTable(Env* env, const std::string& fname, return s; } - WritableFileWriter file_writer(std::move(file), EnvOptions()); + WritableFileWriter file_writer(std::move(file), fname, EnvOptions()); uint32_t id = GetAndWriteNextID(&file_writer); file_system_.files.insert({id, std::move(file_contents)}); diff --git a/3rdParty/rocksdb/v5.16.X/table/mock_table.h b/3rdParty/rocksdb/v5.18.X/table/mock_table.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/mock_table.h rename to 3rdParty/rocksdb/v5.18.X/table/mock_table.h index 92cf87370f..2f123a963c 100644 --- a/3rdParty/rocksdb/v5.16.X/table/mock_table.h +++ b/3rdParty/rocksdb/v5.18.X/table/mock_table.h @@ -157,8 +157,8 @@ class MockTableFactory : public TableFactory { const char* Name() const override { return "MockTable"; } Status NewTableReader( const 
TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table_reader, bool prefetch_index_and_filter_in_cache = true) const override; TableBuilder* NewTableBuilder( const TableBuilderOptions& table_builder_options, diff --git a/3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block.cc b/3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block.cc rename to 3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block.cc index 6084133b75..aab0f5509b 100644 --- a/3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block.cc +++ b/3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block.cc @@ -79,7 +79,10 @@ Slice PartitionedFilterBlockBuilder::Finish( std::string handle_encoding; last_partition_block_handle.EncodeTo(&handle_encoding); std::string handle_delta_encoding; - last_partition_block_handle.EncodeSizeTo(&handle_delta_encoding); + PutVarsignedint64( + &handle_delta_encoding, + last_partition_block_handle.size() - last_encoded_handle_.size()); + last_encoded_handle_ = last_partition_block_handle; const Slice handle_delta_encoding_slice(handle_delta_encoding); index_on_filter_block_builder_.Add(last_entry.key, handle_encoding, &handle_delta_encoding_slice); diff --git a/3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block.h b/3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block.h rename to 3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block.h index f6241749db..5d55da5449 100644 --- a/3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block.h +++ b/3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block.h @@ -66,6 +66,7 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { uint32_t filters_in_partition_; // Number of keys added size_t num_added_; + 
BlockHandle last_encoded_handle_; }; class PartitionedFilterBlockReader : public FilterBlockReader, diff --git a/3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block_test.cc b/3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block_test.cc similarity index 91% rename from 3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block_test.cc index 150eac6a8d..ffa8a9a563 100644 --- a/3rdParty/rocksdb/v5.16.X/table/partitioned_filter_block_test.cc +++ b/3rdParty/rocksdb/v5.18.X/table/partitioned_filter_block_test.cc @@ -33,7 +33,7 @@ class MockedBlockBasedTable : public BlockBasedTable { const SliceTransform* prefix_extractor) const override { Slice slice = slices[filter_blk_handle.offset()]; auto obj = new FullFilterBlockReader( - prefix_extractor, true, BlockContents(slice, false, kNoCompression), + prefix_extractor, true, BlockContents(slice), rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr); return {obj, nullptr}; } @@ -44,13 +44,15 @@ class MockedBlockBasedTable : public BlockBasedTable { const SliceTransform* prefix_extractor) const override { Slice slice = slices[filter_blk_handle.offset()]; auto obj = new FullFilterBlockReader( - prefix_extractor, true, BlockContents(slice, false, kNoCompression), + prefix_extractor, true, BlockContents(slice), rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr); return obj; } }; -class PartitionedFilterBlockTest : public testing::Test { +class PartitionedFilterBlockTest + : public testing::Test, + virtual public ::testing::WithParamInterface { public: BlockBasedTableOptions table_options_; InternalKeyComparator icomp = InternalKeyComparator(BytewiseComparator()); @@ -60,6 +62,8 @@ class PartitionedFilterBlockTest : public testing::Test { table_options_.no_block_cache = true; // Otherwise BlockBasedTable::Close // will access variable that are not // initialized in our mocked version + table_options_.format_version 
= GetParam(); + table_options_.index_block_restart_interval = 3; } std::shared_ptr cache_; @@ -143,10 +147,10 @@ class PartitionedFilterBlockTest : public testing::Test { const bool kImmortal = true; table.reset(new MockedBlockBasedTable( new BlockBasedTable::Rep(ioptions, env_options, table_options_, icomp, - !kSkipFilters, !kImmortal))); + !kSkipFilters, 0, !kImmortal))); auto reader = new PartitionedFilterBlockReader( - prefix_extractor, true, BlockContents(slice, false, kNoCompression), - nullptr, nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq(), + prefix_extractor, true, BlockContents(slice), nullptr, nullptr, icomp, + table.get(), pib->seperator_is_key_plus_seq(), !pib->get_use_value_delta_encoding()); return reader; } @@ -279,14 +283,19 @@ class PartitionedFilterBlockTest : public testing::Test { } }; -TEST_F(PartitionedFilterBlockTest, EmptyBuilder) { +INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest, + testing::Values(test::kDefaultFormatVersion)); +INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest, + testing::Values(test::kLatestFormatVersion)); + +TEST_P(PartitionedFilterBlockTest, EmptyBuilder) { std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder(NewBuilder(pib.get())); const bool empty = true; VerifyReader(builder.get(), pib.get(), empty); } -TEST_F(PartitionedFilterBlockTest, OneBlock) { +TEST_P(PartitionedFilterBlockTest, OneBlock) { uint64_t max_index_size = MaxIndexSize(); for (uint64_t i = 1; i < max_index_size + 1; i++) { table_options_.metadata_block_size = i; @@ -294,7 +303,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlock) { } } -TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) { +TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) { uint64_t max_index_size = MaxIndexSize(); for (uint64_t i = 1; i < max_index_size + 1; i++) { table_options_.metadata_block_size = i; @@ -304,7 +313,7 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) { // This reproduces the bug that a prefix is the 
same among multiple consecutive // blocks but the bug would add it only to the first block. -TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { +TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { // some small number to cause partition cuts table_options_.metadata_block_size = 1; std::unique_ptr prefix_extractor @@ -330,7 +339,7 @@ TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { } } -TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) { +TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) { uint64_t max_index_size = MaxIndexSize(); for (uint64_t i = 1; i < max_index_size + 1; i++) { table_options_.metadata_block_size = i; @@ -338,7 +347,7 @@ TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) { } } -TEST_F(PartitionedFilterBlockTest, PartitionCount) { +TEST_P(PartitionedFilterBlockTest, PartitionCount) { int num_keys = sizeof(keys) / sizeof(*keys); table_options_.metadata_block_size = std::max(MaxIndexSize(), MaxFilterSize()); diff --git a/3rdParty/rocksdb/v5.16.X/table/persistent_cache_helper.cc b/3rdParty/rocksdb/v5.18.X/table/persistent_cache_helper.cc similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/table/persistent_cache_helper.cc rename to 3rdParty/rocksdb/v5.18.X/table/persistent_cache_helper.cc index 103f57c80a..4e90697a6e 100644 --- a/3rdParty/rocksdb/v5.16.X/table/persistent_cache_helper.cc +++ b/3rdParty/rocksdb/v5.18.X/table/persistent_cache_helper.cc @@ -29,12 +29,9 @@ void PersistentCacheHelper::InsertUncompressedPage( const BlockContents& contents) { assert(cache_options.persistent_cache); assert(!cache_options.persistent_cache->IsCompressed()); - if (!contents.cachable || contents.compression_type != kNoCompression) { - // We shouldn't cache this. 
Either - // (1) content is not cacheable - // (2) content is compressed - return; - } + // Precondition: + // (1) content is cacheable + // (2) content is not compressed // construct the page key char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length]; @@ -109,8 +106,7 @@ Status PersistentCacheHelper::LookupUncompressedPage( // update stats RecordTick(cache_options.statistics, PERSISTENT_CACHE_HIT); // construct result and return - *contents = - BlockContents(std::move(data), size, false /*cacheable*/, kNoCompression); + *contents = BlockContents(std::move(data), size); return Status::OK(); } diff --git a/3rdParty/rocksdb/v5.16.X/table/persistent_cache_helper.h b/3rdParty/rocksdb/v5.18.X/table/persistent_cache_helper.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/persistent_cache_helper.h rename to 3rdParty/rocksdb/v5.18.X/table/persistent_cache_helper.h diff --git a/3rdParty/rocksdb/v5.16.X/table/persistent_cache_options.h b/3rdParty/rocksdb/v5.18.X/table/persistent_cache_options.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/persistent_cache_options.h rename to 3rdParty/rocksdb/v5.18.X/table/persistent_cache_options.h diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_builder.cc b/3rdParty/rocksdb/v5.18.X/table/plain_table_builder.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_builder.cc rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_builder.cc index 717635cc1a..453b6c768b 100644 --- a/3rdParty/rocksdb/v5.16.X/table/plain_table_builder.cc +++ b/3rdParty/rocksdb/v5.18.X/table/plain_table_builder.cc @@ -166,6 +166,12 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) { properties_.num_entries++; properties_.raw_key_size += key.size(); properties_.raw_value_size += value.size(); + if (internal_key.type == kTypeDeletion || + internal_key.type == kTypeSingleDeletion) { + properties_.num_deletions++; + } else if (internal_key.type == kTypeMerge) { + 
properties_.num_merge_operands++; + } // notify property collectors NotifyCollectTableCollectorsOnAdd( diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_builder.h b/3rdParty/rocksdb/v5.18.X/table/plain_table_builder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_builder.h rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_builder.h diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_factory.cc b/3rdParty/rocksdb/v5.18.X/table/plain_table_factory.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_factory.cc rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_factory.cc index b88a689d4b..273a1bd4f2 100644 --- a/3rdParty/rocksdb/v5.16.X/table/plain_table_factory.cc +++ b/3rdParty/rocksdb/v5.18.X/table/plain_table_factory.cc @@ -19,8 +19,8 @@ namespace rocksdb { Status PlainTableFactory::NewTableReader( const TableReaderOptions& table_reader_options, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, bool /*prefetch_index_and_filter_in_cache*/) const { return PlainTableReader::Open( table_reader_options.ioptions, table_reader_options.env_options, diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_factory.h b/3rdParty/rocksdb/v5.18.X/table/plain_table_factory.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_factory.h rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_factory.h index f540a92b89..157e3acda0 100644 --- a/3rdParty/rocksdb/v5.16.X/table/plain_table_factory.h +++ b/3rdParty/rocksdb/v5.18.X/table/plain_table_factory.h @@ -149,8 +149,8 @@ class PlainTableFactory : public TableFactory { const char* Name() const override { return "PlainTable"; } Status NewTableReader(const TableReaderOptions& table_reader_options, - unique_ptr&& file, - uint64_t file_size, unique_ptr* table, + std::unique_ptr&& file, + uint64_t file_size, std::unique_ptr* table, bool 
prefetch_index_and_filter_in_cache) const override; TableBuilder* NewTableBuilder( diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_index.cc b/3rdParty/rocksdb/v5.18.X/table/plain_table_index.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_index.cc rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_index.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_index.h b/3rdParty/rocksdb/v5.18.X/table/plain_table_index.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_index.h rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_index.h diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_key_coding.cc b/3rdParty/rocksdb/v5.18.X/table/plain_table_key_coding.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_key_coding.cc rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_key_coding.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_key_coding.h b/3rdParty/rocksdb/v5.18.X/table/plain_table_key_coding.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_key_coding.h rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_key_coding.h index 321e0aed59..9a27ad06b7 100644 --- a/3rdParty/rocksdb/v5.16.X/table/plain_table_key_coding.h +++ b/3rdParty/rocksdb/v5.18.X/table/plain_table_key_coding.h @@ -114,7 +114,7 @@ class PlainTableFileReader { }; // Keep buffers for two recent reads. 
- std::array, 2> buffers_; + std::array, 2> buffers_; uint32_t num_buf_; Status status_; diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_reader.cc b/3rdParty/rocksdb/v5.18.X/table/plain_table_reader.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_reader.cc rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_reader.cc index 1143eb1cd2..ae656763cb 100644 --- a/3rdParty/rocksdb/v5.16.X/table/plain_table_reader.cc +++ b/3rdParty/rocksdb/v5.18.X/table/plain_table_reader.cc @@ -91,14 +91,13 @@ class PlainTableIterator : public InternalIterator { }; extern const uint64_t kPlainTableMagicNumber; -PlainTableReader::PlainTableReader(const ImmutableCFOptions& ioptions, - unique_ptr&& file, - const EnvOptions& storage_options, - const InternalKeyComparator& icomparator, - EncodingType encoding_type, - uint64_t file_size, - const TableProperties* table_properties, - const SliceTransform* prefix_extractor) +PlainTableReader::PlainTableReader( + const ImmutableCFOptions& ioptions, + std::unique_ptr&& file, + const EnvOptions& storage_options, const InternalKeyComparator& icomparator, + EncodingType encoding_type, uint64_t file_size, + const TableProperties* table_properties, + const SliceTransform* prefix_extractor) : internal_comparator_(icomparator), encoding_type_(encoding_type), full_scan_mode_(false), @@ -118,8 +117,8 @@ PlainTableReader::~PlainTableReader() { Status PlainTableReader::Open( const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader, const int bloom_bits_per_key, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table_reader, const int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size, bool full_scan_mode, const SliceTransform* prefix_extractor) { if (file_size > PlainTableIndex::kMaxFileSize) { @@ -277,7 +276,7 @@ void 
PlainTableReader::FillBloom(vector* prefix_hashes) { Status PlainTableReader::MmapDataIfNeeded() { if (file_info_.is_mmap_mode) { // Get mmapped memory. - return file_info_.file->Read(0, file_size_, &file_info_.file_data, nullptr); + return file_info_.file->Read(0, static_cast(file_size_), &file_info_.file_data, nullptr); } return Status::OK(); } diff --git a/3rdParty/rocksdb/v5.16.X/table/plain_table_reader.h b/3rdParty/rocksdb/v5.18.X/table/plain_table_reader.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/table/plain_table_reader.h rename to 3rdParty/rocksdb/v5.18.X/table/plain_table_reader.h index df08a98fa1..5f8248dd71 100644 --- a/3rdParty/rocksdb/v5.16.X/table/plain_table_reader.h +++ b/3rdParty/rocksdb/v5.18.X/table/plain_table_reader.h @@ -48,7 +48,7 @@ struct PlainTableReaderFileInfo { bool is_mmap_mode; Slice file_data; uint32_t data_end_offset; - unique_ptr file; + std::unique_ptr file; PlainTableReaderFileInfo(unique_ptr&& _file, const EnvOptions& storage_options, @@ -71,8 +71,8 @@ class PlainTableReader: public TableReader { static Status Open(const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, - unique_ptr&& file, - uint64_t file_size, unique_ptr* table, + std::unique_ptr&& file, + uint64_t file_size, std::unique_ptr* table, const int bloom_bits_per_key, double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size, bool full_scan_mode, @@ -104,7 +104,7 @@ class PlainTableReader: public TableReader { } PlainTableReader(const ImmutableCFOptions& ioptions, - unique_ptr&& file, + std::unique_ptr&& file, const EnvOptions& env_options, const InternalKeyComparator& internal_comparator, EncodingType encoding_type, uint64_t file_size, @@ -153,8 +153,8 @@ class PlainTableReader: public TableReader { DynamicBloom bloom_; PlainTableReaderFileInfo file_info_; Arena arena_; - std::unique_ptr index_block_alloc_; - std::unique_ptr bloom_block_alloc_; + CacheAllocationPtr 
index_block_alloc_; + CacheAllocationPtr bloom_block_alloc_; const ImmutableCFOptions& ioptions_; uint64_t file_size_; diff --git a/3rdParty/rocksdb/v5.16.X/table/scoped_arena_iterator.h b/3rdParty/rocksdb/v5.18.X/table/scoped_arena_iterator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/scoped_arena_iterator.h rename to 3rdParty/rocksdb/v5.18.X/table/scoped_arena_iterator.h diff --git a/3rdParty/rocksdb/v5.18.X/table/sst_file_reader.cc b/3rdParty/rocksdb/v5.18.X/table/sst_file_reader.cc new file mode 100644 index 0000000000..a915449bee --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/table/sst_file_reader.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifndef ROCKSDB_LITE + +#include "rocksdb/sst_file_reader.h" + +#include "db/db_iter.h" +#include "options/cf_options.h" +#include "table/get_context.h" +#include "table/table_reader.h" +#include "table/table_builder.h" +#include "util/file_reader_writer.h" + +namespace rocksdb { + +struct SstFileReader::Rep { + Options options; + EnvOptions soptions; + ImmutableCFOptions ioptions; + MutableCFOptions moptions; + + std::unique_ptr table_reader; + + Rep(const Options& opts) + : options(opts), + soptions(options), + ioptions(options), + moptions(ColumnFamilyOptions(options)) {} +}; + +SstFileReader::SstFileReader(const Options& options) + : rep_(new Rep(options)) {} + +SstFileReader::~SstFileReader() {} + +Status SstFileReader::Open(const std::string& file_path) { + auto r = rep_.get(); + Status s; + uint64_t file_size = 0; + std::unique_ptr file; + std::unique_ptr file_reader; + s = r->options.env->GetFileSize(file_path, &file_size); + if (s.ok()) { + s = r->options.env->NewRandomAccessFile(file_path, &file, r->soptions); + } + if (s.ok()) { + file_reader.reset(new 
RandomAccessFileReader(std::move(file), file_path)); + } + if (s.ok()) { + s = r->options.table_factory->NewTableReader( + TableReaderOptions(r->ioptions, r->moptions.prefix_extractor.get(), + r->soptions, r->ioptions.internal_comparator), + std::move(file_reader), file_size, &r->table_reader); + } + return s; +} + +Iterator* SstFileReader::NewIterator(const ReadOptions& options) { + auto r = rep_.get(); + auto sequence = options.snapshot != nullptr ? + options.snapshot->GetSequenceNumber() : + kMaxSequenceNumber; + auto internal_iter = r->table_reader->NewIterator( + options, r->moptions.prefix_extractor.get()); + return NewDBIterator(r->options.env, options, r->ioptions, r->moptions, + r->ioptions.user_comparator, internal_iter, sequence, + r->moptions.max_sequential_skip_in_iterations, + nullptr /* read_callback */); +} + +std::shared_ptr SstFileReader::GetTableProperties() const { + return rep_->table_reader->GetTableProperties(); +} + +Status SstFileReader::VerifyChecksum() { + return rep_->table_reader->VerifyChecksum(); +} + +} // namespace rocksdb + +#endif // !ROCKSDB_LITE diff --git a/3rdParty/rocksdb/v5.18.X/table/sst_file_reader_test.cc b/3rdParty/rocksdb/v5.18.X/table/sst_file_reader_test.cc new file mode 100644 index 0000000000..8da366fd7c --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/table/sst_file_reader_test.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#ifndef ROCKSDB_LITE + +#include + +#include "rocksdb/sst_file_reader.h" +#include "rocksdb/sst_file_writer.h" +#include "util/testharness.h" +#include "util/testutil.h" +#include "utilities/merge_operators.h" + +namespace rocksdb { + +std::string EncodeAsString(uint64_t v) { + char buf[16]; + snprintf(buf, sizeof(buf), "%08" PRIu64, v); + return std::string(buf); +} + +std::string EncodeAsUint64(uint64_t v) { + std::string dst; + PutFixed64(&dst, v); + return dst; +} + +class SstFileReaderTest : public testing::Test { + public: + SstFileReaderTest() { + options_.merge_operator = MergeOperators::CreateUInt64AddOperator(); + sst_name_ = test::PerThreadDBPath("sst_file"); + } + + void CreateFileAndCheck(const std::vector& keys) { + SstFileWriter writer(soptions_, options_); + ASSERT_OK(writer.Open(sst_name_)); + for (size_t i = 0; i + 2 < keys.size(); i += 3) { + ASSERT_OK(writer.Put(keys[i], keys[i])); + ASSERT_OK(writer.Merge(keys[i+1], EncodeAsUint64(i+1))); + ASSERT_OK(writer.Delete(keys[i+2])); + } + ASSERT_OK(writer.Finish()); + + ReadOptions ropts; + SstFileReader reader(options_); + ASSERT_OK(reader.Open(sst_name_)); + ASSERT_OK(reader.VerifyChecksum()); + std::unique_ptr iter(reader.NewIterator(ropts)); + iter->SeekToFirst(); + for (size_t i = 0; i + 2 < keys.size(); i += 3) { + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(keys[i]), 0); + ASSERT_EQ(iter->value().compare(keys[i]), 0); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key().compare(keys[i+1]), 0); + ASSERT_EQ(iter->value().compare(EncodeAsUint64(i+1)), 0); + iter->Next(); + } + ASSERT_FALSE(iter->Valid()); + } + + protected: + Options options_; + EnvOptions soptions_; + std::string sst_name_; +}; + +const uint64_t kNumKeys = 100; + +TEST_F(SstFileReaderTest, Basic) { + std::vector keys; + for (uint64_t i = 0; i < kNumKeys; i++) { + keys.emplace_back(EncodeAsString(i)); + } + CreateFileAndCheck(keys); +} + +TEST_F(SstFileReaderTest, Uint64Comparator) { + 
options_.comparator = test::Uint64Comparator(); + std::vector keys; + for (uint64_t i = 0; i < kNumKeys; i++) { + keys.emplace_back(EncodeAsUint64(i)); + } + CreateFileAndCheck(keys); +} + +} // namespace rocksdb + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +#else +#include + +int main(int /*argc*/, char** /*argv*/) { + fprintf(stderr, "SKIPPED as SstFileReader is not supported in ROCKSDB_LITE\n"); + return 0; +} + +#endif // ROCKSDB_LITE diff --git a/3rdParty/rocksdb/v5.16.X/table/sst_file_writer.cc b/3rdParty/rocksdb/v5.18.X/table/sst_file_writer.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/table/sst_file_writer.cc rename to 3rdParty/rocksdb/v5.18.X/table/sst_file_writer.cc index ae189daa59..a752504c8f 100644 --- a/3rdParty/rocksdb/v5.16.X/table/sst_file_writer.cc +++ b/3rdParty/rocksdb/v5.18.X/table/sst_file_writer.cc @@ -238,7 +238,8 @@ Status SstFileWriter::Open(const std::string& file_path) { nullptr /* compression_dict */, r->skip_filters, r->column_family_name, unknown_level); r->file_writer.reset( - new WritableFileWriter(std::move(sst_file), r->env_options)); + new WritableFileWriter(std::move(sst_file), file_path, r->env_options, + nullptr /* stats */, r->ioptions.listeners)); // TODO(tec) : If table_factory is using compressed block cache, we will // be adding the external sst file blocks into it, which is wasteful. 
diff --git a/3rdParty/rocksdb/v5.16.X/table/sst_file_writer_collectors.h b/3rdParty/rocksdb/v5.18.X/table/sst_file_writer_collectors.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/sst_file_writer_collectors.h rename to 3rdParty/rocksdb/v5.18.X/table/sst_file_writer_collectors.h diff --git a/3rdParty/rocksdb/v5.16.X/table/table_builder.h b/3rdParty/rocksdb/v5.18.X/table/table_builder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/table_builder.h rename to 3rdParty/rocksdb/v5.18.X/table/table_builder.h diff --git a/3rdParty/rocksdb/v5.16.X/table/table_properties.cc b/3rdParty/rocksdb/v5.18.X/table/table_properties.cc similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/table/table_properties.cc rename to 3rdParty/rocksdb/v5.18.X/table/table_properties.cc index 207a641911..56e1d03f1f 100644 --- a/3rdParty/rocksdb/v5.16.X/table/table_properties.cc +++ b/3rdParty/rocksdb/v5.18.X/table/table_properties.cc @@ -78,6 +78,9 @@ std::string TableProperties::ToString( AppendProperty(result, "# data blocks", num_data_blocks, prop_delim, kv_delim); AppendProperty(result, "# entries", num_entries, prop_delim, kv_delim); + AppendProperty(result, "# deletions", num_deletions, prop_delim, kv_delim); + AppendProperty(result, "# merge operands", num_merge_operands, prop_delim, + kv_delim); AppendProperty(result, "# range deletions", num_range_deletions, prop_delim, kv_delim); @@ -170,6 +173,8 @@ void TableProperties::Add(const TableProperties& tp) { raw_value_size += tp.raw_value_size; num_data_blocks += tp.num_data_blocks; num_entries += tp.num_entries; + num_deletions += tp.num_deletions; + num_merge_operands += tp.num_merge_operands; num_range_deletions += tp.num_range_deletions; } @@ -195,6 +200,9 @@ const std::string TablePropertiesNames::kNumDataBlocks = "rocksdb.num.data.blocks"; const std::string TablePropertiesNames::kNumEntries = "rocksdb.num.entries"; +const std::string TablePropertiesNames::kDeletedKeys = "rocksdb.deleted.keys"; 
+const std::string TablePropertiesNames::kMergeOperands = + "rocksdb.merge.operands"; const std::string TablePropertiesNames::kNumRangeDeletions = "rocksdb.num.range-deletions"; const std::string TablePropertiesNames::kFilterPolicy = diff --git a/3rdParty/rocksdb/v5.16.X/table/table_properties_internal.h b/3rdParty/rocksdb/v5.18.X/table/table_properties_internal.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/table_properties_internal.h rename to 3rdParty/rocksdb/v5.18.X/table/table_properties_internal.h diff --git a/3rdParty/rocksdb/v5.16.X/table/table_reader.h b/3rdParty/rocksdb/v5.18.X/table/table_reader.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/table/table_reader.h rename to 3rdParty/rocksdb/v5.18.X/table/table_reader.h index 505b5ba1fb..a5f15e1304 100644 --- a/3rdParty/rocksdb/v5.16.X/table/table_reader.h +++ b/3rdParty/rocksdb/v5.18.X/table/table_reader.h @@ -9,6 +9,7 @@ #pragma once #include +#include "db/range_tombstone_fragmenter.h" #include "rocksdb/slice_transform.h" #include "table/internal_iterator.h" @@ -44,7 +45,7 @@ class TableReader { bool skip_filters = false, bool for_compaction = false) = 0; - virtual InternalIterator* NewRangeTombstoneIterator( + virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& /*read_options*/) { return nullptr; } diff --git a/3rdParty/rocksdb/v5.16.X/table/table_reader_bench.cc b/3rdParty/rocksdb/v5.18.X/table/table_reader_bench.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/table/table_reader_bench.cc rename to 3rdParty/rocksdb/v5.18.X/table/table_reader_bench.cc index b05e3e0526..fbcfac826c 100644 --- a/3rdParty/rocksdb/v5.16.X/table/table_reader_bench.cc +++ b/3rdParty/rocksdb/v5.18.X/table/table_reader_bench.cc @@ -86,15 +86,16 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, const ImmutableCFOptions ioptions(opts); const ColumnFamilyOptions cfo(opts); const MutableCFOptions moptions(cfo); - unique_ptr 
file_writer; + std::unique_ptr file_writer; if (!through_db) { - unique_ptr file; + std::unique_ptr file; env->NewWritableFile(file_name, &file, env_options); std::vector > int_tbl_prop_collector_factories; - file_writer.reset(new WritableFileWriter(std::move(file), env_options)); + file_writer.reset( + new WritableFileWriter(std::move(file), file_name, env_options)); int unknown_level = -1; tb = opts.table_factory->NewTableBuilder( TableBuilderOptions( @@ -126,9 +127,9 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, db->Flush(FlushOptions()); } - unique_ptr table_reader; + std::unique_ptr table_reader; if (!through_db) { - unique_ptr raf; + std::unique_ptr raf; s = env->NewRandomAccessFile(file_name, &raf, env_options); if (!s.ok()) { fprintf(stderr, "Create File Error: %s\n", s.ToString().c_str()); @@ -136,7 +137,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, } uint64_t file_size; env->GetFileSize(file_name, &file_size); - unique_ptr file_reader( + std::unique_ptr file_reader( new RandomAccessFileReader(std::move(raf), file_name)); s = opts.table_factory->NewTableReader( TableReaderOptions(ioptions, moptions.prefix_extractor.get(), @@ -169,12 +170,12 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, if (!through_db) { PinnableSlice value; MergeContext merge_context; - RangeDelAggregator range_del_agg(ikc, {} /* snapshots */); + SequenceNumber max_covering_tombstone_seq = 0; GetContext get_context(ioptions.user_comparator, ioptions.merge_operator, ioptions.info_log, ioptions.statistics, GetContext::kNotFound, Slice(key), &value, nullptr, &merge_context, - &range_del_agg, env); + &max_covering_tombstone_seq, env); s = table_reader->Get(read_options, key, &get_context, nullptr); } else { s = db->Get(read_options, key, &result); diff --git a/3rdParty/rocksdb/v5.16.X/table/table_test.cc b/3rdParty/rocksdb/v5.18.X/table/table_test.cc similarity index 95% rename from 
3rdParty/rocksdb/v5.16.X/table/table_test.cc rename to 3rdParty/rocksdb/v5.18.X/table/table_test.cc index af648bc053..5ec613bec4 100644 --- a/3rdParty/rocksdb/v5.16.X/table/table_test.cc +++ b/3rdParty/rocksdb/v5.18.X/table/table_test.cc @@ -232,7 +232,6 @@ class BlockConstructor: public Constructor { data_ = builder.Finish().ToString(); BlockContents contents; contents.data = data_; - contents.cachable = false; block_ = new Block(std::move(contents), kDisableGlobalSequenceNumber); return Status::OK(); } @@ -323,8 +322,9 @@ class TableConstructor: public Constructor { const stl_wrappers::KVMap& kv_map) override { Reset(); soptions.use_mmap_reads = ioptions.allow_mmap_reads; - file_writer_.reset(test::GetWritableFileWriter(new test::StringSink())); - unique_ptr builder; + file_writer_.reset(test::GetWritableFileWriter(new test::StringSink(), + "" /* don't care */)); + std::unique_ptr builder; std::vector> int_tbl_prop_collector_factories; std::string column_family_name; @@ -422,9 +422,9 @@ class TableConstructor: public Constructor { } uint64_t uniq_id_; - unique_ptr file_writer_; - unique_ptr file_reader_; - unique_ptr table_reader_; + std::unique_ptr file_writer_; + std::unique_ptr file_reader_; + std::unique_ptr table_reader_; bool convert_to_internal_key_; int level_; @@ -507,7 +507,7 @@ class InternalIteratorFromIterator : public InternalIterator { virtual Status status() const override { return it_->status(); } private: - unique_ptr it_; + std::unique_ptr it_; }; class DBConstructor: public Constructor { @@ -1023,7 +1023,7 @@ class HarnessTest : public testing::Test { WriteBufferManager write_buffer_; bool support_prev_; bool only_support_prefix_seek_; - shared_ptr internal_comparator_; + std::shared_ptr internal_comparator_; }; static bool Between(uint64_t val, uint64_t low, uint64_t high) { @@ -1277,6 +1277,13 @@ TEST_P(BlockBasedTableTest, RangeDelBlock) { std::vector keys = {"1pika", "2chu"}; std::vector vals = {"p", "c"}; + std::vector expected_tombstones 
= { + {"1pika", "2chu", 0}, + {"2chu", "c", 1}, + {"2chu", "c", 0}, + {"c", "p", 0}, + }; + for (int i = 0; i < 2; i++) { RangeTombstone t(keys[i], vals[i], i); std::pair p = t.Serialize(); @@ -1309,14 +1316,15 @@ TEST_P(BlockBasedTableTest, RangeDelBlock) { ASSERT_FALSE(iter->Valid()); iter->SeekToFirst(); ASSERT_TRUE(iter->Valid()); - for (int i = 0; i < 2; i++) { + for (size_t i = 0; i < expected_tombstones.size(); i++) { ASSERT_TRUE(iter->Valid()); ParsedInternalKey parsed_key; ASSERT_TRUE(ParseInternalKey(iter->key(), &parsed_key)); RangeTombstone t(parsed_key, iter->value()); - ASSERT_EQ(t.start_key_, keys[i]); - ASSERT_EQ(t.end_key_, vals[i]); - ASSERT_EQ(t.seq_, i); + const auto& expected_t = expected_tombstones[i]; + ASSERT_EQ(t.start_key_, expected_t.start_key_); + ASSERT_EQ(t.end_key_, expected_t.end_key_); + ASSERT_EQ(t.seq_, expected_t.seq_); iter->Next(); } ASSERT_TRUE(!iter->Valid()); @@ -1384,8 +1392,8 @@ void PrefetchRange(TableConstructor* c, Options* opt, // prefetch auto* table_reader = dynamic_cast(c->GetTableReader()); Status s; - unique_ptr begin, end; - unique_ptr i_begin, i_end; + std::unique_ptr begin, end; + std::unique_ptr i_begin, i_end; if (key_begin != nullptr) { if (c->ConvertToInternalKey()) { i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue)); @@ -1416,7 +1424,7 @@ TEST_P(BlockBasedTableTest, PrefetchTest) { // The purpose of this test is to test the prefetching operation built into // BlockBasedTable. Options opt; - unique_ptr ikc; + std::unique_ptr ikc; ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); opt.compression = kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); @@ -2008,7 +2016,7 @@ TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) { // -- PART 1: Open with regular block cache. // Since block_cache is disabled, no cache activities will be involved. 
- unique_ptr iter; + std::unique_ptr iter; int64_t last_cache_bytes_read = 0; // At first, no block will be accessed. @@ -2342,7 +2350,7 @@ TEST_P(BlockBasedTableTest, NoObjectInCacheAfterTableClose) { } // Create a table Options opt; - unique_ptr ikc; + std::unique_ptr ikc; ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); opt.compression = kNoCompression; BlockBasedTableOptions table_options = @@ -2418,7 +2426,7 @@ TEST_P(BlockBasedTableTest, BlockCacheLeak) { // unique ID from the file. Options opt; - unique_ptr ikc; + std::unique_ptr ikc; ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); opt.compression = kNoCompression; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); @@ -2441,7 +2449,7 @@ TEST_P(BlockBasedTableTest, BlockCacheLeak) { const MutableCFOptions moptions(opt); c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); - unique_ptr iter( + std::unique_ptr iter( c.NewIterator(moptions.prefix_extractor.get())); iter->SeekToFirst(); while (iter->Valid()) { @@ -2476,6 +2484,78 @@ TEST_P(BlockBasedTableTest, BlockCacheLeak) { c.ResetTableReader(); } +namespace { +class CustomMemoryAllocator : public MemoryAllocator { + public: + virtual const char* Name() const override { return "CustomMemoryAllocator"; } + + void* Allocate(size_t size) override { + ++numAllocations; + auto ptr = new char[size + 16]; + memcpy(ptr, "memory_allocator_", 16); // mangle first 16 bytes + return reinterpret_cast(ptr + 16); + } + void Deallocate(void* p) override { + ++numDeallocations; + char* ptr = reinterpret_cast(p) - 16; + delete[] ptr; + } + + std::atomic numAllocations; + std::atomic numDeallocations; +}; +} // namespace + +TEST_P(BlockBasedTableTest, MemoryAllocator) { + auto custom_memory_allocator = std::make_shared(); + { + Options opt; + std::unique_ptr ikc; + ikc.reset(new test::PlainInternalKeyComparator(opt.comparator)); + opt.compression = kNoCompression; + BlockBasedTableOptions table_options; + 
table_options.block_size = 1024; + LRUCacheOptions lruOptions; + lruOptions.memory_allocator = custom_memory_allocator; + lruOptions.capacity = 16 * 1024 * 1024; + lruOptions.num_shard_bits = 4; + table_options.block_cache = NewLRUCache(std::move(lruOptions)); + opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator(), + true /* convert_to_internal_key_ */); + c.Add("k01", "hello"); + c.Add("k02", "hello2"); + c.Add("k03", std::string(10000, 'x')); + c.Add("k04", std::string(200000, 'x')); + c.Add("k05", std::string(300000, 'x')); + c.Add("k06", "hello3"); + c.Add("k07", std::string(100000, 'x')); + std::vector keys; + stl_wrappers::KVMap kvmap; + const ImmutableCFOptions ioptions(opt); + const MutableCFOptions moptions(opt); + c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap); + + std::unique_ptr iter( + c.NewIterator(moptions.prefix_extractor.get())); + iter->SeekToFirst(); + while (iter->Valid()) { + iter->key(); + iter->value(); + iter->Next(); + } + ASSERT_OK(iter->status()); + } + + // out of scope, block cache should have been deleted, all allocations + // deallocated + EXPECT_EQ(custom_memory_allocator->numAllocations.load(), + custom_memory_allocator->numDeallocations.load()); + // make sure that allocations actually happened through the cache allocator + EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0); +} + TEST_P(BlockBasedTableTest, NewIndexIteratorLeak) { // A regression test to avoid data race described in // https://github.com/facebook/rocksdb/issues/1267 @@ -2549,8 +2629,8 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) { PlainTableFactory factory(plain_table_options); test::StringSink sink; - unique_ptr file_writer( - test::GetWritableFileWriter(new test::StringSink())); + std::unique_ptr file_writer( + test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */)); Options options; const ImmutableCFOptions ioptions(options); const 
MutableCFOptions moptions(options); @@ -2578,7 +2658,7 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) { test::StringSink* ss = static_cast(file_writer->writable_file()); - unique_ptr file_reader( + std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss->contents(), 72242, true))); @@ -2657,9 +2737,9 @@ static void DoCompressionTest(CompressionType comp) { ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6100)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3500)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3500)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6500)); c.ResetTableReader(); } @@ -2705,6 +2785,7 @@ TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) { } } +#ifndef ROCKSDB_VALGRIND_RUN // RandomizedHarnessTest is very slow for certain combination of arguments // Split into 8 pieces to reduce the time individual tests take. 
TEST_F(HarnessTest, Randomized1) { @@ -2788,6 +2869,7 @@ TEST_F(HarnessTest, RandomizedLongDB) { ASSERT_GT(files, 0); } #endif // ROCKSDB_LITE +#endif // ROCKSDB_VALGRIND_RUN class MemTableTest : public testing::Test {}; @@ -2823,7 +2905,8 @@ TEST_F(MemTableTest, Simple) { iter = memtable->NewIterator(ReadOptions(), &arena); arena_iter_guard.set(iter); } else { - iter = memtable->NewRangeTombstoneIterator(ReadOptions()); + iter = memtable->NewRangeTombstoneIterator( + ReadOptions(), kMaxSequenceNumber /* read_seq */); iter_guard.reset(iter); } if (iter == nullptr) { @@ -2923,6 +3006,26 @@ TEST_F(HarnessTest, FooterTests) { ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.version(), 1U); } + { + // xxhash64 block based + std::string encoded; + Footer footer(kBlockBasedTableMagicNumber, 1); + BlockHandle meta_index(10, 5), index(20, 15); + footer.set_metaindex_handle(meta_index); + footer.set_index_handle(index); + footer.set_checksum(kxxHash64); + footer.EncodeTo(&encoded); + Footer decoded_footer; + Slice encoded_slice(encoded); + decoded_footer.DecodeFrom(&encoded_slice); + ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); + ASSERT_EQ(decoded_footer.checksum(), kxxHash64); + ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); + ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); + ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + ASSERT_EQ(decoded_footer.version(), 1U); + } // Plain table is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE { @@ -3150,7 +3253,8 @@ TEST_F(PrefixTest, PrefixAndWholeKeyTest) { TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); test::StringSink* sink = new test::StringSink(); - unique_ptr file_writer(test::GetWritableFileWriter(sink)); + std::unique_ptr file_writer( + 
test::GetWritableFileWriter(sink, "" /* don't care */)); Options options; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); const ImmutableCFOptions ioptions(options); @@ -3187,7 +3291,7 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { // Helper function to get version, global_seqno, global_seqno_offset std::function GetVersionAndGlobalSeqno = [&]() { - unique_ptr file_reader( + std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); @@ -3216,9 +3320,9 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { }; // Helper function to get the contents of the table InternalIterator - unique_ptr table_reader; + std::unique_ptr table_reader; std::function GetTableInternalIter = [&]() { - unique_ptr file_reader( + std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); @@ -3331,7 +3435,8 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) { BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); bbto.block_align = true; test::StringSink* sink = new test::StringSink(); - unique_ptr file_writer(test::GetWritableFileWriter(sink)); + std::unique_ptr file_writer( + test::GetWritableFileWriter(sink, "" /* don't care */)); Options options; options.compression = kNoCompression; options.table_factory.reset(NewBlockBasedTableFactory(bbto)); @@ -3362,7 +3467,7 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) { file_writer->Flush(); test::RandomRWStringSink ss_rw(sink); - unique_ptr file_reader( + std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); @@ -3420,7 +3525,8 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { BlockBasedTableOptions bbto = GetBlockBasedTableOptions(); bbto.block_align = true; test::StringSink* sink = new test::StringSink(); - unique_ptr file_writer(test::GetWritableFileWriter(sink)); + std::unique_ptr file_writer( + 
test::GetWritableFileWriter(sink, "" /* don't care */)); Options options; options.compression = kNoCompression; @@ -3454,7 +3560,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { file_writer->Flush(); test::RandomRWStringSink ss_rw(sink); - unique_ptr file_reader( + std::unique_ptr file_reader( test::GetRandomAccessFileReader( new test::StringSource(ss_rw.contents(), 73342, true))); @@ -3473,10 +3579,10 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { Slice compression_dict; PersistentCacheOptions cache_options; - BlockFetcher block_fetcher(file, nullptr /* prefetch_buffer */, footer, - read_options, handle, contents, ioptions, - false /* decompress */, compression_dict, - cache_options); + BlockFetcher block_fetcher( + file, nullptr /* prefetch_buffer */, footer, read_options, handle, + contents, ioptions, false /* decompress */, + false /*maybe_compressed*/, compression_dict, cache_options); ASSERT_OK(block_fetcher.ReadBlockContents()); }; @@ -3562,7 +3668,8 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) { BlockFetcher block_fetcher( table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(), metaindex_handle, &metaindex_contents, ioptions, false /* decompress */, - compression_dict, pcache_opts); + false /*maybe_compressed*/, compression_dict, pcache_opts, + nullptr /*memory_allocator*/); ASSERT_OK(block_fetcher.ReadBlockContents()); Block metaindex_block(std::move(metaindex_contents), kDisableGlobalSequenceNumber); diff --git a/3rdParty/rocksdb/v5.16.X/table/two_level_iterator.cc b/3rdParty/rocksdb/v5.18.X/table/two_level_iterator.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/two_level_iterator.cc rename to 3rdParty/rocksdb/v5.18.X/table/two_level_iterator.cc diff --git a/3rdParty/rocksdb/v5.16.X/table/two_level_iterator.h b/3rdParty/rocksdb/v5.18.X/table/two_level_iterator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/table/two_level_iterator.h rename to 
3rdParty/rocksdb/v5.18.X/table/two_level_iterator.h diff --git a/3rdParty/rocksdb/v5.16.X/third-party/fbson/COMMIT.md b/3rdParty/rocksdb/v5.18.X/third-party/fbson/COMMIT.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/third-party/fbson/COMMIT.md rename to 3rdParty/rocksdb/v5.18.X/third-party/fbson/COMMIT.md diff --git a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonDocument.h b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonDocument.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonDocument.h rename to 3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonDocument.h index 11b6fe28e4..c69fcb45f5 100644 --- a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonDocument.h +++ b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonDocument.h @@ -55,8 +55,7 @@ * @author Tian Xia */ -#ifndef FBSON_FBSONDOCUMENT_H -#define FBSON_FBSONDOCUMENT_H +#pragma once #include #include @@ -889,5 +888,3 @@ inline FbsonValue* FbsonValue::findPath(const char* key_path, #pragma pack(pop) } // namespace fbson - -#endif // FBSON_FBSONDOCUMENT_H diff --git a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonJsonParser.h b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonJsonParser.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonJsonParser.h rename to 3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonJsonParser.h index 47bff77fe5..f4b8ed2515 100644 --- a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonJsonParser.h +++ b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonJsonParser.h @@ -47,8 +47,7 @@ * @author Tian Xia */ -#ifndef FBSON_FBSONPARSER_H -#define FBSON_FBSONPARSER_H +#pragma once #include #include @@ -741,5 +740,3 @@ class FbsonJsonParserT { typedef FbsonJsonParserT FbsonJsonParser; } // namespace fbson - -#endif // FBSON_FBSONPARSER_H diff --git a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonStream.h b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonStream.h similarity index 98% rename from 
3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonStream.h rename to 3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonStream.h index 12723ea30e..b20cb1c3bf 100644 --- a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonStream.h +++ b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonStream.h @@ -18,8 +18,7 @@ * @author Tian Xia */ -#ifndef FBSON_FBSONSTREAM_H -#define FBSON_FBSONSTREAM_H +#pragma once #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS @@ -178,5 +177,3 @@ class FbsonOutStream : public std::ostream { }; } // namespace fbson - -#endif // FBSON_FBSONSTREAM_H diff --git a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonUtil.h b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonUtil.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonUtil.h rename to 3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonUtil.h index 2b6d6f5c97..70ac6cb2ba 100644 --- a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonUtil.h +++ b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonUtil.h @@ -9,8 +9,7 @@ * @author Tian Xia */ -#ifndef FBSON_FBSONUTIL_H -#define FBSON_FBSONUTIL_H +#pragma once #include #include "FbsonDocument.h" @@ -159,5 +158,3 @@ class FbsonToJson { }; } // namespace fbson - -#endif // FBSON_FBSONUTIL_H diff --git a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonWriter.h b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonWriter.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonWriter.h rename to 3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonWriter.h index 2b94ef0a01..e5010fade2 100644 --- a/3rdParty/rocksdb/v5.16.X/third-party/fbson/FbsonWriter.h +++ b/3rdParty/rocksdb/v5.18.X/third-party/fbson/FbsonWriter.h @@ -25,8 +25,7 @@ * @author Tian Xia */ -#ifndef FBSON_FBSONWRITER_H -#define FBSON_FBSONWRITER_H +#pragma once #include #include "FbsonDocument.h" @@ -433,5 +432,3 @@ class FbsonWriterT { typedef FbsonWriterT FbsonWriter; } // namespace fbson - -#endif // FBSON_FBSONWRITER_H diff --git 
a/3rdParty/rocksdb/v5.16.X/third-party/gtest-1.7.0/fused-src/gtest/CMakeLists.txt b/3rdParty/rocksdb/v5.18.X/third-party/gtest-1.7.0/fused-src/gtest/CMakeLists.txt similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/third-party/gtest-1.7.0/fused-src/gtest/CMakeLists.txt rename to 3rdParty/rocksdb/v5.18.X/third-party/gtest-1.7.0/fused-src/gtest/CMakeLists.txt diff --git a/3rdParty/rocksdb/v5.16.X/third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc b/3rdParty/rocksdb/v5.18.X/third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc rename to 3rdParty/rocksdb/v5.18.X/third-party/gtest-1.7.0/fused-src/gtest/gtest-all.cc diff --git a/3rdParty/rocksdb/v5.16.X/third-party/gtest-1.7.0/fused-src/gtest/gtest.h b/3rdParty/rocksdb/v5.18.X/third-party/gtest-1.7.0/fused-src/gtest/gtest.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/third-party/gtest-1.7.0/fused-src/gtest/gtest.h rename to 3rdParty/rocksdb/v5.18.X/third-party/gtest-1.7.0/fused-src/gtest/gtest.h diff --git a/3rdParty/rocksdb/v5.16.X/thirdparty.inc b/3rdParty/rocksdb/v5.18.X/thirdparty.inc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/thirdparty.inc rename to 3rdParty/rocksdb/v5.18.X/thirdparty.inc diff --git a/3rdParty/rocksdb/v5.16.X/tools/CMakeLists.txt b/3rdParty/rocksdb/v5.18.X/tools/CMakeLists.txt similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/CMakeLists.txt rename to 3rdParty/rocksdb/v5.18.X/tools/CMakeLists.txt diff --git a/3rdParty/rocksdb/v5.16.X/tools/Dockerfile b/3rdParty/rocksdb/v5.18.X/tools/Dockerfile similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/Dockerfile rename to 3rdParty/rocksdb/v5.18.X/tools/Dockerfile diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/README.md b/3rdParty/rocksdb/v5.18.X/tools/advisor/README.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/README.md rename to 
3rdParty/rocksdb/v5.18.X/tools/advisor/README.md diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/__init__.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/__init__.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/__init__.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/__init__.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/bench_runner.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/bench_runner.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/bench_runner.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/bench_runner.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/config_optimizer_example.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/config_optimizer_example.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/config_optimizer_example.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/config_optimizer_example.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_bench_runner.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_bench_runner.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_bench_runner.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_bench_runner.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_config_optimizer.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_config_optimizer.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_config_optimizer.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_config_optimizer.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_log_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_log_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_log_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_log_parser.py diff 
--git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_options_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_options_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_options_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_options_parser.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_stats_fetcher.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_stats_fetcher.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_stats_fetcher.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_stats_fetcher.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_timeseries_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_timeseries_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/db_timeseries_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/db_timeseries_parser.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/ini_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/ini_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/ini_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/ini_parser.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/rule_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/rule_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/rule_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/rule_parser.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/rule_parser_example.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/rule_parser_example.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/rule_parser_example.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/rule_parser_example.py diff --git 
a/3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/rules.ini b/3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/rules.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/advisor/rules.ini rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/advisor/rules.ini diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/__init__.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/__init__.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/__init__.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/__init__.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/LOG-0 b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/LOG-0 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/LOG-0 rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/LOG-0 diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/LOG-1 b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/LOG-1 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/LOG-1 rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/LOG-1 diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/OPTIONS-000005 b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/OPTIONS-000005 similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/OPTIONS-000005 rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/OPTIONS-000005 diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/log_stats_parser_keys_ts b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/log_stats_parser_keys_ts similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/log_stats_parser_keys_ts rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/log_stats_parser_keys_ts diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err1.ini 
b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err1.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err1.ini rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err1.ini diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err2.ini b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err2.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err2.ini rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err2.ini diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err3.ini b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err3.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err3.ini rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err3.ini diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err4.ini b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err4.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/rules_err4.ini rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/rules_err4.ini diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/test_rules.ini b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/test_rules.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/test_rules.ini rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/test_rules.ini diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/triggered_rules.ini b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/triggered_rules.ini similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/input_files/triggered_rules.ini rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/input_files/triggered_rules.ini diff 
--git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_bench_runner.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_bench_runner.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_bench_runner.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_bench_runner.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_log_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_log_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_log_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_log_parser.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_options_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_options_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_options_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_options_parser.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_stats_fetcher.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_stats_fetcher.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_db_stats_fetcher.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_db_stats_fetcher.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_rule_parser.py b/3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_rule_parser.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/advisor/test/test_rule_parser.py rename to 3rdParty/rocksdb/v5.18.X/tools/advisor/test/test_rule_parser.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/auto_sanity_test.sh b/3rdParty/rocksdb/v5.18.X/tools/auto_sanity_test.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/auto_sanity_test.sh rename to 3rdParty/rocksdb/v5.18.X/tools/auto_sanity_test.sh diff --git a/3rdParty/rocksdb/v5.16.X/tools/benchmark.sh 
b/3rdParty/rocksdb/v5.18.X/tools/benchmark.sh similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/tools/benchmark.sh rename to 3rdParty/rocksdb/v5.18.X/tools/benchmark.sh index b7c2eefdb2..0ba1081e19 100755 --- a/3rdParty/rocksdb/v5.16.X/tools/benchmark.sh +++ b/3rdParty/rocksdb/v5.18.X/tools/benchmark.sh @@ -151,8 +151,8 @@ function summarize_result { stall_pct=$( grep "^Cumulative stall" $test_out| tail -1 | awk '{ print $5 }' ) ops_sec=$( grep ^${bench_name} $test_out | awk '{ print $5 }' ) mb_sec=$( grep ^${bench_name} $test_out | awk '{ print $7 }' ) - lo_wgb=$( grep "^ L0" $test_out | tail -1 | awk '{ print $8 }' ) - sum_wgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ print $8 }' ) + lo_wgb=$( grep "^ L0" $test_out | tail -1 | awk '{ print $9 }' ) + sum_wgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ print $9 }' ) sum_size=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.1f", $3 / 1024.0 }' ) wamp=$( echo "scale=1; $sum_wgb / $lo_wgb" | bc ) wmb_ps=$( echo "scale=1; ( $sum_wgb * 1024.0 ) / $uptime" | bc ) @@ -177,6 +177,7 @@ function run_bulkload { $params_bulkload \ --threads=1 \ --memtablerep=vector \ + --allow_concurrent_memtable_write=false \ --disable_wal=1 \ --seed=$( date +%s ) \ 2>&1 | tee -a $output_dir/benchmark_bulkload_fillrandom.log" @@ -229,6 +230,7 @@ function run_manual_compaction_worker { --compaction_style=$2 \ --subcompactions=$3 \ --memtablerep=vector \ + --allow_concurrent_memtable_write=false \ --disable_wal=1 \ --max_background_jobs=$4 \ --seed=$( date +%s ) \ @@ -313,6 +315,7 @@ function run_fillseq { --min_level_to_compress=0 \ --threads=1 \ --memtablerep=vector \ + --allow_concurrent_memtable_write=false \ --disable_wal=$1 \ --seed=$( date +%s ) \ 2>&1 | tee -a $log_file_name" diff --git a/3rdParty/rocksdb/v5.16.X/tools/benchmark_leveldb.sh b/3rdParty/rocksdb/v5.18.X/tools/benchmark_leveldb.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/benchmark_leveldb.sh rename to 
3rdParty/rocksdb/v5.18.X/tools/benchmark_leveldb.sh diff --git a/3rdParty/rocksdb/v5.16.X/tools/blob_dump.cc b/3rdParty/rocksdb/v5.18.X/tools/blob_dump.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/blob_dump.cc rename to 3rdParty/rocksdb/v5.18.X/tools/blob_dump.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/check_format_compatible.sh b/3rdParty/rocksdb/v5.18.X/tools/check_format_compatible.sh similarity index 68% rename from 3rdParty/rocksdb/v5.16.X/tools/check_format_compatible.sh rename to 3rdParty/rocksdb/v5.18.X/tools/check_format_compatible.sh index 39335446da..2d260c7ecc 100755 --- a/3rdParty/rocksdb/v5.16.X/tools/check_format_compatible.sh +++ b/3rdParty/rocksdb/v5.18.X/tools/check_format_compatible.sh @@ -17,7 +17,7 @@ mkdir $input_data_path || true rm -rf $script_copy_dir cp $scriptpath $script_copy_dir -rf -# Generate four random files. +# Generate random files. for i in {1..6} do input_data[$i]=$input_data_path/data$i @@ -41,10 +41,22 @@ with open('${input_data[$i]}', 'w') as f: EOF done +# Generate file(s) with sorted keys. 
+sorted_input_data=$input_data_path/sorted_data +echo == Generating file with sorted keys ${sorted_input_data} +python - <> f, k + " ==> " + v +EOF + declare -a backward_compatible_checkout_objs=("2.2.fb.branch" "2.3.fb.branch" "2.4.fb.branch" "2.5.fb.branch" "2.6.fb.branch" "2.7.fb.branch" "2.8.1.fb" "3.0.fb.branch" "3.1.fb" "3.2.fb" "3.3.fb" "3.4.fb" "3.5.fb" "3.6.fb" "3.7.fb" "3.8.fb" "3.9.fb") declare -a forward_compatible_checkout_objs=("3.10.fb" "3.11.fb" "3.12.fb" "3.13.fb" "4.0.fb" "4.1.fb" "4.2.fb" "4.3.fb" "4.4.fb" "4.5.fb" "4.6.fb" "4.7.fb" "4.8.fb" "4.9.fb" "4.10.fb" "4.11.fb" "4.12.fb" "4.13.fb" "5.0.fb" "5.1.fb" "5.2.fb" "5.3.fb" "5.4.fb" "5.5.fb" "5.6.fb" "5.7.fb" "5.8.fb" "5.9.fb" "5.10.fb") declare -a forward_compatible_with_options_checkout_objs=("5.11.fb" "5.12.fb" "5.13.fb" "5.14.fb") declare -a checkout_objs=(${backward_compatible_checkout_objs[@]} ${forward_compatible_checkout_objs[@]} ${forward_compatible_with_options_checkout_objs[@]}) +declare -a extern_sst_ingestion_compatible_checkout_objs=("5.14.fb" "5.15.fb" "5.16.fb" "5.17.fb") generate_db() { @@ -68,6 +80,28 @@ compare_db() set -e } +write_external_sst() +{ + set +e + $script_copy_dir/write_external_sst.sh $1 $2 $3 + if [ $? -ne 0 ]; then + echo ==== Error writing external SST file using data from $1 to $3 ==== + exit 1 + fi + set -e +} + +ingest_external_sst() +{ + set +e + $script_copy_dir/ingest_external_sst.sh $1 $2 + if [ $? -ne 0 ]; then + echo ==== Error ingesting external SST in $2 to DB at $1 ==== + exit 1 + fi + set -e +} + # Sandcastle sets us up with a remote that is just another directory on the same # machine and doesn't have our branches. Need to fetch them so checkout works. # Remote add may fail if added previously (we don't cleanup). 
@@ -75,6 +109,41 @@ git remote add github_origin "https://github.com/facebook/rocksdb.git" set -e https_proxy="fwdproxy:8080" git fetch github_origin +# Compatibility test for external SST file ingestion +for checkout_obj in "${extern_sst_ingestion_compatible_checkout_objs[@]}" +do + echo == Generating DB with extern SST file in "$checkout_obj" ... + https_proxy="fwdproxy:8080" git checkout github_origin/$checkout_obj -b $checkout_obj + make clean + make ldb -j32 + write_external_sst $input_data_path $test_dir/$checkout_obj $test_dir/$checkout_obj + ingest_external_sst $test_dir/$checkout_obj $test_dir/$checkout_obj +done + +checkout_flag=${1:-"master"} + +echo == Building $checkout_flag debug +https_proxy="fwdproxy:8080" git checkout github_origin/$checkout_flag -b tmp-$checkout_flag +make clean +make ldb -j32 +compare_base_db_dir=$test_dir"/base_db_dir" +write_external_sst $input_data_path $compare_base_db_dir $compare_base_db_dir +ingest_external_sst $compare_base_db_dir $compare_base_db_dir + +for checkout_obj in "${extern_sst_ingestion_compatible_checkout_objs[@]}" +do + echo == Build "$checkout_obj" and try to open DB generated using $checkout_flag + git checkout $checkout_obj + make clean + make ldb -j32 + compare_db $test_dir/$checkout_obj $compare_base_db_dir db_dump.txt 1 1 + git checkout tmp-$checkout_flag + # Clean up + git branch -D $checkout_obj +done + +echo == Finish compatibility test for SST ingestion. + for checkout_obj in "${checkout_objs[@]}" do echo == Generating DB from "$checkout_obj" ... 
@@ -87,7 +156,7 @@ done checkout_flag=${1:-"master"} echo == Building $checkout_flag debug -https_proxy="fwdproxy:8080" git checkout github_origin/$checkout_flag -b tmp-$checkout_flag +git checkout tmp-$checkout_flag make clean make ldb -j32 compare_base_db_dir=$test_dir"/base_db_dir" diff --git a/3rdParty/rocksdb/v5.16.X/tools/db_bench.cc b/3rdParty/rocksdb/v5.18.X/tools/db_bench.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/db_bench.cc rename to 3rdParty/rocksdb/v5.18.X/tools/db_bench.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/db_bench_tool.cc b/3rdParty/rocksdb/v5.18.X/tools/db_bench_tool.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/tools/db_bench_tool.cc rename to 3rdParty/rocksdb/v5.18.X/tools/db_bench_tool.cc index c32822c8f1..75f1d449d5 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/db_bench_tool.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/db_bench_tool.cc @@ -33,10 +33,12 @@ #include #include "db/db_impl.h" +#include "db/malloc_stats.h" #include "db/version_set.h" #include "hdfs/env_hdfs.h" #include "monitoring/histogram.h" #include "monitoring/statistics.h" +#include "options/cf_options.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/cache.h" @@ -45,7 +47,6 @@ #include "rocksdb/filter_policy.h" #include "rocksdb/memtablerep.h" #include "rocksdb/options.h" -#include "options/cf_options.h" #include "rocksdb/perf_context.h" #include "rocksdb/persistent_cache.h" #include "rocksdb/rate_limiter.h" @@ -248,6 +249,10 @@ DEFINE_bool(reverse_iterator, false, "When true use Prev rather than Next for iterators that do " "Seek and then Next"); +DEFINE_int64(max_scan_distance, 0, + "Used to define iterate_upper_bound (or iterate_lower_bound " + "if FLAGS_reverse_iterator is set to true) when value is nonzero"); + DEFINE_bool(use_uint64_comparator, false, "use Uint64 user comparator"); DEFINE_int64(batch_size, 1, "Batch size"); @@ -640,9 +645,11 @@ DEFINE_bool(optimize_filters_for_hits, false, 
DEFINE_uint64(delete_obsolete_files_period_micros, 0, "Ignored. Left here for backward compatibility"); +DEFINE_int64(writes_before_delete_range, 0, + "Number of writes before DeleteRange is called regularly."); + DEFINE_int64(writes_per_range_tombstone, 0, - "Number of writes between range " - "tombstones"); + "Number of writes between range tombstones"); DEFINE_int64(range_tombstone_width, 100, "Number of keys in tombstone's range"); @@ -935,6 +942,9 @@ DEFINE_uint64(max_compaction_bytes, rocksdb::Options().max_compaction_bytes, #ifndef ROCKSDB_LITE DEFINE_bool(readonly, false, "Run read only benchmarks."); + +DEFINE_bool(print_malloc_stats, false, + "Print malloc stats to stdout after benchmarks finish."); #endif // ROCKSDB_LITE DEFINE_bool(disable_auto_compactions, false, "Do not auto trigger compactions"); @@ -1137,11 +1147,12 @@ class ReportFileOpEnv : public EnvWrapper { counters_.bytes_written_ = 0; } - Status NewSequentialFile(const std::string& f, unique_ptr* r, + Status NewSequentialFile(const std::string& f, + std::unique_ptr* r, const EnvOptions& soptions) override { class CountingFile : public SequentialFile { private: - unique_ptr target_; + std::unique_ptr target_; ReportFileOpCounters* counters_; public: @@ -1169,11 +1180,11 @@ class ReportFileOpEnv : public EnvWrapper { } Status NewRandomAccessFile(const std::string& f, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& soptions) override { class CountingFile : public RandomAccessFile { private: - unique_ptr target_; + std::unique_ptr target_; ReportFileOpCounters* counters_; public: @@ -1198,11 +1209,11 @@ class ReportFileOpEnv : public EnvWrapper { return s; } - Status NewWritableFile(const std::string& f, unique_ptr* r, + Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& soptions) override { class CountingFile : public WritableFile { private: - unique_ptr target_; + std::unique_ptr target_; ReportFileOpCounters* counters_; public: @@ -1968,12 +1979,15 @@ 
class Benchmark { int prefix_size_; int64_t keys_per_prefix_; int64_t entries_per_batch_; + int64_t writes_before_delete_range_; int64_t writes_per_range_tombstone_; int64_t range_tombstone_width_; int64_t max_num_range_tombstones_; WriteOptions write_options_; Options open_options_; // keep options around to properly destroy db later +#ifndef ROCKSDB_LITE TraceOptions trace_options_; +#endif int64_t reads_; int64_t deletes_; double read_random_exp_range_; @@ -1983,6 +1997,52 @@ class Benchmark { bool report_file_operations_; bool use_blob_db_; + class ErrorHandlerListener : public EventListener { + public: + ErrorHandlerListener() + : mutex_(), + cv_(&mutex_), + no_auto_recovery_(false), + recovery_complete_(false) {} + + ~ErrorHandlerListener() {} + + void OnErrorRecoveryBegin(BackgroundErrorReason /*reason*/, + Status /*bg_error*/, bool* auto_recovery) { + if (*auto_recovery && no_auto_recovery_) { + *auto_recovery = false; + } + } + + void OnErrorRecoveryCompleted(Status /*old_bg_error*/) { + InstrumentedMutexLock l(&mutex_); + recovery_complete_ = true; + cv_.SignalAll(); + } + + bool WaitForRecovery(uint64_t /*abs_time_us*/) { + InstrumentedMutexLock l(&mutex_); + if (!recovery_complete_) { + cv_.Wait(/*abs_time_us*/); + } + if (recovery_complete_) { + recovery_complete_ = false; + return true; + } + return false; + } + + void EnableAutoRecovery(bool enable = true) { no_auto_recovery_ = !enable; } + + private: + InstrumentedMutex mutex_; + InstrumentedCondVar cv_; + bool no_auto_recovery_; + bool recovery_complete_; + }; + + std::shared_ptr listener_; + bool SanityCheck() { if (FLAGS_compression_ratio > 1) { fprintf(stderr, "compression_ratio should be between 0 and 1\n"); @@ -2318,6 +2378,8 @@ class Benchmark { } } } + + listener_.reset(new ErrorHandlerListener()); } ~Benchmark() { @@ -2447,6 +2509,7 @@ void VerifyDBFromDB(std::string& truth_db_name) { value_size_ = FLAGS_value_size; key_size_ = FLAGS_key_size; entries_per_batch_ = FLAGS_batch_size; + 
writes_before_delete_range_ = FLAGS_writes_before_delete_range; writes_per_range_tombstone_ = FLAGS_writes_per_range_tombstone; range_tombstone_width_ = FLAGS_range_tombstone_width; max_num_range_tombstones_ = FLAGS_max_num_range_tombstones; @@ -2801,6 +2864,7 @@ void VerifyDBFromDB(std::string& truth_db_name) { } SetPerfLevel(static_cast (shared->perf_level)); + perf_context.EnablePerLevelPerfContext(); thread->stats.Start(thread->tid); (arg->bm->*(arg->method))(thread); thread->stats.Stop(); @@ -2992,7 +3056,7 @@ void VerifyDBFromDB(std::string& truth_db_name) { int64_t bytes = 0; int decompress_size; while (ok && bytes < 1024 * 1048576) { - char *uncompressed = nullptr; + CacheAllocationPtr uncompressed; switch (FLAGS_compression_type_e) { case rocksdb::kSnappyCompression: { // get size and allocate here to make comparison fair @@ -3002,45 +3066,44 @@ void VerifyDBFromDB(std::string& truth_db_name) { ok = false; break; } - uncompressed = new char[ulength]; + uncompressed = AllocateBlock(ulength, nullptr); ok = Snappy_Uncompress(compressed.data(), compressed.size(), - uncompressed); + uncompressed.get()); break; } case rocksdb::kZlibCompression: uncompressed = Zlib_Uncompress(uncompression_ctx, compressed.data(), compressed.size(), &decompress_size, 2); - ok = uncompressed != nullptr; + ok = uncompressed.get() != nullptr; break; case rocksdb::kBZip2Compression: uncompressed = BZip2_Uncompress(compressed.data(), compressed.size(), &decompress_size, 2); - ok = uncompressed != nullptr; + ok = uncompressed.get() != nullptr; break; case rocksdb::kLZ4Compression: uncompressed = LZ4_Uncompress(uncompression_ctx, compressed.data(), compressed.size(), &decompress_size, 2); - ok = uncompressed != nullptr; + ok = uncompressed.get() != nullptr; break; case rocksdb::kLZ4HCCompression: uncompressed = LZ4_Uncompress(uncompression_ctx, compressed.data(), compressed.size(), &decompress_size, 2); - ok = uncompressed != nullptr; + ok = uncompressed.get() != nullptr; break; case 
rocksdb::kXpressCompression: - uncompressed = XPRESS_Uncompress(compressed.data(), compressed.size(), - &decompress_size); - ok = uncompressed != nullptr; + uncompressed.reset(XPRESS_Uncompress( + compressed.data(), compressed.size(), &decompress_size)); + ok = uncompressed.get() != nullptr; break; case rocksdb::kZSTD: uncompressed = ZSTD_Uncompress(uncompression_ctx, compressed.data(), compressed.size(), &decompress_size); - ok = uncompressed != nullptr; + ok = uncompressed.get() != nullptr; break; default: ok = false; } - delete[] uncompressed; bytes += input.size(); thread->stats.FinishedOps(nullptr, nullptr, 1, kUncompress); } @@ -3500,6 +3563,7 @@ void VerifyDBFromDB(std::string& truth_db_name) { FLAGS_rate_limiter_auto_tuned)); } + options.listeners.emplace_back(listener_); if (FLAGS_num_multi_db <= 1) { OpenDb(options, FLAGS_db, &db_); } else { @@ -3745,7 +3809,8 @@ void VerifyDBFromDB(std::string& truth_db_name) { Duration duration(test_duration, max_ops, ops_per_stage); for (size_t i = 0; i < num_key_gens; i++) { - key_gens[i].reset(new KeyGenerator(&(thread->rand), write_mode, num_, + key_gens[i].reset(new KeyGenerator(&(thread->rand), write_mode, + num_ + max_num_range_tombstones_, ops_per_stage)); } @@ -3826,9 +3891,13 @@ void VerifyDBFromDB(std::string& truth_db_name) { bytes += value_size_ + key_size_; ++num_written; if (writes_per_range_tombstone_ > 0 && - num_written / writes_per_range_tombstone_ <= + num_written > writes_before_delete_range_ && + (num_written - writes_before_delete_range_) / + writes_per_range_tombstone_ <= max_num_range_tombstones_ && - num_written % writes_per_range_tombstone_ == 0) { + (num_written - writes_before_delete_range_) % + writes_per_range_tombstone_ == + 0) { int64_t begin_num = key_gens[id]->Next(); if (FLAGS_expand_range_tombstones) { for (int64_t offset = 0; offset < range_tombstone_width_; @@ -3892,6 +3961,10 @@ void VerifyDBFromDB(std::string& truth_db_name) { NewGenericRateLimiter(write_rate)); } } + if 
(!s.ok()) { + s = listener_->WaitForRecovery(600000000) ? Status::OK() : s; + } + if (!s.ok()) { fprintf(stderr, "put error: %s\n", s.ToString().c_str()); exit(1); @@ -4175,7 +4248,7 @@ void VerifyDBFromDB(std::string& truth_db_name) { } if (levelMeta.level == 0) { for (auto& fileMeta : levelMeta.files) { - fprintf(stdout, "Level[%d]: %s(size: %" PRIu64 " bytes)\n", + fprintf(stdout, "Level[%d]: %s(size: %" ROCKSDB_PRIszt " bytes)\n", levelMeta.level, fileMeta.name.c_str(), fileMeta.size); } } else { @@ -4494,9 +4567,31 @@ void VerifyDBFromDB(std::string& truth_db_name) { std::unique_ptr key_guard; Slice key = AllocateKey(&key_guard); + std::unique_ptr upper_bound_key_guard; + Slice upper_bound = AllocateKey(&upper_bound_key_guard); + std::unique_ptr lower_bound_key_guard; + Slice lower_bound = AllocateKey(&lower_bound_key_guard); + Duration duration(FLAGS_duration, reads_); char value_buffer[256]; while (!duration.Done(1)) { + int64_t seek_pos = thread->rand.Next() % FLAGS_num; + GenerateKeyFromInt((uint64_t)seek_pos, FLAGS_num, &key); + if (FLAGS_max_scan_distance != 0) { + if (FLAGS_reverse_iterator) { + GenerateKeyFromInt( + (uint64_t)std::max((int64_t)0, + seek_pos - FLAGS_max_scan_distance), + FLAGS_num, &lower_bound); + options.iterate_lower_bound = &lower_bound; + } else { + GenerateKeyFromInt( + (uint64_t)std::min(FLAGS_num, seek_pos + FLAGS_max_scan_distance), + FLAGS_num, &upper_bound); + options.iterate_upper_bound = &upper_bound; + } + } + if (!FLAGS_use_tailing_iterator) { if (db_.db != nullptr) { delete single_iter; @@ -4517,7 +4612,6 @@ void VerifyDBFromDB(std::string& truth_db_name) { iter_to_use = multi_iters[thread->rand.Next() % multi_iters.size()]; } - GenerateKeyFromInt(thread->rand.Next() % FLAGS_num, FLAGS_num, &key); iter_to_use->Seek(key); read++; if (iter_to_use->Valid() && iter_to_use->key().compare(key) == 0) { @@ -5614,7 +5708,7 @@ void VerifyDBFromDB(std::string& truth_db_name) { void Replay(ThreadState* /*thread*/, 
DBWithColumnFamilies* db_with_cfh) { Status s; - unique_ptr trace_reader; + std::unique_ptr trace_reader; s = NewFileTraceReader(FLAGS_env, EnvOptions(), FLAGS_trace_file, &trace_reader); if (!s.ok()) { @@ -5742,6 +5836,15 @@ int db_bench_tool(int argc, char** argv) { rocksdb::Benchmark benchmark; benchmark.Run(); + +#ifndef ROCKSDB_LITE + if (FLAGS_print_malloc_stats) { + std::string stats_string; + rocksdb::DumpMallocStats(&stats_string); + fprintf(stdout, "Malloc stats:\n%s\n", stats_string.c_str()); + } +#endif // ROCKSDB_LITE + return 0; } } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/tools/db_bench_tool_test.cc b/3rdParty/rocksdb/v5.18.X/tools/db_bench_tool_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/tools/db_bench_tool_test.cc rename to 3rdParty/rocksdb/v5.18.X/tools/db_bench_tool_test.cc index 67426066eb..dfc461193c 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/db_bench_tool_test.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/db_bench_tool_test.cc @@ -279,7 +279,7 @@ const std::string options_file_content = R"OPTIONS_FILE( TEST_F(DBBenchTest, OptionsFileFromFile) { const std::string kOptionsFileName = test_path_ + "/OPTIONS_flash"; - unique_ptr writable; + std::unique_ptr writable; ASSERT_OK(Env::Default()->NewWritableFile(kOptionsFileName, &writable, EnvOptions())); ASSERT_OK(writable->Append(options_file_content)); diff --git a/3rdParty/rocksdb/v5.16.X/tools/db_crashtest.py b/3rdParty/rocksdb/v5.18.X/tools/db_crashtest.py similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/tools/db_crashtest.py rename to 3rdParty/rocksdb/v5.18.X/tools/db_crashtest.py index d6ae40c28c..0bf43780df 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/db_crashtest.py +++ b/3rdParty/rocksdb/v5.18.X/tools/db_crashtest.py @@ -15,6 +15,9 @@ import argparse # default_params < {blackbox,whitebox}_default_params < # simple_default_params < # {blackbox,whitebox}_simple_default_params < args +# for enable_atomic_flush: +# default_params < 
{blackbox,whitebox}_default_params < +# atomic_flush_params < args expected_values_file = tempfile.NamedTemporaryFile() @@ -32,6 +35,7 @@ default_params = { "destroy_db_initially": 0, "enable_pipelined_write": lambda: random.randint(0, 1), "expected_values_path": expected_values_file.name, + "flush_one_in": 1000000, "max_background_compactions": 20, "max_bytes_for_level_base": 10485760, "max_key": 100000000, @@ -55,6 +59,7 @@ default_params = { "write_buffer_size": 4 * 1024 * 1024, "writepercent": 35, "format_version": lambda: random.randint(2, 4), + "index_block_restart_interval": lambda: random.choice(range(1, 16)), } _TEST_DIR_ENV_VAR = 'TEST_TMPDIR' @@ -120,6 +125,15 @@ blackbox_simple_default_params = { whitebox_simple_default_params = {} +atomic_flush_params = { + "atomic_flush": 1, + "disable_wal": 1, + "reopen": 0, + # use small value for write_buffer_size so that RocksDB triggers flush + # more frequently + "write_buffer_size": 1024 * 1024, +} + def finalize_and_sanitize(src_params): dest_params = dict([(k, v() if callable(v) else v) @@ -150,6 +164,8 @@ def gen_cmd_params(args): params.update(blackbox_simple_default_params) if args.test_type == 'whitebox': params.update(whitebox_simple_default_params) + if args.enable_atomic_flush: + params.update(atomic_flush_params) for k, v in vars(args).items(): if v is not None: @@ -162,7 +178,7 @@ def gen_cmd(params, unknown_params): '--{0}={1}'.format(k, v) for k, v in finalize_and_sanitize(params).items() if k not in set(['test_type', 'simple', 'duration', 'interval', - 'random_kill_odd']) + 'random_kill_odd', 'enable_atomic_flush']) and v is not None] + unknown_params return cmd @@ -354,6 +370,7 @@ def main(): db_stress multiple times") parser.add_argument("test_type", choices=["blackbox", "whitebox"]) parser.add_argument("--simple", action="store_true") + parser.add_argument("--enable_atomic_flush", action='store_true') all_params = dict(default_params.items() + blackbox_default_params.items() diff --git 
a/3rdParty/rocksdb/v5.16.X/tools/db_repl_stress.cc b/3rdParty/rocksdb/v5.18.X/tools/db_repl_stress.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/tools/db_repl_stress.cc rename to 3rdParty/rocksdb/v5.18.X/tools/db_repl_stress.cc index 5901b97778..c640b5945b 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/db_repl_stress.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/db_repl_stress.cc @@ -67,7 +67,7 @@ struct ReplicationThread { static void ReplicationThreadBody(void* arg) { ReplicationThread* t = reinterpret_cast(arg); DB* db = t->db; - unique_ptr iter; + std::unique_ptr iter; SequenceNumber currentSeqNum = 1; while (!t->stop.load(std::memory_order_acquire)) { iter.reset(); diff --git a/3rdParty/rocksdb/v5.16.X/tools/db_sanity_test.cc b/3rdParty/rocksdb/v5.18.X/tools/db_sanity_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/db_sanity_test.cc rename to 3rdParty/rocksdb/v5.18.X/tools/db_sanity_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/db_stress.cc b/3rdParty/rocksdb/v5.18.X/tools/db_stress.cc similarity index 85% rename from 3rdParty/rocksdb/v5.16.X/tools/db_stress.cc rename to 3rdParty/rocksdb/v5.18.X/tools/db_stress.cc index a7cfd51d66..20b2899e95 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/db_stress.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/db_stress.cc @@ -133,6 +133,8 @@ DEFINE_bool(test_batches_snapshots, false, "\t(b) No long validation at the end (more speed up)\n" "\t(c) Test snapshot and atomicity of batch writes"); +DEFINE_bool(atomic_flush, false, "If true, the test enables atomic flush\n"); + DEFINE_int32(threads, 32, "Number of concurrent threads to run."); DEFINE_int32(ttl, -1, @@ -232,6 +234,11 @@ DEFINE_int32( static_cast(rocksdb::BlockBasedTableOptions().format_version), "Format version of SST files."); +DEFINE_int32(index_block_restart_interval, + rocksdb::BlockBasedTableOptions().index_block_restart_interval, + "Number of keys between restart points " + "for delta encoding of keys in index block."); + 
DEFINE_int32(max_background_compactions, rocksdb::Options().max_background_compactions, "The maximum number of concurrent background compactions " @@ -412,6 +419,10 @@ DEFINE_int32(compact_range_one_in, 0, "If non-zero, then CompactRange() will be called once for every N " "operations on average. 0 indicates CompactRange() is disabled."); +DEFINE_int32(flush_one_in, 0, + "If non-zero, then Flush() will be called once for every N ops " + "on average. 0 indicates calls to Flush() are disabled."); + DEFINE_int32(compact_range_width, 10000, "The width of the ranges passed to CompactRange()."); @@ -419,6 +430,10 @@ DEFINE_int32(acquire_snapshot_one_in, 0, "If non-zero, then acquires a snapshot once every N operations on " "average."); +DEFINE_bool(compare_full_db_state_snapshot, false, + "If set we compare state of entire db (in one of the threads) with" + "each snapshot."); + DEFINE_uint64(snapshot_hold_ops, 0, "If non-zero, then releases snapshots N operations after they're " "acquired."); @@ -591,7 +606,7 @@ enum RepFactory StringToRepFactory(const char* ctype) { #ifdef _MSC_VER #pragma warning(push) // truncation of constant value on static_cast -#pragma warning(disable: 4309) +#pragma warning(disable : 4309) #endif bool GetNextPrefix(const rocksdb::Slice& src, std::string* v) { std::string ret = src.ToString(); @@ -651,6 +666,18 @@ static std::string Key(int64_t val) { return big_endian_key; } +static bool GetIntVal(std::string big_endian_key, uint64_t *key_p) { + unsigned int size_key = sizeof(*key_p); + assert(big_endian_key.size() == size_key); + std::string little_endian_key; + little_endian_key.resize(size_key); + for (size_t i = 0 ; i < size_key; ++i) { + little_endian_key[i] = big_endian_key[size_key - 1 - i]; + } + Slice little_endian_slice = Slice(little_endian_key); + return GetFixed64(&little_endian_slice, key_p); +} + static std::string StringToHex(const std::string& str) { std::string result = "0x"; result.append(Slice(str).ToString(true)); @@ -765,46 
+792,36 @@ class Stats { } } - void AddBytesForWrites(int nwrites, size_t nbytes) { + void AddBytesForWrites(long nwrites, size_t nbytes) { writes_ += nwrites; bytes_ += nbytes; } - void AddGets(int ngets, int nfounds) { + void AddGets(long ngets, long nfounds) { founds_ += nfounds; gets_ += ngets; } - void AddPrefixes(int nprefixes, int count) { + void AddPrefixes(long nprefixes, long count) { prefixes_ += nprefixes; iterator_size_sums_ += count; } - void AddIterations(int n) { - iterations_ += n; - } + void AddIterations(long n) { iterations_ += n; } - void AddDeletes(int n) { - deletes_ += n; - } + void AddDeletes(long n) { deletes_ += n; } void AddSingleDeletes(size_t n) { single_deletes_ += n; } - void AddRangeDeletions(int n) { - range_deletions_ += n; - } + void AddRangeDeletions(long n) { range_deletions_ += n; } - void AddCoveredByRangeDeletions(int n) { - covered_by_range_deletions_ += n; - } + void AddCoveredByRangeDeletions(long n) { covered_by_range_deletions_ += n; } - void AddErrors(int n) { - errors_ += n; - } + void AddErrors(long n) { errors_ += n; } - void AddNumCompactFilesSucceed(int n) { num_compact_files_succeed_ += n; } + void AddNumCompactFilesSucceed(long n) { num_compact_files_succeed_ += n; } - void AddNumCompactFilesFailed(int n) { num_compact_files_failed_ += n; } + void AddNumCompactFilesFailed(long n) { num_compact_files_failed_ += n; } void Report(const char* name) { std::string extra; @@ -923,7 +940,7 @@ class SharedState { if (status.ok()) { status = FLAGS_env->GetFileSize(FLAGS_expected_values_path, &size); } - unique_ptr wfile; + std::unique_ptr wfile; if (status.ok() && size == 0) { const EnvOptions soptions; status = FLAGS_env->NewWritableFile(FLAGS_expected_values_path, &wfile, @@ -1207,6 +1224,8 @@ struct ThreadState { Status status; // The value of the Get std::string value; + // optional state of all keys in the db + std::vector *key_vec; }; std::queue > snapshot_queue; @@ -1219,9 +1238,13 @@ class DbStressListener : 
public EventListener { DbStressListener(const std::string& db_name, const std::vector& db_paths, const std::vector& column_families) - : db_name_(db_name), db_paths_(db_paths), - column_families_(column_families) {} - virtual ~DbStressListener() {} + : db_name_(db_name), + db_paths_(db_paths), + column_families_(column_families), + num_pending_file_creations_(0) {} + virtual ~DbStressListener() { + assert(num_pending_file_creations_ == 0); + } #ifndef ROCKSDB_LITE virtual void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { assert(IsValidColumnFamilyName(info.cf_name)); @@ -1246,16 +1269,23 @@ class DbStressListener : public EventListener { std::chrono::microseconds(Random::GetTLSInstance()->Uniform(5000))); } + virtual void OnTableFileCreationStarted( + const TableFileCreationBriefInfo& /*info*/) override { + ++num_pending_file_creations_; + } virtual void OnTableFileCreated(const TableFileCreationInfo& info) override { assert(info.db_name == db_name_); assert(IsValidColumnFamilyName(info.cf_name)); - VerifyFilePath(info.file_path); + if (info.file_size) { + VerifyFilePath(info.file_path); + } assert(info.job_id > 0 || FLAGS_compact_files_one_in > 0); if (info.status.ok() && info.file_size > 0) { assert(info.table_properties.data_size > 0); assert(info.table_properties.raw_key_size > 0); assert(info.table_properties.num_entries > 0); } + --num_pending_file_creations_; } protected: @@ -1328,6 +1358,7 @@ class DbStressListener : public EventListener { std::string db_name_; std::vector db_paths_; std::vector column_families_; + std::atomic num_pending_file_creations_; }; } // namespace @@ -1703,6 +1734,24 @@ class StressTest { ")"); } } + if (snap_state.key_vec != nullptr) { + // When `prefix_extractor` is set, seeking to beginning and scanning + // across prefixes are only supported with `total_order_seek` set. 
+ ropt.total_order_seek = true; + std::unique_ptr iterator(db->NewIterator(ropt)); + std::unique_ptr> tmp_bitvec(new std::vector(FLAGS_max_key)); + for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { + uint64_t key_val; + if (GetIntVal(iterator->key().ToString(), &key_val)) { + (*tmp_bitvec.get())[key_val] = true; + } + } + if (!std::equal(snap_state.key_vec->begin(), + snap_state.key_vec->end(), + tmp_bitvec.get()->begin())) { + return Status::Corruption("Found inconsistent keys at this snapshot"); + } + } return Status::OK(); } @@ -1786,6 +1835,7 @@ class StressTest { while (!thread->snapshot_queue.empty()) { db_->ReleaseSnapshot( thread->snapshot_queue.front().second.snapshot); + delete thread->snapshot_queue.front().second.key_vec; thread->snapshot_queue.pop(); } thread->shared->IncVotedReopen(); @@ -1837,27 +1887,6 @@ class StressTest { } } - if (FLAGS_backup_one_in > 0 && - thread->rand.Uniform(FLAGS_backup_one_in) == 0) { - std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid); - BackupableDBOptions backup_opts(backup_dir); - BackupEngine* backup_engine = nullptr; - Status s = BackupEngine::Open(FLAGS_env, backup_opts, &backup_engine); - if (s.ok()) { - s = backup_engine->CreateNewBackup(db_); - } - if (s.ok()) { - s = backup_engine->PurgeOldBackups(0 /* num_backups_to_keep */); - } - if (!s.ok()) { - printf("A BackupEngine operation failed with: %s\n", - s.ToString().c_str()); - } - if (backup_engine != nullptr) { - delete backup_engine; - } - } - if (FLAGS_compact_files_one_in > 0 && thread->rand.Uniform(FLAGS_compact_files_one_in) == 0) { auto* random_cf = @@ -1897,8 +1926,8 @@ class StressTest { db_->CompactFiles(CompactionOptions(), random_cf, input_files, static_cast(output_level)); if (!s.ok()) { - printf("Unable to perform CompactFiles(): %s\n", - s.ToString().c_str()); + fprintf(stdout, "Unable to perform CompactFiles(): %s\n", + s.ToString().c_str()); thread->stats.AddNumCompactFilesFailed(1); } else { 
thread->stats.AddNumCompactFilesSucceed(1); @@ -1943,6 +1972,21 @@ class StressTest { std::vector rand_column_families = GenerateColumnFamilies(FLAGS_column_families, rand_column_family); + + if (FLAGS_flush_one_in > 0 && + thread->rand.Uniform(FLAGS_flush_one_in) == 0) { + FlushOptions flush_opts; + std::vector cfhs; + std::for_each( + rand_column_families.begin(), rand_column_families.end(), + [this, &cfhs](int k) { cfhs.push_back(column_families_[k]); }); + Status status = db_->Flush(flush_opts, cfhs); + if (!status.ok()) { + fprintf(stdout, "Unable to perform Flush(): %s\n", + status.ToString().c_str()); + } + } + std::vector rand_keys = GenerateKeys(rand_key); if (FLAGS_ingest_external_file_one_in > 0 && @@ -1950,6 +1994,15 @@ class StressTest { TestIngestExternalFile(thread, rand_column_families, rand_keys, lock); } + if (FLAGS_backup_one_in > 0 && + thread->rand.Uniform(FLAGS_backup_one_in) == 0) { + Status s = TestBackupRestore(thread, rand_column_families, rand_keys); + if (!s.ok()) { + VerificationAbort(shared, "Backup/restore gave inconsistent state", + s); + } + } + if (FLAGS_acquire_snapshot_one_in > 0 && thread->rand.Uniform(FLAGS_acquire_snapshot_one_in) == 0) { auto snapshot = db_->GetSnapshot(); @@ -1960,9 +2013,26 @@ class StressTest { // will later read the same key before releasing the snapshot and verify // that the results are the same. auto status_at = db_->Get(ropt, column_family, key, &value_at); + std::vector *key_vec = nullptr; + + if (FLAGS_compare_full_db_state_snapshot && + (thread->tid == 0)) { + key_vec = new std::vector(FLAGS_max_key); + // When `prefix_extractor` is set, seeking to beginning and scanning + // across prefixes are only supported with `total_order_seek` set. 
+ ropt.total_order_seek = true; + std::unique_ptr iterator(db_->NewIterator(ropt)); + for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { + uint64_t key_val; + if (GetIntVal(iterator->key().ToString(), &key_val)) { + (*key_vec)[key_val] = true; + } + } + } + ThreadState::SnapshotState snap_state = { snapshot, rand_column_family, column_family->GetName(), - keystr, status_at, value_at}; + keystr, status_at, value_at, key_vec}; thread->snapshot_queue.emplace( std::min(FLAGS_ops_per_thread - 1, i + FLAGS_snapshot_hold_ops), snap_state); @@ -1980,6 +2050,7 @@ class StressTest { VerificationAbort(shared, "Snapshot gave inconsistent state", s); } db_->ReleaseSnapshot(snap_state.snapshot); + delete snap_state.key_vec; thread->snapshot_queue.pop(); } @@ -1997,14 +2068,14 @@ class StressTest { } else if (prefixBound <= prob_op && prob_op < writeBound) { // OPERATION write TestPut(thread, write_opts, read_opts, rand_column_families, rand_keys, - value, lock); + value, lock); } else if (writeBound <= prob_op && prob_op < delBound) { // OPERATION delete TestDelete(thread, write_opts, rand_column_families, rand_keys, lock); } else if (delBound <= prob_op && prob_op < delRangeBound) { // OPERATION delete range TestDeleteRange(thread, write_opts, rand_column_families, rand_keys, - lock); + lock); } else { // OPERATION iterate TestIterate(thread, read_opts, rand_column_families, rand_keys); @@ -2022,8 +2093,7 @@ class StressTest { virtual bool ShouldAcquireMutexOnKey() const { return false; } virtual std::vector GenerateColumnFamilies( - const int /* num_column_families */, - int rand_column_family) const { + const int /* num_column_families */, int rand_column_family) const { return {rand_column_family}; } @@ -2121,6 +2191,106 @@ class StressTest { return s; } +#ifdef ROCKSDB_LITE + virtual Status TestBackupRestore( + ThreadState* /* thread */, + const std::vector& /* rand_column_families */, + const std::vector& /* rand_keys */) { + assert(false); + 
fprintf(stderr, + "RocksDB lite does not support " + "TestBackupRestore\n"); + std::terminate(); + } +#else // ROCKSDB_LITE + virtual Status TestBackupRestore(ThreadState* thread, + const std::vector& rand_column_families, + const std::vector& rand_keys) { + // Note the column families chosen by `rand_column_families` cannot be + // dropped while the locks for `rand_keys` are held. So we should not have + // to worry about accessing those column families throughout this function. + assert(rand_column_families.size() == rand_keys.size()); + std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid); + std::string restore_dir = FLAGS_db + "/.restore" + ToString(thread->tid); + BackupableDBOptions backup_opts(backup_dir); + BackupEngine* backup_engine = nullptr; + Status s = BackupEngine::Open(FLAGS_env, backup_opts, &backup_engine); + if (s.ok()) { + s = backup_engine->CreateNewBackup(db_); + } + if (s.ok()) { + delete backup_engine; + backup_engine = nullptr; + s = BackupEngine::Open(FLAGS_env, backup_opts, &backup_engine); + } + if (s.ok()) { + s = backup_engine->RestoreDBFromLatestBackup(restore_dir /* db_dir */, + restore_dir /* wal_dir */); + } + if (s.ok()) { + s = backup_engine->PurgeOldBackups(0 /* num_backups_to_keep */); + } + DB* restored_db = nullptr; + std::vector restored_cf_handles; + if (s.ok()) { + Options restore_options(options_); + restore_options.listeners.clear(); + std::vector cf_descriptors; + // TODO(ajkr): `column_family_names_` is not safe to access here when + // `clear_column_family_one_in != 0`. But we can't easily switch to + // `ListColumnFamilies` to get names because it won't necessarily give + // the same order as `column_family_names_`. 
+ assert(FLAGS_clear_column_family_one_in == 0); + for (auto name : column_family_names_) { + cf_descriptors.emplace_back(name, ColumnFamilyOptions(restore_options)); + } + s = DB::Open(DBOptions(restore_options), restore_dir, cf_descriptors, + &restored_cf_handles, &restored_db); + } + // for simplicity, currently only verifies existence/non-existence of a few + // keys + for (size_t i = 0; s.ok() && i < rand_column_families.size(); ++i) { + std::string key_str = Key(rand_keys[i]); + Slice key = key_str; + std::string restored_value; + Status get_status = restored_db->Get( + ReadOptions(), restored_cf_handles[rand_column_families[i]], key, + &restored_value); + bool exists = + thread->shared->Exists(rand_column_families[i], rand_keys[i]); + if (get_status.ok()) { + if (!exists) { + s = Status::Corruption( + "key exists in restore but not in original db"); + } + } else if (get_status.IsNotFound()) { + if (exists) { + s = Status::Corruption( + "key exists in original db but not in restore"); + } + } else { + s = get_status; + } + } + if (backup_engine != nullptr) { + delete backup_engine; + backup_engine = nullptr; + } + if (restored_db != nullptr) { + for (auto* cf_handle : restored_cf_handles) { + restored_db->DestroyColumnFamilyHandle(cf_handle); + } + delete restored_db; + restored_db = nullptr; + } + if (!s.ok()) { + printf("A backup/restore operation failed with: %s\n", + s.ToString().c_str()); + } + return s; + } +#endif // ROCKSDB_LITE + void VerificationAbort(SharedState* shared, std::string msg, Status s) const { printf("Verification failed: %s. Status is %s\n", msg.c_str(), s.ToString().c_str()); @@ -2140,6 +2310,8 @@ class StressTest { fprintf(stdout, "Format version : %d\n", FLAGS_format_version); fprintf(stdout, "TransactionDB : %s\n", FLAGS_use_txn ? "true" : "false"); + fprintf(stdout, "Atomic flush : %s\n", + FLAGS_atomic_flush ? 
"true" : "false"); fprintf(stdout, "Column families : %d\n", FLAGS_column_families); if (!FLAGS_test_batches_snapshots) { fprintf(stdout, "Clear CFs one in : %d\n", @@ -2223,6 +2395,8 @@ class StressTest { block_based_options.block_size = FLAGS_block_size; block_based_options.format_version = static_cast(FLAGS_format_version); + block_based_options.index_block_restart_interval = + static_cast(FLAGS_index_block_restart_interval); block_based_options.filter_policy = filter_policy_; options_.table_factory.reset( NewBlockBasedTableFactory(block_based_options)); @@ -2283,6 +2457,7 @@ class StressTest { FLAGS_universal_max_merge_width; options_.compaction_options_universal.max_size_amplification_percent = FLAGS_universal_max_size_amplification_percent; + options_.atomic_flush = FLAGS_atomic_flush; } else { #ifdef ROCKSDB_LITE fprintf(stderr, "--options_file not supported in lite mode\n"); @@ -2514,7 +2689,7 @@ class NonBatchedOpsStressTest : public StressTest { } if (!thread->rand.OneIn(2)) { // Use iterator to verify this range - unique_ptr iter( + std::unique_ptr iter( db_->NewIterator(options, column_families_[cf])); iter->Seek(Key(start)); for (auto i = start; i < end; i++) { @@ -2653,16 +2828,15 @@ class NonBatchedOpsStressTest : public StressTest { } Iterator* iter = db_->NewIterator(ro_copy, cfh); - int64_t count = 0; + long count = 0; for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { ++count; } - assert(count <= - (static_cast(1) << ((8 - FLAGS_prefix_size) * 8))); + assert(count <= (static_cast(1) << ((8 - FLAGS_prefix_size) * 8))); Status s = iter->status(); if (iter->status().ok()) { - thread->stats.AddPrefixes(1, static_cast(count)); + thread->stats.AddPrefixes(1, count); } else { thread->stats.AddErrors(1); } @@ -3192,7 +3366,7 @@ class BatchedOpsStressTest : public StressTest { iters[i]->Seek(prefix_slices[i]); } - int count = 0; + long count = 0; while (iters[0]->Valid() && 
iters[0]->key().starts_with(prefix_slices[0])) { count++; std::string values[10]; @@ -3247,6 +3421,274 @@ class BatchedOpsStressTest : public StressTest { virtual void VerifyDb(ThreadState* /* thread */) const {} }; +class AtomicFlushStressTest : public StressTest { + public: + AtomicFlushStressTest() : batch_id_(0) {} + + virtual ~AtomicFlushStressTest() {} + + virtual Status TestPut(ThreadState* thread, WriteOptions& write_opts, + const ReadOptions& /* read_opts */, + const std::vector& rand_column_families, + const std::vector& rand_keys, + char (&value)[100], + std::unique_ptr& /* lock */) { + std::string key_str = Key(rand_keys[0]); + Slice key = key_str; + uint64_t value_base = batch_id_.fetch_add(1); + size_t sz = + GenerateValue(static_cast(value_base), value, sizeof(value)); + Slice v(value, sz); + WriteBatch batch; + for (auto cf : rand_column_families) { + ColumnFamilyHandle* cfh = column_families_[cf]; + if (FLAGS_use_merge) { + batch.Merge(cfh, key, v); + } else { /* !FLAGS_use_merge */ + batch.Put(cfh, key, v); + } + } + Status s = db_->Write(write_opts, &batch); + if (!s.ok()) { + fprintf(stderr, "multi put or merge error: %s\n", s.ToString().c_str()); + thread->stats.AddErrors(1); + } else { + auto num = static_cast(rand_column_families.size()); + thread->stats.AddBytesForWrites(num, (sz + 1) * num); + } + + return s; + } + + virtual Status TestDelete(ThreadState* thread, WriteOptions& write_opts, + const std::vector& rand_column_families, + const std::vector& rand_keys, + std::unique_ptr& /* lock */) { + std::string key_str = Key(rand_keys[0]); + Slice key = key_str; + WriteBatch batch; + for (auto cf : rand_column_families) { + ColumnFamilyHandle* cfh = column_families_[cf]; + batch.Delete(cfh, key); + } + Status s = db_->Write(write_opts, &batch); + if (!s.ok()) { + fprintf(stderr, "multidel error: %s\n", s.ToString().c_str()); + thread->stats.AddErrors(1); + } else { + thread->stats.AddDeletes(static_cast(rand_column_families.size())); + } + 
return s; + } + + virtual Status TestDeleteRange(ThreadState* thread, WriteOptions& write_opts, + const std::vector& rand_column_families, + const std::vector& rand_keys, + std::unique_ptr& /* lock */) { + int64_t rand_key = rand_keys[0]; + auto shared = thread->shared; + int64_t max_key = shared->GetMaxKey(); + if (rand_key > max_key - FLAGS_range_deletion_width) { + rand_key = + thread->rand.Next() % (max_key - FLAGS_range_deletion_width + 1); + } + std::string key_str = Key(rand_key); + Slice key = key_str; + std::string end_key_str = Key(rand_key + FLAGS_range_deletion_width); + Slice end_key = end_key_str; + WriteBatch batch; + for (auto cf : rand_column_families) { + ColumnFamilyHandle* cfh = column_families_[rand_column_families[cf]]; + batch.DeleteRange(cfh, key, end_key); + } + Status s = db_->Write(write_opts, &batch); + if (!s.ok()) { + fprintf(stderr, "multi del range error: %s\n", s.ToString().c_str()); + thread->stats.AddErrors(1); + } else { + thread->stats.AddRangeDeletions( + static_cast(rand_column_families.size())); + } + return s; + } + + virtual void TestIngestExternalFile( + ThreadState* /* thread */, + const std::vector& /* rand_column_families */, + const std::vector& /* rand_keys */, + std::unique_ptr& /* lock */) { + assert(false); + fprintf(stderr, + "AtomicFlushStressTest does not support TestIngestExternalFile " + "because it's not possible to verify the result\n"); + std::terminate(); + } + + virtual Status TestGet(ThreadState* thread, const ReadOptions& readoptions, + const std::vector& rand_column_families, + const std::vector& rand_keys) { + std::string key_str = Key(rand_keys[0]); + Slice key = key_str; + auto cfh = + column_families_[rand_column_families[thread->rand.Next() % + rand_column_families.size()]]; + std::string from_db; + Status s = db_->Get(readoptions, cfh, key, &from_db); + if (s.ok()) { + thread->stats.AddGets(1, 1); + } else if (s.IsNotFound()) { + thread->stats.AddGets(1, 0); + } else { + 
thread->stats.AddErrors(1); + } + return s; + } + + virtual Status TestPrefixScan(ThreadState* thread, + const ReadOptions& readoptions, + const std::vector& rand_column_families, + const std::vector& rand_keys) { + std::string key_str = Key(rand_keys[0]); + Slice key = key_str; + Slice prefix = Slice(key.data(), FLAGS_prefix_size); + + std::string upper_bound; + Slice ub_slice; + ReadOptions ro_copy = readoptions; + if (thread->rand.OneIn(2) && GetNextPrefix(prefix, &upper_bound)) { + ub_slice = Slice(upper_bound); + ro_copy.iterate_upper_bound = &ub_slice; + } + auto cfh = + column_families_[rand_column_families[thread->rand.Next() % + rand_column_families.size()]]; + Iterator* iter = db_->NewIterator(ro_copy, cfh); + long count = 0; + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); + iter->Next()) { + ++count; + } + assert(count <= (static_cast(1) << ((8 - FLAGS_prefix_size) * 8))); + Status s = iter->status(); + if (s.ok()) { + thread->stats.AddPrefixes(1, count); + } else { + thread->stats.AddErrors(1); + } + delete iter; + return s; + } + + virtual void VerifyDb(ThreadState* thread) const { + ReadOptions options(FLAGS_verify_checksum, true); + // We must set total_order_seek to true because we are doing a SeekToFirst + // on a column family whose memtables may support (by default) prefix-based + // iterator. In this case, NewIterator with options.total_order_seek being + // false returns a prefix-based iterator. Calling SeekToFirst using this + // iterator causes the iterator to become invalid. That means we cannot + // iterate the memtable using this iterator any more, although the memtable + // contains the most up-to-date key-values. 
+ options.total_order_seek = true; + assert(thread != nullptr); + auto shared = thread->shared; + std::vector > iters(column_families_.size()); + for (size_t i = 0; i != column_families_.size(); ++i) { + iters[i].reset(db_->NewIterator(options, column_families_[i])); + } + for (auto& iter : iters) { + iter->SeekToFirst(); + } + size_t num = column_families_.size(); + assert(num == iters.size()); + std::vector statuses(num, Status::OK()); + do { + size_t valid_cnt = 0; + size_t idx = 0; + for (auto& iter : iters) { + if (iter->Valid()) { + ++valid_cnt; + } else { + statuses[idx] = iter->status(); + } + ++idx; + } + if (valid_cnt == 0) { + Status status; + for (size_t i = 0; i != num; ++i) { + const auto& s = statuses[i]; + if (!s.ok()) { + status = s; + fprintf(stderr, "Iterator on cf %s has error: %s\n", + column_families_[i]->GetName().c_str(), + s.ToString().c_str()); + shared->SetVerificationFailure(); + } + } + if (status.ok()) { + fprintf(stdout, "Finished scanning all column families.\n"); + } + break; + } else if (valid_cnt != iters.size()) { + for (size_t i = 0; i != num; ++i) { + if (!iters[i]->Valid()) { + if (statuses[i].ok()) { + fprintf(stderr, "Finished scanning cf %s\n", + column_families_[i]->GetName().c_str()); + } else { + fprintf(stderr, "Iterator on cf %s has error: %s\n", + column_families_[i]->GetName().c_str(), + statuses[i].ToString().c_str()); + } + } else { + fprintf(stderr, "cf %s has remaining data to scan\n", + column_families_[i]->GetName().c_str()); + } + } + shared->SetVerificationFailure(); + break; + } + // If the program reaches here, then all column families' iterators are + // still valid. 
+ Slice key; + Slice value; + for (size_t i = 0; i != num; ++i) { + if (i == 0) { + key = iters[i]->key(); + value = iters[i]->value(); + } else { + if (key.compare(iters[i]->key()) != 0) { + fprintf(stderr, "Verification failed\n"); + fprintf(stderr, "cf%s: %s => %s\n", + column_families_[0]->GetName().c_str(), + key.ToString(true /* hex */).c_str(), + value.ToString(/* hex */).c_str()); + fprintf(stderr, "cf%s: %s => %s\n", + column_families_[i]->GetName().c_str(), + iters[i]->key().ToString(true /* hex */).c_str(), + iters[i]->value().ToString(true /* hex */).c_str()); + shared->SetVerificationFailure(); + } + } + } + for (auto& iter : iters) { + iter->Next(); + } + } while (true); + } + + virtual std::vector GenerateColumnFamilies( + const int /* num_column_families */, int /* rand_column_family */) const { + std::vector ret; + int num = static_cast(column_families_.size()); + int k = 0; + std::generate_n(back_inserter(ret), num, [&k]() -> int { return k++; }); + return ret; + } + + private: + std::atomic batch_id_; +}; + } // namespace rocksdb int main(int argc, char** argv) { @@ -3335,6 +3777,11 @@ int main(int argc, char** argv) { "Error: nooverwritepercent must be 0 when using file ingestion\n"); exit(1); } + if (FLAGS_clear_column_family_one_in > 0 && FLAGS_backup_one_in > 0) { + fprintf(stderr, + "Error: clear_column_family_one_in must be 0 when using backup\n"); + exit(1); + } // Choose a location for the test database if none given with --db= if (FLAGS_db.empty()) { @@ -3348,7 +3795,9 @@ int main(int argc, char** argv) { rocksdb_kill_prefix_blacklist = SplitString(FLAGS_kill_prefix_blacklist); std::unique_ptr stress; - if (FLAGS_test_batches_snapshots) { + if (FLAGS_atomic_flush) { + stress.reset(new rocksdb::AtomicFlushStressTest()); + } else if (FLAGS_test_batches_snapshots) { stress.reset(new rocksdb::BatchedOpsStressTest()); } else { stress.reset(new rocksdb::NonBatchedOpsStressTest()); diff --git a/3rdParty/rocksdb/v5.16.X/tools/dbench_monitor 
b/3rdParty/rocksdb/v5.18.X/tools/dbench_monitor similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/dbench_monitor rename to 3rdParty/rocksdb/v5.18.X/tools/dbench_monitor diff --git a/3rdParty/rocksdb/v5.16.X/tools/dump/db_dump_tool.cc b/3rdParty/rocksdb/v5.18.X/tools/dump/db_dump_tool.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/dump/db_dump_tool.cc rename to 3rdParty/rocksdb/v5.18.X/tools/dump/db_dump_tool.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/dump/rocksdb_dump.cc b/3rdParty/rocksdb/v5.18.X/tools/dump/rocksdb_dump.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/dump/rocksdb_dump.cc rename to 3rdParty/rocksdb/v5.18.X/tools/dump/rocksdb_dump.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/dump/rocksdb_undump.cc b/3rdParty/rocksdb/v5.18.X/tools/dump/rocksdb_undump.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/dump/rocksdb_undump.cc rename to 3rdParty/rocksdb/v5.18.X/tools/dump/rocksdb_undump.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/generate_random_db.sh b/3rdParty/rocksdb/v5.18.X/tools/generate_random_db.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/generate_random_db.sh rename to 3rdParty/rocksdb/v5.18.X/tools/generate_random_db.sh diff --git a/3rdParty/rocksdb/v5.18.X/tools/ingest_external_sst.sh b/3rdParty/rocksdb/v5.18.X/tools/ingest_external_sst.sh new file mode 100755 index 0000000000..54ca3db3b3 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/tools/ingest_external_sst.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# +# + +if [ "$#" -lt 2 ]; then + echo "usage: $BASH_SOURCE " + exit 1 +fi + +db_dir=$1 +external_sst_dir=$2 + +for f in `find $external_sst_dir -name extern_sst*` +do + echo == Ingesting external SST file $f to DB at $db_dir + ./ldb --db=$db_dir --create_if_missing ingest_extern_sst $f +done diff --git a/3rdParty/rocksdb/v5.16.X/tools/ldb.cc b/3rdParty/rocksdb/v5.18.X/tools/ldb.cc similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/tools/ldb.cc rename to 3rdParty/rocksdb/v5.18.X/tools/ldb.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/ldb_cmd.cc b/3rdParty/rocksdb/v5.18.X/tools/ldb_cmd.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/tools/ldb_cmd.cc rename to 3rdParty/rocksdb/v5.18.X/tools/ldb_cmd.cc index e0e24a73de..997718ef28 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/ldb_cmd.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/ldb_cmd.cc @@ -1964,11 +1964,11 @@ void DumpWalFile(std::string wal_file, bool print_header, bool print_values, bool is_write_committed, LDBCommandExecuteResult* exec_state) { Env* env_ = Env::Default(); EnvOptions soptions; - unique_ptr wal_file_reader; + std::unique_ptr wal_file_reader; Status status; { - unique_ptr file; + std::unique_ptr file; status = env_->NewSequentialFile(wal_file, &file, soptions); if (status.ok()) { wal_file_reader.reset( @@ -1999,7 +1999,8 @@ void DumpWalFile(std::string wal_file, bool print_header, bool print_values, } DBOptions db_options; log::Reader reader(db_options.info_log, std::move(wal_file_reader), - &reporter, true, 0, log_number); + &reporter, true /* checksum */, log_number, + false /* retry_after_eof */); std::string scratch; WriteBatch batch; Slice record; @@ -2844,8 +2845,8 @@ void DumpSstFile(std::string filename, bool output_hex, bool show_properties) { return; } // no verification - rocksdb::SstFileReader reader(filename, false, output_hex); - Status st = reader.ReadSequential(true, std::numeric_limits::max(), false, // has_from + rocksdb::SstFileDumper dumper(filename, false, output_hex); + Status st = dumper.ReadSequential(true, std::numeric_limits::max(), false, // has_from from_key, false, // has_to to_key); if (!st.ok()) { @@ -2859,21 +2860,17 @@ void DumpSstFile(std::string filename, bool output_hex, bool show_properties) { std::shared_ptr table_properties_from_reader; - st = reader.ReadTableProperties(&table_properties_from_reader); + st = 
dumper.ReadTableProperties(&table_properties_from_reader); if (!st.ok()) { std::cerr << filename << ": " << st.ToString() << ". Try to use initial table properties" << std::endl; - table_properties = reader.GetInitTableProperties(); + table_properties = dumper.GetInitTableProperties(); } else { table_properties = table_properties_from_reader.get(); } if (table_properties != nullptr) { std::cout << std::endl << "Table Properties:" << std::endl; std::cout << table_properties->ToString("\n") << std::endl; - std::cout << "# deleted keys: " - << rocksdb::GetDeletedKeys( - table_properties->user_collected_properties) - << std::endl; } } } diff --git a/3rdParty/rocksdb/v5.16.X/tools/ldb_cmd_impl.h b/3rdParty/rocksdb/v5.18.X/tools/ldb_cmd_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/ldb_cmd_impl.h rename to 3rdParty/rocksdb/v5.18.X/tools/ldb_cmd_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/tools/ldb_cmd_test.cc b/3rdParty/rocksdb/v5.18.X/tools/ldb_cmd_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/ldb_cmd_test.cc rename to 3rdParty/rocksdb/v5.18.X/tools/ldb_cmd_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/ldb_test.py b/3rdParty/rocksdb/v5.18.X/tools/ldb_test.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/ldb_test.py rename to 3rdParty/rocksdb/v5.18.X/tools/ldb_test.py diff --git a/3rdParty/rocksdb/v5.16.X/tools/ldb_tool.cc b/3rdParty/rocksdb/v5.18.X/tools/ldb_tool.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/ldb_tool.cc rename to 3rdParty/rocksdb/v5.18.X/tools/ldb_tool.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/pflag b/3rdParty/rocksdb/v5.18.X/tools/pflag similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/pflag rename to 3rdParty/rocksdb/v5.18.X/tools/pflag diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/.gitignore b/3rdParty/rocksdb/v5.18.X/tools/rdb/.gitignore similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/.gitignore rename to 
3rdParty/rocksdb/v5.18.X/tools/rdb/.gitignore diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/API.md b/3rdParty/rocksdb/v5.18.X/tools/rdb/API.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/API.md rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/API.md diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/README.md b/3rdParty/rocksdb/v5.18.X/tools/rdb/README.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/README.md rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/README.md diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/binding.gyp b/3rdParty/rocksdb/v5.18.X/tools/rdb/binding.gyp similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/binding.gyp rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/binding.gyp diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/db_wrapper.cc b/3rdParty/rocksdb/v5.18.X/tools/rdb/db_wrapper.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/db_wrapper.cc rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/db_wrapper.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/db_wrapper.h b/3rdParty/rocksdb/v5.18.X/tools/rdb/db_wrapper.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/db_wrapper.h rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/db_wrapper.h diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/rdb b/3rdParty/rocksdb/v5.18.X/tools/rdb/rdb similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/rdb rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/rdb diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/rdb.cc b/3rdParty/rocksdb/v5.18.X/tools/rdb/rdb.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/rdb.cc rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/rdb.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/rdb/unit_test.js b/3rdParty/rocksdb/v5.18.X/tools/rdb/unit_test.js similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rdb/unit_test.js rename to 3rdParty/rocksdb/v5.18.X/tools/rdb/unit_test.js diff --git 
a/3rdParty/rocksdb/v5.16.X/tools/reduce_levels_test.cc b/3rdParty/rocksdb/v5.18.X/tools/reduce_levels_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/reduce_levels_test.cc rename to 3rdParty/rocksdb/v5.18.X/tools/reduce_levels_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/regression_test.sh b/3rdParty/rocksdb/v5.18.X/tools/regression_test.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/regression_test.sh rename to 3rdParty/rocksdb/v5.18.X/tools/regression_test.sh diff --git a/3rdParty/rocksdb/v5.16.X/tools/report_lite_binary_size.sh b/3rdParty/rocksdb/v5.18.X/tools/report_lite_binary_size.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/report_lite_binary_size.sh rename to 3rdParty/rocksdb/v5.18.X/tools/report_lite_binary_size.sh diff --git a/3rdParty/rocksdb/v5.16.X/tools/rocksdb_dump_test.sh b/3rdParty/rocksdb/v5.18.X/tools/rocksdb_dump_test.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/rocksdb_dump_test.sh rename to 3rdParty/rocksdb/v5.18.X/tools/rocksdb_dump_test.sh diff --git a/3rdParty/rocksdb/v5.16.X/tools/run_flash_bench.sh b/3rdParty/rocksdb/v5.18.X/tools/run_flash_bench.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/run_flash_bench.sh rename to 3rdParty/rocksdb/v5.18.X/tools/run_flash_bench.sh diff --git a/3rdParty/rocksdb/v5.16.X/tools/run_leveldb.sh b/3rdParty/rocksdb/v5.18.X/tools/run_leveldb.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/run_leveldb.sh rename to 3rdParty/rocksdb/v5.18.X/tools/run_leveldb.sh diff --git a/3rdParty/rocksdb/v5.16.X/tools/sample-dump.dmp b/3rdParty/rocksdb/v5.18.X/tools/sample-dump.dmp similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/sample-dump.dmp rename to 3rdParty/rocksdb/v5.18.X/tools/sample-dump.dmp diff --git a/3rdParty/rocksdb/v5.16.X/tools/sst_dump.cc b/3rdParty/rocksdb/v5.18.X/tools/sst_dump.cc similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/tools/sst_dump.cc rename to 3rdParty/rocksdb/v5.18.X/tools/sst_dump.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/sst_dump_test.cc b/3rdParty/rocksdb/v5.18.X/tools/sst_dump_test.cc similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/tools/sst_dump_test.cc rename to 3rdParty/rocksdb/v5.18.X/tools/sst_dump_test.cc index 17b59fd773..9032123cc6 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/sst_dump_test.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/sst_dump_test.cc @@ -43,7 +43,7 @@ void createSST(const std::string& file_name, std::shared_ptr tf; tf.reset(new rocksdb::BlockBasedTableFactory(table_options)); - unique_ptr file; + std::unique_ptr file; Env* env = Env::Default(); EnvOptions env_options; ReadOptions read_options; @@ -51,7 +51,7 @@ void createSST(const std::string& file_name, const ImmutableCFOptions imoptions(opts); const MutableCFOptions moptions(opts); rocksdb::InternalKeyComparator ikc(opts.comparator); - unique_ptr tb; + std::unique_ptr tb; ASSERT_OK(env->NewWritableFile(file_name, &file, env_options)); @@ -59,7 +59,7 @@ void createSST(const std::string& file_name, std::vector > int_tbl_prop_collector_factories; std::unique_ptr file_writer( - new WritableFileWriter(std::move(file), EnvOptions())); + new WritableFileWriter(std::move(file), file_name, EnvOptions())); std::string column_family_name; int unknown_level = -1; tb.reset(opts.table_factory->NewTableBuilder( @@ -92,12 +92,11 @@ void cleanup(const std::string& file_name) { // Test for sst dump tool "raw" mode class SSTDumpToolTest : public testing::Test { std::string testDir_; -public: + + public: BlockBasedTableOptions table_options_; - SSTDumpToolTest() { - testDir_ = test::TmpDir(); - } + SSTDumpToolTest() { testDir_ = test::TmpDir(); } ~SSTDumpToolTest() {} @@ -107,9 +106,9 @@ public: return path; } - template + template void PopulateCommandArgs(const std::string& file_path, const char* command, - char* (&usage)[N]) const { + char* (&usage)[N]) const { for (int i = 0; i 
< static_cast(N); ++i) { usage[i] = new char[optLength]; } diff --git a/3rdParty/rocksdb/v5.16.X/tools/sst_dump_tool.cc b/3rdParty/rocksdb/v5.18.X/tools/sst_dump_tool.cc similarity index 91% rename from 3rdParty/rocksdb/v5.16.X/tools/sst_dump_tool.cc rename to 3rdParty/rocksdb/v5.18.X/tools/sst_dump_tool.cc index f7a097a3a8..25699777e8 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/sst_dump_tool.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/sst_dump_tool.cc @@ -43,7 +43,7 @@ namespace rocksdb { -SstFileReader::SstFileReader(const std::string& file_path, bool verify_checksum, +SstFileDumper::SstFileDumper(const std::string& file_path, bool verify_checksum, bool output_hex) : file_name_(file_path), read_num_(0), @@ -74,7 +74,7 @@ static const std::vector> {CompressionType::kXpressCompression, "kXpressCompression"}, {CompressionType::kZSTD, "kZSTD"}}; -Status SstFileReader::GetTableReader(const std::string& file_path) { +Status SstFileDumper::GetTableReader(const std::string& file_path) { // Warning about 'magic_number' being uninitialized shows up only in UBsan // builds. Though access is guarded by 's.ok()' checks, fix the issue to // avoid any warnings. 
@@ -83,7 +83,7 @@ Status SstFileReader::GetTableReader(const std::string& file_path) { // read table magic number Footer footer; - unique_ptr file; + std::unique_ptr file; uint64_t file_size = 0; Status s = options_.env->NewRandomAccessFile(file_path, &file, soptions_); if (s.ok()) { @@ -123,10 +123,10 @@ Status SstFileReader::GetTableReader(const std::string& file_path) { return s; } -Status SstFileReader::NewTableReader( +Status SstFileDumper::NewTableReader( const ImmutableCFOptions& /*ioptions*/, const EnvOptions& /*soptions*/, const InternalKeyComparator& /*internal_comparator*/, uint64_t file_size, - unique_ptr* /*table_reader*/) { + std::unique_ptr* /*table_reader*/) { // We need to turn off pre-fetching of index and filter nodes for // BlockBasedTable if (BlockBasedTableFactory::kName == options_.table_factory->Name()) { @@ -143,12 +143,12 @@ Status SstFileReader::NewTableReader( std::move(file_), file_size, &table_reader_); } -Status SstFileReader::VerifyChecksum() { +Status SstFileDumper::VerifyChecksum() { return table_reader_->VerifyChecksum(); } -Status SstFileReader::DumpTable(const std::string& out_filename) { - unique_ptr out_file; +Status SstFileDumper::DumpTable(const std::string& out_filename) { + std::unique_ptr out_file; Env* env = Env::Default(); env->NewWritableFile(out_filename, &out_file, soptions_); Status s = table_reader_->DumpTable(out_file.get(), @@ -157,22 +157,23 @@ Status SstFileReader::DumpTable(const std::string& out_filename) { return s; } -uint64_t SstFileReader::CalculateCompressedTableSize( +uint64_t SstFileDumper::CalculateCompressedTableSize( const TableBuilderOptions& tb_options, size_t block_size) { - unique_ptr out_file; - unique_ptr env(NewMemEnv(Env::Default())); + std::unique_ptr out_file; + std::unique_ptr env(NewMemEnv(Env::Default())); env->NewWritableFile(testFileName, &out_file, soptions_); - unique_ptr dest_writer; - dest_writer.reset(new WritableFileWriter(std::move(out_file), soptions_)); + std::unique_ptr 
dest_writer; + dest_writer.reset( + new WritableFileWriter(std::move(out_file), testFileName, soptions_)); BlockBasedTableOptions table_options; table_options.block_size = block_size; BlockBasedTableFactory block_based_tf(table_options); - unique_ptr table_builder; + std::unique_ptr table_builder; table_builder.reset(block_based_tf.NewTableBuilder( tb_options, TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, dest_writer.get())); - unique_ptr iter(table_reader_->NewIterator( + std::unique_ptr iter(table_reader_->NewIterator( ReadOptions(), moptions_.prefix_extractor.get())); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { if (!iter->status().ok()) { @@ -191,7 +192,7 @@ uint64_t SstFileReader::CalculateCompressedTableSize( return size; } -int SstFileReader::ShowAllCompressionSizes( +int SstFileDumper::ShowAllCompressionSizes( size_t block_size, const std::vector>& compression_types) { @@ -225,7 +226,7 @@ int SstFileReader::ShowAllCompressionSizes( return 0; } -Status SstFileReader::ReadTableProperties(uint64_t table_magic_number, +Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number, RandomAccessFileReader* file, uint64_t file_size) { TableProperties* table_properties = nullptr; @@ -239,7 +240,7 @@ Status SstFileReader::ReadTableProperties(uint64_t table_magic_number, return s; } -Status SstFileReader::SetTableOptionsByMagicNumber( +Status SstFileDumper::SetTableOptionsByMagicNumber( uint64_t table_magic_number) { assert(table_properties_); if (table_magic_number == kBlockBasedTableMagicNumber || @@ -282,7 +283,7 @@ Status SstFileReader::SetTableOptionsByMagicNumber( return Status::OK(); } -Status SstFileReader::SetOldTableOptions() { +Status SstFileDumper::SetOldTableOptions() { assert(table_properties_ == nullptr); options_.table_factory = std::make_shared(); fprintf(stdout, "Sst file format: block-based(old version)\n"); @@ -290,7 +291,7 @@ Status SstFileReader::SetOldTableOptions() { return Status::OK(); } -Status 
SstFileReader::ReadSequential(bool print_kv, uint64_t read_num, +Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num, bool has_from, const std::string& from_key, bool has_to, const std::string& to_key, bool use_from_as_prefix) { @@ -347,7 +348,7 @@ Status SstFileReader::ReadSequential(bool print_kv, uint64_t read_num, return ret; } -Status SstFileReader::ReadTableProperties( +Status SstFileDumper::ReadTableProperties( std::shared_ptr* table_properties) { if (!table_reader_) { return init_result_; @@ -569,16 +570,16 @@ int SSTDumpTool::Run(int argc, char** argv) { filename = std::string(dir_or_file) + "/" + filename; } - rocksdb::SstFileReader reader(filename, verify_checksum, + rocksdb::SstFileDumper dumper(filename, verify_checksum, output_hex); - if (!reader.getStatus().ok()) { + if (!dumper.getStatus().ok()) { fprintf(stderr, "%s: %s\n", filename.c_str(), - reader.getStatus().ToString().c_str()); + dumper.getStatus().ToString().c_str()); continue; } if (command == "recompress") { - reader.ShowAllCompressionSizes( + dumper.ShowAllCompressionSizes( set_block_size ? block_size : 16384, compression_types.empty() ? kCompressions : compression_types); return 0; @@ -588,7 +589,7 @@ int SSTDumpTool::Run(int argc, char** argv) { std::string out_filename = filename.substr(0, filename.length() - 4); out_filename.append("_dump.txt"); - st = reader.DumpTable(out_filename); + st = dumper.DumpTable(out_filename); if (!st.ok()) { fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str()); exit(1); @@ -600,7 +601,7 @@ int SSTDumpTool::Run(int argc, char** argv) { // scan all files in give file path. if (command == "" || command == "scan" || command == "check") { - st = reader.ReadSequential( + st = dumper.ReadSequential( command == "scan", read_num > 0 ? 
(read_num - total_read) : read_num, has_from || use_from_as_prefix, from_key, has_to, to_key, use_from_as_prefix); @@ -608,14 +609,14 @@ int SSTDumpTool::Run(int argc, char** argv) { fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str()); } - total_read += reader.GetReadNumber(); + total_read += dumper.GetReadNumber(); if (read_num > 0 && total_read > read_num) { break; } } if (command == "verify") { - st = reader.VerifyChecksum(); + st = dumper.VerifyChecksum(); if (!st.ok()) { fprintf(stderr, "%s is corrupted: %s\n", filename.c_str(), st.ToString().c_str()); @@ -630,11 +631,11 @@ int SSTDumpTool::Run(int argc, char** argv) { std::shared_ptr table_properties_from_reader; - st = reader.ReadTableProperties(&table_properties_from_reader); + st = dumper.ReadTableProperties(&table_properties_from_reader); if (!st.ok()) { fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str()); fprintf(stderr, "Try to use initial table properties\n"); - table_properties = reader.GetInitTableProperties(); + table_properties = dumper.GetInitTableProperties(); } else { table_properties = table_properties_from_reader.get(); } @@ -645,19 +646,6 @@ int SSTDumpTool::Run(int argc, char** argv) { "------------------------------\n" " %s", table_properties->ToString("\n ", ": ").c_str()); - fprintf(stdout, "# deleted keys: %" PRIu64 "\n", - rocksdb::GetDeletedKeys( - table_properties->user_collected_properties)); - - bool property_present; - uint64_t merge_operands = rocksdb::GetMergeOperands( - table_properties->user_collected_properties, &property_present); - if (property_present) { - fprintf(stdout, " # merge operands: %" PRIu64 "\n", - merge_operands); - } else { - fprintf(stdout, " # merge operands: UNKNOWN\n"); - } } total_num_files += 1; total_num_data_blocks += table_properties->num_data_blocks; diff --git a/3rdParty/rocksdb/v5.16.X/tools/sst_dump_tool_imp.h b/3rdParty/rocksdb/v5.18.X/tools/sst_dump_tool_imp.h similarity index 89% rename from 
3rdParty/rocksdb/v5.16.X/tools/sst_dump_tool_imp.h rename to 3rdParty/rocksdb/v5.18.X/tools/sst_dump_tool_imp.h index ca60dd93c9..9e83d8d040 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/sst_dump_tool_imp.h +++ b/3rdParty/rocksdb/v5.18.X/tools/sst_dump_tool_imp.h @@ -15,9 +15,9 @@ namespace rocksdb { -class SstFileReader { +class SstFileDumper { public: - explicit SstFileReader(const std::string& file_name, bool verify_checksum, + explicit SstFileDumper(const std::string& file_name, bool verify_checksum, bool output_hex); Status ReadSequential(bool print_kv, uint64_t read_num, bool has_from, @@ -57,7 +57,7 @@ class SstFileReader { const EnvOptions& soptions, const InternalKeyComparator& internal_comparator, uint64_t file_size, - unique_ptr* table_reader); + std::unique_ptr* table_reader); std::string file_name_; uint64_t read_num_; @@ -70,13 +70,13 @@ class SstFileReader { Options options_; Status init_result_; - unique_ptr table_reader_; - unique_ptr file_; + std::unique_ptr table_reader_; + std::unique_ptr file_; const ImmutableCFOptions ioptions_; const MutableCFOptions moptions_; InternalKeyComparator internal_comparator_; - unique_ptr table_properties_; + std::unique_ptr table_properties_; }; } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/tools/trace_analyzer.cc b/3rdParty/rocksdb/v5.18.X/tools/trace_analyzer.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/trace_analyzer.cc rename to 3rdParty/rocksdb/v5.18.X/tools/trace_analyzer.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_test.cc b/3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_test.cc similarity index 75% rename from 3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_test.cc rename to 3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_test.cc index 49cd31ba3b..768f789cc8 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_test.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_test.cc @@ -37,7 +37,7 @@ namespace rocksdb { namespace { static const int 
kMaxArgCount = 100; static const size_t kArgBufferSize = 100000; -} +} // namespace // The helper functions for the test class TraceAnalyzerTest : public testing::Test { @@ -62,6 +62,7 @@ class TraceAnalyzerTest : public testing::Test { DB* db_ = nullptr; std::string value; std::unique_ptr trace_writer; + Iterator* single_iter = nullptr; ASSERT_OK( NewFileTraceWriter(env_, env_options_, trace_path, &trace_writer)); @@ -77,7 +78,12 @@ class TraceAnalyzerTest : public testing::Test { ASSERT_OK(db_->Write(wo, &batch)); ASSERT_OK(db_->Get(ro, "a", &value)); + single_iter = db_->NewIterator(ro); + single_iter->Seek("a"); + single_iter->SeekForPrev("b"); + delete single_iter; std::this_thread::sleep_for (std::chrono::seconds(1)); + db_->Get(ro, "g", &value); ASSERT_OK(db_->EndTrace()); @@ -141,6 +147,30 @@ class TraceAnalyzerTest : public testing::Test { return; } + void AnalyzeTrace(std::vector& paras_diff, + std::string output_path, std::string trace_path) { + std::vector paras = {"./trace_analyzer", + "-convert_to_human_readable_trace", + "-output_key_stats", + "-output_access_count_stats", + "-output_prefix=test", + "-output_prefix_cut=1", + "-output_time_series", + "-output_value_distribution", + "-output_qps_stats", + "-no_key", + "-no_print"}; + for (auto& para : paras_diff) { + paras.push_back(para); + } + Status s = env_->FileExists(trace_path); + if (!s.ok()) { + GenerateTrace(trace_path); + } + env_->CreateDir(output_path); + RunTraceAnalyzer(paras); + } + rocksdb::Env* env_; EnvOptions env_options_; std::string test_path_; @@ -152,28 +182,11 @@ TEST_F(TraceAnalyzerTest, Get) { std::string trace_path = test_path_ + "/trace"; std::string output_path = test_path_ + "/get"; std::string file_path; - std::vector paras = {"./trace_analyzer", - "-analyze_get", - "-convert_to_human_readable_trace", - "-output_key_stats", - "-output_access_count_stats", - "-output_prefix=test", - "-output_prefix_cut=1", - "-output_time_series", - "-output_value_distribution", - 
"-output_qps_stats", - "-no_key", - "-no_print"}; - Status s = env_->FileExists(trace_path); - if (!s.ok()) { - GenerateTrace(trace_path); - } + std::vector paras = {"-analyze_get"}; paras.push_back("-output_dir=" + output_path); paras.push_back("-trace_path=" + trace_path); paras.push_back("-key_space_dir=" + test_path_); - - env_->CreateDir(output_path); - RunTraceAnalyzer(paras); + AnalyzeTrace(paras, output_path, trace_path); // check the key_stats file std::vector k_stats = {"0 10 0 1 1.000000", "0 10 1 1 1.000000"}; @@ -186,7 +199,8 @@ TEST_F(TraceAnalyzerTest, Get) { CheckFileContent(k_dist, file_path, true); // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "0", "0"}; + std::vector k_sequence = {"1", "5", "2", "3", "4", + "0", "6", "7", "0"}; file_path = output_path + "/test-human_readable_trace.txt"; CheckFileContent(k_sequence, file_path, false); @@ -234,29 +248,11 @@ TEST_F(TraceAnalyzerTest, Put) { std::string trace_path = test_path_ + "/trace"; std::string output_path = test_path_ + "/put"; std::string file_path; - std::vector paras = {"./trace_analyzer", - "-analyze_get", - "-analyze_put", - "-convert_to_human_readable_trace", - "-output_key_stats", - "-output_access_count_stats", - "-output_prefix=test", - "-output_prefix_cut=1", - "-output_time_series", - "-output_value_distribution", - "-output_qps_stats", - "-no_key", - "-no_print"}; - Status s = env_->FileExists(trace_path); - if (!s.ok()) { - GenerateTrace(trace_path); - } + std::vector paras = {"-analyze_put"}; paras.push_back("-output_dir=" + output_path); paras.push_back("-trace_path=" + trace_path); paras.push_back("-key_space_dir=" + test_path_); - - env_->CreateDir(output_path); - RunTraceAnalyzer(paras); + AnalyzeTrace(paras, output_path, trace_path); // check the key_stats file std::vector k_stats = {"0 9 0 1 1.000000"}; @@ -269,7 +265,8 @@ TEST_F(TraceAnalyzerTest, Put) { CheckFileContent(k_dist, file_path, true); // Check the trace sequence - std::vector 
k_sequence = {"1", "5", "2", "3", "4", "0", "0"}; + std::vector k_sequence = {"1", "5", "2", "3", "4", + "0", "6", "7", "0"}; file_path = output_path + "/test-human_readable_trace.txt"; CheckFileContent(k_sequence, file_path, false); @@ -299,7 +296,7 @@ TEST_F(TraceAnalyzerTest, Put) { file_path = output_path + "/test-qps_stats.txt"; CheckFileContent(all_qps, file_path, true); - // Check the qps of get + // Check the qps of Put std::vector get_qps = {"1"}; file_path = output_path + "/test-put-0-qps_stats.txt"; CheckFileContent(get_qps, file_path, true); @@ -322,30 +319,11 @@ TEST_F(TraceAnalyzerTest, Delete) { std::string trace_path = test_path_ + "/trace"; std::string output_path = test_path_ + "/delete"; std::string file_path; - std::vector paras = {"./trace_analyzer", - "-analyze_get", - "-analyze_put", - "-analyze_delete", - "-convert_to_human_readable_trace", - "-output_key_stats", - "-output_access_count_stats", - "-output_prefix=test", - "-output_prefix_cut=1", - "-output_time_series", - "-output_value_distribution", - "-output_qps_stats", - "-no_key", - "-no_print"}; - Status s = env_->FileExists(trace_path); - if (!s.ok()) { - GenerateTrace(trace_path); - } + std::vector paras = {"-analyze_delete"}; paras.push_back("-output_dir=" + output_path); paras.push_back("-trace_path=" + trace_path); paras.push_back("-key_space_dir=" + test_path_); - - env_->CreateDir(output_path); - RunTraceAnalyzer(paras); + AnalyzeTrace(paras, output_path, trace_path); // check the key_stats file std::vector k_stats = {"0 0 0 1 1.000000"}; @@ -359,7 +337,8 @@ TEST_F(TraceAnalyzerTest, Delete) { CheckFileContent(k_dist, file_path, true); // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "0", "0"}; + std::vector k_sequence = {"1", "5", "2", "3", "4", + "0", "6", "7", "0"}; file_path = output_path + "/test-human_readable_trace.txt"; CheckFileContent(k_sequence, file_path, false); @@ -389,7 +368,7 @@ TEST_F(TraceAnalyzerTest, Delete) { file_path = 
output_path + "/test-qps_stats.txt"; CheckFileContent(all_qps, file_path, true); - // Check the qps of get + // Check the qps of Delete std::vector get_qps = {"1"}; file_path = output_path + "/test-delete-0-qps_stats.txt"; CheckFileContent(get_qps, file_path, true); @@ -406,31 +385,11 @@ TEST_F(TraceAnalyzerTest, Merge) { std::string trace_path = test_path_ + "/trace"; std::string output_path = test_path_ + "/merge"; std::string file_path; - std::vector paras = {"./trace_analyzer", - "-analyze_get", - "-analyze_put", - "-analyze_delete", - "-analyze_merge", - "-convert_to_human_readable_trace", - "-output_key_stats", - "-output_access_count_stats", - "-output_prefix=test", - "-output_prefix_cut=1", - "-output_time_series", - "-output_value_distribution", - "-output_qps_stats", - "-no_key", - "-no_print"}; - Status s = env_->FileExists(trace_path); - if (!s.ok()) { - GenerateTrace(trace_path); - } + std::vector paras = {"-analyze_merge"}; paras.push_back("-output_dir=" + output_path); paras.push_back("-trace_path=" + trace_path); paras.push_back("-key_space_dir=" + test_path_); - - env_->CreateDir(output_path); - RunTraceAnalyzer(paras); + AnalyzeTrace(paras, output_path, trace_path); // check the key_stats file std::vector k_stats = {"0 20 0 1 1.000000"}; @@ -443,7 +402,8 @@ TEST_F(TraceAnalyzerTest, Merge) { CheckFileContent(k_dist, file_path, true); // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "0", "0"}; + std::vector k_sequence = {"1", "5", "2", "3", "4", + "0", "6", "7", "0"}; file_path = output_path + "/test-human_readable_trace.txt"; CheckFileContent(k_sequence, file_path, false); @@ -473,7 +433,7 @@ TEST_F(TraceAnalyzerTest, Merge) { file_path = output_path + "/test-qps_stats.txt"; CheckFileContent(all_qps, file_path, true); - // Check the qps of get + // Check the qps of Merge std::vector get_qps = {"1"}; file_path = output_path + "/test-merge-0-qps_stats.txt"; CheckFileContent(get_qps, file_path, true); @@ -497,32 
+457,11 @@ TEST_F(TraceAnalyzerTest, SingleDelete) { std::string trace_path = test_path_ + "/trace"; std::string output_path = test_path_ + "/single_delete"; std::string file_path; - std::vector paras = {"./trace_analyzer", - "-analyze_get", - "-analyze_put", - "-analyze_delete", - "-analyze_merge", - "-analyze_single_delete", - "-convert_to_human_readable_trace", - "-output_key_stats", - "-output_access_count_stats", - "-output_prefix=test", - "-output_prefix_cut=1", - "-output_time_series", - "-output_value_distribution", - "-output_qps_stats", - "-no_key", - "-no_print"}; - Status s = env_->FileExists(trace_path); - if (!s.ok()) { - GenerateTrace(trace_path); - } + std::vector paras = {"-analyze_single_delete"}; paras.push_back("-output_dir=" + output_path); paras.push_back("-trace_path=" + trace_path); paras.push_back("-key_space_dir=" + test_path_); - - env_->CreateDir(output_path); - RunTraceAnalyzer(paras); + AnalyzeTrace(paras, output_path, trace_path); // check the key_stats file std::vector k_stats = {"0 0 0 1 1.000000"}; @@ -536,7 +475,8 @@ TEST_F(TraceAnalyzerTest, SingleDelete) { CheckFileContent(k_dist, file_path, true); // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "0", "0"}; + std::vector k_sequence = {"1", "5", "2", "3", "4", + "0", "6", "7", "0"}; file_path = output_path + "/test-human_readable_trace.txt"; CheckFileContent(k_sequence, file_path, false); @@ -566,7 +506,7 @@ TEST_F(TraceAnalyzerTest, SingleDelete) { file_path = output_path + "/test-qps_stats.txt"; CheckFileContent(all_qps, file_path, true); - // Check the qps of get + // Check the qps of SingleDelete std::vector get_qps = {"1"}; file_path = output_path + "/test-single_delete-0-qps_stats.txt"; CheckFileContent(get_qps, file_path, true); @@ -584,33 +524,11 @@ TEST_F(TraceAnalyzerTest, DeleteRange) { std::string trace_path = test_path_ + "/trace"; std::string output_path = test_path_ + "/range_delete"; std::string file_path; - std::vector paras = 
{"./trace_analyzer", - "-analyze_get", - "-analyze_put", - "-analyze_delete", - "-analyze_merge", - "-analyze_single_delete", - "-analyze_range_delete", - "-convert_to_human_readable_trace", - "-output_key_stats", - "-output_access_count_stats", - "-output_prefix=test", - "-output_prefix_cut=1", - "-output_time_series", - "-output_value_distribution", - "-output_qps_stats", - "-no_key", - "-no_print"}; - Status s = env_->FileExists(trace_path); - if (!s.ok()) { - GenerateTrace(trace_path); - } + std::vector paras = {"-analyze_range_delete"}; paras.push_back("-output_dir=" + output_path); paras.push_back("-trace_path=" + trace_path); paras.push_back("-key_space_dir=" + test_path_); - - env_->CreateDir(output_path); - RunTraceAnalyzer(paras); + AnalyzeTrace(paras, output_path, trace_path); // check the key_stats file std::vector k_stats = {"0 0 0 1 1.000000", "0 0 1 1 1.000000"}; @@ -624,7 +542,8 @@ TEST_F(TraceAnalyzerTest, DeleteRange) { CheckFileContent(k_dist, file_path, true); // Check the trace sequence - std::vector k_sequence = {"1", "5", "2", "3", "4", "0", "0"}; + std::vector k_sequence = {"1", "5", "2", "3", "4", + "0", "6", "7", "0"}; file_path = output_path + "/test-human_readable_trace.txt"; CheckFileContent(k_sequence, file_path, false); @@ -655,7 +574,7 @@ TEST_F(TraceAnalyzerTest, DeleteRange) { file_path = output_path + "/test-qps_stats.txt"; CheckFileContent(all_qps, file_path, true); - // Check the qps of get + // Check the qps of DeleteRange std::vector get_qps = {"2"}; file_path = output_path + "/test-range_delete-0-qps_stats.txt"; CheckFileContent(get_qps, file_path, true); @@ -669,6 +588,121 @@ TEST_F(TraceAnalyzerTest, DeleteRange) { CheckFileContent(top_qps, file_path, true); } +// Test analyzing of Iterator +TEST_F(TraceAnalyzerTest, Iterator) { + std::string trace_path = test_path_ + "/trace"; + std::string output_path = test_path_ + "/iterator"; + std::string file_path; + std::vector paras = {"-analyze_iterator"}; + 
paras.push_back("-output_dir=" + output_path); + paras.push_back("-trace_path=" + trace_path); + paras.push_back("-key_space_dir=" + test_path_); + AnalyzeTrace(paras, output_path, trace_path); + + // Check the output of Seek + // check the key_stats file + std::vector k_stats = {"0 0 0 1 1.000000"}; + file_path = output_path + "/test-iterator_Seek-0-accessed_key_stats.txt"; + CheckFileContent(k_stats, file_path, true); + + // Check the access count distribution + std::vector k_dist = {"access_count: 1 num: 1"}; + file_path = + output_path + "/test-iterator_Seek-0-accessed_key_count_distribution.txt"; + CheckFileContent(k_dist, file_path, true); + + // Check the trace sequence + std::vector k_sequence = {"1", "5", "2", "3", "4", + "0", "6", "7", "0"}; + file_path = output_path + "/test-human_readable_trace.txt"; + CheckFileContent(k_sequence, file_path, false); + + // Check the prefix + std::vector k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; + file_path = output_path + "/test-iterator_Seek-0-accessed_key_prefix_cut.txt"; + CheckFileContent(k_prefix, file_path, true); + + // Check the time series + std::vector k_series = {"6 1 0"}; + file_path = output_path + "/test-iterator_Seek-0-time_series.txt"; + CheckFileContent(k_series, file_path, false); + + // Check the accessed key in whole key space + std::vector k_whole_access = {"0 1"}; + file_path = output_path + "/test-iterator_Seek-0-whole_key_stats.txt"; + CheckFileContent(k_whole_access, file_path, true); + + // Check the whole key prefix cut + std::vector k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", + "3 0x64", "4 0x65", "5 0x66"}; + file_path = output_path + "/test-iterator_Seek-0-whole_key_prefix_cut.txt"; + CheckFileContent(k_whole_prefix, file_path, true); + + // Check the overall qps + std::vector all_qps = {"1 1 1 1 2 1 1 1 9"}; + file_path = output_path + "/test-qps_stats.txt"; + CheckFileContent(all_qps, file_path, true); + + // Check the qps of Iterator_Seek + std::vector get_qps = {"1"}; + file_path 
= output_path + "/test-iterator_Seek-0-qps_stats.txt"; + CheckFileContent(get_qps, file_path, true); + + // Check the top k qps prefix cut + std::vector top_qps = {"At time: 0 with QPS: 1", + "The prefix: 0x61 Access count: 1"}; + file_path = + output_path + "/test-iterator_Seek-0-accessed_top_k_qps_prefix_cut.txt"; + CheckFileContent(top_qps, file_path, true); + + // Check the output of SeekForPrev + // check the key_stats file + k_stats = {"0 0 0 1 1.000000"}; + file_path = + output_path + "/test-iterator_SeekForPrev-0-accessed_key_stats.txt"; + CheckFileContent(k_stats, file_path, true); + + // Check the access count distribution + k_dist = {"access_count: 1 num: 1"}; + file_path = + output_path + + "/test-iterator_SeekForPrev-0-accessed_key_count_distribution.txt"; + CheckFileContent(k_dist, file_path, true); + + // Check the prefix + k_prefix = {"0 0 0 0.000000 0.000000 0x30"}; + file_path = + output_path + "/test-iterator_SeekForPrev-0-accessed_key_prefix_cut.txt"; + CheckFileContent(k_prefix, file_path, true); + + // Check the time series + k_series = {"7 0 0"}; + file_path = output_path + "/test-iterator_SeekForPrev-0-time_series.txt"; + CheckFileContent(k_series, file_path, false); + + // Check the accessed key in whole key space + k_whole_access = {"1 1"}; + file_path = output_path + "/test-iterator_SeekForPrev-0-whole_key_stats.txt"; + CheckFileContent(k_whole_access, file_path, true); + + // Check the whole key prefix cut + k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63", "3 0x64", "4 0x65", "5 0x66"}; + file_path = + output_path + "/test-iterator_SeekForPrev-0-whole_key_prefix_cut.txt"; + CheckFileContent(k_whole_prefix, file_path, true); + + // Check the qps of Iterator_SeekForPrev + get_qps = {"1"}; + file_path = output_path + "/test-iterator_SeekForPrev-0-qps_stats.txt"; + CheckFileContent(get_qps, file_path, true); + + // Check the top k qps prefix cut + top_qps = {"At time: 0 with QPS: 1", "The prefix: 0x62 Access count: 1"}; + file_path = 
output_path + + "/test-iterator_SeekForPrev-0-accessed_top_k_qps_prefix_cut.txt"; + CheckFileContent(top_qps, file_path, true); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_tool.cc b/3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_tool.cc similarity index 88% rename from 3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_tool.cc rename to 3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_tool.cc index fc95b6a08f..49f2175a39 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_tool.cc +++ b/3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_tool.cc @@ -63,36 +63,36 @@ DEFINE_string(output_prefix, "trace", DEFINE_bool(output_key_stats, false, "Output the key access count statistics to file\n" "for accessed keys:\n" - "file name: ---accessed_key_stats.txt\n" + "file name: ---accessed_key_stats.txt\n" "Format:[cf_id value_size access_keyid access_count]\n" "for the whole key space keys:\n" - "File name: ---whole_key_stats.txt\n" + "File name: ---whole_key_stats.txt\n" "Format:[whole_key_space_keyid access_count]"); DEFINE_bool(output_access_count_stats, false, "Output the access count distribution statistics to file.\n" - "File name: ---accessed_" + "File name: ---accessed_" "key_count_distribution.txt \n" "Format:[access_count number_of_access_count]"); DEFINE_bool(output_time_series, false, "Output the access time in second of each key, " "such that we can have the time series data of the queries \n" - "File name: ---time_series.txt\n" + "File name: ---time_series.txt\n" "Format:[type_id time_in_sec access_keyid]."); DEFINE_int32(output_prefix_cut, 0, "The number of bytes as prefix to cut the keys.\n" - "if it is enabled, it will generate the following:\n" - "for accessed keys:\n" - "File name: ---" + "If it is enabled, it will generate the following:\n" + "For accessed keys:\n" + "File name: ---" "accessed_key_prefix_cut.txt \n" "Format:[acessed_keyid access_count_of_prefix " "number_of_keys_in_prefix 
average_key_access " "prefix_succ_ratio prefix]\n" - "for whole key space keys:\n" - "File name: --" + "For whole key space keys:\n" + "File name: --" "-whole_key_prefix_cut.txt\n" "Format:[start_keyid_in_whole_keyspace prefix]\n" "if 'output_qps_stats' and 'top_k' are enabled, it will output:\n" - "File name: --" + "File name: --" "-accessed_top_k_qps_prefix_cut.txt\n" "Format:[the_top_ith_qps_time QPS], [prefix qps_of_this_second]."); DEFINE_bool(convert_to_human_readable_trace, false, @@ -101,7 +101,7 @@ DEFINE_bool(convert_to_human_readable_trace, false, "This file will be extremely large " "(similar size as the original binary trace file). " "You can specify 'no_key' to reduce the size, if key is not " - "needed in the next step\n" + "needed in the next step.\n" "File name: _human_readable_trace.txt\n" "Format:[type_id cf_id value_size time_in_micorsec ]."); DEFINE_bool(output_qps_stats, false, @@ -110,8 +110,8 @@ DEFINE_bool(output_qps_stats, false, "The time is started from the first trace record\n" "File name: _qps_stats.txt\n" "Format: [qps_type_1 qps_type_2 ...... overall_qps]\n" - "For each cf and query, it will have its own qps output\n" - "File name: --_qps_stats.txt \n" + "For each cf and query, it will have its own qps output.\n" + "File name: --_qps_stats.txt \n" "Format:[query_count_in_this_second]."); DEFINE_bool(no_print, false, "Do not print out any result"); DEFINE_string( @@ -122,7 +122,7 @@ DEFINE_string( "get, put, delete, single_delete, rangle_delete, merge. No space " "between the pairs separated by commar. Example: =[get,get]... 
" "It will print out the number of pairs of 'A after B' and " - "the average time interval between the two query"); + "the average time interval between the two query."); DEFINE_string(key_space_dir, "", " \n" "The key space files should be: .txt"); @@ -139,11 +139,11 @@ DEFINE_bool(no_key, false, DEFINE_bool(print_overall_stats, true, " Print the stats of the whole trace, " "like total requests, keys, and etc."); -DEFINE_bool(print_key_distribution, false, "Print the key size distribution."); +DEFINE_bool(output_key_distribution, false, "Print the key size distribution."); DEFINE_bool( output_value_distribution, false, "Out put the value size distribution, only available for Put and Merge.\n" - "File name: --" + "File name: --" "-accessed_value_size_distribution.txt\n" "Format:[Number_of_value_size_between x and " "x+value_interval is: ]"); @@ -158,6 +158,9 @@ DEFINE_int32(value_interval, 8, "To output the value distribution, we need to set the value " "intervals and make the statistic of the value size distribution " "in different intervals. 
The default is 8."); +DEFINE_double(sample_ratio, 1.0, + "If the trace size is extremely huge or user want to sample " + "the trace when analyzing, sample ratio can be set (0, 1.0]"); namespace rocksdb { @@ -185,7 +188,7 @@ uint64_t MultiplyCheckOverflow(uint64_t op1, uint64_t op2) { return (op1 * op2); } -void DecodeCFAndKey(std::string& buffer, uint32_t* cf_id, Slice* key) { +void DecodeCFAndKeyFromString(std::string& buffer, uint32_t* cf_id, Slice* key) { Slice buf(buffer); GetFixed32(&buf, cf_id); GetLengthPrefixedSlice(&buf, key); @@ -276,9 +279,17 @@ TraceAnalyzer::TraceAnalyzer(std::string& trace_path, std::string& output_path, total_access_keys_ = 0; total_gets_ = 0; total_writes_ = 0; + trace_create_time_ = 0; begin_time_ = 0; end_time_ = 0; time_series_start_ = 0; + cur_time_sec_ = 0; + if (FLAGS_sample_ratio > 1.0 || FLAGS_sample_ratio <= 0) { + sample_max_ = 1; + } else { + sample_max_ = static_cast(1.0 / FLAGS_sample_ratio); + } + ta_.resize(kTaTypeNum); ta_[0].type_name = "get"; if (FLAGS_analyze_get) { @@ -328,6 +339,9 @@ TraceAnalyzer::TraceAnalyzer(std::string& trace_path, std::string& output_path, } else { ta_[7].enabled = false; } + for (int i = 0; i < kTaTypeNum; i++) { + ta_[i].sample_count = 0; + } } TraceAnalyzer::~TraceAnalyzer() {} @@ -363,6 +377,13 @@ Status TraceAnalyzer::PrepareProcessing() { if (!s.ok()) { return s; } + + qps_stats_name = + output_path_ + "/" + FLAGS_output_prefix + "-cf_qps_stats.txt"; + s = env_->NewWritableFile(qps_stats_name, &cf_qps_f_, env_options_); + if (!s.ok()) { + return s; + } } return Status::OK(); } @@ -422,6 +443,7 @@ Status TraceAnalyzer::StartProcessing() { fprintf(stderr, "Cannot read the header\n"); return s; } + trace_create_time_ = header.ts; if (FLAGS_output_time_series) { time_series_start_ = header.ts; } @@ -459,7 +481,7 @@ Status TraceAnalyzer::StartProcessing() { } else if (trace.type == kTraceGet) { uint32_t cf_id = 0; Slice key; - DecodeCFAndKey(trace.payload, &cf_id, &key); + 
DecodeCFAndKeyFromString(trace.payload, &cf_id, &key); total_gets_++; s = HandleGet(cf_id, key.ToString(), trace.ts, 1); @@ -471,7 +493,7 @@ Status TraceAnalyzer::StartProcessing() { trace.type == kTraceIteratorSeekForPrev) { uint32_t cf_id = 0; Slice key; - DecodeCFAndKey(trace.payload, &cf_id, &key); + DecodeCFAndKeyFromString(trace.payload, &cf_id, &key); s = HandleIter(cf_id, key.ToString(), trace.ts, trace.type); if (!s.ok()) { fprintf(stderr, "Cannot process the iterator in the trace\n"); @@ -521,7 +543,7 @@ Status TraceAnalyzer::MakeStatistics() { } // Generate the key size distribution data - if (FLAGS_print_key_distribution) { + if (FLAGS_output_key_distribution) { if (stat.second.a_key_size_stats.find(record.first.size()) == stat.second.a_key_size_stats.end()) { stat.second.a_key_size_stats[record.first.size()] = 1; @@ -565,17 +587,31 @@ Status TraceAnalyzer::MakeStatistics() { // find the medium of the key size uint64_t k_count = 0; + bool get_mid = false; for (auto& record : stat.second.a_key_size_stats) { k_count += record.second; - if (k_count >= stat.second.a_key_mid) { + if (!get_mid && k_count >= stat.second.a_key_mid) { stat.second.a_key_mid = record.first; - break; + get_mid = true; + } + if (FLAGS_output_key_distribution && stat.second.a_key_size_f) { + ret = sprintf(buffer_, "%" PRIu64 " %" PRIu64 "\n", record.first, + record.second); + if (ret < 0) { + return Status::IOError("Format output failed"); + } + std::string printout(buffer_); + s = stat.second.a_key_size_f->Append(printout); + if (!s.ok()) { + fprintf(stderr, "Write key size distribution file failed\n"); + return s; + } } } // output the value size distribution uint64_t v_begin = 0, v_end = 0, v_count = 0; - bool get_mid = false; + get_mid = false; for (auto& record : stat.second.a_value_size_stats) { v_begin = v_end; v_end = (record.first + 1) * FLAGS_value_interval; @@ -740,7 +776,11 @@ Status TraceAnalyzer::MakeStatisticCorrelation(TraceStats& stats, // Process the statistics of 
QPS Status TraceAnalyzer::MakeStatisticQPS() { - uint32_t duration = static_cast((end_time_ - begin_time_) / 1000000); + if(begin_time_ == 0) { + begin_time_ = trace_create_time_; + } + uint32_t duration = + static_cast((end_time_ - begin_time_) / 1000000); int ret; Status s; std::vector> type_qps( @@ -817,6 +857,32 @@ Status TraceAnalyzer::MakeStatisticQPS() { stat.second.a_ave_qps = (static_cast(cf_qps_sum)) / duration; } + // Output the accessed unique key number change overtime + if (stat.second.a_key_num_f) { + uint64_t cur_uni_key = + static_cast(stat.second.a_key_stats.size()); + double cur_ratio = 0.0; + uint64_t cur_num = 0; + for (uint32_t i = 0; i < duration; i++) { + auto find_time = stat.second.uni_key_num.find(i); + if (find_time != stat.second.uni_key_num.end()) { + cur_ratio = (static_cast(find_time->second)) / cur_uni_key; + cur_num = find_time->second; + } + ret = sprintf(buffer_, "%" PRIu64 " %.12f\n", cur_num, cur_ratio); + if (ret < 0) { + return Status::IOError("Format the output failed"); + } + std::string printout(buffer_); + s = stat.second.a_key_num_f->Append(printout); + if (!s.ok()) { + fprintf(stderr, + "Write accessed unique key number change file failed\n"); + return s; + } + } + } + // output the prefix of top k access peak if (FLAGS_output_prefix_cut > 0 && stat.second.a_top_qps_prefix_f) { while (!stat.second.top_k_qps_sec.empty()) { @@ -881,6 +947,33 @@ Status TraceAnalyzer::MakeStatisticQPS() { } } + if (cf_qps_f_) { + int cfs_size = static_cast(cfs_.size()); + uint32_t v; + for (uint32_t i = 0; i < duration; i++) { + for (int cf = 0; cf < cfs_size; cf++) { + if (cfs_[cf].cf_qps.find(i) != cfs_[cf].cf_qps.end()) { + v = cfs_[cf].cf_qps[i]; + } else { + v = 0; + } + if (cf < cfs_size - 1) { + ret = sprintf(buffer_, "%u ", v); + } else { + ret = sprintf(buffer_, "%u\n", v); + } + if (ret < 0) { + return Status::IOError("Format the output failed"); + } + std::string printout(buffer_); + s = cf_qps_f_->Append(printout); + if 
(!s.ok()) { + return s; + } + } + } + } + qps_peak_ = qps_peak; for (int type = 0; type <= kTaTypeNum; type++) { if (duration == 0) { @@ -1009,7 +1102,7 @@ Status TraceAnalyzer::ReProcessing() { } // Make the statistics fo the key size distribution - if (FLAGS_print_key_distribution) { + if (FLAGS_output_key_distribution) { if (cfs_[cf_id].w_key_size_stats.find(input_key.size()) == cfs_[cf_id].w_key_size_stats.end()) { cfs_[cf_id].w_key_size_stats[input_key.size()] = 1; @@ -1128,6 +1221,11 @@ Status TraceAnalyzer::KeyStatsInsertion(const uint32_t& type, tmp_qps_map[prefix] = 1; ta_[type].stats[cf_id].a_qps_prefix_stats[time_in_sec] = tmp_qps_map; } + if (time_in_sec != cur_time_sec_) { + ta_[type].stats[cf_id].uni_key_num[cur_time_sec_] = + static_cast(ta_[type].stats[cf_id].a_key_stats.size()); + cur_time_sec_ = time_in_sec; + } } else { found_stats->second.a_count++; found_stats->second.a_key_size_sqsum += MultiplyCheckOverflow( @@ -1148,6 +1246,11 @@ Status TraceAnalyzer::KeyStatsInsertion(const uint32_t& type, s = StatsUnitCorrelationUpdate(found_key->second, type, ts, key); } } + if (time_in_sec != cur_time_sec_) { + found_stats->second.uni_key_num[cur_time_sec_] = + static_cast(found_stats->second.a_key_stats.size()); + cur_time_sec_ = time_in_sec; + } auto found_value = found_stats->second.a_value_size_stats.find(dist_value_size); @@ -1188,6 +1291,10 @@ Status TraceAnalyzer::KeyStatsInsertion(const uint32_t& type, cfs_[cf_id] = cf_unit; } + if (FLAGS_output_qps_stats) { + cfs_[cf_id].cf_qps[time_in_sec]++; + } + if (FLAGS_output_time_series) { TraceUnit trace_u; trace_u.type = type; @@ -1250,6 +1357,9 @@ Status TraceAnalyzer::OpenStatsOutputFiles(const std::string& type, if (FLAGS_output_key_stats) { s = CreateOutputFile(type, new_stats.cf_name, "accessed_key_stats.txt", &new_stats.a_key_f); + s = CreateOutputFile(type, new_stats.cf_name, + "accessed_unique_key_num_change.txt", + &new_stats.a_key_num_f); if (!FLAGS_key_space_dir.empty()) { s = 
CreateOutputFile(type, new_stats.cf_name, "whole_key_stats.txt", &new_stats.w_key_f); @@ -1288,6 +1398,12 @@ Status TraceAnalyzer::OpenStatsOutputFiles(const std::string& type, &new_stats.a_value_size_f); } + if (FLAGS_output_key_distribution) { + s = CreateOutputFile(type, new_stats.cf_name, + "accessed_key_size_distribution.txt", + &new_stats.a_key_size_f); + } + if (FLAGS_output_qps_stats) { s = CreateOutputFile(type, new_stats.cf_name, "qps_stats.txt", &new_stats.a_qps_f); @@ -1327,6 +1443,10 @@ void TraceAnalyzer::CloseOutputFiles() { stat.second.a_key_f->Close(); } + if (stat.second.a_key_num_f) { + stat.second.a_key_num_f->Close(); + } + if (stat.second.a_count_dist_f) { stat.second.a_count_dist_f->Close(); } @@ -1339,6 +1459,10 @@ void TraceAnalyzer::CloseOutputFiles() { stat.second.a_value_size_f->Close(); } + if (stat.second.a_key_size_f) { + stat.second.a_key_size_f->Close(); + } + if (stat.second.a_qps_f) { stat.second.a_qps_f->Close(); } @@ -1372,6 +1496,15 @@ Status TraceAnalyzer::HandleGet(uint32_t column_family_id, } } + if (ta_[TraceOperationType::kGet].sample_count >= sample_max_) { + ta_[TraceOperationType::kGet].sample_count = 0; + } + if (ta_[TraceOperationType::kGet].sample_count > 0) { + ta_[TraceOperationType::kGet].sample_count++; + return Status::OK(); + } + ta_[TraceOperationType::kGet].sample_count++; + if (!ta_[TraceOperationType::kGet].enabled) { return Status::OK(); } @@ -1399,6 +1532,15 @@ Status TraceAnalyzer::HandlePut(uint32_t column_family_id, const Slice& key, } } + if (ta_[TraceOperationType::kPut].sample_count >= sample_max_) { + ta_[TraceOperationType::kPut].sample_count = 0; + } + if (ta_[TraceOperationType::kPut].sample_count > 0) { + ta_[TraceOperationType::kPut].sample_count++; + return Status::OK(); + } + ta_[TraceOperationType::kPut].sample_count++; + if (!ta_[TraceOperationType::kPut].enabled) { return Status::OK(); } @@ -1423,6 +1565,15 @@ Status TraceAnalyzer::HandleDelete(uint32_t column_family_id, } } + if 
(ta_[TraceOperationType::kDelete].sample_count >= sample_max_) { + ta_[TraceOperationType::kDelete].sample_count = 0; + } + if (ta_[TraceOperationType::kDelete].sample_count > 0) { + ta_[TraceOperationType::kDelete].sample_count++; + return Status::OK(); + } + ta_[TraceOperationType::kDelete].sample_count++; + if (!ta_[TraceOperationType::kDelete].enabled) { return Status::OK(); } @@ -1447,6 +1598,15 @@ Status TraceAnalyzer::HandleSingleDelete(uint32_t column_family_id, } } + if (ta_[TraceOperationType::kSingleDelete].sample_count >= sample_max_) { + ta_[TraceOperationType::kSingleDelete].sample_count = 0; + } + if (ta_[TraceOperationType::kSingleDelete].sample_count > 0) { + ta_[TraceOperationType::kSingleDelete].sample_count++; + return Status::OK(); + } + ta_[TraceOperationType::kSingleDelete].sample_count++; + if (!ta_[TraceOperationType::kSingleDelete].enabled) { return Status::OK(); } @@ -1472,6 +1632,15 @@ Status TraceAnalyzer::HandleDeleteRange(uint32_t column_family_id, } } + if (ta_[TraceOperationType::kRangeDelete].sample_count >= sample_max_) { + ta_[TraceOperationType::kRangeDelete].sample_count = 0; + } + if (ta_[TraceOperationType::kRangeDelete].sample_count > 0) { + ta_[TraceOperationType::kRangeDelete].sample_count++; + return Status::OK(); + } + ta_[TraceOperationType::kRangeDelete].sample_count++; + if (!ta_[TraceOperationType::kRangeDelete].enabled) { return Status::OK(); } @@ -1498,6 +1667,15 @@ Status TraceAnalyzer::HandleMerge(uint32_t column_family_id, const Slice& key, } } + if (ta_[TraceOperationType::kMerge].sample_count >= sample_max_) { + ta_[TraceOperationType::kMerge].sample_count = 0; + } + if (ta_[TraceOperationType::kMerge].sample_count > 0) { + ta_[TraceOperationType::kMerge].sample_count++; + return Status::OK(); + } + ta_[TraceOperationType::kMerge].sample_count++; + if (!ta_[TraceOperationType::kMerge].enabled) { return Status::OK(); } @@ -1534,6 +1712,15 @@ Status TraceAnalyzer::HandleIter(uint32_t column_family_id, } } + if 
(ta_[type].sample_count >= sample_max_) { + ta_[type].sample_count = 0; + } + if (ta_[type].sample_count > 0) { + ta_[type].sample_count++; + return Status::OK(); + } + ta_[type].sample_count++; + if (!ta_[type].enabled) { return Status::OK(); } @@ -1595,6 +1782,8 @@ void TraceAnalyzer::PrintStatistics() { ta_[type].total_succ_access += stat.a_succ_count; printf("*********************************************************\n"); printf("colume family id: %u\n", stat.cf_id); + printf("Total number of queries to this cf by %s: %" PRIu64 "\n", + ta_[type].type_name.c_str(), stat.a_count); printf("Total unique keys in this cf: %" PRIu64 "\n", total_a_keys); printf("Average key size: %f key size medium: %" PRIu64 " Key size Variation: %f\n", @@ -1641,15 +1830,6 @@ void TraceAnalyzer::PrintStatistics() { } } - // print the key size distribution - if (FLAGS_print_key_distribution) { - printf("The key size distribution\n"); - for (auto& record : stat.a_key_size_stats) { - printf("key_size %" PRIu64 " nums: %" PRIu64 "\n", record.first, - record.second); - } - } - // print the operation correlations if (!FLAGS_print_correlation.empty()) { for (int correlation = 0; @@ -1699,6 +1879,8 @@ void TraceAnalyzer::PrintStatistics() { printf("Average QPS per second: %f Peak QPS: %u\n", qps_ave_[kTaTypeNum], qps_peak_[kTaTypeNum]); } + printf("The statistics related to query number need to times: %u\n", + sample_max_); printf("Total_requests: %" PRIu64 " Total_accessed_keys: %" PRIu64 " Total_gets: %" PRIu64 " Total_write_batch: %" PRIu64 "\n", total_requests_, total_access_keys_, total_gets_, total_writes_); @@ -1794,5 +1976,5 @@ int trace_analyzer_tool(int argc, char** argv) { } } // namespace rocksdb -#endif // Endif of Gflag -#endif // RocksDB LITE +#endif // Endif of Gflag +#endif // RocksDB LITE diff --git a/3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_tool.h b/3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_tool.h similarity index 92% rename from 
3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_tool.h rename to 3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_tool.h index 12a0020849..be96f5005d 100644 --- a/3rdParty/rocksdb/v5.16.X/tools/trace_analyzer_tool.h +++ b/3rdParty/rocksdb/v5.18.X/tools/trace_analyzer_tool.h @@ -115,12 +115,15 @@ struct TraceStats { top_k_qps_sec; std::list time_series; std::vector> correlation_output; + std::map uni_key_num; std::unique_ptr time_series_f; std::unique_ptr a_key_f; std::unique_ptr a_count_dist_f; std::unique_ptr a_prefix_cut_f; std::unique_ptr a_value_size_f; + std::unique_ptr a_key_size_f; + std::unique_ptr a_key_num_f; std::unique_ptr a_qps_f; std::unique_ptr a_top_qps_prefix_f; std::unique_ptr w_key_f; @@ -128,6 +131,10 @@ struct TraceStats { TraceStats(); ~TraceStats(); + TraceStats(const TraceStats&) = delete; + TraceStats& operator=(const TraceStats&) = delete; + TraceStats(TraceStats&&) = default; + TraceStats& operator=(TraceStats&&) = default; }; struct TypeUnit { @@ -136,7 +143,14 @@ struct TypeUnit { uint64_t total_keys; uint64_t total_access; uint64_t total_succ_access; + uint32_t sample_count; std::map stats; + TypeUnit() = default; + ~TypeUnit() = default; + TypeUnit(const TypeUnit&) = delete; + TypeUnit& operator=(const TypeUnit&) = delete; + TypeUnit(TypeUnit&&) = default; + TypeUnit& operator=(TypeUnit&&) = default; }; struct CfUnit { @@ -145,6 +159,7 @@ struct CfUnit { uint64_t a_count; // the total keys in this cf that are accessed std::map w_key_size_stats; // whole key space key size // statistic this cf + std::map cf_qps; }; class TraceAnalyzer { @@ -194,11 +209,15 @@ class TraceAnalyzer { uint64_t total_access_keys_; uint64_t total_gets_; uint64_t total_writes_; + uint64_t trace_create_time_; uint64_t begin_time_; uint64_t end_time_; uint64_t time_series_start_; + uint32_t sample_max_; + uint32_t cur_time_sec_; std::unique_ptr trace_sequence_f_; // readable trace std::unique_ptr qps_f_; // overall qps + std::unique_ptr cf_qps_f_; // The qps of each 
CF> std::unique_ptr wkey_input_f_; std::vector ta_; // The main statistic collecting data structure std::map cfs_; // All the cf_id appears in this trace; diff --git a/3rdParty/rocksdb/v5.16.X/tools/verify_random_db.sh b/3rdParty/rocksdb/v5.18.X/tools/verify_random_db.sh similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/verify_random_db.sh rename to 3rdParty/rocksdb/v5.18.X/tools/verify_random_db.sh diff --git a/3rdParty/rocksdb/v5.18.X/tools/write_external_sst.sh b/3rdParty/rocksdb/v5.18.X/tools/write_external_sst.sh new file mode 100755 index 0000000000..6efc300202 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/tools/write_external_sst.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# +# +# + +if [ "$#" -lt 3 ]; then + echo "usage: $BASH_SOURCE " + exit 1 +fi + +input_data_dir=$1 +db_dir=$2 +extern_sst_dir=$3 +rm -rf $db_dir + +set -e + +n=0 + +for f in `find $input_data_dir -name sorted_data*` +do + echo == Writing external SST file $f to $extern_sst_dir/extern_sst${n} + ./ldb --db=$db_dir --create_if_missing write_extern_sst $extern_sst_dir/extern_sst${n} < $f + let "n = n + 1" +done diff --git a/3rdParty/rocksdb/v5.16.X/tools/write_stress.cc b/3rdParty/rocksdb/v5.18.X/tools/write_stress.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/write_stress.cc rename to 3rdParty/rocksdb/v5.18.X/tools/write_stress.cc diff --git a/3rdParty/rocksdb/v5.16.X/tools/write_stress_runner.py b/3rdParty/rocksdb/v5.18.X/tools/write_stress_runner.py similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/tools/write_stress_runner.py rename to 3rdParty/rocksdb/v5.18.X/tools/write_stress_runner.py diff --git a/3rdParty/rocksdb/v5.16.X/util/aligned_buffer.h b/3rdParty/rocksdb/v5.18.X/util/aligned_buffer.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/aligned_buffer.h rename to 3rdParty/rocksdb/v5.18.X/util/aligned_buffer.h diff --git a/3rdParty/rocksdb/v5.16.X/util/allocator.h b/3rdParty/rocksdb/v5.18.X/util/allocator.h similarity index 
100% rename from 3rdParty/rocksdb/v5.16.X/util/allocator.h rename to 3rdParty/rocksdb/v5.18.X/util/allocator.h diff --git a/3rdParty/rocksdb/v5.16.X/util/arena.cc b/3rdParty/rocksdb/v5.18.X/util/arena.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/arena.cc rename to 3rdParty/rocksdb/v5.18.X/util/arena.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/arena.h b/3rdParty/rocksdb/v5.18.X/util/arena.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/arena.h rename to 3rdParty/rocksdb/v5.18.X/util/arena.h diff --git a/3rdParty/rocksdb/v5.16.X/util/arena_test.cc b/3rdParty/rocksdb/v5.18.X/util/arena_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/arena_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/arena_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/auto_roll_logger.cc b/3rdParty/rocksdb/v5.18.X/util/auto_roll_logger.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/auto_roll_logger.cc rename to 3rdParty/rocksdb/v5.18.X/util/auto_roll_logger.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/auto_roll_logger.h b/3rdParty/rocksdb/v5.18.X/util/auto_roll_logger.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/auto_roll_logger.h rename to 3rdParty/rocksdb/v5.18.X/util/auto_roll_logger.h diff --git a/3rdParty/rocksdb/v5.16.X/util/auto_roll_logger_test.cc b/3rdParty/rocksdb/v5.18.X/util/auto_roll_logger_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/util/auto_roll_logger_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/auto_roll_logger_test.cc index 5a6b3abc11..284a981521 100644 --- a/3rdParty/rocksdb/v5.16.X/util/auto_roll_logger_test.cc +++ b/3rdParty/rocksdb/v5.18.X/util/auto_roll_logger_test.cc @@ -230,7 +230,7 @@ TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) { TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) { DBOptions options; NoSleepEnv nse(Env::Default()); - shared_ptr logger; + std::shared_ptr logger; // Normal logger 
ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); @@ -273,7 +273,7 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) { TEST_F(AutoRollLoggerTest, LogFlushWhileRolling) { DBOptions options; - shared_ptr logger; + std::shared_ptr logger; InitTestDb(); options.max_log_file_size = 1024 * 5; diff --git a/3rdParty/rocksdb/v5.16.X/util/autovector.h b/3rdParty/rocksdb/v5.18.X/util/autovector.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/util/autovector.h rename to 3rdParty/rocksdb/v5.18.X/util/autovector.h index b5c8471245..97348d818a 100644 --- a/3rdParty/rocksdb/v5.16.X/util/autovector.h +++ b/3rdParty/rocksdb/v5.18.X/util/autovector.h @@ -271,7 +271,12 @@ class autovector { template void emplace_back(Args&&... args) { - push_back(value_type(args...)); + if (num_stack_items_ < kSize) { + values_[num_stack_items_++] = + std::move(value_type(std::forward(args)...)); + } else { + vect_.emplace_back(std::forward(args)...); + } } void pop_back() { diff --git a/3rdParty/rocksdb/v5.16.X/util/autovector_test.cc b/3rdParty/rocksdb/v5.18.X/util/autovector_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/autovector_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/autovector_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/bloom.cc b/3rdParty/rocksdb/v5.18.X/util/bloom.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/bloom.cc rename to 3rdParty/rocksdb/v5.18.X/util/bloom.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/bloom_test.cc b/3rdParty/rocksdb/v5.18.X/util/bloom_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/bloom_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/bloom_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/build_version.cc.in b/3rdParty/rocksdb/v5.18.X/util/build_version.cc.in similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/build_version.cc.in rename to 3rdParty/rocksdb/v5.18.X/util/build_version.cc.in diff --git 
a/3rdParty/rocksdb/v5.16.X/util/build_version.h b/3rdParty/rocksdb/v5.18.X/util/build_version.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/build_version.h rename to 3rdParty/rocksdb/v5.18.X/util/build_version.h diff --git a/3rdParty/rocksdb/v5.16.X/util/cast_util.h b/3rdParty/rocksdb/v5.18.X/util/cast_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/cast_util.h rename to 3rdParty/rocksdb/v5.18.X/util/cast_util.h diff --git a/3rdParty/rocksdb/v5.16.X/util/channel.h b/3rdParty/rocksdb/v5.18.X/util/channel.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/channel.h rename to 3rdParty/rocksdb/v5.18.X/util/channel.h index 1b030192cf..0225482c00 100644 --- a/3rdParty/rocksdb/v5.16.X/util/channel.h +++ b/3rdParty/rocksdb/v5.18.X/util/channel.h @@ -3,13 +3,13 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). +#pragma once + #include #include #include #include -#pragma once - namespace rocksdb { template diff --git a/3rdParty/rocksdb/v5.16.X/util/coding.cc b/3rdParty/rocksdb/v5.18.X/util/coding.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/coding.cc rename to 3rdParty/rocksdb/v5.18.X/util/coding.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/coding.h b/3rdParty/rocksdb/v5.18.X/util/coding.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/util/coding.h rename to 3rdParty/rocksdb/v5.18.X/util/coding.h index 27a638347f..4046a2b60b 100644 --- a/3rdParty/rocksdb/v5.16.X/util/coding.h +++ b/3rdParty/rocksdb/v5.18.X/util/coding.h @@ -64,7 +64,8 @@ extern Slice GetLengthPrefixedSlice(const char* data); extern Slice GetSliceUntil(Slice* slice, char delimiter); -// Borrowed from https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208 +// Borrowed from +// 
https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208 constexpr inline uint64_t i64ToZigzag(const int64_t l) { return (static_cast(l) << 1) ^ static_cast(l >> 63); } diff --git a/3rdParty/rocksdb/v5.16.X/util/coding_test.cc b/3rdParty/rocksdb/v5.18.X/util/coding_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/coding_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/coding_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/compaction_job_stats_impl.cc b/3rdParty/rocksdb/v5.18.X/util/compaction_job_stats_impl.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/compaction_job_stats_impl.cc rename to 3rdParty/rocksdb/v5.18.X/util/compaction_job_stats_impl.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/comparator.cc b/3rdParty/rocksdb/v5.18.X/util/comparator.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/comparator.cc rename to 3rdParty/rocksdb/v5.18.X/util/comparator.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/compression.h b/3rdParty/rocksdb/v5.18.X/util/compression.h similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/util/compression.h rename to 3rdParty/rocksdb/v5.18.X/util/compression.h index 3a980a5d88..e91faeac65 100644 --- a/3rdParty/rocksdb/v5.16.X/util/compression.h +++ b/3rdParty/rocksdb/v5.18.X/util/compression.h @@ -14,8 +14,10 @@ #include #include "rocksdb/options.h" +#include "rocksdb/table.h" #include "util/coding.h" #include "util/compression_context_cache.h" +#include "util/memory_allocator.h" #ifdef SNAPPY #include @@ -36,9 +38,9 @@ #if defined(ZSTD) #include -#if ZSTD_VERSION_NUMBER >= 800 // v0.8.0+ +#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ #include -#endif // ZSTD_VERSION_NUMBER >= 800 +#endif // ZSTD_VERSION_NUMBER >= 10103 namespace rocksdb { // Need this for the context allocation override // On windows we need to do this explicitly @@ -495,11 +497,10 @@ inline bool Zlib_Compress(const 
CompressionContext& ctx, // header in varint32 format // @param compression_dict Data for presetting the compression library's // dictionary. -inline char* Zlib_Uncompress(const UncompressionContext& ctx, - const char* input_data, size_t input_length, - int* decompress_size, - uint32_t compress_format_version, - int windowBits = -14) { +inline CacheAllocationPtr Zlib_Uncompress( + const UncompressionContext& ctx, const char* input_data, + size_t input_length, int* decompress_size, uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr, int windowBits = -14) { #ifdef ZLIB uint32_t output_len = 0; if (compress_format_version == 2) { @@ -541,9 +542,9 @@ inline char* Zlib_Uncompress(const UncompressionContext& ctx, _stream.next_in = (Bytef*)input_data; _stream.avail_in = static_cast(input_length); - char* output = new char[output_len]; + auto output = AllocateBlock(output_len, allocator); - _stream.next_out = (Bytef*)output; + _stream.next_out = (Bytef*)output.get(); _stream.avail_out = static_cast(output_len); bool done = false; @@ -561,19 +562,17 @@ inline char* Zlib_Uncompress(const UncompressionContext& ctx, size_t old_sz = output_len; uint32_t output_len_delta = output_len / 5; output_len += output_len_delta < 10 ? 10 : output_len_delta; - char* tmp = new char[output_len]; - memcpy(tmp, output, old_sz); - delete[] output; - output = tmp; + auto tmp = AllocateBlock(output_len, allocator); + memcpy(tmp.get(), output.get(), old_sz); + output = std::move(tmp); // Set more output. 
- _stream.next_out = (Bytef*)(output + old_sz); + _stream.next_out = (Bytef*)(output.get() + old_sz); _stream.avail_out = static_cast(output_len - old_sz); break; } case Z_BUF_ERROR: default: - delete[] output; inflateEnd(&_stream); return nullptr; } @@ -590,6 +589,7 @@ inline char* Zlib_Uncompress(const UncompressionContext& ctx, (void)input_length; (void)decompress_size; (void)compress_format_version; + (void)allocator; (void)windowBits; return nullptr; #endif @@ -660,9 +660,9 @@ inline bool BZip2_Compress(const CompressionContext& /*ctx*/, // block header // compress_format_version == 2 -- decompressed size is included in the block // header in varint32 format -inline char* BZip2_Uncompress(const char* input_data, size_t input_length, - int* decompress_size, - uint32_t compress_format_version) { +inline CacheAllocationPtr BZip2_Uncompress( + const char* input_data, size_t input_length, int* decompress_size, + uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) { #ifdef BZIP2 uint32_t output_len = 0; if (compress_format_version == 2) { @@ -690,9 +690,9 @@ inline char* BZip2_Uncompress(const char* input_data, size_t input_length, _stream.next_in = (char*)input_data; _stream.avail_in = static_cast(input_length); - char* output = new char[output_len]; + auto output = AllocateBlock(output_len, allocator); - _stream.next_out = (char*)output; + _stream.next_out = (char*)output.get(); _stream.avail_out = static_cast(output_len); bool done = false; @@ -709,18 +709,16 @@ inline char* BZip2_Uncompress(const char* input_data, size_t input_length, assert(compress_format_version != 2); uint32_t old_sz = output_len; output_len = output_len * 1.2; - char* tmp = new char[output_len]; - memcpy(tmp, output, old_sz); - delete[] output; - output = tmp; + auto tmp = AllocateBlock(output_len, allocator); + memcpy(tmp.get(), output.get(), old_sz); + output = std::move(tmp); // Set more output. 
- _stream.next_out = (char*)(output + old_sz); + _stream.next_out = (char*)(output.get() + old_sz); _stream.avail_out = static_cast(output_len - old_sz); break; } default: - delete[] output; BZ2_bzDecompressEnd(&_stream); return nullptr; } @@ -736,6 +734,7 @@ inline char* BZip2_Uncompress(const char* input_data, size_t input_length, (void)input_length; (void)decompress_size; (void)compress_format_version; + (void)allocator; return nullptr; #endif } @@ -791,6 +790,7 @@ inline bool LZ4_Compress(const CompressionContext& ctx, #else // up to r123 outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len], static_cast(length), compress_bound); + (void)ctx; #endif // LZ4_VERSION_NUMBER >= 10400 if (outlen == 0) { @@ -814,10 +814,12 @@ inline bool LZ4_Compress(const CompressionContext& ctx, // header in varint32 format // @param compression_dict Data for presetting the compression library's // dictionary. -inline char* LZ4_Uncompress(const UncompressionContext& ctx, - const char* input_data, size_t input_length, - int* decompress_size, - uint32_t compress_format_version) { +inline CacheAllocationPtr LZ4_Uncompress(const UncompressionContext& ctx, + const char* input_data, + size_t input_length, + int* decompress_size, + uint32_t compress_format_version, + MemoryAllocator* allocator = nullptr) { #ifdef LZ4 uint32_t output_len = 0; if (compress_format_version == 2) { @@ -837,7 +839,7 @@ inline char* LZ4_Uncompress(const UncompressionContext& ctx, input_data += 8; } - char* output = new char[output_len]; + auto output = AllocateBlock(output_len, allocator); #if LZ4_VERSION_NUMBER >= 10400 // r124+ LZ4_streamDecode_t* stream = LZ4_createStreamDecode(); if (ctx.dict().size()) { @@ -845,17 +847,17 @@ inline char* LZ4_Uncompress(const UncompressionContext& ctx, static_cast(ctx.dict().size())); } *decompress_size = LZ4_decompress_safe_continue( - stream, input_data, output, static_cast(input_length), + stream, input_data, output.get(), static_cast(input_length), 
static_cast(output_len)); LZ4_freeStreamDecode(stream); #else // up to r123 - *decompress_size = - LZ4_decompress_safe(input_data, output, static_cast(input_length), - static_cast(output_len)); + *decompress_size = LZ4_decompress_safe(input_data, output.get(), + static_cast(input_length), + static_cast(output_len)); + (void)ctx; #endif // LZ4_VERSION_NUMBER >= 10400 if (*decompress_size < 0) { - delete[] output; return nullptr; } assert(*decompress_size == static_cast(output_len)); @@ -866,6 +868,7 @@ inline char* LZ4_Uncompress(const UncompressionContext& ctx, (void)input_length; (void)decompress_size; (void)compress_format_version; + (void)allocator; return nullptr; #endif } @@ -1028,9 +1031,10 @@ inline bool ZSTD_Compress(const CompressionContext& ctx, const char* input, // @param compression_dict Data for presetting the compression library's // dictionary. -inline char* ZSTD_Uncompress(const UncompressionContext& ctx, - const char* input_data, size_t input_length, - int* decompress_size) { +inline CacheAllocationPtr ZSTD_Uncompress( + const UncompressionContext& ctx, const char* input_data, + size_t input_length, int* decompress_size, + MemoryAllocator* allocator = nullptr) { #ifdef ZSTD uint32_t output_len = 0; if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, @@ -1038,17 +1042,17 @@ inline char* ZSTD_Uncompress(const UncompressionContext& ctx, return nullptr; } - char* output = new char[output_len]; + auto output = AllocateBlock(output_len, allocator); size_t actual_output_length; #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ ZSTD_DCtx* context = ctx.GetZSTDContext(); assert(context != nullptr); actual_output_length = ZSTD_decompress_usingDict( - context, output, output_len, input_data, input_length, ctx.dict().data(), - ctx.dict().size()); + context, output.get(), output_len, input_data, input_length, + ctx.dict().data(), ctx.dict().size()); #else // up to v0.4.x actual_output_length = - ZSTD_decompress(output, output_len, input_data, 
input_length); + ZSTD_decompress(output.get(), output_len, input_data, input_length); #endif // ZSTD_VERSION_NUMBER >= 500 assert(actual_output_length == output_len); *decompress_size = static_cast(actual_output_length); @@ -1058,6 +1062,7 @@ inline char* ZSTD_Uncompress(const UncompressionContext& ctx, (void)input_data; (void)input_length; (void)decompress_size; + (void)allocator; return nullptr; #endif } @@ -1065,9 +1070,10 @@ inline char* ZSTD_Uncompress(const UncompressionContext& ctx, inline std::string ZSTD_TrainDictionary(const std::string& samples, const std::vector& sample_lens, size_t max_dict_bytes) { - // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable - // only since v0.8.0. For now we enable the feature in stable versions only. -#if ZSTD_VERSION_NUMBER >= 800 // v0.8.0+ + // Dictionary trainer is available since v0.6.1 for static linking, but not + // available for dynamic linking until v1.1.3. For now we enable the feature + // in v1.1.3+ only. +#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ std::string dict_data(max_dict_bytes, '\0'); size_t dict_len = ZDICT_trainFromBuffer( &dict_data[0], max_dict_bytes, &samples[0], &sample_lens[0], @@ -1078,13 +1084,13 @@ inline std::string ZSTD_TrainDictionary(const std::string& samples, assert(dict_len <= max_dict_bytes); dict_data.resize(dict_len); return dict_data; -#else // up to v0.7.x +#else // up to v1.1.2 assert(false); (void)samples; (void)sample_lens; (void)max_dict_bytes; return ""; -#endif // ZSTD_VERSION_NUMBER >= 800 +#endif // ZSTD_VERSION_NUMBER >= 10103 } inline std::string ZSTD_TrainDictionary(const std::string& samples, @@ -1092,18 +1098,18 @@ inline std::string ZSTD_TrainDictionary(const std::string& samples, size_t max_dict_bytes) { // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable // only since v0.8.0. For now we enable the feature in stable versions only. 
-#if ZSTD_VERSION_NUMBER >= 800 // v0.8.0+ +#if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+ // skips potential partial sample at the end of "samples" size_t num_samples = samples.size() >> sample_len_shift; std::vector sample_lens(num_samples, size_t(1) << sample_len_shift); return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes); -#else // up to v0.7.x +#else // up to v1.1.2 assert(false); (void)samples; (void)sample_len_shift; (void)max_dict_bytes; return ""; -#endif // ZSTD_VERSION_NUMBER >= 800 +#endif // ZSTD_VERSION_NUMBER >= 10103 } } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/compression_context_cache.cc b/3rdParty/rocksdb/v5.18.X/util/compression_context_cache.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/compression_context_cache.cc rename to 3rdParty/rocksdb/v5.18.X/util/compression_context_cache.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/compression_context_cache.h b/3rdParty/rocksdb/v5.18.X/util/compression_context_cache.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/compression_context_cache.h rename to 3rdParty/rocksdb/v5.18.X/util/compression_context_cache.h diff --git a/3rdParty/rocksdb/v5.16.X/util/concurrent_arena.cc b/3rdParty/rocksdb/v5.18.X/util/concurrent_arena.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/concurrent_arena.cc rename to 3rdParty/rocksdb/v5.18.X/util/concurrent_arena.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/concurrent_arena.h b/3rdParty/rocksdb/v5.18.X/util/concurrent_arena.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/concurrent_arena.h rename to 3rdParty/rocksdb/v5.18.X/util/concurrent_arena.h diff --git a/3rdParty/rocksdb/v5.16.X/util/core_local.h b/3rdParty/rocksdb/v5.18.X/util/core_local.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/core_local.h rename to 3rdParty/rocksdb/v5.18.X/util/core_local.h diff --git a/3rdParty/rocksdb/v5.16.X/util/crc32c.cc 
b/3rdParty/rocksdb/v5.18.X/util/crc32c.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/crc32c.cc rename to 3rdParty/rocksdb/v5.18.X/util/crc32c.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/crc32c.h b/3rdParty/rocksdb/v5.18.X/util/crc32c.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/crc32c.h rename to 3rdParty/rocksdb/v5.18.X/util/crc32c.h diff --git a/3rdParty/rocksdb/v5.16.X/util/crc32c_ppc.c b/3rdParty/rocksdb/v5.18.X/util/crc32c_ppc.c similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/crc32c_ppc.c rename to 3rdParty/rocksdb/v5.18.X/util/crc32c_ppc.c diff --git a/3rdParty/rocksdb/v5.16.X/util/crc32c_ppc.h b/3rdParty/rocksdb/v5.18.X/util/crc32c_ppc.h similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/util/crc32c_ppc.h rename to 3rdParty/rocksdb/v5.18.X/util/crc32c_ppc.h index b52ad9b2a4..3bcaecfe82 100644 --- a/3rdParty/rocksdb/v5.16.X/util/crc32c_ppc.h +++ b/3rdParty/rocksdb/v5.18.X/util/crc32c_ppc.h @@ -6,8 +6,7 @@ // This source code is also licensed under the GPLv2 license found in the // COPYING file in the root directory of this source tree. 
-#ifndef CRC32C_PPC_H -#define CRC32C_PPC_H +#pragma once #ifdef __cplusplus extern "C" { @@ -19,5 +18,3 @@ extern uint32_t crc32c_ppc(uint32_t crc, unsigned char const *buffer, #ifdef __cplusplus } #endif - -#endif diff --git a/3rdParty/rocksdb/v5.16.X/util/crc32c_ppc_asm.S b/3rdParty/rocksdb/v5.18.X/util/crc32c_ppc_asm.S similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/crc32c_ppc_asm.S rename to 3rdParty/rocksdb/v5.18.X/util/crc32c_ppc_asm.S diff --git a/3rdParty/rocksdb/v5.16.X/util/crc32c_ppc_constants.h b/3rdParty/rocksdb/v5.18.X/util/crc32c_ppc_constants.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/util/crc32c_ppc_constants.h rename to 3rdParty/rocksdb/v5.18.X/util/crc32c_ppc_constants.h index 1206a957a7..57d6630322 100644 --- a/3rdParty/rocksdb/v5.16.X/util/crc32c_ppc_constants.h +++ b/3rdParty/rocksdb/v5.18.X/util/crc32c_ppc_constants.h @@ -5,8 +5,9 @@ // of patent rights can be found in the PATENTS file in the same directory. // This source code is also licensed under the GPLv2 license found in the // COPYING file in the root directory of this source tree. 
-#ifndef CRC32C_PPC_CONST_H -#define CRC32C_PPC_CONST_H + +#pragma once + #define CRC 0x1edc6f41 #define REFLECT #define CRC_XOR @@ -898,5 +899,3 @@ static const unsigned int crc_table[] = { /* 33 bit reflected Barrett constant n */ .octa 0x00000000000000000000000105ec76f1 #endif - -#endif diff --git a/3rdParty/rocksdb/v5.16.X/util/crc32c_test.cc b/3rdParty/rocksdb/v5.18.X/util/crc32c_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/crc32c_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/crc32c_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/delete_scheduler.cc b/3rdParty/rocksdb/v5.18.X/util/delete_scheduler.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/util/delete_scheduler.cc rename to 3rdParty/rocksdb/v5.18.X/util/delete_scheduler.cc index 1d51055a3b..a8078b94a1 100644 --- a/3rdParty/rocksdb/v5.16.X/util/delete_scheduler.cc +++ b/3rdParty/rocksdb/v5.18.X/util/delete_scheduler.cc @@ -275,7 +275,7 @@ Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash, Status my_status = env_->NumFileLinks(path_in_trash, &num_hard_links); if (my_status.ok()) { if (num_hard_links == 1) { - unique_ptr wf; + std::unique_ptr wf; my_status = env_->ReopenWritableFile(path_in_trash, &wf, EnvOptions()); if (my_status.ok()) { diff --git a/3rdParty/rocksdb/v5.16.X/util/delete_scheduler.h b/3rdParty/rocksdb/v5.18.X/util/delete_scheduler.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/delete_scheduler.h rename to 3rdParty/rocksdb/v5.18.X/util/delete_scheduler.h diff --git a/3rdParty/rocksdb/v5.16.X/util/delete_scheduler_test.cc b/3rdParty/rocksdb/v5.18.X/util/delete_scheduler_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/util/delete_scheduler_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/delete_scheduler_test.cc index 6c33f03daa..bfd9954deb 100644 --- a/3rdParty/rocksdb/v5.16.X/util/delete_scheduler_test.cc +++ b/3rdParty/rocksdb/v5.18.X/util/delete_scheduler_test.cc @@ -89,7 +89,7 @@ class 
DeleteSchedulerTest : public testing::Test { std::string data(size, 'A'); EXPECT_OK(f->Append(data)); EXPECT_OK(f->Close()); - sst_file_mgr_->OnAddFile(file_path); + sst_file_mgr_->OnAddFile(file_path, false); return file_path; } diff --git a/3rdParty/rocksdb/v5.16.X/util/duplicate_detector.h b/3rdParty/rocksdb/v5.18.X/util/duplicate_detector.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/duplicate_detector.h rename to 3rdParty/rocksdb/v5.18.X/util/duplicate_detector.h diff --git a/3rdParty/rocksdb/v5.16.X/util/dynamic_bloom.cc b/3rdParty/rocksdb/v5.18.X/util/dynamic_bloom.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/dynamic_bloom.cc rename to 3rdParty/rocksdb/v5.18.X/util/dynamic_bloom.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/dynamic_bloom.h b/3rdParty/rocksdb/v5.18.X/util/dynamic_bloom.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/dynamic_bloom.h rename to 3rdParty/rocksdb/v5.18.X/util/dynamic_bloom.h diff --git a/3rdParty/rocksdb/v5.16.X/util/dynamic_bloom_test.cc b/3rdParty/rocksdb/v5.18.X/util/dynamic_bloom_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/dynamic_bloom_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/dynamic_bloom_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/event_logger.cc b/3rdParty/rocksdb/v5.18.X/util/event_logger.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/event_logger.cc rename to 3rdParty/rocksdb/v5.18.X/util/event_logger.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/event_logger.h b/3rdParty/rocksdb/v5.18.X/util/event_logger.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/event_logger.h rename to 3rdParty/rocksdb/v5.18.X/util/event_logger.h diff --git a/3rdParty/rocksdb/v5.16.X/util/event_logger_test.cc b/3rdParty/rocksdb/v5.18.X/util/event_logger_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/event_logger_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/event_logger_test.cc diff 
--git a/3rdParty/rocksdb/v5.16.X/util/fault_injection_test_env.cc b/3rdParty/rocksdb/v5.18.X/util/fault_injection_test_env.cc similarity index 84% rename from 3rdParty/rocksdb/v5.16.X/util/fault_injection_test_env.cc rename to 3rdParty/rocksdb/v5.18.X/util/fault_injection_test_env.cc index 46e1e0d773..64e9da1aac 100644 --- a/3rdParty/rocksdb/v5.16.X/util/fault_injection_test_env.cc +++ b/3rdParty/rocksdb/v5.18.X/util/fault_injection_test_env.cc @@ -29,12 +29,12 @@ std::string GetDirName(const std::string filename) { // A basic file truncation function suitable for this test. Status Truncate(Env* env, const std::string& filename, uint64_t length) { - unique_ptr orig_file; + std::unique_ptr orig_file; const EnvOptions options; Status s = env->NewSequentialFile(filename, &orig_file, options); if (!s.ok()) { - fprintf(stderr, "Cannot truncate file %s: %s\n", filename.c_str(), - s.ToString().c_str()); + fprintf(stderr, "Cannot open file %s for truncation: %s\n", + filename.c_str(), s.ToString().c_str()); return s; } @@ -46,7 +46,7 @@ Status Truncate(Env* env, const std::string& filename, uint64_t length) { #endif if (s.ok()) { std::string tmp_name = GetDirName(filename) + "/truncate.tmp"; - unique_ptr tmp_file; + std::unique_ptr tmp_file; s = env->NewWritableFile(tmp_name, &tmp_file, options); if (s.ok()) { s = tmp_file->Append(result); @@ -103,7 +103,7 @@ Status TestDirectory::Fsync() { } TestWritableFile::TestWritableFile(const std::string& fname, - unique_ptr&& f, + std::unique_ptr&& f, FaultInjectionTestEnv* env) : state_(fname), target_(std::move(f)), @@ -157,8 +157,8 @@ Status TestWritableFile::Sync() { } Status FaultInjectionTestEnv::NewDirectory(const std::string& name, - unique_ptr* result) { - unique_ptr r; + std::unique_ptr* result) { + std::unique_ptr r; Status s = target()->NewDirectory(name, &r); assert(s.ok()); if (!s.ok()) { @@ -168,9 +168,9 @@ Status FaultInjectionTestEnv::NewDirectory(const std::string& name, return Status::OK(); } -Status 
FaultInjectionTestEnv::NewWritableFile(const std::string& fname, - unique_ptr* result, - const EnvOptions& soptions) { +Status FaultInjectionTestEnv::NewWritableFile( + const std::string& fname, std::unique_ptr* result, + const EnvOptions& soptions) { if (!IsFilesystemActive()) { return GetError(); } @@ -197,6 +197,36 @@ Status FaultInjectionTestEnv::NewWritableFile(const std::string& fname, return s; } +Status FaultInjectionTestEnv::ReopenWritableFile( + const std::string& fname, std::unique_ptr* result, + const EnvOptions& soptions) { + if (!IsFilesystemActive()) { + return GetError(); + } + Status s = target()->ReopenWritableFile(fname, result, soptions); + if (s.ok()) { + result->reset(new TestWritableFile(fname, std::move(*result), this)); + // WritableFileWriter* file is opened + // again then it will be truncated - so forget our saved state. + UntrackFile(fname); + MutexLock l(&mutex_); + open_files_.insert(fname); + auto dir_and_name = GetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } + return s; +} + +Status FaultInjectionTestEnv::NewRandomAccessFile( + const std::string& fname, std::unique_ptr* result, + const EnvOptions& soptions) { + if (!IsFilesystemActive()) { + return GetError(); + } + return target()->NewRandomAccessFile(fname, result, soptions); +} + Status FaultInjectionTestEnv::DeleteFile(const std::string& f) { if (!IsFilesystemActive()) { return GetError(); diff --git a/3rdParty/rocksdb/v5.16.X/util/fault_injection_test_env.h b/3rdParty/rocksdb/v5.18.X/util/fault_injection_test_env.h similarity index 83% rename from 3rdParty/rocksdb/v5.16.X/util/fault_injection_test_env.h rename to 3rdParty/rocksdb/v5.18.X/util/fault_injection_test_env.h index 1a62c619e8..d3775d3a3f 100644 --- a/3rdParty/rocksdb/v5.16.X/util/fault_injection_test_env.h +++ b/3rdParty/rocksdb/v5.18.X/util/fault_injection_test_env.h @@ -11,8 +11,7 @@ // the last "sync". 
It then checks for data loss errors by purposely dropping // file data (or entire files) not protected by a "sync". -#ifndef UTIL_FAULT_INJECTION_TEST_ENV_H_ -#define UTIL_FAULT_INJECTION_TEST_ENV_H_ +#pragma once #include #include @@ -57,7 +56,7 @@ struct FileState { class TestWritableFile : public WritableFile { public: explicit TestWritableFile(const std::string& fname, - unique_ptr&& f, + std::unique_ptr&& f, FaultInjectionTestEnv* env); virtual ~TestWritableFile(); virtual Status Append(const Slice& data) override; @@ -78,7 +77,7 @@ class TestWritableFile : public WritableFile { private: FileState state_; - unique_ptr target_; + std::unique_ptr target_; bool writable_file_opened_; FaultInjectionTestEnv* env_; }; @@ -95,7 +94,7 @@ class TestDirectory : public Directory { private: FaultInjectionTestEnv* env_; std::string dirname_; - unique_ptr dir_; + std::unique_ptr dir_; }; class FaultInjectionTestEnv : public EnvWrapper { @@ -105,17 +104,35 @@ class FaultInjectionTestEnv : public EnvWrapper { virtual ~FaultInjectionTestEnv() {} Status NewDirectory(const std::string& name, - unique_ptr* result) override; + std::unique_ptr* result) override; Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& soptions) override; + Status ReopenWritableFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& soptions) override; + + Status NewRandomAccessFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& soptions) override; + virtual Status DeleteFile(const std::string& f) override; virtual Status RenameFile(const std::string& s, const std::string& t) override; + virtual Status GetFreeSpace(const std::string& path, + uint64_t* disk_free) override { + if (!IsFilesystemActive() && error_ == Status::NoSpace()) { + *disk_free = 0; + return Status::OK(); + } else { + return target()->GetFreeSpace(path, disk_free); + } + } + void WritableFileClosed(const FileState& state); // 
For every file that is not fully synced, make a call to `func` with @@ -171,5 +188,3 @@ class FaultInjectionTestEnv : public EnvWrapper { }; } // namespace rocksdb - -#endif // UTIL_FAULT_INJECTION_TEST_ENV_H_ diff --git a/3rdParty/rocksdb/v5.16.X/util/file_reader_writer.cc b/3rdParty/rocksdb/v5.18.X/util/file_reader_writer.cc similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/util/file_reader_writer.cc rename to 3rdParty/rocksdb/v5.18.X/util/file_reader_writer.cc index f0bbb52829..821d657b0a 100644 --- a/3rdParty/rocksdb/v5.16.X/util/file_reader_writer.cc +++ b/3rdParty/rocksdb/v5.18.X/util/file_reader_writer.cc @@ -62,7 +62,7 @@ Status SequentialFileReader::Read(size_t n, Slice* result, char* scratch) { Status SequentialFileReader::Skip(uint64_t n) { #ifndef ROCKSDB_LITE if (use_direct_io()) { - offset_ += n; + offset_ += static_cast(n); return Status::OK(); } #endif // !ROCKSDB_LITE @@ -81,9 +81,9 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result, if (use_direct_io()) { #ifndef ROCKSDB_LITE size_t alignment = file_->GetRequiredBufferAlignment(); - size_t aligned_offset = TruncateToPageBoundary(alignment, offset); - size_t offset_advance = offset - aligned_offset; - size_t read_size = Roundup(offset + n, alignment) - aligned_offset; + size_t aligned_offset = TruncateToPageBoundary(alignment, static_cast(offset)); + size_t offset_advance = static_cast(offset) - aligned_offset; + size_t read_size = Roundup(static_cast(offset + n), alignment) - aligned_offset; AlignedBuffer buf; buf.Alignment(alignment); buf.AllocateNewBuffer(read_size); @@ -98,8 +98,20 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result, allowed = read_size; } Slice tmp; + + time_t start_ts = 0; + uint64_t orig_offset = 0; + if (ShouldNotifyListeners()) { + start_ts = std::chrono::system_clock::to_time_t( + std::chrono::system_clock::now()); + orig_offset = aligned_offset + buf.CurrentSize(); + } s = file_->Read(aligned_offset + 
buf.CurrentSize(), allowed, &tmp, buf.Destination()); + if (ShouldNotifyListeners()) { + NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, s); + } + buf.Size(buf.CurrentSize() + tmp.size()); if (!s.ok() || tmp.size() < allowed) { break; @@ -131,7 +143,21 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result, allowed = n; } Slice tmp_result; + +#ifndef ROCKSDB_LITE + time_t start_ts = 0; + if (ShouldNotifyListeners()) { + start_ts = std::chrono::system_clock::to_time_t( + std::chrono::system_clock::now()); + } +#endif s = file_->Read(offset + pos, allowed, &tmp_result, scratch + pos); +#ifndef ROCKSDB_LITE + if (ShouldNotifyListeners()) { + NotifyOnFileReadFinish(offset + pos, tmp_result.size(), start_ts, s); + } +#endif + if (res_scratch == nullptr) { // we can't simply use `scratch` because reads of mmap'd files return // data in a different buffer. @@ -414,7 +440,22 @@ Status WritableFileWriter::WriteBuffered(const char* data, size_t size) { { IOSTATS_TIMER_GUARD(write_nanos); TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); + +#ifndef ROCKSDB_LITE + time_t start_ts = 0; + uint64_t old_size = writable_file_->GetFileSize(); + if (ShouldNotifyListeners()) { + start_ts = std::chrono::system_clock::to_time_t( + std::chrono::system_clock::now()); + old_size = next_write_offset_; + } +#endif s = writable_file_->Append(Slice(src, allowed)); +#ifndef ROCKSDB_LITE + if (ShouldNotifyListeners()) { + NotifyOnFileWriteFinish(old_size, allowed, start_ts, s); + } +#endif if (!s.ok()) { return s; } @@ -477,8 +518,16 @@ Status WritableFileWriter::WriteDirect() { { IOSTATS_TIMER_GUARD(write_nanos); TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); + time_t start_ts(0); + if (ShouldNotifyListeners()) { + start_ts = std::chrono::system_clock::to_time_t( + std::chrono::system_clock::now()); + } // direct writes must be positional s = writable_file_->PositionedAppend(Slice(src, size), write_offset); + if (ShouldNotifyListeners()) { 
+ NotifyOnFileWriteFinish(write_offset, size, start_ts, s); + } if (!s.ok()) { buf_.Size(file_advance + leftover_tail); return s; @@ -673,7 +722,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, // Only a few requested bytes are in the buffer. memmove those chunk of // bytes to the beginning, and memcpy them back into the new buffer if a // new buffer is created. - chunk_offset_in_buffer = Rounddown(offset - buffer_offset_, alignment); + chunk_offset_in_buffer = Rounddown(static_cast(offset - buffer_offset_), alignment); chunk_len = buffer_.CurrentSize() - chunk_offset_in_buffer; assert(chunk_offset_in_buffer % alignment == 0); assert(chunk_len % alignment == 0); @@ -694,11 +743,11 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, buffer_.Alignment(alignment); buffer_.AllocateNewBuffer(static_cast(roundup_len), copy_data_to_new_buffer, chunk_offset_in_buffer, - chunk_len); + static_cast(chunk_len)); } else if (chunk_len > 0) { // New buffer not needed. But memmove bytes from tail to the beginning since // chunk_len is greater than 0. 
- buffer_.RefitTail(chunk_offset_in_buffer, chunk_len); + buffer_.RefitTail(static_cast(chunk_offset_in_buffer), static_cast(chunk_len)); } Slice result; @@ -707,7 +756,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, buffer_.BufferStart() + chunk_len); if (s.ok()) { buffer_offset_ = rounddown_offset; - buffer_.Size(chunk_len + result.size()); + buffer_.Size(static_cast(chunk_len) + result.size()); } return s; } @@ -715,7 +764,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader, bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n, Slice* result) { if (track_min_offset_ && offset < min_offset_read_) { - min_offset_read_ = offset; + min_offset_read_ = static_cast(offset); } if (!enable_ || offset < buffer_offset_) { return false; @@ -753,7 +802,7 @@ std::unique_ptr NewReadaheadRandomAccessFile( } Status NewWritableFile(Env* env, const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) { Status s = env->NewWritableFile(fname, result, options); TEST_KILL_RANDOM("NewWritableFile:0", rocksdb_kill_odds * REDUCE_ODDS2); diff --git a/3rdParty/rocksdb/v5.16.X/util/file_reader_writer.h b/3rdParty/rocksdb/v5.18.X/util/file_reader_writer.h similarity index 72% rename from 3rdParty/rocksdb/v5.16.X/util/file_reader_writer.h rename to 3rdParty/rocksdb/v5.18.X/util/file_reader_writer.h index 93155fa3c5..ec7acebcc4 100644 --- a/3rdParty/rocksdb/v5.16.X/util/file_reader_writer.h +++ b/3rdParty/rocksdb/v5.18.X/util/file_reader_writer.h @@ -12,6 +12,7 @@ #include #include "port/port.h" #include "rocksdb/env.h" +#include "rocksdb/listener.h" #include "rocksdb/rate_limiter.h" #include "util/aligned_buffer.h" #include "util/sync_point.h" @@ -62,6 +63,26 @@ class SequentialFileReader { class RandomAccessFileReader { private: +#ifndef ROCKSDB_LITE + void NotifyOnFileReadFinish(uint64_t offset, size_t length, time_t start_ts, + const Status& status) const { + FileOperationInfo 
info(file_name_); + info.offset = offset; + info.length = length; + info.start_timestamp = start_ts; + time_t finish_ts = + std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + info.finish_timestamp = finish_ts; + info.status = status; + + for (auto& listener : listeners_) { + listener->OnFileReadFinish(info); + } + } +#endif // ROCKSDB_LITE + + bool ShouldNotifyListeners() const { return !listeners_.empty(); } + std::unique_ptr file_; std::string file_name_; Env* env_; @@ -70,16 +91,15 @@ class RandomAccessFileReader { HistogramImpl* file_read_hist_; RateLimiter* rate_limiter_; bool for_compaction_; + std::vector> listeners_; public: - explicit RandomAccessFileReader(std::unique_ptr&& raf, - std::string _file_name, - Env* env = nullptr, - Statistics* stats = nullptr, - uint32_t hist_type = 0, - HistogramImpl* file_read_hist = nullptr, - RateLimiter* rate_limiter = nullptr, - bool for_compaction = false) + explicit RandomAccessFileReader( + std::unique_ptr&& raf, std::string _file_name, + Env* env = nullptr, Statistics* stats = nullptr, uint32_t hist_type = 0, + HistogramImpl* file_read_hist = nullptr, + RateLimiter* rate_limiter = nullptr, bool for_compaction = false, + const std::vector>& listeners = {}) : file_(std::move(raf)), file_name_(std::move(_file_name)), env_(env), @@ -87,7 +107,19 @@ class RandomAccessFileReader { hist_type_(hist_type), file_read_hist_(file_read_hist), rate_limiter_(rate_limiter), - for_compaction_(for_compaction) {} + for_compaction_(for_compaction), + listeners_() { +#ifndef ROCKSDB_LITE + std::for_each(listeners.begin(), listeners.end(), + [this](const std::shared_ptr& e) { + if (e->ShouldBeNotifiedOnFileIO()) { + listeners_.emplace_back(e); + } + }); +#else // !ROCKSDB_LITE + (void)listeners; +#endif + } RandomAccessFileReader(RandomAccessFileReader&& o) ROCKSDB_NOEXCEPT { *this = std::move(o); @@ -124,7 +156,28 @@ class RandomAccessFileReader { // Use posix write to write data to a file. 
class WritableFileWriter { private: +#ifndef ROCKSDB_LITE + void NotifyOnFileWriteFinish(uint64_t offset, size_t length, time_t start_ts, + const Status& status) { + FileOperationInfo info(file_name_); + info.offset = offset; + info.length = length; + info.start_timestamp = start_ts; + time_t finish_ts = + std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + info.finish_timestamp = finish_ts; + info.status = status; + + for (auto& listener : listeners_) { + listener->OnFileWriteFinish(info); + } + } +#endif // ROCKSDB_LITE + + bool ShouldNotifyListeners() const { return !listeners_.empty(); } + std::unique_ptr writable_file_; + std::string file_name_; AlignedBuffer buf_; size_t max_buffer_size_; // Actually written data size can be used for truncate @@ -141,11 +194,15 @@ class WritableFileWriter { uint64_t bytes_per_sync_; RateLimiter* rate_limiter_; Statistics* stats_; + std::vector> listeners_; public: - WritableFileWriter(std::unique_ptr&& file, - const EnvOptions& options, Statistics* stats = nullptr) + WritableFileWriter( + std::unique_ptr&& file, const std::string& _file_name, + const EnvOptions& options, Statistics* stats = nullptr, + const std::vector>& listeners = {}) : writable_file_(std::move(file)), + file_name_(_file_name), buf_(), max_buffer_size_(options.writable_file_max_buffer_size), filesize_(0), @@ -156,11 +213,22 @@ class WritableFileWriter { last_sync_size_(0), bytes_per_sync_(options.bytes_per_sync), rate_limiter_(options.rate_limiter), - stats_(stats) { + stats_(stats), + listeners_() { TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0", reinterpret_cast(max_buffer_size_)); buf_.Alignment(writable_file_->GetRequiredBufferAlignment()); buf_.AllocateNewBuffer(std::min((size_t)65536, max_buffer_size_)); +#ifndef ROCKSDB_LITE + std::for_each(listeners.begin(), listeners.end(), + [this](const std::shared_ptr& e) { + if (e->ShouldBeNotifiedOnFileIO()) { + listeners_.emplace_back(e); + } + }); +#else // 
!ROCKSDB_LITE + (void)listeners; +#endif } WritableFileWriter(const WritableFileWriter&) = delete; @@ -169,6 +237,8 @@ class WritableFileWriter { ~WritableFileWriter() { Close(); } + std::string file_name() const { return file_name_; } + Status Append(const Slice& data); Status Pad(const size_t pad_bytes); @@ -249,7 +319,7 @@ class FilePrefetchBuffer { }; extern Status NewWritableFile(Env* env, const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options); bool ReadOneLine(std::istringstream* iss, SequentialFile* seq_file, std::string* output, bool* has_data, Status* result); diff --git a/3rdParty/rocksdb/v5.16.X/util/file_reader_writer_test.cc b/3rdParty/rocksdb/v5.18.X/util/file_reader_writer_test.cc similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/util/file_reader_writer_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/file_reader_writer_test.cc index 4425f87a0a..72dd625c1f 100644 --- a/3rdParty/rocksdb/v5.16.X/util/file_reader_writer_test.cc +++ b/3rdParty/rocksdb/v5.18.X/util/file_reader_writer_test.cc @@ -71,9 +71,9 @@ TEST_F(WritableFileWriterTest, RangeSync) { EnvOptions env_options; env_options.bytes_per_sync = kMb; - unique_ptr wf(new FakeWF); - unique_ptr writer( - new WritableFileWriter(std::move(wf), env_options)); + std::unique_ptr wf(new FakeWF); + std::unique_ptr writer( + new WritableFileWriter(std::move(wf), "" /* don't care */, env_options)); Random r(301); std::unique_ptr large_buf(new char[10 * kMb]); for (int i = 0; i < 1000; i++) { @@ -147,15 +147,15 @@ TEST_F(WritableFileWriterTest, IncrementalBuffer) { env_options.writable_file_max_buffer_size = (attempt < kNumAttempts / 2) ? 
512 * 1024 : 700 * 1024; std::string actual; - unique_ptr wf(new FakeWF(&actual, + std::unique_ptr wf(new FakeWF(&actual, #ifndef ROCKSDB_LITE - attempt % 2 == 1, + attempt % 2 == 1, #else - false, + false, #endif - no_flush)); - unique_ptr writer( - new WritableFileWriter(std::move(wf), env_options)); + no_flush)); + std::unique_ptr writer(new WritableFileWriter( + std::move(wf), "" /* don't care */, env_options)); std::string target; for (int i = 0; i < 20; i++) { @@ -206,10 +206,10 @@ TEST_F(WritableFileWriterTest, AppendStatusReturn) { bool use_direct_io_; bool io_error_; }; - unique_ptr wf(new FakeWF()); + std::unique_ptr wf(new FakeWF()); wf->Setuse_direct_io(true); - unique_ptr writer( - new WritableFileWriter(std::move(wf), EnvOptions())); + std::unique_ptr writer( + new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions())); ASSERT_OK(writer->Append(std::string(2 * kMb, 'a'))); @@ -238,8 +238,9 @@ class ReadaheadRandomAccessFileTest return std::string(result.data(), result.size()); } void ResetSourceStr(const std::string& str = "") { - auto write_holder = std::unique_ptr( - test::GetWritableFileWriter(new test::StringSink(&control_contents_))); + auto write_holder = + std::unique_ptr(test::GetWritableFileWriter( + new test::StringSink(&control_contents_), "" /* don't care */)); write_holder->Append(Slice(str)); write_holder->Flush(); auto read_holder = std::unique_ptr( diff --git a/3rdParty/rocksdb/v5.16.X/util/file_util.cc b/3rdParty/rocksdb/v5.18.X/util/file_util.cc similarity index 78% rename from 3rdParty/rocksdb/v5.16.X/util/file_util.cc rename to 3rdParty/rocksdb/v5.18.X/util/file_util.cc index ee8b3fb537..bf56592efc 100644 --- a/3rdParty/rocksdb/v5.16.X/util/file_util.cc +++ b/3rdParty/rocksdb/v5.18.X/util/file_util.cc @@ -19,16 +19,16 @@ Status CopyFile(Env* env, const std::string& source, const std::string& destination, uint64_t size, bool use_fsync) { const EnvOptions soptions; Status s; - unique_ptr src_reader; - unique_ptr 
dest_writer; + std::unique_ptr src_reader; + std::unique_ptr dest_writer; { - unique_ptr srcfile; + std::unique_ptr srcfile; s = env->NewSequentialFile(source, &srcfile, soptions); if (!s.ok()) { return s; } - unique_ptr destfile; + std::unique_ptr destfile; s = env->NewWritableFile(destination, &destfile, soptions); if (!s.ok()) { return s; @@ -42,7 +42,8 @@ Status CopyFile(Env* env, const std::string& source, } } src_reader.reset(new SequentialFileReader(std::move(srcfile), source)); - dest_writer.reset(new WritableFileWriter(std::move(destfile), soptions)); + dest_writer.reset( + new WritableFileWriter(std::move(destfile), destination, soptions)); } char buffer[4096]; @@ -67,18 +68,23 @@ Status CopyFile(Env* env, const std::string& source, // Utility function to create a file with the provided contents Status CreateFile(Env* env, const std::string& destination, - const std::string& contents) { + const std::string& contents, bool use_fsync) { const EnvOptions soptions; Status s; - unique_ptr dest_writer; + std::unique_ptr dest_writer; - unique_ptr destfile; + std::unique_ptr destfile; s = env->NewWritableFile(destination, &destfile, soptions); if (!s.ok()) { return s; } - dest_writer.reset(new WritableFileWriter(std::move(destfile), soptions)); - return dest_writer->Append(Slice(contents)); + dest_writer.reset( + new WritableFileWriter(std::move(destfile), destination, soptions)); + s = dest_writer->Append(Slice(contents)); + if (!s.ok()) { + return s; + } + return dest_writer->Sync(use_fsync); } Status DeleteSSTFile(const ImmutableDBOptions* db_options, diff --git a/3rdParty/rocksdb/v5.16.X/util/file_util.h b/3rdParty/rocksdb/v5.18.X/util/file_util.h similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/util/file_util.h rename to 3rdParty/rocksdb/v5.18.X/util/file_util.h index 4df597275b..5c05c9def6 100644 --- a/3rdParty/rocksdb/v5.16.X/util/file_util.h +++ b/3rdParty/rocksdb/v5.18.X/util/file_util.h @@ -19,7 +19,7 @@ extern Status CopyFile(Env* env, const 
std::string& source, bool use_fsync); extern Status CreateFile(Env* env, const std::string& destination, - const std::string& contents); + const std::string& contents, bool use_fsync); extern Status DeleteSSTFile(const ImmutableDBOptions* db_options, const std::string& fname, diff --git a/3rdParty/rocksdb/v5.16.X/util/filelock_test.cc b/3rdParty/rocksdb/v5.18.X/util/filelock_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/filelock_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/filelock_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/filename.cc b/3rdParty/rocksdb/v5.18.X/util/filename.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/util/filename.cc rename to 3rdParty/rocksdb/v5.18.X/util/filename.cc index 759b22a16c..32289aecb4 100644 --- a/3rdParty/rocksdb/v5.16.X/util/filename.cc +++ b/3rdParty/rocksdb/v5.18.X/util/filename.cc @@ -80,6 +80,13 @@ std::string BlobFileName(const std::string& blobdirname, uint64_t number) { return MakeFileName(blobdirname, number, kRocksDBBlobFileExt.c_str()); } +std::string BlobFileName(const std::string& dbname, const std::string& blob_dir, + uint64_t number) { + assert(number > 0); + return MakeFileName(dbname + "/" + blob_dir, number, + kRocksDBBlobFileExt.c_str()); +} + std::string ArchivalDirectory(const std::string& dir) { return dir + "/" + ARCHIVAL_DIR; } diff --git a/3rdParty/rocksdb/v5.16.X/util/filename.h b/3rdParty/rocksdb/v5.18.X/util/filename.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/util/filename.h rename to 3rdParty/rocksdb/v5.18.X/util/filename.h index 0d4bacf536..eea6b1b02f 100644 --- a/3rdParty/rocksdb/v5.16.X/util/filename.h +++ b/3rdParty/rocksdb/v5.18.X/util/filename.h @@ -49,6 +49,9 @@ extern std::string LogFileName(const std::string& dbname, uint64_t number); extern std::string BlobFileName(const std::string& bdirname, uint64_t number); +extern std::string BlobFileName(const std::string& dbname, + const std::string& blob_dir, uint64_t number); + 
static const std::string ARCHIVAL_DIR = "archive"; extern std::string ArchivalDirectory(const std::string& dbname); diff --git a/3rdParty/rocksdb/v5.16.X/util/filter_policy.cc b/3rdParty/rocksdb/v5.18.X/util/filter_policy.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/filter_policy.cc rename to 3rdParty/rocksdb/v5.18.X/util/filter_policy.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/gflags_compat.h b/3rdParty/rocksdb/v5.18.X/util/gflags_compat.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/gflags_compat.h rename to 3rdParty/rocksdb/v5.18.X/util/gflags_compat.h diff --git a/3rdParty/rocksdb/v5.16.X/util/hash.cc b/3rdParty/rocksdb/v5.18.X/util/hash.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/hash.cc rename to 3rdParty/rocksdb/v5.18.X/util/hash.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/hash.h b/3rdParty/rocksdb/v5.18.X/util/hash.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/hash.h rename to 3rdParty/rocksdb/v5.18.X/util/hash.h diff --git a/3rdParty/rocksdb/v5.16.X/util/hash_map.h b/3rdParty/rocksdb/v5.18.X/util/hash_map.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/hash_map.h rename to 3rdParty/rocksdb/v5.18.X/util/hash_map.h diff --git a/3rdParty/rocksdb/v5.16.X/util/hash_test.cc b/3rdParty/rocksdb/v5.18.X/util/hash_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/hash_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/hash_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/heap.h b/3rdParty/rocksdb/v5.18.X/util/heap.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/util/heap.h rename to 3rdParty/rocksdb/v5.18.X/util/heap.h index 4d5894134f..6093c20e2b 100644 --- a/3rdParty/rocksdb/v5.16.X/util/heap.h +++ b/3rdParty/rocksdb/v5.18.X/util/heap.h @@ -92,9 +92,9 @@ class BinaryHeap { reset_root_cmp_cache(); } - bool empty() const { - return data_.empty(); - } + bool empty() const { return data_.empty(); } + + size_t size() const { return 
data_.size(); } void reset_root_cmp_cache() { root_cmp_cache_ = port::kMaxSizet; } diff --git a/3rdParty/rocksdb/v5.16.X/util/heap_test.cc b/3rdParty/rocksdb/v5.18.X/util/heap_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/heap_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/heap_test.cc diff --git a/3rdParty/rocksdb/v5.18.X/util/jemalloc_nodump_allocator.cc b/3rdParty/rocksdb/v5.18.X/util/jemalloc_nodump_allocator.cc new file mode 100644 index 0000000000..1db939b4f8 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/jemalloc_nodump_allocator.cc @@ -0,0 +1,202 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/jemalloc_nodump_allocator.h" + +#include +#include + +#include "port/likely.h" +#include "port/port.h" +#include "util/string_util.h" + +namespace rocksdb { + +#ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + +std::atomic JemallocNodumpAllocator::original_alloc_{nullptr}; + +JemallocNodumpAllocator::JemallocNodumpAllocator( + JemallocAllocatorOptions& options, + std::unique_ptr&& arena_hooks, unsigned arena_index) + : options_(options), + arena_hooks_(std::move(arena_hooks)), + arena_index_(arena_index), + tcache_(&JemallocNodumpAllocator::DestroyThreadSpecificCache) {} + +int JemallocNodumpAllocator::GetThreadSpecificCache(size_t size) { + // We always enable tcache. The only corner case is when there are a ton of + // threads accessing with low frequency, then it could consume a lot of + // memory (may reach # threads * ~1MB) without bringing too much benefit. 
+ if (options_.limit_tcache_size && (size <= options_.tcache_size_lower_bound || + size > options_.tcache_size_upper_bound)) { + return MALLOCX_TCACHE_NONE; + } + unsigned* tcache_index = reinterpret_cast(tcache_.Get()); + if (UNLIKELY(tcache_index == nullptr)) { + // Instantiate tcache. + tcache_index = new unsigned(0); + size_t tcache_index_size = sizeof(unsigned); + int ret = + mallctl("tcache.create", tcache_index, &tcache_index_size, nullptr, 0); + if (ret != 0) { + // No good way to expose the error. Silently disable tcache. + delete tcache_index; + return MALLOCX_TCACHE_NONE; + } + tcache_.Reset(static_cast(tcache_index)); + } + return MALLOCX_TCACHE(*tcache_index); +} + +void* JemallocNodumpAllocator::Allocate(size_t size) { + int tcache_flag = GetThreadSpecificCache(size); + return mallocx(size, MALLOCX_ARENA(arena_index_) | tcache_flag); +} + +void JemallocNodumpAllocator::Deallocate(void* p) { + // Obtain tcache. + size_t size = 0; + if (options_.limit_tcache_size) { + size = malloc_usable_size(p); + } + int tcache_flag = GetThreadSpecificCache(size); + // No need to pass arena index to dallocx(). Jemalloc will find arena index + // from its own metadata. + dallocx(p, tcache_flag); +} + +void* JemallocNodumpAllocator::Alloc(extent_hooks_t* extent, void* new_addr, + size_t size, size_t alignment, bool* zero, + bool* commit, unsigned arena_ind) { + extent_alloc_t* original_alloc = + original_alloc_.load(std::memory_order_relaxed); + assert(original_alloc != nullptr); + void* result = original_alloc(extent, new_addr, size, alignment, zero, commit, + arena_ind); + if (result != nullptr) { + int ret = madvise(result, size, MADV_DONTDUMP); + if (ret != 0) { + fprintf( + stderr, + "JemallocNodumpAllocator failed to set MADV_DONTDUMP, error code: %d", + ret); + assert(false); + } + } + return result; +} + +Status JemallocNodumpAllocator::DestroyArena(unsigned arena_index) { + assert(arena_index != 0); + std::string key = "arena." 
+ ToString(arena_index) + ".destroy"; + int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0); + if (ret != 0) { + return Status::Incomplete("Failed to destroy jemalloc arena, error code: " + + ToString(ret)); + } + return Status::OK(); +} + +void JemallocNodumpAllocator::DestroyThreadSpecificCache(void* ptr) { + assert(ptr != nullptr); + unsigned* tcache_index = static_cast(ptr); + size_t tcache_index_size = sizeof(unsigned); + int ret __attribute__((__unused__)) = + mallctl("tcache.destroy", nullptr, 0, tcache_index, tcache_index_size); + // Silently ignore error. + assert(ret == 0); + delete tcache_index; +} + +JemallocNodumpAllocator::~JemallocNodumpAllocator() { + // Destroy tcache before destroying arena. + autovector tcache_list; + tcache_.Scrape(&tcache_list, nullptr); + for (void* tcache_index : tcache_list) { + DestroyThreadSpecificCache(tcache_index); + } + // Destroy arena. Silently ignore error. + Status s __attribute__((__unused__)) = DestroyArena(arena_index_); + assert(s.ok()); +} + +size_t JemallocNodumpAllocator::UsableSize(void* p, + size_t /*allocation_size*/) const { + return malloc_usable_size(static_cast(p)); +} +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + +Status NewJemallocNodumpAllocator( + JemallocAllocatorOptions& options, + std::shared_ptr* memory_allocator) { + *memory_allocator = nullptr; +#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + (void) options; + return Status::NotSupported( + "JemallocNodumpAllocator only available with jemalloc version >= 5 " + "and MADV_DONTDUMP is available."); +#else + if (memory_allocator == nullptr) { + return Status::InvalidArgument("memory_allocator must be non-null."); + } + if (options.limit_tcache_size && + options.tcache_size_lower_bound >= options.tcache_size_upper_bound) { + return Status::InvalidArgument( + "tcache_size_lower_bound larger or equal to tcache_size_upper_bound."); + } + + // Create arena. 
+ unsigned arena_index = 0; + size_t arena_index_size = sizeof(arena_index); + int ret = + mallctl("arenas.create", &arena_index, &arena_index_size, nullptr, 0); + if (ret != 0) { + return Status::Incomplete("Failed to create jemalloc arena, error code: " + + ToString(ret)); + } + assert(arena_index != 0); + + // Read existing hooks. + std::string key = "arena." + ToString(arena_index) + ".extent_hooks"; + extent_hooks_t* hooks; + size_t hooks_size = sizeof(hooks); + ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0); + if (ret != 0) { + JemallocNodumpAllocator::DestroyArena(arena_index); + return Status::Incomplete("Failed to read existing hooks, error code: " + + ToString(ret)); + } + + // Store existing alloc. + extent_alloc_t* original_alloc = hooks->alloc; + extent_alloc_t* expected = nullptr; + bool success = + JemallocNodumpAllocator::original_alloc_.compare_exchange_strong( + expected, original_alloc); + if (!success && original_alloc != expected) { + JemallocNodumpAllocator::DestroyArena(arena_index); + return Status::Incomplete("Original alloc conflict."); + } + + // Set the custom hook. + std::unique_ptr new_hooks(new extent_hooks_t(*hooks)); + new_hooks->alloc = &JemallocNodumpAllocator::Alloc; + extent_hooks_t* hooks_ptr = new_hooks.get(); + ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr)); + if (ret != 0) { + JemallocNodumpAllocator::DestroyArena(arena_index); + return Status::Incomplete("Failed to set custom hook, error code: " + + ToString(ret)); + } + + // Create cache allocator. 
+ memory_allocator->reset( + new JemallocNodumpAllocator(options, std::move(new_hooks), arena_index)); + return Status::OK(); +#endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR +} + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.18.X/util/jemalloc_nodump_allocator.h b/3rdParty/rocksdb/v5.18.X/util/jemalloc_nodump_allocator.h new file mode 100644 index 0000000000..914088de18 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/jemalloc_nodump_allocator.h @@ -0,0 +1,79 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "port/port.h" +#include "rocksdb/memory_allocator.h" +#include "util/core_local.h" +#include "util/thread_local.h" + +#if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX) + +#include +#include + +#if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP) +#define ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR + +namespace rocksdb { + +class JemallocNodumpAllocator : public MemoryAllocator { + public: + JemallocNodumpAllocator(JemallocAllocatorOptions& options, + std::unique_ptr&& arena_hooks, + unsigned arena_index); + ~JemallocNodumpAllocator(); + + const char* Name() const override { return "JemallocNodumpAllocator"; } + void* Allocate(size_t size) override; + void Deallocate(void* p) override; + size_t UsableSize(void* p, size_t allocation_size) const override; + + private: + friend Status NewJemallocNodumpAllocator( + JemallocAllocatorOptions& options, + std::shared_ptr* memory_allocator); + + // Custom alloc hook to replace jemalloc default alloc. + static void* Alloc(extent_hooks_t* extent, void* new_addr, size_t size, + size_t alignment, bool* zero, bool* commit, + unsigned arena_ind); + + // Destroy arena on destruction of the allocator, or on failure. 
+ static Status DestroyArena(unsigned arena_index); + + // Destroy tcache on destruction of the allocator, or thread exit. + static void DestroyThreadSpecificCache(void* ptr); + + // Get or create tcache. Return flag suitable to use with `mallocx`: + // either MALLOCX_TCACHE_NONE or MALLOCX_TCACHE(tc). + int GetThreadSpecificCache(size_t size); + + // A function pointer to jemalloc default alloc. Use atomic to make sure + // NewJemallocNodumpAllocator is thread-safe. + // + // Hack: original_alloc_ needs to be static for Alloc() to access it. + // alloc needs to be static to pass to jemalloc as function pointer. + static std::atomic original_alloc_; + + const JemallocAllocatorOptions options_; + + // Custom hooks has to outlive corresponding arena. + const std::unique_ptr arena_hooks_; + + // Arena index. + const unsigned arena_index_; + + // Hold thread-local tcache index. + ThreadLocalPtr tcache_; +}; + +} // namespace rocksdb +#endif // (JEMALLOC_VERSION_MAJOR >= 5) && MADV_DONTDUMP +#endif // ROCKSDB_JEMALLOC && ROCKSDB_PLATFORM_POSIX diff --git a/3rdParty/rocksdb/v5.16.X/util/kv_map.h b/3rdParty/rocksdb/v5.18.X/util/kv_map.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/kv_map.h rename to 3rdParty/rocksdb/v5.18.X/util/kv_map.h diff --git a/3rdParty/rocksdb/v5.16.X/util/log_buffer.cc b/3rdParty/rocksdb/v5.18.X/util/log_buffer.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/log_buffer.cc rename to 3rdParty/rocksdb/v5.18.X/util/log_buffer.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/log_buffer.h b/3rdParty/rocksdb/v5.18.X/util/log_buffer.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/log_buffer.h rename to 3rdParty/rocksdb/v5.18.X/util/log_buffer.h diff --git a/3rdParty/rocksdb/v5.16.X/util/log_write_bench.cc b/3rdParty/rocksdb/v5.18.X/util/log_write_bench.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/util/log_write_bench.cc rename to 3rdParty/rocksdb/v5.18.X/util/log_write_bench.cc index 
b4e12b948c..5c9b3e84bf 100644 --- a/3rdParty/rocksdb/v5.16.X/util/log_write_bench.cc +++ b/3rdParty/rocksdb/v5.18.X/util/log_write_bench.cc @@ -35,9 +35,9 @@ void RunBenchmark() { Env* env = Env::Default(); EnvOptions env_options = env->OptimizeForLogWrite(EnvOptions()); env_options.bytes_per_sync = FLAGS_bytes_per_sync; - unique_ptr file; + std::unique_ptr file; env->NewWritableFile(file_name, &file, env_options); - unique_ptr writer; + std::unique_ptr writer; writer.reset(new WritableFileWriter(std::move(file), env_options)); std::string record; diff --git a/3rdParty/rocksdb/v5.18.X/util/logging.h b/3rdParty/rocksdb/v5.18.X/util/logging.h new file mode 100644 index 0000000000..f605d36a5a --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/logging.h @@ -0,0 +1,57 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Must not be included from any .h files to avoid polluting the namespace +// with macros. + +#pragma once + +// Helper macros that include information about file name and line number +#define ROCKS_LOG_STRINGIFY(x) #x +#define ROCKS_LOG_TOSTRING(x) ROCKS_LOG_STRINGIFY(x) +#define ROCKS_LOG_PREPEND_FILE_LINE(FMT) ("[%s:" ROCKS_LOG_TOSTRING(__LINE__) "] " FMT) + +inline const char* RocksLogShorterFileName(const char* file) +{ + // 15 is the length of "util/logging.h". + // If the name of this file changed, please change this number, too. + return file + (sizeof(__FILE__) > 15 ? sizeof(__FILE__) - 15 : 0); +} + +// Don't inclide file/line info in HEADER level +#define ROCKS_LOG_HEADER(LGR, FMT, ...) 
\ + rocksdb::Log(InfoLogLevel::HEADER_LEVEL, LGR, FMT, ##__VA_ARGS__) + +#define ROCKS_LOG_DEBUG(LGR, FMT, ...) \ + rocksdb::Log(InfoLogLevel::DEBUG_LEVEL, LGR, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ + RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) + +#define ROCKS_LOG_INFO(LGR, FMT, ...) \ + rocksdb::Log(InfoLogLevel::INFO_LEVEL, LGR, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ + RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) + +#define ROCKS_LOG_WARN(LGR, FMT, ...) \ + rocksdb::Log(InfoLogLevel::WARN_LEVEL, LGR, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ + RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) + +#define ROCKS_LOG_ERROR(LGR, FMT, ...) \ + rocksdb::Log(InfoLogLevel::ERROR_LEVEL, LGR, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ + RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) + +#define ROCKS_LOG_FATAL(LGR, FMT, ...) \ + rocksdb::Log(InfoLogLevel::FATAL_LEVEL, LGR, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ + RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) + +#define ROCKS_LOG_BUFFER(LOG_BUF, FMT, ...) \ + rocksdb::LogToBuffer(LOG_BUF, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ + RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) + +#define ROCKS_LOG_BUFFER_MAX_SZ(LOG_BUF, MAX_LOG_SIZE, FMT, ...) \ + rocksdb::LogToBuffer(LOG_BUF, MAX_LOG_SIZE, ROCKS_LOG_PREPEND_FILE_LINE(FMT), \ + RocksLogShorterFileName(__FILE__), ##__VA_ARGS__) diff --git a/3rdParty/rocksdb/v5.18.X/util/memory_allocator.h b/3rdParty/rocksdb/v5.18.X/util/memory_allocator.h new file mode 100644 index 0000000000..99a7241d0a --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/memory_allocator.h @@ -0,0 +1,38 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+// + +#pragma once + +#include "rocksdb/memory_allocator.h" + +namespace rocksdb { + +struct CustomDeleter { + CustomDeleter(MemoryAllocator* a = nullptr) : allocator(a) {} + + void operator()(char* ptr) const { + if (allocator) { + allocator->Deallocate(reinterpret_cast(ptr)); + } else { + delete[] ptr; + } + } + + MemoryAllocator* allocator; +}; + +using CacheAllocationPtr = std::unique_ptr; + +inline CacheAllocationPtr AllocateBlock(size_t size, + MemoryAllocator* allocator) { + if (allocator) { + auto block = reinterpret_cast(allocator->Allocate(size)); + return CacheAllocationPtr(block, allocator); + } + return CacheAllocationPtr(new char[size]); +} + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/memory_usage.h b/3rdParty/rocksdb/v5.18.X/util/memory_usage.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/memory_usage.h rename to 3rdParty/rocksdb/v5.18.X/util/memory_usage.h diff --git a/3rdParty/rocksdb/v5.18.X/util/mock_time_env.h b/3rdParty/rocksdb/v5.18.X/util/mock_time_env.h new file mode 100644 index 0000000000..c6ab8a7483 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/mock_time_env.h @@ -0,0 +1,43 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#include "rocksdb/env.h" + +namespace rocksdb { + +class MockTimeEnv : public EnvWrapper { + public: + explicit MockTimeEnv(Env* base) : EnvWrapper(base) {} + + virtual Status GetCurrentTime(int64_t* time) override { + assert(time != nullptr); + assert(current_time_ <= + static_cast(std::numeric_limits::max())); + *time = static_cast(current_time_); + return Status::OK(); + } + + virtual uint64_t NowMicros() override { + assert(current_time_ <= std::numeric_limits::max() / 1000000); + return current_time_ * 1000000; + } + + virtual uint64_t NowNanos() override { + assert(current_time_ <= std::numeric_limits::max() / 1000000000); + return current_time_ * 1000000000; + } + + void set_current_time(uint64_t time) { + assert(time >= current_time_); + current_time_ = time; + } + + private: + std::atomic current_time_{0}; +}; + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/murmurhash.cc b/3rdParty/rocksdb/v5.18.X/util/murmurhash.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/murmurhash.cc rename to 3rdParty/rocksdb/v5.18.X/util/murmurhash.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/murmurhash.h b/3rdParty/rocksdb/v5.18.X/util/murmurhash.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/murmurhash.h rename to 3rdParty/rocksdb/v5.18.X/util/murmurhash.h diff --git a/3rdParty/rocksdb/v5.16.X/util/mutexlock.h b/3rdParty/rocksdb/v5.18.X/util/mutexlock.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/mutexlock.h rename to 3rdParty/rocksdb/v5.18.X/util/mutexlock.h diff --git a/3rdParty/rocksdb/v5.16.X/util/ppc-opcode.h b/3rdParty/rocksdb/v5.18.X/util/ppc-opcode.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/util/ppc-opcode.h rename to 3rdParty/rocksdb/v5.18.X/util/ppc-opcode.h index eeb0ae08ff..554fa50a89 100644 --- a/3rdParty/rocksdb/v5.16.X/util/ppc-opcode.h +++ b/3rdParty/rocksdb/v5.18.X/util/ppc-opcode.h @@ -6,8 +6,7 @@ // This source code is also licensed under the 
GPLv2 license found in the // COPYING file in the root directory of this source tree. -#ifndef __OPCODES_H -#define __OPCODES_H +#pragma once #define __PPC_RA(a) (((a)&0x1f) << 16) #define __PPC_RB(b) (((b)&0x1f) << 11) @@ -27,5 +26,3 @@ #define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b) #define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t) + 32, a, 0) #define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t) + 32, a, 0) - -#endif diff --git a/3rdParty/rocksdb/v5.16.X/util/random.cc b/3rdParty/rocksdb/v5.18.X/util/random.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/random.cc rename to 3rdParty/rocksdb/v5.18.X/util/random.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/random.h b/3rdParty/rocksdb/v5.18.X/util/random.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/random.h rename to 3rdParty/rocksdb/v5.18.X/util/random.h diff --git a/3rdParty/rocksdb/v5.16.X/util/rate_limiter.cc b/3rdParty/rocksdb/v5.18.X/util/rate_limiter.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/rate_limiter.cc rename to 3rdParty/rocksdb/v5.18.X/util/rate_limiter.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/rate_limiter.h b/3rdParty/rocksdb/v5.18.X/util/rate_limiter.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/rate_limiter.h rename to 3rdParty/rocksdb/v5.18.X/util/rate_limiter.h diff --git a/3rdParty/rocksdb/v5.16.X/util/rate_limiter_test.cc b/3rdParty/rocksdb/v5.18.X/util/rate_limiter_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/rate_limiter_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/rate_limiter_test.cc diff --git a/3rdParty/rocksdb/v5.18.X/util/repeatable_thread.h b/3rdParty/rocksdb/v5.18.X/util/repeatable_thread.h new file mode 100644 index 0000000000..3506234f9e --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/repeatable_thread.h @@ -0,0 +1,153 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "port/port.h" +#include "rocksdb/env.h" +#include "util/mock_time_env.h" +#include "util/mutexlock.h" + +namespace rocksdb { + +class RepeatableThread { + public: + RepeatableThread(std::function function, + const std::string& thread_name, Env* env, uint64_t delay_us, + uint64_t initial_delay_us = 0) + : function_(function), + thread_name_("rocksdb:" + thread_name), + env_(env), + delay_us_(delay_us), + initial_delay_us_(initial_delay_us), + cond_var_(&mutex_), + running_(true), +#ifndef NDEBUG + waiting_(false), + run_count_(0), +#endif + thread_([this] { thread(); }) { + } + + void cancel() { + { + MutexLock l(&mutex_); + if (!running_) { + return; + } + running_ = false; + cond_var_.SignalAll(); + } + thread_.join(); + } + + ~RepeatableThread() { cancel(); } + +#ifndef NDEBUG + // Wait until RepeatableThread starting waiting, call the optional callback, + // then wait for one run of RepeatableThread. Tests can use provide a + // custom env object to mock time, and use the callback here to bump current + // time and trigger RepeatableThread. See repeatable_thread_test for example. + // + // Note: only support one caller of this method. 
+ void TEST_WaitForRun(std::function callback = nullptr) { + MutexLock l(&mutex_); + while (!waiting_) { + cond_var_.Wait(); + } + uint64_t prev_count = run_count_; + if (callback != nullptr) { + callback(); + } + cond_var_.SignalAll(); + while (!(run_count_ > prev_count)) { + cond_var_.Wait(); + } + } +#endif + + private: + bool wait(uint64_t delay) { + MutexLock l(&mutex_); + if (running_ && delay > 0) { + uint64_t wait_until = env_->NowMicros() + delay; +#ifndef NDEBUG + waiting_ = true; + cond_var_.SignalAll(); +#endif + while (running_) { +#ifndef NDEBUG + if (dynamic_cast(env_) != nullptr) { + // MockTimeEnv is used. Since it is not easy to mock TimedWait, + // we wait without timeout to wait for TEST_WaitForRun to wake us up. + cond_var_.Wait(); + } else { + cond_var_.TimedWait(wait_until); + } +#else + cond_var_.TimedWait(wait_until); +#endif + if (env_->NowMicros() >= wait_until) { + break; + } + } +#ifndef NDEBUG + waiting_ = false; +#endif + } + return running_; + } + + void thread() { +#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) + // Set thread name. + auto thread_handle = thread_.native_handle(); + int ret __attribute__((__unused__)) = + pthread_setname_np(thread_handle, thread_name_.c_str()); + assert(ret == 0); +#endif +#endif + + assert(delay_us_ > 0); + if (!wait(initial_delay_us_)) { + return; + } + do { + function_(); +#ifndef NDEBUG + { + MutexLock l(&mutex_); + run_count_++; + cond_var_.SignalAll(); + } +#endif + } while (wait(delay_us_)); + } + + const std::function function_; + const std::string thread_name_; + Env* const env_; + const uint64_t delay_us_; + const uint64_t initial_delay_us_; + + // Mutex lock should be held when accessing running_, waiting_ + // and run_count_. + port::Mutex mutex_; + port::CondVar cond_var_; + bool running_; +#ifndef NDEBUG + // RepeatableThread waiting for timeout. + bool waiting_; + // Times function_ had run. 
+ uint64_t run_count_; +#endif + port::Thread thread_; +}; + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.18.X/util/repeatable_thread_test.cc b/3rdParty/rocksdb/v5.18.X/util/repeatable_thread_test.cc new file mode 100644 index 0000000000..dec437da32 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/repeatable_thread_test.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include +#include + +#include "db/db_test_util.h" +#include "util/repeatable_thread.h" +#include "util/testharness.h" + +class RepeatableThreadTest : public testing::Test { + public: + RepeatableThreadTest() + : mock_env_(new rocksdb::MockTimeEnv(rocksdb::Env::Default())) {} + + protected: + std::unique_ptr mock_env_; +}; + +TEST_F(RepeatableThreadTest, TimedTest) { + constexpr uint64_t kSecond = 1000000; // 1s = 1000000us + constexpr int kIteration = 3; + rocksdb::Env* env = rocksdb::Env::Default(); + rocksdb::port::Mutex mutex; + rocksdb::port::CondVar test_cv(&mutex); + int count = 0; + uint64_t prev_time = env->NowMicros(); + rocksdb::RepeatableThread thread( + [&] { + rocksdb::MutexLock l(&mutex); + count++; + uint64_t now = env->NowMicros(); + assert(count == 1 || prev_time + 1 * kSecond <= now); + prev_time = now; + if (count >= kIteration) { + test_cv.SignalAll(); + } + }, + "rt_test", env, 1 * kSecond); + // Wait for execution finish. 
+ { + rocksdb::MutexLock l(&mutex); + while (count < kIteration) { + test_cv.Wait(); + } + } + + // Test cancel + thread.cancel(); +} + +TEST_F(RepeatableThreadTest, MockEnvTest) { + constexpr uint64_t kSecond = 1000000; // 1s = 1000000us + constexpr int kIteration = 3; + mock_env_->set_current_time(0); // in seconds + std::atomic count{0}; + rocksdb::RepeatableThread thread([&] { count++; }, "rt_test", mock_env_.get(), + 1 * kSecond, 1 * kSecond); + for (int i = 1; i <= kIteration; i++) { + // Bump current time + thread.TEST_WaitForRun([&] { mock_env_->set_current_time(i); }); + } + // Test function should be exectued exactly kIteraion times. + ASSERT_EQ(kIteration, count.load()); + + // Test cancel + thread.cancel(); +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/3rdParty/rocksdb/v5.16.X/util/set_comparator.h b/3rdParty/rocksdb/v5.18.X/util/set_comparator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/set_comparator.h rename to 3rdParty/rocksdb/v5.18.X/util/set_comparator.h diff --git a/3rdParty/rocksdb/v5.16.X/util/slice.cc b/3rdParty/rocksdb/v5.18.X/util/slice.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/slice.cc rename to 3rdParty/rocksdb/v5.18.X/util/slice.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/slice_transform_test.cc b/3rdParty/rocksdb/v5.18.X/util/slice_transform_test.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/util/slice_transform_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/slice_transform_test.cc index ddbb9f4bfa..2eb56af6d6 100644 --- a/3rdParty/rocksdb/v5.16.X/util/slice_transform_test.cc +++ b/3rdParty/rocksdb/v5.18.X/util/slice_transform_test.cc @@ -24,7 +24,7 @@ TEST_F(SliceTransformTest, CapPrefixTransform) { std::string s; s = "abcdefge"; - unique_ptr transform; + std::unique_ptr transform; transform.reset(NewCappedPrefixTransform(6)); ASSERT_EQ(transform->Transform(s).ToString(), "abcdef"); @@ -115,7 
+115,7 @@ TEST_F(SliceTransformDBTest, CapPrefix) { ASSERT_OK(db()->Put(wo, "foo3", "bar3")); ASSERT_OK(db()->Flush(fo)); - unique_ptr iter(db()->NewIterator(ro)); + std::unique_ptr iter(db()->NewIterator(ro)); iter->Seek("foo"); ASSERT_OK(iter->status()); diff --git a/3rdParty/rocksdb/v5.18.X/util/sst_file_manager_impl.cc b/3rdParty/rocksdb/v5.18.X/util/sst_file_manager_impl.cc new file mode 100644 index 0000000000..ee1394bc91 --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/sst_file_manager_impl.cc @@ -0,0 +1,511 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "util/sst_file_manager_impl.h" + +#include + +#include "db/db_impl.h" +#include "port/port.h" +#include "rocksdb/env.h" +#include "rocksdb/sst_file_manager.h" +#include "util/mutexlock.h" +#include "util/sync_point.h" + +namespace rocksdb { + +#ifndef ROCKSDB_LITE +SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr logger, + int64_t rate_bytes_per_sec, + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) + : env_(env), + logger_(logger), + total_files_size_(0), + in_progress_files_size_(0), + compaction_buffer_size_(0), + cur_compactions_reserved_size_(0), + max_allowed_space_(0), + delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this, + max_trash_db_ratio, bytes_max_delete_chunk), + cv_(&mu_), + closing_(false), + bg_thread_(nullptr), + reserved_disk_buffer_(0), + free_space_trigger_(0), + cur_instance_(nullptr) { +} + +SstFileManagerImpl::~SstFileManagerImpl() { + Close(); +} + +void SstFileManagerImpl::Close() { + { + MutexLock l(&mu_); + if (closing_) { + return; + } + closing_ = true; + cv_.SignalAll(); + } + if (bg_thread_) { + bg_thread_->join(); + } +} + +Status SstFileManagerImpl::OnAddFile(const std::string& file_path, + bool compaction) { 
+ uint64_t file_size; + Status s = env_->GetFileSize(file_path, &file_size); + if (s.ok()) { + MutexLock l(&mu_); + OnAddFileImpl(file_path, file_size, compaction); + } + TEST_SYNC_POINT("SstFileManagerImpl::OnAddFile"); + return s; +} + +Status SstFileManagerImpl::OnDeleteFile(const std::string& file_path) { + { + MutexLock l(&mu_); + OnDeleteFileImpl(file_path); + } + TEST_SYNC_POINT("SstFileManagerImpl::OnDeleteFile"); + return Status::OK(); +} + +void SstFileManagerImpl::OnCompactionCompletion(Compaction* c) { + MutexLock l(&mu_); + uint64_t size_added_by_compaction = 0; + for (size_t i = 0; i < c->num_input_levels(); i++) { + for (size_t j = 0; j < c->num_input_files(i); j++) { + FileMetaData* filemeta = c->input(i, j); + size_added_by_compaction += filemeta->fd.GetFileSize(); + } + } + cur_compactions_reserved_size_ -= size_added_by_compaction; + + auto new_files = c->edit()->GetNewFiles(); + for (auto& new_file : new_files) { + auto fn = TableFileName(c->immutable_cf_options()->cf_paths, + new_file.second.fd.GetNumber(), + new_file.second.fd.GetPathId()); + if (in_progress_files_.find(fn) != in_progress_files_.end()) { + auto tracked_file = tracked_files_.find(fn); + assert(tracked_file != tracked_files_.end()); + in_progress_files_size_ -= tracked_file->second; + in_progress_files_.erase(fn); + } + } +} + +Status SstFileManagerImpl::OnMoveFile(const std::string& old_path, + const std::string& new_path, + uint64_t* file_size) { + { + MutexLock l(&mu_); + if (file_size != nullptr) { + *file_size = tracked_files_[old_path]; + } + OnAddFileImpl(new_path, tracked_files_[old_path], false); + OnDeleteFileImpl(old_path); + } + TEST_SYNC_POINT("SstFileManagerImpl::OnMoveFile"); + return Status::OK(); +} + +void SstFileManagerImpl::SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) { + MutexLock l(&mu_); + max_allowed_space_ = max_allowed_space; +} + +void SstFileManagerImpl::SetCompactionBufferSize( + uint64_t compaction_buffer_size) { + MutexLock l(&mu_); + 
compaction_buffer_size_ = compaction_buffer_size; +} + +bool SstFileManagerImpl::IsMaxAllowedSpaceReached() { + MutexLock l(&mu_); + if (max_allowed_space_ <= 0) { + return false; + } + return total_files_size_ >= max_allowed_space_; +} + +bool SstFileManagerImpl::IsMaxAllowedSpaceReachedIncludingCompactions() { + MutexLock l(&mu_); + if (max_allowed_space_ <= 0) { + return false; + } + return total_files_size_ + cur_compactions_reserved_size_ >= + max_allowed_space_; +} + +bool SstFileManagerImpl::EnoughRoomForCompaction( + ColumnFamilyData* cfd, const std::vector& inputs, + Status bg_error) { + MutexLock l(&mu_); + uint64_t size_added_by_compaction = 0; + // First check if we even have the space to do the compaction + for (size_t i = 0; i < inputs.size(); i++) { + for (size_t j = 0; j < inputs[i].size(); j++) { + FileMetaData* filemeta = inputs[i][j]; + size_added_by_compaction += filemeta->fd.GetFileSize(); + } + } + + // Update cur_compactions_reserved_size_ so concurrent compaction + // don't max out space + size_t needed_headroom = + cur_compactions_reserved_size_ + size_added_by_compaction + + compaction_buffer_size_; + if (max_allowed_space_ != 0 && + (needed_headroom + total_files_size_ > max_allowed_space_)) { + return false; + } + + // Implement more aggressive checks only if this DB instance has already + // seen a NoSpace() error. This is tin order to contain a single potentially + // misbehaving DB instance and prevent it from slowing down compactions of + // other DB instances + if (CheckFreeSpace() && bg_error == Status::NoSpace()) { + auto fn = + TableFileName(cfd->ioptions()->cf_paths, inputs[0][0]->fd.GetNumber(), + inputs[0][0]->fd.GetPathId()); + uint64_t free_space = 0; + env_->GetFreeSpace(fn, &free_space); + // needed_headroom is based on current size reserved by compactions, + // minus any files created by running compactions as they would count + // against the reserved size. 
If user didn't specify any compaction + // buffer, add reserved_disk_buffer_ that's calculated by default so the + // compaction doesn't end up leaving nothing for logs and flush SSTs + if (compaction_buffer_size_ == 0) { + needed_headroom += reserved_disk_buffer_; + } + needed_headroom -= in_progress_files_size_; + if (free_space < needed_headroom + size_added_by_compaction) { + // We hit the condition of not enough disk space + ROCKS_LOG_ERROR(logger_, "free space [%d bytes] is less than " + "needed headroom [%d bytes]\n", free_space, needed_headroom); + return false; + } + } + + cur_compactions_reserved_size_ += size_added_by_compaction; + // Take a snapshot of cur_compactions_reserved_size_ for when we encounter + // a NoSpace error. + free_space_trigger_ = cur_compactions_reserved_size_; + return true; +} + +uint64_t SstFileManagerImpl::GetCompactionsReservedSize() { + MutexLock l(&mu_); + return cur_compactions_reserved_size_; +} + +uint64_t SstFileManagerImpl::GetTotalSize() { + MutexLock l(&mu_); + return total_files_size_; +} + +std::unordered_map +SstFileManagerImpl::GetTrackedFiles() { + MutexLock l(&mu_); + return tracked_files_; +} + +int64_t SstFileManagerImpl::GetDeleteRateBytesPerSecond() { + return delete_scheduler_.GetRateBytesPerSecond(); +} + +void SstFileManagerImpl::SetDeleteRateBytesPerSecond(int64_t delete_rate) { + return delete_scheduler_.SetRateBytesPerSecond(delete_rate); +} + +double SstFileManagerImpl::GetMaxTrashDBRatio() { + return delete_scheduler_.GetMaxTrashDBRatio(); +} + +void SstFileManagerImpl::SetMaxTrashDBRatio(double r) { + return delete_scheduler_.SetMaxTrashDBRatio(r); +} + +uint64_t SstFileManagerImpl::GetTotalTrashSize() { + return delete_scheduler_.GetTotalTrashSize(); +} + +void SstFileManagerImpl::ReserveDiskBuffer(uint64_t size, + const std::string& path) { + MutexLock l(&mu_); + + reserved_disk_buffer_ += size; + if (path_.empty()) { + path_ = path; + } +} + +void SstFileManagerImpl::ClearError() { + while (true) { 
+ MutexLock l(&mu_); + + if (closing_) { + return; + } + + uint64_t free_space; + Status s = env_->GetFreeSpace(path_, &free_space); + if (s.ok()) { + // In case of multi-DB instances, some of them may have experienced a + // soft error and some a hard error. In the SstFileManagerImpl, a hard + // error will basically override previously reported soft errors. Once + // we clear the hard error, we don't keep track of previous errors for + // now + if (bg_err_.severity() == Status::Severity::kHardError) { + if (free_space < reserved_disk_buffer_) { + ROCKS_LOG_ERROR(logger_, "free space [%d bytes] is less than " + "required disk buffer [%d bytes]\n", free_space, + reserved_disk_buffer_); + ROCKS_LOG_ERROR(logger_, "Cannot clear hard error\n"); + s = Status::NoSpace(); + } + } else if (bg_err_.severity() == Status::Severity::kSoftError) { + if (free_space < free_space_trigger_) { + ROCKS_LOG_WARN(logger_, "free space [%d bytes] is less than " + "free space for compaction trigger [%d bytes]\n", free_space, + free_space_trigger_); + ROCKS_LOG_WARN(logger_, "Cannot clear soft error\n"); + s = Status::NoSpace(); + } + } + } + + // Someone could have called CancelErrorRecovery() and the list could have + // become empty, so check again here + if (s.ok() && !error_handler_list_.empty()) { + auto error_handler = error_handler_list_.front(); + // Since we will release the mutex, set cur_instance_ to signal to the + // shutdown thread, if it calls // CancelErrorRecovery() the meantime, + // to indicate that this DB instance is busy. 
The DB instance is + // guaranteed to not be deleted before RecoverFromBGError() returns, + // since the ErrorHandler::recovery_in_prog_ flag would be true + cur_instance_ = error_handler; + mu_.Unlock(); + s = error_handler->RecoverFromBGError(); + mu_.Lock(); + // The DB instance might have been deleted while we were + // waiting for the mutex, so check cur_instance_ to make sure its + // still non-null + if (cur_instance_) { + // Check for error again, since the instance may have recovered but + // immediately got another error. If that's the case, and the new + // error is also a NoSpace() non-fatal error, leave the instance in + // the list + Status err = cur_instance_->GetBGError(); + if (s.ok() && err == Status::NoSpace() && + err.severity() < Status::Severity::kFatalError) { + s = err; + } + cur_instance_ = nullptr; + } + + if (s.ok() || s.IsShutdownInProgress() || + (!s.ok() && s.severity() >= Status::Severity::kFatalError)) { + // If shutdown is in progress, abandon this handler instance + // and continue with the others + error_handler_list_.pop_front(); + } + } + + if (!error_handler_list_.empty()) { + // If there are more instances to be recovered, reschedule after 5 + // seconds + int64_t wait_until = env_->NowMicros() + 5000000; + cv_.TimedWait(wait_until); + } + + // Check again for error_handler_list_ empty, as a DB instance shutdown + // could have removed it from the queue while we were in timed wait + if (error_handler_list_.empty()) { + ROCKS_LOG_INFO(logger_, "Clearing error\n"); + bg_err_ = Status::OK(); + return; + } + } +} + +void SstFileManagerImpl::StartErrorRecovery(ErrorHandler* handler, + Status bg_error) { + MutexLock l(&mu_); + if (bg_error.severity() == Status::Severity::kSoftError) { + if (bg_err_.ok()) { + // Setting bg_err_ basically means we're in degraded mode + // Assume that all pending compactions will fail similarly. 
The trigger + // for clearing this condition is set to current compaction reserved + // size, so we stop checking disk space available in + // EnoughRoomForCompaction once this much free space is available + bg_err_ = bg_error; + } + } else if (bg_error.severity() == Status::Severity::kHardError) { + bg_err_ = bg_error; + } else { + assert(false); + } + + // If this is the first instance of this error, kick of a thread to poll + // and recover from this condition + if (error_handler_list_.empty()) { + error_handler_list_.push_back(handler); + // Release lock before calling join. Its ok to do so because + // error_handler_list_ is now non-empty, so no other invocation of this + // function will execute this piece of code + mu_.Unlock(); + if (bg_thread_) { + bg_thread_->join(); + } + // Start a new thread. The previous one would have exited. + bg_thread_.reset(new port::Thread(&SstFileManagerImpl::ClearError, this)); + mu_.Lock(); + } else { + // Check if this DB instance is already in the list + for (auto iter = error_handler_list_.begin(); + iter != error_handler_list_.end(); ++iter) { + if ((*iter) == handler) { + return; + } + } + error_handler_list_.push_back(handler); + } +} + +bool SstFileManagerImpl::CancelErrorRecovery(ErrorHandler* handler) { + MutexLock l(&mu_); + + if (cur_instance_ == handler) { + // This instance is currently busy attempting to recover + // Nullify it so the recovery thread doesn't attempt to access it again + cur_instance_ = nullptr; + return false; + } + + for (auto iter = error_handler_list_.begin(); + iter != error_handler_list_.end(); ++iter) { + if ((*iter) == handler) { + error_handler_list_.erase(iter); + return true; + } + } + return false; +} + +Status SstFileManagerImpl::ScheduleFileDeletion( + const std::string& file_path, const std::string& path_to_sync) { + return delete_scheduler_.DeleteFile(file_path, path_to_sync); +} + +void SstFileManagerImpl::WaitForEmptyTrash() { + delete_scheduler_.WaitForEmptyTrash(); +} + +void 
SstFileManagerImpl::OnAddFileImpl(const std::string& file_path, + uint64_t file_size, bool compaction) { + auto tracked_file = tracked_files_.find(file_path); + if (tracked_file != tracked_files_.end()) { + // File was added before, we will just update the size + assert(!compaction); + total_files_size_ -= tracked_file->second; + total_files_size_ += file_size; + cur_compactions_reserved_size_ -= file_size; + } else { + total_files_size_ += file_size; + if (compaction) { + // Keep track of the size of files created by in-progress compactions. + // When calculating whether there's enough headroom for new compactions, + // this will be subtracted from cur_compactions_reserved_size_. + // Otherwise, compactions will be double counted. + in_progress_files_size_ += file_size; + in_progress_files_.insert(file_path); + } + } + tracked_files_[file_path] = file_size; +} + +void SstFileManagerImpl::OnDeleteFileImpl(const std::string& file_path) { + auto tracked_file = tracked_files_.find(file_path); + if (tracked_file == tracked_files_.end()) { + // File is not tracked + assert(in_progress_files_.find(file_path) == in_progress_files_.end()); + return; + } + + total_files_size_ -= tracked_file->second; + // Check if it belonged to an in-progress compaction + if (in_progress_files_.find(file_path) != in_progress_files_.end()) { + in_progress_files_size_ -= tracked_file->second; + in_progress_files_.erase(file_path); + } + tracked_files_.erase(tracked_file); +} + +SstFileManager* NewSstFileManager(Env* env, std::shared_ptr info_log, + std::string trash_dir, + int64_t rate_bytes_per_sec, + bool delete_existing_trash, Status* status, + double max_trash_db_ratio, + uint64_t bytes_max_delete_chunk) { + SstFileManagerImpl* res = + new SstFileManagerImpl(env, info_log, rate_bytes_per_sec, + max_trash_db_ratio, bytes_max_delete_chunk); + + // trash_dir is deprecated and not needed anymore, but if user passed it + // we will still remove files in it. 
+ Status s; + if (delete_existing_trash && trash_dir != "") { + std::vector files_in_trash; + s = env->GetChildren(trash_dir, &files_in_trash); + if (s.ok()) { + for (const std::string& trash_file : files_in_trash) { + if (trash_file == "." || trash_file == "..") { + continue; + } + + std::string path_in_trash = trash_dir + "/" + trash_file; + res->OnAddFile(path_in_trash); + Status file_delete = + res->ScheduleFileDeletion(path_in_trash, trash_dir); + if (s.ok() && !file_delete.ok()) { + s = file_delete; + } + } + } + } + + if (status) { + *status = s; + } + + return res; +} + +#else + +SstFileManager* NewSstFileManager(Env* /*env*/, + std::shared_ptr /*info_log*/, + std::string /*trash_dir*/, + int64_t /*rate_bytes_per_sec*/, + bool /*delete_existing_trash*/, + Status* status, double /*max_trash_db_ratio*/, + uint64_t /*bytes_max_delete_chunk*/) { + if (status) { + *status = + Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE"); + } + return nullptr; +} + +#endif // ROCKSDB_LITE + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/sst_file_manager_impl.h b/3rdParty/rocksdb/v5.18.X/util/sst_file_manager_impl.h similarity index 67% rename from 3rdParty/rocksdb/v5.16.X/util/sst_file_manager_impl.h rename to 3rdParty/rocksdb/v5.18.X/util/sst_file_manager_impl.h index 90815d44f2..d11035df80 100644 --- a/3rdParty/rocksdb/v5.16.X/util/sst_file_manager_impl.h +++ b/3rdParty/rocksdb/v5.18.X/util/sst_file_manager_impl.h @@ -12,6 +12,7 @@ #include "port/port.h" #include "db/compaction.h" +#include "db/error_handler.h" #include "rocksdb/sst_file_manager.h" #include "util/delete_scheduler.h" @@ -33,7 +34,7 @@ class SstFileManagerImpl : public SstFileManager { ~SstFileManagerImpl(); // DB will call OnAddFile whenever a new sst file is added. - Status OnAddFile(const std::string& file_path); + Status OnAddFile(const std::string& file_path, bool compaction = false); // DB will call OnDeleteFile whenever an sst file is deleted. 
Status OnDeleteFile(const std::string& file_path); @@ -67,7 +68,9 @@ class SstFileManagerImpl : public SstFileManager { // estimates how much space is currently being used by compactions (i.e. // if a compaction has started, this function bumps the used space by // the full compaction size). - bool EnoughRoomForCompaction(const std::vector& inputs); + bool EnoughRoomForCompaction(ColumnFamilyData* cfd, + const std::vector& inputs, + Status bg_error); // Bookkeeping so total_file_sizes_ goes back to normal after compaction // finishes @@ -96,6 +99,18 @@ class SstFileManagerImpl : public SstFileManager { // Return the total size of trash files uint64_t GetTotalTrashSize() override; + // Called by each DB instance using this sst file manager to reserve + // disk buffer space for recovery from out of space errors + void ReserveDiskBuffer(uint64_t buffer, const std::string& path); + + // Set a flag upon encountering disk full. May enqueue the ErrorHandler + // instance for background polling and recovery + void StartErrorRecovery(ErrorHandler* db, Status bg_error); + + // Remove the given Errorhandler instance from the recovery queue. Its + // not guaranteed + bool CancelErrorRecovery(ErrorHandler* db); + // Mark file as trash and schedule it's deletion. virtual Status ScheduleFileDeletion(const std::string& file_path, const std::string& dir_to_sync); @@ -106,18 +121,30 @@ class SstFileManagerImpl : public SstFileManager { DeleteScheduler* delete_scheduler() { return &delete_scheduler_; } + // Stop the error recovery background thread. 
This should be called only + // once in the object's lifetime, and before the destructor + void Close(); + private: // REQUIRES: mutex locked - void OnAddFileImpl(const std::string& file_path, uint64_t file_size); + void OnAddFileImpl(const std::string& file_path, uint64_t file_size, + bool compaction); // REQUIRES: mutex locked void OnDeleteFileImpl(const std::string& file_path); + void ClearError(); + bool CheckFreeSpace() { + return bg_err_.severity() == Status::Severity::kSoftError; + } + Env* env_; std::shared_ptr logger_; // Mutex to protect tracked_files_, total_files_size_ port::Mutex mu_; // The summation of the sizes of all files in tracked_files_ map uint64_t total_files_size_; + // The summation of all output files of in-progress compactions + uint64_t in_progress_files_size_; // Compactions should only execute if they can leave at least // this amount of buffer space for logs and flushes uint64_t compaction_buffer_size_; @@ -126,10 +153,32 @@ class SstFileManagerImpl : public SstFileManager { // A map containing all tracked files and there sizes // file_path => file_size std::unordered_map tracked_files_; + // A set of files belonging to in-progress compactions + std::unordered_set in_progress_files_; // The maximum allowed space (in bytes) for sst files. uint64_t max_allowed_space_; // DeleteScheduler used to throttle file deletition. DeleteScheduler delete_scheduler_; + port::CondVar cv_; + // Flag to force error recovery thread to exit + bool closing_; + // Background error recovery thread + std::unique_ptr bg_thread_; + // A path in the filesystem corresponding to this SFM. This is used for + // calling Env::GetFreeSpace. 
Posix requires a path in the filesystem + std::string path_; + // Save the current background error + Status bg_err_; + // Amount of free disk headroom before allowing recovery from hard errors + uint64_t reserved_disk_buffer_; + // For soft errors, amount of free disk space before we can allow + // compactions to run full throttle. If disk space is below this trigger, + // compactions will be gated by free disk space > input size + uint64_t free_space_trigger_; + // List of database error handler instances tracked by this sst file manager + std::list error_handler_list_; + // Pointer to ErrorHandler instance that is currently processing recovery + ErrorHandler* cur_instance_; }; } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/status.cc b/3rdParty/rocksdb/v5.18.X/util/status.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/status.cc rename to 3rdParty/rocksdb/v5.18.X/util/status.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/stderr_logger.h b/3rdParty/rocksdb/v5.18.X/util/stderr_logger.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/stderr_logger.h rename to 3rdParty/rocksdb/v5.18.X/util/stderr_logger.h diff --git a/3rdParty/rocksdb/v5.16.X/util/stop_watch.h b/3rdParty/rocksdb/v5.18.X/util/stop_watch.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/stop_watch.h rename to 3rdParty/rocksdb/v5.18.X/util/stop_watch.h diff --git a/3rdParty/rocksdb/v5.16.X/util/string_util.cc b/3rdParty/rocksdb/v5.18.X/util/string_util.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/string_util.cc rename to 3rdParty/rocksdb/v5.18.X/util/string_util.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/string_util.h b/3rdParty/rocksdb/v5.18.X/util/string_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/string_util.h rename to 3rdParty/rocksdb/v5.18.X/util/string_util.h diff --git a/3rdParty/rocksdb/v5.16.X/util/sync_point.cc b/3rdParty/rocksdb/v5.18.X/util/sync_point.cc similarity index 
97% rename from 3rdParty/rocksdb/v5.16.X/util/sync_point.cc rename to 3rdParty/rocksdb/v5.18.X/util/sync_point.cc index ce0fa0a972..4599c256d9 100644 --- a/3rdParty/rocksdb/v5.16.X/util/sync_point.cc +++ b/3rdParty/rocksdb/v5.18.X/util/sync_point.cc @@ -17,9 +17,7 @@ SyncPoint* SyncPoint::GetInstance() { return &sync_point; } -SyncPoint::SyncPoint() : - impl_(new Data) { -} +SyncPoint::SyncPoint() : impl_(new Data) {} SyncPoint:: ~SyncPoint() { delete impl_; diff --git a/3rdParty/rocksdb/v5.16.X/util/sync_point.h b/3rdParty/rocksdb/v5.18.X/util/sync_point.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/sync_point.h rename to 3rdParty/rocksdb/v5.18.X/util/sync_point.h diff --git a/3rdParty/rocksdb/v5.16.X/util/sync_point_impl.cc b/3rdParty/rocksdb/v5.18.X/util/sync_point_impl.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/sync_point_impl.cc rename to 3rdParty/rocksdb/v5.18.X/util/sync_point_impl.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/sync_point_impl.h b/3rdParty/rocksdb/v5.18.X/util/sync_point_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/sync_point_impl.h rename to 3rdParty/rocksdb/v5.18.X/util/sync_point_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/util/testharness.cc b/3rdParty/rocksdb/v5.18.X/util/testharness.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/testharness.cc rename to 3rdParty/rocksdb/v5.18.X/util/testharness.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/testharness.h b/3rdParty/rocksdb/v5.18.X/util/testharness.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/testharness.h rename to 3rdParty/rocksdb/v5.18.X/util/testharness.h diff --git a/3rdParty/rocksdb/v5.16.X/util/testutil.cc b/3rdParty/rocksdb/v5.18.X/util/testutil.cc similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/util/testutil.cc rename to 3rdParty/rocksdb/v5.18.X/util/testutil.cc index 6094d7ba0a..2f8e31cd57 100644 --- a/3rdParty/rocksdb/v5.16.X/util/testutil.cc +++ 
b/3rdParty/rocksdb/v5.18.X/util/testutil.cc @@ -124,20 +124,21 @@ const Comparator* Uint64Comparator() { return &uint64comp; } -WritableFileWriter* GetWritableFileWriter(WritableFile* wf) { - unique_ptr file(wf); - return new WritableFileWriter(std::move(file), EnvOptions()); +WritableFileWriter* GetWritableFileWriter(WritableFile* wf, + const std::string& fname) { + std::unique_ptr file(wf); + return new WritableFileWriter(std::move(file), fname, EnvOptions()); } RandomAccessFileReader* GetRandomAccessFileReader(RandomAccessFile* raf) { - unique_ptr file(raf); + std::unique_ptr file(raf); return new RandomAccessFileReader(std::move(file), "[test RandomAccessFileReader]"); } SequentialFileReader* GetSequentialFileReader(SequentialFile* se, const std::string& fname) { - unique_ptr file(se); + std::unique_ptr file(se); return new SequentialFileReader(std::move(file), fname); } @@ -400,5 +401,21 @@ Status DestroyDir(Env* env, const std::string& dir) { return s; } +bool IsDirectIOSupported(Env* env, const std::string& dir) { + EnvOptions env_options; + env_options.use_mmap_writes = false; + env_options.use_direct_writes = true; + std::string tmp = TempFileName(dir, 999); + Status s; + { + std::unique_ptr file; + s = env->NewWritableFile(tmp, &file, env_options); + } + if (s.ok()) { + s = env->DeleteFile(tmp); + } + return s.ok(); +} + } // namespace test } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/testutil.h b/3rdParty/rocksdb/v5.18.X/util/testutil.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/util/testutil.h rename to 3rdParty/rocksdb/v5.18.X/util/testutil.h index ffdaa5ad60..2aab3df72c 100644 --- a/3rdParty/rocksdb/v5.16.X/util/testutil.h +++ b/3rdParty/rocksdb/v5.18.X/util/testutil.h @@ -64,7 +64,7 @@ class ErrorEnv : public EnvWrapper { num_writable_file_errors_(0) { } virtual Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& soptions) override { result->reset(); 
if (writable_file_error_) { @@ -183,7 +183,8 @@ class VectorIterator : public InternalIterator { std::vector values_; size_t current_; }; -extern WritableFileWriter* GetWritableFileWriter(WritableFile* wf); +extern WritableFileWriter* GetWritableFileWriter(WritableFile* wf, + const std::string& fname); extern RandomAccessFileReader* GetRandomAccessFileReader(RandomAccessFile* raf); @@ -249,7 +250,7 @@ class RandomRWStringSink : public RandomRWFile { Status Write(uint64_t offset, const Slice& data) override { if (offset + data.size() > ss_->contents_.size()) { - ss_->contents_.resize(offset + data.size(), '\0'); + ss_->contents_.resize(static_cast(offset) + data.size(), '\0'); } char* pos = const_cast(ss_->contents_.data() + offset); @@ -517,7 +518,7 @@ class StringEnv : public EnvWrapper { "Attemp to read when it already reached eof."); } // TODO(yhchiang): Currently doesn't handle the overflow case. - offset_ += n; + offset_ += static_cast(n); return Status::OK(); } @@ -531,7 +532,7 @@ class StringEnv : public EnvWrapper { explicit StringSink(std::string* contents) : WritableFile(), contents_(contents) {} virtual Status Truncate(uint64_t size) override { - contents_->resize(size); + contents_->resize(static_cast(size)); return Status::OK(); } virtual Status Close() override { return Status::OK(); } @@ -553,7 +554,7 @@ class StringEnv : public EnvWrapper { const Status WriteToNewFile(const std::string& file_name, const std::string& content) { - unique_ptr r; + std::unique_ptr r; auto s = NewWritableFile(file_name, &r, EnvOptions()); if (!s.ok()) { return s; @@ -566,7 +567,8 @@ class StringEnv : public EnvWrapper { } // The following text is boilerplate that forwards all methods to target() - Status NewSequentialFile(const std::string& f, unique_ptr* r, + Status NewSequentialFile(const std::string& f, + std::unique_ptr* r, const EnvOptions& /*options*/) override { auto iter = files_.find(f); if (iter == files_.end()) { @@ -576,11 +578,11 @@ class StringEnv : public 
EnvWrapper { return Status::OK(); } Status NewRandomAccessFile(const std::string& /*f*/, - unique_ptr* /*r*/, + std::unique_ptr* /*r*/, const EnvOptions& /*options*/) override { return Status::NotSupported(); } - Status NewWritableFile(const std::string& f, unique_ptr* r, + Status NewWritableFile(const std::string& f, std::unique_ptr* r, const EnvOptions& /*options*/) override { auto iter = files_.find(f); if (iter != files_.end()) { @@ -590,7 +592,7 @@ class StringEnv : public EnvWrapper { return Status::OK(); } virtual Status NewDirectory(const std::string& /*name*/, - unique_ptr* /*result*/) override { + std::unique_ptr* /*result*/) override { return Status::NotSupported(); } Status FileExists(const std::string& f) override { @@ -746,5 +748,7 @@ std::string RandomName(Random* rnd, const size_t len); Status DestroyDir(Env* env, const std::string& dir); +bool IsDirectIOSupported(Env* env, const std::string& dir); + } // namespace test } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/thread_list_test.cc b/3rdParty/rocksdb/v5.18.X/util/thread_list_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/thread_list_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/thread_list_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/thread_local.cc b/3rdParty/rocksdb/v5.18.X/util/thread_local.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/util/thread_local.cc rename to 3rdParty/rocksdb/v5.18.X/util/thread_local.cc index dea2002a02..7346eff11e 100644 --- a/3rdParty/rocksdb/v5.16.X/util/thread_local.cc +++ b/3rdParty/rocksdb/v5.18.X/util/thread_local.cc @@ -204,7 +204,7 @@ extern "C" { // The linker must not discard thread_callback_on_exit. (We force a reference // to this variable with a linker /include:symbol pragma to ensure that.) If // this variable is discarded, the OnThreadExit function will never be called. -#ifdef _WIN64 +#ifndef _X86_ // .CRT section is merged with .rdata on x64 so it must be constant data. 
#pragma const_seg(".CRT$XLB") @@ -219,7 +219,7 @@ const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = #pragma comment(linker, "/include:_tls_used") #pragma comment(linker, "/include:p_thread_callback_on_exit") -#else // _WIN64 +#else // _X86_ #pragma data_seg(".CRT$XLB") PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit; @@ -229,7 +229,7 @@ PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit; #pragma comment(linker, "/INCLUDE:__tls_used") #pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit") -#endif // _WIN64 +#endif // _X86_ #else // https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/thread_local.h b/3rdParty/rocksdb/v5.18.X/util/thread_local.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/thread_local.h rename to 3rdParty/rocksdb/v5.18.X/util/thread_local.h diff --git a/3rdParty/rocksdb/v5.16.X/util/thread_local_test.cc b/3rdParty/rocksdb/v5.18.X/util/thread_local_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/thread_local_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/thread_local_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/thread_operation.h b/3rdParty/rocksdb/v5.18.X/util/thread_operation.h similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/util/thread_operation.h rename to 3rdParty/rocksdb/v5.18.X/util/thread_operation.h index 025392b59d..f1827da0a0 100644 --- a/3rdParty/rocksdb/v5.16.X/util/thread_operation.h +++ b/3rdParty/rocksdb/v5.18.X/util/thread_operation.h @@ -70,7 +70,7 @@ static OperationStageInfo global_op_stage_table[] = { {ThreadStatus::STAGE_MEMTABLE_ROLLBACK, "MemTableList::RollbackMemtableFlush"}, {ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS, - "MemTableList::InstallMemtableFlushResults"}, + "MemTableList::TryInstallMemtableFlushResults"}, }; // The structure that describes a state. 
diff --git a/3rdParty/rocksdb/v5.16.X/util/threadpool_imp.cc b/3rdParty/rocksdb/v5.18.X/util/threadpool_imp.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/util/threadpool_imp.cc rename to 3rdParty/rocksdb/v5.18.X/util/threadpool_imp.cc index 3b1f762b8f..b431830ee6 100644 --- a/3rdParty/rocksdb/v5.16.X/util/threadpool_imp.cc +++ b/3rdParty/rocksdb/v5.18.X/util/threadpool_imp.cc @@ -188,7 +188,7 @@ void ThreadPoolImpl::Impl::BGThread(size_t thread_id) { bool low_cpu_priority = false; while (true) { -// Wait until there is an item that is ready to run + // Wait until there is an item that is ready to run std::unique_lock lock(mu_); // Stop waiting if the thread needs to do work or needs to terminate. while (!exit_all_threads_ && !IsLastExcessiveThread(thread_id) && @@ -198,7 +198,7 @@ void ThreadPoolImpl::Impl::BGThread(size_t thread_id) { if (exit_all_threads_) { // mechanism to let BG threads exit safely - if(!wait_for_jobs_to_complete_ || + if (!wait_for_jobs_to_complete_ || queue_.empty()) { break; } @@ -469,16 +469,12 @@ void ThreadPoolImpl::SubmitJob(std::function&& job) { void ThreadPoolImpl::Schedule(void(*function)(void* arg1), void* arg, void* tag, void(*unschedFunction)(void* arg)) { - - std::function fn = [arg, function] { function(arg); }; - - std::function unfn; - if (unschedFunction != nullptr) { - auto uf = [arg, unschedFunction] { unschedFunction(arg); }; - unfn = std::move(uf); + if (unschedFunction == nullptr) { + impl_->Submit(std::bind(function, arg), std::function(), tag); + } else { + impl_->Submit(std::bind(function, arg), std::bind(unschedFunction, arg), + tag); } - - impl_->Submit(std::move(fn), std::move(unfn), tag); } int ThreadPoolImpl::UnSchedule(void* arg) { diff --git a/3rdParty/rocksdb/v5.16.X/util/threadpool_imp.h b/3rdParty/rocksdb/v5.18.X/util/threadpool_imp.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/threadpool_imp.h rename to 3rdParty/rocksdb/v5.18.X/util/threadpool_imp.h diff --git 
a/3rdParty/rocksdb/v5.16.X/util/timer_queue.h b/3rdParty/rocksdb/v5.18.X/util/timer_queue.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/timer_queue.h rename to 3rdParty/rocksdb/v5.18.X/util/timer_queue.h diff --git a/3rdParty/rocksdb/v5.16.X/util/timer_queue_test.cc b/3rdParty/rocksdb/v5.18.X/util/timer_queue_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/timer_queue_test.cc rename to 3rdParty/rocksdb/v5.18.X/util/timer_queue_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/util/trace_replay.cc b/3rdParty/rocksdb/v5.18.X/util/trace_replay.cc similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/util/trace_replay.cc rename to 3rdParty/rocksdb/v5.18.X/util/trace_replay.cc index cd2e3ee95e..5b9bec651e 100644 --- a/3rdParty/rocksdb/v5.16.X/util/trace_replay.cc +++ b/3rdParty/rocksdb/v5.18.X/util/trace_replay.cc @@ -16,6 +16,8 @@ namespace rocksdb { +const std::string kTraceMagic = "feedcafedeadbeef"; + namespace { void EncodeCFAndKey(std::string* dst, uint32_t cf_id, const Slice& key) { PutFixed32(dst, cf_id); @@ -29,14 +31,20 @@ void DecodeCFAndKey(std::string& buffer, uint32_t* cf_id, Slice* key) { } } // namespace -Tracer::Tracer(Env* env, std::unique_ptr&& trace_writer) - : env_(env), trace_writer_(std::move(trace_writer)) { +Tracer::Tracer(Env* env, const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) + : env_(env), + trace_options_(trace_options), + trace_writer_(std::move(trace_writer)) { WriteHeader(); } Tracer::~Tracer() { trace_writer_.reset(); } Status Tracer::Write(WriteBatch* write_batch) { + if (IsTraceFileOverMax()) { + return Status::OK(); + } Trace trace; trace.ts = env_->NowMicros(); trace.type = kTraceWrite; @@ -45,6 +53,9 @@ Status Tracer::Write(WriteBatch* write_batch) { } Status Tracer::Get(ColumnFamilyHandle* column_family, const Slice& key) { + if (IsTraceFileOverMax()) { + return Status::OK(); + } Trace trace; trace.ts = env_->NowMicros(); trace.type = kTraceGet; @@ -53,6 +64,9 @@ 
Status Tracer::Get(ColumnFamilyHandle* column_family, const Slice& key) { } Status Tracer::IteratorSeek(const uint32_t& cf_id, const Slice& key) { + if (IsTraceFileOverMax()) { + return Status::OK(); + } Trace trace; trace.ts = env_->NowMicros(); trace.type = kTraceIteratorSeek; @@ -61,6 +75,9 @@ Status Tracer::IteratorSeek(const uint32_t& cf_id, const Slice& key) { } Status Tracer::IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key) { + if (IsTraceFileOverMax()) { + return Status::OK(); + } Trace trace; trace.ts = env_->NowMicros(); trace.type = kTraceIteratorSeekForPrev; @@ -68,6 +85,11 @@ Status Tracer::IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key) { return WriteTrace(trace); } +bool Tracer::IsTraceFileOverMax() { + uint64_t trace_file_size = trace_writer_->GetFileSize(); + return (trace_file_size > trace_options_.max_trace_file_size); +} + Status Tracer::WriteHeader() { std::ostringstream s; s << kTraceMagic << "\t" @@ -103,7 +125,7 @@ Status Tracer::WriteTrace(const Trace& trace) { Status Tracer::Close() { return WriteFooter(); } Replayer::Replayer(DB* db, const std::vector& handles, - unique_ptr&& reader) + std::unique_ptr&& reader) : trace_reader_(std::move(reader)) { assert(db != nullptr); db_ = static_cast(db->GetRootDB()); diff --git a/3rdParty/rocksdb/v5.16.X/util/trace_replay.h b/3rdParty/rocksdb/v5.18.X/util/trace_replay.h similarity index 88% rename from 3rdParty/rocksdb/v5.16.X/util/trace_replay.h rename to 3rdParty/rocksdb/v5.18.X/util/trace_replay.h index b324696f01..d935f65ce7 100644 --- a/3rdParty/rocksdb/v5.16.X/util/trace_replay.h +++ b/3rdParty/rocksdb/v5.18.X/util/trace_replay.h @@ -10,6 +10,7 @@ #include #include "rocksdb/env.h" +#include "rocksdb/options.h" #include "rocksdb/trace_reader_writer.h" namespace rocksdb { @@ -21,7 +22,7 @@ class DBImpl; class Slice; class WriteBatch; -const std::string kTraceMagic = "feedcafedeadbeef"; +extern const std::string kTraceMagic; const unsigned int kTraceTimestampSize = 8; 
const unsigned int kTraceTypeSize = 1; const unsigned int kTracePayloadLengthSize = 4; @@ -55,13 +56,15 @@ struct Trace { // Trace RocksDB operations using a TraceWriter. class Tracer { public: - Tracer(Env* env, std::unique_ptr&& trace_writer); + Tracer(Env* env, const TraceOptions& trace_options, + std::unique_ptr&& trace_writer); ~Tracer(); Status Write(WriteBatch* write_batch); Status Get(ColumnFamilyHandle* cfname, const Slice& key); Status IteratorSeek(const uint32_t& cf_id, const Slice& key); Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key); + bool IsTraceFileOverMax(); Status Close(); @@ -71,7 +74,8 @@ class Tracer { Status WriteTrace(const Trace& trace); Env* env_; - unique_ptr trace_writer_; + TraceOptions trace_options_; + std::unique_ptr trace_writer_; }; // Replay RocksDB operations from a trace. diff --git a/3rdParty/rocksdb/v5.16.X/util/transaction_test_util.cc b/3rdParty/rocksdb/v5.18.X/util/transaction_test_util.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/util/transaction_test_util.cc rename to 3rdParty/rocksdb/v5.18.X/util/transaction_test_util.cc index 19d27b1a14..58d95b2ae1 100644 --- a/3rdParty/rocksdb/v5.16.X/util/transaction_test_util.cc +++ b/3rdParty/rocksdb/v5.18.X/util/transaction_test_util.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -135,8 +136,8 @@ bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn, std::vector set_vec(num_sets_); std::iota(set_vec.begin(), set_vec.end(), static_cast(0)); - std::random_shuffle(set_vec.begin(), set_vec.end(), - [&](uint64_t r) { return rand_->Uniform(r); }); + std::shuffle(set_vec.begin(), set_vec.end(), std::random_device{}); + // For each set, pick a key at random and increment it for (uint16_t set_i : set_vec) { uint64_t int_value = 0; @@ -257,10 +258,8 @@ Status RandomTransactionInserter::Verify(DB* db, uint16_t num_sets, std::vector set_vec(num_sets); std::iota(set_vec.begin(), set_vec.end(), static_cast(0)); - if 
(rand) { - std::random_shuffle(set_vec.begin(), set_vec.end(), - [&](uint64_t r) { return rand->Uniform(r); }); - } + std::shuffle(set_vec.begin(), set_vec.end(), std::random_device{}); + // For each set of keys with the same prefix, sum all the values for (uint16_t set_i : set_vec) { // Five digits (since the largest uint16_t is 65535) plus the NUL diff --git a/3rdParty/rocksdb/v5.16.X/util/transaction_test_util.h b/3rdParty/rocksdb/v5.18.X/util/transaction_test_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/transaction_test_util.h rename to 3rdParty/rocksdb/v5.18.X/util/transaction_test_util.h diff --git a/3rdParty/rocksdb/v5.16.X/util/util.h b/3rdParty/rocksdb/v5.18.X/util/util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/util/util.h rename to 3rdParty/rocksdb/v5.18.X/util/util.h diff --git a/3rdParty/rocksdb/v5.18.X/util/vector_iterator.h b/3rdParty/rocksdb/v5.18.X/util/vector_iterator.h new file mode 100644 index 0000000000..da60eb229c --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/vector_iterator.h @@ -0,0 +1,100 @@ +#pragma once + +#include +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/iterator.h" +#include "rocksdb/slice.h" +#include "table/internal_iterator.h" + +namespace rocksdb { + +// Iterator over a vector of keys/values +class VectorIterator : public InternalIterator { + public: + VectorIterator(std::vector keys, std::vector values, + const InternalKeyComparator* icmp) + : keys_(std::move(keys)), + values_(std::move(values)), + indexed_cmp_(icmp, &keys_), + current_(keys.size()) { + assert(keys_.size() == values_.size()); + + indices_.reserve(keys_.size()); + for (size_t i = 0; i < keys_.size(); i++) { + indices_.push_back(i); + } + std::sort(indices_.begin(), indices_.end(), indexed_cmp_); + } + + virtual bool Valid() const override { + return !indices_.empty() && current_ < indices_.size(); + } + + virtual void SeekToFirst() override { current_ = 0; } + virtual void SeekToLast() 
override { current_ = indices_.size() - 1; } + + virtual void Seek(const Slice& target) override { + current_ = std::lower_bound(indices_.begin(), indices_.end(), target, + indexed_cmp_) - + indices_.begin(); + } + + virtual void SeekForPrev(const Slice& target) override { + current_ = std::lower_bound(indices_.begin(), indices_.end(), target, + indexed_cmp_) - + indices_.begin(); + if (!Valid()) { + SeekToLast(); + } else { + Prev(); + } + } + + virtual void Next() override { current_++; } + virtual void Prev() override { current_--; } + + virtual Slice key() const override { + return Slice(keys_[indices_[current_]]); + } + virtual Slice value() const override { + return Slice(values_[indices_[current_]]); + } + + virtual Status status() const override { return Status::OK(); } + + virtual bool IsKeyPinned() const override { return true; } + virtual bool IsValuePinned() const override { return true; } + + private: + struct IndexedKeyComparator { + IndexedKeyComparator(const InternalKeyComparator* c, + const std::vector* ks) + : cmp(c), keys(ks) {} + + bool operator()(size_t a, size_t b) const { + return cmp->Compare((*keys)[a], (*keys)[b]) < 0; + } + + bool operator()(size_t a, const Slice& b) const { + return cmp->Compare((*keys)[a], b) < 0; + } + + bool operator()(const Slice& a, size_t b) const { + return cmp->Compare(a, (*keys)[b]) < 0; + } + + const InternalKeyComparator* cmp; + const std::vector* keys; + }; + + std::vector keys_; + std::vector values_; + IndexedKeyComparator indexed_cmp_; + std::vector indices_; + size_t current_; +}; + +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.18.X/util/xxhash.cc b/3rdParty/rocksdb/v5.18.X/util/xxhash.cc new file mode 100644 index 0000000000..2ec95a636e --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/util/xxhash.cc @@ -0,0 +1,1074 @@ +/* +xxHash - Fast Hash algorithm +Copyright (C) 2012-2014, Yann Collet. 
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash source repository : http://code.google.com/p/xxhash/ +*/ + + +//************************************** +// Tuning parameters +//************************************** +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is + * safe and portable. Unfortunately, on some target/compiler combinations, the + * generated assembly is sub-optimal. The below switch allow to select different + * access method for improved performance. Method 0 (default) : use `memcpy()`. + * Safe and portable. Method 1 : `__packed` statement. 
It depends on compiler + * extension (ie, not portable). This method is safe if your compiler supports + * it, and *generally* as fast or faster than `memcpy`. Method 2 : direct + * access. This method doesn't depend on compiler but violate C standard. It can + * generate buggy code on targets which do not support unaligned memory + * accesses. But in some circumstances, it's the only known way to get the most + * performance (ie GCC + ARMv6) See http://stackoverflow.com/a/32095106/646947 + * for details. Prefer these methods in priority order (0 > 1 > 2) + */ + +#include "util/util.h" + +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line \ + for example */ +#if defined(__GNUC__) && \ + (defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)) +#define XXH_FORCE_MEMORY_ACCESS 2 +#elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && \ + (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7S__))) +#define XXH_FORCE_MEMORY_ACCESS 1 +#endif +#endif + +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. +// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). 
+#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USE_UNALIGNED_ACCESS 1 +#endif + +// XXH_ACCEPT_NULL_INPUT_POINTER : +// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. +// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. +// This option has a very small performance cost (only measurable on small inputs). +// By default, this option is disabled. To enable it, uncomment below define : +//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 + +// XXH_FORCE_NATIVE_FORMAT : +// By default, xxHash library provides endian-independent Hash values, based on little-endian convention. +// Results are therefore identical for little-endian and big-endian CPU. +// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. +// Should endian-independence be of no importance for your application, you may set the #define below to 1. +// It will improve speed for Big-endian CPU. +// This option has no impact on Little_Endian CPU. +#define XXH_FORCE_NATIVE_FORMAT 0 + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; + * set it to 0 when the input is guaranteed to be aligned, + * or when alignment doesn't matter for performance. 
+ */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || \ + defined(_M_X64) +#define XXH_FORCE_ALIGN_CHECK 0 +#else +#define XXH_FORCE_ALIGN_CHECK 1 +#endif +#endif + +//************************************** +// Compiler Specific Options +//************************************** +// Disable some Visual warning messages +#ifdef _MSC_VER // Visual Studio +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +# pragma warning(disable : 4804) // disable: C4804: 'operation' : unsafe use of type 'bool' in operation (static assert line 313) +#endif + +#ifdef _MSC_VER // Visual Studio +# define FORCE_INLINE static __forceinline +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +//************************************** +// Includes & Memory related functions +//************************************** +#include "xxhash.h" +// Modify the local functions below should you wish to use some other memory related routines +// for malloc(), free() +#include +FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); } +FORCE_INLINE void XXH_free (void* p) { free(p); } +// for memcpy() +#include +FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +#include /* assert */ + +namespace rocksdb { +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) +# 
define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct _U32_S { U32 v; } _PACKED U32_S; + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) +#endif + +#define A32(x) (((U32_S *)(x))->v) + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory + * access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*)memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1)) + +/* __pack instructions are safer, but compiler specific, hence potentially + * problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { + U32 u32; +} __attribute__((packed)) unalign; +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ +static U32 XXH_read32(const void* memPtr) { + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +//*************************************** +// Compiler-specific Functions and Macros +//*************************************** +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// Note : although _rotl exists for minGW (GCC under windows), performance seems poor +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +#define XXH_rotl64(x, r) _rotl64(x, r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#define XXH_rotl64(x, r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) // Visual Studio +# define XXH_swap32 _byteswap_ulong +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static inline U32 XXH_swap32 (U32 x) { + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff );} +#endif + + +//************************************** +// Constants +//************************************** +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + + +//************************************** +// Architecture Macros +//************************************** +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; +#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch + static const int one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) +#endif + + +//************************************** +// Macros +//************************************** +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations + + +//**************************** +// Memory reads +//**************************** +typedef 
enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); +} + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, + XXH_alignment align) { + if (align == XXH_unaligned) + return endian == XXH_littleEndian ? XXH_read32(ptr) + : XXH_swap32(XXH_read32(ptr)); + else + return endian == XXH_littleEndian ? *(const U32*)ptr + : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +//**************************** +// Simple Hash Functions +//**************************** +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U32 h32; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } +#endif + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do + { + v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + 
XXH_rotl32(v4, 18); + } + else + { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p<=bEnd-4) + { + h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32(const void* input, int len, U32 seed) +{ +#if 0 + // Simple version, good for code maintenance, but unfortunately slow for small inputs + void* state = XXH32_init(seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 3)) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +//**************************** +// Advanced Hash Functions +//**************************** + +struct XXH_state32_t +{ + U64 total_len; + U32 seed; + U32 v1; + U32 v2; + U32 v3; + U32 v4; + int memsize; + char memory[16]; +}; + + +int XXH32_sizeofState() +{ + XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough + return sizeof(struct XXH_state32_t); +} + + +XXH_errorcode XXH32_resetState(void* state_in, U32 seed) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + state->seed = seed; + state->v1 = seed + PRIME32_1 + PRIME32_2; + state->v2 = seed + PRIME32_2; + state->v3 = 
seed + 0; + state->v4 = seed - PRIME32_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + + +void* XXH32_init (U32 seed) +{ + void* state = XXH_malloc (sizeof(struct XXH_state32_t)); + XXH32_resetState(state, seed); + return state; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 16) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + { + const U32* p32 = (const U32*)state->memory; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do + { + v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 
*= PRIME32_1; p+=4; + v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH32_update (void* state_in, const void* input, int len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U32 h32; + + if (state->total_len >= 16) + { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } + else + { + h32 = state->seed + PRIME32_5; + } + + h32 += (U32) state->total_len; + + while (p<=bEnd-4) + { + h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32_intermediateDigest (void* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + else + return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); +} + + +U32 XXH32_digest (void* state_in) +{ + U32 h32 = XXH32_intermediateDigest(state_in); + + XXH_free(state_in); + + return h32; +} + +/* 
******************************************************************* + * 64-bit hash functions + *********************************************************************/ + + #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + + /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ + static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + + #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + + /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ + /* currently only defined for gcc and icc */ + typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; + static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + + #else + + /* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + + static U64 XXH_read64(const void* memPtr) + { + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; + } +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +#define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +#define XXH_swap64 __builtin_bswap64 +#else +static U64 XXH_swap64(U64 x) { + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, + XXH_alignment align) { + if (align == XXH_unaligned) + return endian == XXH_littleEndian ? XXH_read64(ptr) + : XXH_swap64(XXH_read64(ptr)); + else + return endian == XXH_littleEndian ? 
*(const U64*)ptr + : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) { + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + +/*====== xxh64 ======*/ + +static const U64 PRIME64_1 = + 11400714785074694791ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 + */ +static const U64 PRIME64_2 = + 14029467366897019727ULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 + */ +static const U64 PRIME64_3 = + 1609587929392839161ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 + */ +static const U64 PRIME64_4 = + 9650029242287828579ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 + */ +static const U64 PRIME64_5 = + 2870177450012600261ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 + */ + +static U64 XXH64_round(U64 acc, U64 input) { + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) { + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +static U64 XXH64_avalanche(U64 h64) { + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +static U64 XXH64_finalize(U64 h64, const void* ptr, size_t len, + XXH_endianess endian, XXH_alignment align) { + const BYTE* p = (const BYTE*)ptr; + +#define PROCESS1_64 \ + h64 ^= (*p++) * PRIME64_5; \ + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + +#define PROCESS4_64 \ + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ + p += 4; \ + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + +#define PROCESS8_64 \ + { \ + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ + p += 8; \ + 
h64 ^= k1; \ + h64 = XXH_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; \ + } + + switch (len & 31) { + case 24: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 16: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 8: + PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 20: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 12: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 4: + PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 17: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 9: + PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 21: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 13: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 5: + PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 18: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 10: + PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 30: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 22: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 14: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 6: + PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 19: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 11: + PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 23: + PROCESS8_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 15: + PROCESS8_64; + 
FALLTHROUGH_INTENDED; + /* fallthrough */ + case 7: + PROCESS4_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 3: + PROCESS1_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 2: + PROCESS1_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 1: + PROCESS1_64; + FALLTHROUGH_INTENDED; + /* fallthrough */ + case 0: + return XXH64_avalanche(h64); + } + + /* impossible to reach */ + assert(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, + XXH_endianess endian, XXH_alignment align) { + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \ + (XXH_ACCEPT_NULL_INPUT_POINTER >= 1) + if (p == NULL) { + len = 0; + bEnd = p = (const BYTE*)(size_t)32; + } +#endif + + if (len >= 32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); + p += 8; + v2 = XXH64_round(v2, XXH_get64bits(p)); + p += 8; + v3 = XXH64_round(v3, XXH_get64bits(p)); + p += 8; + v4 = XXH64_round(v4, XXH_get64bits(p)); + p += 8; + } while (p <= limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64)len; + + return XXH64_finalize(h64, p, len, endian, align); +} + +unsigned long long XXH64(const void* input, size_t len, + unsigned long long seed) { +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, input, len); + return XXH64_digest(&state); +#else + XXH_endianess endian_detected = 
(XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7) == + 0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, + XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } + } + + if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, + XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/*====== Hash Streaming ======*/ + +XXH64_state_t* XXH64_createState(void) { + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { + XXH_free(statePtr); + return XXH_OK; +} + +void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) { + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { + XXH64_state_t state; /* using a local state to memcpy() in order to avoid + strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + +FORCE_INLINE XXH_errorcode XXH64_update_endian(XXH64_state_t* state, + const void* input, size_t len, + XXH_endianess endian) { + if (input == NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \ + (XXH_ACCEPT_NULL_INPUT_POINTER >= 1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + { + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* 
fill in tmp buffer */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, + 32 - state->memsize); + state->v1 = + XXH64_round(state->v1, XXH_readLE64(state->mem64 + 0, endian)); + state->v2 = + XXH64_round(state->v2, XXH_readLE64(state->mem64 + 1, endian)); + state->v3 = + XXH64_round(state->v3, XXH_readLE64(state->mem64 + 2, endian)); + state->v4 = + XXH64_round(state->v4, XXH_readLE64(state->mem64 + 3, endian)); + p += 32 - state->memsize; + state->memsize = 0; + } + + if (p + 32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); + p += 8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); + p += 8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); + p += 8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); + p += 8; + } while (p <= limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd - p)); + state->memsize = (unsigned)(bEnd - p); + } + } + + return XXH_OK; +} + +XXH_errorcode XXH64_update(XXH64_state_t* state_in, const void* input, + size_t len) { + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + +FORCE_INLINE U64 XXH64_digest_endian(const XXH64_state_t* state, + XXH_endianess endian) { + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + + 
XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + PRIME64_5; + } + + h64 += (U64)state->total_len; + + return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, + XXH_aligned); +} + +unsigned long long XXH64_digest(const XXH64_state_t* state_in) { + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected == XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + +/*====== Canonical representation ======*/ + +void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) { + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) { + return XXH_readBE64(src); +} +} // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/util/xxhash.h b/3rdParty/rocksdb/v5.18.X/util/xxhash.h similarity index 70% rename from 3rdParty/rocksdb/v5.16.X/util/xxhash.h rename to 3rdParty/rocksdb/v5.18.X/util/xxhash.h index 3343e3488f..88352ac75f 100644 --- a/3rdParty/rocksdb/v5.16.X/util/xxhash.h +++ b/3rdParty/rocksdb/v5.18.X/util/xxhash.h @@ -59,6 +59,14 @@ It depends on successfully passing SMHasher test set. #pragma once +#include + +#if !defined(__VMS) && \ + (defined(__cplusplus) || \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) +#include +#endif + #if defined (__cplusplus) namespace rocksdb { #endif @@ -67,6 +75,7 @@ namespace rocksdb { //**************************** // Type //**************************** +/* size_t */ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; @@ -157,7 +166,74 @@ To free memory context, use XXH32_digest(), or free(). 
#define XXH32_result XXH32_digest #define XXH32_getIntermediateResult XXH32_intermediateDigest +/*-********************************************************************** + * 64-bit hash + ************************************************************************/ +typedef unsigned long long XXH64_hash_t; +/*! XXH64() : + Calculate the 64-bit hash of sequence of length "len" stored at memory + address "input". "seed" can be used to alter the result predictably. This + function runs faster on 64-bit systems, but slower on 32-bit systems (see + benchmark). +*/ +XXH64_hash_t XXH64(const void* input, size_t length, unsigned long long seed); + +/*====== Streaming ======*/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH64_state_t* XXH64_createState(void); +XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed); +XXH_errorcode XXH64_update(XXH64_state_t* statePtr, const void* input, + size_t length); +XXH64_hash_t XXH64_digest(const XXH64_state_t* statePtr); + +/*====== Canonical representation ======*/ +typedef struct { + unsigned char digest[8]; +} XXH64_canonical_t; +void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. 
*/ + +#if !defined(__VMS) && \ + (defined(__cplusplus) || \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) + +struct XXH64_state_s { + uint64_t total_len; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t mem64[4]; + uint32_t memsize; + uint32_t reserved[2]; /* never read nor write, might be removed in a future + version */ +}; /* typedef'd to XXH64_state_t */ + +#else + +#ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ +struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; + unsigned memsize; + unsigned reserved[2]; /* never read nor write, might be removed in a future + version */ +}; /* typedef'd to XXH64_state_t */ +#endif + +#endif #if defined (__cplusplus) } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/utilities/backupable/backupable_db.cc b/3rdParty/rocksdb/v5.18.X/utilities/backupable/backupable_db.cc similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/utilities/backupable/backupable_db.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/backupable/backupable_db.cc index 75a65c89a3..78def188cf 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/backupable/backupable_db.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/backupable/backupable_db.cc @@ -305,16 +305,16 @@ class BackupEngineImpl : public BackupEngine { // @param contents If non-empty, the file will be created with these contents. 
Status CopyOrCreateFile(const std::string& src, const std::string& dst, const std::string& contents, Env* src_env, - Env* dst_env, bool sync, RateLimiter* rate_limiter, + Env* dst_env, const EnvOptions& src_env_options, + bool sync, RateLimiter* rate_limiter, uint64_t* size = nullptr, uint32_t* checksum_value = nullptr, uint64_t size_limit = 0, std::function progress_callback = []() {}); - Status CalculateChecksum(const std::string& src, - Env* src_env, - uint64_t size_limit, - uint32_t* checksum_value); + Status CalculateChecksum(const std::string& src, Env* src_env, + const EnvOptions& src_env_options, + uint64_t size_limit, uint32_t* checksum_value); struct CopyOrCreateResult { uint64_t size; @@ -331,6 +331,7 @@ class BackupEngineImpl : public BackupEngine { std::string contents; Env* src_env; Env* dst_env; + EnvOptions src_env_options; bool sync; RateLimiter* rate_limiter; uint64_t size_limit; @@ -338,14 +339,15 @@ class BackupEngineImpl : public BackupEngine { std::function progress_callback; CopyOrCreateWorkItem() - : src_path(""), - dst_path(""), - contents(""), - src_env(nullptr), - dst_env(nullptr), - sync(false), - rate_limiter(nullptr), - size_limit(0) {} + : src_path(""), + dst_path(""), + contents(""), + src_env(nullptr), + dst_env(nullptr), + src_env_options(), + sync(false), + rate_limiter(nullptr), + size_limit(0) {} CopyOrCreateWorkItem(const CopyOrCreateWorkItem&) = delete; CopyOrCreateWorkItem& operator=(const CopyOrCreateWorkItem&) = delete; @@ -360,6 +362,7 @@ class BackupEngineImpl : public BackupEngine { contents = std::move(o.contents); src_env = o.src_env; dst_env = o.dst_env; + src_env_options = std::move(o.src_env_options); sync = o.sync; rate_limiter = o.rate_limiter; size_limit = o.size_limit; @@ -370,14 +373,15 @@ class BackupEngineImpl : public BackupEngine { CopyOrCreateWorkItem(std::string _src_path, std::string _dst_path, std::string _contents, Env* _src_env, Env* _dst_env, - bool _sync, RateLimiter* _rate_limiter, - uint64_t 
_size_limit, + EnvOptions _src_env_options, bool _sync, + RateLimiter* _rate_limiter, uint64_t _size_limit, std::function _progress_callback = []() {}) : src_path(std::move(_src_path)), dst_path(std::move(_dst_path)), contents(std::move(_contents)), src_env(_src_env), dst_env(_dst_env), + src_env_options(std::move(_src_env_options)), sync(_sync), rate_limiter(_rate_limiter), size_limit(_size_limit), @@ -471,7 +475,8 @@ class BackupEngineImpl : public BackupEngine { std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, const std::string& fname, // starts with "/" - RateLimiter* rate_limiter, uint64_t size_bytes, uint64_t size_limit = 0, + const EnvOptions& src_env_options, RateLimiter* rate_limiter, + uint64_t size_bytes, uint64_t size_limit = 0, bool shared_checksum = false, std::function progress_callback = []() {}, const std::string& contents = std::string()); @@ -479,9 +484,9 @@ class BackupEngineImpl : public BackupEngine { // backup state data BackupID latest_backup_id_; BackupID latest_valid_backup_id_; - std::map> backups_; - std::map>> corrupt_backups_; + std::map> backups_; + std::map>> + corrupt_backups_; std::unordered_map> backuped_file_infos_; std::atomic stop_backup_; @@ -492,10 +497,10 @@ class BackupEngineImpl : public BackupEngine { Env* backup_env_; // directories - unique_ptr backup_directory_; - unique_ptr shared_directory_; - unique_ptr meta_directory_; - unique_ptr private_directory_; + std::unique_ptr backup_directory_; + std::unique_ptr shared_directory_; + std::unique_ptr meta_directory_; + std::unique_ptr private_directory_; static const size_t kDefaultCopyFileBufferSize = 5 * 1024 * 1024LL; // 5MB size_t copy_file_buffer_size_; @@ -616,7 +621,7 @@ Status BackupEngineImpl::Initialize() { } assert(backups_.find(backup_id) == backups_.end()); backups_.insert(std::make_pair( - backup_id, unique_ptr(new BackupMeta( + backup_id, std::unique_ptr(new BackupMeta( GetBackupMetaFile(backup_id, false /* tmp 
*/), GetBackupMetaFile(backup_id, true /* tmp */), &backuped_file_infos_, backup_env_)))); @@ -723,9 +728,10 @@ Status BackupEngineImpl::Initialize() { CopyOrCreateResult result; result.status = CopyOrCreateFile( work_item.src_path, work_item.dst_path, work_item.contents, - work_item.src_env, work_item.dst_env, work_item.sync, - work_item.rate_limiter, &result.size, &result.checksum_value, - work_item.size_limit, work_item.progress_callback); + work_item.src_env, work_item.dst_env, work_item.src_env_options, + work_item.sync, work_item.rate_limiter, &result.size, + &result.checksum_value, work_item.size_limit, + work_item.progress_callback); work_item.result.set_value(std::move(result)); } }); @@ -761,7 +767,7 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( } auto ret = backups_.insert(std::make_pair( - new_backup_id, unique_ptr(new BackupMeta( + new_backup_id, std::unique_ptr(new BackupMeta( GetBackupMetaFile(new_backup_id, false /* tmp */), GetBackupMetaFile(new_backup_id, true /* tmp */), &backuped_file_infos_, backup_env_)))); @@ -781,7 +787,7 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( RateLimiter* rate_limiter = options_.backup_rate_limiter.get(); if (rate_limiter) { - copy_file_buffer_size_ = rate_limiter->GetSingleBurstBytes(); + copy_file_buffer_size_ = static_cast(rate_limiter->GetSingleBurstBytes()); } // A set into which we will insert the dst_paths that are calculated for live @@ -796,8 +802,10 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( if (s.ok()) { CheckpointImpl checkpoint(db); uint64_t sequence_number = 0; + DBOptions db_options = db->GetDBOptions(); + EnvOptions src_raw_env_options(db_options); s = checkpoint.CreateCustomCheckpoint( - db->GetDBOptions(), + db_options, [&](const std::string& /*src_dirname*/, const std::string& /*fname*/, FileType) { // custom checkpoint will switch to calling copy_file_cb after it sees @@ -815,11 +823,33 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( if (type == 
kTableFile) { st = db_env_->GetFileSize(src_dirname + fname, &size_bytes); } + EnvOptions src_env_options; + switch (type) { + case kLogFile: + src_env_options = + db_env_->OptimizeForLogRead(src_raw_env_options); + break; + case kTableFile: + src_env_options = db_env_->OptimizeForCompactionTableRead( + src_raw_env_options, ImmutableDBOptions(db_options)); + break; + case kDescriptorFile: + src_env_options = + db_env_->OptimizeForManifestRead(src_raw_env_options); + break; + default: + // Other backed up files (like options file) are not read by live + // DB, so don't need to worry about avoiding mixing buffered and + // direct I/O. Just use plain defaults. + src_env_options = src_raw_env_options; + break; + } if (st.ok()) { st = AddBackupFileWorkItem( live_dst_paths, backup_items_to_finish, new_backup_id, options_.share_table_files && type == kTableFile, src_dirname, - fname, rate_limiter, size_bytes, size_limit_bytes, + fname, src_env_options, rate_limiter, size_bytes, + size_limit_bytes, options_.share_files_with_checksum && type == kTableFile, progress_callback); } @@ -829,8 +859,9 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( Log(options_.info_log, "add file for backup %s", fname.c_str()); return AddBackupFileWorkItem( live_dst_paths, backup_items_to_finish, new_backup_id, - false /* shared */, "" /* src_dir */, fname, rate_limiter, - contents.size(), 0 /* size_limit */, false /* shared_checksum */, + false /* shared */, "" /* src_dir */, fname, + EnvOptions() /* src_env_options */, rate_limiter, contents.size(), + 0 /* size_limit */, false /* shared_checksum */, progress_callback, contents); } /* create_file_cb */, &sequence_number, flush_before_backup ? 
0 : port::kMaxUint64); @@ -869,7 +900,7 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( s = new_backup->StoreToFile(options_.sync); } if (s.ok() && options_.sync) { - unique_ptr backup_private_directory; + std::unique_ptr backup_private_directory; backup_env_->NewDirectory( GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)), &backup_private_directory); @@ -1078,7 +1109,7 @@ Status BackupEngineImpl::RestoreDBFromBackup( RateLimiter* rate_limiter = options_.restore_rate_limiter.get(); if (rate_limiter) { - copy_file_buffer_size_ = rate_limiter->GetSingleBurstBytes(); + copy_file_buffer_size_ = static_cast(rate_limiter->GetSingleBurstBytes()); } Status s; std::vector restore_items_to_finish; @@ -1114,7 +1145,8 @@ Status BackupEngineImpl::RestoreDBFromBackup( dst.c_str()); CopyOrCreateWorkItem copy_or_create_work_item( GetAbsolutePath(file), dst, "" /* contents */, backup_env_, db_env_, - false, rate_limiter, 0 /* size_limit */); + EnvOptions() /* src_env_options */, false, rate_limiter, + 0 /* size_limit */); RestoreAfterCopyOrCreateWorkItem after_copy_or_create_work_item( copy_or_create_work_item.result.get_future(), file_info->checksum_value); @@ -1183,15 +1215,15 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id) { Status BackupEngineImpl::CopyOrCreateFile( const std::string& src, const std::string& dst, const std::string& contents, - Env* src_env, Env* dst_env, bool sync, RateLimiter* rate_limiter, - uint64_t* size, uint32_t* checksum_value, uint64_t size_limit, - std::function progress_callback) { + Env* src_env, Env* dst_env, const EnvOptions& src_env_options, bool sync, + RateLimiter* rate_limiter, uint64_t* size, uint32_t* checksum_value, + uint64_t size_limit, std::function progress_callback) { assert(src.empty() != contents.empty()); Status s; - unique_ptr dst_file; - unique_ptr src_file; - EnvOptions env_options; - env_options.use_mmap_writes = false; + std::unique_ptr dst_file; + std::unique_ptr src_file; + EnvOptions 
dst_env_options; + dst_env_options.use_mmap_writes = false; // TODO:(gzh) maybe use direct reads/writes here if possible if (size != nullptr) { *size = 0; @@ -1205,18 +1237,18 @@ Status BackupEngineImpl::CopyOrCreateFile( size_limit = std::numeric_limits::max(); } - s = dst_env->NewWritableFile(dst, &dst_file, env_options); + s = dst_env->NewWritableFile(dst, &dst_file, dst_env_options); if (s.ok() && !src.empty()) { - s = src_env->NewSequentialFile(src, &src_file, env_options); + s = src_env->NewSequentialFile(src, &src_file, src_env_options); } if (!s.ok()) { return s; } - unique_ptr dest_writer( - new WritableFileWriter(std::move(dst_file), env_options)); - unique_ptr src_reader; - unique_ptr buf; + std::unique_ptr dest_writer( + new WritableFileWriter(std::move(dst_file), dst, dst_env_options)); + std::unique_ptr src_reader; + std::unique_ptr buf; if (!src.empty()) { src_reader.reset(new SequentialFileReader(std::move(src_file), src)); buf.reset(new char[copy_file_buffer_size_]); @@ -1231,7 +1263,7 @@ Status BackupEngineImpl::CopyOrCreateFile( if (!src.empty()) { size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) ? 
copy_file_buffer_size_ - : size_limit; + : static_cast(size_limit); s = src_reader->Read(buffer_to_read, &data, buf.get()); processed_buffer_size += buffer_to_read; } else { @@ -1276,9 +1308,10 @@ Status BackupEngineImpl::AddBackupFileWorkItem( std::unordered_set& live_dst_paths, std::vector& backup_items_to_finish, BackupID backup_id, bool shared, const std::string& src_dir, - const std::string& fname, RateLimiter* rate_limiter, uint64_t size_bytes, - uint64_t size_limit, bool shared_checksum, - std::function progress_callback, const std::string& contents) { + const std::string& fname, const EnvOptions& src_env_options, + RateLimiter* rate_limiter, uint64_t size_bytes, uint64_t size_limit, + bool shared_checksum, std::function progress_callback, + const std::string& contents) { assert(!fname.empty() && fname[0] == '/'); assert(contents.empty() != src_dir.empty()); @@ -1289,7 +1322,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( if (shared && shared_checksum) { // add checksum and file length to the file name - s = CalculateChecksum(src_dir + fname, db_env_, size_limit, + s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, size_limit, &checksum_value); if (!s.ok()) { return s; @@ -1365,8 +1398,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem( // the file is present and referenced by a backup ROCKS_LOG_INFO(options_.info_log, "%s already present, calculate checksum", fname.c_str()); - s = CalculateChecksum(src_dir + fname, db_env_, size_limit, - &checksum_value); + s = CalculateChecksum(src_dir + fname, db_env_, src_env_options, + size_limit, &checksum_value); } } live_dst_paths.insert(final_dest_path); @@ -1376,8 +1409,8 @@ Status BackupEngineImpl::AddBackupFileWorkItem( copy_dest_path->c_str()); CopyOrCreateWorkItem copy_or_create_work_item( src_dir.empty() ? 
"" : src_dir + fname, *copy_dest_path, contents, - db_env_, backup_env_, options_.sync, rate_limiter, size_limit, - progress_callback); + db_env_, backup_env_, src_env_options, options_.sync, rate_limiter, + size_limit, progress_callback); BackupAfterCopyOrCreateWorkItem after_copy_or_create_work_item( copy_or_create_work_item.result.get_future(), shared, need_to_copy, backup_env_, temp_dest_path, final_dest_path, dst_relative); @@ -1399,6 +1432,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem( } Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env, + const EnvOptions& src_env_options, uint64_t size_limit, uint32_t* checksum_value) { *checksum_value = 0; @@ -1406,17 +1440,13 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env, size_limit = std::numeric_limits::max(); } - EnvOptions env_options; - env_options.use_mmap_writes = false; - env_options.use_direct_reads = false; - std::unique_ptr src_file; - Status s = src_env->NewSequentialFile(src, &src_file, env_options); + Status s = src_env->NewSequentialFile(src, &src_file, src_env_options); if (!s.ok()) { return s; } - unique_ptr src_reader( + std::unique_ptr src_reader( new SequentialFileReader(std::move(src_file), src)); std::unique_ptr buf(new char[copy_file_buffer_size_]); Slice data; @@ -1426,7 +1456,7 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env, return Status::Incomplete("Backup stopped"); } size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) ? 
- copy_file_buffer_size_ : size_limit; + copy_file_buffer_size_ : static_cast(size_limit); s = src_reader->Read(buffer_to_read, &data, buf.get()); if (!s.ok()) { @@ -1634,15 +1664,15 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( const std::unordered_map& abs_path_to_size) { assert(Empty()); Status s; - unique_ptr backup_meta_file; + std::unique_ptr backup_meta_file; s = env_->NewSequentialFile(meta_filename_, &backup_meta_file, EnvOptions()); if (!s.ok()) { return s; } - unique_ptr backup_meta_reader( + std::unique_ptr backup_meta_reader( new SequentialFileReader(std::move(backup_meta_file), meta_filename_)); - unique_ptr buf(new char[max_backup_meta_file_size_ + 1]); + std::unique_ptr buf(new char[max_backup_meta_file_size_ + 1]); Slice data; s = backup_meta_reader->Read(max_backup_meta_file_size_, &data, buf.get()); @@ -1736,7 +1766,7 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile( Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { Status s; - unique_ptr backup_meta_file; + std::unique_ptr backup_meta_file; EnvOptions env_options; env_options.use_mmap_writes = false; env_options.use_direct_writes = false; @@ -1745,7 +1775,7 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { return s; } - unique_ptr buf(new char[max_backup_meta_file_size_]); + std::unique_ptr buf(new char[max_backup_meta_file_size_]); size_t len = 0, buf_size = max_backup_meta_file_size_; len += snprintf(buf.get(), buf_size, "%" PRId64 "\n", timestamp_); len += snprintf(buf.get() + len, buf_size - len, "%" PRIu64 "\n", @@ -1762,7 +1792,8 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { else if (len + hex_meta_strlen >= buf_size) { backup_meta_file->Append(Slice(buf.get(), len)); buf.reset(); - unique_ptr new_reset_buf(new char[max_backup_meta_file_size_]); + std::unique_ptr new_reset_buf( + new char[max_backup_meta_file_size_]); buf.swap(new_reset_buf); len = 0; } @@ -1776,7 +1807,7 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool 
sync) { "%" ROCKSDB_PRIszt "\n", files_.size()) >= buf_size) { backup_meta_file->Append(Slice(buf.get(), len)); buf.reset(); - unique_ptr new_reset_buf(new char[max_backup_meta_file_size_]); + std::unique_ptr new_reset_buf(new char[max_backup_meta_file_size_]); buf.swap(new_reset_buf); len = 0; } @@ -1794,7 +1825,8 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) { if (newlen >= buf_size) { backup_meta_file->Append(Slice(buf.get(), len)); buf.reset(); - unique_ptr new_reset_buf(new char[max_backup_meta_file_size_]); + std::unique_ptr new_reset_buf( + new char[max_backup_meta_file_size_]); buf.swap(new_reset_buf); len = 0; } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/backupable/backupable_db_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/backupable/backupable_db_test.cc similarity index 92% rename from 3rdParty/rocksdb/v5.16.X/utilities/backupable/backupable_db_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/backupable/backupable_db_test.cc index 9fdc058fd0..26ff00e91a 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/backupable/backupable_db_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/backupable/backupable_db_test.cc @@ -179,7 +179,8 @@ class TestEnv : public EnvWrapper { bool fail_reads_; }; - Status NewSequentialFile(const std::string& f, unique_ptr* r, + Status NewSequentialFile(const std::string& f, + std::unique_ptr* r, const EnvOptions& options) override { MutexLock l(&mutex_); if (dummy_sequential_file_) { @@ -187,11 +188,18 @@ class TestEnv : public EnvWrapper { new TestEnv::DummySequentialFile(dummy_sequential_file_fail_reads_)); return Status::OK(); } else { - return EnvWrapper::NewSequentialFile(f, r, options); + Status s = EnvWrapper::NewSequentialFile(f, r, options); + if (s.ok()) { + if ((*r)->use_direct_io()) { + ++num_direct_seq_readers_; + } + ++num_seq_readers_; + } + return s; } } - Status NewWritableFile(const std::string& f, unique_ptr* r, + Status NewWritableFile(const std::string& f, std::unique_ptr* r, const 
EnvOptions& options) override { MutexLock l(&mutex_); written_files_.push_back(f); @@ -199,7 +207,28 @@ class TestEnv : public EnvWrapper { return Status::NotSupported("Sorry, can't do this"); } limit_written_files_--; - return EnvWrapper::NewWritableFile(f, r, options); + Status s = EnvWrapper::NewWritableFile(f, r, options); + if (s.ok()) { + if ((*r)->use_direct_io()) { + ++num_direct_writers_; + } + ++num_writers_; + } + return s; + } + + virtual Status NewRandomAccessFile(const std::string& fname, + unique_ptr* result, + const EnvOptions& options) override { + MutexLock l(&mutex_); + Status s = EnvWrapper::NewRandomAccessFile(fname, result, options); + if (s.ok()) { + if ((*result)->use_direct_io()) { + ++num_direct_rand_readers_; + } + ++num_rand_readers_; + } + return s; } virtual Status DeleteFile(const std::string& fname) override { @@ -308,13 +337,30 @@ class TestEnv : public EnvWrapper { void SetNewDirectoryFailure(bool fail) { new_directory_failure_ = fail; } virtual Status NewDirectory(const std::string& name, - unique_ptr* result) override { + std::unique_ptr* result) override { if (new_directory_failure_) { return Status::IOError("SimulatedFailure"); } return EnvWrapper::NewDirectory(name, result); } + void ClearFileOpenCounters() { + MutexLock l(&mutex_); + num_rand_readers_ = 0; + num_direct_rand_readers_ = 0; + num_seq_readers_ = 0; + num_direct_seq_readers_ = 0; + num_writers_ = 0; + num_direct_writers_ = 0; + } + + int num_rand_readers() { return num_rand_readers_; } + int num_direct_rand_readers() { return num_direct_rand_readers_; } + int num_seq_readers() { return num_seq_readers_; } + int num_direct_seq_readers() { return num_direct_seq_readers_; } + int num_writers() { return num_writers_; } + int num_direct_writers() { return num_direct_writers_; } + private: port::Mutex mutex_; bool dummy_sequential_file_ = false; @@ -328,6 +374,15 @@ class TestEnv : public EnvWrapper { bool get_children_failure_ = false; bool 
create_dir_if_missing_failure_ = false; bool new_directory_failure_ = false; + + // Keeps track of how many files of each type were successfully opened, and + // out of those, how many were opened with direct I/O. + std::atomic num_rand_readers_; + std::atomic num_direct_rand_readers_; + std::atomic num_seq_readers_; + std::atomic num_direct_seq_readers_; + std::atomic num_writers_; + std::atomic num_direct_writers_; }; // TestEnv class FileManager : public EnvWrapper { @@ -427,7 +482,7 @@ class FileManager : public EnvWrapper { } Status WriteToFile(const std::string& fname, const std::string& data) { - unique_ptr file; + std::unique_ptr file; EnvOptions env_options; env_options.use_mmap_writes = false; Status s = EnvWrapper::NewWritableFile(fname, &file, env_options); @@ -620,22 +675,22 @@ class BackupableDBTest : public testing::Test { std::shared_ptr logger_; // envs - unique_ptr db_chroot_env_; - unique_ptr backup_chroot_env_; - unique_ptr test_db_env_; - unique_ptr test_backup_env_; - unique_ptr file_manager_; + std::unique_ptr db_chroot_env_; + std::unique_ptr backup_chroot_env_; + std::unique_ptr test_db_env_; + std::unique_ptr test_backup_env_; + std::unique_ptr file_manager_; // all the dbs! 
DummyDB* dummy_db_; // BackupableDB owns dummy_db_ - unique_ptr db_; - unique_ptr backup_engine_; + std::unique_ptr db_; + std::unique_ptr backup_engine_; // options Options options_; protected: - unique_ptr backupable_options_; + std::unique_ptr backupable_options_; }; // BackupableDBTest void AppendPath(const std::string& path, std::vector& v) { @@ -1633,6 +1688,59 @@ TEST_F(BackupableDBTest, WriteOnlyEngineNoSharedFileDeletion) { AssertBackupConsistency(i + 1, 0, (i + 1) * kNumKeys); } } + +TEST_P(BackupableDBTestWithParam, BackupUsingDirectIO) { + // Tests direct I/O on the backup engine's reads and writes on the DB env and + // backup env + // We use ChrootEnv underneath so the below line checks for direct I/O support + // in the chroot directory, not the true filesystem root. + if (!test::IsDirectIOSupported(test_db_env_.get(), "/")) { + return; + } + const int kNumKeysPerBackup = 100; + const int kNumBackups = 3; + options_.use_direct_reads = true; + OpenDBAndBackupEngine(true /* destroy_old_data */); + for (int i = 0; i < kNumBackups; ++i) { + FillDB(db_.get(), i * kNumKeysPerBackup /* from */, + (i + 1) * kNumKeysPerBackup /* to */); + ASSERT_OK(db_->Flush(FlushOptions())); + + // Clear the file open counters and then do a bunch of backup engine ops. + // For all ops, files should be opened in direct mode. 
+ test_backup_env_->ClearFileOpenCounters(); + test_db_env_->ClearFileOpenCounters(); + CloseBackupEngine(); + OpenBackupEngine(); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), + false /* flush_before_backup */)); + ASSERT_OK(backup_engine_->VerifyBackup(i + 1)); + CloseBackupEngine(); + OpenBackupEngine(); + std::vector backup_infos; + backup_engine_->GetBackupInfo(&backup_infos); + ASSERT_EQ(static_cast(i + 1), backup_infos.size()); + + // Verify backup engine always opened files with direct I/O + ASSERT_EQ(0, test_db_env_->num_writers()); + ASSERT_EQ(0, test_db_env_->num_rand_readers()); + ASSERT_GT(test_db_env_->num_direct_seq_readers(), 0); + // Currently the DB doesn't support reading WALs or manifest with direct + // I/O, so subtract two. + ASSERT_EQ(test_db_env_->num_seq_readers() - 2, + test_db_env_->num_direct_seq_readers()); + ASSERT_EQ(0, test_db_env_->num_rand_readers()); + } + CloseDBAndBackupEngine(); + + for (int i = 0; i < kNumBackups; ++i) { + AssertBackupConsistency(i + 1 /* backup_id */, + i * kNumKeysPerBackup /* start_exist */, + (i + 1) * kNumKeysPerBackup /* end_exist */, + (i + 2) * kNumKeysPerBackup /* end */); + } +} + } // anon namespace } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_compaction_filter.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_compaction_filter.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_compaction_filter.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_compaction_filter.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_compaction_filter.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_compaction_filter.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_compaction_filter.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_compaction_filter.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db.cc 
b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl.cc similarity index 87% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_impl.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl.cc index 06ef0ed3c4..bdec654628 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_impl.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl.cc @@ -304,13 +304,17 @@ void BlobDBImpl::CloseRandomAccessLocked( open_file_count_--; } -std::shared_ptr BlobDBImpl::GetOrOpenRandomAccessReader( - const std::shared_ptr& bfile, Env* env, - const EnvOptions& env_options) { +Status BlobDBImpl::GetBlobFileReader( + const std::shared_ptr& blob_file, + std::shared_ptr* reader) { + assert(reader != nullptr); bool fresh_open = false; - auto rar = bfile->GetOrOpenRandomAccessReader(env, env_options, &fresh_open); - if (fresh_open) open_file_count_++; - return rar; + Status s = blob_file->GetReader(env_, env_options_, reader, &fresh_open); + if (s.ok() && fresh_open) { + assert(*reader != nullptr); + open_file_count_++; + } + return s; } std::shared_ptr BlobDBImpl::NewBlobFile(const std::string& reason) { @@ -338,7 +342,7 @@ Status BlobDBImpl::CreateWriterLocked(const std::shared_ptr& bfile) { } std::unique_ptr fwriter; - fwriter.reset(new WritableFileWriter(std::move(wfile), env_options_)); + fwriter.reset(new WritableFileWriter(std::move(wfile), fpath, env_options_)); uint64_t boffset = 
bfile->GetFileSize(); if (debug_level_ >= 2 && boffset) { @@ -400,82 +404,91 @@ std::shared_ptr BlobDBImpl::FindBlobFileLocked( return (b1 || b2) ? nullptr : (*finditr); } -std::shared_ptr BlobDBImpl::CheckOrCreateWriterLocked( - const std::shared_ptr& bfile) { - std::shared_ptr writer = bfile->GetWriter(); - if (writer) return writer; - - Status s = CreateWriterLocked(bfile); - if (!s.ok()) return nullptr; - - writer = bfile->GetWriter(); - return writer; +Status BlobDBImpl::CheckOrCreateWriterLocked( + const std::shared_ptr& blob_file, + std::shared_ptr* writer) { + assert(writer != nullptr); + *writer = blob_file->GetWriter(); + if (*writer != nullptr) { + return Status::OK(); + } + Status s = CreateWriterLocked(blob_file); + if (s.ok()) { + *writer = blob_file->GetWriter(); + } + return s; } -std::shared_ptr BlobDBImpl::SelectBlobFile() { +Status BlobDBImpl::SelectBlobFile(std::shared_ptr* blob_file) { + assert(blob_file != nullptr); { ReadLock rl(&mutex_); if (open_non_ttl_file_ != nullptr) { - return open_non_ttl_file_; + *blob_file = open_non_ttl_file_; + return Status::OK(); } } // CHECK again WriteLock wl(&mutex_); if (open_non_ttl_file_ != nullptr) { - return open_non_ttl_file_; + *blob_file = open_non_ttl_file_; + return Status::OK(); } - std::shared_ptr bfile = NewBlobFile("SelectBlobFile"); - assert(bfile); + *blob_file = NewBlobFile("SelectBlobFile"); + assert(*blob_file != nullptr); // file not visible, hence no lock - std::shared_ptr writer = CheckOrCreateWriterLocked(bfile); - if (!writer) { + std::shared_ptr writer; + Status s = CheckOrCreateWriterLocked(*blob_file, &writer); + if (!s.ok()) { ROCKS_LOG_ERROR(db_options_.info_log, - "Failed to get writer from blob file: %s", - bfile->PathName().c_str()); - return nullptr; + "Failed to get writer from blob file: %s, error: %s", + (*blob_file)->PathName().c_str(), s.ToString().c_str()); + return s; } - bfile->file_size_ = BlobLogHeader::kSize; - bfile->header_.compression = bdb_options_.compression; 
- bfile->header_.has_ttl = false; - bfile->header_.column_family_id = + (*blob_file)->file_size_ = BlobLogHeader::kSize; + (*blob_file)->header_.compression = bdb_options_.compression; + (*blob_file)->header_.has_ttl = false; + (*blob_file)->header_.column_family_id = reinterpret_cast(DefaultColumnFamily())->GetID(); - bfile->header_valid_ = true; - bfile->SetColumnFamilyId(bfile->header_.column_family_id); - bfile->SetHasTTL(false); - bfile->SetCompression(bdb_options_.compression); + (*blob_file)->header_valid_ = true; + (*blob_file)->SetColumnFamilyId((*blob_file)->header_.column_family_id); + (*blob_file)->SetHasTTL(false); + (*blob_file)->SetCompression(bdb_options_.compression); - Status s = writer->WriteHeader(bfile->header_); + s = writer->WriteHeader((*blob_file)->header_); if (!s.ok()) { ROCKS_LOG_ERROR(db_options_.info_log, "Failed to write header to new blob file: %s" " status: '%s'", - bfile->PathName().c_str(), s.ToString().c_str()); - return nullptr; + (*blob_file)->PathName().c_str(), s.ToString().c_str()); + return s; } - blob_files_.insert(std::make_pair(bfile->BlobFileNumber(), bfile)); - open_non_ttl_file_ = bfile; + blob_files_.insert( + std::make_pair((*blob_file)->BlobFileNumber(), *blob_file)); + open_non_ttl_file_ = *blob_file; total_blob_size_ += BlobLogHeader::kSize; - return bfile; + return s; } -std::shared_ptr BlobDBImpl::SelectBlobFileTTL(uint64_t expiration) { +Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration, + std::shared_ptr* blob_file) { + assert(blob_file != nullptr); assert(expiration != kNoExpiration); uint64_t epoch_read = 0; - std::shared_ptr bfile; { ReadLock rl(&mutex_); - bfile = FindBlobFileLocked(expiration); + *blob_file = FindBlobFileLocked(expiration); epoch_read = epoch_of_.load(); } - if (bfile) { - assert(!bfile->Immutable()); - return bfile; + if (*blob_file != nullptr) { + assert(!(*blob_file)->Immutable()); + return Status::OK(); } uint64_t exp_low = @@ -483,61 +496,66 @@ std::shared_ptr 
BlobDBImpl::SelectBlobFileTTL(uint64_t expiration) { uint64_t exp_high = exp_low + bdb_options_.ttl_range_secs; ExpirationRange expiration_range = std::make_pair(exp_low, exp_high); - bfile = NewBlobFile("SelectBlobFileTTL"); - assert(bfile); + *blob_file = NewBlobFile("SelectBlobFileTTL"); + assert(*blob_file != nullptr); ROCKS_LOG_INFO(db_options_.info_log, "New blob file TTL range: %s %d %d", - bfile->PathName().c_str(), exp_low, exp_high); + (*blob_file)->PathName().c_str(), exp_low, exp_high); LogFlush(db_options_.info_log); // we don't need to take lock as no other thread is seeing bfile yet - std::shared_ptr writer = CheckOrCreateWriterLocked(bfile); - if (!writer) { - ROCKS_LOG_ERROR(db_options_.info_log, - "Failed to get writer from blob file with TTL: %s", - bfile->PathName().c_str()); - return nullptr; + std::shared_ptr writer; + Status s = CheckOrCreateWriterLocked(*blob_file, &writer); + if (!s.ok()) { + ROCKS_LOG_ERROR( + db_options_.info_log, + "Failed to get writer from blob file with TTL: %s, error: %s", + (*blob_file)->PathName().c_str(), s.ToString().c_str()); + return s; } - bfile->header_.expiration_range = expiration_range; - bfile->header_.compression = bdb_options_.compression; - bfile->header_.has_ttl = true; - bfile->header_.column_family_id = + (*blob_file)->header_.expiration_range = expiration_range; + (*blob_file)->header_.compression = bdb_options_.compression; + (*blob_file)->header_.has_ttl = true; + (*blob_file)->header_.column_family_id = reinterpret_cast(DefaultColumnFamily())->GetID(); - ; - bfile->header_valid_ = true; - bfile->SetColumnFamilyId(bfile->header_.column_family_id); - bfile->SetHasTTL(true); - bfile->SetCompression(bdb_options_.compression); - bfile->file_size_ = BlobLogHeader::kSize; + (*blob_file)->header_valid_ = true; + (*blob_file)->SetColumnFamilyId((*blob_file)->header_.column_family_id); + (*blob_file)->SetHasTTL(true); + (*blob_file)->SetCompression(bdb_options_.compression); + (*blob_file)->file_size_ = 
BlobLogHeader::kSize; // set the first value of the range, since that is // concrete at this time. also necessary to add to open_ttl_files_ - bfile->expiration_range_ = expiration_range; + (*blob_file)->expiration_range_ = expiration_range; WriteLock wl(&mutex_); // in case the epoch has shifted in the interim, then check // check condition again - should be rare. if (epoch_of_.load() != epoch_read) { - auto bfile2 = FindBlobFileLocked(expiration); - if (bfile2) return bfile2; + std::shared_ptr blob_file2 = FindBlobFileLocked(expiration); + if (blob_file2 != nullptr) { + *blob_file = std::move(blob_file2); + return Status::OK(); + } } - Status s = writer->WriteHeader(bfile->header_); + s = writer->WriteHeader((*blob_file)->header_); if (!s.ok()) { ROCKS_LOG_ERROR(db_options_.info_log, "Failed to write header to new blob file: %s" " status: '%s'", - bfile->PathName().c_str(), s.ToString().c_str()); - return nullptr; + (*blob_file)->PathName().c_str(), s.ToString().c_str()); + return s; } - blob_files_.insert(std::make_pair(bfile->BlobFileNumber(), bfile)); - open_ttl_files_.insert(bfile); + blob_files_.insert( + std::make_pair((*blob_file)->BlobFileNumber(), *blob_file)); + open_ttl_files_.insert(*blob_file); total_blob_size_ += BlobLogHeader::kSize; epoch_of_++; - return bfile; + return s; } class BlobDBImpl::BlobInserter : public WriteBatch::Handler { @@ -562,8 +580,8 @@ class BlobDBImpl::BlobInserter : public WriteBatch::Handler { return Status::NotSupported( "Blob DB doesn't support non-default column family."); } - Status s = blob_db_impl_->PutBlobValue(options_, key, value, - kNoExpiration, &batch_); + Status s = blob_db_impl_->PutBlobValue(options_, key, value, kNoExpiration, + &batch_); return s; } @@ -621,39 +639,6 @@ Status BlobDBImpl::Write(const WriteOptions& options, WriteBatch* updates) { return db_->Write(options, blob_inserter.batch()); } -Status BlobDBImpl::GetLiveFiles(std::vector& ret, - uint64_t* manifest_file_size, - bool flush_memtable) { - // 
Hold a lock in the beginning to avoid updates to base DB during the call - ReadLock rl(&mutex_); - Status s = db_->GetLiveFiles(ret, manifest_file_size, flush_memtable); - if (!s.ok()) { - return s; - } - ret.reserve(ret.size() + blob_files_.size()); - for (auto bfile_pair : blob_files_) { - auto blob_file = bfile_pair.second; - ret.emplace_back(blob_file->PathName()); - } - return Status::OK(); -} - -void BlobDBImpl::GetLiveFilesMetaData(std::vector* metadata) { - // Hold a lock in the beginning to avoid updates to base DB during the call - ReadLock rl(&mutex_); - db_->GetLiveFilesMetaData(metadata); - for (auto bfile_pair : blob_files_) { - auto blob_file = bfile_pair.second; - LiveFileMetaData filemetadata; - filemetadata.size = blob_file->GetFileSize(); - filemetadata.name = blob_file->PathName(); - auto cfh = - reinterpret_cast(DefaultColumnFamily()); - filemetadata.column_family_name = cfh->GetName(); - metadata->emplace_back(filemetadata); - } -} - Status BlobDBImpl::Put(const WriteOptions& options, const Slice& key, const Slice& value) { return PutUntil(options, key, value, kNoExpiration); @@ -724,36 +709,41 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/, return s; } - std::shared_ptr bfile = (expiration != kNoExpiration) - ? 
SelectBlobFileTTL(expiration) - : SelectBlobFile(); - assert(bfile != nullptr); - assert(bfile->compression() == bdb_options_.compression); - - s = AppendBlob(bfile, headerbuf, key, value_compressed, expiration, - &index_entry); - if (expiration == kNoExpiration) { - RecordTick(statistics_, BLOB_DB_WRITE_BLOB); + std::shared_ptr blob_file; + if (expiration != kNoExpiration) { + s = SelectBlobFileTTL(expiration, &blob_file); } else { - RecordTick(statistics_, BLOB_DB_WRITE_BLOB_TTL); + s = SelectBlobFile(&blob_file); + } + if (s.ok()) { + assert(blob_file != nullptr); + assert(blob_file->compression() == bdb_options_.compression); + s = AppendBlob(blob_file, headerbuf, key, value_compressed, expiration, + &index_entry); } - if (s.ok()) { if (expiration != kNoExpiration) { - bfile->ExtendExpirationRange(expiration); + blob_file->ExtendExpirationRange(expiration); } - s = CloseBlobFileIfNeeded(bfile); - if (s.ok()) { - s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key, - index_entry); + s = CloseBlobFileIfNeeded(blob_file); + } + if (s.ok()) { + s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key, + index_entry); + } + if (s.ok()) { + if (expiration == kNoExpiration) { + RecordTick(statistics_, BLOB_DB_WRITE_BLOB); + } else { + RecordTick(statistics_, BLOB_DB_WRITE_BLOB_TTL); } } else { ROCKS_LOG_ERROR(db_options_.info_log, "Failed to append blob to FILE: %s: KEY: %s VALSZ: %d" " status: '%s' blob_file: '%s'", - bfile->PathName().c_str(), key.ToString().c_str(), + blob_file->PathName().c_str(), key.ToString().c_str(), value.size(), s.ToString().c_str(), - bfile->DumpState().c_str()); + blob_file->DumpState().c_str()); } } @@ -896,9 +886,10 @@ Status BlobDBImpl::AppendBlob(const std::shared_ptr& bfile, uint64_t key_offset = 0; { WriteLock lockbfile_w(&bfile->mutex_); - std::shared_ptr writer = CheckOrCreateWriterLocked(bfile); - if (!writer) { - return Status::IOError("Failed to create blob writer"); + std::shared_ptr writer; + s = 
CheckOrCreateWriterLocked(bfile, &writer); + if (!s.ok()) { + return s; } // write the blob to the blob log. @@ -1031,22 +1022,25 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry, } // takes locks when called - std::shared_ptr reader = - GetOrOpenRandomAccessReader(bfile, env_, env_options_); + std::shared_ptr reader; + s = GetBlobFileReader(bfile, &reader); + if (!s.ok()) { + return s; + } assert(blob_index.offset() > key.size() + sizeof(uint32_t)); uint64_t record_offset = blob_index.offset() - key.size() - sizeof(uint32_t); uint64_t record_size = sizeof(uint32_t) + key.size() + blob_index.size(); // Allocate the buffer. This is safe in C++11 - std::string buffer_str(record_size, static_cast(0)); + std::string buffer_str(static_cast(record_size), static_cast(0)); char* buffer = &buffer_str[0]; // A partial blob record contain checksum, key and value. Slice blob_record; { StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); - s = reader->Read(record_offset, record_size, &blob_record, buffer); + s = reader->Read(record_offset, static_cast(record_size), &blob_record, buffer); RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size()); } if (!s.ok()) { @@ -1072,7 +1066,7 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry, } Slice crc_slice(blob_record.data(), sizeof(uint32_t)); Slice blob_value(blob_record.data() + sizeof(uint32_t) + key.size(), - blob_index.size()); + static_cast(blob_index.size())); uint32_t crc_exp; if (!GetFixed32(&crc_slice, &crc_exp)) { ROCKS_LOG_DEBUG(db_options_.info_log, @@ -1144,9 +1138,10 @@ Status BlobDBImpl::GetImpl(const ReadOptions& read_options, ReadOptions ro(read_options); bool snapshot_created = SetSnapshotIfNeeded(&ro); + PinnableSlice index_entry; Status s; bool is_blob_index = false; - s = db_impl_->GetImpl(ro, column_family, key, value, + s = db_impl_->GetImpl(ro, column_family, key, &index_entry, nullptr /*value_found*/, nullptr 
/*read_callback*/, &is_blob_index); TEST_SYNC_POINT("BlobDBImpl::Get:AfterIndexEntryGet:1"); @@ -1154,27 +1149,30 @@ Status BlobDBImpl::GetImpl(const ReadOptions& read_options, if (expiration != nullptr) { *expiration = kNoExpiration; } - if (s.ok() && is_blob_index) { - std::string index_entry = value->ToString(); - value->Reset(); - s = GetBlobValue(key, index_entry, value, expiration); + RecordTick(statistics_, BLOB_DB_NUM_KEYS_READ); + if (s.ok()) { + if (is_blob_index) { + s = GetBlobValue(key, index_entry, value, expiration); + } else { + // The index entry is the value itself in this case. + value->PinSelf(index_entry); + } + RecordTick(statistics_, BLOB_DB_BYTES_READ, value->size()); } if (snapshot_created) { db_->ReleaseSnapshot(ro.snapshot); } - RecordTick(statistics_, BLOB_DB_NUM_KEYS_READ); - RecordTick(statistics_, BLOB_DB_BYTES_READ, value->size()); return s; } std::pair BlobDBImpl::SanityCheck(bool aborted) { - if (aborted) return std::make_pair(false, -1); + if (aborted) { + return std::make_pair(false, -1); + } ROCKS_LOG_INFO(db_options_.info_log, "Starting Sanity Check"); - ROCKS_LOG_INFO(db_options_.info_log, "Number of files %" PRIu64, blob_files_.size()); - ROCKS_LOG_INFO(db_options_.info_log, "Number of open files %" PRIu64, open_ttl_files_.size()); @@ -1182,14 +1180,33 @@ std::pair BlobDBImpl::SanityCheck(bool aborted) { assert(!bfile->Immutable()); } - uint64_t epoch_now = EpochNow(); + uint64_t now = EpochNow(); - for (auto bfile_pair : blob_files_) { - auto bfile = bfile_pair.second; - ROCKS_LOG_INFO( - db_options_.info_log, "Blob File %s %" PRIu64 " %" PRIu64 " %" PRIu64, - bfile->PathName().c_str(), bfile->GetFileSize(), bfile->BlobCount(), - (bfile->expiration_range_.second - epoch_now)); + for (auto blob_file_pair : blob_files_) { + auto blob_file = blob_file_pair.second; + char buf[1000]; + int pos = snprintf(buf, sizeof(buf), + "Blob file %" PRIu64 ", size %" PRIu64 + ", blob count %" PRIu64 ", immutable %d", + 
blob_file->BlobFileNumber(), blob_file->GetFileSize(), + blob_file->BlobCount(), blob_file->Immutable()); + if (blob_file->HasTTL()) { + auto expiration_range = blob_file->GetExpirationRange(); + pos += snprintf(buf + pos, sizeof(buf) - pos, + ", expiration range (%" PRIu64 ", %" PRIu64 ")", + expiration_range.first, expiration_range.second); + if (!blob_file->Obsolete()) { + pos += snprintf(buf + pos, sizeof(buf) - pos, + ", expire in %" PRIu64 " seconds", + expiration_range.second - now); + } + } + if (blob_file->Obsolete()) { + pos += snprintf(buf + pos, sizeof(buf) - pos, ", obsolete at %" PRIu64, + blob_file->GetObsoleteSequence()); + } + snprintf(buf + pos, sizeof(buf) - pos, "."); + ROCKS_LOG_INFO(db_options_.info_log, "%s", buf); } // reschedule @@ -1279,7 +1296,14 @@ bool BlobDBImpl::VisibleToActiveSnapshot( oldest_snapshot = snapshots.oldest()->GetSequenceNumber(); } } - return oldest_snapshot < obsolete_sequence; + bool visible = oldest_snapshot < obsolete_sequence; + if (visible) { + ROCKS_LOG_INFO(db_options_.info_log, + "Obsolete blob file %" PRIu64 " (obsolete at %" PRIu64 + ") visible to oldest snapshot %" PRIu64 ".", + bfile->BlobFileNumber(), obsolete_sequence, oldest_snapshot); + } + return visible; } std::pair BlobDBImpl::EvictExpiredFiles(bool aborted) { @@ -1455,8 +1479,7 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr& bfptr, return s; } - auto* cfh = - db_impl_->GetColumnFamilyHandleUnlocked(bfptr->column_family_id()); + auto cfh = db_impl_->DefaultColumnFamily(); auto* cfd = reinterpret_cast(cfh)->cfd(); auto column_family_id = cfd->GetID(); bool has_ttl = header.has_ttl; @@ -1571,7 +1594,13 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr& bfptr, reason += bfptr->PathName(); newfile = NewBlobFile(reason); - new_writer = CheckOrCreateWriterLocked(newfile); + s = CheckOrCreateWriterLocked(newfile, &new_writer); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to open file %s for writer, error: 
%s", + newfile->PathName().c_str(), s.ToString().c_str()); + break; + } // Can't use header beyond this point newfile->header_ = std::move(header); newfile->header_valid_ = true; @@ -1680,16 +1709,21 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr& bfptr, } std::pair BlobDBImpl::DeleteObsoleteFiles(bool aborted) { - if (aborted) return std::make_pair(false, -1); + if (aborted) { + return std::make_pair(false, -1); + } - { - ReadLock rl(&mutex_); - if (obsolete_files_.empty()) return std::make_pair(true, -1); + MutexLock delete_file_lock(&delete_file_mutex_); + if (disable_file_deletions_ > 0) { + return std::make_pair(true, -1); } std::list> tobsolete; { WriteLock wl(&mutex_); + if (obsolete_files_.empty()) { + return std::make_pair(true, -1); + } tobsolete.swap(obsolete_files_); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_impl.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl.h similarity index 88% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl.h index 0579a9ab42..8d5148def6 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_impl.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl.h @@ -155,12 +155,6 @@ class BlobDBImpl : public BlobDB { virtual Status Close() override; - virtual Status GetLiveFiles(std::vector&, - uint64_t* manifest_file_size, - bool flush_memtable = true) override; - virtual void GetLiveFilesMetaData( - std::vector* ) override; - using BlobDB::PutWithTTL; Status PutWithTTL(const WriteOptions& options, const Slice& key, const Slice& value, uint64_t ttl) override; @@ -175,6 +169,15 @@ class BlobDBImpl : public BlobDB { const DBOptions& db_options, const ColumnFamilyOptions& cf_options); + virtual Status DisableFileDeletions() override; + + virtual Status EnableFileDeletions(bool force) override; + + virtual Status GetLiveFiles(std::vector&, + uint64_t* manifest_file_size, + bool 
flush_memtable = true) override; + virtual void GetLiveFilesMetaData(std::vector*) override; + ~BlobDBImpl(); Status Open(std::vector* handles); @@ -252,10 +255,11 @@ class BlobDBImpl : public BlobDB { // find an existing blob log file based on the expiration unix epoch // if such a file does not exist, return nullptr - std::shared_ptr SelectBlobFileTTL(uint64_t expiration); + Status SelectBlobFileTTL(uint64_t expiration, + std::shared_ptr* blob_file); // find an existing blob log file to append the value to - std::shared_ptr SelectBlobFile(); + Status SelectBlobFile(std::shared_ptr* blob_file); std::shared_ptr FindBlobFileLocked(uint64_t expiration) const; @@ -293,11 +297,8 @@ class BlobDBImpl : public BlobDB { // Open all blob files found in blob_dir. Status OpenAllBlobFiles(); - // hold write mutex on file and call - // creates a Random Access reader for GET call - std::shared_ptr GetOrOpenRandomAccessReader( - const std::shared_ptr& bfile, Env* env, - const EnvOptions& env_options); + Status GetBlobFileReader(const std::shared_ptr& blob_file, + std::shared_ptr* reader); // hold write mutex on file and call. // Close the above Random Access reader @@ -309,8 +310,8 @@ class BlobDBImpl : public BlobDB { // returns a Writer object for the file. If writer is not // already present, creates one. Needs Write Mutex to be held - std::shared_ptr CheckOrCreateWriterLocked( - const std::shared_ptr& bfile); + Status CheckOrCreateWriterLocked(const std::shared_ptr& blob_file, + std::shared_ptr* writer); // Iterate through keys and values on Blob and write into // separate file the remaining blobs and delete/update pointers @@ -347,7 +348,8 @@ class BlobDBImpl : public BlobDB { ColumnFamilyOptions cf_options_; EnvOptions env_options_; - // Raw pointer of statistic. db_options_ has a shared_ptr to hold ownership. + // Raw pointer of statistic. db_options_ has a std::shared_ptr to hold + // ownership. 
Statistics* statistics_; // by default this is "blob_dir" under dbname_ @@ -408,6 +410,26 @@ class BlobDBImpl : public BlobDB { std::list> obsolete_files_; + // DeleteObsoleteFiles, DiableFileDeletions and EnableFileDeletions block + // on the mutex to avoid contention. + // + // While DeleteObsoleteFiles hold both mutex_ and delete_file_mutex_, note + // the difference. mutex_ only needs to be held when access the + // data-structure, and delete_file_mutex_ needs to be held the whole time + // during DeleteObsoleteFiles to avoid being run simultaneously with + // DisableFileDeletions. + // + // If both of mutex_ and delete_file_mutex_ needs to be held, it is adviced + // to hold delete_file_mutex_ first to avoid deadlock. + mutable port::Mutex delete_file_mutex_; + + // Each call of DisableFileDeletions will increase disable_file_deletion_ + // by 1. EnableFileDeletions will either decrease the count by 1 or reset + // it to zeor, depending on the force flag. + // + // REQUIRES: access with delete_file_mutex_ held. + int disable_file_deletions_ = 0; + uint32_t debug_level_; }; diff --git a/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl_filesnapshot.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl_filesnapshot.cc new file mode 100644 index 0000000000..8effe88c0a --- /dev/null +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_impl_filesnapshot.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifndef ROCKSDB_LITE + +#include "utilities/blob_db/blob_db_impl.h" + +#include "util/filename.h" +#include "util/logging.h" +#include "util/mutexlock.h" + +// BlobDBImpl methods to get snapshot of files, e.g. for replication. 
+ +namespace rocksdb { +namespace blob_db { + +Status BlobDBImpl::DisableFileDeletions() { + // Disable base DB file deletions. + Status s = db_impl_->DisableFileDeletions(); + if (!s.ok()) { + return s; + } + + int count = 0; + { + // Hold delete_file_mutex_ to make sure no DeleteObsoleteFiles job + // is running. + MutexLock l(&delete_file_mutex_); + count = ++disable_file_deletions_; + } + + ROCKS_LOG_INFO(db_options_.info_log, + "Disalbed blob file deletions. count: %d", count); + return Status::OK(); +} + +Status BlobDBImpl::EnableFileDeletions(bool force) { + // Enable base DB file deletions. + Status s = db_impl_->EnableFileDeletions(force); + if (!s.ok()) { + return s; + } + + int count = 0; + { + MutexLock l(&delete_file_mutex_); + if (force) { + disable_file_deletions_ = 0; + } else if (disable_file_deletions_ > 0) { + count = --disable_file_deletions_; + } + assert(count >= 0); + } + + ROCKS_LOG_INFO(db_options_.info_log, "Enabled blob file deletions. count: %d", + count); + // Consider trigger DeleteobsoleteFiles once after re-enabled, if we are to + // make DeleteobsoleteFiles re-run interval configuration. + return Status::OK(); +} + +Status BlobDBImpl::GetLiveFiles(std::vector& ret, + uint64_t* manifest_file_size, + bool flush_memtable) { + if (!bdb_options_.path_relative) { + return Status::NotSupported( + "Not able to get relative blob file path from absolute blob_dir."); + } + // Hold a lock in the beginning to avoid updates to base DB during the call + ReadLock rl(&mutex_); + Status s = db_->GetLiveFiles(ret, manifest_file_size, flush_memtable); + if (!s.ok()) { + return s; + } + ret.reserve(ret.size() + blob_files_.size()); + for (auto bfile_pair : blob_files_) { + auto blob_file = bfile_pair.second; + // Path should be relative to db_name, but begin with slash. 
+ ret.emplace_back( + BlobFileName("", bdb_options_.blob_dir, blob_file->BlobFileNumber())); + } + return Status::OK(); +} + +void BlobDBImpl::GetLiveFilesMetaData(std::vector* metadata) { + // Path should be relative to db_name. + assert(bdb_options_.path_relative); + // Hold a lock in the beginning to avoid updates to base DB during the call + ReadLock rl(&mutex_); + db_->GetLiveFilesMetaData(metadata); + for (auto bfile_pair : blob_files_) { + auto blob_file = bfile_pair.second; + LiveFileMetaData filemetadata; + filemetadata.size = static_cast(blob_file->GetFileSize()); + // Path should be relative to db_name, but begin with slash. + filemetadata.name = + BlobFileName("", bdb_options_.blob_dir, blob_file->BlobFileNumber()); + auto cfh = reinterpret_cast(DefaultColumnFamily()); + filemetadata.column_family_name = cfh->GetName(); + metadata->emplace_back(filemetadata); + } +} + +} // namespace blob_db +} // namespace rocksdb +#endif // !ROCKSDB_LITE diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_iterator.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_iterator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_iterator.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_iterator.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_listener.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_listener.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_listener.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_listener.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_test.cc similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_test.cc index 99a2cb1acc..1c1867e4e2 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_db_test.cc +++ 
b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_db_test.cc @@ -16,6 +16,7 @@ #include "port/port.h" #include "rocksdb/utilities/debug.h" #include "util/cast_util.h" +#include "util/fault_injection_test_env.h" #include "util/random.h" #include "util/string_util.h" #include "util/sync_point.h" @@ -40,6 +41,7 @@ class BlobDBTest : public testing::Test { BlobDBTest() : dbname_(test::PerThreadDBPath("blob_db_test")), mock_env_(new MockTimeEnv(Env::Default())), + fault_injection_env_(new FaultInjectionTestEnv(Env::Default())), blob_db_(nullptr) { Status s = DestroyBlobDB(dbname_, Options(), BlobDBOptions()); assert(s.ok()); @@ -236,6 +238,7 @@ class BlobDBTest : public testing::Test { const std::string dbname_; std::unique_ptr mock_env_; + std::unique_ptr fault_injection_env_; BlobDB *blob_db_; }; // class BlobDBTest @@ -354,6 +357,36 @@ TEST_F(BlobDBTest, GetExpiration) { ASSERT_EQ(300 /* = 100 + 200 */, expiration); } +TEST_F(BlobDBTest, GetIOError) { + Options options; + options.env = fault_injection_env_.get(); + BlobDBOptions bdb_options; + bdb_options.min_blob_size = 0; // Make sure value write to blob file + bdb_options.disable_background_tasks = true; + Open(bdb_options, options); + ColumnFamilyHandle *column_family = blob_db_->DefaultColumnFamily(); + PinnableSlice value; + ASSERT_OK(Put("foo", "bar")); + fault_injection_env_->SetFilesystemActive(false, Status::IOError()); + Status s = blob_db_->Get(ReadOptions(), column_family, "foo", &value); + ASSERT_TRUE(s.IsIOError()); + // Reactivate file system to allow test to close DB. 
+ fault_injection_env_->SetFilesystemActive(true); +} + +TEST_F(BlobDBTest, PutIOError) { + Options options; + options.env = fault_injection_env_.get(); + BlobDBOptions bdb_options; + bdb_options.min_blob_size = 0; // Make sure value write to blob file + bdb_options.disable_background_tasks = true; + Open(bdb_options, options); + fault_injection_env_->SetFilesystemActive(false, Status::IOError()); + ASSERT_TRUE(Put("foo", "v1").IsIOError()); + fault_injection_env_->SetFilesystemActive(true, Status::IOError()); + ASSERT_OK(Put("bar", "v1")); +} + TEST_F(BlobDBTest, WriteBatch) { Random rnd(301); BlobDBOptions bdb_options; @@ -461,7 +494,6 @@ TEST_F(BlobDBTest, DecompressAfterReopen) { Reopen(bdb_options); VerifyDB(data); } - #endif TEST_F(BlobDBTest, MultipleWriters) { @@ -834,6 +866,8 @@ TEST_F(BlobDBTest, ColumnFamilyNotSupported) { TEST_F(BlobDBTest, GetLiveFilesMetaData) { Random rnd(301); BlobDBOptions bdb_options; + bdb_options.blob_dir = "blob_dir"; + bdb_options.path_relative = true; bdb_options.min_blob_size = 0; bdb_options.disable_background_tasks = true; Open(bdb_options); @@ -841,16 +875,16 @@ TEST_F(BlobDBTest, GetLiveFilesMetaData) { for (size_t i = 0; i < 100; i++) { PutRandom("key" + ToString(i), &rnd, &data); } - auto *bdb_impl = static_cast(blob_db_); std::vector metadata; - bdb_impl->GetLiveFilesMetaData(&metadata); + blob_db_->GetLiveFilesMetaData(&metadata); ASSERT_EQ(1U, metadata.size()); - std::string filename = dbname_ + "/blob_dir/000001.blob"; + // Path should be relative to db_name, but begin with slash. 
+ std::string filename = "/blob_dir/000001.blob"; ASSERT_EQ(filename, metadata[0].name); ASSERT_EQ("default", metadata[0].column_family_name); std::vector livefile; uint64_t mfs; - bdb_impl->GetLiveFiles(livefile, &mfs, false); + ASSERT_OK(blob_db_->GetLiveFiles(livefile, &mfs, false)); ASSERT_EQ(4U, livefile.size()); ASSERT_EQ(filename, livefile[3]); VerifyDB(data); @@ -1413,6 +1447,52 @@ TEST_F(BlobDBTest, EvictExpiredFile) { blob_db_impl()->TEST_DeleteObsoleteFiles(); ASSERT_EQ(0, blob_db_impl()->TEST_GetBlobFiles().size()); ASSERT_EQ(0, blob_db_impl()->TEST_GetObsoleteFiles().size()); + // Make sure we don't return garbage value after blob file being evicted, + // but the blob index still exists in the LSM tree. + std::string val = ""; + ASSERT_TRUE(blob_db_->Get(ReadOptions(), "foo", &val).IsNotFound()); + ASSERT_EQ("", val); +} + +TEST_F(BlobDBTest, DisableFileDeletions) { + BlobDBOptions bdb_options; + bdb_options.disable_background_tasks = true; + Open(bdb_options); + std::map data; + for (bool force : {true, false}) { + ASSERT_OK(Put("foo", "v", &data)); + auto blob_files = blob_db_impl()->TEST_GetBlobFiles(); + ASSERT_EQ(1, blob_files.size()); + auto blob_file = blob_files[0]; + ASSERT_OK(blob_db_impl()->TEST_CloseBlobFile(blob_file)); + blob_db_impl()->TEST_ObsoleteBlobFile(blob_file); + ASSERT_EQ(1, blob_db_impl()->TEST_GetBlobFiles().size()); + ASSERT_EQ(1, blob_db_impl()->TEST_GetObsoleteFiles().size()); + // Call DisableFileDeletions twice. + ASSERT_OK(blob_db_->DisableFileDeletions()); + ASSERT_OK(blob_db_->DisableFileDeletions()); + // File deletions should be disabled. + blob_db_impl()->TEST_DeleteObsoleteFiles(); + ASSERT_EQ(1, blob_db_impl()->TEST_GetBlobFiles().size()); + ASSERT_EQ(1, blob_db_impl()->TEST_GetObsoleteFiles().size()); + VerifyDB(data); + // Enable file deletions once. If force=true, file deletion is enabled. + // Otherwise it needs to enable it for a second time. 
+ ASSERT_OK(blob_db_->EnableFileDeletions(force)); + blob_db_impl()->TEST_DeleteObsoleteFiles(); + if (!force) { + ASSERT_EQ(1, blob_db_impl()->TEST_GetBlobFiles().size()); + ASSERT_EQ(1, blob_db_impl()->TEST_GetObsoleteFiles().size()); + VerifyDB(data); + // Call EnableFileDeletions a second time. + ASSERT_OK(blob_db_->EnableFileDeletions(false)); + blob_db_impl()->TEST_DeleteObsoleteFiles(); + } + // Regardless of value of `force`, file should be deleted by now. + ASSERT_EQ(0, blob_db_impl()->TEST_GetBlobFiles().size()); + ASSERT_EQ(0, blob_db_impl()->TEST_GetObsoleteFiles().size()); + VerifyDB({}); + } } } // namespace blob_db diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_dump_tool.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_dump_tool.cc similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_dump_tool.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_dump_tool.cc index e75ec18598..7ce0697e3b 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_dump_tool.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_dump_tool.cc @@ -199,7 +199,7 @@ Status BlobDumpTool::DumpRecord(DisplayType show_key, DisplayType show_blob, fprintf(stdout, " expiration : %" PRIu64 "\n", record.expiration); } *offset += BlobLogRecord::kHeaderSize; - s = Read(*offset, key_size + value_size, &slice); + s = Read(*offset, static_cast(key_size + value_size), &slice); if (!s.ok()) { return s; } @@ -210,8 +210,8 @@ Status BlobDumpTool::DumpRecord(DisplayType show_key, DisplayType show_blob, BlockContents contents; UncompressionContext uncompression_ctx(compression); s = UncompressBlockContentsForCompressionType( - uncompression_ctx, slice.data() + key_size, value_size, &contents, - 2 /*compress_format_version*/, ImmutableCFOptions(Options())); + uncompression_ctx, slice.data() + key_size, static_cast(value_size), + &contents, 2 /*compress_format_version*/, ImmutableCFOptions(Options())); if (!s.ok()) { return s; } @@ -219,10 
+219,10 @@ Status BlobDumpTool::DumpRecord(DisplayType show_key, DisplayType show_blob, } if (show_key != DisplayType::kNone) { fprintf(stdout, " key : "); - DumpSlice(Slice(slice.data(), key_size), show_key); + DumpSlice(Slice(slice.data(), static_cast(key_size)), show_key); if (show_blob != DisplayType::kNone) { fprintf(stdout, " blob : "); - DumpSlice(Slice(slice.data() + key_size, value_size), show_blob); + DumpSlice(Slice(slice.data() + static_cast(key_size), static_cast(value_size)), show_blob); } if (show_uncompressed_blob != DisplayType::kNone) { fprintf(stdout, " raw blob : "); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_dump_tool.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_dump_tool.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_dump_tool.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_dump_tool.h index e91feffa79..ff4672fd3f 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_dump_tool.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_dump_tool.h @@ -33,7 +33,7 @@ class BlobDumpTool { private: std::unique_ptr reader_; - std::unique_ptr buffer_; + std::unique_ptr buffer_; size_t buffer_size_; Status Read(uint64_t offset, size_t size, Slice* result); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_file.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_file.cc similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_file.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_file.cc index c34ad9098b..6e70bdcb0a 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_file.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_file.cc @@ -191,36 +191,48 @@ void BlobFile::CloseRandomAccessLocked() { last_access_ = -1; } -std::shared_ptr BlobFile::GetOrOpenRandomAccessReader( - Env* env, const EnvOptions& env_options, bool* fresh_open) { +Status BlobFile::GetReader(Env* env, const EnvOptions& env_options, + 
std::shared_ptr* reader, + bool* fresh_open) { + assert(reader != nullptr); + assert(fresh_open != nullptr); *fresh_open = false; int64_t current_time = 0; env->GetCurrentTime(¤t_time); last_access_.store(current_time); + Status s; { ReadLock lockbfile_r(&mutex_); - if (ra_file_reader_) return ra_file_reader_; + if (ra_file_reader_) { + *reader = ra_file_reader_; + return s; + } } WriteLock lockbfile_w(&mutex_); - if (ra_file_reader_) return ra_file_reader_; + // Double check. + if (ra_file_reader_) { + *reader = ra_file_reader_; + return s; + } std::unique_ptr rfile; - Status s = env->NewRandomAccessFile(PathName(), &rfile, env_options); + s = env->NewRandomAccessFile(PathName(), &rfile, env_options); if (!s.ok()) { ROCKS_LOG_ERROR(info_log_, "Failed to open blob file for random-read: %s status: '%s'" " exists: '%s'", PathName().c_str(), s.ToString().c_str(), env->FileExists(PathName()).ToString().c_str()); - return nullptr; + return s; } ra_file_reader_ = std::make_shared(std::move(rfile), PathName()); + *reader = ra_file_reader_; *fresh_open = true; - return ra_file_reader_; + return s; } Status BlobFile::ReadMetadata(Env* env, const EnvOptions& env_options) { diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_file.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_file.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_file.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_file.h index 288523e773..668a037228 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_file.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_file.h @@ -181,6 +181,10 @@ class BlobFile { // footer_valid_ to false and return Status::OK. 
Status ReadMetadata(Env* env, const EnvOptions& env_options); + Status GetReader(Env* env, const EnvOptions& env_options, + std::shared_ptr* reader, + bool* fresh_open); + private: std::shared_ptr OpenRandomAccessReader( Env* env, const DBOptions& db_options, @@ -190,9 +194,6 @@ class BlobFile { Status WriteFooterAndCloseLocked(); - std::shared_ptr GetOrOpenRandomAccessReader( - Env* env, const EnvOptions& env_options, bool* fresh_open); - void CloseRandomAccessLocked(); // this is used, when you are reading only the footer of a diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_index.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_index.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_index.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_index.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_format.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_format.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_format.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_format.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_format.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_format.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_format.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_format.h index 3e1b686aa1..fcc042f06d 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_format.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_format.h @@ -10,7 +10,9 @@ #ifndef ROCKSDB_LITE #include +#include #include + #include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" @@ -106,8 +108,8 @@ struct BlobLogRecord { uint32_t blob_crc = 0; Slice key; Slice value; - std::string key_buf; - std::string value_buf; + std::unique_ptr key_buf; + std::unique_ptr value_buf; uint64_t record_size() const { return kHeaderSize + 
key_size + value_size; } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_reader.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_reader.cc similarity index 75% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_reader.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_reader.cc index c2ba214571..0f098f2d45 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_reader.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_reader.cc @@ -24,10 +24,9 @@ Reader::Reader(unique_ptr&& file_reader, Env* env, buffer_(), next_byte_(0) {} -Status Reader::ReadSlice(uint64_t size, Slice* slice, std::string* buf) { +Status Reader::ReadSlice(uint64_t size, Slice* slice, char* buf) { StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); - buf->reserve(size); - Status s = file_->Read(next_byte_, size, slice, &(*buf)[0]); + Status s = file_->Read(next_byte_, static_cast(size), slice, buf); next_byte_ += size; if (!s.ok()) { return s; @@ -42,7 +41,7 @@ Status Reader::ReadSlice(uint64_t size, Slice* slice, std::string* buf) { Status Reader::ReadHeader(BlobLogHeader* header) { assert(file_.get() != nullptr); assert(next_byte_ == 0); - Status s = ReadSlice(BlobLogHeader::kSize, &buffer_, &backing_store_); + Status s = ReadSlice(BlobLogHeader::kSize, &buffer_, header_buf_); if (!s.ok()) { return s; } @@ -56,7 +55,7 @@ Status Reader::ReadHeader(BlobLogHeader* header) { Status Reader::ReadRecord(BlobLogRecord* record, ReadLevel level, uint64_t* blob_offset) { - Status s = ReadSlice(BlobLogRecord::kHeaderSize, &buffer_, &backing_store_); + Status s = ReadSlice(BlobLogRecord::kHeaderSize, &buffer_, header_buf_); if (!s.ok()) { return s; } @@ -80,14 +79,18 @@ Status Reader::ReadRecord(BlobLogRecord* record, ReadLevel level, break; case kReadHeaderKey: - s = ReadSlice(record->key_size, &record->key, &record->key_buf); + record->key_buf.reset(new char[record->key_size]); + s = ReadSlice(record->key_size, 
&record->key, record->key_buf.get()); next_byte_ += record->value_size; break; case kReadHeaderKeyBlob: - s = ReadSlice(record->key_size, &record->key, &record->key_buf); + record->key_buf.reset(new char[record->key_size]); + s = ReadSlice(record->key_size, &record->key, record->key_buf.get()); if (s.ok()) { - s = ReadSlice(record->value_size, &record->value, &record->value_buf); + record->value_buf.reset(new char[record->value_size]); + s = ReadSlice(record->value_size, &record->value, + record->value_buf.get()); } if (s.ok()) { s = record->CheckBlobCRC(); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_reader.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_reader.h similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_reader.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_reader.h index 4b780decd5..45e2e95514 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_reader.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_reader.h @@ -60,19 +60,19 @@ class Reader { Status ReadRecord(BlobLogRecord* record, ReadLevel level = kReadHeader, uint64_t* blob_offset = nullptr); - Status ReadSlice(uint64_t size, Slice* slice, std::string* buf); - void ResetNextByte() { next_byte_ = 0; } uint64_t GetNextByte() const { return next_byte_; } private: + Status ReadSlice(uint64_t size, Slice* slice, char* buf); + const std::unique_ptr file_; Env* env_; Statistics* statistics_; - std::string backing_store_; Slice buffer_; + char header_buf_[BlobLogRecord::kHeaderSize]; // which byte to read next. 
For asserting proper usage uint64_t next_byte_; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_writer.cc b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_writer.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_writer.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_writer.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_writer.h b/3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_writer.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/blob_db/blob_log_writer.h rename to 3rdParty/rocksdb/v5.18.X/utilities/blob_db/blob_log_writer.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_compaction_filter.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_compaction_filter.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_compaction_filter.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_compaction_filter.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_compaction_filter.h b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_compaction_filter.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_compaction_filter.h rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_compaction_filter.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_format_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_format_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_format_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_format_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_functional_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_functional_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_functional_test.cc rename to 
3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_functional_test.cc index 3e612b3ad6..653e6da72b 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_functional_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_functional_test.cc @@ -101,7 +101,7 @@ public: virtual std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& /*context*/) override { - return unique_ptr(new CassandraCompactionFilter( + return std::unique_ptr(new CassandraCompactionFilter( purge_ttl_on_expiration_, gc_grace_period_in_seconds_)); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_row_merge_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_row_merge_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_row_merge_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_row_merge_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_serialize_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_serialize_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/cassandra_serialize_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/cassandra_serialize_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/format.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/format.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/format.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/format.cc index 4a22658de1..42cd7206b6 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/format.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/format.cc @@ -266,7 +266,7 @@ RowValue RowValue::ConvertExpiredColumnsToTombstones(bool* changed) const { std::static_pointer_cast(column); if(expiring_column->Expired()) { - shared_ptr tombstone = expiring_column->ToTombstone(); + std::shared_ptr tombstone = expiring_column->ToTombstone(); 
new_columns.push_back(tombstone); *changed = true; continue; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/format.h b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/format.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/format.h rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/format.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/merge_operator.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/merge_operator.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/merge_operator.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/merge_operator.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/merge_operator.h b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/merge_operator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/merge_operator.h rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/merge_operator.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/serialize.h b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/serialize.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/serialize.h rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/serialize.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/test_utils.cc b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/test_utils.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/test_utils.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/test_utils.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/cassandra/test_utils.h b/3rdParty/rocksdb/v5.18.X/utilities/cassandra/test_utils.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/cassandra/test_utils.h rename to 3rdParty/rocksdb/v5.18.X/utilities/cassandra/test_utils.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_impl.cc similarity index 99% rename 
from 3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_impl.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_impl.cc index fc8efe8836..9863ac1d56 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_impl.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_impl.cc @@ -120,7 +120,8 @@ Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir, } /* copy_file_cb */, [&](const std::string& fname, const std::string& contents, FileType) { ROCKS_LOG_INFO(db_options.info_log, "Creating %s", fname.c_str()); - return CreateFile(db_->GetEnv(), full_private_path + fname, contents); + return CreateFile(db_->GetEnv(), full_private_path + fname, contents, + db_options.use_fsync); } /* create_file_cb */, &sequence_number, log_size_for_flush); // we copied all the files, enable file deletions @@ -132,7 +133,7 @@ Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir, s = db_->GetEnv()->RenameFile(full_private_path, checkpoint_dir); } if (s.ok()) { - unique_ptr checkpoint_directory; + std::unique_ptr checkpoint_directory; db_->GetEnv()->NewDirectory(checkpoint_dir, &checkpoint_directory); if (checkpoint_directory != nullptr) { s = checkpoint_directory->Fsync(); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_impl.h b/3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_test.cc similarity index 95% rename from 3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_test.cc index b47f240c33..62c78faa8b 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/checkpoint/checkpoint_test.cc +++ 
b/3rdParty/rocksdb/v5.18.X/utilities/checkpoint/checkpoint_test.cc @@ -17,12 +17,13 @@ #include #include #include "db/db_impl.h" -#include "port/stack_trace.h" #include "port/port.h" +#include "port/stack_trace.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/utilities/checkpoint.h" #include "rocksdb/utilities/transaction_db.h" +#include "util/fault_injection_test_env.h" #include "util/sync_point.h" #include "util/testharness.h" @@ -585,6 +586,32 @@ TEST_F(CheckpointTest, CheckpointWithParallelWrites) { thread.join(); } +TEST_F(CheckpointTest, CheckpointWithUnsyncedDataDropped) { + Options options = CurrentOptions(); + std::unique_ptr env(new FaultInjectionTestEnv(env_)); + options.env = env.get(); + Reopen(options); + ASSERT_OK(Put("key1", "val1")); + Checkpoint* checkpoint; + ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); + ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name_)); + delete checkpoint; + env->DropUnsyncedFileData(); + + // make sure it's openable even though whatever data that wasn't synced got + // dropped. 
+ options.env = env_; + DB* snapshot_db; + ASSERT_OK(DB::Open(options, snapshot_name_, &snapshot_db)); + ReadOptions read_opts; + std::string get_result; + ASSERT_OK(snapshot_db->Get(read_opts, "key1", &get_result)); + ASSERT_EQ("val1", get_result); + delete snapshot_db; + delete db_; + db_ = nullptr; +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/3rdParty/rocksdb/v5.16.X/utilities/col_buf_decoder.cc b/3rdParty/rocksdb/v5.18.X/utilities/col_buf_decoder.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/utilities/col_buf_decoder.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/col_buf_decoder.cc index 3fb31794f7..8f9fa74abd 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/col_buf_decoder.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/col_buf_decoder.cc @@ -147,7 +147,7 @@ size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) { col_compression_type_ == kColDict) { uint64_t dict_val = read_val; assert(dict_val < dict_vec_.size()); - write_val = dict_vec_[dict_val]; + write_val = dict_vec_[static_cast(dict_val)]; } // dest->append(reinterpret_cast(&write_val), size_); @@ -222,7 +222,7 @@ size_t VariableChunkColBufDecoder::Decode(const char* src, char** dest) { uint64_t dict_val; ReadVarint64(&src, &dict_val); assert(dict_val < dict_vec_.size()); - chunk_buf = dict_vec_[dict_val]; + chunk_buf = dict_vec_[static_cast(dict_val)]; } else { memcpy(&chunk_buf, src, chunk_size); src += chunk_size; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/col_buf_decoder.h b/3rdParty/rocksdb/v5.18.X/utilities/col_buf_decoder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/col_buf_decoder.h rename to 3rdParty/rocksdb/v5.18.X/utilities/col_buf_decoder.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/col_buf_encoder.cc b/3rdParty/rocksdb/v5.18.X/utilities/col_buf_encoder.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/col_buf_encoder.cc rename to 
3rdParty/rocksdb/v5.18.X/utilities/col_buf_encoder.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/col_buf_encoder.h b/3rdParty/rocksdb/v5.18.X/utilities/col_buf_encoder.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/col_buf_encoder.h rename to 3rdParty/rocksdb/v5.18.X/utilities/col_buf_encoder.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_exp.cc b/3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_exp.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_exp.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_exp.cc index 988a59b3c7..c251c985ec 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_exp.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_exp.cc @@ -88,7 +88,7 @@ class ColumnAwareEncodingExp { EnvOptions env_options; if (CompressionTypeSupported(compression_type)) { fprintf(stdout, "[%s]\n", FLAGS_compression_type.c_str()); - unique_ptr encoded_out_file; + std::unique_ptr encoded_out_file; std::unique_ptr env(NewMemEnv(Env::Default())); if (!FLAGS_encoded_file.empty()) { @@ -116,7 +116,7 @@ class ColumnAwareEncodingExp { uint64_t encode_time = sw.ElapsedNanosSafe(false /* reset */); fprintf(stdout, "Encode time: %" PRIu64 "\n", encode_time); if (decode) { - unique_ptr decoded_out_file; + std::unique_ptr decoded_out_file; if (!FLAGS_decoded_file.empty()) { env->NewWritableFile(FLAGS_decoded_file, &decoded_out_file, env_options); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_util.cc b/3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_util.cc similarity index 98% 
rename from 3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_util.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_util.cc index 45dedca08f..222ee46803 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_util.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_util.cc @@ -100,7 +100,7 @@ void ColumnAwareEncodingReader::DecodeBlocks( size_t num_kv_pairs; const char* header_content_ptr = content_ptr; - num_kv_pairs = DecodeFixed64(header_content_ptr); + num_kv_pairs = static_cast(DecodeFixed64(header_content_ptr)); header_content_ptr += sizeof(size_t); size_t num_key_columns = key_col_bufs.size(); @@ -118,7 +118,7 @@ void ColumnAwareEncodingReader::DecodeBlocks( key_content_ptr[i] = col_content_ptr; key_content_ptr[i] += key_col_bufs[i]->Init(key_content_ptr[i]); size_t offset; - offset = DecodeFixed64(header_content_ptr); + offset = static_cast(DecodeFixed64(header_content_ptr)); header_content_ptr += sizeof(size_t); col_content_ptr += offset; } @@ -126,7 +126,7 @@ void ColumnAwareEncodingReader::DecodeBlocks( value_content_ptr[i] = col_content_ptr; value_content_ptr[i] += value_col_bufs[i]->Init(value_content_ptr[i]); size_t offset; - offset = DecodeFixed64(header_content_ptr); + offset = static_cast(DecodeFixed64(header_content_ptr)); header_content_ptr += sizeof(size_t); col_content_ptr += offset; } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_util.h b/3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/column_aware_encoding_util.h rename to 3rdParty/rocksdb/v5.18.X/utilities/column_aware_encoding_util.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc b/3rdParty/rocksdb/v5.18.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h b/3rdParty/rocksdb/v5.18.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h rename to 3rdParty/rocksdb/v5.18.X/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/convenience/info_log_finder.cc b/3rdParty/rocksdb/v5.18.X/utilities/convenience/info_log_finder.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/convenience/info_log_finder.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/convenience/info_log_finder.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_db_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_db_impl.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_db_impl.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_db_impl.cc index 978bfb2e49..2574d379f2 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_db_impl.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_db_impl.cc @@ -389,7 +389,7 @@ Iterator* DateTieredDBImpl::NewIterator(const ReadOptions& opts) { for (auto& item : handle_map_) { auto handle = item.second; builder.AddIterator(db_impl->NewInternalIterator( - arena, db_iter->GetRangeDelAggregator(), handle)); + arena, db_iter->GetRangeDelAggregator(), kMaxSequenceNumber, handle)); } auto internal_iter = builder.Finish(); db_iter->SetIterUnderDBIter(internal_iter); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_db_impl.h 
b/3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_db_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_db_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_db_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_test.cc index 8e7fced58a..35f15584e5 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/date_tiered/date_tiered_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/date_tiered/date_tiered_test.cc @@ -13,6 +13,7 @@ #include "rocksdb/compaction_filter.h" #include "rocksdb/utilities/date_tiered_db.h" +#include "port/port.h" #include "util/logging.h" #include "util/string_util.h" #include "util/testharness.h" @@ -131,7 +132,7 @@ class DateTieredTest : public testing::Test { Options options_; KVMap::iterator kv_it_; const std::string kNewValue_ = "new_value"; - unique_ptr test_comp_filter_; + std::unique_ptr test_comp_filter_; }; // Puts a set of values and checks its presence using Get during ttl diff --git a/3rdParty/rocksdb/v5.16.X/utilities/debug.cc b/3rdParty/rocksdb/v5.18.X/utilities/debug.cc similarity index 88% rename from 3rdParty/rocksdb/v5.16.X/utilities/debug.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/debug.cc index e0c5f5566e..3dfde980ec 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/debug.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/debug.cc @@ -19,9 +19,11 @@ Status GetAllKeyVersions(DB* db, Slice begin_key, Slice end_key, DBImpl* idb = static_cast(db->GetRootDB()); auto icmp = InternalKeyComparator(idb->GetOptions().comparator); - RangeDelAggregator range_del_agg(icmp, {} /* snapshots */); + ReadRangeDelAggregatorV2 range_del_agg(&icmp, + kMaxSequenceNumber /* upper_bound */); Arena arena; - 
ScopedArenaIterator iter(idb->NewInternalIterator(&arena, &range_del_agg)); + ScopedArenaIterator iter( + idb->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber)); if (!begin_key.empty()) { InternalKey ikey; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/document/document_db.cc b/3rdParty/rocksdb/v5.18.X/utilities/document/document_db.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/document/document_db.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/document/document_db.cc index 939327ed1b..279e4cb4da 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/document/document_db.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/document/document_db.cc @@ -1155,10 +1155,10 @@ Options GetRocksDBOptionsFromOptions(const DocumentDBOptions& options) { Options rocksdb_options; rocksdb_options.max_background_compactions = options.background_threads - 1; rocksdb_options.max_background_flushes = 1; - rocksdb_options.write_buffer_size = options.memtable_size; + rocksdb_options.write_buffer_size = static_cast(options.memtable_size); rocksdb_options.max_write_buffer_number = 6; BlockBasedTableOptions table_options; - table_options.block_cache = NewLRUCache(options.cache_size); + table_options.block_cache = NewLRUCache(static_cast(options.cache_size)); rocksdb_options.table_factory.reset(NewBlockBasedTableFactory(table_options)); return rocksdb_options; } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/document/document_db_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/document/document_db_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/document/document_db_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/document/document_db_test.cc index 652f1e4558..3ee560db12 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/document/document_db_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/document/document_db_test.cc @@ -75,8 +75,10 @@ TEST_F(DocumentDBTest, SimpleQueryTest) { ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_)); 
CreateIndexes({index}); delete db_; + db_ = nullptr; // now there is index present ASSERT_OK(DocumentDB::Open(options, dbname_, {index}, &db_)); + assert(db_ != nullptr); delete index.description; std::vector json_objects = { diff --git a/3rdParty/rocksdb/v5.16.X/utilities/document/json_document.cc b/3rdParty/rocksdb/v5.18.X/utilities/document/json_document.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/document/json_document.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/document/json_document.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/document/json_document_builder.cc b/3rdParty/rocksdb/v5.18.X/utilities/document/json_document_builder.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/document/json_document_builder.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/document/json_document_builder.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/document/json_document_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/document/json_document_test.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/utilities/document/json_document_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/document/json_document_test.cc index 977905b915..9d79c41cf5 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/document/json_document_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/document/json_document_test.cc @@ -249,21 +249,23 @@ TEST_F(JSONDocumentTest, OperatorEqualsTest) { ASSERT_TRUE(JSONDocument(static_cast(15)) == JSONDocument(static_cast(15))); - unique_ptr arrayWithInt8Doc(JSONDocument::ParseJSON("[8]")); + std::unique_ptr arrayWithInt8Doc( + JSONDocument::ParseJSON("[8]")); ASSERT_TRUE(arrayWithInt8Doc != nullptr); ASSERT_TRUE(arrayWithInt8Doc->IsArray()); ASSERT_TRUE((*arrayWithInt8Doc)[0].IsInt64()); ASSERT_TRUE((*arrayWithInt8Doc)[0] == JSONDocument(static_cast(8))); - unique_ptr arrayWithInt16Doc(JSONDocument::ParseJSON("[512]")); + std::unique_ptr arrayWithInt16Doc( + JSONDocument::ParseJSON("[512]")); 
ASSERT_TRUE(arrayWithInt16Doc != nullptr); ASSERT_TRUE(arrayWithInt16Doc->IsArray()); ASSERT_TRUE((*arrayWithInt16Doc)[0].IsInt64()); ASSERT_TRUE((*arrayWithInt16Doc)[0] == JSONDocument(static_cast(512))); - unique_ptr arrayWithInt32Doc( - JSONDocument::ParseJSON("[1000000]")); + std::unique_ptr arrayWithInt32Doc( + JSONDocument::ParseJSON("[1000000]")); ASSERT_TRUE(arrayWithInt32Doc != nullptr); ASSERT_TRUE(arrayWithInt32Doc->IsArray()); ASSERT_TRUE((*arrayWithInt32Doc)[0].IsInt64()); @@ -277,8 +279,8 @@ TEST_F(JSONDocumentTest, OperatorEqualsTest) { } TEST_F(JSONDocumentTest, JSONDocumentBuilderTest) { - unique_ptr parsedArray( - JSONDocument::ParseJSON("[1, [123, \"a\", \"b\"], {\"b\":\"c\"}]")); + std::unique_ptr parsedArray( + JSONDocument::ParseJSON("[1, [123, \"a\", \"b\"], {\"b\":\"c\"}]")); ASSERT_TRUE(parsedArray != nullptr); JSONDocumentBuilder builder; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/env_librados.cc b/3rdParty/rocksdb/v5.18.X/utilities/env_librados.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/env_librados.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/env_librados.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/env_librados.md b/3rdParty/rocksdb/v5.18.X/utilities/env_librados.md similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/env_librados.md rename to 3rdParty/rocksdb/v5.18.X/utilities/env_librados.md diff --git a/3rdParty/rocksdb/v5.16.X/utilities/env_librados_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/env_librados_test.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/env_librados_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/env_librados_test.cc index 7d9b252ea4..fb10224e7d 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/env_librados_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/env_librados_test.cc @@ -108,7 +108,7 @@ public: TEST_F(EnvLibradosTest, Basics) { uint64_t file_size; - unique_ptr writable_file; + std::unique_ptr writable_file; std::vector 
children; ASSERT_OK(env_->CreateDir("/dir")); @@ -150,8 +150,8 @@ TEST_F(EnvLibradosTest, Basics) { ASSERT_EQ(3U, file_size); // Check that opening non-existent file fails. - unique_ptr seq_file; - unique_ptr rand_file; + std::unique_ptr seq_file; + std::unique_ptr rand_file; ASSERT_TRUE( !env_->NewSequentialFile("/dir/non_existent", &seq_file, soptions_).ok()); ASSERT_TRUE(!seq_file); @@ -169,9 +169,9 @@ TEST_F(EnvLibradosTest, Basics) { } TEST_F(EnvLibradosTest, ReadWrite) { - unique_ptr writable_file; - unique_ptr seq_file; - unique_ptr rand_file; + std::unique_ptr writable_file; + std::unique_ptr seq_file; + std::unique_ptr rand_file; Slice result; char scratch[100]; @@ -210,7 +210,7 @@ TEST_F(EnvLibradosTest, ReadWrite) { TEST_F(EnvLibradosTest, Locks) { FileLock* lock = nullptr; - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->CreateDir("/dir")); @@ -229,7 +229,7 @@ TEST_F(EnvLibradosTest, Misc) { ASSERT_OK(env_->GetTestDirectory(&test_dir)); ASSERT_TRUE(!test_dir.empty()); - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_TRUE(!env_->NewWritableFile("/a/b", &writable_file, soptions_).ok()); ASSERT_OK(env_->NewWritableFile("/a", &writable_file, soptions_)); @@ -249,14 +249,14 @@ TEST_F(EnvLibradosTest, LargeWrite) { write_data.append(1, 'h'); } - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->CreateDir("/dir")); ASSERT_OK(env_->NewWritableFile("/dir/g", &writable_file, soptions_)); ASSERT_OK(writable_file->Append("foo")); ASSERT_OK(writable_file->Append(write_data)); writable_file.reset(); - unique_ptr seq_file; + std::unique_ptr seq_file; Slice result; ASSERT_OK(env_->NewSequentialFile("/dir/g", &seq_file, soptions_)); ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". 
@@ -282,7 +282,7 @@ TEST_F(EnvLibradosTest, FrequentlySmallWrite) { write_data.append(1, 'h'); } - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->CreateDir("/dir")); ASSERT_OK(env_->NewWritableFile("/dir/g", &writable_file, soptions_)); ASSERT_OK(writable_file->Append("foo")); @@ -292,7 +292,7 @@ TEST_F(EnvLibradosTest, FrequentlySmallWrite) { } writable_file.reset(); - unique_ptr seq_file; + std::unique_ptr seq_file; Slice result; ASSERT_OK(env_->NewSequentialFile("/dir/g", &seq_file, soptions_)); ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". @@ -317,7 +317,7 @@ TEST_F(EnvLibradosTest, Truncate) { write_data.append(1, 'h'); } - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->CreateDir("/dir")); ASSERT_OK(env_->NewWritableFile("/dir/g", &writable_file, soptions_)); ASSERT_OK(writable_file->Append(write_data)); @@ -801,7 +801,7 @@ public: TEST_F(EnvLibradosMutipoolTest, Basics) { uint64_t file_size; - unique_ptr writable_file; + std::unique_ptr writable_file; std::vector children; std::vector v = {"/tmp/dir1", "/tmp/dir2", "/tmp/dir3", "/tmp/dir4", "dir"}; @@ -850,8 +850,8 @@ TEST_F(EnvLibradosMutipoolTest, Basics) { ASSERT_EQ(3U, file_size); // Check that opening non-existent file fails. - unique_ptr seq_file; - unique_ptr rand_file; + std::unique_ptr seq_file; + std::unique_ptr rand_file; ASSERT_TRUE( !env_->NewSequentialFile(dir_non_existent.c_str(), &seq_file, soptions_).ok()); ASSERT_TRUE(!seq_file); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/env_mirror.cc b/3rdParty/rocksdb/v5.18.X/utilities/env_mirror.cc similarity index 93% rename from 3rdParty/rocksdb/v5.16.X/utilities/env_mirror.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/env_mirror.cc index e4da918346..327d8e1622 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/env_mirror.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/env_mirror.cc @@ -16,7 +16,7 @@ namespace rocksdb { // Env's. This is useful for debugging purposes. 
class SequentialFileMirror : public SequentialFile { public: - unique_ptr a_, b_; + std::unique_ptr a_, b_; std::string fname; explicit SequentialFileMirror(std::string f) : fname(f) {} @@ -60,11 +60,12 @@ class SequentialFileMirror : public SequentialFile { class RandomAccessFileMirror : public RandomAccessFile { public: - unique_ptr a_, b_; + std::unique_ptr a_, b_; std::string fname; explicit RandomAccessFileMirror(std::string f) : fname(f) {} - Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { + Status Read(uint64_t offset, size_t n, Slice* result, + char* scratch) const override { Status as = a_->Read(offset, n, result, scratch); if (as == Status::OK()) { char* bscratch = new char[n]; @@ -94,7 +95,7 @@ class RandomAccessFileMirror : public RandomAccessFile { class WritableFileMirror : public WritableFile { public: - unique_ptr a_, b_; + std::unique_ptr a_, b_; std::string fname; explicit WritableFileMirror(std::string f) : fname(f) {} @@ -190,7 +191,7 @@ class WritableFileMirror : public WritableFile { }; Status EnvMirror::NewSequentialFile(const std::string& f, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) { if (f.find("/proc/") == 0) { return a_->NewSequentialFile(f, r, options); @@ -207,7 +208,7 @@ Status EnvMirror::NewSequentialFile(const std::string& f, } Status EnvMirror::NewRandomAccessFile(const std::string& f, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) { if (f.find("/proc/") == 0) { return a_->NewRandomAccessFile(f, r, options); @@ -224,7 +225,7 @@ Status EnvMirror::NewRandomAccessFile(const std::string& f, } Status EnvMirror::NewWritableFile(const std::string& f, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) { if (f.find("/proc/") == 0) return a_->NewWritableFile(f, r, options); WritableFileMirror* mf = new WritableFileMirror(f); @@ -240,7 +241,7 @@ Status EnvMirror::NewWritableFile(const std::string& f, Status EnvMirror::ReuseWritableFile(const 
std::string& fname, const std::string& old_fname, - unique_ptr* r, + std::unique_ptr* r, const EnvOptions& options) { if (fname.find("/proc/") == 0) return a_->ReuseWritableFile(fname, old_fname, r, options); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/env_mirror_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/env_mirror_test.cc similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/utilities/env_mirror_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/env_mirror_test.cc index 2bf8ec8583..812595ca1e 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/env_mirror_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/env_mirror_test.cc @@ -32,7 +32,7 @@ class EnvMirrorTest : public testing::Test { TEST_F(EnvMirrorTest, Basics) { uint64_t file_size; - unique_ptr writable_file; + std::unique_ptr writable_file; std::vector children; ASSERT_OK(env_->CreateDir("/dir")); @@ -91,8 +91,8 @@ TEST_F(EnvMirrorTest, Basics) { ASSERT_EQ(3U, file_size); // Check that opening non-existent file fails. - unique_ptr seq_file; - unique_ptr rand_file; + std::unique_ptr seq_file; + std::unique_ptr rand_file; ASSERT_TRUE( !env_->NewSequentialFile("/dir/non_existent", &seq_file, soptions_).ok()); ASSERT_TRUE(!seq_file); @@ -110,9 +110,9 @@ TEST_F(EnvMirrorTest, Basics) { } TEST_F(EnvMirrorTest, ReadWrite) { - unique_ptr writable_file; - unique_ptr seq_file; - unique_ptr rand_file; + std::unique_ptr writable_file; + std::unique_ptr seq_file; + std::unique_ptr rand_file; Slice result; char scratch[100]; @@ -162,7 +162,7 @@ TEST_F(EnvMirrorTest, Misc) { ASSERT_OK(env_->GetTestDirectory(&test_dir)); ASSERT_TRUE(!test_dir.empty()); - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile("/a/b", &writable_file, soptions_)); // These are no-ops, but we test they return success. 
@@ -181,13 +181,13 @@ TEST_F(EnvMirrorTest, LargeWrite) { write_data.append(1, static_cast(i)); } - unique_ptr writable_file; + std::unique_ptr writable_file; ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file, soptions_)); ASSERT_OK(writable_file->Append("foo")); ASSERT_OK(writable_file->Append(write_data)); writable_file.reset(); - unique_ptr seq_file; + std::unique_ptr seq_file; Slice result; ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file, soptions_)); ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". diff --git a/3rdParty/rocksdb/v5.16.X/utilities/env_timed.cc b/3rdParty/rocksdb/v5.18.X/utilities/env_timed.cc similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/utilities/env_timed.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/env_timed.cc index 6afd45bf99..86455ee65c 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/env_timed.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/env_timed.cc @@ -18,21 +18,21 @@ class TimedEnv : public EnvWrapper { explicit TimedEnv(Env* base_env) : EnvWrapper(base_env) {} virtual Status NewSequentialFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { PERF_TIMER_GUARD(env_new_sequential_file_nanos); return EnvWrapper::NewSequentialFile(fname, result, options); } virtual Status NewRandomAccessFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { PERF_TIMER_GUARD(env_new_random_access_file_nanos); return EnvWrapper::NewRandomAccessFile(fname, result, options); } virtual Status NewWritableFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { PERF_TIMER_GUARD(env_new_writable_file_nanos); return EnvWrapper::NewWritableFile(fname, result, options); @@ -40,21 +40,21 @@ class TimedEnv : public EnvWrapper { virtual Status ReuseWritableFile(const std::string& fname, const std::string& old_fname, - unique_ptr* result, + 
std::unique_ptr* result, const EnvOptions& options) override { PERF_TIMER_GUARD(env_reuse_writable_file_nanos); return EnvWrapper::ReuseWritableFile(fname, old_fname, result, options); } virtual Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options) override { PERF_TIMER_GUARD(env_new_random_rw_file_nanos); return EnvWrapper::NewRandomRWFile(fname, result, options); } virtual Status NewDirectory(const std::string& name, - unique_ptr* result) override { + std::unique_ptr* result) override { PERF_TIMER_GUARD(env_new_directory_nanos); return EnvWrapper::NewDirectory(name, result); } @@ -131,7 +131,7 @@ class TimedEnv : public EnvWrapper { } virtual Status NewLogger(const std::string& fname, - shared_ptr* result) override { + std::shared_ptr* result) override { PERF_TIMER_GUARD(env_new_logger_nanos); return EnvWrapper::NewLogger(fname, result); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/env_timed_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/env_timed_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/env_timed_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/env_timed_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/geodb/geodb_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/geodb/geodb_impl.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/geodb/geodb_impl.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/geodb/geodb_impl.cc index 97c4da0f73..9150b16b2c 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/geodb/geodb_impl.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/geodb/geodb_impl.cc @@ -222,7 +222,7 @@ GeoIterator* GeoDBImpl::SearchRadial(const GeoPosition& pos, Iterator* iter = db_->NewIterator(ReadOptions()); // Process each prospective quadkey - for (std::string qid : qids) { + for (const std::string& qid : qids) { // The user is interested in only these many objects. 
if (number_of_values == 0) { break; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/geodb/geodb_impl.h b/3rdParty/rocksdb/v5.18.X/utilities/geodb/geodb_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/geodb/geodb_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/geodb/geodb_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/geodb/geodb_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/geodb/geodb_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/geodb/geodb_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/geodb/geodb_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/leveldb_options/leveldb_options.cc b/3rdParty/rocksdb/v5.18.X/utilities/leveldb_options/leveldb_options.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/leveldb_options/leveldb_options.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/leveldb_options/leveldb_options.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/lua/rocks_lua_compaction_filter.cc b/3rdParty/rocksdb/v5.18.X/utilities/lua/rocks_lua_compaction_filter.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/lua/rocks_lua_compaction_filter.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/lua/rocks_lua_compaction_filter.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/lua/rocks_lua_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/lua/rocks_lua_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/lua/rocks_lua_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/lua/rocks_lua_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/memory/memory_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/memory/memory_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/memory/memory_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/memory/memory_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/memory/memory_util.cc b/3rdParty/rocksdb/v5.18.X/utilities/memory/memory_util.cc similarity index 100% rename 
from 3rdParty/rocksdb/v5.16.X/utilities/memory/memory_util.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/memory/memory_util.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators.h b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators.h rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/bytesxor.cc b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/bytesxor.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/bytesxor.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/bytesxor.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/bytesxor.h b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/bytesxor.h similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/bytesxor.h rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/bytesxor.h index 1562ca852a..762e372445 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/bytesxor.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/bytesxor.h @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-#ifndef UTILITIES_MERGE_OPERATORS_BYTESXOR_H_ -#define UTILITIES_MERGE_OPERATORS_BYTESXOR_H_ +#pragma once #include #include @@ -38,5 +37,3 @@ class BytesXOROperator : public AssociativeMergeOperator { }; } // namespace rocksdb - -#endif // UTILITIES_MERGE_OPERATORS_BYTESXOR_H_ diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/max.cc b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/max.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/max.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/max.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/put.cc b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/put.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/put.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/put.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend.cc b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend.h b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend.h rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend2.cc b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend2.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend2.cc rename to 
3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend2.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend2.h b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend2.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend2.h rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend2.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/string_append/stringappend_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/string_append/stringappend_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/merge_operators/uint64add.cc b/3rdParty/rocksdb/v5.18.X/utilities/merge_operators/uint64add.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/merge_operators/uint64add.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/merge_operators/uint64add.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/object_registry_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/object_registry_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/object_registry_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/object_registry_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/option_change_migration/option_change_migration.cc b/3rdParty/rocksdb/v5.18.X/utilities/option_change_migration/option_change_migration.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/option_change_migration/option_change_migration.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/option_change_migration/option_change_migration.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/option_change_migration/option_change_migration_test.cc 
b/3rdParty/rocksdb/v5.18.X/utilities/option_change_migration/option_change_migration_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/option_change_migration/option_change_migration_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/option_change_migration/option_change_migration_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/options/options_util.cc b/3rdParty/rocksdb/v5.18.X/utilities/options/options_util.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/options/options_util.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/options/options_util.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/options/options_util_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/options/options_util_test.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/utilities/options/options_util_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/options/options_util_test.cc index bf830190c6..4c12f1a67d 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/options/options_util_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/options/options_util_test.cc @@ -104,8 +104,8 @@ class DummyTableFactory : public TableFactory { virtual Status NewTableReader( const TableReaderOptions& /*table_reader_options*/, - unique_ptr&& /*file*/, uint64_t /*file_size*/, - unique_ptr* /*table_reader*/, + std::unique_ptr&& /*file*/, + uint64_t /*file_size*/, std::unique_ptr* /*table_reader*/, bool /*prefetch_index_and_filter_in_cache*/) const override { return Status::NotSupported(); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier.cc index 1ebf8ae6b3..f7f72df6df 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier.cc +++ 
b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier.cc @@ -263,7 +263,7 @@ Status BlockCacheTier::InsertImpl(const Slice& key, const Slice& data) { return Status::OK(); } -Status BlockCacheTier::Lookup(const Slice& key, unique_ptr* val, +Status BlockCacheTier::Lookup(const Slice& key, std::unique_ptr* val, size_t* size) { StopWatchNano timer(opt_.env, /*auto_start=*/ true); @@ -287,7 +287,7 @@ Status BlockCacheTier::Lookup(const Slice& key, unique_ptr* val, assert(file->refs_); - unique_ptr scratch(new char[lba.size_]); + std::unique_ptr scratch(new char[lba.size_]); Slice blk_key; Slice blk_val; @@ -369,7 +369,7 @@ bool BlockCacheTier::Reserve(const size_t size) { const double retain_fac = (100 - kEvictPct) / static_cast(100); while (size + size_ > opt_.cache_size * retain_fac) { - unique_ptr f(metadata_.Evict()); + std::unique_ptr f(metadata_.Evict()); if (!f) { // nothing is evictable return false; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier.h similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier.h index dcb97258b4..2b2c0ef4f1 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier.h @@ -44,9 +44,9 @@ class BlockCacheTier : public PersistentCacheTier { public: explicit BlockCacheTier(const PersistentCacheConfig& opt) : opt_(opt), - insert_ops_(opt_.max_write_pipeline_backlog_size), + insert_ops_(static_cast(opt_.max_write_pipeline_backlog_size)), buffer_allocator_(opt.write_buffer_size, opt.write_buffer_count()), - writer_(this, opt_.writer_qdepth, opt_.writer_dispatch_size) { + writer_(this, opt_.writer_qdepth, static_cast(opt_.writer_dispatch_size)) { Info(opt_.log, "Initializing allocator. 
size=%d B count=%d", opt_.write_buffer_size, opt_.write_buffer_count()); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_file.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_file.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_file.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_file.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_file.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_file.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_file.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_file.h index ef5dbab040..e38b6c9a1d 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_file.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_file.h @@ -149,7 +149,7 @@ class RandomAccessCacheFile : public BlockCacheFile { public: explicit RandomAccessCacheFile(Env* const env, const std::string& dir, const uint32_t cache_id, - const shared_ptr& log) + const std::shared_ptr& log) : BlockCacheFile(env, dir, cache_id), log_(log) {} virtual ~RandomAccessCacheFile() {} diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_file_buffer.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_file_buffer.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_file_buffer.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_file_buffer.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_metadata.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_metadata.cc similarity index 100% rename from 
3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_metadata.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_metadata.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_metadata.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_metadata.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/block_cache_tier_metadata.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/block_cache_tier_metadata.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table_bench.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table_bench.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table_bench.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table_bench.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table_evictable.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table_evictable.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table_evictable.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table_evictable.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/hash_table_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/hash_table_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/lrulist.h 
b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/lrulist.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/lrulist.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/lrulist.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_bench.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_bench.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_bench.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_bench.cc index 7d26c3a7de..64d75c7a51 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_bench.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_bench.cc @@ -251,7 +251,7 @@ class CacheTierBenchmark { // create data for a key by filling with a certain pattern std::unique_ptr NewBlock(const uint64_t val) { - unique_ptr data(new char[FLAGS_iosize]); + std::unique_ptr data(new char[FLAGS_iosize]); memset(data.get(), val % 255, FLAGS_iosize); return data; } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_test.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_test.h similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_test.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_test.h index 37e842f2e2..ad99ea864b 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_test.h +++ 
b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_test.h @@ -157,7 +157,7 @@ class PersistentCacheTierTest : public testing::Test { memset(edata, '0' + (i % 10), sizeof(edata)); auto k = prefix + PaddedNumber(i, /*count=*/8); Slice key(k); - unique_ptr block; + std::unique_ptr block; size_t block_size; if (eviction_enabled) { @@ -210,7 +210,7 @@ class PersistentCacheTierTest : public testing::Test { } const std::string path_; - shared_ptr log_; + std::shared_ptr log_; std::shared_ptr cache_; std::atomic key_{0}; size_t max_keys_ = 0; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_tier.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_tier.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_tier.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_tier.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_tier.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_tier.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_tier.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_tier.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_util.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/persistent_cache_util.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/persistent_cache_util.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/volatile_tier_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/volatile_tier_impl.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/volatile_tier_impl.cc rename to 
3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/volatile_tier_impl.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/volatile_tier_impl.h b/3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/volatile_tier_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/persistent_cache/volatile_tier_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/persistent_cache/volatile_tier_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/redis/README b/3rdParty/rocksdb/v5.18.X/utilities/redis/README similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/redis/README rename to 3rdParty/rocksdb/v5.18.X/utilities/redis/README diff --git a/3rdParty/rocksdb/v5.16.X/utilities/redis/redis_list_exception.h b/3rdParty/rocksdb/v5.18.X/utilities/redis/redis_list_exception.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/redis/redis_list_exception.h rename to 3rdParty/rocksdb/v5.18.X/utilities/redis/redis_list_exception.h index f93bcbb190..bc2b39a310 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/redis/redis_list_exception.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/redis/redis_list_exception.h @@ -5,8 +5,8 @@ * Copyright 2013 Facebook */ -#ifndef ROCKSDB_LITE #pragma once +#ifndef ROCKSDB_LITE #include namespace rocksdb { diff --git a/3rdParty/rocksdb/v5.16.X/utilities/redis/redis_list_iterator.h b/3rdParty/rocksdb/v5.18.X/utilities/redis/redis_list_iterator.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/redis/redis_list_iterator.h rename to 3rdParty/rocksdb/v5.18.X/utilities/redis/redis_list_iterator.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/redis/redis_lists.cc b/3rdParty/rocksdb/v5.18.X/utilities/redis/redis_lists.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/redis/redis_lists.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/redis/redis_lists.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/redis/redis_lists.h 
b/3rdParty/rocksdb/v5.18.X/utilities/redis/redis_lists.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/redis/redis_lists.h rename to 3rdParty/rocksdb/v5.18.X/utilities/redis/redis_lists.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/redis/redis_lists_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/redis/redis_lists_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/redis/redis_lists_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/redis/redis_lists_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/simulator_cache/sim_cache.cc b/3rdParty/rocksdb/v5.18.X/utilities/simulator_cache/sim_cache.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/simulator_cache/sim_cache.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/simulator_cache/sim_cache.cc index e7750dd5e2..bdf6c5aa87 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/simulator_cache/sim_cache.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/simulator_cache/sim_cache.cc @@ -46,7 +46,8 @@ class CacheActivityLogger { if (!status.ok()) { return status; } - file_writer_.reset(new WritableFileWriter(std::move(log_file), env_opts)); + file_writer_.reset(new WritableFileWriter(std::move(log_file), + activity_log_file, env_opts)); max_logging_size_ = max_logging_size; activity_logging_enabled_.store(true); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/simulator_cache/sim_cache_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/simulator_cache/sim_cache_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/simulator_cache/sim_cache_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/simulator_cache/sim_cache_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/spatialdb/spatial_db.cc b/3rdParty/rocksdb/v5.18.X/utilities/spatialdb/spatial_db.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/utilities/spatialdb/spatial_db.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/spatialdb/spatial_db.cc index a9b990ee20..b34976eb81 
100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/spatialdb/spatial_db.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/spatialdb/spatial_db.cc @@ -354,8 +354,8 @@ class SpatialIndexCursor : public Cursor { : value_getter_(value_getter), valid_(true) { // calculate quad keys we'll need to query std::vector quad_keys; - quad_keys.reserve((tile_bbox.max_x - tile_bbox.min_x + 1) * - (tile_bbox.max_y - tile_bbox.min_y + 1)); + quad_keys.reserve(static_cast((tile_bbox.max_x - tile_bbox.min_x + 1) * + (tile_bbox.max_y - tile_bbox.min_y + 1))); for (uint64_t x = tile_bbox.min_x; x <= tile_bbox.max_x; ++x) { for (uint64_t y = tile_bbox.min_y; y <= tile_bbox.max_y; ++y) { quad_keys.push_back(GetQuadKeyFromTile(x, y, tile_bits)); @@ -473,7 +473,7 @@ class SpatialIndexCursor : public Cursor { } - unique_ptr value_getter_; + std::unique_ptr value_getter_; bool valid_; Status status_; @@ -791,7 +791,7 @@ Status SpatialDB::Create( db_options.create_missing_column_families = true; db_options.error_if_exists = true; - auto block_cache = NewLRUCache(options.cache_size); + auto block_cache = NewLRUCache(static_cast(options.cache_size)); ColumnFamilyOptions column_family_options = GetColumnFamilyOptions(options, block_cache); @@ -832,7 +832,7 @@ Status SpatialDB::Create( Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, SpatialDB** db, bool read_only) { DBOptions db_options = GetDBOptionsFromSpatialDBOptions(options); - auto block_cache = NewLRUCache(options.cache_size); + auto block_cache = NewLRUCache(static_cast(options.cache_size)); ColumnFamilyOptions column_family_options = GetColumnFamilyOptions(options, block_cache); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/spatialdb/spatial_db_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/spatialdb/spatial_db_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/spatialdb/spatial_db_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/spatialdb/spatial_db_test.cc index 
783b347d0a..cb92af8b1a 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/spatialdb/spatial_db_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/spatialdb/spatial_db_test.cc @@ -94,7 +94,7 @@ TEST_F(SpatialDBTest, FeatureSetSerializeTest) { ASSERT_EQ(deserialized.Get("m").get_double(), 3.25); // corrupted serialization - serialized = serialized.substr(0, serialized.size() - 4); + serialized = serialized.substr(0, serialized.size() - 1); deserialized.Clear(); ASSERT_TRUE(!deserialized.Deserialize(serialized)); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/spatialdb/utils.h b/3rdParty/rocksdb/v5.18.X/utilities/spatialdb/utils.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/spatialdb/utils.h rename to 3rdParty/rocksdb/v5.18.X/utilities/spatialdb/utils.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector.cc b/3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector.cc similarity index 91% rename from 3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector.cc index fdb7389a7a..eaf2c501d1 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector.cc @@ -20,7 +20,6 @@ CompactOnDeletionCollector::CompactOnDeletionCollector( deletion_trigger_(deletion_trigger), need_compaction_(false), finished_(false) { - assert(bucket_size_ > 0U); memset(num_deletions_in_buckets_, 0, sizeof(size_t) * kNumBuckets); } @@ -35,6 +34,11 @@ Status CompactOnDeletionCollector::AddUserKey(const Slice& /*key*/, SequenceNumber /*seq*/, uint64_t /*file_size*/) { assert(!finished_); + if (bucket_size_ == 0) { + // This collector is effectively disabled + return Status::OK(); + } + if (need_compaction_) 
{ // If the output file already needs to be compacted, skip the check. return Status::OK(); @@ -71,14 +75,14 @@ TablePropertiesCollector* CompactOnDeletionCollectorFactory::CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context /*context*/) { return new CompactOnDeletionCollector( - sliding_window_size_, deletion_trigger_); + sliding_window_size_.load(), deletion_trigger_.load()); } -std::shared_ptr +std::shared_ptr NewCompactOnDeletionCollectorFactory( size_t sliding_window_size, size_t deletion_trigger) { - return std::shared_ptr( + return std::shared_ptr( new CompactOnDeletionCollectorFactory( sliding_window_size, deletion_trigger)); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector.h b/3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector.h similarity index 69% rename from 3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector.h rename to 3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector.h index 5406ecea28..96c8db896f 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector.h @@ -9,38 +9,6 @@ #include "rocksdb/utilities/table_properties_collectors.h" namespace rocksdb { -// A factory of a table property collector that marks a SST -// file as need-compaction when it observe at least "D" deletion -// entries in any "N" consecutive entires. -class CompactOnDeletionCollectorFactory - : public TablePropertiesCollectorFactory { - public: - // A factory of a table property collector that marks a SST - // file as need-compaction when it observe at least "D" deletion - // entries in any "N" consecutive entires. 
- // - // @param sliding_window_size "N" - // @param deletion_trigger "D" - CompactOnDeletionCollectorFactory( - size_t sliding_window_size, - size_t deletion_trigger) : - sliding_window_size_(sliding_window_size), - deletion_trigger_(deletion_trigger) {} - - virtual ~CompactOnDeletionCollectorFactory() {} - - virtual TablePropertiesCollector* CreateTablePropertiesCollector( - TablePropertiesCollectorFactory::Context context) override; - - virtual const char* Name() const override { - return "CompactOnDeletionCollector"; - } - - private: - size_t sliding_window_size_; - size_t deletion_trigger_; -}; - class CompactOnDeletionCollector : public TablePropertiesCollector { public: CompactOnDeletionCollector( diff --git a/3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/trace/file_trace_reader_writer.cc b/3rdParty/rocksdb/v5.18.X/utilities/trace/file_trace_reader_writer.cc similarity index 90% rename from 3rdParty/rocksdb/v5.16.X/utilities/trace/file_trace_reader_writer.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/trace/file_trace_reader_writer.cc index dde36aa93c..4a81516a8b 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/trace/file_trace_reader_writer.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/trace/file_trace_reader_writer.cc @@ -83,16 +83,18 @@ Status FileTraceWriter::Write(const Slice& data) { return file_writer_->Append(data); } +uint64_t FileTraceWriter::GetFileSize() { return file_writer_->GetFileSize(); } + Status NewFileTraceReader(Env* env, const EnvOptions& env_options, const std::string& trace_filename, 
std::unique_ptr* trace_reader) { - unique_ptr trace_file; + std::unique_ptr trace_file; Status s = env->NewRandomAccessFile(trace_filename, &trace_file, env_options); if (!s.ok()) { return s; } - unique_ptr file_reader; + std::unique_ptr file_reader; file_reader.reset( new RandomAccessFileReader(std::move(trace_file), trace_filename)); trace_reader->reset(new FileTraceReader(std::move(file_reader))); @@ -102,14 +104,15 @@ Status NewFileTraceReader(Env* env, const EnvOptions& env_options, Status NewFileTraceWriter(Env* env, const EnvOptions& env_options, const std::string& trace_filename, std::unique_ptr* trace_writer) { - unique_ptr trace_file; + std::unique_ptr trace_file; Status s = env->NewWritableFile(trace_filename, &trace_file, env_options); if (!s.ok()) { return s; } - unique_ptr file_writer; - file_writer.reset(new WritableFileWriter(std::move(trace_file), env_options)); + std::unique_ptr file_writer; + file_writer.reset(new WritableFileWriter(std::move(trace_file), + trace_filename, env_options)); trace_writer->reset(new FileTraceWriter(std::move(file_writer))); return s; } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/trace/file_trace_reader_writer.h b/3rdParty/rocksdb/v5.18.X/utilities/trace/file_trace_reader_writer.h similarity index 88% rename from 3rdParty/rocksdb/v5.16.X/utilities/trace/file_trace_reader_writer.h rename to 3rdParty/rocksdb/v5.18.X/utilities/trace/file_trace_reader_writer.h index b363a3f09f..863f5d9d06 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/trace/file_trace_reader_writer.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/trace/file_trace_reader_writer.h @@ -22,7 +22,7 @@ class FileTraceReader : public TraceReader { virtual Status Close() override; private: - unique_ptr file_reader_; + std::unique_ptr file_reader_; Slice result_; size_t offset_; char* const buffer_; @@ -39,9 +39,10 @@ class FileTraceWriter : public TraceWriter { virtual Status Write(const Slice& data) override; virtual Status Close() override; + virtual uint64_t 
GetFileSize() override; private: - unique_ptr file_writer_; + std::unique_ptr file_writer_; }; } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction_db_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction_db_impl.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction_db_impl.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction_db_impl.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction_db_impl.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction_db_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction_db_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction_db_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction_test.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/optimistic_transaction_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/optimistic_transaction_test.cc diff --git 
a/3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction.cc similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction.cc index 65487ea505..d895d9d935 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction.cc @@ -46,7 +46,8 @@ PessimisticTransaction::PessimisticTransaction( waiting_key_(nullptr), lock_timeout_(0), deadlock_detect_(false), - deadlock_detect_depth_(0) { + deadlock_detect_depth_(0), + skip_concurrency_control_(false) { txn_db_impl_ = static_cast_with_check(txn_db); db_impl_ = static_cast_with_check(db_); @@ -61,6 +62,7 @@ void PessimisticTransaction::Initialize(const TransactionOptions& txn_options) { deadlock_detect_ = txn_options.deadlock_detect; deadlock_detect_depth_ = txn_options.deadlock_detect_depth; write_batch_.SetMaxBytes(txn_options.max_write_batch_size); + skip_concurrency_control_ = txn_options.skip_concurrency_control; lock_timeout_ = txn_options.lock_timeout * 1000; if (lock_timeout_ < 0) { @@ -125,8 +127,7 @@ bool PessimisticTransaction::IsExpired() const { WriteCommittedTxn::WriteCommittedTxn(TransactionDB* txn_db, const WriteOptions& write_options, const TransactionOptions& txn_options) - : PessimisticTransaction(txn_db, write_options, txn_options), - _commited_seq_nr(0) {}; + : PessimisticTransaction(txn_db, write_options, txn_options){}; Status PessimisticTransaction::CommitBatch(WriteBatch* batch) { TransactionKeyMap keys_to_unlock; @@ -229,15 +230,10 @@ Status WriteCommittedTxn::PrepareInternal() { WriteOptions write_options = write_options_; write_options.disableWAL = false; WriteBatchInternal::MarkEndPrepare(GetWriteBatch()->GetWriteBatch(), name_); - uint64_t seq_used = kMaxSequenceNumber; Status 
s = db_impl_->WriteImpl(write_options, GetWriteBatch()->GetWriteBatch(), - /*callback*/ nullptr, &log_number_, /*log ref*/ 0, - /*disable_memtable*/ true, &seq_used); - assert(!s.ok() || seq_used != kMaxSequenceNumber); - if (s.ok()) { - _commited_seq_nr = seq_used; - } + /*callback*/ nullptr, &log_number_, /*log_ref*/ 0, + /* disable_memtable*/ true); return s; } @@ -327,18 +323,26 @@ Status PessimisticTransaction::Commit() { Status WriteCommittedTxn::CommitWithoutPrepareInternal() { uint64_t seq_used = kMaxSequenceNumber; - auto s = db_impl_->WriteImpl(write_options_, GetWriteBatch()->GetWriteBatch(), - /*callback*/ nullptr, /*log nr*/ nullptr, - /*log ref*/ 0, /*disable_memtable*/ false, &seq_used); + auto s = + db_impl_->WriteImpl(write_options_, GetWriteBatch()->GetWriteBatch(), + /*callback*/ nullptr, /*log_used*/ nullptr, + /*log_ref*/ 0, /*disable_memtable*/ false, &seq_used); assert(!s.ok() || seq_used != kMaxSequenceNumber); if (s.ok()) { - _commited_seq_nr = seq_used; + SetId(seq_used); } return s; } Status WriteCommittedTxn::CommitBatchInternal(WriteBatch* batch, size_t) { - Status s = db_->Write(write_options_, batch); + uint64_t seq_used = kMaxSequenceNumber; + auto s = db_impl_->WriteImpl(write_options_, batch, /*callback*/ nullptr, + /*log_used*/ nullptr, /*log_ref*/ 0, + /*disable_memtable*/ false, &seq_used); + assert(!s.ok() || seq_used != kMaxSequenceNumber); + if (s.ok()) { + SetId(seq_used); + } return s; } @@ -356,8 +360,15 @@ Status WriteCommittedTxn::CommitInternal() { // in non recovery mode and simply insert the values WriteBatchInternal::Append(working_batch, GetWriteBatch()->GetWriteBatch()); - auto s = db_impl_->WriteImpl(write_options_, working_batch, nullptr, nullptr, - log_number_); + uint64_t seq_used = kMaxSequenceNumber; + auto s = + db_impl_->WriteImpl(write_options_, working_batch, /*callback*/ nullptr, + /*log_used*/ nullptr, /*log_ref*/ log_number_, + /*disable_memtable*/ false, &seq_used); + assert(!s.ok() || seq_used != 
kMaxSequenceNumber); + if (s.ok()) { + SetId(seq_used); + } return s; } @@ -505,11 +516,14 @@ Status PessimisticTransaction::LockBatch(WriteBatch* batch, Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family, const Slice& key, bool read_only, bool exclusive, bool skip_validate) { + Status s; + if (UNLIKELY(skip_concurrency_control_)) { + return s; + } uint32_t cfh_id = GetColumnFamilyID(column_family); std::string key_str = key.ToString(); bool previously_locked; bool lock_upgrade = false; - Status s; // lock this key if this transactions hasn't already locked it SequenceNumber tracked_at_seq = kMaxSequenceNumber; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction.h index 899441d62f..145d561fb1 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction.h @@ -183,6 +183,9 @@ class PessimisticTransaction : public TransactionBaseImpl { // Whether to perform deadlock detection or not. 
int64_t deadlock_detect_depth_; + // Refer to TransactionOptions::skip_concurrency_control + bool skip_concurrency_control_; + virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family, const Slice& key, SequenceNumber* tracked_at_seq); @@ -201,11 +204,6 @@ class WriteCommittedTxn : public PessimisticTransaction { const TransactionOptions& txn_options); virtual ~WriteCommittedTxn() {} - - SequenceNumber GetCommitedSeqNumber() const override { - assert(txn_state_ == COMMITED); - return _commited_seq_nr; - } private: Status PrepareInternal() override; @@ -221,10 +219,6 @@ class WriteCommittedTxn : public PessimisticTransaction { // No copying allowed WriteCommittedTxn(const WriteCommittedTxn&); void operator=(const WriteCommittedTxn&); - - protected: - // seq_nr of WriteBatch in WAL - SequenceNumber _commited_seq_nr; }; } // namespace rocksdb diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction_db.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction_db.cc similarity index 96% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction_db.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction_db.cc index 0f1d9b41e0..8eb21777a9 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction_db.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction_db.cc @@ -133,12 +133,22 @@ Status PessimisticTransactionDB::Initialize( WriteOptions w_options; w_options.sync = true; TransactionOptions t_options; + // This would help avoiding deadlock for keys that although exist in the WAL + // did not go through concurrency control. This includes the merge that + // MyRocks uses for auto-inc columns. 
It is safe to do so, since (i) if + // there is a conflict between the keys of two transactions that must be + // avoided, it is already avoided by the application, MyRocks, before the + // restart (ii) application, MyRocks, guarntees to rollback/commit the + // recovered transactions before new transactions start. + t_options.skip_concurrency_control = true; Transaction* real_trx = BeginTransaction(w_options, t_options, nullptr); assert(real_trx); real_trx->SetLogNumber(batch_info.log_number_); assert(seq != kMaxSequenceNumber); - real_trx->SetId(seq); + if (GetTxnDBOptions().write_policy != WRITE_COMMITTED) { + real_trx->SetId(seq); + } s = real_trx->SetName(recovered_trx->name_); if (!s.ok()) { diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction_db.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/pessimistic_transaction_db.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/pessimistic_transaction_db.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/snapshot_checker.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/snapshot_checker.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/snapshot_checker.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/snapshot_checker.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_base.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_base.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_base.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_base.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_base.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_base.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_base.h rename to 
3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_base.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_db_mutex_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_db_mutex_impl.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_db_mutex_impl.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_db_mutex_impl.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_db_mutex_impl.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_db_mutex_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_db_mutex_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_db_mutex_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_lock_mgr.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_lock_mgr.cc similarity index 98% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_lock_mgr.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_lock_mgr.cc index 19321de485..8086f7c7c0 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_lock_mgr.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_lock_mgr.cc @@ -104,7 +104,7 @@ void DeadlockInfoBuffer::AddNewPath(DeadlockPath path) { return; } - paths_buffer_[buffer_idx_] = path; + paths_buffer_[buffer_idx_] = std::move(path); buffer_idx_ = (buffer_idx_ + 1) % paths_buffer_.size(); } @@ -222,9 +222,9 @@ void TransactionLockMgr::RemoveColumnFamily(uint32_t column_family_id) { } } -// Look up the LockMap shared_ptr for a given column_family_id. +// Look up the LockMap std::shared_ptr for a given column_family_id. // Note: The LockMap is only valid as long as the caller is still holding on -// to the returned shared_ptr. +// to the returned std::shared_ptr. 
std::shared_ptr TransactionLockMgr::GetLockMap( uint32_t column_family_id) { // First check thread-local cache @@ -446,8 +446,8 @@ bool TransactionLockMgr::IncrementWaiters( const autovector& wait_ids, const std::string& key, const uint32_t& cf_id, const bool& exclusive, Env* const env) { auto id = txn->GetID(); - std::vector queue_parents(txn->GetDeadlockDetectDepth()); - std::vector queue_values(txn->GetDeadlockDetectDepth()); + std::vector queue_parents(static_cast(txn->GetDeadlockDetectDepth())); + std::vector queue_values(static_cast(txn->GetDeadlockDetectDepth())); std::lock_guard lock(wait_txn_map_mutex_); assert(!wait_txn_map_.Contains(id)); @@ -494,8 +494,8 @@ bool TransactionLockMgr::IncrementWaiters( auto extracted_info = wait_txn_map_.Get(queue_values[head]); path.push_back({queue_values[head], extracted_info.m_cf_id, - extracted_info.m_waiting_key, - extracted_info.m_exclusive}); + extracted_info.m_exclusive, + extracted_info.m_waiting_key}); head = queue_parents[head]; } env->GetCurrentTime(&deadlock_time); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_lock_mgr.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_lock_mgr.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_lock_mgr.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_lock_mgr.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_test.cc index 44c6e35964..0968b9a349 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_test.cc @@ -606,6 +606,7 @@ TEST_P(TransactionTest, DeadlockCycleShared) { } } +#ifndef ROCKSDB_VALGRIND_RUN 
TEST_P(TransactionStressTest, DeadlockCycle) { WriteOptions write_options; ReadOptions read_options; @@ -768,6 +769,7 @@ TEST_P(TransactionStressTest, DeadlockStress) { t.join(); } } +#endif // ROCKSDB_VALGRIND_RUN TEST_P(TransactionTest, CommitTimeBatchFailTest) { WriteOptions write_options; @@ -796,6 +798,7 @@ TEST_P(TransactionTest, LogMarkLeakTest) { WriteOptions write_options; options.write_buffer_size = 1024; ASSERT_OK(ReOpenNoDelete()); + assert(db != nullptr); Random rnd(47); std::vector txns; DBImpl* db_impl = reinterpret_cast(db->GetRootDB()); @@ -1096,6 +1099,7 @@ TEST_P(TransactionTest, TwoPhaseEmptyWriteTest) { } } +#ifndef ROCKSDB_VALGRIND_RUN TEST_P(TransactionStressTest, TwoPhaseExpirationTest) { Status s; @@ -1254,6 +1258,7 @@ TEST_P(TransactionTest, PersistentTwoPhaseTransactionTest) { reinterpret_cast(db)->TEST_Crash(); s = ReOpenNoDelete(); ASSERT_OK(s); + assert(db != nullptr); db_impl = reinterpret_cast(db->GetRootDB()); // find trans in list of prepared transactions @@ -1332,6 +1337,7 @@ TEST_P(TransactionTest, PersistentTwoPhaseTransactionTest) { // deleting transaction should unregister transaction ASSERT_EQ(db->GetTransactionByName("xid"), nullptr); } +#endif // ROCKSDB_VALGRIND_RUN // TODO this test needs to be updated with serial commits TEST_P(TransactionTest, DISABLED_TwoPhaseMultiThreadTest) { @@ -1717,7 +1723,7 @@ TEST_P(TransactionTest, TwoPhaseLogRollingTest) { } // flush only cfa memtable - s = db_impl->TEST_FlushMemTable(true, cfa); + s = db_impl->TEST_FlushMemTable(true, false, cfa); ASSERT_OK(s); switch (txn_db_options.write_policy) { @@ -1736,7 +1742,7 @@ TEST_P(TransactionTest, TwoPhaseLogRollingTest) { } // flush only cfb memtable - s = db_impl->TEST_FlushMemTable(true, cfb); + s = db_impl->TEST_FlushMemTable(true, false, cfb); ASSERT_OK(s); // should show not dependency on logs @@ -3786,7 +3792,7 @@ TEST_P(TransactionTest, SavepointTest3) { s = txn1->Put("A", ""); ASSERT_OK(s); - + s = txn1->PopSavePoint(); // Still no 
SavePoint present ASSERT_TRUE(s.IsNotFound()); @@ -3796,21 +3802,21 @@ TEST_P(TransactionTest, SavepointTest3) { ASSERT_OK(s); s = txn1->PopSavePoint(); // Remove 1 - ASSERT_TRUE(txn1->RollbackToSavePoint().IsNotFound()); + ASSERT_TRUE(txn1->RollbackToSavePoint().IsNotFound()); - // Verify that "A" is still locked + // Verify that "A" is still locked Transaction* txn2 = db->BeginTransaction(write_options, txn_options); ASSERT_TRUE(txn2); s = txn2->Put("A", "a2"); ASSERT_TRUE(s.IsTimedOut()); delete txn2; - + txn1->SetSavePoint(); // 2 s = txn1->Put("B", "b"); ASSERT_OK(s); - + txn1->SetSavePoint(); // 3 s = txn1->Put("B", "b2"); @@ -3820,7 +3826,7 @@ TEST_P(TransactionTest, SavepointTest3) { s = txn1->PopSavePoint(); ASSERT_OK(s); - + s = txn1->PopSavePoint(); ASSERT_TRUE(s.IsNotFound()); @@ -3834,12 +3840,12 @@ TEST_P(TransactionTest, SavepointTest3) { s = db->Get(read_options, "A", &value); ASSERT_OK(s); ASSERT_EQ("a", value); - + // tnx1 should have set "B" to just "b" s = db->Get(read_options, "B", &value); ASSERT_OK(s); ASSERT_EQ("b", value); - + s = db->Get(read_options, "C", &value); ASSERT_TRUE(s.IsNotFound()); } @@ -5511,7 +5517,7 @@ TEST_P(TransactionTest, DuplicateKeys) { db->FlushWAL(true); // Flush only cf 1 reinterpret_cast(db->GetRootDB()) - ->TEST_FlushMemTable(true, handles[1]); + ->TEST_FlushMemTable(true, false, handles[1]); reinterpret_cast(db)->TEST_Crash(); ASSERT_OK(ReOpenNoDelete(cfds, &handles)); txn0 = db->GetTransactionByName("xid"); @@ -5549,7 +5555,7 @@ TEST_P(TransactionTest, DuplicateKeys) { ASSERT_OK(db->FlushWAL(true)); // Flush only cf 1 reinterpret_cast(db->GetRootDB()) - ->TEST_FlushMemTable(true, handles[1]); + ->TEST_FlushMemTable(true, false, handles[1]); reinterpret_cast(db)->TEST_Crash(); ASSERT_OK(ReOpenNoDelete(cfds, &handles)); txn0 = db->GetTransactionByName("xid"); @@ -5582,7 +5588,7 @@ TEST_P(TransactionTest, DuplicateKeys) { ASSERT_OK(db->FlushWAL(true)); // Flush only cf 1 reinterpret_cast(db->GetRootDB()) - 
->TEST_FlushMemTable(true, handles[1]); + ->TEST_FlushMemTable(true, false, handles[1]); reinterpret_cast(db)->TEST_Crash(); ASSERT_OK(ReOpenNoDelete(cfds, &handles)); txn0 = db->GetTransactionByName("xid"); @@ -5609,7 +5615,7 @@ TEST_P(TransactionTest, DuplicateKeys) { ASSERT_OK(db->FlushWAL(true)); // Flush only cf 1 reinterpret_cast(db->GetRootDB()) - ->TEST_FlushMemTable(true, handles[1]); + ->TEST_FlushMemTable(true, false, handles[1]); reinterpret_cast(db)->TEST_Crash(); ASSERT_OK(ReOpenNoDelete(cfds, &handles)); txn0 = db->GetTransactionByName("xid"); @@ -5636,7 +5642,7 @@ TEST_P(TransactionTest, DuplicateKeys) { ASSERT_OK(db->FlushWAL(true)); // Flush only cf 1 reinterpret_cast(db->GetRootDB()) - ->TEST_FlushMemTable(true, handles[1]); + ->TEST_FlushMemTable(true, false, handles[1]); reinterpret_cast(db)->TEST_Crash(); ASSERT_OK(ReOpenNoDelete(cfds, &handles)); txn0 = db->GetTransactionByName("xid"); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_test.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_test.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_test.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_test.h index d024cecb21..cdc014acb2 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_test.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_test.h @@ -100,6 +100,7 @@ class TransactionTestBase : public ::testing::Test { } else { s = OpenWithStackableDB(); } + assert(!s.ok() || db != nullptr); return s; } @@ -121,6 +122,7 @@ class TransactionTestBase : public ::testing::Test { } else { s = OpenWithStackableDB(cfs, handles); } + assert(db != nullptr); return s; } @@ -134,6 +136,7 @@ class TransactionTestBase : public ::testing::Test { } else { s = OpenWithStackableDB(); } + assert(db != nullptr); return s; } @@ -184,15 +187,17 @@ class TransactionTestBase : public ::testing::Test { txn_db_options.write_policy 
== WRITE_PREPARED; Status s = DBImpl::Open(options_copy, dbname, column_families, &handles, &root_db, use_seq_per_batch, use_batch_per_txn); - StackableDB* stackable_db = new StackableDB(root_db); - if (s.ok()) { - assert(root_db != nullptr); - assert(handles.size() == 1); - s = TransactionDB::WrapStackableDB(stackable_db, txn_db_options, - compaction_enabled_cf_indices, handles, - &db); - delete handles[0]; + if (!s.ok()) { + delete root_db; + return s; } + StackableDB* stackable_db = new StackableDB(root_db); + assert(root_db != nullptr); + assert(handles.size() == 1); + s = TransactionDB::WrapStackableDB(stackable_db, txn_db_options, + compaction_enabled_cf_indices, handles, + &db); + delete handles[0]; if (!s.ok()) { delete stackable_db; // just in case it was not deleted (and not set to nullptr). diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_util.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_util.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_util.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_util.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_util.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_util.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/transaction_util.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/transaction_util.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_transaction_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_transaction_test.cc similarity index 94% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_transaction_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_transaction_test.cc index 391d58defd..1d645d237f 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_transaction_test.cc +++ 
b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_transaction_test.cc @@ -731,6 +731,71 @@ TEST_P(WritePreparedTransactionTest, MaybeUpdateOldCommitMap) { MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false); } +// Reproduce the bug with two snapshots with the same seuqence number and test +// that the release of the first snapshot will not affect the reads by the other +// snapshot +TEST_P(WritePreparedTransactionTest, DoubleSnapshot) { + TransactionOptions txn_options; + Status s; + + // Insert initial value + ASSERT_OK(db->Put(WriteOptions(), "key", "value1")); + + WritePreparedTxnDB* wp_db = dynamic_cast(db); + Transaction* txn = + wp_db->BeginTransaction(WriteOptions(), txn_options, nullptr); + ASSERT_OK(txn->SetName("txn")); + ASSERT_OK(txn->Put("key", "value2")); + ASSERT_OK(txn->Prepare()); + // Three snapshots with the same seq number + const Snapshot* snapshot0 = wp_db->GetSnapshot(); + const Snapshot* snapshot1 = wp_db->GetSnapshot(); + const Snapshot* snapshot2 = wp_db->GetSnapshot(); + ASSERT_OK(txn->Commit()); + SequenceNumber cache_size = wp_db->COMMIT_CACHE_SIZE; + SequenceNumber overlap_seq = txn->GetId() + cache_size; + delete txn; + + // 4th snapshot with a larger seq + const Snapshot* snapshot3 = wp_db->GetSnapshot(); + // Cause an eviction to advance max evicted seq number + // This also fetches the 4 snapshots from db since their seq is lower than the + // new max + wp_db->AddCommitted(overlap_seq, overlap_seq); + + ReadOptions ropt; + // It should see the value before commit + ropt.snapshot = snapshot2; + PinnableSlice pinnable_val; + s = wp_db->Get(ropt, wp_db->DefaultColumnFamily(), "key", &pinnable_val); + ASSERT_OK(s); + ASSERT_TRUE(pinnable_val == "value1"); + pinnable_val.Reset(); + + wp_db->ReleaseSnapshot(snapshot1); + + // It should still see the value before commit + s = wp_db->Get(ropt, wp_db->DefaultColumnFamily(), "key", &pinnable_val); + ASSERT_OK(s); + ASSERT_TRUE(pinnable_val == "value1"); + 
pinnable_val.Reset(); + + // Cause an eviction to advance max evicted seq number and trigger updating + // the snapshot list + overlap_seq += cache_size; + wp_db->AddCommitted(overlap_seq, overlap_seq); + + // It should still see the value before commit + s = wp_db->Get(ropt, wp_db->DefaultColumnFamily(), "key", &pinnable_val); + ASSERT_OK(s); + ASSERT_TRUE(pinnable_val == "value1"); + pinnable_val.Reset(); + + wp_db->ReleaseSnapshot(snapshot0); + wp_db->ReleaseSnapshot(snapshot2); + wp_db->ReleaseSnapshot(snapshot3); +} + // Test that the entries in old_commit_map_ get garbage collected properly TEST_P(WritePreparedTransactionTest, OldCommitMapGC) { const size_t snapshot_cache_bits = 0; @@ -816,6 +881,7 @@ TEST_P(WritePreparedTransactionTest, CheckAgainstSnapshotsTest) { std::vector snapshots = {100l, 200l, 300l, 400l, 500l, 600l, 700l, 800l, 900l}; const size_t snapshot_cache_bits = 2; + const uint64_t cache_size = 1ul << snapshot_cache_bits; // Safety check to express the intended size in the test. Can be adjusted if // the snapshots lists changed. 
assert((1ul << snapshot_cache_bits) * 2 + 1 == snapshots.size()); @@ -843,6 +909,57 @@ TEST_P(WritePreparedTransactionTest, CheckAgainstSnapshotsTest) { commit_entry.prep_seq <= snapshots.back(); ASSERT_EQ(expect_update, !wp_db->old_commit_map_empty_); } + + // Test that search will include multiple snapshot from snapshot cache + { + // exclude first and last item in the cache + CommitEntry commit_entry = {snapshots.front() + 1, + snapshots[cache_size - 1] - 1}; + wp_db->old_commit_map_empty_ = true; // reset + wp_db->old_commit_map_.clear(); + wp_db->CheckAgainstSnapshots(commit_entry); + ASSERT_EQ(wp_db->old_commit_map_.size(), cache_size - 2); + } + + // Test that search will include multiple snapshot from old snapshots + { + // include two in the middle + CommitEntry commit_entry = {snapshots[cache_size] + 1, + snapshots[cache_size + 2] + 1}; + wp_db->old_commit_map_empty_ = true; // reset + wp_db->old_commit_map_.clear(); + wp_db->CheckAgainstSnapshots(commit_entry); + ASSERT_EQ(wp_db->old_commit_map_.size(), 2); + } + + // Test that search will include both snapshot cache and old snapshots + // Case 1: includes all in snapshot cache + { + CommitEntry commit_entry = {snapshots.front() - 1, snapshots.back() + 1}; + wp_db->old_commit_map_empty_ = true; // reset + wp_db->old_commit_map_.clear(); + wp_db->CheckAgainstSnapshots(commit_entry); + ASSERT_EQ(wp_db->old_commit_map_.size(), snapshots.size()); + } + + // Case 2: includes all snapshot caches except the smallest + { + CommitEntry commit_entry = {snapshots.front() + 1, snapshots.back() + 1}; + wp_db->old_commit_map_empty_ = true; // reset + wp_db->old_commit_map_.clear(); + wp_db->CheckAgainstSnapshots(commit_entry); + ASSERT_EQ(wp_db->old_commit_map_.size(), snapshots.size() - 1); + } + + // Case 3: includes only the largest of snapshot cache + { + CommitEntry commit_entry = {snapshots[cache_size - 1] - 1, + snapshots.back() + 1}; + wp_db->old_commit_map_empty_ = true; // reset + 
wp_db->old_commit_map_.clear(); + wp_db->CheckAgainstSnapshots(commit_entry); + ASSERT_EQ(wp_db->old_commit_map_.size(), snapshots.size() - cache_size + 1); + } } // This test is too slow for travis @@ -1015,6 +1132,7 @@ TEST_P(WritePreparedTransactionTest, AdvanceMaxEvictedSeqWithDuplicatesTest) { wp_db->db_impl_->FlushWAL(true); wp_db->TEST_Crash(); ReOpenNoDelete(); + assert(db != nullptr); wp_db = dynamic_cast(db); wp_db->AdvanceMaxEvictedSeq(0, new_max); s = db->Get(ropt, db->DefaultColumnFamily(), "key", &pinnable_val); @@ -1146,6 +1264,7 @@ TEST_P(SeqAdvanceConcurrentTest, SeqAdvanceConcurrentTest) { // Check if recovery preserves the last sequence number db_impl->FlushWAL(true); ReOpenNoDelete(); + assert(db != nullptr); db_impl = reinterpret_cast(db->GetRootDB()); seq = db_impl->TEST_GetLastVisibleSequence(); ASSERT_EQ(exp_seq, seq); @@ -1158,6 +1277,7 @@ TEST_P(SeqAdvanceConcurrentTest, SeqAdvanceConcurrentTest) { // Check if recovery after flush preserves the last sequence number db_impl->FlushWAL(true); ReOpenNoDelete(); + assert(db != nullptr); db_impl = reinterpret_cast(db->GetRootDB()); seq = db_impl->GetLatestSequenceNumber(); ASSERT_EQ(exp_seq, seq); @@ -1212,6 +1332,7 @@ TEST_P(WritePreparedTransactionTest, BasicRecoveryTest) { wp_db->db_impl_->FlushWAL(true); wp_db->TEST_Crash(); ReOpenNoDelete(); + assert(db != nullptr); wp_db = dynamic_cast(db); // After recovery, all the uncommitted txns (0 and 1) should be inserted into // delayed_prepared_ @@ -1256,6 +1377,7 @@ TEST_P(WritePreparedTransactionTest, BasicRecoveryTest) { wp_db->db_impl_->FlushWAL(true); wp_db->TEST_Crash(); ReOpenNoDelete(); + assert(db != nullptr); wp_db = dynamic_cast(db); ASSERT_TRUE(wp_db->prepared_txns_.empty()); ASSERT_FALSE(wp_db->delayed_prepared_empty_); @@ -1290,6 +1412,7 @@ TEST_P(WritePreparedTransactionTest, BasicRecoveryTest) { delete txn2; wp_db->db_impl_->FlushWAL(true); ReOpenNoDelete(); + assert(db != nullptr); wp_db = dynamic_cast(db); 
ASSERT_TRUE(wp_db->prepared_txns_.empty()); ASSERT_TRUE(wp_db->delayed_prepared_empty_); @@ -1301,55 +1424,6 @@ TEST_P(WritePreparedTransactionTest, BasicRecoveryTest) { pinnable_val.Reset(); } -// After recovery the new transactions should still conflict with recovered -// transactions. -TEST_P(WritePreparedTransactionTest, ConflictDetectionAfterRecoveryTest) { - options.disable_auto_compactions = true; - ReOpen(); - - TransactionOptions txn_options; - WriteOptions write_options; - size_t index = 0; - Transaction* txn0 = db->BeginTransaction(write_options, txn_options); - auto istr0 = std::to_string(index); - auto s = txn0->SetName("xid" + istr0); - ASSERT_OK(s); - s = txn0->Put(Slice("key" + istr0), Slice("bar0" + istr0)); - ASSERT_OK(s); - s = txn0->Prepare(); - - // With the same index 0 and key prefix, txn_t0 should conflict with txn0 - txn_t0_with_status(0, Status::TimedOut()); - delete txn0; - - auto db_impl = reinterpret_cast(db->GetRootDB()); - db_impl->FlushWAL(true); - dynamic_cast(db)->TEST_Crash(); - ReOpenNoDelete(); - - // It should still conflict after the recovery - txn_t0_with_status(0, Status::TimedOut()); - - db_impl = reinterpret_cast(db->GetRootDB()); - db_impl->FlushWAL(true); - ReOpenNoDelete(); - - // Check that a recovered txn will still cause conflicts after 2nd recovery - txn_t0_with_status(0, Status::TimedOut()); - - txn0 = db->GetTransactionByName("xid" + istr0); - ASSERT_NE(txn0, nullptr); - txn0->Commit(); - delete txn0; - - db_impl = reinterpret_cast(db->GetRootDB()); - db_impl->FlushWAL(true); - ReOpenNoDelete(); - - // tnx0 is now committed and should no longer cause a conflict - txn_t0_with_status(0, Status::OK()); -} - // After recovery the commit map is empty while the max is set. The code would // go through a different path which requires a separate test. 
TEST_P(WritePreparedTransactionTest, IsInSnapshotEmptyMapTest) { @@ -1590,6 +1664,7 @@ TEST_P(WritePreparedTransactionTest, RollbackTest) { db_impl->FlushWAL(true); dynamic_cast(db)->TEST_Crash(); ReOpenNoDelete(); + assert(db != nullptr); wp_db = dynamic_cast(db); txn = db->GetTransactionByName("xid0"); ASSERT_FALSE(wp_db->delayed_prepared_empty_); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn_db.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn_db.cc similarity index 91% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn_db.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn_db.cc index 34ed04aa67..ca728d5071 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn_db.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn_db.cc @@ -379,9 +379,9 @@ void WritePreparedTxnDB::Init(const TransactionDBOptions& /* unused */) { // around. 
INC_STEP_FOR_MAX_EVICTED = std::max(COMMIT_CACHE_SIZE / 100, static_cast(1)); - snapshot_cache_ = unique_ptr[]>( + snapshot_cache_ = std::unique_ptr[]>( new std::atomic[SNAPSHOT_CACHE_SIZE] {}); - commit_cache_ = unique_ptr[]>( + commit_cache_ = std::unique_ptr[]>( new std::atomic[COMMIT_CACHE_SIZE] {}); } @@ -460,7 +460,7 @@ void WritePreparedTxnDB::RemovePrepared(const uint64_t prepare_seq, bool WritePreparedTxnDB::GetCommitEntry(const uint64_t indexed_seq, CommitEntry64b* entry_64b, CommitEntry* entry) const { - *entry_64b = commit_cache_[indexed_seq].load(std::memory_order_acquire); + *entry_64b = commit_cache_[static_cast(indexed_seq)].load(std::memory_order_acquire); bool valid = entry_64b->Parse(indexed_seq, entry, FORMAT); return valid; } @@ -469,7 +469,7 @@ bool WritePreparedTxnDB::AddCommitEntry(const uint64_t indexed_seq, const CommitEntry& new_entry, CommitEntry* evicted_entry) { CommitEntry64b new_entry_64b(new_entry, FORMAT); - CommitEntry64b evicted_entry_64b = commit_cache_[indexed_seq].exchange( + CommitEntry64b evicted_entry_64b = commit_cache_[static_cast(indexed_seq)].exchange( new_entry_64b, std::memory_order_acq_rel); bool valid = evicted_entry_64b.Parse(indexed_seq, evicted_entry, FORMAT); return valid; @@ -478,7 +478,7 @@ bool WritePreparedTxnDB::AddCommitEntry(const uint64_t indexed_seq, bool WritePreparedTxnDB::ExchangeCommitEntry(const uint64_t indexed_seq, CommitEntry64b& expected_entry_64b, const CommitEntry& new_entry) { - auto& atomic_entry = commit_cache_[indexed_seq]; + auto& atomic_entry = commit_cache_[static_cast(indexed_seq)]; CommitEntry64b new_entry_64b(new_entry, FORMAT); bool succ = atomic_entry.compare_exchange_strong( expected_entry_64b, new_entry_64b, std::memory_order_acq_rel, @@ -554,12 +554,6 @@ const std::vector WritePreparedTxnDB::GetSnapshotListFromDB( return db_impl_->snapshots().GetAll(nullptr, max); } -void WritePreparedTxnDB::ReleaseSnapshot(const Snapshot* snapshot) { - auto snap_seq = 
snapshot->GetSequenceNumber(); - ReleaseSnapshotInternal(snap_seq); - db_impl_->ReleaseSnapshot(snapshot); -} - void WritePreparedTxnDB::ReleaseSnapshotInternal( const SequenceNumber snap_seq) { // relax is enough since max increases monotonically, i.e., if snap_seq < @@ -572,14 +566,16 @@ void WritePreparedTxnDB::ReleaseSnapshotInternal( bool need_gc = false; { WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD); - ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead"); + ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead for %" PRIu64, + snap_seq); ReadLock rl(&old_commit_map_mutex_); auto prep_set_entry = old_commit_map_.find(snap_seq); need_gc = prep_set_entry != old_commit_map_.end(); } if (need_gc) { WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD); - ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead"); + ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead for %" PRIu64, + snap_seq); WriteLock wl(&old_commit_map_mutex_); old_commit_map_.erase(snap_seq); old_commit_map_empty_.store(old_commit_map_.empty(), @@ -588,6 +584,33 @@ void WritePreparedTxnDB::ReleaseSnapshotInternal( } } +void WritePreparedTxnDB::CleanupReleasedSnapshots( + const std::vector& new_snapshots, + const std::vector& old_snapshots) { + auto newi = new_snapshots.begin(); + auto oldi = old_snapshots.begin(); + for (; newi != new_snapshots.end() && oldi != old_snapshots.end();) { + assert(*newi >= *oldi); // cannot have new snapshots with lower seq + if (*newi == *oldi) { // still not released + auto value = *newi; + while (newi != new_snapshots.end() && *newi == value) { + newi++; + } + while (oldi != old_snapshots.end() && *oldi == value) { + oldi++; + } + } else { + assert(*newi > *oldi); // *oldi is released + ReleaseSnapshotInternal(*oldi); + oldi++; + } + } + // Everything remained in old_snapshots is released and must be cleaned up + for (; oldi != old_snapshots.end(); oldi++) { + ReleaseSnapshotInternal(*oldi); + } +} + void WritePreparedTxnDB::UpdateSnapshots( 
const std::vector& snapshots, const SequenceNumber& version) { @@ -636,6 +659,12 @@ void WritePreparedTxnDB::UpdateSnapshots( // Update the size at the end. Otherwise a parallel reader might read // items that are not set yet. snapshots_total_.store(snapshots.size(), std::memory_order_release); + + // Note: this must be done after the snapshots data structures are updated + // with the new list of snapshots. + CleanupReleasedSnapshots(snapshots, snapshots_all_); + snapshots_all_ = snapshots; + TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:end"); TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:end"); } @@ -654,13 +683,20 @@ void WritePreparedTxnDB::CheckAgainstSnapshots(const CommitEntry& evicted) { // place before gets overwritten the reader that reads bottom-up will // eventully see it. const bool next_is_larger = true; - SequenceNumber snapshot_seq = kMaxSequenceNumber; + // We will set to true if the border line snapshot suggests that. + bool search_larger_list = false; size_t ip1 = std::min(cnt, SNAPSHOT_CACHE_SIZE); for (; 0 < ip1; ip1--) { - snapshot_seq = snapshot_cache_[ip1 - 1].load(std::memory_order_acquire); + SequenceNumber snapshot_seq = + snapshot_cache_[ip1 - 1].load(std::memory_order_acquire); TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:", ++sync_i); TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:", sync_i); + if (ip1 == SNAPSHOT_CACHE_SIZE) { // border line snapshot + // snapshot_seq < commit_seq => larger_snapshot_seq <= commit_seq + // then later also continue the search to larger snapshots + search_larger_list = snapshot_seq < evicted.commit_seq; + } if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq, snapshot_seq, !next_is_larger)) { break; @@ -675,17 +711,20 @@ void WritePreparedTxnDB::CheckAgainstSnapshots(const CommitEntry& evicted) { #endif TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:end"); 
TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:end"); - if (UNLIKELY(SNAPSHOT_CACHE_SIZE < cnt && ip1 == SNAPSHOT_CACHE_SIZE && - snapshot_seq < evicted.prep_seq)) { + if (UNLIKELY(SNAPSHOT_CACHE_SIZE < cnt && search_larger_list)) { // Then access the less efficient list of snapshots_ WPRecordTick(TXN_SNAPSHOT_MUTEX_OVERHEAD); - ROCKS_LOG_WARN(info_log_, "snapshots_mutex_ overhead"); + ROCKS_LOG_WARN(info_log_, + "snapshots_mutex_ overhead for <%" PRIu64 ",%" PRIu64 + "> with %" ROCKSDB_PRIszt " snapshots", + evicted.prep_seq, evicted.commit_seq, cnt); ReadLock rl(&snapshots_mutex_); // Items could have moved from the snapshots_ to snapshot_cache_ before // accquiring the lock. To make sure that we do not miss a valid snapshot, // read snapshot_cache_ again while holding the lock. for (size_t i = 0; i < SNAPSHOT_CACHE_SIZE; i++) { - snapshot_seq = snapshot_cache_[i].load(std::memory_order_acquire); + SequenceNumber snapshot_seq = + snapshot_cache_[i].load(std::memory_order_acquire); if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq, snapshot_seq, next_is_larger)) { break; @@ -713,7 +752,10 @@ bool WritePreparedTxnDB::MaybeUpdateOldCommitMap( // then snapshot_seq < commit_seq if (prep_seq <= snapshot_seq) { // overlapping range WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD); - ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead"); + ROCKS_LOG_WARN(info_log_, + "old_commit_map_mutex_ overhead for %" PRIu64 + " commit entry: <%" PRIu64 ",%" PRIu64 ">", + snapshot_seq, prep_seq, commit_seq); WriteLock wl(&old_commit_map_mutex_); old_commit_map_empty_.store(false, std::memory_order_release); auto& vec = old_commit_map_[snapshot_seq]; diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn_db.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn_db.h similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn_db.h rename to 
3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn_db.h index ec76e27163..e0263d4f7b 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_prepared_txn_db.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_prepared_txn_db.h @@ -112,8 +112,6 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { const std::vector& column_families, std::vector* iterators) override; - virtual void ReleaseSnapshot(const Snapshot* snapshot) override; - // Check whether the transaction that wrote the value with sequence number seq // is visible to the snapshot with sequence number snapshot_seq. // Returns true if commit_seq <= snapshot_seq @@ -222,7 +220,6 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { // rare case and it is ok to pay the cost of mutex ReadLock for such old, // reading transactions. WPRecordTick(TXN_OLD_COMMIT_MAP_MUTEX_OVERHEAD); - ROCKS_LOG_WARN(info_log_, "old_commit_map_mutex_ overhead"); ReadLock rl(&old_commit_map_mutex_); auto prep_set_entry = old_commit_map_.find(snapshot_seq); bool found = prep_set_entry != old_commit_map_.end(); @@ -380,6 +377,7 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { friend class WritePreparedTransactionTest_AdvanceMaxEvictedSeqWithDuplicatesTest_Test; friend class WritePreparedTransactionTest_BasicRecoveryTest_Test; + friend class WritePreparedTransactionTest_DoubleSnapshot_Test; friend class WritePreparedTransactionTest_IsInSnapshotEmptyMapTest_Test; friend class WritePreparedTransactionTest_OldCommitMapGC_Test; friend class WritePreparedTransactionTest_RollbackTest_Test; @@ -519,6 +517,11 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { // version value. void UpdateSnapshots(const std::vector& snapshots, const SequenceNumber& version); + // Check the new list of new snapshots against the old one to see if any of + // the snapshots are released and to do the cleanup for the released snapshot. 
+ void CleanupReleasedSnapshots( + const std::vector& new_snapshots, + const std::vector& old_snapshots); // Check an evicted entry against live snapshots to see if it should be kept // around or it can be safely discarded (and hence assume committed for all @@ -549,10 +552,14 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { static const size_t DEF_SNAPSHOT_CACHE_BITS = static_cast(7); const size_t SNAPSHOT_CACHE_BITS; const size_t SNAPSHOT_CACHE_SIZE; - unique_ptr[]> snapshot_cache_; + std::unique_ptr[]> snapshot_cache_; // 2nd list for storing snapshots. The list sorted in ascending order. // Thread-safety is provided with snapshots_mutex_. std::vector snapshots_; + // The list of all snapshots: snapshots_ + snapshot_cache_. This list although + // redundant but simplifies CleanupOldSnapshots implementation. + // Thread-safety is provided with snapshots_mutex_. + std::vector snapshots_all_; // The version of the latest list of snapshots. This can be used to avoid // rewriting a list that is concurrently updated with a more recent version. SequenceNumber snapshots_version_ = 0; @@ -567,7 +574,7 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { const CommitEntry64bFormat FORMAT; // commit_cache_ must be initialized to zero to tell apart an empty index from // a filled one. Thread-safety is provided with commit_cache_mutex_. - unique_ptr[]> commit_cache_; + std::unique_ptr[]> commit_cache_; // The largest evicted *commit* sequence number from the commit_cache_. If a // seq is smaller than max_evicted_seq_ is might or might not be present in // commit_cache_. 
So commit_cache_ must first be checked before consulting diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_transaction_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_transaction_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_transaction_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_transaction_test.cc index 8ec7f6bfba..009991bb7c 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_transaction_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_transaction_test.cc @@ -250,7 +250,7 @@ TEST_P(WriteUnpreparedTransactionTest, RecoveryTest) { wup_db->db_impl_->FlushWAL(true); wup_db->TEST_Crash(); ReOpenNoDelete(); - wup_db = dynamic_cast(db); + assert(db != nullptr); db->GetAllPreparedTransactions(&prepared_trans); ASSERT_EQ(prepared_trans.size(), a == UNPREPARED ? 0 : 1); diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn_db.cc b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn_db.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn_db.cc rename to 
3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn_db.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn_db.h b/3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn_db.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/transactions/write_unprepared_txn_db.h rename to 3rdParty/rocksdb/v5.18.X/utilities/transactions/write_unprepared_txn_db.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/ttl/db_ttl_impl.cc b/3rdParty/rocksdb/v5.18.X/utilities/ttl/db_ttl_impl.cc similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/ttl/db_ttl_impl.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/ttl/db_ttl_impl.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/ttl/db_ttl_impl.h b/3rdParty/rocksdb/v5.18.X/utilities/ttl/db_ttl_impl.h similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/ttl/db_ttl_impl.h rename to 3rdParty/rocksdb/v5.18.X/utilities/ttl/db_ttl_impl.h diff --git a/3rdParty/rocksdb/v5.16.X/utilities/ttl/ttl_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/ttl/ttl_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/ttl/ttl_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/ttl/ttl_test.cc index ee7b317aaf..f434d18570 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/ttl/ttl_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/ttl/ttl_test.cc @@ -370,14 +370,14 @@ class TtlTest : public testing::Test { static const int64_t kSampleSize_ = 100; std::string dbname_; DBWithTTL* db_ttl_; - unique_ptr env_; + std::unique_ptr env_; private: Options options_; KVMap kvmap_; KVMap::iterator kv_it_; const std::string kNewValue_ = "new_value"; - unique_ptr test_comp_filter_; + std::unique_ptr test_comp_filter_; }; // class TtlTest // If TTL is non positive or not provided, the behaviour is TTL = infinity diff --git a/3rdParty/rocksdb/v5.16.X/utilities/util_merge_operators_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/util_merge_operators_test.cc 
similarity index 100% rename from 3rdParty/rocksdb/v5.16.X/utilities/util_merge_operators_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/util_merge_operators_test.cc diff --git a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index.cc b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index.cc similarity index 97% rename from 3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index.cc index 52eb700246..2202d6baf7 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index.cc @@ -352,14 +352,16 @@ class WBWIIteratorImpl : public WBWIIterator { } virtual void SeekToFirst() override { - WriteBatchIndexEntry search_entry(WriteBatchIndexEntry::kFlagMin, - column_family_id_, 0, 0); + WriteBatchIndexEntry search_entry( + nullptr /* search_key */, column_family_id_, + true /* is_forward_direction */, true /* is_seek_to_first */); skip_list_iter_.Seek(&search_entry); } virtual void SeekToLast() override { - WriteBatchIndexEntry search_entry(WriteBatchIndexEntry::kFlagMin, - column_family_id_ + 1, 0, 0); + WriteBatchIndexEntry search_entry( + nullptr /* search_key */, column_family_id_ + 1, + true /* is_forward_direction */, true /* is_seek_to_first */); skip_list_iter_.Seek(&search_entry); if (!skip_list_iter_.Valid()) { skip_list_iter_.SeekToLast(); @@ -369,12 +371,16 @@ class WBWIIteratorImpl : public WBWIIterator { } virtual void Seek(const Slice& key) override { - WriteBatchIndexEntry search_entry(&key, column_family_id_); + WriteBatchIndexEntry search_entry(&key, column_family_id_, + true /* is_forward_direction */, + false /* is_seek_to_first */); skip_list_iter_.Seek(&search_entry); } virtual void SeekForPrev(const Slice& key) override { - WriteBatchIndexEntry 
search_entry(&key, column_family_id_); + WriteBatchIndexEntry search_entry(&key, column_family_id_, + false /* is_forward_direction */, + false /* is_seek_to_first */); skip_list_iter_.SeekForPrev(&search_entry); } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_internal.cc similarity index 89% rename from 3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_internal.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_internal.cc index 14e5f2147d..243672ce4d 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -85,6 +85,20 @@ Status ReadableWriteBatch::GetEntryFromDataOffset(size_t data_offset, return Status::OK(); } +// If both of `entry1` and `entry2` point to real entry in write batch, we +// compare the entries as following: +// 1. first compare the column family, the one with larger CF will be larger; +// 2. Inside the same CF, we first decode the entry to find the key of the entry +// and the entry with larger key will be larger; +// 3. If two entries are of the same CF and offset, the one with larger offset +// will be larger. +// Some times either `entry1` or `entry2` is dummy entry, which is actually +// a search key. In this case, in step 2, we don't go ahead and decode the +// entry but use the value in WriteBatchIndexEntry::search_key. +// One special case is WriteBatchIndexEntry::key_size is kFlagMinInCf. +// This indicate that we are going to seek to the first of the column family. +// Once we see this, this entry will be smaller than all the real entries of +// the column family. 
int WriteBatchEntryComparator::operator()( const WriteBatchIndexEntry* entry1, const WriteBatchIndexEntry* entry2) const { @@ -94,9 +108,10 @@ int WriteBatchEntryComparator::operator()( return -1; } - if (entry1->offset == WriteBatchIndexEntry::kFlagMin) { + // Deal with special case of seeking to the beginning of a column family + if (entry1->is_min_in_cf()) { return -1; - } else if (entry2->offset == WriteBatchIndexEntry::kFlagMin) { + } else if (entry2->is_min_in_cf()) { return 1; } diff --git a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_internal.h b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_internal.h similarity index 61% rename from 3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_internal.h rename to 3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_internal.h index ac20f1b862..3eed7c724c 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -31,21 +31,52 @@ struct WriteBatchIndexEntry { key_offset(ko), key_size(ksz), search_key(nullptr) {} - WriteBatchIndexEntry(const Slice* sk, uint32_t c) - : offset(0), - column_family(c), + // Create a dummy entry as the search key. This index entry won't be backed + // by an entry from the write batch, but a pointer to the search key. Or a + // special flag of offset can indicate we are seek to first. + // @_search_key: the search key + // @_column_family: column family + // @is_forward_direction: true for Seek(). False for SeekForPrev() + // @is_seek_to_first: true if we seek to the beginning of the column family + // _search_key should be null in this case. 
+ WriteBatchIndexEntry(const Slice* _search_key, uint32_t _column_family, + bool is_forward_direction, bool is_seek_to_first) + // For SeekForPrev(), we need to make the dummy entry larger than any + // entry who has the same search key. Otherwise, we'll miss those entries. + : offset(is_forward_direction ? 0 : port::kMaxSizet), + column_family(_column_family), key_offset(0), - key_size(0), - search_key(sk) {} + key_size(is_seek_to_first ? kFlagMinInCf : 0), + search_key(_search_key) { + assert(_search_key != nullptr || is_seek_to_first); + } - // If this flag appears in the offset, it indicates a key that is smaller - // than any other entry for the same column family - static const size_t kFlagMin = port::kMaxSizet; + // If this flag appears in the key_size, it indicates a + // key that is smaller than any other entry for the same column family. + static const size_t kFlagMinInCf = port::kMaxSizet; - size_t offset; // offset of an entry in write batch's string buffer. - uint32_t column_family; // column family of the entry. + bool is_min_in_cf() const { + assert(key_size != kFlagMinInCf || + (key_offset == 0 && search_key == nullptr)); + return key_size == kFlagMinInCf; + } + + // offset of an entry in write batch's string buffer. If this is a dummy + // lookup key, in which case search_key != nullptr, offset is set to either + // 0 or max, only for comparison purpose. Because when entries have the same + // key, the entry with larger offset is larger, offset = 0 will make a seek + // key small or equal than all the entries with the seek key, so that Seek() + // will find all the entries of the same key. Similarly, offset = MAX will + // make the entry just larger than all entries with the search key so + // SeekForPrev() will see all the keys with the same key. + size_t offset; + uint32_t column_family; // c1olumn family of the entry. size_t key_offset; // offset of the key in write batch's string buffer. - size_t key_size; // size of the key. 
+ size_t key_size; // size of the key. kFlagMinInCf indicates + // that this is a dummy look up entry for + // SeekToFirst() to the beginning of the column + // family. We use the flag here to save a boolean + // in the struct. const Slice* search_key; // if not null, instead of reading keys from // write batch, use it to compare. This is used diff --git a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_test.cc b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_test.cc similarity index 99% rename from 3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_test.cc rename to 3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_test.cc index aa484cde99..d477968ca9 100644 --- a/3rdParty/rocksdb/v5.16.X/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/3rdParty/rocksdb/v5.18.X/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -621,7 +621,7 @@ TEST_F(WriteBatchWithIndexTest, TestRandomIteraratorWithBase) { for (int i = 0; i < 128; i++) { // Random walk and make sure iter and result_iter returns the // same key and value - int type = rnd.Uniform(5); + int type = rnd.Uniform(6); ASSERT_OK(iter->status()); switch (type) { case 0: @@ -642,7 +642,15 @@ TEST_F(WriteBatchWithIndexTest, TestRandomIteraratorWithBase) { result_iter->Seek(key); break; } - case 3: + case 3: { + // SeekForPrev to random key + auto key_idx = rnd.Uniform(static_cast(source_strings.size())); + auto key = source_strings[key_idx]; + iter->SeekForPrev(key); + result_iter->SeekForPrev(key); + break; + } + case 4: // Next if (is_valid) { iter->Next(); @@ -652,7 +660,7 @@ TEST_F(WriteBatchWithIndexTest, TestRandomIteraratorWithBase) { } break; default: - assert(type == 4); + assert(type == 5); // Prev if (is_valid) { iter->Prev(); diff --git a/arangod/RocksDBEngine/RocksDBTransactionState.cpp b/arangod/RocksDBEngine/RocksDBTransactionState.cpp index 
2be51a4a46..0a81430675 100644 --- a/arangod/RocksDBEngine/RocksDBTransactionState.cpp +++ b/arangod/RocksDBEngine/RocksDBTransactionState.cpp @@ -327,7 +327,8 @@ arangodb::Result RocksDBTransactionState::internalCommit() { if (result.ok()) { TRI_ASSERT(numOps > 0); // simon: should hold unless we're being stupid - rocksdb::SequenceNumber postCommitSeq = _rocksTransaction->GetCommitedSeqNumber(); + rocksdb::SequenceNumber postCommitSeq = _rocksTransaction->GetId(); + TRI_ASSERT(postCommitSeq != 0); if (ADB_LIKELY(numOps > 0)) { postCommitSeq += numOps - 1; // add to get to the next batch }