From cddd33a8076aec428dc33f39255d1dd1d652d2fd Mon Sep 17 00:00:00 2001 From: D-Sketon <2055272094@qq.com> Date: Sun, 5 Jan 2025 21:01:39 +0800 Subject: [PATCH 1/2] perf(PostCategory/PostTag): add binary relation index for performance --- lib/hexo/index.ts | 9 +++ lib/models/binary_relation_index.ts | 91 +++++++++++++++++++++++++++++ lib/models/category.ts | 8 +-- lib/models/post.ts | 18 +++--- lib/models/post_category.ts | 15 +++++ lib/models/post_tag.ts | 15 +++++ lib/models/tag.ts | 8 +-- test/scripts/models/category.ts | 2 + test/scripts/models/post.ts | 4 ++ test/scripts/models/tag.ts | 2 + 10 files changed, 156 insertions(+), 16 deletions(-) create mode 100644 lib/models/binary_relation_index.ts diff --git a/lib/hexo/index.ts b/lib/hexo/index.ts index 86926900a1..2c522db4ec 100644 --- a/lib/hexo/index.ts +++ b/lib/hexo/index.ts @@ -40,6 +40,7 @@ import type Box from '../box'; import type { AssetGenerator, LocalsType, NodeJSLikeCallback, NormalPageGenerator, NormalPostGenerator, PageGenerator, PostGenerator, SiteLocals } from '../types'; import type { AddSchemaTypeOptions } from 'warehouse/dist/types'; import type Schema from 'warehouse/dist/schema'; +import BinaryRelationIndex from '../models/binary_relation_index'; const libDir = dirname(__dirname); const dbVersion = 1; @@ -276,6 +277,10 @@ class Hexo extends EventEmitter { static lib_dir: string; static core_dir: string; static version: string; + public _binaryRelationIndex: { + post_tag: BinaryRelationIndex<'post_id', 'tag_id'>; + post_category: BinaryRelationIndex<'post_id', 'category_id'>; + }; constructor(base = process.cwd(), args: Args = {}) { super(); @@ -354,6 +359,10 @@ class Hexo extends EventEmitter { this.theme = new Theme(this); this.locals = new Locals(); this._bindLocals(); + this._binaryRelationIndex = { + post_tag: new BinaryRelationIndex<'post_id', 'tag_id'>('post_id', 'tag_id', 'PostTag', this), + post_category: new BinaryRelationIndex<'post_id', 'category_id'>('post_id', 'category_id', 'PostCategory', this) + }; } _bindLocals(): void { diff --git a/lib/models/binary_relation_index.ts b/lib/models/binary_relation_index.ts new file mode 100644 index 0000000000..7030122030 --- /dev/null +++ b/lib/models/binary_relation_index.ts @@ -0,0 +1,91 @@ +import type Hexo from '../hexo'; + +type BinaryRelationType = { + [key in K]: PropertyKey; +} & { + [key in V]: PropertyKey; +}; + +class BinaryRelationIndex { + keyIndex: Map> = new Map(); + valueIndex: Map> = new Map(); + key: K; + value: V; + ctx: Hexo; + schemaName: string; + + constructor(key: K, value: V, schemaName: string, ctx: Hexo) { + this.key = key; + this.value = value; + this.schemaName = schemaName; + this.ctx = ctx; + } + + saveHook(data: BinaryRelationType & { _id: PropertyKey }) { + const _id = data._id; + const key = data[this.key]; + const value = data[this.value]; + if (!this.keyIndex.has(key)) { + this.keyIndex.set(key, new Set()); + } + this.keyIndex.get(key).add(_id); + + if (!this.valueIndex.has(value)) { + this.valueIndex.set(value, new Set()); + } + this.valueIndex.get(value).add(_id); + } + + removeHook(data: BinaryRelationType & { _id: PropertyKey }) { + const _id = data._id; + const key = data[this.key]; + const value = data[this.value]; + this.keyIndex.get(key)?.delete(_id); + if (this.keyIndex.get(key)?.size === 0) { + this.keyIndex.delete(key); + } + this.valueIndex.get(value)?.delete(_id); + if (this.valueIndex.get(value)?.size === 0) { + this.valueIndex.delete(value); + } + } + + findById(_id: PropertyKey) { + const raw = this.ctx.model(this.schemaName).findById(_id, { lean: true }); + if (!raw) return; + return { ...raw }; + } + + find(query: Partial>) { + const key = query[this.key]; + const value = query[this.value]; + + if (key && value) { + const ids = this.keyIndex.get(key); + if (!ids) return []; + return Array.from(ids) + .map(_id => this.findById(_id)) + .filter(record => record?.[this.value] === value); + } + + if (key) { + const ids = this.keyIndex.get(key); + if (!ids) return []; + return Array.from(ids).map(_id => this.findById(_id)); + } + + if (value) { + const ids = this.valueIndex.get(value); + if (!ids) return []; + return Array.from(ids).map(_id => this.findById(_id)); + } + + return []; + } + + findOne(query: Partial>) { + return this.find(query)[0]; + } +} + +export default BinaryRelationIndex; diff --git a/lib/models/category.ts b/lib/models/category.ts index 2fdbfb58df..41de632f45 100644 --- a/lib/models/category.ts +++ b/lib/models/category.ts @@ -41,9 +41,9 @@ export = (ctx: Hexo) => { }); Category.virtual('posts').get(function() { - const PostCategory = ctx.model('PostCategory'); + const ReadOnlyPostCategory = ctx._binaryRelationIndex.post_category; - const ids = PostCategory.find({category_id: this._id}).map(item => item.post_id); + const ids = ReadOnlyPostCategory.find({category_id: this._id}).map(item => item.post_id); return ctx.locals.get('posts').find({ _id: {$in: ids} @@ -51,9 +51,9 @@ export = (ctx: Hexo) => { }); Category.virtual('length').get(function() { - const PostCategory = ctx.model('PostCategory'); + const ReadOnlyPostCategory = ctx._binaryRelationIndex.post_category; - return PostCategory.find({category_id: this._id}).length; + return ReadOnlyPostCategory.find({category_id: this._id}).length; }); // Check whether a category exists diff --git a/lib/models/post.ts b/lib/models/post.ts index 208e8ea477..6241e7151a 100644 --- a/lib/models/post.ts +++ b/lib/models/post.ts @@ -64,10 +64,10 @@ export = (ctx: Hexo) => { Post.virtual('tags').get(function() { return tagsGetterCache.apply(this._id, () => { - const PostTag = ctx.model('PostTag'); + const ReadOnlyPostTag = ctx._binaryRelationIndex.post_tag; const Tag = ctx.model('Tag'); - const ids = PostTag.find({post_id: this._id}, {lean: true}).map(item => item.tag_id); + const ids = ReadOnlyPostTag.find({post_id: this._id}).map(item => item.tag_id); return Tag.find({_id: {$in: ids}}); }); @@ -87,10 +87,11 @@ export = (ctx: Hexo) => { tagsGetterCache.flush(); tags = removeEmptyTag(tags); + const ReadOnlyPostTag = ctx._binaryRelationIndex.post_tag; const PostTag = ctx.model('PostTag'); const Tag = ctx.model('Tag'); const id = this._id; - const existed = PostTag.find({post_id: id}, {lean: true}).map(pickID); + const existed = ReadOnlyPostTag.find({post_id: id}).map(pickID); return Promise.map(tags, tag => { // Find the tag by name @@ -107,7 +108,7 @@ export = (ctx: Hexo) => { }); }).map(tag => { // Find the reference - const ref = PostTag.findOne({post_id: id, tag_id: tag._id}, {lean: true}); + const ref = ReadOnlyPostTag.findOne({post_id: id, tag_id: tag._id}); if (ref) return ref; // Insert the reference if not exist @@ -123,10 +124,10 @@ export = (ctx: Hexo) => { }); Post.virtual('categories').get(function() { - const PostCategory = ctx.model('PostCategory'); + const ReadOnlyPostCategory = ctx._binaryRelationIndex.post_category; const Category = ctx.model('Category'); - const ids = PostCategory.find({post_id: this._id}, {lean: true}).map(item => item.category_id); + const ids = ReadOnlyPostCategory.find({post_id: this._id}).map(item => item.category_id); return Category.find({_id: {$in: ids}}); }); @@ -142,11 +143,12 @@ export = (ctx: Hexo) => { return Array.isArray(cat) ? removeEmptyTag(cat) : `${cat}`; }); + const ReadOnlyPostCategory = ctx._binaryRelationIndex.post_category; const PostCategory = ctx.model('PostCategory'); const Category = ctx.model('Category'); const id = this._id; const allIds = []; - const existed = PostCategory.find({post_id: id}, {lean: true}).map(pickID); + const existed = ReadOnlyPostCategory.find({post_id: id}).map(pickID); const hasHierarchy = cats.filter(Array.isArray).length > 0; // Add a hierarchy of categories @@ -192,7 +194,7 @@ export = (ctx: Hexo) => { return (hasHierarchy ? Promise.each(cats, addHierarchy) : Promise.resolve(addHierarchy(cats)) ).then(() => allIds).map(catId => { // Find the reference - const ref = PostCategory.findOne({post_id: id, category_id: catId}, {lean: true}); + const ref = ReadOnlyPostCategory.findOne({post_id: id, category_id: catId}); if (ref) return ref; // Insert the reference if not exist diff --git a/lib/models/post_category.ts b/lib/models/post_category.ts index 8b8feb0c0a..cf2b9bb647 100644 --- a/lib/models/post_category.ts +++ b/lib/models/post_category.ts @@ -7,5 +7,20 @@ export = (ctx: Hexo) => { category_id: {type: warehouse.Schema.Types.CUID, ref: 'Category'} }); + PostCategory.pre('save', data => { + ctx._binaryRelationIndex.post_category.removeHook(data); + return data; + }); + + PostCategory.post('save', data => { + ctx._binaryRelationIndex.post_category.saveHook(data); + return data; + }); + + PostCategory.pre('remove', data => { + ctx._binaryRelationIndex.post_category.removeHook(data); + return data; + }); + return PostCategory; }; diff --git a/lib/models/post_tag.ts b/lib/models/post_tag.ts index b55533c49a..8ddfc063e7 100644 --- a/lib/models/post_tag.ts +++ b/lib/models/post_tag.ts @@ -7,5 +7,20 @@ export = (ctx: Hexo) => { tag_id: {type: warehouse.Schema.Types.CUID, ref: 'Tag'} }); + PostTag.pre('save', data => { + ctx._binaryRelationIndex.post_tag.removeHook(data); + return data; + }); + + PostTag.post('save', data => { + ctx._binaryRelationIndex.post_tag.saveHook(data); + return data; + }); + + PostTag.pre('remove', data => { + ctx._binaryRelationIndex.post_tag.removeHook(data); + return data; + }); + return PostTag; }; diff --git a/lib/models/tag.ts b/lib/models/tag.ts index 84a118aa25..9ca690ef43 100644 --- a/lib/models/tag.ts +++ b/lib/models/tag.ts @@ -32,9 +32,9 @@ export = (ctx: Hexo) => { }); Tag.virtual('posts').get(function() { - const PostTag = ctx.model('PostTag'); + const ReadOnlyPostTag = ctx._binaryRelationIndex.post_tag; - const ids = PostTag.find({tag_id: this._id}).map(item => item.post_id); + const ids = ReadOnlyPostTag.find({tag_id: this._id}).map(item => item.post_id); return ctx.locals.get('posts').find({ _id: {$in: ids} @@ -44,9 +44,9 @@ export = (ctx: Hexo) => { Tag.virtual('length').get(function() { // Note: this.posts.length is also working // But it's slow because `find` has to iterate over all posts - const PostTag = ctx.model('PostTag'); + const ReadOnlyPostTag = ctx._binaryRelationIndex.post_tag; - return PostTag.find({tag_id: this._id}).length; + return ReadOnlyPostTag.find({tag_id: this._id}).length; }); // Check whether a tag exists diff --git a/test/scripts/models/category.ts b/test/scripts/models/category.ts index c0b0009b10..f5c75006a5 100644 --- a/test/scripts/models/category.ts +++ b/test/scripts/models/category.ts @@ -6,6 +6,7 @@ describe('Category', () => { const hexo = new Hexo(); const Category = hexo.model('Category'); const Post = hexo.model('Post'); + const ReadOnlyPostCategory = hexo._binaryRelationIndex.post_category; const PostCategory = hexo.model('PostCategory'); before(() => hexo.init()); @@ -283,6 +284,7 @@ describe('Category', () => { await Category.removeById(cat._id!); PostCategory.find({category_id: cat._id}).should.have.lengthOf(0); + ReadOnlyPostCategory.find({category_id: cat._id}).should.have.lengthOf(0); await Promise.all(posts.map(post => post.remove())); }); diff --git a/test/scripts/models/post.ts b/test/scripts/models/post.ts index 258d5dac6e..71aa1ce949 100644 --- a/test/scripts/models/post.ts +++ b/test/scripts/models/post.ts @@ -11,6 +11,8 @@ describe('Post', () => { const Post = hexo.model('Post'); const Tag = hexo.model('Tag'); const Category = hexo.model('Category'); + const ReadOnlyPostTag = hexo._binaryRelationIndex.post_tag; + const ReadOnlyPostCategory = hexo._binaryRelationIndex.post_category; const PostTag = hexo.model('PostTag'); const PostCategory = hexo.model('PostCategory'); const Asset = hexo.model('Asset'); @@ -427,6 +429,7 @@ describe('Post', () => { }).then(post => post.setTags(['foo', 'bar', 'baz']) .thenReturn(Post.findById(post._id))).then(post => Post.removeById(post._id)).then(post => { PostTag.find({post_id: post._id}).should.have.lengthOf(0); + ReadOnlyPostTag.find({post_id: post._id}).should.have.lengthOf(0); Tag.findOne({name: 'foo'}).posts.should.have.lengthOf(0); Tag.findOne({name: 'bar'}).posts.should.have.lengthOf(0); Tag.findOne({name: 'baz'}).posts.should.have.lengthOf(0); @@ -438,6 +441,7 @@ describe('Post', () => { }).then(post => post.setCategories(['foo', 'bar', 'baz']) .thenReturn(Post.findById(post._id))).then(post => Post.removeById(post._id)).then(post => { PostCategory.find({post_id: post._id}).should.have.lengthOf(0); + ReadOnlyPostCategory.find({post_id: post._id}).should.have.lengthOf(0); Category.findOne({name: 'foo'}).posts.should.have.lengthOf(0); Category.findOne({name: 'bar'}).posts.should.have.lengthOf(0); Category.findOne({name: 'baz'}).posts.should.have.lengthOf(0); diff --git a/test/scripts/models/tag.ts b/test/scripts/models/tag.ts index 94108d6521..a80ec85383 100644 --- a/test/scripts/models/tag.ts +++ b/test/scripts/models/tag.ts @@ -7,6 +7,7 @@ describe('Tag', () => { const Tag = hexo.model('Tag'); const Post = hexo.model('Post'); const PostTag = hexo.model('PostTag'); + const ReadOnlyPostTag = hexo._binaryRelationIndex.post_tag; before(() => hexo.init()); @@ -244,6 +245,7 @@ describe('Tag', () => { await Tag.removeById(tag._id!); PostTag.find({tag_id: tag._id}).should.have.lengthOf(0); + ReadOnlyPostTag.find({tag_id: tag._id}).should.have.lengthOf(0); await Promise.all(posts.map(post => Post.removeById(post._id))); }); From e2583ac58b49ed5190047fd014fa8f3bf87518b3 Mon Sep 17 00:00:00 2001 From: D-Sketon <2055272094@qq.com> Date: Sun, 12 Jan 2025 11:40:00 +0800 Subject: [PATCH 2/2] fix: handle _import in warehouse --- lib/hexo/index.ts | 2 ++ lib/models/binary_relation_index.ts | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/lib/hexo/index.ts b/lib/hexo/index.ts index 2c522db4ec..2b04809e54 100644 --- a/lib/hexo/index.ts +++ b/lib/hexo/index.ts @@ -502,6 +502,8 @@ class Hexo extends EventEmitter { load(callback?: NodeJSLikeCallback): Promise { return loadDatabase(this).then(() => { + this._binaryRelationIndex.post_tag.load(); + this._binaryRelationIndex.post_category.load(); this.log.info('Start processing'); return Promise.all([ diff --git a/lib/models/binary_relation_index.ts b/lib/models/binary_relation_index.ts index 7030122030..b9da452271 100644 --- a/lib/models/binary_relation_index.ts +++ b/lib/models/binary_relation_index.ts @@ -21,6 +21,15 @@ class BinaryRelationIndex { this.ctx = ctx; } + load() { + this.keyIndex.clear(); + this.valueIndex.clear(); + const raw = this.ctx.model(this.schemaName).data; + for (const _id in raw) { + this.saveHook(raw[_id]); + } + } + saveHook(data: BinaryRelationType & { _id: PropertyKey }) { const _id = data._id; const key = data[this.key];