Skip to content

Commit

Permalink
修改反查词典生成逻辑,实现词组反查
Browse files (browse the repository at this point in the history)
已知问题:放弃了内存释放相关的代码
  • Loading branch information
siuze committed Feb 19, 2024
1 parent 140c49e commit 04f13a2
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 23 deletions.
26 changes: 13 additions & 13 deletions src/rime/dict/dict_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,20 +245,20 @@ bool DictCompiler::BuildTable(int table_index,
code.push_back(syllable_to_id[s]);
}
// release memory in time to reduce memory usage
RawCode().swap(r->raw_code);
auto ls = vocabulary.LocateEntries(code);
if (!ls) {
LOG(ERROR) << "Error locating entries in vocabulary.";
continue;
}
auto e = New<ShortDictEntry>();
e->code.swap(code);
e->text.swap(r->text);
e->weight = log(r->weight > 0 ? r->weight : DBL_EPSILON);
ls->push_back(e);
// RawCode().swap(r->raw_code);
// auto ls = vocabulary.LocateEntries(code);
// if (!ls) {
// LOG(ERROR) << "Error locating entries in vocabulary.";
// continue;
// }
// auto e = New<ShortDictEntry>();
// e->code.swap(code);
// e->text.swap(r->text);
// e->weight = log(r->weight > 0 ? r->weight : DBL_EPSILON);
// ls->push_back(e);
}
// release memory in time to reduce memory usage
vector<of<RawDictEntry>>().swap(collector.entries);
// vector<of<RawDictEntry>>().swap(collector.entries);
if (settings->sort_order() != "original") {
vocabulary.SortHomophones();
}
Expand All @@ -285,7 +285,7 @@ bool DictCompiler::BuildReverseDb(DictSettings* settings,
auto target_path = target_resolver_->ResolvePath(dict_name_ + ".reverse.bin");
ReverseDb reverse_db(target_path);
if (!reverse_db.Build(settings, collector.syllabary, vocabulary,
collector.stems, dict_file_checksum) ||
collector.stems, dict_file_checksum, collector) ||
!reverse_db.Save()) {
LOG(ERROR) << "error building reversedb.";
return false;
Expand Down
30 changes: 21 additions & 9 deletions src/rime/dict/reverse_lookup_dictionary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,30 @@ bool ReverseDb::Build(DictSettings* settings,
const Syllabary& syllabary,
const Vocabulary& vocabulary,
const ReverseLookupTable& stems,
uint32_t dict_file_checksum) {
uint32_t dict_file_checksum,
const EntryCollector& collector) {
LOG(INFO) << "building reversedb...";
ReverseLookupTable rev_table;
int syllable_id = 0;
for (const string& syllable : syllabary) {
auto it = vocabulary.find(syllable_id++);
if (it == vocabulary.end())
continue;
const auto& entries(it->second.entries);
for (const auto& e : entries) {
rev_table[e->text].insert(syllable);
}
// for (const string& syllable : syllabary) {
// std::cout<<"syllable_id:"<<syllable_id<<" syllable:
// "<<syllable<<std::endl;
// std::cout<<"词典大小:"<<vocabulary.size()<<std::endl;
// auto it = vocabulary.find(syllable_id++);
// if (it == vocabulary.end())
// continue;
// const auto& entries(it->second.entries);
// for (const auto& e : entries) {
// std::cout<<"syllable: "<<syllable<<" insert to:
// "<<e->text.c_str()<<std::endl;
// rev_table[e->text].insert(syllable);
// }
// }
for (const auto& e : collector.entries) {
std::cout << "添加词汇collector.entries: " << e->text
<< " 及编码 e->raw_code: " << e->raw_code.ToString()
<< std::endl;
rev_table[e->text].insert(e->raw_code.ToString());
}
StringTableBuilder key_trie_builder;
StringTableBuilder value_trie_builder;
Expand Down
4 changes: 3 additions & 1 deletion src/rime/dict/reverse_lookup_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <rime/dict/mapped_file.h>
#include <rime/dict/string_table.h>
#include <rime/dict/vocabulary.h>
#include <rime/dict/entry_collector.h>

namespace rime {

Expand Down Expand Up @@ -48,7 +49,8 @@ class ReverseDb : public MappedFile {
const Syllabary& syllabary,
const Vocabulary& vocabulary,
const ReverseLookupTable& stems,
uint32_t dict_file_checksum);
uint32_t dict_file_checksum,
const EntryCollector& collector);
bool Save();

uint32_t dict_file_checksum() const;
Expand Down

0 comments on commit 04f13a2

Please sign in to comment.