use trie for map lookup

This commit is contained in:
Hojun-Cho 2026-02-08 16:36:59 +09:00
parent 97f897755f
commit 5653298bb1
4 changed files with 190 additions and 80 deletions

22
dat.h
View File

@ -45,7 +45,6 @@ typedef struct Emit Emit;
struct Emit
{
int eat;
int flush;
Str s;
Str next;
Str dict;
@ -62,6 +61,25 @@ struct Hnode
int vlen;
};
/*
 * Tnode is one node of a Trie.  Nodes are stored in the Trie's
 * nodes array and refer to each other by array index, with -1
 * meaning "no such node".
 */
typedef struct Tnode Tnode;
struct Tnode
{
int child;	/* index of the first child, -1 if none */
int sibling;	/* index of the next node in the parent's child list, -1 if none */
char c;	/* key byte leading from the parent to this node */
char *val;	/* value for the key ending at this node; nil if no key ends here */
int vlen;	/* length of val */
};
/*
 * Trie is a byte-keyed trie whose nodes all live in a single
 * growable array.
 */
typedef struct Trie Trie;
struct Trie
{
int root;	/* index of the root node in nodes */
Tnode *nodes;	/* node storage; child, sibling and root are indices into it */
int n;	/* number of nodes in use */
int cap;	/* number of nodes allocated */
};
typedef struct Hmap Hmap;
struct Hmap
{
@ -79,7 +97,7 @@ struct Lang
int lang;
char *mapname;
char *dictname;
Hmap *map;
Trie *map;
Hmap *dict;
};

8
fn.h
View File

@ -12,7 +12,11 @@ Rune slastr(Str*);
Hmap* hmapalloc(int, int);
int hmapset(Hmap**, Str*, Str*);
Hnode* hmapget(Hmap*, Str*);
int mapget(Hmap*, Str*, Str*);
int mapget(Trie*, Str*, Str*);
Trie* trieopen(char*);
char* trieget(Trie*, char*, int, int*);
int trielookup(Trie*, char*, int, char**, int*);
Lang* getlang(int);
void mapinit(char*);
@ -22,7 +26,7 @@ void dictthread(void*);
void drawthread(void*);
void imthread(void*);
Emit trans(Im*, Rune);
Emit transmap(Im*, Rune);
void srvthread(void*);

110
strans.c
View File

@ -103,7 +103,7 @@ dotrans(Rune c, Str *com)
Emit e;
Dictreq req;
e = trans(&im, c);
e = transmap(&im, c);
if(e.s.n > 0)
sappend(com, &e.s);
sclear(&im.pre);
@ -128,55 +128,49 @@ getlang(int lang)
return nil;
}
/*
 * maplookup reports whether key, converted with stoutf, is present
 * in t either as a complete key or as a prefix of one.  Returns 1
 * if so, 0 otherwise (an empty key is never found).  When the key
 * carries a value and out is non-nil, out is initialised to that
 * value; for a prefix-only match out is left untouched.
 */
static int
maplookup(Trie *t, Str *key, Str *out)
{
	char utf[256];
	char *val = nil;
	int n, len = 0;

	if(key->n == 0)
		return 0;
	n = stoutf(key, utf, sizeof(utf));
	if(trielookup(t, utf, n, &val, &len) == 0)
		return 0;
	/* prefix-only hit, or caller does not want the value */
	if(out == nil || val == nil)
		return 1;
	sinit(out, val, len);
	return 1;
}
Emit
trans(Im *im, Rune c)
transmap(Im *im, Rune c)
{
Emit e = {0};
Str key, kana;
Hmap *h;
Rune last;
Str key;
Trie *t;
h = im->l->map;
t = im->l->map;
key = im->pre;
sputr(&key, c);
if(hmapget(h, &key)){
if(maplookup(t, &key, &e.dict)){
e.eat = 1;
e.next = key;
mapget(h, &key, &e.dict);
return e;
}
last = slastr(&im->pre);
if(last == 0)
goto flush;
key = im->pre;
key.n--;
if(mapget(h, &key, &kana)){
sclear(&key);
sputr(&key, last);
sputr(&key, c);
if(hmapget(h, &key)){
e.eat = 1;
e.s = kana;
sputr(&e.next, last);
sputr(&e.next, c);
mapget(h, &e.next, &e.dict);
return e;
}
}
flush:
if(!mapget(h, &im->pre, &e.s))
if(!mapget(t, &im->pre, &e.s))
e.s = im->pre;
sclear(&key);
sputr(&key, c);
if(hmapget(h, &key) == nil){
e.flush = 1;
if(!maplookup(t, &key, &e.dict)){
sputr(&e.s, c);
return e;
}
e.eat = 1;
sputr(&e.next, c);
mapget(h, &e.next, &e.dict);
return e;
}
@ -309,57 +303,21 @@ imthread(void*)
}
int
mapget(Hmap *h, Str *key, Str *out)
mapget(Trie *t, Str *key, Str *out)
{
Hnode *n;
char buf[256], *v;
int klen, vlen;
if(key->n == 0)
return 0;
n = hmapget(h, key);
if(n == nil || n->vlen == 0)
klen = stoutf(key, buf, sizeof(buf));
v = trieget(t, buf, klen, &vlen);
if(v == nil)
return 0;
sinit(out, n->val, n->vlen);
sinit(out, v, vlen);
return 1;
}
/*
 * openmap reads the map file at path into a new Hmap.  Each
 * non-empty line must have the form "key<TAB>value"; anything else
 * is fatal.  Besides the key itself, every proper prefix of the key
 * that is not already present is entered with an empty value, so a
 * lookup can tell "prefix of some key" apart from "unknown".
 */
static Hmap*
openmap(char *path)
{
	Hmap *map;
	Biobuf *bp;
	Str k, v, none;
	char *ln, *sep;
	int n, len;

	bp = Bopen(path, OREAD);
	if(bp == nil)
		die("can't open: %s", path);
	map = hmapalloc(1024, 0);
	sclear(&none);
	for(;;){
		ln = Brdstr(bp, '\n', 1);
		if(ln == nil)
			break;
		if(ln[0] == '\0'){
			free(ln);
			continue;
		}
		sep = strchr(ln, '\t');
		if(sep == nil || sep[1] == '\0')
			die("malformed map: %s", path);
		*sep = '\0';
		len = strlen(ln);
		sinit(&k, ln, len);
		sinit(&v, sep+1, strlen(sep+1));
		hmapset(&map, &k, &v);
		/* mark each proper prefix without clobbering a real entry */
		for(n = 1; n < len; n++){
			sinit(&k, ln, n);
			if(hmapget(map, &k) != nil)
				continue;
			hmapset(&map, &k, &none);
		}
		free(ln);
	}
	Bterm(bp);
	return map;
}
void
mapinit(char *dir)
{
@ -370,7 +328,7 @@ mapinit(char *dir)
if(langs[i].mapname == nil)
continue;
snprint(path, sizeof(path), "%s/%s.map", dir, langs[i].mapname);
langs[i].map = openmap(path);
langs[i].map = trieopen(path);
}
}

130
trie.c Normal file
View File

@ -0,0 +1,130 @@
#include "dat.h"
#include "fn.h"
/*
 * newnode appends a fresh node to t and returns its index.
 * The node is zeroed and has no child and no sibling (-1).
 *
 * The node array may be reallocated here, so callers must not
 * keep Tnode pointers across a call; use indices instead.
 */
static int
newnode(Trie *t)
{
	Tnode *n;
	int i;

	if(t->n >= t->cap){
		/*
		 * Doubling a capacity of 0 would leave it at 0 and the
		 * write below would run past the array, so start from a
		 * fixed size in that case.
		 * NOTE(review): assumes erealloc accepts a nil pointer
		 * the way realloc does - confirm.
		 */
		t->cap = t->cap > 0 ? t->cap * 2 : 1024;
		t->nodes = erealloc(t->nodes, t->cap * sizeof(Tnode));
	}
	i = t->n++;
	n = &t->nodes[i];
	memset(n, 0, sizeof(*n));
	n->child = -1;
	n->sibling = -1;
	return i;
}
/*
 * find returns the index of the child of node ni that is
 * labelled with byte c, or -1 if ni has no such child.
 */
static int
find(Trie *t, int ni, char c)
{
	int k;

	k = t->nodes[ni].child;
	while(k >= 0 && t->nodes[k].c != c)
		k = t->nodes[k].sibling;
	return k >= 0 ? k : -1;
}
/*
 * add creates a new child of node ni labelled with byte c,
 * links it at the head of ni's child list and returns its index.
 * It does not check whether ni already has a child labelled c.
 */
static int
add(Trie *t, int ni, char c)
{
	Tnode *kid;
	int k;

	/* newnode may move t->nodes; take pointers only afterwards */
	k = newnode(t);
	kid = &t->nodes[k];
	kid->c = c;
	kid->sibling = t->nodes[ni].child;
	t->nodes[ni].child = k;
	return k;
}
/*
 * insert stores val/vlen under the klen-byte key, creating any
 * missing nodes along the path.  An existing value for the same
 * key is overwritten.  val is stored by pointer, not copied.
 */
static void
insert(Trie *t, char *key, int klen, char *val, int vlen)
{
	Tnode *end;
	int cur, next;

	for(cur = t->root; klen > 0; klen--, key++){
		next = find(t, cur, *key);
		if(next < 0)
			next = add(t, cur, *key);
		cur = next;
	}
	end = &t->nodes[cur];
	end->val = val;
	end->vlen = vlen;
}
/*
 * trieopen builds a Trie from the map file at path.  Each
 * non-empty line must have the form "key<TAB>value" with a
 * non-empty value; failure to open the file or a malformed line
 * is reported through die.
 *
 * The buffer of every inserted line is intentionally not freed:
 * the trie stores pointers into it as the values.
 */
Trie*
trieopen(char *path)
{
	Trie *t;
	Biobuf *bp;
	char *ln, *sep;

	bp = Bopen(path, OREAD);
	if(bp == nil)
		die("can't open: %s", path);

	t = emalloc(sizeof(*t));
	t->n = 0;
	t->cap = 1024;
	t->nodes = emalloc(t->cap * sizeof(Tnode));
	t->root = newnode(t);

	for(;;){
		ln = Brdstr(bp, '\n', 1);
		if(ln == nil)
			break;
		if(ln[0] == '\0'){
			free(ln);
			continue;
		}
		sep = strchr(ln, '\t');
		if(sep == nil || sep[1] == '\0')
			die("malformed map: %s", path);
		/* split in place: key is ln[0..sep), value follows the tab */
		*sep = '\0';
		insert(t, ln, sep - ln, sep + 1, strlen(sep + 1));
	}
	Bterm(bp);
	return t;
}
/*
 * triewalk follows the klen bytes of key down from the root of t
 * and returns the index of the node reached, or -1 if the path
 * does not exist.  A nil trie contains no paths.  With klen <= 0
 * the root itself is returned.
 */
static int
triewalk(Trie *t, char *key, int klen)
{
	int ni, i;

	if(t == nil)
		return -1;
	ni = t->root;
	for(i = 0; i < klen && ni >= 0; i++)
		ni = find(t, ni, key[i]);
	return ni;
}

/*
 * trieget returns the value stored for exactly the klen-byte key,
 * or nil if the key is absent or is only a prefix of longer keys.
 * On success the value's length is stored in *vlen when vlen is
 * non-nil.  The returned pointer refers to memory held by the
 * trie; this function does not copy it.
 */
char*
trieget(Trie *t, char *key, int klen, int *vlen)
{
	int ni;

	ni = triewalk(t, key, klen);
	if(ni < 0 || t->nodes[ni].val == nil)
		return nil;
	if(vlen != nil)
		*vlen = t->nodes[ni].vlen;
	return t->nodes[ni].val;
}

/*
 * trielookup reports whether the klen-byte key exists in t as a
 * complete key or as a prefix of one: 1 if so, 0 otherwise.
 * When val is non-nil and the node reached carries a value, *val
 * (and *vlen, if vlen is non-nil) are set to it.  For a
 * prefix-only match the outputs are left untouched, so callers
 * should initialise them before the call.
 */
int
trielookup(Trie *t, char *key, int klen, char **val, int *vlen)
{
	int ni;

	ni = triewalk(t, key, klen);
	if(ni < 0)
		return 0;
	if(val != nil && t->nodes[ni].val != nil){
		*val = t->nodes[ni].val;
		if(vlen != nil)
			*vlen = t->nodes[ni].vlen;
	}
	return 1;
}