From 5653298bb18d5bfde5c09de7321448a79e0c4c7b Mon Sep 17 00:00:00 2001
From: Hojun-Cho
Date: Sun, 8 Feb 2026 16:36:59 +0900
Subject: [PATCH] use trie for map lookup

---
 dat.h    |  22 +++++++++-
 fn.h     |   8 +++-
 strans.c | 110 +++++++++++++++-------------------------------
 trie.c   | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 190 insertions(+), 80 deletions(-)
 create mode 100644 trie.c

diff --git a/dat.h b/dat.h
index dc56a54..d03e13f 100644
--- a/dat.h
+++ b/dat.h
@@ -45,7 +45,6 @@ typedef struct Emit Emit;
 struct Emit
 {
 	int eat;
-	int flush;
 	Str s;
 	Str next;
 	Str dict;
@@ -62,6 +61,25 @@ struct Hnode
 	int vlen;
 };
 
+typedef struct Tnode Tnode;
+struct Tnode	/* one trie node; nodes refer to each other by index into Trie.nodes */
+{
+	int child;	/* index of the first child, -1 if none */
+	int sibling;	/* index of the next sibling, -1 if none */
+	char c;	/* key byte that leads to this node */
+	char *val;	/* value stored at this node, nil if none */
+	int vlen;	/* length of val in bytes */
+};
+
+typedef struct Trie Trie;
+struct Trie
+{
+	int root;	/* index of the root node */
+	Tnode *nodes;	/* node array, grown by doubling */
+	int n;	/* nodes in use */
+	int cap;	/* nodes allocated */
+};
+
 typedef struct Hmap Hmap;
 struct Hmap
 {
@@ -79,7 +97,7 @@ struct Lang
 	int lang;
 	char *mapname;
 	char *dictname;
-	Hmap *map;
+	Trie *map;	/* set by mapinit via trieopen; left untouched when mapname is nil */
 	Hmap *dict;
 };
 
diff --git a/fn.h b/fn.h
index 7f16a1d..b8cfe57 100644
--- a/fn.h
+++ b/fn.h
@@ -12,7 +12,11 @@ Rune slastr(Str*);
 Hmap* hmapalloc(int, int);
 int hmapset(Hmap**, Str*, Str*);
 Hnode* hmapget(Hmap*, Str*);
-int mapget(Hmap*, Str*, Str*);
+int mapget(Trie*, Str*, Str*);
+
+Trie* trieopen(char*);
+char* trieget(Trie*, char*, int, int*);
+int trielookup(Trie*, char*, int, char**, int*);
 
 Lang* getlang(int);
 void mapinit(char*);
@@ -22,7 +26,7 @@ void dictthread(void*);
 void drawthread(void*);
 void imthread(void*);
 
-Emit trans(Im*, Rune);
+Emit transmap(Im*, Rune);
 
 void srvthread(void*);
 
diff --git a/strans.c b/strans.c
index 7f0a3a3..b22cfec 100644
--- a/strans.c
+++ b/strans.c
@@ -103,7 +103,7 @@ dotrans(Rune c, Str *com)
 	Emit e;
 	Dictreq req;
 
-	e = trans(&im, c);
+	e = transmap(&im, c);
 	if(e.s.n > 0)
 		sappend(com, &e.s);
 	sclear(&im.pre);
@@ -128,55 +128,49 @@ getlang(int lang)
 	return nil;
 }
 
+static int	/* maplookup: 1 if key is a stored key or a prefix of one in t, else 0 */
+maplookup(Trie *t, Str *key, Str *out)
+{
+	char buf[256], *v;
+	int klen, vlen;
+
+	if(key->n == 0)	/* an empty key never matches */
+		return 0;
+	v = nil;
+	vlen = 0;
+	klen = stoutf(key, buf, sizeof(buf));	/* NOTE(review): stoutf is not visible here; confirm klen cannot exceed sizeof(buf) */
+	if(!trielookup(t, buf, klen, &v, &vlen))
+		return 0;
+	if(out != nil && v != nil)	/* a prefix-only node has no value: out is left as it was */
+		sinit(out, v, vlen);
+	return 1;
+}
+
 Emit
-trans(Im *im, Rune c)
+transmap(Im *im, Rune c)	/* look up im->pre followed by c in the language's map trie; fills e.s, e.next, e.dict */
 {
 	Emit e = {0};
-	Str key, kana;
-	Hmap *h;
-	Rune last;
+	Str key;
+	Trie *t;
 
-	h = im->l->map;
+	t = im->l->map;
 	key = im->pre;
 	sputr(&key, c);
-	if(hmapget(h, &key)){
+	if(maplookup(t, &key, &e.dict)){	/* pre+c is a key or a key prefix */
 		e.eat = 1;
 		e.next = key;
-		mapget(h, &key, &e.dict);
 		return e;
 	}
-	last = slastr(&im->pre);
-	if(last == 0)
-		goto flush;
-	key = im->pre;
-	key.n--;
-	if(mapget(h, &key, &kana)){
-		sclear(&key);
-		sputr(&key, last);
-		sputr(&key, c);
-		if(hmapget(h, &key)){
-			e.eat = 1;
-			e.s = kana;
-			sputr(&e.next, last);
-			sputr(&e.next, c);
-			mapget(h, &e.next, &e.dict);
-			return e;
-		}
-	}
-
-flush:
-	if(!mapget(h, &im->pre, &e.s))
+	if(!mapget(t, &im->pre, &e.s))	/* pre has no mapped value: pass it through unchanged */
 		e.s = im->pre;
 	sclear(&key);
 	sputr(&key, c);
-	if(hmapget(h, &key) == nil){
-		e.flush = 1;
+	if(!maplookup(t, &key, &e.dict)){	/* c alone starts no key: append it to the output */
 		sputr(&e.s, c);
 		return e;
 	}
 	e.eat = 1;
 	sputr(&e.next, c);
-	mapget(h, &e.next, &e.dict);
 	return e;
 }
 
@@ -309,57 +303,21 @@ imthread(void*)
 }
 
 int
-mapget(Hmap *h, Str *key, Str *out)
+mapget(Trie *t, Str *key, Str *out)	/* 1 with out filled only when key has a value in t; else 0 */
 {
-	Hnode *n;
+	char buf[256], *v;
+	int klen, vlen;
 
 	if(key->n == 0)
 		return 0;
-	n = hmapget(h, key);
-	if(n == nil || n->vlen == 0)
+	klen = stoutf(key, buf, sizeof(buf));
+	v = trieget(t, buf, klen, &vlen);	/* nil for a missing key and for a prefix-only node */
+	if(v == nil)
 		return 0;
-	sinit(out, n->val, n->vlen);
+	sinit(out, v, vlen);
 	return 1;
 }
 
-static Hmap*
-openmap(char *path)
-{
-	Hmap *h;
-	Biobuf *b;
-	Str key, val, empty;
-	char *line, *tab;
-	int i, klen;
-
-	b = Bopen(path, OREAD);
-	if(b == nil)
-		die("can't open: %s", path);
-	h = hmapalloc(1024, 0);
-	sclear(&empty);
-	while((line = Brdstr(b, '\n', 1)) != nil){
-		if(line[0] == '\0'){
-			free(line);
-			continue;
-		}
-		tab = strchr(line, '\t');
-		if(tab == nil || tab[1] == '\0')
-			die("malformed map: %s", path);
-		*tab = '\0';
-		klen = strlen(line);
-		sinit(&key, line, klen);
-		sinit(&val, tab+1, strlen(tab+1));
-		hmapset(&h, &key, &val);
-		for(i = 1; i < klen; i++){
-			sinit(&key, line, i);
-			if(hmapget(h, &key) == nil)
-				hmapset(&h, &key, &empty);
-		}
-		free(line);
-	}
-	Bterm(b);
-	return h;
-}
-
 void
 mapinit(char *dir)
 {
@@ -370,7 +328,7 @@ mapinit(char *dir)
 		if(langs[i].mapname == nil)
 			continue;
 		snprint(path, sizeof(path), "%s/%s.map", dir, langs[i].mapname);
-		langs[i].map = openmap(path);
+		langs[i].map = trieopen(path);
 	}
 }
 
diff --git a/trie.c b/trie.c
new file mode 100644
index 0000000..759193d
--- /dev/null
+++ b/trie.c
@@ -0,0 +1,130 @@
+#include "dat.h"
+#include "fn.h"
+
+static int	/* newnode: claim the next slot of t->nodes and return its index */
+newnode(Trie *t)
+{
+	int i;
+
+	if(t->n >= t->cap){	/* array full: double it */
+		t->cap *= 2;
+		t->nodes = erealloc(t->nodes, t->cap * sizeof(Tnode));
+	}
+	i = t->n++;
+	memset(&t->nodes[i], 0, sizeof(Tnode));
+	t->nodes[i].child = -1;	/* -1 marks "no link"; 0 is a valid node index */
+	t->nodes[i].sibling = -1;
+	return i;
+}
+
+static int	/* find: index of the child of ni labelled c, or -1 if there is none */
+find(Trie *t, int ni, char c)
+{
+	int pi;
+
+	for(pi = t->nodes[ni].child; pi >= 0; pi = t->nodes[pi].sibling)	/* walk the sibling list */
+		if(t->nodes[pi].c == c)
+			return pi;
+	return -1;
+}
+
+static int	/* add: create a child of ni labelled c and return its index */
+add(Trie *t, int ni, char c)
+{
+	int pi;
+
+	pi = newnode(t);	/* may move t->nodes, so nodes are addressed by index below */
+	t->nodes[pi].c = c;
+	t->nodes[pi].sibling = t->nodes[ni].child;	/* push on the front of ni's child list */
+	t->nodes[ni].child = pi;
+	return pi;
+}
+
+static void	/* insert: store val/vlen at the node reached by key, creating nodes as needed */
+insert(Trie *t, char *key, int klen, char *val, int vlen)
+{
+	int ni, ci;
+	int i;
+
+	ni = t->root;
+	for(i = 0; i < klen; i++){
+		ci = find(t, ni, key[i]);
+		if(ci < 0)
+			ci = add(t, ni, key[i]);
+		ni = ci;
+	}
+	t->nodes[ni].val = val;	/* the pointer is stored, not a copy; an earlier value for the same key is replaced */
+	t->nodes[ni].vlen = vlen;
+}
+
+Trie*	/* trieopen: build a trie from the file at path, one "key<TAB>value" entry per line */
+trieopen(char *path)
+{
+	Trie *t;
+	Biobuf *b;
+	char *line, *tab, *key, *val;
+	int klen, vlen;
+
+	b = Bopen(path, OREAD);
+	if(b == nil)
+		die("can't open: %s", path);
+	t = emalloc(sizeof(*t));
+	t->cap = 1024;
+	t->nodes = emalloc(t->cap * sizeof(Tnode));
+	t->n = 0;
+	t->root = newnode(t);
+	while((line = Brdstr(b, '\n', 1)) != nil){
+		if(line[0] == '\0'){	/* skip empty lines */
+			free(line);
+			continue;
+		}
+		tab = strchr(line, '\t');
+		if(tab == nil || tab[1] == '\0')	/* no tab, or nothing after it */
+			die("malformed map: %s", path);
+		*tab = '\0';	/* split the line in place into key and value */
+		key = line;
+		klen = tab - line;
+		val = tab + 1;
+		vlen = strlen(val);
+		insert(t, key, klen, val, vlen);	/* line is not freed: the stored val points into it */
+	}
+	Bterm(b);
+	return t;
+}
+
+char*	/* trieget: value stored for exactly key, with *vlen set; nil if absent or valueless */
+trieget(Trie *t, char *key, int klen, int *vlen)
+{
+	int ni;
+	int i;
+
+	ni = t->root;
+	for(i = 0; i < klen; i++){
+		ni = find(t, ni, key[i]);
+		if(ni < 0)
+			return nil;
+	}
+	if(t->nodes[ni].val == nil)	/* node exists only as a prefix of longer keys */
+		return nil;
+	*vlen = t->nodes[ni].vlen;
+	return t->nodes[ni].val;
+}
+
+int	/* trielookup: 1 if key reaches any node (a stored key or a prefix of one), else 0 */
+trielookup(Trie *t, char *key, int klen, char **val, int *vlen)
+{
+	int ni;
+	int i;
+
+	ni = t->root;
+	for(i = 0; i < klen; i++){
+		ni = find(t, ni, key[i]);
+		if(ni < 0)
+			return 0;
+	}
+	if(val != nil && t->nodes[ni].val != nil){	/* outputs are written only when the node has a value */
+		*val = t->nodes[ni].val;
+		*vlen = t->nodes[ni].vlen;
+	}
+	return 1;
+}