sennaでpatricia treeを作る
- Author: harajune
- Filed under: IT
Friday
Aug 28,2009
「はてなのようなキーワードリンクをRubyで付与する実例」というのをつくってもらったので、これをもとにCでsennaのpatricia treeを試すプログラムを書いたよ。
機能的にはとりあえずキーワードの検出だけ。
CODE:
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <vector>

#include <senna/senna.h>
//#include <exectime.h>
-
-
void create_index(char* index, char* filename){
-
-
sen_sym *sym;
-
-
if(!(sym = sen_sym_create(index, 0, SEN_INDEX_NORMALIZE, sen_enc_utf8))){
-
printf("cannot open or create sym file\m");
-
abort();
-
}
-
-
FILE * fp = fopen(filename, "rb");
-
-
char buffer[2048];
-
-
sen_id sym_id;
-
while(fgets(buffer, 2048, fp)){
-
sscanf(buffer, "%s\n", buffer);
-
if(!(sym_id = sen_sym_get(sym, buffer))){
-
printf("cannot create link\n");
-
abort();
-
}
-
-
// printf("%d\t", sym_id);
-
}
-
-
fclose(fp);
-
-
sen_sym_close(sym);
-
-
}
-
-
void traverse(char* index, char* filename){
-
printf("traverse\n");
-
sen_sym* sym;
-
-
if(!(sym = sen_sym_open(index))){
-
printf("cannot open index file\n");
-
abort();
-
}
-
-
FILE * fp = fopen(filename, "rb");
-
-
if(!fp){
-
printf("cannot open file\n");
-
abort();
-
}
-
-
fseek(fp, 0, SEEK_END);
-
-
int filesize = ftell(fp);
-
int offset = 0;
-
-
fseek(fp, 0, SEEK_SET);
-
-
char * content = new char[filesize + 1];
-
const char * cp = content;
-
-
-
fread(content, sizeof(char), filesize, fp);
-
content[filesize] = '\0';
-
-
sen_sym_scan_hit sh[32];
-
-
char buffer[2048];
-
-
const char * rest;
-
-
exectime::start_timer();
-
-
while((rest - content) <filesize){
-
int found;
-
if(!(found = sen_sym_scan(sym, cp, filesize, sh, 32, &rest))){
-
break;
-
}
-
-
for(int i=0; i<found; i++){
-
int key_len = sen_sym_key(sym, sh[i].id, buffer, 2048);
-
if(key_len> 0 && sh[i].length> 7){
-
printf("%s\n", buffer);
-
}
-
-
}
-
cp = rest;
-
-
}
-
-
exectime::end_timer();
-
-
printf("time: %f sec\n", exectime::time_result());
-
-
-
}
-
-
int main(int argc, char** argv){
-
-
if(!strcmp(argv[1], "make")){
-
create_index(argv[2], argv[3]);
-
}else if(!strcmp(argv[1], "traverse")){
-
traverse(argv[2], argv[3]);
-
}
-
-
}