diff options
-rw-r--r-- | android/sqlite3_android.cpp | 53 |
1 files changed, 46 insertions, 7 deletions
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp index 3e11808..dac93c2 100644 --- a/android/sqlite3_android.cpp +++ b/android/sqlite3_android.cpp @@ -200,23 +200,44 @@ struct SqliteUserData { /** * This function is invoked as: * - * _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>) + * _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>, <use token index>) + * + * If <use token index> is omitted, it is treated as 0. + * + * It will split <data> on each instance of <delimiter> and insert each token + * into <token_table>. <token_table> must have 3 columns: + * token TEXT, source INTEGER, token_index INTEGER + * The token_index column is not needed if <use token index> is 0. + * + * One row is inserted for each token in <data>. + * In each inserted row, 'source' is <data_row_id>. + * In the first inserted row, 'token' is the hex collation key of + * the entire <data> string, and 'token_index' is 0. + * In each row I (where 1 <= I < N, and N is the number of tokens in <data>) + * 'token' will be set to the hex collation key of the I:th token (0-based). + * If <use token index> != 0, 'token_index' will be set to I. + * + * In other words, there will be one row for the entire string, + * and one row for each token except the first one. * - * It will then split data on each instance of delimiter and insert each token - * into token_table's 'token' column with data_row_id in the 'source' column. * The function returns the number of tokens generated. 
*/ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) { //LOGD("enter tokenize"); int err; + int useTokenIndex = 0; - if (argc != 4) { - LOGE("Tokenize requires 4 arguments"); + if (!(argc == 4 || argc == 5)) { + LOGE("Tokenize requires 4 or 5 arguments"); sqlite3_result_null(context); return; } + if (argc > 4) { + useTokenIndex = sqlite3_value_int(argv[4]); + } + sqlite3 * handle = sqlite3_context_db_handle(context); UCollator* collator = (UCollator*)sqlite3_user_data(context); char const * tokenTable = (char const *)sqlite3_value_text(argv[0]); @@ -229,7 +250,12 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) // Get or create the prepared statement for the insertions sqlite3_stmt * statement = (sqlite3_stmt *)sqlite3_get_auxdata(context, 0); if (!statement) { - char * sql = sqlite3_mprintf("INSERT INTO %s (token, source) VALUES (?, ?);", tokenTable); + char * sql; + if (useTokenIndex) { + sql = sqlite3_mprintf("INSERT INTO %s (token, source, token_index) VALUES (?, ?, ?);", tokenTable); + } else { + sql = sqlite3_mprintf("INSERT INTO %s (token, source) VALUES (?, ?);", tokenTable); + } err = sqlite3_prepare_v2(handle, sql, -1, &statement, NULL); sqlite3_free(sql); if (err) { @@ -303,6 +329,15 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) break; } + if (useTokenIndex) { + err = sqlite3_bind_int(statement, 3, numTokens); + if (err != SQLITE_OK) { + LOGE(" sqlite3_bind_int error %d", err); + free(base16buf); + break; + } + } + err = sqlite3_step(statement); free(base16buf); @@ -361,7 +396,11 @@ extern "C" int register_localized_collators(sqlite3* handle, const char* systemL err = sqlite3_create_function(handle, "_TOKENIZE", 4, SQLITE_UTF16, collator, tokenize, NULL, NULL); if (err != SQLITE_OK) { return err; - } + } + err = sqlite3_create_function(handle, "_TOKENIZE", 5, SQLITE_UTF16, collator, tokenize, NULL, NULL); + if (err != SQLITE_OK) { + return err; + } 
return SQLITE_OK; } |