author    | Bjorn Bringert <bringert@android.com> | 2009-05-13 22:13:09 +0100
committer | Bjorn Bringert <bringert@android.com> | 2009-05-14 15:41:56 +0100
commit    | 687f1165e7f61d3842e6b8f4ba28d0473fff529b (patch)
tree      | 68eb5d134cffcf3dc5267e1a0fba2af493907c39
parent    | 2da78c0877a7fa924f62cc76700f3da29c47f5ad (diff)
Add token_index column to _TOKENIZE table.
If the new optional fifth argument to _TOKENIZE is
non-zero, the _TOKENIZE function now uses a third column,
token_index, in addition to the existing token and source
columns. The new column stores the index of the token within
the string. This is useful for distinguishing between
matches that are prefixes of the original string
and matches that are prefixes of one of the tokens
inside the original string.
This change is required to fix http://b/issue?id=1847321
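
To make that distinction concrete, here is a minimal sketch (not part of the commit) of how a caller might use the new fifth argument. The table name `name_lookup`, the source id, the sample string, and the GLOB-based lookup are all invented for illustration; the sketch only assumes a connection on which register_localized_collators() has already installed the LOCALIZED collator and the _TOKENIZE function.

```cpp
#include <sqlite3.h>

// Illustrative sketch only: table name, source id, and sample data are invented.
static void tokenizeExample(sqlite3 * db) {
    // The token_index column is only required because the fifth _TOKENIZE
    // argument below is 1.
    sqlite3_exec(db,
                 "CREATE TABLE IF NOT EXISTS name_lookup ("
                 "token TEXT, source INTEGER, token_index INTEGER);",
                 NULL, NULL, NULL);

    // For 'John Smith' this inserts two rows with source = 7:
    //   token = hex collation key of "John Smith", token_index = 0
    //   token = hex collation key of "Smith",      token_index = 1
    sqlite3_exec(db,
                 "SELECT _TOKENIZE('name_lookup', 7, 'John Smith', ' ', 1);",
                 NULL, NULL, NULL);

    // A lookup that should only match prefixes of the whole string can filter
    // on token_index = 0; without that predicate, a prefix of the inner token
    // "Smith" would match as well. ?1 would be bound to the hex collation key
    // of the query prefix.
    sqlite3_stmt * statement = NULL;
    sqlite3_prepare_v2(db,
                       "SELECT source FROM name_lookup "
                       "WHERE token GLOB ?1 || '*' AND token_index = 0;",
                       -1, &statement, NULL);
    // ... bind ?1, step through the results, then finalize ...
    sqlite3_finalize(statement);
}
```

Dropping the `token_index = 0` predicate widens the match to prefixes of the inner tokens, which is exactly the distinction the commit message describes.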
-rw-r--r-- | android/sqlite3_android.cpp | 53 |
1 file changed, 46 insertions, 7 deletions
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 3e11808..dac93c2 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -200,23 +200,44 @@ struct SqliteUserData {
 /**
  * This function is invoked as:
  *
- *  _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>)
+ *  _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>, <use token index>)
+ *
+ * If <use token index> is omitted, is is treated as 0.
+ *
+ * It will split <data> on each instance of <delimiter> and insert each token
+ * into <token_table>. <token_table> must have 3 columns:
+ *   token TEXT, source INTEGER, token_index INTEGER
+ * The token_index column is not needed if <use token index> is 0.
+ *
+ * One row is inserted for each token in <data>.
+ * In each inserted row, 'source' is <data_row_id>.
+ * In the first inserted row, 'token' is the hex collation key of
+ * the entire <data> string, and 'token_index' is 0.
+ * In each row I (where 1 <= I < N, and N is the number of tokens in <data>)
+ * 'token' will be set to the hex collation key of the I:th token (0-based).
+ * If <use token index> != 0, 'token_index' will be set to I.
+ *
+ * In other words, there will be one row for the entire string,
+ * and one row for each token except the first one.
  *
- * It will then split data on each instance of delimiter and insert each token
- * into token_table's 'token' column with data_row_id in the 'source' column.
  * The function returns the number of tokens generated.
  */
 static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
 {
     //LOGD("enter tokenize");
     int err;
+    int useTokenIndex = 0;
 
-    if (argc != 4) {
-        LOGE("Tokenize requires 4 arguments");
+    if (!(argc == 4 || argc == 5)) {
+        LOGE("Tokenize requires 4 or 5 arguments");
         sqlite3_result_null(context);
         return;
     }
 
+    if (argc > 4) {
+        useTokenIndex = sqlite3_value_int(argv[4]);
+    }
+
     sqlite3 * handle = sqlite3_context_db_handle(context);
     UCollator* collator = (UCollator*)sqlite3_user_data(context);
     char const * tokenTable = (char const *)sqlite3_value_text(argv[0]);
@@ -229,7 +250,12 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
     // Get or create the prepared statement for the insertions
     sqlite3_stmt * statement = (sqlite3_stmt *)sqlite3_get_auxdata(context, 0);
     if (!statement) {
-        char * sql = sqlite3_mprintf("INSERT INTO %s (token, source) VALUES (?, ?);", tokenTable);
+        char * sql;
+        if (useTokenIndex) {
+            sql = sqlite3_mprintf("INSERT INTO %s (token, source, token_index) VALUES (?, ?, ?);", tokenTable);
+        } else {
+            sql = sqlite3_mprintf("INSERT INTO %s (token, source) VALUES (?, ?);", tokenTable);
+        }
         err = sqlite3_prepare_v2(handle, sql, -1, &statement, NULL);
         sqlite3_free(sql);
         if (err) {
@@ -303,6 +329,15 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
             break;
         }
 
+        if (useTokenIndex) {
+            err = sqlite3_bind_int(statement, 3, numTokens);
+            if (err != SQLITE_OK) {
+                LOGE(" sqlite3_bind_int error %d", err);
+                free(base16buf);
+                break;
+            }
+        }
+
         err = sqlite3_step(statement);
         free(base16buf);
 
@@ -361,7 +396,11 @@ extern "C" int register_localized_collators(sqlite3* handle, const char* systemL
     err = sqlite3_create_function(handle, "_TOKENIZE", 4, SQLITE_UTF16, collator, tokenize, NULL, NULL);
     if (err != SQLITE_OK) {
         return err;
-    }
+    }
+    err = sqlite3_create_function(handle, "_TOKENIZE", 5, SQLITE_UTF16, collator, tokenize, NULL, NULL);
+    if (err != SQLITE_OK) {
+        return err;
+    }
 
     return SQLITE_OK;
 }
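
The registration change at the end works because SQLite treats functions with the same name but different argument counts as separate registrations that can share one callback; tokenize() then inspects argc to decide whether to bind token_index. Below is a minimal, self-contained sketch of that pattern with an invented ECHO_COUNT function; nothing in it comes from the commit beyond the registration idiom.

```cpp
#include <sqlite3.h>

// Invented example function: ECHO_COUNT(x) or ECHO_COUNT(x, y).
// Both arities route to this one callback; argc tells them apart,
// just as tokenize() checks argc > 4 for the optional fifth argument.
static void echoCount(sqlite3_context * context, int argc, sqlite3_value ** argv) {
    int extra = (argc > 1) ? sqlite3_value_int(argv[1]) : 0;
    sqlite3_result_int(context, sqlite3_value_int(argv[0]) + extra);
}

int main() {
    sqlite3 * db = NULL;
    if (sqlite3_open(":memory:", &db) != SQLITE_OK) return 1;

    // Register the 1- and 2-argument forms separately, mirroring the 4- and
    // 5-argument registrations of _TOKENIZE in the diff above.
    sqlite3_create_function(db, "ECHO_COUNT", 1, SQLITE_UTF8, NULL, echoCount, NULL, NULL);
    sqlite3_create_function(db, "ECHO_COUNT", 2, SQLITE_UTF8, NULL, echoCount, NULL, NULL);

    sqlite3_close(db);
    return 0;
}
```

Keeping a single callback avoids duplicating the tokenizer logic while still letting SQLite reject calls with any other argument count at parse time.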