author    Bjorn Bringert <bringert@android.com>  2009-05-13 22:13:09 +0100
committer Bjorn Bringert <bringert@android.com>  2009-05-14 15:41:56 +0100
commit    687f1165e7f61d3842e6b8f4ba28d0473fff529b (patch)
tree      68eb5d134cffcf3dc5267e1a0fba2af493907c39 /android/sqlite3_android.cpp
parent    2da78c0877a7fa924f62cc76700f3da29c47f5ad (diff)
Add token_index column to _TOKENIZE table.
If the new optional fifth argument to _TOKENIZE is non-zero, the _TOKENIZE function now uses a third column, token_index, in addition to the earlier token and source columns. The new column stores the index of the token within the string. This is useful for distinguishing between matches that are prefixes of the original string and matches that are prefixes of one of the tokens inside the original string. This change is required to fix http://b/issue?id=1847321
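
A rough usage sketch (not part of the patch) of the new five-argument form; the table name "name_lookup", the row id 42, and the sample string are made up for illustration, and it assumes _TOKENIZE has already been registered on the connection (as register_localized_collators does below):

#include <sqlite3.h>

// Create a token table with the new token_index column and tokenize one
// string into it; passing a non-zero fifth argument makes _TOKENIZE fill
// the token_index column.
static int tokenize_example(sqlite3 * handle)
{
    int err = sqlite3_exec(handle,
            "CREATE TABLE IF NOT EXISTS name_lookup ("
            "token TEXT, source INTEGER, token_index INTEGER);",
            NULL, NULL, NULL);
    if (err != SQLITE_OK) return err;

    // Splits on ' ' and inserts one row per token into name_lookup; the
    // first row holds the collation key of the whole string (token_index 0).
    return sqlite3_exec(handle,
            "SELECT _TOKENIZE('name_lookup', 42, 'John Ronald Reuel', ' ', 1);",
            NULL, NULL, NULL);
}
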
Diffstat (limited to 'android/sqlite3_android.cpp')
-rw-r--r--  android/sqlite3_android.cpp  53
1 file changed, 46 insertions(+), 7 deletions(-)
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 3e11808..dac93c2 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -200,23 +200,44 @@ struct SqliteUserData {
/**
* This function is invoked as:
*
- * _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>)
+ * _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>, <use token index>)
+ *
+ * If <use token index> is omitted, it is treated as 0.
+ *
+ * It will split <data> on each instance of <delimiter> and insert each token
+ * into <token_table>. <token_table> must have 3 columns:
+ * token TEXT, source INTEGER, token_index INTEGER
+ * The token_index column is not needed if <use token index> is 0.
+ *
+ * One row is inserted for each token in <data>.
+ * In each inserted row, 'source' is <data_row_id>.
+ * In the first inserted row, 'token' is the hex collation key of
+ * the entire <data> string, and 'token_index' is 0.
+ * In each row I (where 1 <= I < N, and N is the number of tokens in <data>)
+ * 'token' will be set to the hex collation key of the I-th token (0-based).
+ * If <use token index> != 0, 'token_index' will be set to I.
+ *
+ * In other words, there will be one row for the entire string,
+ * and one row for each token except the first one.
*
- * It will then split data on each instance of delimiter and insert each token
- * into token_table's 'token' column with data_row_id in the 'source' column.
* The function returns the number of tokens generated.
*/
static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
{
//LOGD("enter tokenize");
int err;
+ int useTokenIndex = 0;
- if (argc != 4) {
- LOGE("Tokenize requires 4 arguments");
+ if (!(argc == 4 || argc == 5)) {
+ LOGE("Tokenize requires 4 or 5 arguments");
sqlite3_result_null(context);
return;
}
+ if (argc > 4) {
+ useTokenIndex = sqlite3_value_int(argv[4]);
+ }
+
sqlite3 * handle = sqlite3_context_db_handle(context);
UCollator* collator = (UCollator*)sqlite3_user_data(context);
char const * tokenTable = (char const *)sqlite3_value_text(argv[0]);
@@ -229,7 +250,12 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
// Get or create the prepared statement for the insertions
sqlite3_stmt * statement = (sqlite3_stmt *)sqlite3_get_auxdata(context, 0);
if (!statement) {
- char * sql = sqlite3_mprintf("INSERT INTO %s (token, source) VALUES (?, ?);", tokenTable);
+ char * sql;
+ if (useTokenIndex) {
+ sql = sqlite3_mprintf("INSERT INTO %s (token, source, token_index) VALUES (?, ?, ?);", tokenTable);
+ } else {
+ sql = sqlite3_mprintf("INSERT INTO %s (token, source) VALUES (?, ?);", tokenTable);
+ }
err = sqlite3_prepare_v2(handle, sql, -1, &statement, NULL);
sqlite3_free(sql);
if (err) {
@@ -303,6 +329,15 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
break;
}
+ if (useTokenIndex) {
+ err = sqlite3_bind_int(statement, 3, numTokens);
+ if (err != SQLITE_OK) {
+ LOGE(" sqlite3_bind_int error %d", err);
+ free(base16buf);
+ break;
+ }
+ }
+
err = sqlite3_step(statement);
free(base16buf);
@@ -361,7 +396,11 @@ extern "C" int register_localized_collators(sqlite3* handle, const char* systemL
err = sqlite3_create_function(handle, "_TOKENIZE", 4, SQLITE_UTF16, collator, tokenize, NULL, NULL);
if (err != SQLITE_OK) {
return err;
- }
+ }
+ err = sqlite3_create_function(handle, "_TOKENIZE", 5, SQLITE_UTF16, collator, tokenize, NULL, NULL);
+ if (err != SQLITE_OK) {
+ return err;
+ }
return SQLITE_OK;
}
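
For concreteness, a sketch of what the comment above implies for a call such as _TOKENIZE('name_lookup', 7, 'foo bar', ' ', 1); the table name and values are made up, and the actual token values are hex collation keys, abbreviated here as key(...):

  token          | source | token_index
  key('foo bar') | 7      | 0
  key('bar')     | 7      | 1

That is, one row for the whole string plus one row for each token except the first, with the function returning the number of tokens generated (2 here). The second sqlite3_create_function() call is needed because SQLite resolves functions by name and argument count, so the 4-argument and 5-argument forms of _TOKENIZE must be registered separately.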