diff options
Diffstat (limited to 'android/sqlite3_android.cpp')
-rw-r--r-- | android/sqlite3_android.cpp | 52 |
1 files changed, 38 insertions, 14 deletions
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp index b3ae28c..252a0c5 100644 --- a/android/sqlite3_android.cpp +++ b/android/sqlite3_android.cpp @@ -219,14 +219,17 @@ struct SqliteUserData { /** * This function is invoked as: * - * _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>, <use token index>) + * _TOKENIZE('<token_table>', <data_row_id>, <data>, <delimiter>, + * <use_token_index>, <data_tag>) * - * If <use token index> is omitted, is is treated as 0. + * If <use_token_index> is omitted, it is treated as 0. + * If <data_tag> is omitted, it is treated as NULL. * * It will split <data> on each instance of <delimiter> and insert each token - * into <token_table>. <token_table> must have 3 columns: - * token TEXT, source INTEGER, token_index INTEGER - * The token_index column is not needed if <use token index> is 0. + * into <token_table>. The following columns in <token_table> are used: + * token TEXT, source INTEGER, token_index INTEGER, tag (any type) + * The token_index column is not required if <use_token_index> is 0. + * The tag column is not required if <data_tag> is NULL. * * One row is inserted for each token in <data>. * In each inserted row, 'source' is <data_row_id>. @@ -234,7 +237,8 @@ struct SqliteUserData { * the entire <data> string, and 'token_index' is 0. * In each row I (where 1 <= I < N, and N is the number of tokens in <data>) * 'token' will be set to the hex collation key of the I:th token (0-based). - * If <use token index> != 0, 'token_index' will be set to I. + * If <use_token_index> != 0, 'token_index' is set to I. + * If <data_tag> is not NULL, 'tag' is set to <data_tag>. * * In other words, there will be one row for the entire string, * and one row for each token except the first one. 
@@ -246,9 +250,10 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) //LOGD("enter tokenize"); int err; int useTokenIndex = 0; + int useDataTag = 0; - if (!(argc == 4 || argc == 5)) { - LOGE("Tokenize requires 4 or 5 arguments"); + if (!(argc >= 4 && argc <= 6)) { + LOGE("Tokenize requires 4 to 6 arguments"); sqlite3_result_null(context); return; } @@ -257,6 +262,10 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) useTokenIndex = sqlite3_value_int(argv[4]); } + if (argc > 5) { + useDataTag = (sqlite3_value_type(argv[5]) != SQLITE_NULL); + } + sqlite3 * handle = sqlite3_context_db_handle(context); UCollator* collator = (UCollator*)sqlite3_user_data(context); char const * tokenTable = (char const *)sqlite3_value_text(argv[0]); @@ -269,12 +278,12 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) // Get or create the prepared statement for the insertions sqlite3_stmt * statement = (sqlite3_stmt *)sqlite3_get_auxdata(context, 0); if (!statement) { - char * sql; - if (useTokenIndex) { - sql = sqlite3_mprintf("INSERT INTO %s (token, source, token_index) VALUES (?, ?, ?);", tokenTable); - } else { - sql = sqlite3_mprintf("INSERT INTO %s (token, source) VALUES (?, ?);", tokenTable); - } + char const * tokenIndexCol = useTokenIndex ? ", token_index" : ""; + char const * tokenIndexParam = useTokenIndex ? ", ?" : ""; + char const * dataTagCol = useDataTag ? ", tag" : ""; + char const * dataTagParam = useDataTag ? ", ?" 
: ""; + char * sql = sqlite3_mprintf("INSERT INTO %s (token, source%s%s) VALUES (?, ?%s%s);", + tokenTable, tokenIndexCol, dataTagCol, tokenIndexParam, dataTagParam); err = sqlite3_prepare_v2(handle, sql, -1, &statement, NULL); sqlite3_free(sql); if (err) { @@ -300,6 +309,17 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv) return; } + // Bind <data_tag> to the tag column + if (useDataTag) { + int dataTagParamIndex = useTokenIndex ? 4 : 3; + err = sqlite3_bind_value(statement, dataTagParamIndex, argv[5]); + if (err != SQLITE_OK) { + LOGE("bind failed"); + sqlite3_result_null(context); + return; + } + } + // Get the raw bytes for the string to tokenize // the string will be modified by following code // however, sqlite did not reuse the string, so it is safe to not dup it @@ -420,6 +440,10 @@ extern "C" int register_localized_collators(sqlite3* handle, const char* systemL if (err != SQLITE_OK) { return err; } + err = sqlite3_create_function(handle, "_TOKENIZE", 6, SQLITE_UTF16, collator, tokenize, NULL, NULL); + if (err != SQLITE_OK) { + return err; + } return SQLITE_OK; } |