diff options
author | Pavlo Yatsukhnenko <yatsukhnenko@gmail.com> | 2018-12-20 16:04:13 +0300 |
---|---|---|
committer | Pavlo Yatsukhnenko <yatsukhnenko@gmail.com> | 2018-12-22 16:31:51 +0300 |
commit | bb32e6f3a0e93b1de9235de2db496fb1bfc400d0 (patch) | |
tree | 32bea8aa6ba652927b18d810d8c0a08b86070498 | |
parent | 3e7e1c833d08aac4e0eeb4e37dcc44768c8b117c (diff) |
Implement consistent hashing algorithm for RedisArray
-rw-r--r-- | common.h | 11 | ||||
-rw-r--r-- | redis.c | 1 | ||||
-rw-r--r-- | redis_array.c | 15 | ||||
-rw-r--r-- | redis_array.h | 11 | ||||
-rw-r--r-- | redis_array_impl.c | 86 | ||||
-rw-r--r-- | redis_array_impl.h | 2 | ||||
-rw-r--r-- | redis_session.c | 11 |
7 files changed, 117 insertions, 20 deletions
@@ -644,6 +644,17 @@ typedef enum _PUBSUB_TYPE { #define REDIS_ENABLE_MODE(redis_sock, m) (redis_sock->mode |= m) #define REDIS_DISABLE_MODE(redis_sock, m) (redis_sock->mode &= ~m) +/* HOST_NAME_MAX doesn't exist everywhere */ +#ifndef HOST_NAME_MAX + #if defined(_POSIX_HOST_NAME_MAX) + #define HOST_NAME_MAX _POSIX_HOST_NAME_MAX + #elif defined(MAXHOSTNAMELEN) + #define HOST_NAME_MAX MAXHOSTNAMELEN + #else + #define HOST_NAME_MAX 255 + #endif +#endif + typedef struct fold_item { zval * (*fun)(INTERNAL_FUNCTION_PARAMETERS, void *, ...); void *ctx; @@ -67,6 +67,7 @@ PHP_INI_BEGIN() PHP_INI_ENTRY("redis.arrays.previous", "", PHP_INI_ALL, NULL) PHP_INI_ENTRY("redis.arrays.readtimeout", "0", PHP_INI_ALL, NULL) PHP_INI_ENTRY("redis.arrays.retryinterval", "0", PHP_INI_ALL, NULL) + PHP_INI_ENTRY("redis.arrays.consistent", "0", PHP_INI_ALL, NULL) /* redis cluster */ PHP_INI_ENTRY("redis.clusters.persistent", "0", PHP_INI_ALL, NULL) diff --git a/redis_array.c b/redis_array.c index bdb9fd66..df661a9f 100644 --- a/redis_array.c +++ b/redis_array.c @@ -138,6 +138,12 @@ redis_array_free(RedisArray *ra) { int i; + /* continuum */ + if (ra->continuum) { + efree(ra->continuum->points); + efree(ra->continuum); + } + /* Redis objects */ for(i = 0; i< ra->count; i++) { zval_dtor(&ra->redis[i]); @@ -264,7 +270,7 @@ PHP_METHOD(RedisArray, __construct) { zval *z0, z_fun, z_dist, *zpData, *z_opts = NULL; RedisArray *ra = NULL; - zend_bool b_index = 0, b_autorehash = 0, b_pconnect = 0; + zend_bool b_index = 0, b_autorehash = 0, b_pconnect = 0, consistent = 0; HashTable *hPrev = NULL, *hOpts = NULL; long l_retry_interval = 0; zend_bool b_lazy_connect = 0; @@ -349,6 +355,11 @@ PHP_METHOD(RedisArray, __construct) read_timeout = atof(Z_STRVAL_P(zpData)); } } + + /* consistent */ + if ((zpData = zend_hash_str_find(hOpts, "consistent", sizeof("consistent") - 1)) != NULL) { + consistent = zval_is_true(zpData); + } } /* extract either name of list of hosts from z0 */ @@ -358,7 +369,7 @@ PHP_METHOD(RedisArray, __construct) break; case IS_ARRAY: - ra = ra_make_array(Z_ARRVAL_P(z0), &z_fun, &z_dist, hPrev, b_index, b_pconnect, l_retry_interval, b_lazy_connect, d_connect_timeout, read_timeout TSRMLS_CC); + ra = ra_make_array(Z_ARRVAL_P(z0), &z_fun, &z_dist, hPrev, b_index, b_pconnect, l_retry_interval, b_lazy_connect, d_connect_timeout, read_timeout, consistent TSRMLS_CC); break; default: diff --git a/redis_array.h b/redis_array.h index 458d814a..ba53fadc 100644 --- a/redis_array.h +++ b/redis_array.h @@ -37,6 +37,15 @@ PHP_METHOD(RedisArray, exec); PHP_METHOD(RedisArray, discard); PHP_METHOD(RedisArray, unwatch); +typedef struct { + uint32_t value; + int index; +} ContinuumPoint; + +typedef struct { + size_t nb_points; + ContinuumPoint *points; +} Continuum; typedef struct RedisArray_ { @@ -52,7 +61,7 @@ typedef struct RedisArray_ { HashTable *pure_cmds; /* hash table */ double connect_timeout; /* socket connect timeout */ double read_timeout; /* socket read timeout */ - + Continuum *continuum; struct RedisArray_ *prev; } RedisArray; diff --git a/redis_array_impl.c b/redis_array_impl.c index 6280b4ca..c5d4796c 100644 --- a/redis_array_impl.c +++ b/redis_array_impl.c @@ -24,6 +24,7 @@ #include "SAPI.h" #include "ext/standard/url.h" #include "ext/standard/crc32.h" +#include "ext/standard/md5.h" #define PHPREDIS_INDEX_NAME "__phpredis_array_index__" @@ -173,9 +174,10 @@ RedisArray *ra_load_array(const char *name TSRMLS_DC) { zval z_params_connect_timeout; zval z_params_read_timeout; zval z_params_lazy_connect; + zval z_params_consistent; RedisArray *ra = NULL; - zend_bool b_index = 0, b_autorehash = 0, b_pconnect = 0; + zend_bool b_index = 0, b_autorehash = 0, b_pconnect = 0, consistent = 0; long l_retry_interval = 0; zend_bool b_lazy_connect = 0; double d_connect_timeout = 0, read_timeout = 0.0; @@ -312,9 +314,20 @@ RedisArray *ra_load_array(const char *name TSRMLS_DC) { } } + /* find consistent option */ + array_init(&z_params_consistent); + if ((iptr = INI_STR("redis.arrays.consistent")) != NULL) { + sapi_module.treat_data(PARSE_STRING, estrdup(iptr), &z_params_consistent TSRMLS_CC); + } + if ((z_data = zend_hash_str_find(Z_ARRVAL(z_params_consistent), name, name_len)) != NULL) { + if (Z_TYPE_P(z_data) == IS_STRING && strncmp(Z_STRVAL_P(z_data), "1", 1) == 0) { + consistent = 1; + } + } + /* create RedisArray object */ - ra = ra_make_array(hHosts, &z_fun, &z_dist, hPrev, b_index, b_pconnect, l_retry_interval, b_lazy_connect, d_connect_timeout, read_timeout TSRMLS_CC); + ra = ra_make_array(hHosts, &z_fun, &z_dist, hPrev, b_index, b_pconnect, l_retry_interval, b_lazy_connect, d_connect_timeout, read_timeout, consistent TSRMLS_CC); if (ra) { ra->auto_rehash = b_autorehash; if(ra->prev) ra->prev->auto_rehash = b_autorehash; @@ -332,14 +345,55 @@ RedisArray *ra_load_array(const char *name TSRMLS_DC) { zval_dtor(&z_params_connect_timeout); zval_dtor(&z_params_read_timeout); zval_dtor(&z_params_lazy_connect); + zval_dtor(&z_params_consistent); zval_dtor(&z_dist); zval_dtor(&z_fun); return ra; } +static int +ra_points_cmp(const void *v1, const void *v2) +{ + const ContinuumPoint *p1 = v1, *p2 = v2; + + return p1->value < p2->value ? - 1 : p1->value > p2->value; +} + +static Continuum * +ra_make_continuum(zend_string **hosts, int nb_hosts) +{ + int i, j, k, len, idx = 0; + char host[HOST_NAME_MAX]; + unsigned char digest[16]; + PHP_MD5_CTX ctx; + Continuum *c; + + c = ecalloc(1, sizeof(*c)); + c->nb_points = nb_hosts * 160; /* 40 hashes, 4 numbers per hash = 160 points per server */ + c->points = ecalloc(c->nb_points, sizeof(*c->points)); + + for (i = 0; i < nb_hosts; ++i) { + for (j = 0; j < 40; ++j) { + len = snprintf(host, sizeof(host), "%.*s-%u", ZSTR_LEN(hosts[i]), ZSTR_VAL(hosts[i]), j); + PHP_MD5Init(&ctx); + PHP_MD5Update(&ctx, host, len); + PHP_MD5Final(digest, &ctx); + for (k = 0; k < 4; ++k) { + c->points[idx].index = i; + c->points[idx++].value = (digest[3 + k * 4] << 24) + | (digest[2 + k * 4] << 16) + | (digest[1 + k * 4] << 8) + | (digest[k * 4]); + } + } + } + qsort(c->points, c->nb_points, sizeof(*c->points), ra_points_cmp); + return c; +} + RedisArray * -ra_make_array(HashTable *hosts, zval *z_fun, zval *z_dist, HashTable *hosts_prev, zend_bool b_index, zend_bool b_pconnect, long retry_interval, zend_bool b_lazy_connect, double connect_timeout, double read_timeout TSRMLS_DC) { +ra_make_array(HashTable *hosts, zval *z_fun, zval *z_dist, HashTable *hosts_prev, zend_bool b_index, zend_bool b_pconnect, long retry_interval, zend_bool b_lazy_connect, double connect_timeout, double read_timeout, zend_bool consistent TSRMLS_DC) { int i, count; RedisArray *ra; @@ -357,6 +411,7 @@ ra_make_array(HashTable *hosts, zval *z_fun, zval *z_dist, HashTable *hosts_prev ra->pconnect = b_pconnect; ra->connect_timeout = connect_timeout; ra->read_timeout = read_timeout; + ra->continuum = NULL; if (ra_load_hosts(ra, hosts, retry_interval, b_lazy_connect TSRMLS_CC) == NULL || !ra->count) { for (i = 0; i < ra->count; ++i) { @@ -368,7 +423,7 @@ ra_make_array(HashTable *hosts, zval *z_fun, zval *z_dist, HashTable *hosts_prev efree(ra); return NULL; } - ra->prev = hosts_prev ? ra_make_array(hosts_prev, z_fun, z_dist, NULL, b_index, b_pconnect, retry_interval, b_lazy_connect, connect_timeout, read_timeout TSRMLS_CC) : NULL; + ra->prev = hosts_prev ? ra_make_array(hosts_prev, z_fun, z_dist, NULL, b_index, b_pconnect, retry_interval, b_lazy_connect, connect_timeout, read_timeout, consistent TSRMLS_CC) : NULL; /* init array data structures */ ra_init_function_table(ra); @@ -377,6 +432,11 @@ ra_make_array(HashTable *hosts, zval *z_fun, zval *z_dist, HashTable *hosts_prev ZVAL_ZVAL(&ra->z_fun, z_fun, 1, 0); ZVAL_ZVAL(&ra->z_dist, z_dist, 1, 0); + /* init continuum */ + if (consistent) { + ra->continuum = ra_make_continuum(ra->hosts, ra->count); + } + return ra; } @@ -480,7 +540,23 @@ ra_find_node(RedisArray *ra, const char *key, int key_len, int *out_pos TSRMLS_D } /* get position on ring */ - pos = (int)((ret ^ 0xffffffff) * ra->count / 0xffffffff); + if (ra->continuum) { + int left = 0, right = ra->continuum->nb_points; + while (left < right) { + i = (int)((left + right) / 2); + if (ra->continuum->points[i].value < ret) { + left = i + 1; + } else { + right = i; + } + } + if (right == ra->continuum->nb_points) { + right = 0; + } + pos = ra->continuum->points[right].index; + } else { + pos = (int)((ret ^ 0xffffffff) * ra->count / 0xffffffff); + } } else { pos = ra_call_distributor(ra, key, key_len TSRMLS_CC); if (pos < 0 || pos >= ra->count) { diff --git a/redis_array_impl.h b/redis_array_impl.h index 385daadf..fa5fd848 100644 --- a/redis_array_impl.h +++ b/redis_array_impl.h @@ -11,7 +11,7 @@ RedisArray *ra_load_hosts(RedisArray *ra, HashTable *hosts, long retry_interval, zend_bool b_lazy_connect TSRMLS_DC); RedisArray *ra_load_array(const char *name TSRMLS_DC); -RedisArray *ra_make_array(HashTable *hosts, zval *z_fun, zval *z_dist, HashTable *hosts_prev, zend_bool b_index, zend_bool b_pconnect, long retry_interval, zend_bool b_lazy_connect, double connect_timeout, double read_timeout TSRMLS_DC); +RedisArray *ra_make_array(HashTable *hosts, zval *z_fun, zval *z_dist, HashTable *hosts_prev, zend_bool b_index, zend_bool b_pconnect, long retry_interval, zend_bool b_lazy_connect, double connect_timeout, double read_timeout, zend_bool consistent TSRMLS_DC); zval *ra_find_node_by_name(RedisArray *ra, const char *host, int host_len TSRMLS_DC); zval *ra_find_node(RedisArray *ra, const char *key, int key_len, int *out_pos TSRMLS_DC); void ra_init_function_table(RedisArray *ra); diff --git a/redis_session.c b/redis_session.c index e1f79c17..1948c6c8 100644 --- a/redis_session.c +++ b/redis_session.c @@ -41,17 +41,6 @@ #include "SAPI.h" #include "ext/standard/url.h" -/* HOST_NAME_MAX doesn't exist everywhere */ -#ifndef HOST_NAME_MAX - #if defined(_POSIX_HOST_NAME_MAX) - #define HOST_NAME_MAX _POSIX_HOST_NAME_MAX - #elif defined(MAXHOSTNAMELEN) - #define HOST_NAME_MAX MAXHOSTNAMELEN - #else - #define HOST_NAME_MAX 255 - #endif -#endif - /* Session lock LUA as well as its SHA1 hash */ #define LOCK_RELEASE_LUA_STR "if redis.call(\"get\",KEYS[1]) == ARGV[1] then return redis.call(\"del\",KEYS[1]) else return 0 end" #define LOCK_RELEASE_LUA_LEN (sizeof(LOCK_RELEASE_LUA_STR) - 1) |